diff --git a/.github/cargo-deny-composite-action/cargo-deny-generator.sh b/.github/cargo-deny-composite-action/cargo-deny-generator.sh new file mode 100644 index 000000000000..3d9eba242cfb --- /dev/null +++ b/.github/cargo-deny-composite-action/cargo-deny-generator.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# +# Copyright (c) 2022 Red Hat +# +# SPDX-License-Identifier: Apache-2.0 +# + +script_dir=$(dirname "$(readlink -f "$0")") +parent_dir=$(realpath "${script_dir}/../..") +cidir="${parent_dir}/ci" +source "${cidir}/lib.sh" + +cargo_deny_file="${script_dir}/action.yaml" + +cat cargo-deny-skeleton.yaml.in > "${cargo_deny_file}" + +changed_files_status=$(run_get_pr_changed_file_details) +changed_files_status=$(echo "$changed_files_status" | grep "Cargo\.toml$" || true) +changed_files=$(echo "$changed_files_status" | awk '{print $NF}' || true) + +if [ -z "$changed_files" ]; then + cat >> "${cargo_deny_file}" << EOF + - run: echo "No Cargo.toml files to check" + shell: bash +EOF +fi + +for path in $changed_files +do + cat >> "${cargo_deny_file}" << EOF + + - name: ${path} + continue-on-error: true + shell: bash + run: | + pushd $(dirname ${path}) + cargo deny check + popd +EOF +done diff --git a/.github/cargo-deny-composite-action/cargo-deny-skeleton.yaml.in b/.github/cargo-deny-composite-action/cargo-deny-skeleton.yaml.in new file mode 100644 index 000000000000..e48d1f6c86b1 --- /dev/null +++ b/.github/cargo-deny-composite-action/cargo-deny-skeleton.yaml.in @@ -0,0 +1,30 @@ +# +# Copyright (c) 2022 Red Hat +# +# SPDX-License-Identifier: Apache-2.0 +# + +name: 'Cargo Crates Check' +description: 'Checks every Cargo.toml file using cargo-deny' + +env: + CARGO_TERM_COLOR: always + +runs: + using: "composite" + steps: + - name: Install Rust + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: nightly + override: true + + - name: Cache + uses: Swatinem/rust-cache@v2 + + - name: Install Cargo deny + shell: bash + run: | + which cargo + cargo install --locked cargo-deny || true diff --git a/.github/workflows/PR-wip-checks.yaml b/.github/workflows/PR-wip-checks.yaml index 97c35145a7ae..98195b8867fc 100644 --- a/.github/workflows/PR-wip-checks.yaml +++ b/.github/workflows/PR-wip-checks.yaml @@ -9,6 +9,10 @@ on: - labeled - unlabeled +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + jobs: pr_wip_check: runs-on: ubuntu-latest diff --git a/.github/workflows/add-backport-label.yaml b/.github/workflows/add-backport-label.yaml new file mode 100644 index 000000000000..790ff1721f00 --- /dev/null +++ b/.github/workflows/add-backport-label.yaml @@ -0,0 +1,104 @@ +name: Add backport label + +on: + pull_request: + types: + - opened + - synchronize + - reopened + - edited + - labeled + - unlabeled + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + check-issues: + if: ${{ github.event.label.name != 'auto-backport' }} + runs-on: ubuntu-latest + steps: + - name: Checkout code to allow hub to communicate with the project + if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} + uses: actions/checkout@v3 + + - name: Install hub extension script + run: | + pushd $(mktemp -d) &>/dev/null + git clone --single-branch --depth 1 "https://github.com/kata-containers/.github" && cd .github/scripts + sudo install hub-util.sh /usr/local/bin + popd &>/dev/null + + - name: Determine whether to add label + if: ${{ 
!contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + CONTAINS_AUTO_BACKPORT: ${{ contains(github.event.pull_request.labels.*.name, 'auto-backport') }} + id: add_label + run: | + pr=${{ github.event.pull_request.number }} + linked_issue_urls=$(hub-util.sh \ + list-issues-for-pr "$pr" |\ + grep -v "^\#" |\ + cut -d';' -f3 || true) + [ -z "$linked_issue_urls" ] && { + echo "::error::No linked issues for PR $pr" + exit 1 + } + has_bug=false + for issue_url in $(echo "$linked_issue_urls") + do + issue=$(echo "$issue_url"| awk -F\/ '{print $NF}' || true) + [ -z "$issue" ] && { + echo "::error::Cannot determine issue number from $issue_url for PR $pr" + exit 1 + } + labels=$(hub-util.sh list-labels-for-issue "$issue") + + label_names=$(echo $labels | jq -r '.[].name' || true) + if [[ "$label_names" =~ "bug" ]]; then + has_bug=true + break + fi + done + + has_backport_needed_label=${{ contains(github.event.pull_request.labels.*.name, 'needs-backport') }} + has_no_backport_needed_label=${{ contains(github.event.pull_request.labels.*.name, 'no-backport-needed') }} + + echo "add_backport_label=false" >> $GITHUB_OUTPUT + if [ $has_backport_needed_label = true ] || [ $has_bug = true ]; then + if [[ $has_no_backport_needed_label = false ]]; then + echo "add_backport_label=true" >> $GITHUB_OUTPUT + fi + fi + + # Do not spam comment, only if auto-backport label is going to be newly added. + echo "auto_backport_added=$CONTAINS_AUTO_BACKPORT" >> $GITHUB_OUTPUT + + - name: Add comment + if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') && steps.add_label.outputs.add_backport_label == 'true' && steps.add_label.outputs.auto_backport_added == 'false' }} + uses: actions/github-script@v6 + with: + script: | + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: 'This issue has been marked for auto-backporting. 
Add label(s) backport-to-BRANCHNAME to backport to them' + }) + + # Allow label to be removed by adding no-backport-needed label + - name: Remove auto-backport label + if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') && steps.add_label.outputs.add_backport_label == 'false' }} + uses: andymckay/labeler@e6c4322d0397f3240f0e7e30a33b5c5df2d39e90 + with: + remove-labels: "auto-backport" + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Add auto-backport label + if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') && steps.add_label.outputs.add_backport_label == 'true' }} + uses: andymckay/labeler@e6c4322d0397f3240f0e7e30a33b5c5df2d39e90 + with: + add-labels: "auto-backport" + repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/add-issues-to-project.yaml b/.github/workflows/add-issues-to-project.yaml index 93c31e7a1fcd..6ba266261f1a 100644 --- a/.github/workflows/add-issues-to-project.yaml +++ b/.github/workflows/add-issues-to-project.yaml @@ -11,6 +11,10 @@ on: - opened - reopened +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + jobs: add-new-issues-to-backlog: runs-on: ubuntu-latest @@ -35,7 +39,7 @@ jobs: popd &>/dev/null - name: Checkout code to allow hub to communicate with the project - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Add issue to issue backlog env: diff --git a/.github/workflows/add-pr-sizing-label.yaml b/.github/workflows/add-pr-sizing-label.yaml index ffd9b06a96b3..2fd0abc647a7 100644 --- a/.github/workflows/add-pr-sizing-label.yaml +++ b/.github/workflows/add-pr-sizing-label.yaml @@ -12,12 +12,25 @@ on: - reopened - synchronize +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + jobs: add-pr-size-label: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v1 + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} - name: Install PR sizing label script run: | diff --git a/.github/workflows/auto-backport.yaml b/.github/workflows/auto-backport.yaml new file mode 100644 index 000000000000..e2be39022798 --- /dev/null +++ b/.github/workflows/auto-backport.yaml @@ -0,0 +1,33 @@ +on: + pull_request_target: + types: ["labeled", "closed"] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + backport: + name: Backport PR + runs-on: ubuntu-latest + if: | + github.event.pull_request.merged == true + && contains(github.event.pull_request.labels.*.name, 'auto-backport') + && ( + (github.event.action == 'labeled' && github.event.label.name == 'auto-backport') + || (github.event.action == 'closed') + ) + steps: + - name: Backport Action + uses: sqren/backport-github-action@v8.9.2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + auto_backport_label_prefix: backport-to- + + - name: Info log + if: ${{ success() }} + run: cat /home/runner/.backport/backport.info.log + + - name: Debug log + if: ${{ failure() }} + run: cat /home/runner/.backport/backport.debug.log diff --git a/.github/workflows/build-kata-static-tarball-amd64.yaml b/.github/workflows/build-kata-static-tarball-amd64.yaml new file mode 
100644 index 000000000000..20a59bb8c4df --- /dev/null +++ b/.github/workflows/build-kata-static-tarball-amd64.yaml @@ -0,0 +1,129 @@ +name: CI | Build kata-static tarball for amd64 +on: + workflow_call: + inputs: + stage: + required: false + type: string + default: test + tarball-suffix: + required: false + type: string + push-to-registry: + required: false + type: string + default: no + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + build-asset: + runs-on: ubuntu-latest + strategy: + matrix: + asset: + - cloud-hypervisor + - cloud-hypervisor-glibc + - firecracker + - kernel + - kernel-sev + - kernel-dragonball-experimental + - kernel-tdx-experimental + - kernel-nvidia-gpu + - kernel-nvidia-gpu-snp + - kernel-nvidia-gpu-tdx-experimental + - nydus + - ovmf + - ovmf-sev + - qemu + - qemu-snp-experimental + - qemu-tdx-experimental + - rootfs-image + - rootfs-image-tdx + - rootfs-initrd + - rootfs-initrd-mariner + - rootfs-initrd-sev + - shim-v2 + - tdvf + - virtiofsd + stage: + - ${{ inputs.stage }} + exclude: + - asset: cloud-hypervisor-glibc + stage: release + steps: + - name: Login to Kata Containers quay.io + if: ${{ inputs.push-to-registry == 'yes' }} + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 # This is needed in order to keep the commit ids history + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Build ${{ matrix.asset }} + run: | + make "${KATA_ASSET}-tarball" + build_dir=$(readlink -f build) + # store-artifact does not work with symlink + sudo cp -r "${build_dir}" "kata-build" + env: + KATA_ASSET: ${{ matrix.asset }} + TAR_OUTPUT: ${{ matrix.asset }}.tar.gz + PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }} + ARTEFACT_REGISTRY: ghcr.io + ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }} + ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: store-artifact ${{ matrix.asset }} + uses: actions/upload-artifact@v3 + with: + name: kata-artifacts-amd64${{ inputs.tarball-suffix }} + path: kata-build/kata-static-${{ matrix.asset }}.tar.xz + retention-days: 1 + if-no-files-found: error + + create-kata-tarball: + runs-on: ubuntu-latest + needs: build-asset + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + - name: get-artifacts + uses: actions/download-artifact@v3 + with: + name: kata-artifacts-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + - name: merge-artifacts + run: | + ./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts versions.yaml + - name: store-artifacts + uses: actions/upload-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-static.tar.xz + retention-days: 1 + if-no-files-found: error diff --git a/.github/workflows/build-kata-static-tarball-arm64.yaml b/.github/workflows/build-kata-static-tarball-arm64.yaml new file mode 100644 index 000000000000..4225abee8bc6 --- /dev/null +++ 
b/.github/workflows/build-kata-static-tarball-arm64.yaml @@ -0,0 +1,120 @@ +name: CI | Build kata-static tarball for arm64 +on: + workflow_call: + inputs: + stage: + required: false + type: string + default: test + tarball-suffix: + required: false + type: string + push-to-registry: + required: false + type: string + default: no + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + build-asset: + runs-on: arm64 + strategy: + matrix: + asset: + - cloud-hypervisor + - firecracker + - kernel + - kernel-dragonball-experimental + - nydus + - qemu + - rootfs-image + - rootfs-initrd + - shim-v2 + - virtiofsd + stage: + - ${{ inputs.stage }} + steps: + - name: Adjust a permission for repo + run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - name: Login to Kata Containers quay.io + if: ${{ inputs.push-to-registry == 'yes' }} + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 # This is needed in order to keep the commit ids history + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Build ${{ matrix.asset }} + run: | + make "${KATA_ASSET}-tarball" + build_dir=$(readlink -f build) + # store-artifact does not work with symlink + sudo cp -r "${build_dir}" "kata-build" + env: + KATA_ASSET: ${{ matrix.asset }} + TAR_OUTPUT: ${{ matrix.asset }}.tar.gz + PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }} + ARTEFACT_REGISTRY: ghcr.io + ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }} + ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: store-artifact ${{ matrix.asset }} + uses: actions/upload-artifact@v3 + with: + name: kata-artifacts-arm64${{ inputs.tarball-suffix }} + path: kata-build/kata-static-${{ matrix.asset }}.tar.xz + retention-days: 1 + if-no-files-found: error + + create-kata-tarball: + runs-on: arm64 + needs: build-asset + steps: + - name: Adjust a permission for repo + run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + - name: get-artifacts + uses: actions/download-artifact@v3 + with: + name: kata-artifacts-arm64${{ inputs.tarball-suffix }} + path: kata-artifacts + - name: merge-artifacts + run: | + ./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts versions.yaml + - name: store-artifacts + uses: actions/upload-artifact@v3 + with: + name: kata-static-tarball-arm64${{ inputs.tarball-suffix }} + path: kata-static.tar.xz + retention-days: 1 + if-no-files-found: error diff --git a/.github/workflows/build-kata-static-tarball-ppc64le.yaml b/.github/workflows/build-kata-static-tarball-ppc64le.yaml new file mode 100644 index 000000000000..d82b78b76fde --- /dev/null +++ b/.github/workflows/build-kata-static-tarball-ppc64le.yaml @@ -0,0 +1,117 @@ +name: CI | Build kata-static tarball for ppc64le +on: + workflow_call: + inputs: + stage: + required: false + type: string + default: test + tarball-suffix: + required: false + type: string + 
push-to-registry: + required: false + type: string + default: no + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + build-asset: + runs-on: ppc64le + strategy: + matrix: + asset: + - kernel + - qemu + - rootfs-image + - rootfs-initrd + - shim-v2 + - virtiofsd + stage: + - ${{ inputs.stage }} + steps: + - name: Adjust a permission for repo + run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - name: Login to Kata Containers quay.io + if: ${{ inputs.push-to-registry == 'yes' }} + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 # This is needed in order to keep the commit ids history + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Build ${{ matrix.asset }} + run: | + CROSS_BUILD=true ARCH=ppc64le TARGET_ARCH=ppc64le make "${KATA_ASSET}-tarball" + build_dir=$(readlink -f build) + # store-artifact does not work with symlink + sudo cp -r "${build_dir}" "kata-build" + sudo chown -R $(id -u):$(id -g) "kata-build" + env: + KATA_ASSET: ${{ matrix.asset }} + TAR_OUTPUT: ${{ matrix.asset }}.tar.gz + PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }} + ARTEFACT_REGISTRY: ghcr.io + ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }} + ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: store-artifact ${{ matrix.asset }} + uses: actions/upload-artifact@v3 + with: + name: kata-artifacts-ppc64le${{ inputs.tarball-suffix }} + path: kata-build/kata-static-${{ matrix.asset }}.tar.xz + retention-days: 1 + if-no-files-found: error + + create-kata-tarball: + runs-on: ppc64le + needs: build-asset + steps: + - name: Adjust a permission for repo + run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + - name: get-artifacts + uses: actions/download-artifact@v3 + with: + name: kata-artifacts-ppc64le${{ inputs.tarball-suffix }} + path: kata-artifacts + - name: merge-artifacts + run: | + ./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts versions.yaml + - name: store-artifacts + uses: actions/upload-artifact@v3 + with: + name: kata-static-tarball-ppc64le${{ inputs.tarball-suffix }} + path: kata-static.tar.xz + retention-days: 1 + if-no-files-found: error diff --git a/.github/workflows/build-kata-static-tarball-s390x.yaml b/.github/workflows/build-kata-static-tarball-s390x.yaml new file mode 100644 index 000000000000..90be3aa771d2 --- /dev/null +++ b/.github/workflows/build-kata-static-tarball-s390x.yaml @@ -0,0 +1,117 @@ +name: CI | Build kata-static tarball for s390x +on: + workflow_call: + inputs: + stage: + required: false + type: string + default: test + tarball-suffix: + required: false + type: string + push-to-registry: + required: false + type: string + default: no + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + build-asset: + runs-on: s390x + strategy: + matrix: + asset: + - 
kernel + - qemu + - rootfs-image + - rootfs-initrd + - shim-v2 + - virtiofsd + stage: + - ${{ inputs.stage }} + steps: + - name: Adjust a permission for repo + run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - name: Login to Kata Containers quay.io + if: ${{ inputs.push-to-registry == 'yes' }} + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 # This is needed in order to keep the commit ids history + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Build ${{ matrix.asset }} + run: | + make "${KATA_ASSET}-tarball" + build_dir=$(readlink -f build) + # store-artifact does not work with symlink + sudo cp -r "${build_dir}" "kata-build" + sudo chown -R $(id -u):$(id -g) "kata-build" + env: + KATA_ASSET: ${{ matrix.asset }} + TAR_OUTPUT: ${{ matrix.asset }}.tar.gz + PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }} + ARTEFACT_REGISTRY: ghcr.io + ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }} + ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: store-artifact ${{ matrix.asset }} + uses: actions/upload-artifact@v3 + with: + name: kata-artifacts-s390x${{ inputs.tarball-suffix }} + path: kata-build/kata-static-${{ matrix.asset }}.tar.xz + retention-days: 1 + if-no-files-found: error + + create-kata-tarball: + runs-on: s390x + needs: build-asset + steps: + - name: Adjust a permission for repo + run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + - name: get-artifacts + uses: actions/download-artifact@v3 + with: + name: kata-artifacts-s390x${{ inputs.tarball-suffix }} + path: kata-artifacts + - name: merge-artifacts + run: | + ./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts versions.yaml + - name: store-artifacts + uses: actions/upload-artifact@v3 + with: + name: kata-static-tarball-s390x${{ inputs.tarball-suffix }} + path: kata-static.tar.xz + retention-days: 1 + if-no-files-found: error diff --git a/.github/workflows/cargo-deny-runner.yaml b/.github/workflows/cargo-deny-runner.yaml new file mode 100644 index 000000000000..21d3d1f53e73 --- /dev/null +++ b/.github/workflows/cargo-deny-runner.yaml @@ -0,0 +1,31 @@ +name: Cargo Crates Check Runner +on: + pull_request: + types: + - opened + - edited + - reopened + - synchronize + paths-ignore: [ '**.md', '**.png', '**.jpg', '**.jpeg', '**.svg', '/docs/**' ] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + cargo-deny-runner: + runs-on: ubuntu-latest + + steps: + - name: Checkout Code + if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} + uses: actions/checkout@v3 + - name: Generate Action + if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} + run: bash cargo-deny-generator.sh + working-directory: ./.github/cargo-deny-composite-action/ + env: + GOPATH: ${{ runner.workspace }}/kata-containers + - name: Run 
Action + if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} + uses: ./.github/cargo-deny-composite-action diff --git a/.github/workflows/ci-nightly.yaml b/.github/workflows/ci-nightly.yaml new file mode 100644 index 000000000000..75f5f2667518 --- /dev/null +++ b/.github/workflows/ci-nightly.yaml @@ -0,0 +1,19 @@ +name: Kata Containers Nightly CI +on: + schedule: + - cron: '0 0 * * *' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + kata-containers-ci-on-push: + uses: ./.github/workflows/ci.yaml + with: + commit-hash: ${{ github.sha }} + pr-number: "nightly" + tag: ${{ github.sha }}-nightly + target-branch: ${{ github.ref_name }} + secrets: inherit diff --git a/.github/workflows/ci-on-push.yaml b/.github/workflows/ci-on-push.yaml new file mode 100644 index 000000000000..59a297b784b0 --- /dev/null +++ b/.github/workflows/ci-on-push.yaml @@ -0,0 +1,32 @@ +name: Kata Containers CI +on: + pull_request_target: + branches: + - 'main' + - 'stable-*' + types: + # Adding 'labeled' to the list of activity types that trigger this event + # (default: opened, synchronize, reopened) so that we can run this + # workflow when the 'ok-to-test' label is added. + # Reference: https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target + - opened + - synchronize + - reopened + - labeled + paths-ignore: + - 'docs/**' + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + kata-containers-ci-on-push: + if: ${{ contains(github.event.pull_request.labels.*.name, 'ok-to-test') }} + uses: ./.github/workflows/ci.yaml + with: + commit-hash: ${{ github.event.pull_request.head.sha }} + pr-number: ${{ github.event.pull_request.number }} + tag: ${{ github.event.pull_request.number }}-${{ github.event.pull_request.head.sha }} + target-branch: ${{ github.event.pull_request.base.ref }} + secrets: inherit diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 000000000000..7479e6777793 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,226 @@ +name: Run the Kata Containers CI +on: + workflow_call: + inputs: + commit-hash: + required: true + type: string + pr-number: + required: true + type: string + tag: + required: true + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + build-kata-static-tarball-amd64: + uses: ./.github/workflows/build-kata-static-tarball-amd64.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + + publish-kata-deploy-payload-amd64: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/publish-kata-deploy-payload-amd64.yaml + with: + tarball-suffix: -${{ inputs.tag }} + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + secrets: inherit + + build-and-publish-tee-confidential-unencrypted-image: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Set up 
QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Login to Kata Containers ghcr.io + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Docker build and push + uses: docker/build-push-action@v4 + with: + tags: ghcr.io/kata-containers/test-images:unencrypted-${{ inputs.pr-number }} + push: true + context: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ + platforms: linux/amd64, linux/s390x + file: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile + + run-docker-tests-on-garm: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-docker-tests-on-garm.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + + run-nerdctl-tests-on-garm: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-nerdctl-tests-on-garm.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + + run-kata-deploy-tests-on-aks: + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-kata-deploy-tests-on-aks.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + secrets: inherit + + run-kata-deploy-tests-on-garm: + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-kata-deploy-tests-on-garm.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + secrets: inherit + + run-kata-deploy-tests-on-tdx: + needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] + uses: ./.github/workflows/run-kata-deploy-tests-on-tdx.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + + run-k8s-tests-on-aks: + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-k8s-tests-on-aks.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + secrets: inherit + + run-k8s-tests-on-garm: + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-k8s-tests-on-garm.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + secrets: inherit + + run-k8s-tests-with-crio-on-garm: + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-k8s-tests-with-crio-on-garm.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ 
inputs.target-branch }} + secrets: inherit + + run-k8s-tests-on-sev: + needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] + uses: ./.github/workflows/run-k8s-tests-on-sev.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + + run-k8s-tests-on-snp: + needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] + uses: ./.github/workflows/run-k8s-tests-on-snp.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + + run-k8s-tests-on-tdx: + needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] + uses: ./.github/workflows/run-k8s-tests-on-tdx.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + + run-metrics-tests: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-metrics.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + + run-cri-containerd-tests: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-cri-containerd-tests.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + + run-nydus-tests: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-nydus-tests.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + + run-vfio-tests: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-vfio-tests.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} diff --git a/.github/workflows/commit-message-check.yaml b/.github/workflows/commit-message-check.yaml index fbdb02b6df84..b54c0a7e402b 100644 --- a/.github/workflows/commit-message-check.yaml +++ b/.github/workflows/commit-message-check.yaml @@ -6,6 +6,10 @@ on: - reopened - synchronize +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: error_msg: |+ See the document below for help on formatting commits for the project. @@ -62,6 +66,9 @@ jobs: # to be specified at the start of the regex as the action is passed # the entire commit message. # + # - This check will pass if the commit message only contains a subject + # line, as other body message properties are enforced elsewhere. + # # - Body lines *can* be longer than the maximum if they start # with a non-alphabetic character or if there is no whitespace in # the line. 
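For reference, the body-line rules spelled out in the comments above (and encoded by the relaxed pattern in the hunk that follows) can be approximated locally with a small helper. This is only a rough sketch of the intent, not the regex engine the commit-message-checker action actually runs, and the helper name is illustrative:

    #!/bin/bash
    # Rough approximation of the commit-message body rules described above:
    # a subject-only message passes, and body lines longer than 150 characters
    # are tolerated only if they start with a non-alphabetic character, contain
    # no whitespace, or are a Signed-off-by trailer.
    check_body_lines() {
        local msg="$1"
        # Subject-only message: nothing after the first line, so nothing to check.
        [ "$(printf '%s\n' "$msg" | wc -l)" -le 1 ] && return 0
        while IFS= read -r line; do
            [ "${#line}" -le 150 ] && continue
            case "$line" in
                Signed-off-by:*) continue ;;    # SoB lines may be any length
                [!a-zA-Z]*)      continue ;;    # e.g. quoted output or indented logs
            esac
            [[ "$line" == *[[:space:]]* ]] || continue   # single long token (URL, hash)
            echo "Body line too long (max 150): ${line:0:60}..." >&2
            return 1
        done < <(printf '%s\n' "$msg" | tail -n +2)
        return 0
    }

    # Example: check the HEAD commit of the current checkout.
    check_body_lines "$(git log -1 --format=%B)" && echo "commit message body OK"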
@@ -75,7 +82,7 @@ jobs: # # - A SoB comment can be any length (as it is unreasonable to penalise # people with long names/email addresses :) - pattern: '^.+(\n([a-zA-Z].{0,150}|[^a-zA-Z\n].*|[^\s\n]*|Signed-off-by:.*|))+$' + pattern: '(^[^\n]+$|^.+(\n([a-zA-Z].{0,150}|[^a-zA-Z\n].*|[^\s\n]*|Signed-off-by:.*|))+$)' error: 'Body line too long (max 150)' post_error: ${{ env.error_msg }} diff --git a/.github/workflows/darwin-tests.yaml b/.github/workflows/darwin-tests.yaml index 5a83add32d85..8b3f9041a705 100644 --- a/.github/workflows/darwin-tests.yaml +++ b/.github/workflows/darwin-tests.yaml @@ -5,21 +5,22 @@ on: - edited - reopened - synchronize + paths-ignore: [ '**.md', '**.png', '**.jpg', '**.jpeg', '**.svg', '/docs/**' ] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true name: Darwin tests jobs: test: - strategy: - matrix: - go-version: [1.16.x, 1.17.x] - os: [macos-latest] - runs-on: ${{ matrix.os }} + runs-on: macos-latest steps: - name: Install Go uses: actions/setup-go@v2 with: - go-version: ${{ matrix.go-version }} + go-version: 1.19.3 - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Build utils run: ./ci/darwin-test.sh diff --git a/.github/workflows/docs-url-alive-check.yaml b/.github/workflows/docs-url-alive-check.yaml index 0ef3e47d1396..543215f77e00 100644 --- a/.github/workflows/docs-url-alive-check.yaml +++ b/.github/workflows/docs-url-alive-check.yaml @@ -5,11 +5,7 @@ on: name: Docs URL Alive Check jobs: test: - strategy: - matrix: - go-version: [1.17.x] - os: [ubuntu-20.04] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-20.04 # don't run this action on forks if: github.repository_owner == 'kata-containers' env: @@ -18,7 +14,7 @@ jobs: - name: Install Go uses: actions/setup-go@v2 with: - go-version: ${{ matrix.go-version }} + go-version: 1.19.3 env: GOPATH: ${{ runner.workspace }}/kata-containers - name: Set env @@ -26,7 +22,7 @@ jobs: echo "GOPATH=${{ github.workspace }}" >> $GITHUB_ENV echo "${{ github.workspace }}/bin" >> $GITHUB_PATH - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 path: ./src/github.com/${{ github.repository }} diff --git a/.github/workflows/kata-deploy-push.yaml b/.github/workflows/kata-deploy-push.yaml deleted file mode 100644 index 090f89195eb6..000000000000 --- a/.github/workflows/kata-deploy-push.yaml +++ /dev/null @@ -1,84 +0,0 @@ -name: kata deploy build - -on: - pull_request: - types: - - opened - - edited - - reopened - - synchronize - paths: - - tools/** - - versions.yaml - -jobs: - build-asset: - runs-on: ubuntu-latest - strategy: - matrix: - asset: - - kernel - - shim-v2 - - qemu - - cloud-hypervisor - - firecracker - - rootfs-image - - rootfs-initrd - - virtiofsd - steps: - - uses: actions/checkout@v2 - - name: Install docker - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - curl -fsSL https://test.docker.com -o test-docker.sh - sh test-docker.sh - - - name: Build ${{ matrix.asset }} - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - make "${KATA_ASSET}-tarball" - build_dir=$(readlink -f build) - # store-artifact does not work with symlink - sudo cp -r --preserve=all "${build_dir}" "kata-build" - env: - KATA_ASSET: ${{ matrix.asset }} - - - name: store-artifact ${{ matrix.asset }} - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - uses: actions/upload-artifact@v2 - 
with: - name: kata-artifacts - path: kata-build/kata-static-${{ matrix.asset }}.tar.xz - if-no-files-found: error - - create-kata-tarball: - runs-on: ubuntu-latest - needs: build-asset - steps: - - uses: actions/checkout@v2 - - name: get-artifacts - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - uses: actions/download-artifact@v2 - with: - name: kata-artifacts - path: build - - name: merge-artifacts - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - make merge-builds - - name: store-artifacts - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - uses: actions/upload-artifact@v2 - with: - name: kata-static-tarball - path: kata-static.tar.xz - - make-kata-tarball: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: make kata-tarball - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - make kata-tarball - sudo make install-tarball diff --git a/.github/workflows/kata-deploy-test.yaml b/.github/workflows/kata-deploy-test.yaml deleted file mode 100644 index e0d6afd7c122..000000000000 --- a/.github/workflows/kata-deploy-test.yaml +++ /dev/null @@ -1,149 +0,0 @@ -on: - workflow_dispatch: # this is used to trigger the workflow on non-main branches - issue_comment: - types: [created, edited] - -name: test-kata-deploy - -jobs: - check-comment-and-membership: - runs-on: ubuntu-latest - if: | - github.event.issue.pull_request - && github.event_name == 'issue_comment' - && github.event.action == 'created' - && startsWith(github.event.comment.body, '/test_kata_deploy') - steps: - - name: Check membership - uses: kata-containers/is-organization-member@1.0.1 - id: is_organization_member - with: - organization: kata-containers - username: ${{ github.event.comment.user.login }} - token: ${{ secrets.GITHUB_TOKEN }} - - name: Fail if not member - run: | - result=${{ steps.is_organization_member.outputs.result }} - if [ $result == false ]; then - user=${{ github.event.comment.user.login }} - echo Either ${user} is not part of the kata-containers organization - echo or ${user} has its Organization Visibility set to Private at - echo https://github.com/orgs/kata-containers/people?query=${user} - echo - echo Ensure you change your Organization Visibility to Public and - echo trigger the test again. 
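    # A minimal sketch of what the removed workflow's get-PR-ref steps (below)
    # compute: derive a fetchable merge ref from the pull request API URL found
    # in the issue_comment event payload. The URL here is an illustrative value,
    # not taken from the diff.
    pr_url="https://api.github.com/repos/kata-containers/kata-containers/pulls/123"
    ref=$(echo "$pr_url" | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
    echo "$ref"    # prints: refs/pull/123/merge
    # Such a ref is typically consumed with:
    #   git fetch origin "$ref" && git checkout FETCH_HEAD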
- exit 1 - fi - - build-asset: - runs-on: ubuntu-latest - needs: check-comment-and-membership - strategy: - matrix: - asset: - - cloud-hypervisor - - firecracker - - kernel - - qemu - - rootfs-image - - rootfs-initrd - - shim-v2 - - virtiofsd - steps: - - name: get-PR-ref - id: get-PR-ref - run: | - ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#') - echo "reference for PR: " ${ref} - echo "##[set-output name=pr-ref;]${ref}" - - uses: actions/checkout@v2 - with: - ref: ${{ steps.get-PR-ref.outputs.pr-ref }} - - - name: Install docker - run: | - curl -fsSL https://test.docker.com -o test-docker.sh - sh test-docker.sh - - - name: Build ${{ matrix.asset }} - run: | - make "${KATA_ASSET}-tarball" - build_dir=$(readlink -f build) - # store-artifact does not work with symlink - sudo cp -r "${build_dir}" "kata-build" - env: - KATA_ASSET: ${{ matrix.asset }} - TAR_OUTPUT: ${{ matrix.asset }}.tar.gz - - - name: store-artifact ${{ matrix.asset }} - uses: actions/upload-artifact@v2 - with: - name: kata-artifacts - path: kata-build/kata-static-${{ matrix.asset }}.tar.xz - if-no-files-found: error - - create-kata-tarball: - runs-on: ubuntu-latest - needs: build-asset - steps: - - name: get-PR-ref - id: get-PR-ref - run: | - ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#') - echo "reference for PR: " ${ref} - echo "##[set-output name=pr-ref;]${ref}" - - uses: actions/checkout@v2 - with: - ref: ${{ steps.get-PR-ref.outputs.pr-ref }} - - name: get-artifacts - uses: actions/download-artifact@v2 - with: - name: kata-artifacts - path: kata-artifacts - - name: merge-artifacts - run: | - ./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts - - name: store-artifacts - uses: actions/upload-artifact@v2 - with: - name: kata-static-tarball - path: kata-static.tar.xz - - kata-deploy: - needs: create-kata-tarball - runs-on: ubuntu-latest - steps: - - name: get-PR-ref - id: get-PR-ref - run: | - ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#') - echo "reference for PR: " ${ref} - echo "##[set-output name=pr-ref;]${ref}" - - uses: actions/checkout@v2 - with: - ref: ${{ steps.get-PR-ref.outputs.pr-ref }} - - name: get-kata-tarball - uses: actions/download-artifact@v2 - with: - name: kata-static-tarball - - name: build-and-push-kata-deploy-ci - id: build-and-push-kata-deploy-ci - run: | - PR_SHA=$(git log --format=format:%H -n1) - mv kata-static.tar.xz $GITHUB_WORKSPACE/tools/packaging/kata-deploy/kata-static.tar.xz - docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t quay.io/kata-containers/kata-deploy-ci:$PR_SHA $GITHUB_WORKSPACE/tools/packaging/kata-deploy - docker login -u ${{ secrets.QUAY_DEPLOYER_USERNAME }} -p ${{ secrets.QUAY_DEPLOYER_PASSWORD }} quay.io - docker push quay.io/kata-containers/kata-deploy-ci:$PR_SHA - mkdir -p packaging/kata-deploy - ln -s $GITHUB_WORKSPACE/tools/packaging/kata-deploy/action packaging/kata-deploy/action - echo "::set-output name=PKG_SHA::${PR_SHA}" - - name: test-kata-deploy-ci-in-aks - uses: ./packaging/kata-deploy/action - with: - packaging-sha: ${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} - env: - PKG_SHA: ${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} - AZ_APPID: ${{ secrets.AZ_APPID }} - AZ_PASSWORD: ${{ secrets.AZ_PASSWORD }} - AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }} - AZ_TENANT_ID: ${{ 
secrets.AZ_TENANT_ID }} diff --git a/.github/workflows/kata-runtime-classes-sync.yaml b/.github/workflows/kata-runtime-classes-sync.yaml new file mode 100644 index 000000000000..9cb995df17b7 --- /dev/null +++ b/.github/workflows/kata-runtime-classes-sync.yaml @@ -0,0 +1,36 @@ +on: + pull_request: + types: + - opened + - edited + - reopened + - synchronize + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + kata-deploy-runtime-classes-check: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + - name: Ensure the split out runtime classes match the all-in-one file + run: | + pushd tools/packaging/kata-deploy/runtimeclasses/ + echo "::group::Combine runtime classes" + for runtimeClass in `find . -type f \( -name "*.yaml" -and -not -name "kata-runtimeClasses.yaml" \) | sort`; do + echo "Adding ${runtimeClass} to the resultingRuntimeClasses.yaml" + cat ${runtimeClass} >> resultingRuntimeClasses.yaml; + done + echo "::endgroup::" + echo "::group::Displaying the content of resultingRuntimeClasses.yaml" + cat resultingRuntimeClasses.yaml + echo "::endgroup::" + echo "" + echo "::group::Displaying the content of kata-runtimeClasses.yaml" + cat kata-runtimeClasses.yaml + echo "::endgroup::" + echo "" + diff resultingRuntimeClasses.yaml kata-runtimeClasses.yaml diff --git a/.github/workflows/move-issues-to-in-progress.yaml b/.github/workflows/move-issues-to-in-progress.yaml index 0e15abaea3e5..23819e18c4d9 100644 --- a/.github/workflows/move-issues-to-in-progress.yaml +++ b/.github/workflows/move-issues-to-in-progress.yaml @@ -38,7 +38,17 @@ jobs: - name: Checkout code to allow hub to communicate with the project if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - uses: actions/checkout@v2 + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} - name: Move issue to "In progress" if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} diff --git a/.github/workflows/payload-after-push.yaml b/.github/workflows/payload-after-push.yaml new file mode 100644 index 000000000000..bcc2aa7a0390 --- /dev/null +++ b/.github/workflows/payload-after-push.yaml @@ -0,0 +1,91 @@ +name: CI | Publish Kata Containers payload +on: + push: + branches: + - main + - stable-* + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + build-assets-amd64: + uses: ./.github/workflows/build-kata-static-tarball-amd64.yaml + with: + commit-hash: ${{ github.sha }} + push-to-registry: yes + target-branch: ${{ github.ref_name }} + secrets: inherit + + build-assets-arm64: + uses: ./.github/workflows/build-kata-static-tarball-arm64.yaml + with: + commit-hash: ${{ github.sha }} + push-to-registry: yes + target-branch: ${{ github.ref_name }} + secrets: inherit + + build-assets-s390x: + uses: ./.github/workflows/build-kata-static-tarball-s390x.yaml + with: + commit-hash: ${{ github.sha }} + push-to-registry: yes + target-branch: ${{ github.ref_name }} + secrets: inherit + + publish-kata-deploy-payload-amd64: + needs: build-assets-amd64 + uses: 
./.github/workflows/publish-kata-deploy-payload-amd64.yaml + with: + commit-hash: ${{ github.sha }} + registry: quay.io + repo: kata-containers/kata-deploy-ci + tag: kata-containers-amd64 + target-branch: ${{ github.ref_name }} + secrets: inherit + + publish-kata-deploy-payload-arm64: + needs: build-assets-arm64 + uses: ./.github/workflows/publish-kata-deploy-payload-arm64.yaml + with: + commit-hash: ${{ github.sha }} + registry: quay.io + repo: kata-containers/kata-deploy-ci + tag: kata-containers-arm64 + target-branch: ${{ github.ref_name }} + secrets: inherit + + publish-kata-deploy-payload-s390x: + needs: build-assets-s390x + uses: ./.github/workflows/publish-kata-deploy-payload-s390x.yaml + with: + commit-hash: ${{ github.sha }} + registry: quay.io + repo: kata-containers/kata-deploy-ci + tag: kata-containers-s390x + target-branch: ${{ github.ref_name }} + secrets: inherit + + publish-manifest: + runs-on: ubuntu-latest + needs: [publish-kata-deploy-payload-amd64, publish-kata-deploy-payload-arm64, publish-kata-deploy-payload-s390x] + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Login to Kata Containers quay.io + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - name: Push multi-arch manifest + run: | + docker manifest create quay.io/kata-containers/kata-deploy-ci:kata-containers-latest \ + --amend quay.io/kata-containers/kata-deploy-ci:kata-containers-amd64 \ + --amend quay.io/kata-containers/kata-deploy-ci:kata-containers-arm64 \ + --amend quay.io/kata-containers/kata-deploy-ci:kata-containers-s390x + docker manifest push quay.io/kata-containers/kata-deploy-ci:kata-containers-latest diff --git a/.github/workflows/publish-kata-deploy-payload-amd64.yaml b/.github/workflows/publish-kata-deploy-payload-amd64.yaml new file mode 100644 index 000000000000..253b93fbcbf7 --- /dev/null +++ b/.github/workflows/publish-kata-deploy-payload-amd64.yaml @@ -0,0 +1,66 @@ +name: CI | Publish kata-deploy payload for amd64 +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + kata-payload: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + + - name: Login to Kata Containers quay.io + if: ${{ inputs.registry == 'quay.io' }} + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - name: Login to Kata Containers ghcr.io + if: ${{ inputs.registry == 'ghcr.io' }} + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: build-and-push-kata-payload + id: build-and-push-kata-payload + run: | + ./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \ + $(pwd)/kata-static.tar.xz \ + ${{ inputs.registry 
}}/${{ inputs.repo }} ${{ inputs.tag }} diff --git a/.github/workflows/publish-kata-deploy-payload-arm64.yaml b/.github/workflows/publish-kata-deploy-payload-arm64.yaml new file mode 100644 index 000000000000..0bdf909f169a --- /dev/null +++ b/.github/workflows/publish-kata-deploy-payload-arm64.yaml @@ -0,0 +1,71 @@ +name: CI | Publish kata-deploy payload for arm64 +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + kata-payload: + runs-on: arm64 + steps: + - name: Adjust a permission for repo + run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-arm64${{ inputs.tarball-suffix }} + + - name: Login to Kata Containers quay.io + if: ${{ inputs.registry == 'quay.io' }} + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - name: Login to Kata Containers ghcr.io + if: ${{ inputs.registry == 'ghcr.io' }} + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: build-and-push-kata-payload + id: build-and-push-kata-payload + run: | + ./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \ + $(pwd)/kata-static.tar.xz \ + ${{ inputs.registry }}/${{ inputs.repo }} ${{ inputs.tag }} + diff --git a/.github/workflows/publish-kata-deploy-payload-s390x.yaml b/.github/workflows/publish-kata-deploy-payload-s390x.yaml new file mode 100644 index 000000000000..6d1d44f7b743 --- /dev/null +++ b/.github/workflows/publish-kata-deploy-payload-s390x.yaml @@ -0,0 +1,70 @@ +name: CI | Publish kata-deploy payload for s390x +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + kata-payload: + runs-on: s390x + steps: + - name: Adjust a permission for repo + run: | + sudo chown -R $USER:$USER $GITHUB_WORKSPACE + + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-s390x${{ inputs.tarball-suffix }} + + - name: Login to Kata Containers quay.io + if: ${{ inputs.registry == 'quay.io' }} + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - name: Login to Kata Containers ghcr.io + if: ${{ inputs.registry == 'ghcr.io' }} + uses: docker/login-action@v2 + with: + registry: ghcr.io + 
username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: build-and-push-kata-payload + id: build-and-push-kata-payload + run: | + ./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \ + $(pwd)/kata-static.tar.xz \ + ${{ inputs.registry }}/${{ inputs.repo }} ${{ inputs.tag }} diff --git a/.github/workflows/release-amd64.yaml b/.github/workflows/release-amd64.yaml new file mode 100644 index 000000000000..6ff990696904 --- /dev/null +++ b/.github/workflows/release-amd64.yaml @@ -0,0 +1,53 @@ +name: Publish Kata release artifacts for amd64 +on: + workflow_call: + inputs: + target-arch: + required: true + type: string + +jobs: + build-kata-static-tarball-amd64: + uses: ./.github/workflows/build-kata-static-tarball-amd64.yaml + with: + stage: release + + kata-deploy: + needs: build-kata-static-tarball-amd64 + runs-on: ubuntu-latest + steps: + - name: Login to Kata Containers docker.io + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Login to Kata Containers quay.io + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - uses: actions/checkout@v3 + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64 + + - name: build-and-push-kata-deploy-ci-amd64 + id: build-and-push-kata-deploy-ci-amd64 + run: | + # We need to do such trick here as the format of the $GITHUB_REF + # is "refs/tags/" + tag=$(echo $GITHUB_REF | cut -d/ -f3-) + tags=($tag) + tags+=($([[ "$tag" =~ "alpha"|"rc" ]] && echo "latest" || echo "stable")) + for tag in ${tags[@]}; do + ./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \ + $(pwd)/kata-static.tar.xz "docker.io/katadocker/kata-deploy" \ + "${tag}-${{ inputs.target-arch }}" + ./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \ + $(pwd)/kata-static.tar.xz "quay.io/kata-containers/kata-deploy" \ + "${tag}-${{ inputs.target-arch }}" + done diff --git a/.github/workflows/release-arm64.yaml b/.github/workflows/release-arm64.yaml new file mode 100644 index 000000000000..cd7db8fdfbfd --- /dev/null +++ b/.github/workflows/release-arm64.yaml @@ -0,0 +1,53 @@ +name: Publish Kata release artifacts for arm64 +on: + workflow_call: + inputs: + target-arch: + required: true + type: string + +jobs: + build-kata-static-tarball-arm64: + uses: ./.github/workflows/build-kata-static-tarball-arm64.yaml + with: + stage: release + + kata-deploy: + needs: build-kata-static-tarball-arm64 + runs-on: arm64 + steps: + - name: Login to Kata Containers docker.io + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Login to Kata Containers quay.io + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - uses: actions/checkout@v3 + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-arm64 + + - name: build-and-push-kata-deploy-ci-arm64 + id: build-and-push-kata-deploy-ci-arm64 + run: | + # We need to do such trick here as the format of the $GITHUB_REF + # is "refs/tags/" + tag=$(echo $GITHUB_REF | cut -d/ -f3-) + tags=($tag) + tags+=($([[ "$tag" =~ "alpha"|"rc" ]] && echo "latest" || echo "stable")) + for tag in 
${tags[@]}; do + ./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \ + $(pwd)/kata-static.tar.xz "docker.io/katadocker/kata-deploy" \ + "${tag}-${{ inputs.target-arch }}" + ./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \ + $(pwd)/kata-static.tar.xz "quay.io/kata-containers/kata-deploy" \ + "${tag}-${{ inputs.target-arch }}" + done diff --git a/.github/workflows/release-s390x.yaml b/.github/workflows/release-s390x.yaml new file mode 100644 index 000000000000..5f3aaf05fb3f --- /dev/null +++ b/.github/workflows/release-s390x.yaml @@ -0,0 +1,53 @@ +name: Publish Kata release artifacts for s390x +on: + workflow_call: + inputs: + target-arch: + required: true + type: string + +jobs: + build-kata-static-tarball-s390x: + uses: ./.github/workflows/build-kata-static-tarball-s390x.yaml + with: + stage: release + + kata-deploy: + needs: build-kata-static-tarball-s390x + runs-on: s390x + steps: + - name: Login to Kata Containers docker.io + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Login to Kata Containers quay.io + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - uses: actions/checkout@v3 + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-s390x + + - name: build-and-push-kata-deploy-ci-s390x + id: build-and-push-kata-deploy-ci-s390x + run: | + # We need to do such trick here as the format of the $GITHUB_REF + # is "refs/tags/" + tag=$(echo $GITHUB_REF | cut -d/ -f3-) + tags=($tag) + tags+=($([[ "$tag" =~ "alpha"|"rc" ]] && echo "latest" || echo "stable")) + for tag in ${tags[@]}; do + ./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \ + $(pwd)/kata-static.tar.xz "docker.io/katadocker/kata-deploy" \ + "${tag}-${{ inputs.target-arch }}" + ./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \ + $(pwd)/kata-static.tar.xz "quay.io/kata-containers/kata-deploy" \ + "${tag}-${{ inputs.target-arch }}" + done diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 81f05942cf43..d732a6723119 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,145 +1,146 @@ -name: Publish Kata 2.x release artifacts +name: Publish Kata release artifacts on: push: tags: - - '2.*' + - '[0-9]+.[0-9]+.[0-9]+*' + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true jobs: - build-asset: - runs-on: ubuntu-latest - strategy: - matrix: - asset: - - cloud-hypervisor - - firecracker - - kernel - - qemu - - rootfs-image - - rootfs-initrd - - shim-v2 - - virtiofsd - steps: - - uses: actions/checkout@v2 - - name: Install docker - run: | - curl -fsSL https://test.docker.com -o test-docker.sh - sh test-docker.sh + build-and-push-assets-amd64: + uses: ./.github/workflows/release-amd64.yaml + with: + target-arch: amd64 + secrets: inherit - - name: Build ${{ matrix.asset }} - run: | - ./tools/packaging/kata-deploy/local-build/kata-deploy-copy-yq-installer.sh - ./tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh --build="${KATA_ASSET}" - build_dir=$(readlink -f build) - # store-artifact does not work with symlink - sudo cp -r "${build_dir}" "kata-build" - env: - KATA_ASSET: ${{ matrix.asset }} - TAR_OUTPUT: ${{ 
matrix.asset }}.tar.gz + build-and-push-assets-arm64: + uses: ./.github/workflows/release-arm64.yaml + with: + target-arch: arm64 + secrets: inherit - - name: store-artifact ${{ matrix.asset }} - uses: actions/upload-artifact@v2 - with: - name: kata-artifacts - path: kata-build/kata-static-${{ matrix.asset }}.tar.xz - if-no-files-found: error + build-and-push-assets-s390x: + uses: ./.github/workflows/release-s390x.yaml + with: + target-arch: s390x + secrets: inherit - create-kata-tarball: + publish-multi-arch-images: runs-on: ubuntu-latest - needs: build-asset + needs: [build-and-push-assets-amd64, build-and-push-assets-arm64, build-and-push-assets-s390x] steps: - - uses: actions/checkout@v2 - - name: get-artifacts - uses: actions/download-artifact@v2 - with: - name: kata-artifacts - path: kata-artifacts - - name: merge-artifacts - run: | - ./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts - - name: store-artifacts - uses: actions/upload-artifact@v2 - with: - name: kata-static-tarball - path: kata-static.tar.xz + - name: Checkout repository + uses: actions/checkout@v3 - kata-deploy: - needs: create-kata-tarball - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: get-kata-tarball - uses: actions/download-artifact@v2 + - name: Login to Kata Containers docker.io + uses: docker/login-action@v2 with: - name: kata-static-tarball - - name: build-and-push-kata-deploy-ci - id: build-and-push-kata-deploy-ci - run: | - tag=$(echo $GITHUB_REF | cut -d/ -f3-) - pushd $GITHUB_WORKSPACE - git checkout $tag - pkg_sha=$(git rev-parse HEAD) - popd - mv kata-static.tar.xz $GITHUB_WORKSPACE/tools/packaging/kata-deploy/kata-static.tar.xz - docker build --build-arg KATA_ARTIFACTS=kata-static.tar.xz -t katadocker/kata-deploy-ci:$pkg_sha -t quay.io/kata-containers/kata-deploy-ci:$pkg_sha $GITHUB_WORKSPACE/tools/packaging/kata-deploy - docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} - docker push katadocker/kata-deploy-ci:$pkg_sha - docker login -u ${{ secrets.QUAY_DEPLOYER_USERNAME }} -p ${{ secrets.QUAY_DEPLOYER_PASSWORD }} quay.io - docker push quay.io/kata-containers/kata-deploy-ci:$pkg_sha - mkdir -p packaging/kata-deploy - ln -s $GITHUB_WORKSPACE/tools/packaging/kata-deploy/action packaging/kata-deploy/action - echo "::set-output name=PKG_SHA::${pkg_sha}" - - name: test-kata-deploy-ci-in-aks - uses: ./packaging/kata-deploy/action + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Login to Kata Containers quay.io + uses: docker/login-action@v2 with: - packaging-sha: ${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} - env: - PKG_SHA: ${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} - AZ_APPID: ${{ secrets.AZ_APPID }} - AZ_PASSWORD: ${{ secrets.AZ_PASSWORD }} - AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }} - AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }} - - name: push-tarball + registry: quay.io + username: ${{ secrets.QUAY_DEPLOYER_USERNAME }} + password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + + - name: Push multi-arch manifest run: | # tag the container image we created and push to DockerHub tag=$(echo $GITHUB_REF | cut -d/ -f3-) tags=($tag) tags+=($([[ "$tag" =~ "alpha"|"rc" ]] && echo "latest" || echo "stable")) - for tag in ${tags[@]}; do \ - docker tag katadocker/kata-deploy-ci:${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} katadocker/kata-deploy:${tag} && \ - docker tag 
quay.io/kata-containers/kata-deploy-ci:${{steps.build-and-push-kata-deploy-ci.outputs.PKG_SHA}} quay.io/kata-containers/kata-deploy:${tag} && \ - docker push katadocker/kata-deploy:${tag} && \ - docker push quay.io/kata-containers/kata-deploy:${tag}; \ + # push to quay.io and docker.io + for tag in ${tags[@]}; do + docker manifest create quay.io/kata-containers/kata-deploy:${tag} \ + --amend quay.io/kata-containers/kata-deploy:${tag}-amd64 \ + --amend quay.io/kata-containers/kata-deploy:${tag}-arm64 \ + --amend quay.io/kata-containers/kata-deploy:${tag}-s390x + + docker manifest create docker.io/katadocker/kata-deploy:${tag} \ + --amend docker.io/katadocker/kata-deploy:${tag}-amd64 \ + --amend docker.io/katadocker/kata-deploy:${tag}-arm64 \ + --amend docker.io/katadocker/kata-deploy:${tag}-s390x + + docker manifest push quay.io/kata-containers/kata-deploy:${tag} + docker manifest push docker.io/katadocker/kata-deploy:${tag} done - upload-static-tarball: - needs: kata-deploy + upload-multi-arch-static-tarball: + needs: publish-multi-arch-images runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: download-artifacts - uses: actions/download-artifact@v2 - with: - name: kata-static-tarball + - uses: actions/checkout@v3 - name: install hub run: | - HUB_VER=$(curl -s "https://api.github.com/repos/github/hub/releases/latest" | jq -r .tag_name | sed 's/^v//') - wget -q -O- https://github.com/github/hub/releases/download/v$HUB_VER/hub-linux-amd64-$HUB_VER.tgz | \ + wget -q -O- https://github.com/mislav/hub/releases/download/v2.14.2/hub-linux-amd64-2.14.2.tgz | \ tar xz --strip-components=2 --wildcards '*/bin/hub' && sudo mv hub /usr/local/bin/hub - - name: push static tarball to github + + - name: download-artifacts-amd64 + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64 + - name: push amd64 static tarball to github run: | tag=$(echo $GITHUB_REF | cut -d/ -f3-) - tarball="kata-static-$tag-x86_64.tar.xz" + tarball="kata-static-$tag-amd64.tar.xz" mv kata-static.tar.xz "$GITHUB_WORKSPACE/${tarball}" pushd $GITHUB_WORKSPACE echo "uploading asset '${tarball}' for tag: ${tag}" GITHUB_TOKEN=${{ secrets.GIT_UPLOAD_TOKEN }} hub release edit -m "" -a "${tarball}" "${tag}" popd + - name: download-artifacts-arm64 + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-arm64 + - name: push arm64 static tarball to github + run: | + tag=$(echo $GITHUB_REF | cut -d/ -f3-) + tarball="kata-static-$tag-arm64.tar.xz" + mv kata-static.tar.xz "$GITHUB_WORKSPACE/${tarball}" + pushd $GITHUB_WORKSPACE + echo "uploading asset '${tarball}' for tag: ${tag}" + GITHUB_TOKEN=${{ secrets.GIT_UPLOAD_TOKEN }} hub release edit -m "" -a "${tarball}" "${tag}" + popd + + - name: download-artifacts-s390x + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-s390x + - name: push s390x static tarball to github + run: | + tag=$(echo $GITHUB_REF | cut -d/ -f3-) + tarball="kata-static-$tag-s390x.tar.xz" + mv kata-static.tar.xz "$GITHUB_WORKSPACE/${tarball}" + pushd $GITHUB_WORKSPACE + echo "uploading asset '${tarball}' for tag: ${tag}" + GITHUB_TOKEN=${{ secrets.GIT_UPLOAD_TOKEN }} hub release edit -m "" -a "${tarball}" "${tag}" + popd + + upload-versions-yaml: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: upload versions.yaml + env: + GITHUB_TOKEN: ${{ secrets.GIT_UPLOAD_TOKEN }} + run: | + tag=$(echo $GITHUB_REF | cut -d/ -f3-) + pushd $GITHUB_WORKSPACE + versions_file="kata-containers-$tag-versions.yaml" + cp versions.yaml 
${versions_file} + hub release edit -m "" -a "${versions_file}" "${tag}" + popd + upload-cargo-vendored-tarball: - needs: upload-static-tarball + needs: upload-multi-arch-static-tarball runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: generate-and-upload-tarball run: | tag=$(echo $GITHUB_REF | cut -d/ -f3-) @@ -153,7 +154,7 @@ jobs: needs: upload-cargo-vendored-tarball runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: download-and-upload-tarball env: GITHUB_TOKEN: ${{ secrets.GIT_UPLOAD_TOKEN }} diff --git a/.github/workflows/require-pr-porting-labels.yaml b/.github/workflows/require-pr-porting-labels.yaml index 585e86bc425c..4f799c4ba8fd 100644 --- a/.github/workflows/require-pr-porting-labels.yaml +++ b/.github/workflows/require-pr-porting-labels.yaml @@ -15,6 +15,10 @@ on: branches: - main +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + jobs: check-pr-porting-labels: runs-on: ubuntu-latest @@ -32,7 +36,17 @@ jobs: - name: Checkout code to allow hub to communicate with the project if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - uses: actions/checkout@v2 + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} - name: Install porting checker script run: | diff --git a/.github/workflows/run-cri-containerd-tests.yaml b/.github/workflows/run-cri-containerd-tests.yaml new file mode 100644 index 000000000000..f42833609a05 --- /dev/null +++ b/.github/workflows/run-cri-containerd-tests.yaml @@ -0,0 +1,56 @@ +name: CI | Run cri-containerd tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-cri-containerd: + strategy: + # We can set this to true whenever we're 100% sure that + # the all the tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance. 
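
Before moving on to the test workflows, it is worth spelling out the multi-arch publishing pattern used by the publish-multi-arch-images job earlier in this release workflow. The sketch below is illustrative only: the image name and tag are placeholders, and it assumes the per-architecture images have already been pushed.

```bash
# Illustrative multi-arch manifest publishing, mirroring the release job above.
# "image" and "tag" are placeholders; the -amd64/-arm64/-s390x tags must already exist.
image="quay.io/kata-containers/kata-deploy"
tag="3.2.0"

# Stitch the per-arch images into one manifest list...
docker manifest create "${image}:${tag}" \
  --amend "${image}:${tag}-amd64" \
  --amend "${image}:${tag}-arm64" \
  --amend "${image}:${tag}-s390x"

# ...and push it, so pulling "${image}:${tag}" selects the right architecture.
docker manifest push "${image}:${tag}"

# Optional: inspect what ended up in the manifest list.
docker manifest inspect "${image}:${tag}"
```

Publishing the per-arch tags first and stitching them together afterwards keeps each architecture's build independent, which is why the release workflow splits into three build-and-push-assets jobs plus one publish job.
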
+ fail-fast: false + matrix: + containerd_version: ['lts', 'active'] + vmm: ['clh', 'qemu'] + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINERD_VERSION: ${{ matrix.containerd_version }} + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/cri-containerd/gha-run.sh install-kata kata-artifacts + + - name: Run cri-containerd tests + run: bash tests/integration/cri-containerd/gha-run.sh run diff --git a/.github/workflows/run-docker-tests-on-garm.yaml b/.github/workflows/run-docker-tests-on-garm.yaml new file mode 100644 index 000000000000..ea90759fa2d5 --- /dev/null +++ b/.github/workflows/run-docker-tests-on-garm.yaml @@ -0,0 +1,56 @@ +name: CI | Run docker integration tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-docker-tests: + strategy: + # We can set this to true whenever we're 100% sure that + # all the tests are not flaky, otherwise we'll fail them + # all due to a single flaky instance. + fail-fast: false + matrix: + vmm: + - clh + - qemu + runs-on: garm-ubuntu-2304-smaller + env: + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/docker/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts + + - name: Run docker smoke test + timeout-minutes: 5 + run: bash tests/integration/docker/gha-run.sh run diff --git a/.github/workflows/run-k8s-tests-on-aks.yaml b/.github/workflows/run-k8s-tests-on-aks.yaml new file mode 100644 index 000000000000..23439e3f1bc9 --- /dev/null +++ b/.github/workflows/run-k8s-tests-on-aks.yaml @@ -0,0 +1,98 @@ +name: CI | Run kubernetes tests on AKS +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-k8s-tests: + strategy: + fail-fast: false + matrix: + host_os: + - ubuntu + vmm: + - clh + - dragonball + - qemu + instance-type: + - small + - normal + include: + - host_os: cbl-mariner + vmm: clh + runs-on: ubuntu-latest + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + GH_PR_NUMBER: ${{ inputs.pr-number 
}} + KATA_HOST_OS: ${{ matrix.host_os }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "vanilla" + USING_NFD: "false" + K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Download Azure CLI + run: bash tests/integration/kubernetes/gha-run.sh install-azure-cli + + - name: Log into the Azure account + run: bash tests/integration/kubernetes/gha-run.sh login-azure + env: + AZ_APPID: ${{ secrets.AZ_APPID }} + AZ_PASSWORD: ${{ secrets.AZ_PASSWORD }} + AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }} + + - name: Create AKS cluster + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh create-cluster + + - name: Install `bats` + run: bash tests/integration/kubernetes/gha-run.sh install-bats + + - name: Install `kubectl` + run: bash tests/integration/kubernetes/gha-run.sh install-kubectl + + - name: Download credentials for the Kubernetes CLI to use them + run: bash tests/integration/kubernetes/gha-run.sh get-cluster-credentials + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-aks + + - name: Run tests + timeout-minutes: 60 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete AKS cluster + if: always() + run: bash tests/integration/kubernetes/gha-run.sh delete-cluster diff --git a/.github/workflows/run-k8s-tests-on-garm.yaml b/.github/workflows/run-k8s-tests-on-garm.yaml new file mode 100644 index 000000000000..1fd4b00ee16c --- /dev/null +++ b/.github/workflows/run-k8s-tests-on-garm.yaml @@ -0,0 +1,88 @@ +name: CI | Run kubernetes tests on GARM +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-k8s-tests: + strategy: + fail-fast: false + matrix: + vmm: + - clh #cloud-hypervisor + - fc #firecracker + - qemu + snapshotter: + - devmapper + k8s: + - k3s + instance: + - garm-ubuntu-2004 + - garm-ubuntu-2004-smaller + include: + - instance: garm-ubuntu-2004 + instance-type: normal + - instance: garm-ubuntu-2004-smaller + instance-type: small + runs-on: ${{ matrix.instance }} + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: ${{ matrix.k8s }} + SNAPSHOTTER: ${{ matrix.snapshotter }} + USING_NFD: "false" + K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy ${{ matrix.k8s }} + run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s + + - name: Configure the ${{ matrix.snapshotter }} snapshotter + run: bash tests/integration/kubernetes/gha-run.sh configure-snapshotter + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-garm + + - name: Install `bats` + 
run: bash tests/integration/kubernetes/gha-run.sh install-bats + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-garm diff --git a/.github/workflows/run-k8s-tests-on-sev.yaml b/.github/workflows/run-k8s-tests-on-sev.yaml new file mode 100644 index 000000000000..a720e24869ca --- /dev/null +++ b/.github/workflows/run-k8s-tests-on-sev.yaml @@ -0,0 +1,65 @@ +name: CI | Run kubernetes tests on SEV +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-k8s-tests: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-sev + runs-on: sev + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBECONFIG: /home/kata/.kube/config + KUBERNETES: "vanilla" + USING_NFD: "false" + K8S_TEST_HOST_TYPE: "baremetal" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-sev + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-sev diff --git a/.github/workflows/run-k8s-tests-on-snp.yaml b/.github/workflows/run-k8s-tests-on-snp.yaml new file mode 100644 index 000000000000..33ae57d3a812 --- /dev/null +++ b/.github/workflows/run-k8s-tests-on-snp.yaml @@ -0,0 +1,65 @@ +name: CI | Run kubernetes tests on SEV-SNP +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-k8s-tests: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-snp + runs-on: sev-snp + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBECONFIG: /home/kata/.kube/config + KUBERNETES: "vanilla" + USING_NFD: "false" + K8S_TEST_HOST_TYPE: "baremetal" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-snp + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-snp diff --git a/.github/workflows/run-k8s-tests-on-tdx.yaml 
b/.github/workflows/run-k8s-tests-on-tdx.yaml new file mode 100644 index 000000000000..940fd0a39692 --- /dev/null +++ b/.github/workflows/run-k8s-tests-on-tdx.yaml @@ -0,0 +1,64 @@ +name: CI | Run kubernetes tests on TDX +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-k8s-tests: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-tdx + runs-on: tdx + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "k3s" + USING_NFD: "true" + K8S_TEST_HOST_TYPE: "baremetal" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-tdx + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-tdx diff --git a/.github/workflows/run-k8s-tests-with-crio-on-garm.yaml b/.github/workflows/run-k8s-tests-with-crio-on-garm.yaml new file mode 100644 index 000000000000..14000dc638fc --- /dev/null +++ b/.github/workflows/run-k8s-tests-with-crio-on-garm.yaml @@ -0,0 +1,86 @@ +name: CI | Run kubernetes tests, using CRI-O, on GARM +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-k8s-tests: + strategy: + fail-fast: false + matrix: + vmm: + - qemu + k8s: + - k0s + instance: + - garm-ubuntu-2004 + - garm-ubuntu-2004-smaller + include: + - instance: garm-ubuntu-2004 + instance-type: normal + - instance: garm-ubuntu-2004-smaller + instance-type: small + - k8s: k0s + k8s-extra-params: '--cri-socket remote:unix:///var/run/crio/crio.sock --kubelet-extra-args --cgroup-driver="systemd"' + runs-on: ${{ matrix.instance }} + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: ${{ matrix.k8s }} + KUBERNETES_EXTRA_PARAMS: ${{ matrix.k8s-extra-params }} + USING_NFD: "false" + K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }} + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Configure CRI-O + run: bash tests/integration/kubernetes/gha-run.sh setup-crio + + - name: Deploy ${{ matrix.k8s }} + run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-garm + + - name: Install 
`bats` + run: bash tests/integration/kubernetes/gha-run.sh install-bats + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-garm diff --git a/.github/workflows/run-kata-deploy-tests-on-aks.yaml b/.github/workflows/run-kata-deploy-tests-on-aks.yaml new file mode 100644 index 000000000000..74fcb0ea5731 --- /dev/null +++ b/.github/workflows/run-kata-deploy-tests-on-aks.yaml @@ -0,0 +1,89 @@ +name: CI | Run kata-deploy tests on AKS +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-kata-deploy-tests: + strategy: + fail-fast: false + matrix: + host_os: + - ubuntu + vmm: + - clh + - dragonball + - qemu + include: + - host_os: cbl-mariner + vmm: clh + runs-on: ubuntu-latest + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + GH_PR_NUMBER: ${{ inputs.pr-number }} + KATA_HOST_OS: ${{ matrix.host_os }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "vanilla" + USING_NFD: "false" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Download Azure CLI + run: bash tests/functional/kata-deploy/gha-run.sh install-azure-cli + + - name: Log into the Azure account + run: bash tests/functional/kata-deploy/gha-run.sh login-azure + env: + AZ_APPID: ${{ secrets.AZ_APPID }} + AZ_PASSWORD: ${{ secrets.AZ_PASSWORD }} + AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }} + + - name: Create AKS cluster + timeout-minutes: 10 + run: bash tests/functional/kata-deploy/gha-run.sh create-cluster + + - name: Install `bats` + run: bash tests/functional/kata-deploy/gha-run.sh install-bats + + - name: Install `kubectl` + run: bash tests/functional/kata-deploy/gha-run.sh install-kubectl + + - name: Download credentials for the Kubernetes CLI to use them + run: bash tests/functional/kata-deploy/gha-run.sh get-cluster-credentials + + - name: Run tests + run: bash tests/functional/kata-deploy/gha-run.sh run-tests + + - name: Delete AKS cluster + if: always() + run: bash tests/functional/kata-deploy/gha-run.sh delete-cluster diff --git a/.github/workflows/run-kata-deploy-tests-on-garm.yaml b/.github/workflows/run-kata-deploy-tests-on-garm.yaml new file mode 100644 index 000000000000..7514a27b6493 --- /dev/null +++ b/.github/workflows/run-kata-deploy-tests-on-garm.yaml @@ -0,0 +1,65 @@ +name: CI | Run kata-deploy tests on GARM +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-kata-deploy-tests: + strategy: + fail-fast: false + matrix: + vmm: + - clh + - qemu + k8s: + - k0s + - k3s + - rke2 + runs-on: garm-ubuntu-2004-smaller + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + 
PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: ${{ matrix.k8s }} + USING_NFD: "false" + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy ${{ matrix.k8s }} + run: bash tests/functional/kata-deploy/gha-run.sh deploy-k8s + + - name: Install `bats` + run: bash tests/functional/kata-deploy/gha-run.sh install-bats + + - name: Run tests + run: bash tests/functional/kata-deploy/gha-run.sh run-tests diff --git a/.github/workflows/run-kata-deploy-tests-on-tdx.yaml b/.github/workflows/run-kata-deploy-tests-on-tdx.yaml new file mode 100644 index 000000000000..6b439cea8c62 --- /dev/null +++ b/.github/workflows/run-kata-deploy-tests-on-tdx.yaml @@ -0,0 +1,54 @@ +name: CI | Run kata-deploy tests on TDX +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-kata-deploy-tests: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-tdx + runs-on: tdx + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "k3s" + USING_NFD: "true" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Run tests + run: bash tests/functional/kata-deploy/gha-run.sh run-tests diff --git a/.github/workflows/run-metrics.yaml b/.github/workflows/run-metrics.yaml new file mode 100644 index 000000000000..c0e43c3606f7 --- /dev/null +++ b/.github/workflows/run-metrics.yaml @@ -0,0 +1,94 @@ +name: CI | Run test metrics +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + setup-kata: + name: Kata Setup + runs-on: metrics + env: + GOPATH: ${{ github.workspace }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/metrics/gha-run.sh install-kata kata-artifacts + + run-metrics: + needs: setup-kata + strategy: + # We can set this to true whenever we're 100% sure that + # the all the tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance. 
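
Nearly every job in this patch repeats the same "Rebase atop of the latest target branch" step via tests/git-helper.sh. The snippet below is only a generic plain-git illustration of that idea, not the helper's actual implementation, and it assumes TARGET_BRANCH names a branch on origin.

```bash
# Generic illustration of rebasing a checked-out PR head onto the latest
# target branch. This is NOT the implementation of tests/git-helper.sh.
TARGET_BRANCH="${TARGET_BRANCH:-main}"

# A throwaway identity so the rebase can rewrite commits in a CI-like environment.
git config user.email "ci@example.com"
git config user.name "CI"

git fetch origin "${TARGET_BRANCH}"
git rebase "origin/${TARGET_BRANCH}"
```
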
+ fail-fast: false + matrix: + vmm: ['clh', 'qemu'] + max-parallel: 1 + runs-on: metrics + env: + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - name: enabling the hypervisor + run: bash tests/metrics/gha-run.sh enabling-hypervisor + + - name: run launch times test + run: bash tests/metrics/gha-run.sh run-test-launchtimes + + - name: run memory foot print test + run: bash tests/metrics/gha-run.sh run-test-memory-usage + + - name: run memory usage inside container test + run: bash tests/metrics/gha-run.sh run-test-memory-usage-inside-container + + - name: run blogbench test + run: bash tests/metrics/gha-run.sh run-test-blogbench + + - name: run tensorflow test + run: bash tests/metrics/gha-run.sh run-test-tensorflow + + - name: run fio test + run: bash tests/metrics/gha-run.sh run-test-fio + + - name: run iperf test + run: bash tests/metrics/gha-run.sh run-test-iperf + + - name: run latency test + run: bash tests/metrics/gha-run.sh run-test-latency + + - name: make metrics tarball ${{ matrix.vmm }} + run: bash tests/metrics/gha-run.sh make-tarball-results + + - name: archive metrics results ${{ matrix.vmm }} + uses: actions/upload-artifact@v3 + with: + name: metrics-artifacts-${{ matrix.vmm }} + path: results-${{ matrix.vmm }}.tar.gz + retention-days: 1 + if-no-files-found: error diff --git a/.github/workflows/run-nerdctl-tests-on-garm.yaml b/.github/workflows/run-nerdctl-tests-on-garm.yaml new file mode 100644 index 000000000000..eb4e04bc2ad2 --- /dev/null +++ b/.github/workflows/run-nerdctl-tests-on-garm.yaml @@ -0,0 +1,57 @@ +name: CI | Run nerdctl integration tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-nerdctl-tests: + strategy: + # We can set this to true whenever we're 100% sure that + # all the tests are not flaky, otherwise we'll fail them + # all due to a single flaky instance. 
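
The run-metrics job above fans a fixed list of gha-run.sh sub-commands over one hypervisor at a time (max-parallel: 1 presumably keeps the two hypervisor runs from sharing the metrics host and skewing results). A hedged sketch of driving a subset of it by hand, assuming the static tarball has already been unpacked into kata-artifacts as in the setup-kata job:

```bash
# Hedged sketch: running part of the metrics suite manually for one hypervisor.
# KATA_HYPERVISOR mirrors the matrix knob; "kata-artifacts" is assumed to hold
# the downloaded static tarball.
export KATA_HYPERVISOR="clh"   # or "qemu"

bash tests/metrics/gha-run.sh install-kata kata-artifacts
bash tests/metrics/gha-run.sh enabling-hypervisor
bash tests/metrics/gha-run.sh run-test-launchtimes
bash tests/metrics/gha-run.sh run-test-memory-usage
bash tests/metrics/gha-run.sh make-tarball-results
```
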
+ fail-fast: false + matrix: + vmm: + - clh + - dragonball + - qemu + runs-on: garm-ubuntu-2304-smaller + env: + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/nerdctl/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/nerdctl/gha-run.sh install-kata kata-artifacts + + - name: Run nerdctl smoke test + timeout-minutes: 5 + run: bash tests/integration/nerdctl/gha-run.sh run diff --git a/.github/workflows/run-nydus-tests.yaml b/.github/workflows/run-nydus-tests.yaml new file mode 100644 index 000000000000..71ee0fe8697e --- /dev/null +++ b/.github/workflows/run-nydus-tests.yaml @@ -0,0 +1,56 @@ +name: CI | Run nydus tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-nydus: + strategy: + # We can set this to true whenever we're 100% sure that + # the all the tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance. + fail-fast: false + matrix: + containerd_version: ['lts', 'active'] + vmm: ['clh', 'qemu', 'dragonball'] + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINERD_VERSION: ${{ matrix.containerd_version }} + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/nydus/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/nydus/gha-run.sh install-kata kata-artifacts + + - name: Run nydus tests + run: bash tests/integration/nydus/gha-run.sh run diff --git a/.github/workflows/run-vfio-tests.yaml b/.github/workflows/run-vfio-tests.yaml new file mode 100644 index 000000000000..4542ec1a3a06 --- /dev/null +++ b/.github/workflows/run-vfio-tests.yaml @@ -0,0 +1,49 @@ +name: CI | Run vfio tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-vfio: + strategy: + fail-fast: false + matrix: + vmm: ['clh', 'qemu'] + runs-on: garm-ubuntu-2304 + env: + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/functional/vfio/gha-run.sh install-dependencies + + - name: 
get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Run vfio tests + timeout-minutes: 15 + run: bash tests/functional/vfio/gha-run.sh run diff --git a/.github/workflows/snap-release.yaml b/.github/workflows/snap-release.yaml deleted file mode 100644 index ecd34978f7e1..000000000000 --- a/.github/workflows/snap-release.yaml +++ /dev/null @@ -1,41 +0,0 @@ -name: Release Kata 2.x in snapcraft store -on: - push: - tags: - - '2.*' -jobs: - release-snap: - runs-on: ubuntu-20.04 - steps: - - name: Check out Git repository - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Install Snapcraft - uses: samuelmeuli/action-snapcraft@v1 - with: - snapcraft_token: ${{ secrets.snapcraft_token }} - - - name: Build snap - run: | - # Removing man-db, workflow kept failing, fixes: #4480 - sudo apt -y remove --purge man-db - sudo apt-get install -y git git-extras - kata_url="https://github.com/kata-containers/kata-containers" - latest_version=$(git ls-remote --tags ${kata_url} | egrep -o "refs.*" | egrep -v "\-alpha|\-rc|{}" | egrep -o "[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+" | sort -V -r | head -1) - current_version="$(echo ${GITHUB_REF} | cut -d/ -f3)" - # Check semantic versioning format (x.y.z) and if the current tag is the latest tag - if echo "${current_version}" | grep -q "^[[:digit:]]\+\.[[:digit:]]\+\.[[:digit:]]\+$" && echo -e "$latest_version\n$current_version" | sort -C -V; then - # Current version is the latest version, build it - snapcraft snap --debug --destructive-mode - fi - - - name: Upload snap - run: | - snap_version="$(echo ${GITHUB_REF} | cut -d/ -f3)" - snap_file="kata-containers_${snap_version}_amd64.snap" - # Upload the snap if it exists - if [ -f ${snap_file} ]; then - snapcraft upload --release=stable ${snap_file} - fi diff --git a/.github/workflows/snap.yaml b/.github/workflows/snap.yaml deleted file mode 100644 index 9176d45d2a20..000000000000 --- a/.github/workflows/snap.yaml +++ /dev/null @@ -1,27 +0,0 @@ -name: snap CI -on: - pull_request: - types: - - opened - - synchronize - - reopened - - edited - -jobs: - test: - runs-on: ubuntu-20.04 - steps: - - name: Check out - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Install Snapcraft - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - uses: samuelmeuli/action-snapcraft@v1 - - - name: Build snap - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - snapcraft snap --debug --destructive-mode diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index cc6a7149c7fe..c55adf4c7fbf 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -6,91 +6,189 @@ on: - reopened - synchronize +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + name: Static checks jobs: - test: + check-kernel-config-version: + runs-on: ubuntu-latest + steps: + - name: Checkout the code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Ensure the kernel config version has been updated + run: | + kernel_dir="tools/packaging/kernel/" + kernel_version_file="${kernel_dir}kata_config_version" + modified_files=$(git diff --name-only origin/$GITHUB_BASE_REF..HEAD) + if git diff --name-only origin/$GITHUB_BASE_REF..HEAD 
"${kernel_dir}" | grep "${kernel_dir}"; then + echo "Kernel directory has changed, checking if $kernel_version_file has been updated" + if echo "$modified_files" | grep -v "README.md" | grep "${kernel_dir}" >>"/dev/null"; then + echo "$modified_files" | grep "$kernel_version_file" >>/dev/null || ( echo "Please bump version in $kernel_version_file" && exit 1) + else + echo "Readme file changed, no need for kernel config version update." + fi + echo "Check passed" + fi + + build-checks: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + component: + - agent + - dragonball + - runtime + - runtime-rs + - agent-ctl + - kata-ctl + - log-parser-rs + - runk + - trace-forwarder + command: + - "make vendor" + - "make check" + - "make test" + - "sudo -E PATH=\"$PATH\" make test" + include: + - component: agent + component-path: src/agent + - component: dragonball + component-path: src/dragonball + - component: runtime + component-path: src/runtime + - component: runtime-rs + component-path: src/runtime-rs + - component: agent-ctl + component-path: src/tools/agent-ctl + - component: kata-ctl + component-path: src/tools/kata-ctl + - component: log-parser-rs + component-path: src/tools/log-parser-rs + - component: runk + component-path: src/tools/runk + - component: trace-forwarder + component-path: src/tools/trace-forwarder + - install-libseccomp: no + - component: agent + install-libseccomp: yes + - component: runk + install-libseccomp: yes + steps: + - name: Checkout the code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install yq + run: | + ./ci/install_yq.sh + env: + INSTALL_IN_GOPATH: false + - name: Install golang + if: ${{ matrix.component == 'runtime' }} + run: | + ./tests/install_go.sh -f -p + echo "/usr/local/go/bin" >> $GITHUB_PATH + - name: Install rust + if: ${{ matrix.component != 'runtime' }} + run: | + ./tests/install_rust.sh + echo "${HOME}/.cargo/bin" >> $GITHUB_PATH + - name: Install musl-tools + if: ${{ matrix.component != 'runtime' }} + run: sudo apt-get -y install musl-tools + - name: Install libseccomp + if: ${{ matrix.command != 'make vendor' && matrix.command != 'make check' && matrix.install-libseccomp == 'yes' }} + run: | + libseccomp_install_dir=$(mktemp -d -t libseccomp.XXXXXXXXXX) + gperf_install_dir=$(mktemp -d -t gperf.XXXXXXXXXX) + ./ci/install_libseccomp.sh "${libseccomp_install_dir}" "${gperf_install_dir}" + echo "Set environment variables for the libseccomp crate to link the libseccomp library statically" + echo "LIBSECCOMP_LINK_TYPE=static" >> $GITHUB_ENV + echo "LIBSECCOMP_LIB_PATH=${libseccomp_install_dir}/lib" >> $GITHUB_ENV + - name: Setup XDG_RUNTIME_DIR for the `runtime` tests + if: ${{ matrix.command != 'make vendor' && matrix.command != 'make check' && matrix.component == 'runtime' }} + run: | + XDG_RUNTIME_DIR=$(mktemp -d /tmp/kata-tests-$USER.XXX | tee >(xargs chmod 0700)) + echo "XDG_RUNTIME_DIR=${XDG_RUNTIME_DIR}" >> $GITHUB_ENV + - name: Running `${{ matrix.command }}` for ${{ matrix.component }} + run: | + cd ${{ matrix.component-path }} + ${{ matrix.command }} + env: + RUST_BACKTRACE: "1" + + build-checks-depending-on-kvm: + runs-on: garm-ubuntu-2004-smaller + strategy: + fail-fast: false + matrix: + component: + - runtime-rs + include: + - component: runtime-rs + command: "sudo -E env PATH=$PATH LIBC=gnu SUPPORT_VIRTUALIZATION=true make test" + - component: runtime-rs + component-path: src/dragonball + steps: + - name: Checkout the code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install system 
deps + run: | + sudo apt-get install -y build-essential musl-tools + - name: Install yq + run: | + sudo -E ./ci/install_yq.sh + env: + INSTALL_IN_GOPATH: false + - name: Install rust + run: | + export PATH="$PATH:/usr/local/bin" + ./tests/install_rust.sh + - name: Running `${{ matrix.command }}` for ${{ matrix.component }} + run: | + export PATH="$PATH:${HOME}/.cargo/bin" + cd ${{ matrix.component-path }} + ${{ matrix.command }} + env: + RUST_BACKTRACE: "1" + + static-checks: + runs-on: ubuntu-20.04 strategy: + fail-fast: false matrix: - go-version: [1.16.x, 1.17.x] - os: [ubuntu-20.04] - runs-on: ${{ matrix.os }} + cmd: + - "make static-checks" env: - TRAVIS: "true" - TRAVIS_BRANCH: ${{ github.base_ref }} - TRAVIS_PULL_REQUEST_BRANCH: ${{ github.head_ref }} - TRAVIS_PULL_REQUEST_SHA : ${{ github.event.pull_request.head.sha }} - RUST_BACKTRACE: "1" - target_branch: ${{ github.base_ref }} + GOPATH: ${{ github.workspace }} steps: - - name: Install Go - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - uses: actions/setup-go@v2 - with: - go-version: ${{ matrix.go-version }} - env: - GOPATH: ${{ runner.workspace }}/kata-containers - - name: Setup GOPATH - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - echo "TRAVIS_BRANCH: ${TRAVIS_BRANCH}" - echo "TRAVIS_PULL_REQUEST_BRANCH: ${TRAVIS_PULL_REQUEST_BRANCH}" - echo "TRAVIS_PULL_REQUEST_SHA: ${TRAVIS_PULL_REQUEST_SHA}" - echo "TRAVIS: ${TRAVIS}" - - name: Set env - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - echo "GOPATH=${{ github.workspace }}" >> $GITHUB_ENV - echo "${{ github.workspace }}/bin" >> $GITHUB_PATH - - name: Checkout code - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - uses: actions/checkout@v2 - with: - fetch-depth: 0 - path: ./src/github.com/${{ github.repository }} - - name: Setup travis references - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - echo "TRAVIS_BRANCH=${TRAVIS_BRANCH:-$(echo $GITHUB_REF | awk 'BEGIN { FS = \"/\" } ; { print $3 }')}" - target_branch=${TRAVIS_BRANCH} - - name: Setup - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/setup.sh - env: - GOPATH: ${{ runner.workspace }}/kata-containers - - name: Installing rust - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/install_rust.sh - PATH=$PATH:"$HOME/.cargo/bin" - rustup target add x86_64-unknown-linux-musl - rustup component add rustfmt clippy - - name: Setup seccomp - run: | - libseccomp_install_dir=$(mktemp -d -t libseccomp.XXXXXXXXXX) - gperf_install_dir=$(mktemp -d -t gperf.XXXXXXXXXX) - cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/install_libseccomp.sh "${libseccomp_install_dir}" "${gperf_install_dir}" - echo "Set environment variables for the libseccomp crate to link the libseccomp library statically" - echo "LIBSECCOMP_LINK_TYPE=static" >> $GITHUB_ENV - echo "LIBSECCOMP_LIB_PATH=${libseccomp_install_dir}/lib" >> $GITHUB_ENV - # Check whether the vendored code is up-to-date & working as the first thing - - name: Check vendored code - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd ${GOPATH}/src/github.com/${{ github.repository }} && make vendor - - name: Static Checks - if: ${{ 
!contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd ${GOPATH}/src/github.com/${{ github.repository }} && make static-checks - - name: Run Compiler Checks - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd ${GOPATH}/src/github.com/${{ github.repository }} && make check - - name: Run Unit Tests - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd ${GOPATH}/src/github.com/${{ github.repository }} && make test - - name: Run Unit Tests As Root User - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd ${GOPATH}/src/github.com/${{ github.repository }} && sudo -E PATH="$PATH" make test + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + path: ./src/github.com/${{ github.repository }} + - name: Install yq + run: | + cd ${GOPATH}/src/github.com/${{ github.repository }} + ./ci/install_yq.sh + env: + INSTALL_IN_GOPATH: false + - name: Install golang + run: | + cd ${GOPATH}/src/github.com/${{ github.repository }} + ./tests/install_go.sh -f -p + echo "/usr/local/go/bin" >> $GITHUB_PATH + - name: Install system dependencies + run: | + sudo apt-get -y install moreutils hunspell pandoc + - name: Run check + run: | + export PATH=${PATH}:${GOPATH}/bin + cd ${GOPATH}/src/github.com/${{ github.repository }} && ${{ matrix.cmd }} diff --git a/.gitignore b/.gitignore index 1a149208c0e5..29d21ac6de87 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,10 @@ **/*.rej **/target **/.vscode +**/.idea +**/.fleet +**/*.swp +**/*.swo pkg/logging/Cargo.lock src/agent/src/version.rs src/agent/kata-agent.service @@ -11,4 +15,3 @@ src/agent/protocols/src/*.rs !src/agent/protocols/src/lib.rs build src/tools/log-parser/kata-log-parser - diff --git a/Makefile b/Makefile index 2b6f6a748f48..0765ae2b6e43 100644 --- a/Makefile +++ b/Makefile @@ -6,25 +6,29 @@ # List of available components COMPONENTS = +COMPONENTS += libs COMPONENTS += agent +COMPONENTS += dragonball COMPONENTS += runtime +COMPONENTS += runtime-rs # List of available tools TOOLS = TOOLS += agent-ctl -TOOLS += trace-forwarder -TOOLS += runk +TOOLS += kata-ctl TOOLS += log-parser +TOOLS += log-parser-rs +TOOLS += runk +TOOLS += trace-forwarder -STANDARD_TARGETS = build check clean install test vendor - -default: all +STANDARD_TARGETS = build check clean install static-checks-build test vendor -all: logging-crate-tests build +# Variables for the build-and-publish-kata-debug target +KATA_DEBUG_REGISTRY ?= "" +KATA_DEBUG_TAG ?= "" -logging-crate-tests: - make -C src/libs/logging +default: all include utils.mk include ./tools/packaging/kata-deploy/local-build/Makefile @@ -38,19 +42,19 @@ generate-protocols: make -C src/agent generate-protocols # Some static checks rely on generated source files of components. 
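
The new build-checks matrix above runs the same four commands over every component directory. Reproducing one matrix cell locally is straightforward; the sketch below picks the agent as an example and assumes Rust, musl-tools and (for the seccomp-enabled tests) libseccomp are already installed, as the workflow arranges.

```bash
# Hedged sketch: one build-checks matrix cell (component=agent) run by hand.
pushd src/agent

make vendor                      # verify vendored dependencies are consistent
make check                       # compiler/lint checks
make test                        # unit tests as the current user
sudo -E PATH="$PATH" make test   # unit tests that need root

popd
```
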
-static-checks: build +static-checks: static-checks-build bash ci/static-checks.sh docs-url-alive-check: bash ci/docs-url-alive-check.sh +build-and-publish-kata-debug: + bash tools/packaging/kata-debug/kata-debug-build-and-upload-payload.sh ${KATA_DEBUG_REGISTRY} ${KATA_DEBUG_TAG} + .PHONY: \ all \ - binary-tarball \ + kata-tarball \ + install-tarball \ default \ - install-binary-tarball \ - logging-crate-tests \ static-checks \ docs-url-alive-check - - diff --git a/README.md b/README.md index 90a5c9209a4a..d34110056bda 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ - + + +[![CI | Publish Kata Containers payload](https://github.com/kata-containers/kata-containers/actions/workflows/payload-after-push.yaml/badge.svg)](https://github.com/kata-containers/kata-containers/actions/workflows/payload-after-push.yaml) [![Kata Containers Nightly CI](https://github.com/kata-containers/kata-containers/actions/workflows/ci-nightly.yaml/badge.svg)](https://github.com/kata-containers/kata-containers/actions/workflows/ci-nightly.yaml) # Kata Containers @@ -71,6 +73,7 @@ See the [official documentation](docs) including: - [Developer guide](docs/Developer-Guide.md) - [Design documents](docs/design) - [Architecture overview](docs/design/architecture) + - [Architecture 3.0 overview](docs/design/architecture_3.0/) ## Configuration @@ -116,9 +119,10 @@ The table below lists the core parts of the project: | Component | Type | Description | |-|-|-| | [runtime](src/runtime) | core | Main component run by a container manager and providing a containerd shimv2 runtime implementation. | +| [runtime-rs](src/runtime-rs) | core | The Rust version runtime. | | [agent](src/agent) | core | Management process running inside the virtual machine / POD that sets up the container environment. | +| [`dragonball`](src/dragonball) | core | An optional built-in VMM brings out-of-the-box Kata Containers experience with optimizations on container workloads | | [documentation](docs) | documentation | Documentation common to all components (such as design and install documentation). | -| [libraries](src/libs) | core | Library crates shared by multiple Kata Container components or published to [`crates.io`](https://crates.io/index.html) | | [tests](https://github.com/kata-containers/tests) | tests | Excludes unit tests which live with the main code. | ### Additional components @@ -130,7 +134,10 @@ The table below lists the remaining parts of the project: | [packaging](tools/packaging) | infrastructure | Scripts and metadata for producing packaged binaries
(components, hypervisors, kernel and rootfs). | | [kernel](https://www.kernel.org) | kernel | Linux kernel used by the hypervisor to boot the guest image. Patches are stored [here](tools/packaging/kernel). | | [osbuilder](tools/osbuilder) | infrastructure | Tool to create "mini O/S" rootfs and initrd images and kernel for the hypervisor. | +| [kata-debug](tools/packaging/kata-debug/README.md) | infrastructure | Utility tool to gather Kata Containers debug information from Kubernetes clusters. | | [`agent-ctl`](src/tools/agent-ctl) | utility | Tool that provides low-level access for testing the agent. | +| [`kata-ctl`](src/tools/kata-ctl) | utility | Tool that provides advanced commands and debug facilities. | +| [`log-parser-rs`](src/tools/log-parser-rs) | utility | Tool that aid in analyzing logs from the kata runtime. | | [`trace-forwarder`](src/tools/trace-forwarder) | utility | Agent tracing helper. | | [`runk`](src/tools/runk) | utility | Standard OCI container runtime based on the agent. | | [`ci`](https://github.com/kata-containers/ci) | CI | Continuous Integration configuration files and scripts. | @@ -140,8 +147,10 @@ The table below lists the remaining parts of the project: Kata Containers is now [available natively for most distributions](docs/install/README.md#packaged-installation-methods). -However, packaging scripts and metadata are still used to generate [snap](snap/local) and GitHub releases. See -the [components](#components) section for further details. + +## Metrics tests + +See the [metrics documentation](tests/metrics/README.md). ## Glossary of Terms diff --git a/VERSION b/VERSION index 3fef9b564845..2f290ae1b67f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.5.0-rc0 +3.2.0-rc0 diff --git a/ci/install_libseccomp.sh b/ci/install_libseccomp.sh index 4c4a42cb96e6..5d53be73387c 100755 --- a/ci/install_libseccomp.sh +++ b/ci/install_libseccomp.sh @@ -7,12 +7,10 @@ set -o errexit -cidir=$(dirname "$0") -source "${cidir}/lib.sh" +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +script_name="$(basename "${BASH_SOURCE[0]}")" -clone_tests_repo - -source "${tests_repo_dir}/.ci/lib.sh" +source "${script_dir}/../tests/common.bash" # The following variables if set on the environment will change the behavior # of gperf and libseccomp configure scripts, that may lead this script to @@ -23,25 +21,27 @@ arch=${ARCH:-$(uname -m)} workdir="$(mktemp -d --tmpdir build-libseccomp.XXXXX)" # Variables for libseccomp -# Currently, specify the libseccomp version directly without using `versions.yaml` -# because the current Snap workflow is incomplete. -# After solving the issue, replace this code by using the `versions.yaml`. -# libseccomp_version=$(get_version "externals.libseccomp.version") -# libseccomp_url=$(get_version "externals.libseccomp.url") -libseccomp_version="2.5.1" -libseccomp_url="https://github.com/seccomp/libseccomp" +libseccomp_version="${LIBSECCOMP_VERSION:-""}" +if [ -z "${libseccomp_version}" ]; then + libseccomp_version=$(get_from_kata_deps "externals.libseccomp.version") +fi +libseccomp_url="${LIBSECCOMP_URL:-""}" +if [ -z "${libseccomp_url}" ]; then + libseccomp_url=$(get_from_kata_deps "externals.libseccomp.url") +fi libseccomp_tarball="libseccomp-${libseccomp_version}.tar.gz" libseccomp_tarball_url="${libseccomp_url}/releases/download/v${libseccomp_version}/${libseccomp_tarball}" cflags="-O2" # Variables for gperf -# Currently, specify the gperf version directly without using `versions.yaml` -# because the current Snap workflow is incomplete. 
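
The install_libseccomp.sh change above drops the hard-coded libseccomp version and URL in favour of get_from_kata_deps from tests/common.bash, which (as the removed comments anticipated) resolves them from versions.yaml, while LIBSECCOMP_VERSION and LIBSECCOMP_URL remain as environment overrides. A rough equivalent of that lookup with yq, assuming a yq 3.x binary (yq 4.x uses a different expression syntax):

```bash
# Rough equivalent of the get_from_kata_deps lookups, assuming yq 3.x
# (yq 4.x would use e.g. `yq '.externals.libseccomp.version' versions.yaml`).
libseccomp_version=$(yq read versions.yaml "externals.libseccomp.version")
libseccomp_url=$(yq read versions.yaml "externals.libseccomp.url")
echo "libseccomp ${libseccomp_version} from ${libseccomp_url}"
```
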
-# After solving the issue, replace this code by using the `versions.yaml`. -# gperf_version=$(get_version "externals.gperf.version") -# gperf_url=$(get_version "externals.gperf.url") -gperf_version="3.1" -gperf_url="https://ftp.gnu.org/gnu/gperf" +gperf_version="${GPERF_VERSION:-""}" +if [ -z "${gperf_version}" ]; then + gperf_version=$(get_from_kata_deps "externals.gperf.version") +fi +gperf_url="${GPERF_URL:-""}" +if [ -z "${gperf_url}" ]; then + gperf_url=$(get_from_kata_deps "externals.gperf.url") +fi gperf_tarball="gperf-${gperf_version}.tar.gz" gperf_tarball_url="${gperf_url}/${gperf_tarball}" @@ -85,7 +85,8 @@ build_and_install_libseccomp() { curl -sLO "${libseccomp_tarball_url}" tar -xf "${libseccomp_tarball}" pushd "libseccomp-${libseccomp_version}" - ./configure --prefix="${libseccomp_install_dir}" CFLAGS="${cflags}" --enable-static --host="${arch}" + [ "${arch}" == $(uname -m) ] && cc_name="" || cc_name="${arch}-linux-gnu-gcc" + CC=${cc_name} ./configure --prefix="${libseccomp_install_dir}" CFLAGS="${cflags}" --enable-static --host="${arch}" make make install popd diff --git a/ci/install_yq.sh b/ci/install_yq.sh index 56ad7d669125..cc3e988b779a 100755 --- a/ci/install_yq.sh +++ b/ci/install_yq.sh @@ -43,6 +43,16 @@ function install_yq() { "aarch64") goarch=arm64 ;; + "arm64") + # If we're on an apple silicon machine, just assign amd64. + # The version of yq we use doesn't have a darwin arm build, + # but Rosetta can come to the rescue here. + if [ $goos == "Darwin" ]; then + goarch=amd64 + else + goarch=arm64 + fi + ;; "ppc64le") goarch=ppc64le ;; @@ -64,7 +74,7 @@ function install_yq() { fi ## NOTE: ${var,,} => gives lowercase value of var - local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos,,}_${goarch}" + local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos}_${goarch}" curl -o "${yq_path}" -LSsf "${yq_url}" [ $? 
-ne 0 ] && die "Download ${yq_url} failed" chmod +x "${yq_path}" diff --git a/ci/lib.sh b/ci/lib.sh index 3cb2c158f64a..29b640a6aef4 100644 --- a/ci/lib.sh +++ b/ci/lib.sh @@ -54,3 +54,13 @@ run_docs_url_alive_check() git fetch -a bash "$tests_repo_dir/.ci/static-checks.sh" --docs --all "github.com/kata-containers/kata-containers" } + +run_get_pr_changed_file_details() +{ + clone_tests_repo + # Make sure we have the targeting branch + git remote set-branches --add origin "${branch}" + git fetch -a + source "$tests_repo_dir/.ci/lib.sh" + get_pr_changed_file_details +} diff --git a/deny.toml b/deny.toml new file mode 100644 index 000000000000..7c97ec4c7e5d --- /dev/null +++ b/deny.toml @@ -0,0 +1,33 @@ +targets = [ + { triple = "x86_64-apple-darwin" }, + { triple = "x86_64-unknown-linux-gnu" }, + { triple = "x86_64-unknown-linux-musl" }, +] + +[advisories] +vulnerability = "deny" +unsound = "deny" +unmaintained = "deny" +ignore = ["RUSTSEC-2020-0071"] + +[bans] +multiple-versions = "allow" +deny = [ + { name = "cmake" }, + { name = "openssl-sys" }, +] + +[licenses] +unlicensed = "deny" +allow-osi-fsf-free = "neither" +copyleft = "allow" +# We want really high confidence when inferring licenses from text +confidence-threshold = 0.93 +allow = ["0BSD", "Apache-2.0", "BSD-2-Clause", "BSD-3-Clause", "CC0-1.0", "ISC", "MIT", "MPL-2.0"] +private = { ignore = true} + +exceptions = [] + +[sources] +unknown-registry = "allow" +unknown-git = "allow" diff --git a/docs/Developer-Guide.md b/docs/Developer-Guide.md index c1c2d62ab125..2f60ab1652a5 100644 --- a/docs/Developer-Guide.md +++ b/docs/Developer-Guide.md @@ -2,6 +2,8 @@ This document is written **specifically for developers**: it is not intended for end users. +If you want to contribute changes that you have made, please read the [community guidelines](https://github.com/kata-containers/community/blob/main/CONTRIBUTING.md) for information about our processes. + # Assumptions - You are working on a non-critical test or development system. @@ -33,51 +35,41 @@ You need to install the following to build Kata Containers components: - `make`. - `gcc` (required for building the shim and runtime). -# Build and install the Kata Containers runtime +# Build and install Kata Containers +## Build and install the Kata Containers runtime -``` -$ go get -d -u github.com/kata-containers/kata-containers -$ cd $GOPATH/src/github.com/kata-containers/kata-containers/src/runtime -$ make && sudo -E PATH=$PATH make install +```bash +$ git clone https://github.com/kata-containers/kata-containers.git +$ pushd kata-containers/src/runtime +$ make && sudo -E "PATH=$PATH" make install +$ sudo mkdir -p /etc/kata-containers/ +$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers +$ popd ``` The build will create the following: - runtime binary: `/usr/local/bin/kata-runtime` and `/usr/local/bin/containerd-shim-kata-v2` -- configuration file: `/usr/share/defaults/kata-containers/configuration.toml` - -# Check hardware requirements - -You can check if your system is capable of creating a Kata Container by running the following: - -``` -$ sudo kata-runtime check -``` - -If your system is *not* able to run Kata Containers, the previous command will error out and explain why. +- configuration file: `/usr/share/defaults/kata-containers/configuration.toml` and `/etc/kata-containers/configuration.toml` ## Configure to use initrd or rootfs image Kata containers can run with either an initrd image or a rootfs image. 
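Before switching between the two, it can be useful to check which one the installed configuration currently selects. A minimal check, assuming the configuration file has already been copied to `/etc/kata-containers/configuration.toml` as shown above:

```bash
# Print the image/initrd settings; the uncommented line is the one in use.
grep -E '^#? *(image|initrd) *=' /etc/kata-containers/configuration.toml
```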
-If you want to test with `initrd`, make sure you have `initrd = /usr/share/kata-containers/kata-containers-initrd.img` -in your configuration file, commenting out the `image` line: +If you want to test with `initrd`, make sure you have uncommented `initrd = /usr/share/kata-containers/kata-containers-initrd.img` +in your configuration file, commenting out the `image` line in +`/etc/kata-containers/configuration.toml`. For example: -`/usr/share/defaults/kata-containers/configuration.toml` and comment out the `image` line with the following. For example: - -``` -$ sudo mkdir -p /etc/kata-containers/ -$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers +```bash $ sudo sed -i 's/^\(image =.*\)/# \1/g' /etc/kata-containers/configuration.toml +$ sudo sed -i 's/^# \(initrd =.*\)/\1/g' /etc/kata-containers/configuration.toml ``` You can create the initrd image as shown in the [create an initrd image](#create-an-initrd-image---optional) section. -If you want to test with a rootfs `image`, make sure you have `image = /usr/share/kata-containers/kata-containers.img` +If you want to test with a rootfs `image`, make sure you have uncommented `image = /usr/share/kata-containers/kata-containers.img` in your configuration file, commenting out the `initrd` line. For example: -``` -$ sudo mkdir -p /etc/kata-containers/ -$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers +```bash $ sudo sed -i 's/^\(initrd =.*\)/# \1/g' /etc/kata-containers/configuration.toml ``` The rootfs image is created as shown in the [create a rootfs image](#create-a-rootfs-image) section. @@ -90,19 +82,38 @@ rootfs `image`(100MB+). Enable seccomp as follows: -``` +```bash $ sudo sed -i '/^disable_guest_seccomp/ s/true/false/' /etc/kata-containers/configuration.toml ``` This will pass container seccomp profiles to the kata agent. +## Enable SELinux on the guest + +> **Note:** +> +> - To enable SELinux on the guest, SELinux MUST be also enabled on the host. +> - You MUST create and build a rootfs image for SELinux in advance. +> See [Create a rootfs image](#create-a-rootfs-image) and [Build a rootfs image](#build-a-rootfs-image). +> - SELinux on the guest is supported in only a rootfs image currently, so +> you cannot enable SELinux with the agent init (`AGENT_INIT=yes`) yet. + +Enable guest SELinux in Enforcing mode as follows: + +``` +$ sudo sed -i '/^disable_guest_selinux/ s/true/false/g' /etc/kata-containers/configuration.toml +``` + +The runtime automatically will set `selinux=1` to the kernel parameters and `xattr` option to +`virtiofsd` when `disable_guest_selinux` is set to `false`. + +If you want to enable SELinux in Permissive mode, add `enforcing=0` to the kernel parameters. + ## Enable full debug Enable full debug as follows: -``` -$ sudo mkdir -p /etc/kata-containers/ -$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers +```bash $ sudo sed -i -e 's/^# *\(enable_debug\).*=.*$/\1 = true/g' /etc/kata-containers/configuration.toml $ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.log=debug initcall_debug"/g' /etc/kata-containers/configuration.toml ``` @@ -175,7 +186,7 @@ and offers possible workarounds and fixes. it stores. When messages are suppressed, it is noted in the logs. 
This can be checked for by looking for those notifications, such as: -```sh +```bash $ sudo journalctl --since today | fgrep Suppressed Jun 29 14:51:17 mymachine systemd-journald[346]: Suppressed 4150 messages from /system.slice/docker.service ``` @@ -200,7 +211,7 @@ RateLimitBurst=0 Restart `systemd-journald` for the changes to take effect: -```sh +```bash $ sudo systemctl restart systemd-journald ``` @@ -214,39 +225,52 @@ $ sudo systemctl restart systemd-journald The agent is built with a statically linked `musl.` The default `libc` used is `musl`, but on `ppc64le` and `s390x`, `gnu` should be used. To configure this: -``` -$ export ARCH=$(uname -m) +```bash +$ export ARCH="$(uname -m)" $ if [ "$ARCH" = "ppc64le" -o "$ARCH" = "s390x" ]; then export LIBC=gnu; else export LIBC=musl; fi -$ [ ${ARCH} == "ppc64le" ] && export ARCH=powerpc64le -$ rustup target add ${ARCH}-unknown-linux-${LIBC} +$ [ "${ARCH}" == "ppc64le" ] && export ARCH=powerpc64le +$ rustup target add "${ARCH}-unknown-linux-${LIBC}" ``` To build the agent: -``` -$ go get -d -u github.com/kata-containers/kata-containers -$ cd $GOPATH/src/github.com/kata-containers/kata-containers/src/agent && make -``` - The agent is built with seccomp capability by default. If you want to build the agent without the seccomp capability, you need to run `make` with `SECCOMP=no` as follows. +```bash +$ make -C kata-containers/src/agent SECCOMP=no ``` -$ make -C $GOPATH/src/github.com/kata-containers/kata-containers/src/agent SECCOMP=no + +For building the agent with seccomp support using `musl`, set the environment +variables for the [`libseccomp` crate](https://github.com/libseccomp-rs/libseccomp-rs). + +```bash +$ export LIBSECCOMP_LINK_TYPE=static +$ export LIBSECCOMP_LIB_PATH="the path of the directory containing libseccomp.a" +$ make -C kata-containers/src/agent ``` +If the compilation fails when the agent tries to link the `libseccomp` library statically +against `musl`, you will need to build `libseccomp` manually with `-U_FORTIFY_SOURCE`. +You can use [our script](https://github.com/kata-containers/kata-containers/blob/main/ci/install_libseccomp.sh) +to install `libseccomp` for the agent. + +```bash +$ mkdir -p ${seccomp_install_path} ${gperf_install_path} +$ kata-containers/ci/install_libseccomp.sh ${seccomp_install_path} ${gperf_install_path} +$ export LIBSECCOMP_LIB_PATH="${seccomp_install_path}/lib" +``` + +On `ppc64le` and `s390x`, `glibc` is used. You will need to install the `libseccomp` library +provided by your distribution. + +> e.g. `libseccomp-dev` for Ubuntu, or `libseccomp-devel` for CentOS + > **Note:** > > - If you enable seccomp in the main configuration file but build the agent without seccomp capability, > the runtime exits conservatively with an error message. -## Get the osbuilder - -``` -$ go get -d -u github.com/kata-containers/kata-containers -$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder -``` - ## Create a rootfs image ### Create a local rootfs @@ -254,24 +278,32 @@ As a prerequisite, you need to install Docker. Otherwise, you will not be able to run the `rootfs.sh` script with `USE_DOCKER=true` as expected in the following example. 
-``` -$ export ROOTFS_DIR=${GOPATH}/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder/rootfs -$ sudo rm -rf ${ROOTFS_DIR} -$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder -$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true ./rootfs.sh ${distro}' +```bash +$ export distro="ubuntu" # example +$ export ROOTFS_DIR="$(realpath kata-containers/tools/osbuilder/rootfs-builder/rootfs)" +$ sudo rm -rf "${ROOTFS_DIR}" +$ pushd kata-containers/tools/osbuilder/rootfs-builder +$ script -fec 'sudo -E USE_DOCKER=true ./rootfs.sh "${distro}"' +$ popd ``` You MUST choose a distribution (e.g., `ubuntu`) for `${distro}`. You can get a supported distributions list in the Kata Containers by running the following. -``` -$ ./rootfs.sh -l +```bash +$ ./kata-containers/tools/osbuilder/rootfs-builder/rootfs.sh -l ``` If you want to build the agent without seccomp capability, you need to run the `rootfs.sh` script with `SECCOMP=no` as follows. +```bash +$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh "${distro}"' ``` -$ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}' + +If you want to enable SELinux on the guest, you MUST choose `centos` and run the `rootfs.sh` script with `SELINUX=yes` as follows. + +``` +$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SELINUX=yes ./rootfs.sh centos' ``` > **Note:** @@ -287,19 +319,33 @@ $ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true SECCOMP=no > > - You should only do this step if you are testing with the latest version of the agent. -``` -$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/usr/bin ../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent -$ sudo install -o root -g root -m 0440 ../../../src/agent/kata-agent.service ${ROOTFS_DIR}/usr/lib/systemd/system/ -$ sudo install -o root -g root -m 0440 ../../../src/agent/kata-containers.target ${ROOTFS_DIR}/usr/lib/systemd/system/ +```bash +$ sudo install -o root -g root -m 0550 -t "${ROOTFS_DIR}/usr/bin" "${ROOTFS_DIR}/../../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent" +$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-agent.service" "${ROOTFS_DIR}/usr/lib/systemd/system/" +$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-containers.target" "${ROOTFS_DIR}/usr/lib/systemd/system/" ``` ### Build a rootfs image +```bash +$ pushd kata-containers/tools/osbuilder/image-builder +$ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh "${ROOTFS_DIR}"' +$ popd +``` + +If you want to enable SELinux on the guest, you MUST run the `image_builder.sh` script with `SELINUX=yes` +to label the guest image as follows. +To label the image on the host, you need to make sure that SELinux is enabled (`selinuxfs` is mounted) on the host +and the rootfs MUST be created by running the `rootfs.sh` with `SELINUX=yes`. + ``` -$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/image-builder -$ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}' +$ script -fec 'sudo -E USE_DOCKER=true SELINUX=yes ./image_builder.sh ${ROOTFS_DIR}' ``` +Currently, the `image_builder.sh` uses `chcon` as an interim solution in order to apply `container_runtime_exec_t` +to the `kata-agent`. 
Hence, if you run `restorecon` to the guest image after running the `image_builder.sh`, +the `kata-agent` needs to be labeled `container_runtime_exec_t` again by yourself. + > **Notes:** > > - You must ensure that the *default Docker runtime* is `runc` to make use of @@ -309,25 +355,31 @@ $ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}' > variable in the previous command and ensure the `qemu-img` command is > available on your system. > - If `qemu-img` is not installed, you will likely see errors such as `ERROR: File /dev/loop19p1 is not a block device` and `losetup: /tmp/tmp.bHz11oY851: Warning: file is smaller than 512 bytes; the loop device may be useless or invisible for system tools`. These can be mitigated by installing the `qemu-img` command (available in the `qemu-img` package on Fedora or the `qemu-utils` package on Debian). +> - If `loop` module is not probed, you will likely see errors such as `losetup: cannot find an unused loop device`. Execute `modprobe loop` could resolve it. ### Install the rootfs image -``` -$ commit=$(git log --format=%h -1 HEAD) -$ date=$(date +%Y-%m-%d-%T.%N%z) +```bash +$ pushd kata-containers/tools/osbuilder/image-builder +$ commit="$(git log --format=%h -1 HEAD)" +$ date="$(date +%Y-%m-%d-%T.%N%z)" $ image="kata-containers-${date}-${commit}" $ sudo install -o root -g root -m 0640 -D kata-containers.img "/usr/share/kata-containers/${image}" $ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers.img) +$ popd ``` ## Create an initrd image - OPTIONAL ### Create a local rootfs for initrd image -``` -$ export ROOTFS_DIR="${GOPATH}/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder/rootfs" -$ sudo rm -rf ${ROOTFS_DIR} -$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder -$ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true ./rootfs.sh ${distro}' + +```bash +$ export distro="ubuntu" # example +$ export ROOTFS_DIR="$(realpath kata-containers/tools/osbuilder/rootfs-builder/rootfs)" +$ sudo rm -rf "${ROOTFS_DIR}" +$ pushd kata-containers/tools/osbuilder/rootfs-builder/ +$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true ./rootfs.sh "${distro}"' +$ popd ``` `AGENT_INIT` controls if the guest image uses the Kata agent as the guest `init` process. When you create an initrd image, always set `AGENT_INIT` to `yes`. @@ -335,14 +387,14 @@ always set `AGENT_INIT` to `yes`. You MUST choose a distribution (e.g., `ubuntu`) for `${distro}`. You can get a supported distributions list in the Kata Containers by running the following. -``` -$ ./rootfs.sh -l +```bash +$ ./kata-containers/tools/osbuilder/rootfs-builder/rootfs.sh -l ``` If you want to build the agent without seccomp capability, you need to run the `rootfs.sh` script with `SECCOMP=no` as follows. -``` -$ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}' +```bash +$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh "${distro}"' ``` > **Note:** @@ -351,28 +403,31 @@ $ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true SECCOMP=no Optionally, add your custom agent binary to the rootfs with the following commands. The default `$LIBC` used is `musl`, but on ppc64le and s390x, `gnu` should be used. 
Also, Rust refers to ppc64le as `powerpc64le`: -``` -$ export ARCH=$(uname -m) -$ [ ${ARCH} == "ppc64le" ] || [ ${ARCH} == "s390x" ] && export LIBC=gnu || export LIBC=musl -$ [ ${ARCH} == "ppc64le" ] && export ARCH=powerpc64le -$ sudo install -o root -g root -m 0550 -T ../../../src/agent/target/${ARCH}-unknown-linux-${LIBC}/release/kata-agent ${ROOTFS_DIR}/sbin/init +```bash +$ export ARCH="$(uname -m)" +$ [ "${ARCH}" == "ppc64le" ] || [ "${ARCH}" == "s390x" ] && export LIBC=gnu || export LIBC=musl +$ [ "${ARCH}" == "ppc64le" ] && export ARCH=powerpc64le +$ sudo install -o root -g root -m 0550 -T "${ROOTFS_DIR}/../../../../src/agent/target/${ARCH}-unknown-linux-${LIBC}/release/kata-agent" "${ROOTFS_DIR}/sbin/init" ``` ### Build an initrd image -``` -$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/initrd-builder -$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true ./initrd_builder.sh ${ROOTFS_DIR}' +```bash +$ pushd kata-containers/tools/osbuilder/initrd-builder +$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true ./initrd_builder.sh "${ROOTFS_DIR}"' +$ popd ``` ### Install the initrd image -``` -$ commit=$(git log --format=%h -1 HEAD) -$ date=$(date +%Y-%m-%d-%T.%N%z) +```bash +$ pushd kata-containers/tools/osbuilder/initrd-builder +$ commit="$(git log --format=%h -1 HEAD)" +$ date="$(date +%Y-%m-%d-%T.%N%z)" $ image="kata-containers-initrd-${date}-${commit}" $ sudo install -o root -g root -m 0640 -D kata-containers-initrd.img "/usr/share/kata-containers/${image}" $ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers-initrd.img) +$ popd ``` # Install guest kernel images @@ -391,44 +446,44 @@ Kata Containers makes use of upstream QEMU branch. The exact version and repository utilized can be found by looking at the [versions file](../versions.yaml). Find the correct version of QEMU from the versions file: -``` -$ source ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/lib.sh -$ qemu_version=$(get_from_kata_deps "assets.hypervisor.qemu.version") -$ echo ${qemu_version} +```bash +$ source kata-containers/tools/packaging/scripts/lib.sh +$ qemu_version="$(get_from_kata_deps "assets.hypervisor.qemu.version")" +$ echo "${qemu_version}" ``` Get source from the matching branch of QEMU: -``` -$ go get -d github.com/qemu/qemu -$ cd ${GOPATH}/src/github.com/qemu/qemu -$ git checkout ${qemu_version} -$ your_qemu_directory=${GOPATH}/src/github.com/qemu/qemu +```bash +$ git clone -b "${qemu_version}" https://github.com/qemu/qemu.git +$ your_qemu_directory="$(realpath qemu)" ``` There are scripts to manage the build and packaging of QEMU. For the examples below, set your environment as: -``` -$ go get -d github.com/kata-containers/kata-containers -$ packaging_dir="${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging" +```bash +$ packaging_dir="$(realpath kata-containers/tools/packaging)" ``` Kata often utilizes patches for not-yet-upstream and/or backported fixes for components, including QEMU. These can be found in the [packaging/QEMU directory](../tools/packaging/qemu/patches), and it's *recommended* that you apply them. For example, suppose that you are going to build QEMU version 5.2.0, do: -``` -$ cd $your_qemu_directory -$ $packaging_dir/scripts/apply_patches.sh $packaging_dir/qemu/patches/5.2.x/ +```bash +$ "$packaging_dir/scripts/apply_patches.sh" "$packaging_dir/qemu/patches/5.2.x/" ``` To build utilizing the same options as Kata, you should make use of the `configure-hypervisor.sh` script. 
For example: -``` -$ cd $your_qemu_directory -$ $packaging_dir/scripts/configure-hypervisor.sh kata-qemu > kata.cfg +```bash +$ pushd "$your_qemu_directory" +$ "$packaging_dir/scripts/configure-hypervisor.sh" kata-qemu > kata.cfg $ eval ./configure "$(cat kata.cfg)" $ make -j $(nproc --ignore=1) +# Optional $ sudo -E make install +$ popd ``` +If you do not want to install the respective QEMU version, the configuration file can be modified to point to the correct binary. In `/etc/kata-containers/configuration.toml`, change `path = "/path/to/qemu/build/qemu-system-x86_64"` to point to the correct QEMU binary. + See the [static-build script for QEMU](../tools/packaging/static-build/qemu/build-static-qemu.sh) for a reference on how to get, setup, configure and build QEMU for Kata. ### Build a custom QEMU for aarch64/arm64 - REQUIRED @@ -439,11 +494,33 @@ See the [static-build script for QEMU](../tools/packaging/static-build/qemu/buil > under upstream review for supporting NVDIMM on aarch64. > You could build the custom `qemu-system-aarch64` as required with the following command: +```bash +$ git clone https://github.com/kata-containers/tests.git +$ script -fec 'sudo -E tests/.ci/install_qemu.sh' ``` -$ go get -d github.com/kata-containers/tests -$ script -fec 'sudo -E ${GOPATH}/src/github.com/kata-containers/tests/.ci/install_qemu.sh' + +## Build `virtiofsd` + +When using the file system type virtio-fs (default), `virtiofsd` is required + +```bash +$ pushd kata-containers/tools/packaging/static-build/virtiofsd +$ ./build.sh +$ popd +``` + +Modify `/etc/kata-containers/configuration.toml` and update value `virtio_fs_daemon = "/path/to/kata-containers/tools/packaging/static-build/virtiofsd/virtiofsd/virtiofsd"` to point to the binary. + +# Check hardware requirements + +You can check if your system is capable of creating a Kata Container by running the following: + +```bash +$ sudo kata-runtime check ``` +If your system is *not* able to run Kata Containers, the previous command will error out and explain why. + # Run Kata Containers with Containerd Refer to the [How to use Kata Containers and Containerd](how-to/containerd-kata.md) how-to guide. @@ -474,7 +551,7 @@ See [Set up a debug console](#set-up-a-debug-console). ## Checking Docker default runtime -``` +```bash $ sudo docker info 2>/dev/null | grep -i "default runtime" | cut -d: -f2- | grep -q runc && echo "SUCCESS" || echo "ERROR: Incorrect default Docker runtime" ``` ## Set up a debug console @@ -491,7 +568,7 @@ contain either `/bin/sh` or `/bin/bash`. Enable debug_console_enabled in the `configuration.toml` configuration file: -``` +```toml [agent.kata] debug_console_enabled = true ``` @@ -502,7 +579,7 @@ This will pass `agent.debug_console agent.debug_console_vport=1026` to agent as For Kata Containers `2.0.x` releases, the `kata-runtime exec` command depends on the`kata-monitor` running, in order to get the sandbox's `vsock` address to connect to. Thus, first start the `kata-monitor` process. -``` +```bash $ sudo kata-monitor ``` @@ -510,10 +587,15 @@ $ sudo kata-monitor #### Connect to debug console -Command `kata-runtime exec` is used to connect to the debug console. +You need to start a container for example: +```bash +$ sudo ctr run --runtime io.containerd.kata.v2 -d docker.io/library/ubuntu:latest testdebug +``` + +Then, you can use the command `kata-runtime exec ` to connect to the debug console. 
``` -$ kata-runtime exec 1a9ab65be63b8b03dfd0c75036d27f0ed09eab38abb45337fea83acd3cd7bacd +$ kata-runtime exec testdebug bash-4.2# id uid=0(root) gid=0(root) groups=0(root) bash-4.2# pwd @@ -522,7 +604,7 @@ bash-4.2# exit exit ``` -`kata-runtime exec` has a command-line option `runtime-namespace`, which is used to specify under which [runtime namespace](https://github.com/containerd/containerd/blob/master/docs/namespaces.md) the particular pod was created. By default, it is set to `k8s.io` and works for containerd when configured +`kata-runtime exec` has a command-line option `runtime-namespace`, which is used to specify under which [runtime namespace](https://github.com/containerd/containerd/blob/main/docs/namespaces.md) the particular pod was created. By default, it is set to `k8s.io` and works for containerd when configured with Kubernetes. For CRI-O, the namespace should set to `default` explicitly. This should not be confused with [Kubernetes namespaces](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/). For other CRI-runtimes and configurations, you may need to set the namespace utilizing the `runtime-namespace` option. @@ -564,10 +646,10 @@ an additional `coreutils` package. For example using CentOS: -``` -$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder -$ export ROOTFS_DIR=${GOPATH}/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder/rootfs -$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true EXTRA_PKGS="bash coreutils" ./rootfs.sh centos' +```bash +$ pushd kata-containers/tools/osbuilder/rootfs-builder +$ export ROOTFS_DIR="$(realpath ./rootfs)" +$ script -fec 'sudo -E USE_DOCKER=true EXTRA_PKGS="bash coreutils" ./rootfs.sh centos' ``` #### Build the debug image @@ -579,12 +661,13 @@ section when using rootfs, or when using initrd, complete the steps in the [Buil Install the image: ->**Note**: When using an initrd image, replace the below rootfs image name `kata-containers.img` +>**Note**: When using an initrd image, replace the below rootfs image name `kata-containers.img` >with the initrd image name `kata-containers-initrd.img`. -``` +```bash $ name="kata-containers-centos-with-debug-console.img" $ sudo install -o root -g root -m 0640 kata-containers.img "/usr/share/kata-containers/${name}" +$ popd ``` Next, modify the `image=` values in the `[hypervisor.qemu]` section of the @@ -593,7 +676,7 @@ to specify the full path to the image name specified in the previous code section. Alternatively, recreate the symbolic link so it points to the new debug image: -``` +```bash $ (cd /usr/share/kata-containers && sudo ln -sf "$name" kata-containers.img) ``` @@ -604,7 +687,7 @@ to avoid all subsequently created containers from using the debug image. Create a container as normal. For example using `crictl`: -``` +```bash $ sudo crictl run -r kata container.yaml pod.yaml ``` @@ -612,25 +695,25 @@ $ sudo crictl run -r kata container.yaml pod.yaml The steps required to enable debug console for QEMU slightly differ with those for firecracker / cloud-hypervisor. - + ##### Enabling debug console for QEMU -Add `agent.debug_console` to the guest kernel command line to allow the agent process to start a debug console. +Add `agent.debug_console` to the guest kernel command line to allow the agent process to start a debug console. 
-``` +```bash $ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_console"/g' "${kata_configuration_file}" ``` -Here `kata_configuration_file` could point to `/etc/kata-containers/configuration.toml` +Here `kata_configuration_file` could point to `/etc/kata-containers/configuration.toml` or `/usr/share/defaults/kata-containers/configuration.toml` or `/opt/kata/share/defaults/kata-containers/configuration-{hypervisor}.toml`, if you installed Kata Containers using `kata-deploy`. ##### Enabling debug console for cloud-hypervisor / firecracker -Slightly different configuration is required in case of firecracker and cloud hypervisor. -Firecracker and cloud-hypervisor don't have a UNIX socket connected to `/dev/console`. -Hence, the kernel command line option `agent.debug_console` will not work for them. +Slightly different configuration is required in case of firecracker and cloud hypervisor. +Firecracker and cloud-hypervisor don't have a UNIX socket connected to `/dev/console`. +Hence, the kernel command line option `agent.debug_console` will not work for them. These hypervisors support `hybrid vsocks`, which can be used for communication between the host and the guest. The kernel command line option `agent.debug_console_vport` was added to allow developers specify on which `vsock` port the debugging console should be connected. @@ -638,12 +721,12 @@ between the host and the guest. The kernel command line option `agent.debug_cons Add the parameter `agent.debug_console_vport=1026` to the kernel command line as shown below: -``` +```bash sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_console_vport=1026"/g' "${kata_configuration_file}" ``` > **Note** Ports 1024 and 1025 are reserved for communication with the agent -> and gathering of agent logs respectively. +> and gathering of agent logs respectively. ##### Connecting to the debug console @@ -651,7 +734,7 @@ Next, connect to the debug console. The VSOCKS paths vary slightly between each VMM solution. In case of cloud-hypervisor, connect to the `vsock` as shown: -``` +```bash $ sudo su -c 'cd /var/run/vc/vm/${sandbox_id}/root/ && socat stdin unix-connect:clh.sock' CONNECT 1026 ``` @@ -659,7 +742,7 @@ CONNECT 1026 **Note**: You need to type `CONNECT 1026` and press `RETURN` key after entering the `socat` command. For firecracker, connect to the `hvsock` as shown: -``` +```bash $ sudo su -c 'cd /var/run/vc/firecracker/${sandbox_id}/root/ && socat stdin unix-connect:kata.hvsock' CONNECT 1026 ``` @@ -668,7 +751,7 @@ CONNECT 1026 For QEMU, connect to the `vsock` as shown: -``` +```bash $ sudo su -c 'cd /var/run/vc/vm/${sandbox_id} && socat "stdin,raw,echo=0,escape=0x11" "unix-connect:console.sock"' ``` @@ -681,7 +764,7 @@ If the image is created using [osbuilder](../tools/osbuilder), the following YAML file exists and contains details of the image and how it was created: -``` +```bash $ cat /var/lib/osbuilder/osbuilder.yaml ``` diff --git a/docs/Limitations.md b/docs/Limitations.md index 1c4cfcb7ba06..74a6acf2d0b3 100644 --- a/docs/Limitations.md +++ b/docs/Limitations.md @@ -60,17 +60,26 @@ This section lists items that might be possible to fix. ## OCI CLI commands ### Docker and Podman support -Currently Kata Containers does not support Docker or Podman. +Currently Kata Containers does not support Podman. See issue https://github.com/kata-containers/kata-containers/issues/722 for more information. 
+Docker has supported Kata Containers since version 22.06:
+
+```bash
+$ sudo docker run --runtime io.containerd.kata.v2
+```
+
+Kata Containers also works well with containerd; we recommend using
+containerd's Docker-style command-line tool [`nerdctl`](https://github.com/containerd/nerdctl).
+
## Runtime commands

### checkpoint and restore

The runtime does not provide `checkpoint` and `restore` commands. There
are discussions about using VM save and restore to give us a
-`[criu](https://github.com/checkpoint-restore/criu)`-like functionality,
+[`criu`](https://github.com/checkpoint-restore/criu)-like functionality,
which might provide a solution.

Note that the OCI standard does not specify `checkpoint` and `restore`
@@ -93,6 +102,42 @@ All other configurations are supported and are working properly.

## Networking

+### Host network
+
+Host network (`nerdctl/docker run --net=host` or [Kubernetes `HostNetwork`](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#hosts-namespaces)) is not supported.
+It is not possible to directly access the host networking configuration
+from within the VM.
+
+The `--net=host` option can still be used with `runc` containers and
+inter-mixed with running Kata Containers, thus enabling use of `--net=host`
+when necessary.
+
+Note that currently, passing the `--net=host` option into a
+Kata Container may result in the Kata Container networking setup
+modifying, re-configuring and therefore possibly breaking the host
+networking setup. Do not use `--net=host` with Kata Containers.
+
+### Support for joining an existing VM network
+
+Docker supports the ability for containers to join another container's
+namespace with the `docker run --net=containers` syntax. This allows
+multiple containers to share a common network namespace and the network
+interfaces placed in that network namespace. Kata Containers does not
+support network namespace sharing. If a Kata Container is set up to
+share the network namespace of a `runc` container, the runtime
+effectively takes over all the network interfaces assigned to the
+namespace and binds them to the VM. Consequently, the `runc` container loses
+its network connectivity.
+
+### docker run --link
+
+The runtime does not support the `docker run --link` command. This
+command is now deprecated by Docker and we have no intention of adding support.
+Equivalent functionality can be achieved with the newer Docker networking commands.
+
+See more documentation at
+[docs.docker.com](https://docs.docker.com/network/links/).
+
## Resource management

Due to the way VMs differ in their CPU and memory allocation, and sharing
@@ -102,7 +147,8 @@ these commands is potentially challenging.

See issue https://github.com/clearcontainers/runtime/issues/341 and
[the constraints challenge](#the-constraints-challenge) for more information.

For CPUs resource management see
-[CPU constraints](design/vcpu-handling.md).
+[CPU constraints (in runtime-go)](design/vcpu-handling-runtime-go.md) and
+[CPU constraints (in runtime-rs)](design/vcpu-handling-runtime-rs.md).

# Architectural limitations
diff --git a/docs/Release-Process.md b/docs/Release-Process.md
index 7dcfb84a30ce..962463c43073 100644
--- a/docs/Release-Process.md
+++ b/docs/Release-Process.md
@@ -28,23 +28,6 @@ $ ./update-repository-version.sh -p "$NEW_VERSION" "$BRANCH"
```
-### Point tests repository to stable branch
-
- If you create a new stable branch, i.e.
if your release changes a major or minor version number (not a patch release), then - you should modify the `tests` repository to point to that newly created stable branch and not the `main` branch. - The objective is that changes in the CI on the main branch will not impact the stable branch. - - In the test directory, change references the main branch in: - * `README.md` - * `versions.yaml` - * `cmd/github-labels/labels.yaml.in` - * `cmd/pmemctl/pmemctl.sh` - * `.ci/lib.sh` - * `.ci/static-checks.sh` - - See the commits in [the corresponding PR for stable-2.1](https://github.com/kata-containers/tests/pull/3504) for an example of the changes. - - ### Merge all bump version Pull requests - The above step will create a GitHub pull request in the Kata projects. Trigger the CI using `/test` command on each bump Pull request. @@ -63,6 +46,24 @@ $ ./tag_repos.sh -p -b "$BRANCH" tag ``` +### Point tests repository to stable branch + + If your release changes a major or minor version number(not a patch release), then the above + `./tag_repos.sh` script will create a new stable branch in all the repositories in addition to tagging them. + This happens when you are making the first `rc` release for a new major or minor version in Kata. + In this case, you should modify the `tests` repository to point to the newly created stable branch and not the `main` branch. + The objective is that changes in the CI on the main branch will not impact the stable branch. + + In the test directory, change references of the `main` branch to the new stable branch in: + * `README.md` + * `versions.yaml` + * `cmd/github-labels/labels.yaml.in` + * `cmd/pmemctl/pmemctl.sh` + * `.ci/lib.sh` + * `.ci/static-checks.sh` + + See the commits in [the corresponding PR for stable-2.1](https://github.com/kata-containers/tests/pull/3504) for an example of the changes. + ### Check Git-hub Actions We make use of [GitHub actions](https://github.com/features/actions) in this [file](../.github/workflows/release.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-containers` repository. diff --git a/docs/Unit-Test-Advice.md b/docs/Unit-Test-Advice.md index ea6caa031c05..4bd4da5299e9 100644 --- a/docs/Unit-Test-Advice.md +++ b/docs/Unit-Test-Advice.md @@ -341,7 +341,7 @@ The main repository has the most comprehensive set of skip abilities. 
See: One method is to use the `nix` crate along with some custom macros: -``` +```rust #[cfg(test)] mod tests { #[allow(unused_macros)] diff --git a/docs/design/README.md b/docs/design/README.md index adcffd01962b..60a1de910f47 100644 --- a/docs/design/README.md +++ b/docs/design/README.md @@ -6,13 +6,19 @@ Kata Containers design documents: - [API Design of Kata Containers](kata-api-design.md) - [Design requirements for Kata Containers](kata-design-requirements.md) - [VSocks](VSocks.md) -- [VCPU handling](vcpu-handling.md) +- [VCPU handling(in runtime-go)](vcpu-handling-runtime-go.md) +- [VCPU handling(in runtime-rs)](vcpu-handling-runtime-rs.md) +- [VCPU threads pinning](vcpu-threads-pinning.md) - [Host cgroups](host-cgroups.md) +- [Agent systemd cgroup](agent-systemd-cgroup.md) - [`Inotify` support](inotify.md) +- [`Hooks` support](hooks-handling.md) - [Metrics(Kata 2.0)](kata-2-0-metrics.md) +- [Metrics in Rust Runtime(runtime-rs)](kata-metrics-in-runtime-rs.md) - [Design for Kata Containers `Lazyload` ability with `nydus`](kata-nydus-design.md) - [Design for direct-assigned volume](direct-blk-device-assignment.md) - [Design for core-scheduling](core-scheduling.md) +- [Virtualization Reference Architecture](kata-vra.md) --- - [Design proposals](proposals) diff --git a/docs/design/VSocks.md b/docs/design/VSocks.md index 0271645c2ba8..9375f30f598d 100644 --- a/docs/design/VSocks.md +++ b/docs/design/VSocks.md @@ -78,4 +78,4 @@ with the containers is if the VM itself or the `containerd-shim-kata-v2` dies, i the containers are removed automatically. [1]: https://wiki.qemu.org/Features/VirtioVsock -[2]: ./vcpu-handling.md#virtual-cpus-and-kubernetes-pods +[2]: ./vcpu-handling-runtime-go.md#virtual-cpus-and-kubernetes-pods diff --git a/docs/design/agent-systemd-cgroup.md b/docs/design/agent-systemd-cgroup.md new file mode 100644 index 000000000000..686a27b30273 --- /dev/null +++ b/docs/design/agent-systemd-cgroup.md @@ -0,0 +1,84 @@ +# Systemd Cgroup for Agent + +As we know, we can interact with cgroups in two ways, **`cgroupfs`** and **`systemd`**. The former is achieved by reading and writing cgroup `tmpfs` files under `/sys/fs/cgroup` while the latter is done by configuring a transient unit by requesting systemd. Kata agent uses **`cgroupfs`** by default, unless you pass the parameter `--systemd-cgroup`. + +## usage + +For systemd, kata agent configures cgroups according to the following `linux.cgroupsPath` format standard provided by `runc` (`[slice]:[prefix]:[name]`). If you don't provide a valid `linux.cgroupsPath`, kata agent will treat it as `"system.slice:kata_agent:"`. + +> Here slice is a systemd slice under which the container is placed. If empty, it defaults to system.slice, except when cgroup v2 is used and rootless container is created, in which case it defaults to user.slice. +> +> Note that slice can contain dashes to denote a sub-slice (e.g. user-1000.slice is a correct notation, meaning a `subslice` of user.slice), but it must not contain slashes (e.g. user.slice/user-1000.slice is invalid). +> +> A slice of `-` represents a root slice. +> +> Next, prefix and name are used to compose the unit name, which is `-.scope`, unless name has `.slice` suffix, in which case prefix is ignored and the name is used as is. + +## supported properties + +The kata agent will translate the parameters in the `linux.resources` of `config.json` into systemd unit properties, and send it to systemd for configuration. 
Since systemd supports limited properties, only the following parameters in `linux.resources` will be applied. We will simply treat hybrid mode as legacy mode by the way. + +- CPU + + - v1 + + | runtime spec resource | systemd property name | + | --------------------- | --------------------- | + | `cpu.shares` | `CPUShares` | + + - v2 + + | runtime spec resource | systemd property name | + | -------------------------- | -------------------------- | + | `cpu.shares` | `CPUShares` | + | `cpu.period` | `CPUQuotaPeriodUSec`(v242) | + | `cpu.period` & `cpu.quota` | `CPUQuotaPerSecUSec` | + +- MEMORY + + - v1 + + | runtime spec resource | systemd property name | + | --------------------- | --------------------- | + | `memory.limit` | `MemoryLimit` | + + - v2 + + | runtime spec resource | systemd property name | + | ------------------------------ | --------------------- | + | `memory.low` | `MemoryLow` | + | `memory.max` | `MemoryMax` | + | `memory.swap` & `memory.limit` | `MemorySwapMax` | + +- PIDS + + | runtime spec resource | systemd property name | + | --------------------- | --------------------- | + | `pids.limit ` | `TasksMax` | + +- CPUSET + + | runtime spec resource | systemd property name | + | --------------------- | -------------------------- | + | `cpuset.cpus` | `AllowedCPUs`(v244) | + | `cpuset.mems` | `AllowedMemoryNodes`(v244) | + +## Systemd Interface + +`session.rs` and `system.rs` in `src/agent/rustjail/src/cgroups/systemd/interface` are automatically generated by `zbus-xmlgen`, which is is an accompanying tool provided by `zbus` to generate Rust code from `D-Bus XML interface descriptions`. The specific commands to generate these two files are as follows: + +```shell +// system.rs +zbus-xmlgen --system org.freedesktop.systemd1 /org/freedesktop/systemd1 +// session.rs +zbus-xmlgen --session org.freedesktop.systemd1 /org/freedesktop/systemd1 +``` + +The current implementation of `cgroups/systemd` uses `system.rs` while `session.rs` could be used to build rootless containers in the future. + +## references + +- [runc - systemd cgroup driver](https://github.com/opencontainers/runc/blob/main/docs/systemd.md) + +- [systemd.resource-control — Resource control unit settings](https://www.freedesktop.org/software/systemd/man/systemd.resource-control.html) + diff --git a/docs/design/arch-images/vcpus-pinning-process.png b/docs/design/arch-images/vcpus-pinning-process.png new file mode 100644 index 000000000000..266c34acbaa6 Binary files /dev/null and b/docs/design/arch-images/vcpus-pinning-process.png differ diff --git a/docs/design/architecture/kubernetes.md b/docs/design/architecture/kubernetes.md index be8165d75ffa..a607291ec35d 100644 --- a/docs/design/architecture/kubernetes.md +++ b/docs/design/architecture/kubernetes.md @@ -3,16 +3,16 @@ [Kubernetes](https://github.com/kubernetes/kubernetes/), or K8s, is a popular open source container orchestration engine. In Kubernetes, a set of containers sharing resources such as networking, storage, mount, PID, etc. is called a -[pod](https://kubernetes.io/docs/user-guide/pods/). +[pod](https://kubernetes.io/docs/concepts/workloads/pods/). A node can have multiple pods, but at a minimum, a node within a Kubernetes cluster only needs to run a container runtime and a container agent (called a -[Kubelet](https://kubernetes.io/docs/admin/kubelet/)). +[Kubelet](https://kubernetes.io/docs/concepts/overview/components/#kubelet)). Kata Containers represents a Kubelet pod as a VM. 
A Kubernetes cluster runs a control plane where a scheduler (typically -running on a dedicated master node) calls into a compute Kubelet. This +running on a dedicated control-plane node) calls into a compute Kubelet. This Kubelet instance is responsible for managing the lifecycle of pods within the nodes and eventually relies on a container runtime to handle execution. The Kubelet architecture decouples lifecycle diff --git a/docs/design/architecture/networking.md b/docs/design/architecture/networking.md index ab056849c7af..1550f0ad0117 100644 --- a/docs/design/architecture/networking.md +++ b/docs/design/architecture/networking.md @@ -36,7 +36,7 @@ compatibility, and performance on par with MACVTAP. Kata Containers has deprecated support for bridge due to lacking performance relative to TC-filter and MACVTAP. Kata Containers supports both -[CNM](https://github.com/docker/libnetwork/blob/master/docs/design.md#the-container-network-model) +[CNM](https://github.com/moby/libnetwork/blob/master/docs/design.md#the-container-network-model) and [CNI](https://github.com/containernetworking/cni) for networking management. ## Network Hotplug diff --git a/docs/design/architecture_3.0/README.md b/docs/design/architecture_3.0/README.md new file mode 100644 index 000000000000..bc0acf6b24d4 --- /dev/null +++ b/docs/design/architecture_3.0/README.md @@ -0,0 +1,169 @@ +# Kata 3.0 Architecture +## Overview +In cloud-native scenarios, there is an increased demand for container startup speed, resource consumption, stability, and security, areas where the present Kata Containers runtime is challenged relative to other runtimes. To achieve this, we propose a solid, field-tested and secure Rust version of the kata-runtime. + +Also, we provide the following designs: + +- Turn key solution with builtin `Dragonball` Sandbox +- Async I/O to reduce resource consumption +- Extensible framework for multiple services, runtimes and hypervisors +- Lifecycle management for sandbox and container associated resources + +### Rationale for choosing Rust + +We chose Rust because it is designed as a system language with a focus on efficiency. +In contrast to Go, Rust makes a variety of design trade-offs in order to obtain +good execution performance, with innovative techniques that, in contrast to C or +C++, provide reasonable protection against common memory errors (buffer +overflow, invalid pointers, range errors), error checking (ensuring errors are +dealt with), thread safety, ownership of resources, and more. + +These benefits were verified in our project when the Kata Containers guest agent +was rewritten in Rust. We notably saw a significant reduction in memory usage +with the Rust-based implementation. + + +## Design +### Architecture +![architecture](./images/architecture.png) +### Built-in VMM +#### Current Kata 2.x architecture +![not_builtin_vmm](./images/not_built_in_vmm.png) +As shown in the figure, runtime and VMM are separate processes. The runtime process forks the VMM process and interacts through the inter-process RPC. Typically, process interaction consumes more resources than peers within the process, and it will result in relatively low efficiency. At the same time, the cost of resource operation and maintenance should be considered. For example, when performing resource recovery under abnormal conditions, the exception of any process must be detected by others and activate the appropriate resource recovery process. If there are additional processes, the recovery becomes even more difficult. 
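One way to observe this split on a running host is to list the sandbox processes; the sketch below assumes the QEMU hypervisor (the VMM process name differs for other hypervisors):

```bash
# Each Kata 2.x sandbox has a containerd-shim-kata-v2 (runtime) process plus a
# separately forked VMM process. With the built-in VMM described below, only the
# shim process remains.
ps -eo pid,ppid,comm,args | grep -E 'containerd-shim-kata-v2|qemu-system' | grep -v grep
```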
+#### How To Support Built-in VMM +We provide `Dragonball` Sandbox to enable built-in VMM by integrating VMM's function into the Rust library. We could perform VMM-related functionalities by using the library. Because runtime and VMM are in the same process, there is a benefit in terms of message processing speed and API synchronization. It can also guarantee the consistency of the runtime and the VMM life cycle, reducing resource recovery and exception handling maintenance, as shown in the figure: +![builtin_vmm](./images/built_in_vmm.png) +### Async Support +#### Why Need Async +**Async is already in stable Rust and allows us to write async code** + +- Async provides significantly reduced CPU and memory overhead, especially for workloads with a large amount of IO-bound tasks +- Async is zero-cost in Rust, which means that you only pay for what you use. Specifically, you can use async without heap allocations and dynamic dispatch, which greatly improves efficiency +- For more (see [Why Async?](https://rust-lang.github.io/async-book/01_getting_started/02_why_async.html) and [The State of Asynchronous Rust](https://rust-lang.github.io/async-book/01_getting_started/03_state_of_async_rust.html)). + +**There may be several problems if implementing kata-runtime with Sync Rust** + +- Too many threads with a new TTRPC connection + - TTRPC threads: reaper thread(1) + listener thread(1) + client handler(2) +- Add 3 I/O threads with a new container +- In Sync mode, implementing a timeout mechanism is challenging. For example, in TTRPC API interaction, the timeout mechanism is difficult to align with Golang +#### How To Support Async +The kata-runtime is controlled by TOKIO_RUNTIME_WORKER_THREADS to run the OS thread, which is 2 threads by default. For TTRPC and container-related threads run in the `tokio` thread in a unified manner, and related dependencies need to be switched to Async, such as Timer, File, Netlink, etc. With the help of Async, we can easily support no-block I/O and timer. Currently, we only utilize Async for kata-runtime. The built-in VMM keeps the OS thread because it can ensure that the threads are controllable. + +**For N tokio worker threads and M containers** + +- Sync runtime(both OS thread and `tokio` task are OS thread but without `tokio` worker thread) OS thread number: 4 + 12*M +- Async runtime(only OS thread is OS thread) OS thread number: 2 + N +```shell +├─ main(OS thread) +├─ async-logger(OS thread) +└─ tokio worker(N * OS thread) + ├─ agent log forwarder(1 * tokio task) + ├─ health check thread(1 * tokio task) + ├─ TTRPC reaper thread(M * tokio task) + ├─ TTRPC listener thread(M * tokio task) + ├─ TTRPC client handler thread(7 * M * tokio task) + ├─ container stdin io thread(M * tokio task) + ├─ container stdout io thread(M * tokio task) + └─ container stderr io thread(M * tokio task) +``` +### Extensible Framework +The Kata 3.x runtime is designed with the extension of service, runtime, and hypervisor, combined with configuration to meet the needs of different scenarios. At present, the service provides a register mechanism to support multiple services. Services could interact with runtime through messages. In addition, the runtime handler handles messages from services. To meet the needs of a binary that supports multiple runtimes and hypervisors, the startup must obtain the runtime handler type and hypervisor type through configuration. 
+ +![framework](./images/framework.png) +### Resource Manager +In our case, there will be a variety of resources, and every resource has several subtypes. Especially for `Virt-Container`, every subtype of resource has different operations. And there may be dependencies, such as the share-fs rootfs and the share-fs volume will use share-fs resources to share files to the VM. Currently, network and share-fs are regarded as sandbox resources, while rootfs, volume, and cgroup are regarded as container resources. Also, we abstract a common interface for each resource and use subclass operations to evaluate the differences between different subtypes. +![resource manager](./images/resourceManager.png) + +## Roadmap + +- Stage 1 (June): provide basic features (current delivered) +- Stage 2 (September): support common features +- Stage 3: support full features + +| **Class** | **Sub-Class** | **Development Stage** | **Status** | +| -------------------------- | ------------------- | --------------------- |------------| +| Service | task service | Stage 1 | ✅ | +| | extend service | Stage 3 | 🚫 | +| | image service | Stage 3 | 🚫 | +| Runtime handler | `Virt-Container` | Stage 1 | ✅ | +| Endpoint | VETH Endpoint | Stage 1 | ✅ | +| | Physical Endpoint | Stage 2 | ✅ | +| | Tap Endpoint | Stage 2 | ✅ | +| | `Tuntap` Endpoint | Stage 2 | ✅ | +| | `IPVlan` Endpoint | Stage 2 | ✅ | +| | `MacVlan` Endpoint | Stage 2 | ✅ | +| | MACVTAP Endpoint | Stage 3 | 🚫 | +| | `VhostUserEndpoint` | Stage 3 | 🚫 | +| Network Interworking Model | Tc filter | Stage 1 | ✅ | +| | `MacVtap` | Stage 3 | 🚧 | +| Storage | Virtio-fs | Stage 1 | ✅ | +| | `nydus` | Stage 2 | 🚧 | +| | `device mapper` | Stage 2 | 🚫 | +| `Cgroup V2` | | Stage 2 | 🚧 | +| Hypervisor | `Dragonball` | Stage 1 | 🚧 | +| | QEMU | Stage 2 | 🚫 | +| | ACRN | Stage 3 | 🚫 | +| | Cloud Hypervisor | Stage 3 | 🚫 | +| | Firecracker | Stage 3 | 🚫 | + +## FAQ + +- Are the "service", "message dispatcher" and "runtime handler" all part of the single Kata 3.x runtime binary? + + Yes. They are components in Kata 3.x runtime. And they will be packed into one binary. + 1. Service is an interface, which is responsible for handling multiple services like task service, image service and etc. + 2. Message dispatcher, it is used to match multiple requests from the service module. + 3. Runtime handler is used to deal with the operation for sandbox and container. +- What is the name of the Kata 3.x runtime binary? + + Apparently we can't use `containerd-shim-v2-kata` because it's already used. We are facing the hardest issue of "naming" again. Any suggestions are welcomed. + Internally we use `containerd-shim-v2-rund`. + +- Is the Kata 3.x design compatible with the containerd shimv2 architecture? + + Yes. It is designed to follow the functionality of go version kata. And it implements the `containerd shim v2` interface/protocol. + +- How will users migrate to the Kata 3.x architecture? + + The migration plan will be provided before the Kata 3.x is merging into the main branch. + +- Is `Dragonball` limited to its own built-in VMM? Can the `Dragonball` system be configured to work using an external `Dragonball` VMM/hypervisor? + + The `Dragonball` could work as an external hypervisor. However, stability and performance is challenging in this case. Built in VMM could optimise the container overhead, and it's easy to maintain stability. + + `runD` is the `containerd-shim-v2` counterpart of `runC` and can run a pod/containers. 
`Dragonball` is a `microvm`/VMM that is designed to run container workloads. Instead of `microvm`/VMM, we sometimes refer to it as a secure sandbox.
+
+- QEMU, Cloud Hypervisor and Firecracker support are planned, but how would that work? Would they run in a separate process?
+
+  Yes. They cannot work as a built-in VMM, so they will run as separate processes.
+
+- What is `upcall`?
+
+  The `upcall` is used to hotplug CPU/memory/MMIO devices, and it solves two issues.
+  1. It avoids a dependency on PCI/ACPI.
+  2. It avoids a dependency on `udevd` within the guest and gets deterministic results for hotplug operations. So `upcall` is an alternative to ACPI based CPU/memory/device hotplug, and we may cooperate with the community to add support for ACPI based CPU/memory/device hotplug if needed.
+
+  `Dbs-upcall` is a `vsock-based` direct communication tool between the VMM and the guest. The server side of the `upcall` is a driver in the guest kernel (kernel patches are needed for this feature) and it starts serving requests once the kernel has started. The client side is in the VMM; it is a thread that communicates over VSOCK through `uds`. We have implemented device hotplug / hot-unplug directly through `upcall` in order to avoid virtualizing ACPI and to minimize the virtual machine's overhead. There could be many other uses for this direct communication channel. It's already open source:
+  https://github.com/openanolis/dragonball-sandbox/tree/main/crates/dbs-upcall
+
+- The URL above says the kernel patches work with 4.19, but do they also work with 5.15+?
+
+  Forward compatibility should be achievable; we have already ported them to a 5.10 based kernel.
+
+- Are these patches platform-specific or would they work for any architecture that supports VSOCK?
+
+  They are almost platform independent, but some messages related to CPU hotplug are platform dependent.
+
+- Could the kernel driver be replaced with a userland daemon in the guest using loopback VSOCK?
+
+  We need to create device nodes for hot-added CPU/memory/devices, so it's not easy for a userspace daemon to do these tasks.
+
+- The fact that `upcall` allows communication between the VMM and the guest suggests that this architecture might be incompatible with https://github.com/confidential-containers where the VMM should have no knowledge of what happens inside the VM.
+
+  1. `TDX` doesn't support CPU/memory hotplug yet.
+  2. ACPI based device hotplug depends on the ACPI `DSDT` table, and the guest kernel executes `ASL` code while handling those hotplug events. It should be easier to audit VSOCK based communication than ACPI `ASL` methods.
+
+- What is the security boundary for the monolithic / "Built-in VMM" case?
+
+  It has the security boundary of virtualization. More details will be provided in the next stage.
\ No newline at end of file diff --git a/docs/design/architecture_3.0/images/architecture.png b/docs/design/architecture_3.0/images/architecture.png new file mode 100644 index 000000000000..9d7fd976ef0e Binary files /dev/null and b/docs/design/architecture_3.0/images/architecture.png differ diff --git a/docs/design/architecture_3.0/images/built_in_vmm.png b/docs/design/architecture_3.0/images/built_in_vmm.png new file mode 100644 index 000000000000..91386c5eb667 Binary files /dev/null and b/docs/design/architecture_3.0/images/built_in_vmm.png differ diff --git a/docs/design/architecture_3.0/images/framework.png b/docs/design/architecture_3.0/images/framework.png new file mode 100644 index 000000000000..992afdfff522 Binary files /dev/null and b/docs/design/architecture_3.0/images/framework.png differ diff --git a/docs/design/architecture_3.0/images/not_built_in_vmm.png b/docs/design/architecture_3.0/images/not_built_in_vmm.png new file mode 100644 index 000000000000..ad1d5b8e34f5 Binary files /dev/null and b/docs/design/architecture_3.0/images/not_built_in_vmm.png differ diff --git a/docs/design/architecture_3.0/images/resourceManager.png b/docs/design/architecture_3.0/images/resourceManager.png new file mode 100644 index 000000000000..7a8fcae0c7a9 Binary files /dev/null and b/docs/design/architecture_3.0/images/resourceManager.png differ diff --git a/docs/design/architecture_3.0/images/source_code/kata_3.0_images.drawio b/docs/design/architecture_3.0/images/source_code/kata_3.0_images.drawio new file mode 100644 index 000000000000..146143cb07b8 --- /dev/null +++ b/docs/design/architecture_3.0/images/source_code/kata_3.0_images.drawio @@ -0,0 +1 @@ +5Vxbk6MoFP41qdp9yFQUNenHTt/2YWZrtrpqp/uRRGLYRskguc2vX2hRI5jWXIxm+ilyRMTvfH4c8JAeuAs3Twwu5t+oj0jPHvibHrjv2bY9urHFj7RsE4tlDbzEEjDsK1tueMa/kDIOlHWJfRQXKnJKCceLonFKowhNecEGGaPrYrUZJcW7LmCADMPzFBLT+gP7fJ5YR+4gt/+FcDDn2fOpMyFMKytDPIc+Xe+YwEMP3DFKeXIUbu4QkeiluCTXPe45m3WMoYjXucDjwxfy+PfMt/758Yjj4Ol5GfQdN2lmBclSPbHqLd+mEDC6jHwkWxn0wHg9xxw9L+BUnl0LrwvbnIdElCxxOMOE3FFCmShHNBKVxmZHVd9XiHG02TGpjj8hGiLOtqKKOuukqCoa2UNVXuc+yerMd/xhj5QRKh4EWds5VOJAoXUActZVAGd5ReCcEtycEtwag802YOMwfpO4IbbCApv9IFqtgTgqguiZIFp2CYhOUyC6w2ryIV/omCpSxuc0oBEkD7l1XKRnXucrpQuF53+I860SZbjktIi2D+N55pmYM/qWaST4CHkOWYD4B8+nhgj5CB/6hyECOV4VpboMbXXpd4pFVzK/As2vmRPTJmK6ZFOkrtJ8lnXjeDcCw4s4lMNRp18GYHfsZXAMFCmfI9ZtFB2vYyi6BwcC5xeMasnai3zyplZLSqX0jBqRnlvG4HanwkJKSlxfmVK/P+6prw/zWn1xkPTgrPLlGYwJURy/C5gHQ/kaRZN48e41j3Dp8QWMCozyfi5l9DueJu/XrcQ1mPzhCThF/0WvBoXDP+WxbE6E8RHvz2CIyTa5LLtlzwbAEb+QYUgMa9ZYDKO4L/QBz/JeiKNA/vpYUBNPlhylHRfwJH1PKphjLSFiPoLaEpNMFbYaG3bUJIvoLqImwzbCEbTB/EVe/sVVpdedM/cb1fJ7YasKzQvK2WOZkxwzOlTlWyF0XyN07eleNvs+O3A3BnBCQvwJ3XQyuijCB6y2o4u0AzvwrTDj/anQcYij92htLvAk8kjDUzwzL4KWRA4abCVIQoKDSBSnAknRMBhLBPEUklt1IsS+T/bJdvE9OINXLKua1aVu0acl53OLuYix65EQRmI0Nz3SAYbrU/IOUNxc2eiitDpuNXDgstJqmfPgLmvrUAPQbZ155gyY4Gi5+Vzq6oIailC21NmcuppT6itRV8/qHMfNyeY1qKsD2ldXc634itTVqRsjNcc8c8q0hnH4ycTVqeb1hcXVnJBdqbi2T/H0Zh0XV31VwCkb4b0yEjb3NdOcQEWIryl76yLz+tqnG6fka/CFmWdOmhhSS2/7X+LfRlYNh9SdxTYmq7Y5l4jn4hH7s/gaGO0OWme0GfTLFJ8rgc9z24bPjFZXlCzDTn7E1eHzyqKiy8JnRkVdHMntwZEjueU2hRwwY6CfKFx2kXZ254ZxYIZBq/AzDOCGK1ofwIEZUfkMBjSaQHlx99nc/hAOriMnVhdRrwy4C4uouUr3BjmUtwqQyqHrOP3aH8OBGQL9++3zSGgpjS8roWYQ9YZYhK5CPr2btvnrmCKQJmR+6jXT0s/9F10zdWpk/6DIv5XbZiSOBMYxnhadUYTpmJQrgSjbvuwWXvMWZDG/7L2UXndstnllJpdCpTKTC9RN5dpxsFvi39R2Yva6rX0qcWtmr5vJpnoQO9QaSpAxGjo0qVVPUvXcCySpOqaYq1hkkGWr6q+A8O9XOBF6X2B+fXlhKMa/4OS9PclcBYxo3B333PtSsn78wuqik+13U3fp7W4pK12qGHwRjrUL+KdbmI7lYVqFzmYxamSDRPqmNCFYdm3BqspaF3W/I4bFw0paaBqXy9prQdXKNa55EUsR7IiIWX
o2jf6ppq6I2aBcXI4XsbOT2ZyZn0jmKmKem4TVI6TVKXLZepKgPrDVJZee1aGPtO1zy0yha5hbJwV+lxLFdDm8mrjDThPXPZa4RkM3nWPuwYldDew7O5KYJ+xXq2Sk09Ie2OyPFw6lmj6S2ycPwDU7XLE1Tk+3OXFrnCjmf9mQVM//+QI8/A8=7Zxde5pKEIB/jZf68K1eRk2T9DQnaUxi25s+K6xAg6yFRWN+/VniouBsDTZAICe9KTvgEt6ZnZ3Zr5Y6nD+eBWjhXBILey1Fsh5b6qilKIqkKey/WLLeSGRZMjYSO3AtLtsJxu4T5kKJSyPXwmHmQUqIR91FVmgS38cmzchQEJBV9rEZ8bJvXSAbA8HYRB6UTlyLOhtpT5d28nPs2g7dfh+/M0fJw1wQOsgiq5RIPW2pw4AQurmaPw6xF9NLuOABUabU+O63L0YreuZrshO0N5V9OuYn208IsE//umo6Gc4847cbkadfN79Hq98Xs59trt0l8iLOi38rXScAAxL5Fo4rkVrqYOW4FI8XyIzvrpjNMJlD5x4ryexy5nrekHgkYGWf+OyhQc4/nn/kEgcUP6ZUxz/mDJM5psGaPcLvalwtiWF2eXm103I3UZ2T0rCePIi4Zdnbqnf02AUHeISe3g9MPSdLoyyUKkBJUfgQs8TB0mW8/gxWfhlsSAPygBO0LUXtG10VGXvImRzLlo67JXKXlSx4gRHLIvBqWeA1AN6dxx62IPJ7fGczbJgmk1sodLZ1APVY3f5UkkpUg6rVTA0GUAOhDg7KUAP3LBkNlMVZN96O85co/IwnqzsaLW8mV5/Hy1k0bCt9APplwEe67rQf+bPnqcizazk9u1IW8USVKeL3bkCZZEh8ilw/tvIC0TPwPUsToe8pU9UoFX2/o2fgb4PiFH1FRF8rjT7sWCconJdDv0rPYkh5YMtaSbAdGuGv0RIvrgfm02h9ff/j51UbWvoch+GmN7XccIGo6RRt7Qj3ZqbI2g2zh6ezCuNxpUJPI8xtZIA/iHzqzmP8DvIt7x15mj32eeP30tjD+MVZL+KONSTFO5jSfApwKT3IVa2UaxdwbS5NVcpJczsk86ru0PNCa2Sft+/vLshVRE/G+ErUHQKe2LLxmBdJQB1iEx95pzvpIEt898wXQhac8y9M6ZoPjqGIkv18FAWU35V1JsB+MpImK0erJCRRYOIDRsRjXvZSG9MDdBSeDMYEDmo4wB6i7jI74la47cNQfRQgBnqK4t8WnA6V1Qp6oBUYsBX0SkqBxN0kDFO+4nn0OqKVxHyQpSDkq5YljDmGHoksJjrP0//VhaxcQ7QKQPvJDbAZIPPhYCBXF6Z1NFfoUgPMu4+mhBWanCtKk6uM0hToUhvMUxiniXgWEqeJgUK/+i+mKxI8NKbj19XaNf/EKlNUbwihs7AxUDUIVZD6VgsVphT3xIvmxY/YV2ipbw4VTksNbcZv0RioxptCPeutRrfry+mga335Z7b+SqahKxieHDvsuz81uvlX6FOFUGFH5bl+9Nhq/AC7lms2Q9akTl+FvAuZ0BCOUQDeheMti6gMIi1NhUS3U6aZyLVfFk/oaC+uWPkS+cguwXRLY2tIHU3NwjX6HbUrsthy3IMQrw7wnlgWy7mYz5U24Bhru9GcdbkGnOH8w802sX0PxqzHxmz0d//emDecl7hbmIfHZWvP2JC7iX9+kavc6XVLQtsDaG/xfOEhWvwYTTVjiwLSNbNmOCx2f3nZGFNWldx9ny6gWsSaTiFVwUTD/fD6rjFcdTk319JWVAm5wpxjw1VyCF14kd1kwrpUB8INzjIE3kDXXnK34uFdvTS+oml4w2PvHUzZhR1fjPBmVeYuets8wF64fQYohTGi+1Pt6bU5nL1AHchzbZ8VTaYO9jJ1EBN3WUhzwm/MXct6nv0XqTprDLVSrSzpVbYcmE8uXQaStJchMQ+N3tesFakQdfcl1P1KfRRMLTlpH9MmczbkenGGqaWVuKam9bci2nq9aMPEklv1wVmqemHW6u88YJLJMU+9Jrlpvf6kYW7JSc/nLmkMaIFJ18xPC9ZgeNhGZvwRG4fdHAciMOu60YY56NIhYfGBRzVzXbXvF5MENMX7enjRTNoi664ZbZjFwPw0tRb5f5+Z5pvaFE3Ml6fDHFsnsG+dxEdO7GCn9JMll1myHwtSS/Y3RfZHf+MPPxe+83qeC6PH9K3R+q/0kVnAf2iv5YsL+NPzzaIjE7gs9zp//oZr4vo05Vb3jaKbbL9N6thM0fGf7RQOaurv12SIo7yk3g0pUO+zIW0xvMK2YPhcpm0dZyW1Vf7+9E1e5XdfrqpsfcMg/kPfe1oS7J7bP3glr8IFVe0PRZescMGm9Q+FC7TUU/Y13klip9e3cmA/ZSsd5jQfSj/eF9dS4YPb5frEPnOjBzL2RpO2O+x9E6wlHBJ/5lpMIS6Kf71dwgmHDuoRz2+PKIsLtofCkF9nz9d5wNR0+J0Z+6b01tPjzPCYfSW9jt7NmopgBbQsXO5YQFIg1DicMwGK/fuNZFWR1Xu5yIpW0JQGFk6SLDzkxzkLifFGiwUJ4ODQRyM6eheBsA1J/Y7ULUfbRvvbdTC5/+HLv4LPl0+npxfaRLCTAB7pI83ina/ScHighR1/5kZdDhBT+mAMq9LzZ4RqyRG/FHJ4259xVzJ+qIL0QNQo1PLaxLn74/tZ/1YbuCd3T0++JvXW14I2ESLfmpJ4K8i86AXIb3zyzz5/WbQDV5M6Ja08EfKHxm+mnNF700Diaw4pQN2FCa/UACvujr3dRN+704PV0/8A7VtZe5s4FP01fkw+FhvbjzF2lpk2ScfTpM3LfDIIowYQFcLL/PqRQKzCcdIYnHSSl8DVgjnn6ujqSvR0099cEBC6n7ENvZ6m2JuePu1pmjocG+wft2xTi6EIw5IgW1QqDHP0LxRGRVhjZMOoUpFi7FEUVo0WDgJo0YoNEILX1WoO9qpPDcESSoa5BTzZeo9s6qbW0UAp7JcQLd3syaoiSnyQVRaGyAU2XpdM+qynmwRjml75GxN6HLwMF4apeXtnX9yB7/6Gajd35s33k7Sz85c0yV+BwID+ctcmcLW/f5Dg29Wn+defV6rpuYZooqyAFwu8kM8BZW8LyQpZULw63WZ4EhwHNuR9qj19snYRhfMQWLx0zTyI2Vzqe6LYBpGb13WQ55nYwyTpSHccaFgWs0eU4EdYKrGH4wVjQZ88870FPitIKNyUWBc4XEDsQ0q2rIoo1frivbe1+3XhIWpGu1vyjqweEE65zLsugGcXAvtmHq4f/Nn1l8Xi0p2YN+vZ5OvlxbyBh7Pbq9dhL8HtaDvgNhbGwGgRbuOtoa1KaEtQs8Ee8ksrJt52QoD1COl+zKsEOTigQhDVUY2RAAewRcjVcRVz1ZAxb4JcU9rCXJMwpyB6PJTQ1JwdqvYADpucfWwMddCms9e1pQn5br1dl5DH1IWkFY3vClVdOzaq/Y40xEPh5fH1RH9zejKQ8PdhFCWxS40G1hkLNuEzROQJfI87g/a1tzaFGm9mCo3SYuV00
CIBg8Fb8/+hRMAdItRkeAEUcHU3PI72gl8t+ZULAtvjBQecZ9ksO7L7TUNipC10o92gssbI4NhDYiQxcg8iv8wI8DmcwSIKE2CUg1DSyfxbR1sbHhvtsYT2JxTEmxLc7xheXT82vFn6poTvrmiRvSOtYgY8tAy48jOYGP76hCOBLOCdiQIf2ba3a0Yu6FEaZoC2KJDi9qNHmKq8TCVxQJEPn/BtmYyqNItJs2EefY+kSZOA2iFps0909HBhBpPbS+THfwZabE0bEjm/dVw0rMVFow7Dokb85TEzJWCJgwXgbQ8Y+lgWHDhOU+ijG/pYt1sEfVTz+i4zDI2gy7mdL9CP38G8W0fy+PohJ2vOLBK8RyiP7pVNGZp0UWSjFSuwPBClzzR+xnwTZ+KBR3hiMWnl2OTWbAGVtQ6LB2dVYsMZQejYoNyq4QEBPAkrVQpKM6MPyBKx6fasl4hkug3C+LJtFCzrZh8FJxmWvETrh5vdPzwKQbDrZyVxg9TS9HBsX25DHvlFOF9UMjrSvqr9M3PYYEvQzqzvzo+1o0uCnOlqz4/tvgUGcKH9Zn58jgi0kkjr/+nDR9diOTfC3NNBy5iAhZesZ0oiU8P3RYExIJaIiwfKixY1aRiXHRFIuoopFkG0mnqdlbjulCe8KIHwHHupMyfl3PIZrwpDANczewnn2ZsMJ4Vjwryg8FXu+Xk5W+fR7V/QAxThYNZQWzOLm4gCQs/4YYkd3SVLgua2MLB/sWUEl35VYPQzLpalKmyJs2LUVGr0htPnLWDaGh392l5GQy5d07scHHIqSxoDsORIu3yj4gc5ijm/xcJQ8MaQVwqO6oNJqYvV69b0EY6JBZ8AQYDL3mAJ6X4l4YA8STZJ8FmVKzVTJ5reYsR9OU/41w4tjGrkp+8jGtX4z3/FK5axch6hJph5KuhDLT/UskW11Gtbj3pjiqdLuWzIi74HvYQbRL/xiqequPvey9Jz/G66Eb0kN9vSzS0kbKQnI3L68mNxZeV9ait3r/JmmtS29OabH9n8rLemvZM1fPjDu/b968mPlW/OnZXrNqQQa9K7ayPkQ3o/pPeQ0nvS3x+otqW8jSNDzvN+CO8rhDc7Dl8S3qcEqW3dVUc7sgaH1139bvYQXW3v/1n8cG6+nG8j0xw36K7JddfmbgR463xvP5Lc7m1se+ZfCPCbpUhPSf4ZPUJquT15p7TVA0z1RW/TCQOjQUy0A4hJI93POED5im8NOoJ1aOyHtd8lqnKyOPSSbCnFHNs4DDGhH8PnpTyPVOV0rCv5n7p/MCnjU2XYIfPPOJH5uu9H9n6u00lue9B/KRN6x0TIue7nnsxUHJ7+Vkzzt2ROOkPY4s4auy0+zEsDlOLzRn32Hw==7Zpdc5s4FIZ/jS+bAQQYLhvno9OZ7GTqbbbdmx0FFFsTjByQY7u/fiVb4kNSCLbBcZskM4l1EAL0nPfo6OABGM1W1xmcT29IjJKBY8WrAbgYOI5tWz77xy3rrSXgLW6YZDgWnUrDGP9CwmgJ6wLHKK91pIQkFM/rxoikKYpozQazjCzr3R5IUr/qHE6QZhhHMNGt/+CYTsVTeFZp/4LwZEqLBxZHZlB2FoZ8CmOyrJjA5QCMMkLo9tNsNUIJnzw5L9HTl6evV9ercxz+m9w/X8Lnp+Gn7WBXu5xSPEKGUtrt0ILlM0wWYr7Es9K1nMCMLNIY8UGsAThfTjFF4zmM+NElcxlme8BJMiIJyZgpJSniJpLSKzjDCfeav/GMeYBj/YWW7O83MoMp6xLDfLoZ1maNKZ0l4iM/VbiRHbB2y4cXk/SMMopWFfRiMq4RmSGarVkXcdQVZwi/dgXlZekkXhhubdOKgwDZEQrHnBQjl5PPPoj534EF6JrFwAGxh4LYbQekgUGNFW/cQkpRlm4sjsWtOc3II6pcOnDuge/3CJBptUbQ1wm6UrtVgoWgOyfoni7B4+Px6nQCnY7tGuj4fcHxNDjZIqVsHhsY2S8x6pbFfeC5nqWzfwgiFEU9MvKdGqPQwMikoN5CoK8xuru5ecd8HFAXke29NaHh2yUM+69PTo+IwLCOaKgTckxxzg77QhRoiFiOyx76/coIhHVGbx7nQg3RZIFyhsh6ZF7Ld2CHpg3nm9+DGR6LiAMMsrFMyVtfSOR+tcKEbUkpxCnKDsfh+6ORZR2MY5LAPBdXzR8RjaayIfemVp87prDFamSCBnqDZv8G0I6GwxTWjoujxRYWpfFnXtZhrYh7M+aBH60w/cHBnHmi9VNMJv98sRLMNo21aBwGhbHI1j/kuLzxc3P9oSfb5VU3LXnZ3WjmZJFFqKGfyHgpzCaoaTyRd6G4Vu3SfaNavjCgl7YMJZDi53qNzOQP4gq3BPMMoshLLfvMqzuf7dUH2T65OK9allKGUvMnbdHdTo02EHMiuK50m/MOecMtqyuOcNbS37cjlt5fzOoBgmhRETAL4gX3fM07jy+kjvUQtNSDXK5PRBC+FSpLozJEWzm4vpKqKuN0pAZXqc7YvncENeglmA81dKMG+7TUYKuJ4m+mhqHVfFtq/3C3/o51DLXpxbRbEmuCY8km1Tbrsiij12lggie8rBIxB2c5Njjn+SqOYPJZHJjhOOZjGxPsegp+mml1i4KOSTr9bUxblNyMUVOZu1dmWkuQRey0K5GzjKPm2LlvnO44anoto2ZwUkHTCdScuniHvnNODbT0XH2t8kLk7Cz46EXIMWKAIUXMairqf8QhdXtv1Qga3k8ba5ZdxKFvc/T1KY5/XYDl+P72P//O/v7dhFSD+H7ebqovn4Ft4NPX+00zH72ofIeZJshD3sDpTy/9O9JLJSenpY66qP0bOcmL7byeH1jw2mMvdLrreWOEOpEF3VZcz1brrm2X8yIPkD7cU4lMu+GgeVuj9pf31dW2xiwfveT/IZ9O0mFwWuoBijOqTr+veuTmvnf1vFJEsF3zffWrHudN1bPfZrIUTKG5XtTTuKi8Kp/w1OVzZlV+9izIFXciQ77Xj5jUd5sHLi2sWX5Pe9u9/LY7uPwf7Zpdb6M4FIZ/TS5b+QMIXHbSmVlp1aqadnZm9mblgpuwJTgCp0n216+dmAR/pCEJpKmmGqmDD8YEP+e8Pj7Qw4Px/GtBJqMbltCsh0Ay7+HrHkIQgkD8Jy2LlSWULWkYFmmiOm0M9+l/VBmBsk7ThJZaR85YxtOJboxZntOYazZSFGymd3timX7XCRlSy3Afk8y2/kgTPlJP4YON/Q+aDkd8/cDqzJhUnZWhHJGEzWom/LmHBwVjfHU0ng9oJievmpeb8jv4l9G+3/8zGz3+83D3cH97sRrsyz6XrB+hoDlvd2jF8oVkUzVf6ln5oprAgk3zhMpBQA9/mo1STu8nJJZnZ8JlhO0pzbIBy1ghTDnLqTSxnH8h4zSTXvOQjoUHIHBLZ+LvNzYmueiSkHK0HBaKxoiPM3UoL1VuBEPRbvjwapJeaMHpvIZeTcZXysaUFwvRRZ311BXKrz1FebZxEj+KVrZRzUFQoDoS5ZjD9cibyRcHav73YIHbZtFDOPFpmHjNgLzC
QGMlG3eEc1rkSwsC0lrygj3T2q1D9IiDoEOAIlY1goFN0KtiVyOIuiLonS/B0+PxdTqhTQd6DjqdhZdvwSmmORfz+AojuI1RuyweQ9/zgc3+KYxpHHfIKEAao8jByBVBXleMAovRXzc3vzEfhPUggv5bE+q/XcJw+PqEOkSE+zqivk0IuXQORl0hCi1EIscVD/37hhGOdEZvrnORhWg4paVABJ6F18od2LFpw6flv6MZnooIwo6wAa7krSsk1X61xkRsSTlJc1ocjyMIBgMAjsYxzEhZqruWz5THo6pR7U1BlzumqMFq5IKGO4MG3wG0k+FwydppcTQoJ9A8uZJlHdGKpTenUvjpPOU/JZhLX7V+qcmUx9dzxWzZWKjGcVAEi2LxsxpXNn4t79/3q/bmrstWddv9aJZsWsR0d8bLSTGkr42n8i6aaNUu2zfq5QsH+spW0Izw9EWvkbn8Qd3hjqUyg1jnpQBe+rrzQV8fZPXk6rp6WcoYysyfrEV3NTXWQMKJyKLWbSI7lK/8ZHPFgdjw99WIG+9fz+oRAdGgpuMOiC3uucs7Tx9ILcdD2DAequX6TAIiAJGxNBpDNA0HLzBSVWOclqLBM6ozMPBPEA0N6mMf0XBQNMDzigZoJorvLBr64PWfZfaP9uuPwCmizS543rHECjiRbHJrs14VZew6DcnSoSyrxMLBRY6NP8l8NY1JdqVOjNMkkWM7E2w9BT/PtLpBQccVOt1tTO2iaDPVNOZux0xbCbLSTlhTzo2OurXzUJ1uWTX9hqoZnpVootDMqdfv0PfOqbGVnpuvVbYoZ2viY9eJ76kATDgVVldR/0OHzO090Ag63k87a5ad6VDl0Xvr0JEb9QNyuHemQ/isdAhipEuHuSFvqkJr+TKyqrazN4iQ8z7bfhf03L+rrWyMT39cXwSLK/z3t5vvJbydZk+LVlZxXbV2vPPyWo+lS8+P6vF0AS7hrg2YbN3RQgT9UqW7iDJsR5mbwHmV0DygeyEOD13sI2OghgW01jzbXuqX6zuYFCymZbltpa859lst+qf5zGwLnvpCjn3bz9qo0ruB2S+I31KK0EFa5NeVCOyQoZYVx2uqONF5KY7xMQkyv0BoqjgQ6QNh01W7Vhz79fn6U64P1VmeDU8nOqK5+QZ6hXjzJTn+/D8=7Vpdc5s6EP01fmwHiU8/Nm7a5s5NJrfONM1TRwHFKAHkEXJs99dXsoUBidrEBsd3ksmMwy5CmD17VquDB/YoXXxlaBpf0ggnA2hFi4H9eQAhAJYn/knPcu0JpCUdE0YiNah0jMlvrJyW8s5IhPPaQE5pwsm07gxpluGQ13yIMTqvD3ugSf2uUzTBhmMcosT03pKIx+opXKv0f8NkEvPNA6szKSoGK0ceo4jOKy77fGCPGKV8fZQuRjiRwSvicnET336f3k3PfesxeAK31xN+82E92ZeXXLJ5BIYzvvfUd/dX83/8X/8Nb6eP367AxdgbAnWJ9YySmYqXela+LALI6CyLsJzEGthn85hwPJ6iUJ6di5QRvgeSJCOaUCZcGc2wdNGMf0EpSWTW3JBUZAC0rvBcfH6nKcrEkAjl8WpaIIyYp4k6lJeqNAKBsFs+vArSM2YcLyrQq2B8xTTFnC3FEHXWUVeovHZUKOZlkrjD4doXVxIEFpmDVGJONjOXwRcHKv4vwAJ0gsXuwG+JdQ0TaVwjzjHLVh5oSW/OGX3CBdwDaAfw3vY8LQ2EP3JxEDk9AujWAfQbAPSbAAR9AQgNANks4wKDLTiCXnA0QLoPXMe1TJAeghCHYY8gebAG0qa6VlACRYGtouT0BZJtgPTj8vINAwTtOo2A99oIOSdfB2HrOqiWw77As/06eIGJ3QanKnZg2Bd4rgGeaLDEQ79dgtlDjWCvXgI9A6PJDOcCI+tJ5Ljs/w9qAUVQz1Z/B4N4LEiamj9oNUAC+4LENyAR+yGOSIbZ4Wh43mhkWSeLhqMTpGkFakLD7guN4B2NkhtN5eqoaAx39wM4iz5JrUBYYYLynMiCjheE/5S4fHSVdadiKY8/LxRkK2OpjMMwEVCw5c9iXmncre7vu4Vd3nVlFbd9GZg5nbEQ7+5xOWITzHd3WjiqSShmalS3VA3IFz6GE8TJc114aUoHdYdrSmRnsOlELfDRrVcC6NYnWT+5uq6qdWhT6X2RsWlfh8aYSCQRWlaGTeWAfMtX1skCbS3d1zOWyb+J6gE6QQvRppkQf0nPXdl5fCJ1zAe3JR/8k+KDZw21dVGboi0bHE/vQLWJOmKDoykywHePwIYWstk7G/ZhQ3BabAB6l/h/Y0Ngbf9ebnPj1Xo8OAbbTI3zmkYG4USvyY1NePFqwJRHUEImUlsJRYKLFts+k+0qCVHySZ1ISRTJuRv763oHfppddQuhpok7ve03gSmDtquaWux2RNpokFXtBJXKWdbR5tq5b53uuGrCllXTPamqCQO9pzZyqnVPbRvtud+uq+6s+JjK8NmMJJxkwtmk4r+Xoe2vzBpkLwCOWoZMudgsQ4JJY2VSxmM6oRlKzkuvFvlyzL+UTlWAHzHnSxVhNOO0Hv6OSwXwWtYKr2WtaF0EDgPD1IUvsoRk+MMPInhAH3IDm7ej4uuvyTZy16up+MDUjNst4fsuqAdqY3tsm15pCQdteXmcNRzob2h1MavtCq7/5MV4C9jRzgcUmaipb/3uTEzR/p0OndDBPjE21JOrMzZA7zhs0HSAl7JBmOVPDtfDyx9u2ud/AA==7Vpdc+I2FP01PLJjSf7iMSGEbadps8smDX1TbAW0ayzGFgH66ythGduSlziAgc5mmGGsa/ka33PP1dE1HdSfrYYJnk/vWEiiDrTCVQfddCDs2a74loZ1ZnAtOzNMEhpmJlAYRvRfooyWsi5oSNLKRM5YxOm8agxYHJOAV2w4SdiyOu2FRdW7zvGEGIZRgCPT+jcN+TSz+o5V2D8TOpnmdwaWOjPD+WRlSKc4ZMuSCQ06qJ8wxrOj2apPIhm7PC645z6MfyNfxwysx8G0+3R/1e1mzm7fc8n2ERIS8+O6hpnrVxwtVLzUs/J1HsCELeKQSCdWB10vp5ST0RwH8uxSZIywvdAo6rOIJcIUs5hIE4v5LZ7RSGbNNzoTGQCtP8lSfH9lMxyLKSFOpxu3QAymfBapQ3mpSiPgi3HDh1dBeiUJJ6sS9CoYQ8JmhCdrMUWdtdUVKq9thfKySBKn18ts01KCwDxzsErMydZzEXxxoOL/DizQUbB4O/A7Yl3BRA7uMeckiTcWaElryhP2g+RwdyDy4TNyXS0NhD10iB/aLQLoVAH0agD06gAEbQFoGwAmi5gLDHbgCFrB0QDp2XdsxzJBevEDEgQtguTCCkjb6lpCCeQFtoyS3RZIjgHS493dLwwQRFUaAffcCLkXXwdh4zqolsO2wENeFTzfxG6LUxk70GsLPM8ATwgs8dC/LsFQTyPY2Uugb2A0WZBUYGT9EDku5f9BElAE9XrzORjEU0FSJ/6gVQMJbAuSngGJ2A9xTGOSHI6G6/b7lnWxaNg6QepWoDo0UFto5Ar
xA44yEc4HBzDgMFAgcXgluwViFEQ4Taks6WRF+ZME5pOjRmMVTHl8s1KYbQZrNTgMFIFFsn7K/crBeHN/z8nHxV03o/y270MzZYskIDvmKZXLcTIhu/wprUXCShPFzI3ypqoG+tyWkAhz+lptvdTlg7rDPaNSG2y1qAU+OdVaAJ2qk+zJ1XXlbofmSldGxrY9C43hSCQRXpemzeWEdMdP1tkCkZbvmcci+7dRPYAQDdo29YT4SXq+lZ2nJ9KR+eA15EPvovjgWj1tZdRcNGWD7eoaVHN0JDbYWk8GeM4J2NCgcfbBhn3YkOugS6ED0IXi/40OvrX7dzn10qvxfHAKupltznsWGowTapMb+/D87YDZIcERncj2SiAyXIhsdC0FKw1wdKVOzGgYSt+1CruqwS9TVzfo1dRxp7UtJzA7oc3Kpha7NyJtKGRVPEGpdBaFtL547luoj1w27YZl07uoqgl9XVQbOdVYVCNDn3vNZPXRio/ZHL5e0Ih3aSysdZ38jzq0+7VZTesLgFPWIWg2W9qVbwfu8PcQf2eqQ85F1SGAYLV06DvyplVIf3MP3Xa29gBqP7iq3g5WV+7t8+Cv73j45dtoMRx+/wetbmh3XzKUMrZahN54lWUfKcULMhb8G1fod5IlGpnU+LyeXw+uBt5D9Dhyfv/yx+NDl3cva2Pju9VE09/8NF6f81q61sbHX513RbXuDxDWPGEBSdOfrc+l/D3XUn2KxRc6bj1C5eUXOWam7dFcF8Piz2kZxMU//NDgPw==7Z3Rcps4FIafJpdhkAQCLpM0bWfb7qZNO9vuTQeDbNgAYjC24z59hS0MWAQ7MUZsVrnImAMWRv/5dMSRBBfoJn58l7lp8In6JLqAuv94gd5cQGgAwP4XhvXWYOvm1jDLQn9rApXhPvxFuFHn1kXok3njwJzSKA/TptGjSUK8vGFzs4yumodNadQ8a+rOiGC499xItP4d+nnAr8LUK/t7Es6C8sxA53titzyYG+aB69NVzYRuL9BNRmm+/RQ/3pCoqLuyXuIr78fnN95tguKrf4P53XQB08ttYW+f85XdJWQkyV9cdPrwKZtOYvNj+mX94evbL+Yf8c9LiLZlL91owSssI3O6yDzCrBmZEnbOzefJuqgCN/En9JHXR74uK3m+CuPITdjW9Tx3s5y7AasmdB3QLPxFk9yNmKUweEEY+R/dNV0UF5NnhJQbtWO/MjPbC5iR/aDwlzvZnGvz/eKAMCHZ13VKeAnFYXSR+MTnX9qJVXxjFrnzOf88fyC5F/CNKSuK/1Zgs+0jK5oLsiRZTh5rbsYr/h2hMcmzosL4XtvmPsQhQpbGKVpVPolLZIKaP1q6wVngHMx2hVdasw9c7udIb3RIv6fvKghzcp+6XrG9Yu1EoVUeR6VCPVT9NIyiGxrRbHNGNJ1Ooedt/CmjD6S2x8cTbOJWsbpd/Gi1jKZYhu5oumXquz9Rul2jUZeuLKZ/5UxBuYTkK5o9MCOrjDBfj0BBAnyTWG0KOthC7lkVBE5TQggYcNJlw4JsgkzEZ8GLb5JoQle3leF6Y2A76i1qu4ht8mwa5qsiojJLQqvG+m0YlU0zSfzyCK+QPfS2Rn4IeL5mvEE53AyxXzIjnSVy/Yoq6vSBjERuHi6b8b9/NS1BTYZNVoTK6Vzh18AP61ADsumzFX2n0GeNij4khkCl5q6JPKgmwn2ryb96R0N2KVVDoINmS3DpOEjXHGfXm8LNIreXyEvZ85HdzzrBbcQQXPWcwmRK/+cNN9rr+pa3vtJabSRGWcX5Mzi3B+Ic4D3OmZs4MkEXoz2TJN2dTiFeKgWwpWHZlDuK8qd9+CDlpS7DU+7YMiEvr7vmNkvCPLhIgoyGdUa67RttrNtwgvB5E1l792G2IR11AwiaKdSfgTocCHUkdNxtUybqUHCbNFgzadxI4f4U7mMI7YY4wKR4fwbvhjTei6Qdksa7ODhF84BkkmH33XmwK2IwjiFreaVzrBJuHVnRwxzLS7hBYNaHL/eGk8/NsZhwC5MoTIphkmWY5SEdxWCJzKi9f0Nu4hHQrtJuJ9EuLe1mO8DRbGm0i1k3JlviuxFVxD9NPDaQdOKxiu+nJNrxUPEdWHvEW8iy68TDQYnHXQNq8XYuqXTWx5RuN8AIWFfRvcOVD7M+WHTfZx1b2NasinVgDgu7GN6TbWAfC+ljyr4hJH/SE1YDa6eQbg02sCaSbiENyYrqljiwlnsMtXyTgFOwi7CPIaxbamjtJNihNNgBNDUMq7A+LOxQcBsmdK7C+hOkO+aQYR3pywB9T9589q1Z9jNZXv1pR5di89y1aGu3XkpQ8rUs26q7C295RH/oZyUXMPdSbrYtOIOBW5wBA+dM3iA2+//1dVydTv/SZVwI6Bp0qj/bkEzxK10O1CVePVp3+XI9WLce1/tqhGOH00ykQ+ZGu1htnu0WvOu6G7Ga5qPIqL8oy9YP6cJ6I6AZNdIdSzLpr3Tp0SCkO9JIh1Am6WLmZkmjRTyGmD4i0pFhNVGXTDoQu+YK9WNRLx81IoF1IDWqA7EL782Y0qmCvS6TaTqaXu/BS4b9tS7oPwn2ljky7ZUnrQuPTBtoJpQFe8uDAyYR9YoBc3l9+QMTW/vhWebq0nYpVMf8BICH6pkLk9yQga36lNaB+RV75psHfxTgSiS4j5T5eTBnLfC4bseR6qQ/eb95EHskrZOOgNOY/WKcbUlK54U3uI8Lw1juy8fEvA21OvKO7GQ7gor5lzOPZIV66GCo2VBSqEfi4sUq1CvsW3r02Gh5sOXApItLBxXpR5Pe+0P2jibdRMx3Bli50nndLTfl8jiXclNeZNSlI6wSaycgPFRiTURYx3Y9WKNhEW55IOdDmKpI3Yq5Ae1GB112Lr3lgV0K+aORl5aKAyY2NatCHg4ctsVcnE+m7iLKFfat2JtAlzVe/v6bHX5wvwd3i3u69n7E/3z7633LTFZBrya4B9Trbf5oo+9VbNy5eU6yZGOButGqWU9TTA29KZpVvj2g/rIA2CKTeS6ZjlhZ0OTmgEx9jDH3Vdl7aUbLaanslrpG56prMVH15HRedo35nv83KpD7cot7u1E4K3zZY9VJmP26qLHiaVpXfEcc+v4mrrYp2UTySeYaDEl57wbYn73fpq41pLpHPA7rFJJeFGr6mhlvjYykrhec6PPFJF+nRByrU1AdEhobh4UG5pBKH/HwimdRdSDz0lM92vtxXjYwYtoljdykuAOhG2LSlGbic9cUMAeFto8ABvWjNNusXkm2vc+q3uuGbn8D \ No newline at end of file diff --git a/docs/design/direct-blk-device-assignment.md b/docs/design/direct-blk-device-assignment.md index 9997d0e7a663..0703140f1648 100644 --- a/docs/design/direct-blk-device-assignment.md +++ 
b/docs/design/direct-blk-device-assignment.md @@ -81,7 +81,7 @@ Notes: given that the `mountInfo` is persisted to the disk by the Kata runtime, Instead of the CSI node driver writing the mount info into a `csiPlugin.json` file under the volume root, as described in the original proposal, here we propose that the CSI node driver passes the mount information to the Kata Containers runtime through a new `kata-runtime` commandline command. The `kata-runtime` then writes the mount -information to a `mount-info.json` file in a predefined location (`/run/kata-containers/shared/direct-volumes/[volume_path]/`). +information to a `mountInfo.json` file in a predefined location (`/run/kata-containers/shared/direct-volumes/[volume_path]/`). When the Kata Containers runtime starts a container, it verifies whether a volume mount is a direct-assigned volume by checking whether there is a `mountInfo` file under the computed Kata `direct-volumes` directory. If it is, the runtime parses the `mountInfo` file, diff --git a/docs/design/hooks-handling.md b/docs/design/hooks-handling.md new file mode 100644 index 000000000000..535c70cc4ded --- /dev/null +++ b/docs/design/hooks-handling.md @@ -0,0 +1,63 @@ +# Kata Containers support for `Hooks` + +## Introduction + +During container's lifecycle, different Hooks can be executed to do custom actions. In Kata Containers, we support two types of Hooks, `OCI Hooks` and `Kata Hooks`. + +### OCI Hooks + +The OCI Spec stipulates six hooks that can be executed at different time points and namespaces, including `Prestart Hooks`, `CreateRuntime Hooks`, `CreateContainer Hooks`, `StartContainer Hooks`, `Poststart Hooks` and `Poststop Hooks`. We support these types of Hooks as compatible as possible in Kata Containers. + +The path and arguments of these hooks will be passed to Kata for execution via `bundle/config.json`. For example: +``` +... +"hooks": { + "prestart": [ + { + "path": "/usr/bin/prestart-hook", + "args": ["prestart-hook", "arg1", "arg2"], + "env": [ "key1=value1"] + } + ], + "createRuntime": [ + { + "path": "/usr/bin/createRuntime-hook", + "args": ["createRuntime-hook", "arg1", "arg2"], + "env": [ "key1=value1"] + } + ] +} +... +``` + +### Kata Hooks + +In Kata, we support another three kinds of hooks executed in guest VM, including `Guest Prestart Hook`, `Guest Poststart Hook`, `Guest Poststop Hook`. + +The executable files for Kata Hooks must be packaged in the *guest rootfs*. The file path to those guest hooks should be specified in the configuration file, and guest hooks must be stored in a subdirectory of `guest_hook_path` according to their hook type. For example: + ++ In configuration file: +``` +guest_hook_path="/usr/share/hooks" +``` ++ In guest rootfs, prestart-hook is stored in `/usr/share/hooks/prestart/prestart-hook`. + +## Execution +The table below summarized when and where those different hooks will be executed in Kata Containers: + +| Hook Name | Hook Type | Hook Path | Exec Place | Exec Time | +|---|---|---|---|---| +| `Prestart(deprecated)` | OCI hook | host runtime namespace | host runtime namespace | After VM is started, before container is created. | +| `CreateRuntime` | OCI hook | host runtime namespace | host runtime namespace | After VM is started, before container is created, after `Prestart` hooks. | +| `CreateContainer` | OCI hook | host runtime namespace | host vmm namespace* | After VM is started, before container is created, after `CreateRuntime` hooks. 
| +| `StartContainer` | OCI hook | guest container namespace | guest container namespace | After container is created, before container is started. | +| `Poststart` | OCI hook | host runtime namespace | host runtime namespace | After container is started, before start operation returns. | +| `Poststop` | OCI hook | host runtime namespace | host runtime namespace | After container is deleted, before delete operation returns. | +| `Guest Prestart` | Kata hook | guest agent namespace | guest agent namespace | During start operation, before container command is executed. | +| `Guest Poststart` | Kata hook | guest agent namespace | guest agent namespace | During start operation, after container command is executed, before start operation returns. | +| `Guest Poststop` | Kata hook | guest agent namespace | guest agent namespace | During delete operation, after container is deleted, before delete operation returns. | + ++ `Hook Path` specifies where hook's path be resolved. ++ `Exec Place` specifies in which namespace those hooks can be executed. + + For `CreateContainer` Hooks, OCI requires to run them inside the container namespace while the hook executable path is in the host runtime, which is a non-starter for VM-based containers. So we design to keep them running in the *host vmm namespace.* ++ `Exec Time` specifies at which time point those hooks can be executed. \ No newline at end of file diff --git a/docs/design/host-cgroups.md b/docs/design/host-cgroups.md index de5f3288d680..0049ba5ab1c3 100644 --- a/docs/design/host-cgroups.md +++ b/docs/design/host-cgroups.md @@ -12,7 +12,7 @@ The OCI [runtime specification][linux-config] provides guidance on where the con > [`cgroupsPath`][cgroupspath]: (string, OPTIONAL) path to the cgroups. It can be used to either control the cgroups > hierarchy for containers or to run a new process in an existing container -Cgroups are hierarchical, and this can be seen with the following pod example: +The cgroups are hierarchical, and this can be seen with the following pod example: - Pod 1: `cgroupsPath=/kubepods/pod1` - Container 1: `cgroupsPath=/kubepods/pod1/container1` @@ -247,14 +247,14 @@ cgroup size and constraints accordingly. # Supported cgroups -Kata Containers currently only supports cgroups `v1`. +Kata Containers currently supports cgroups `v1` and `v2`. In the following sections each cgroup is described briefly. -## Cgroups V1 +## cgroups v1 -`Cgroups V1` are under a [`tmpfs`][1] filesystem mounted at `/sys/fs/cgroup`, where each cgroup is -mounted under a separate cgroup filesystem. A `Cgroups v1` hierarchy may look like the following +`cgroups v1` are under a [`tmpfs`][1] filesystem mounted at `/sys/fs/cgroup`, where each cgroup is +mounted under a separate cgroup filesystem. A `cgroups v1` hierarchy may look like the following diagram: ``` @@ -301,13 +301,12 @@ diagram: A process can join a cgroup by writing its process id (`pid`) to `cgroup.procs` file, or join a cgroup partially by writing the task (thread) id (`tid`) to the `tasks` file. -Kata Containers only supports `v1`. To know more about `cgroups v1`, see [cgroupsv1(7)][2]. -## Cgroups V2 +## cgroups v2 -`Cgroups v2` are also known as unified cgroups, unlike `cgroups v1`, the cgroups are -mounted under the same cgroup filesystem. A `Cgroups v2` hierarchy may look like the following +`cgroups v2` are also known as unified cgroups, unlike `cgroups v1`, the cgroups are +mounted under the same cgroup filesystem. 
A `cgroups v2` hierarchy may look like the following diagram: ``` @@ -354,8 +353,6 @@ Same as `cgroups v1`, a process can join the cgroup by writing its process id (` `cgroup.procs` file, or join a cgroup partially by writing the task (thread) id (`tid`) to `cgroup.threads` file. -Kata Containers does not support cgroups `v2` on the host. - ### Distro Support Many Linux distributions do not yet support `cgroups v2`, as it is quite a recent addition. diff --git a/docs/design/kata-metrics-in-runtime-rs.md b/docs/design/kata-metrics-in-runtime-rs.md new file mode 100644 index 000000000000..037695894224 --- /dev/null +++ b/docs/design/kata-metrics-in-runtime-rs.md @@ -0,0 +1,50 @@ +# Kata Metrics in Rust Runtime(runtime-rs) + +Rust Runtime(runtime-rs) is responsible for: + +- Gather metrics about `shim`. +- Gather metrics from `hypervisor` (through `channel`). +- Get metrics from `agent` (through `ttrpc`). + +--- + +Here are listed all the metrics gathered by `runtime-rs`. + +> * Current status of each entry is marked as: +> * ✅:DONE +> * 🚧:TODO + +### Kata Shim + +| STATUS | Metric name | Type | Units | Labels | +| ------ | ------------------------------------------------------------ | ----------- | -------------- | ------------------------------------------------------------ | +| 🚧 | `kata_shim_agent_rpc_durations_histogram_milliseconds`:
RPC latency distributions. | `HISTOGRAM` | `milliseconds` |
  • `action` (RPC actions of Kata agent)
    • `grpc.CheckRequest`
    • `grpc.CloseStdinRequest`
    • `grpc.CopyFileRequest`
    • `grpc.CreateContainerRequest`
    • `grpc.CreateSandboxRequest`
    • `grpc.DestroySandboxRequest`
    • `grpc.ExecProcessRequest`
    • `grpc.GetMetricsRequest`
    • `grpc.GuestDetailsRequest`
    • `grpc.ListInterfacesRequest`
    • `grpc.ListProcessesRequest`
    • `grpc.ListRoutesRequest`
    • `grpc.MemHotplugByProbeRequest`
    • `grpc.OnlineCPUMemRequest`
    • `grpc.PauseContainerRequest`
    • `grpc.RemoveContainerRequest`
    • `grpc.ReseedRandomDevRequest`
    • `grpc.ResumeContainerRequest`
    • `grpc.SetGuestDateTimeRequest`
    • `grpc.SignalProcessRequest`
    • `grpc.StartContainerRequest`
    • `grpc.StatsContainerRequest`
    • `grpc.TtyWinResizeRequest`
    • `grpc.UpdateContainerRequest`
    • `grpc.UpdateInterfaceRequest`
    • `grpc.UpdateRoutesRequest`
    • `grpc.WaitProcessRequest`
    • `grpc.WriteStreamRequest`
  • `sandbox_id`
| +| ✅ | `kata_shim_fds`:
Kata containerd shim v2 open FDs. | `GAUGE` | |
  • `sandbox_id`
| +| ✅ | `kata_shim_io_stat`:
Kata containerd shim v2 process IO statistics. | `GAUGE` | |
  • `item` (see `/proc/<pid>/io`)
    • `cancelledwritebytes`
    • `rchar`
    • `readbytes`
    • `syscr`
    • `syscw`
    • `wchar`
    • `writebytes`
  • `sandbox_id`
| +| ✅ | `kata_shim_netdev`:
Kata containerd shim v2 network devices statistics. | `GAUGE` | |
  • `interface` (network device name)
  • `item` (see `/proc/net/dev`)
    • `recv_bytes`
    • `recv_compressed`
    • `recv_drop`
    • `recv_errs`
    • `recv_fifo`
    • `recv_frame`
    • `recv_multicast`
    • `recv_packets`
    • `sent_bytes`
    • `sent_carrier`
    • `sent_colls`
    • `sent_compressed`
    • `sent_drop`
    • `sent_errs`
    • `sent_fifo`
    • `sent_packets`
  • `sandbox_id`
| +| 🚧 | `kata_shim_pod_overhead_cpu`:
 Kata Pod overhead for CPU resources (percent). | `GAUGE` | percent |
  • `sandbox_id`
| +| 🚧 | `kata_shim_pod_overhead_memory_in_bytes`:
 Kata Pod overhead for memory resources (bytes). | `GAUGE` | `bytes` |
  • `sandbox_id`
| +| ✅ | `kata_shim_proc_stat`:
Kata containerd shim v2 process statistics. | `GAUGE` | |
  • `item` (see `/proc/<pid>/stat`)
    • `cstime`
    • `cutime`
    • `stime`
    • `utime`
  • `sandbox_id`
| +| ✅ | `kata_shim_proc_status`:
Kata containerd shim v2 process status. | `GAUGE` | |
  • `item` (see `/proc/<pid>/status`)
    • `hugetlbpages`
    • `nonvoluntary_ctxt_switches`
    • `rssanon`
    • `rssfile`
    • `rssshmem`
    • `vmdata`
    • `vmexe`
    • `vmhwm`
    • `vmlck`
    • `vmlib`
    • `vmpeak`
    • `vmpin`
    • `vmpmd`
    • `vmpte`
    • `vmrss`
    • `vmsize`
    • `vmstk`
    • `vmswap`
    • `voluntary_ctxt_switches`
  • `sandbox_id`
| +| 🚧 | `kata_shim_process_cpu_seconds_total`:
Total user and system CPU time spent in seconds. | `COUNTER` | `seconds` |
  • `sandbox_id`
| +| 🚧 | `kata_shim_process_max_fds`:
Maximum number of open file descriptors. | `GAUGE` | |
  • `sandbox_id`
| +| 🚧 | `kata_shim_process_open_fds`:
Number of open file descriptors. | `GAUGE` | |
  • `sandbox_id`
| +| 🚧 | `kata_shim_process_resident_memory_bytes`:
Resident memory size in bytes. | `GAUGE` | `bytes` |
  • `sandbox_id`
| +| 🚧 | `kata_shim_process_start_time_seconds`:
Start time of the process since `unix` epoch in seconds. | `GAUGE` | `seconds` |
  • `sandbox_id`
| +| 🚧 | `kata_shim_process_virtual_memory_bytes`:
Virtual memory size in bytes. | `GAUGE` | `bytes` |
  • `sandbox_id`
| +| 🚧 | `kata_shim_process_virtual_memory_max_bytes`:
Maximum amount of virtual memory available in bytes. | `GAUGE` | `bytes` |
  • `sandbox_id`
| +| 🚧 | `kata_shim_rpc_durations_histogram_milliseconds`:
RPC latency distributions. | `HISTOGRAM` | `milliseconds` |
  • `action` (Kata shim v2 actions)
    • `checkpoint`
    • `close_io`
    • `connect`
    • `create`
    • `delete`
    • `exec`
    • `kill`
    • `pause`
    • `pids`
    • `resize_pty`
    • `resume`
    • `shutdown`
    • `start`
    • `state`
    • `stats`
    • `update`
    • `wait`
  • `sandbox_id`
| +| ✅ | `kata_shim_threads`:
Kata containerd shim v2 process threads. | `GAUGE` | |
  • `sandbox_id`
| + +### Kata Hypervisor + +Different from golang runtime, hypervisor and shim in runtime-rs belong to the **same process**, so all previous metrics for hypervisor and shim only need to be gathered once. Thus, we currently only collect previous metrics in kata shim. + +At the same time, we added the interface(`VmmAction::GetHypervisorMetrics`) to gather hypervisor metrics, in case we design tailor-made metrics for hypervisor in the future. Here're metrics exposed from [src/dragonball/src/metric.rs](https://github.com/kata-containers/kata-containers/blob/main/src/dragonball/src/metric.rs). + +| Metric name | Type | Units | Labels | +| ------------------------------------------------------------ | ---------- | ----- | ------------------------------------------------------------ | +| `kata_hypervisor_scrape_count`:
Metrics scrape count | `COUNTER` | |
  • `sandbox_id`
| +| `kata_hypervisor_vcpu`:
Hypervisor metrics specific to VCPUs' mode of functioning. | `IntGauge` | |
  • `item`
    • `exit_io_in`
    • `exit_io_out`
    • `exit_mmio_read`
    • `exit_mmio_write`
    • `failures`
    • `filter_cpuid`
  • `sandbox_id`
| +| `kata_hypervisor_seccomp`:
Hypervisor metrics for the seccomp filtering. | `IntGauge` | |
  • `item`
    • `num_faults`
  • `sandbox_id`
| +| `kata_hypervisor_seccomp`:
Hypervisor metrics for the seccomp filtering. | `IntGauge` | |
  • `item`
    • `sigbus`
    • `sigsegv`
  • `sandbox_id`
| diff --git a/docs/design/kata-vra.md b/docs/design/kata-vra.md new file mode 100644 index 000000000000..ba53c33712c3 --- /dev/null +++ b/docs/design/kata-vra.md @@ -0,0 +1,434 @@ +# Virtualization Reference Architecture + +## Subject to Change | © 2022 by NVIDIA Corporation. All rights reserved. | For test and development only_ + +Before digging deeper into the virtualization reference architecture, let's +first look at the various GPUDirect use cases in the following table. We’re +distinguishing between two top-tier use cases where the devices are (1) +passthrough and (2) virtualized, where a VM gets assigned a virtual function +(VF) and not the physical function (PF). A combination of PF and VF would also +be possible. + +| Device #1  (passthrough) | Device #2 (passthrough) | P2P Compatibility and Mode | +| ------------------------- | ----------------------- | -------------------------------------------- | +| GPU PF | GPU PF | GPUDirect P2P  | +| GPU PF | NIC PF | GPUDirect RDMA | +| MIG-slice | MIG-slice | _No GPUDirect P2P_ | +| MIG-slice | NIC PF | GPUDirect RDMA | +| **PDevice #1  (virtualized)** | **Device #2 (virtualized)** | **P2P Compatibility and Mode** | +| Time-slice vGPU VF | Time-slice vGPU VF | _No GPUDirect P2P  but NVLINK P2P available_ | +| Time-slice vGPU VF | NIC VF | GPUDirect RDMA | +| MIG-slice vGPU | MIG-slice vGPU | _No GPUDirect P2P_ | +| MIG-slice vGPU | NIC VF | GPUDirect RDMA | + +In a virtualized environment we have several distinct features that may prevent +Peer-to-peer (P2P) communication of two endpoints in a PCI Express topology. The +IOMMU translates IO virtual addresses (IOVA) to physical addresses (PA). Each +device behind an IOMMU has its own IOVA memory space, usually, no two devices +share the same IOVA memory space but it’s up to the hypervisor or OS how it +chooses to map devices to IOVA spaces.  Any PCI Express DMA transactions will +use IOVAs, which the IOMMU must translate. By default, all the traffic is routed +to the root complex and not issued directly to the peer device. + +An IOMMU can be used to isolate and protect devices even if virtualization is +not used; since devices can only access memory regions that are mapped for it, a +DMA from one device to another is not possible. DPDK uses the IOMMU to have +better isolation between devices, another benefit is that IOVA space can be +represented as a contiguous memory even if the PA space is heavily scattered. + +In the case of virtualization, the IOMMU is responsible for isolating the device +and memory between VMs for safe device assignment without compromising the host +and other guest OSes. Without an IOMMU, any device can access the entire system +and perform DMA transactions _anywhere_. + +The second feature is ACS (Access Control Services), which controls which +devices are allowed to communicate with one another and thus avoids improper +routing of packets irrespectively of whether IOMMU is enabled or not. + +When IOMMU is enabled, ACS is normally configured to force all PCI Express DMA +to go through the root complex so IOMMU can translate it, impacting performance +between peers with higher latency and reduced bandwidth. + +A way to avoid the performance hit is to enable Address Translation Services +(ATS). ATS-capable endpoints can prefetch IOVA -> PA translations from the IOMMU +and then perform DMA transactions directly to another endpoint. 
Hypervisors +enable this by enabling ATS in such endpoints, configuring ACS to enable Direct +Translated P2P, and configuring the IOMMU to allow Address Translation requests. + +Another important factor is that the NVIDIA driver stack will use the PCI +Express topology of the system it is running on to determine whether the +hardware is capable of supporting P2P. The driver stack qualifies specific +chipsets, and PCI Express switches for use with GPUDirect P2P. In virtual +environments, the PCI Express topology is flattened and obfuscated to present a +uniform environment to the software inside the VM, which breaks the GPUDirect +P2P use case. + +On a bare metal machine, the driver stack groups GPUs into cliques that can +perform GPUDirect P2P communication, excluding peer mappings where P2P +communication is not possible, prominently if GPUs are attached to multiple CPU +sockets.   + +CPUs and local memory banks are referred to as NUMA nodes. In a two-socket +server, each of the CPUs has a local memory bank for a total of two NUMA nodes. +Some servers provide the ability to configure additional NUMA nodes per CPU, +which means a CPU socket can have two NUMA nodes  (some servers support four +NUMA nodes per socket) with local memory banks and L3 NUMA domains for improved +performance. + +One of the current solutions is that the hypervisor provides additional topology +information that the driver stack can pick up and enable GPUDirect P2P between +GPUs, even if the virtualized environment does not directly expose it. The PCI +Express virtual P2P approval capability structure in the PCI configuration space +is entirely emulated by the hypervisor of passthrough GPU devices. + +A clique ID is provided where GPUs with the same clique ID belong to a group of +GPUs capable of P2P communication + +On vSphere, Azure, and other CPSs,  the hypervisor lays down a `topologies.xml` +which NCCL can pick up and deduce the right P2P level[^1]. NCCL is leveraging +Infiniband (IB) and/or Unified Communication X (UCX) for communication, and +GPUDirect P2P and GPUDirect RDMA should just work in this case. The only culprit +is that software or applications that do not use the XML file to deduce the +topology will fail and not enable GPUDirect ( [`nccl-p2p-level`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html#nccl-p2p-level) ) + +## Hypervisor PCI Express Topology + +To enable every part of the accelerator stack, we propose a virtualized +reference architecture to enable GPUDirect P2P and GPUDirect RDMA for any +hypervisor. The idea is split into two parts to enable the right PCI Express +topology. The first part builds upon extending the PCI Express virtual P2P +approval capability structure to every device that wants to do P2P in some way +and groups devices by clique ID. The other part involves replicating a subset of +the host topology so that applications running in the VM do not need to read +additional information and enable the P2P capability like in the bare-metal use +case described above. The driver stack can then deduce automatically if the +topology presented in the VM is capable of P2P communication. + +We will work with the following host topology for the following sections. It is +a system with two converged DPUs, each having an `A100X` GPU and two `ConnectX-6` +network ports connected to the downstream ports of a PCI Express switch. 
+ +```sh ++-00.0-[d8-df]----00.0-[d9-df]--+-00.0-[da-db]--+-00.0 Mellanox Tech MT42822 BlueField-2 integrated ConnectX-6 Dx network + | +-00.1 Mellanox Tech MT42822 BlueField-2 integrated ConnectX-6 Dx network + | \-00.2 Mellanox Tech MT42822 BlueField-2 SoC Management Interface + \-01.0-[dc-df]----00.0-[dd-df]----08.0-[de-df]----00.0 NVIDIA Corporation GA100 [A100X] + ++-00.0-[3b-42]----00.0-[3c-42]--+-00.0-[3d-3e]--+-00.0 Mellanox Tech MT42822 BlueField-2 integrated ConnectX-6 Dx network + | +-00.1 Mellanox Tech MT42822 BlueField-2 integrated ConnectX-6 Dx network + | \-00.2 Mellanox Tech MT42822 BlueField-2 SoC Management Interface + \-01.0-[3f-42]----00.0-[40-42]----08.0-[41-42]----00.0 NVIDIA Corporation GA100 [A100X] +``` + +The green path highlighted above is the optimal and preferred path for +efficient P2P communication. + +## PCI Express Virtual P2P Approval Capability + +Most of the time, the PCI Express topology is flattened and obfuscated to ensure +easy migration of the VM image between different physical hardware topologies. +In Kata, we can configure the hypervisor to use PCI Express root ports to +hotplug the VFIO  devices one is passing through. A user can select how many PCI +Express root ports to allocate depending on how many devices are passed through. +A recent addition to Kata will detect the right amount of PCI Express devices +that need hotplugging and bail out if the number of root ports is insufficient. +In Kata, we do not automatically increase the number of root ports, we want the +user to be in full control of the topology. + +```toml +# /etc/kata-containers/configuration.toml + +# VFIO devices are hotplugged on a bridge by default. +# Enable hot-plugging on the root bus. This may be required for devices with +# a large PCI bar, as this is a current limitation with hot-plugging on +# a bridge. +# Default “bridge-port” +hotplug_vfio = "root-port" + +# Before hot plugging a PCIe device, you need to add a pcie_root_port device. +# Use this parameter when using some large PCI bar devices, such as NVIDIA GPU +# The value means the number of pcie_root_port +# This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35" +# Default 0 +pcie_root_port = 8 +``` + +VFIO devices are hotplugged on a PCIe-PCI bridge by default. Hotplug of PCI +Express devices is only supported on PCI Express root or downstream ports. With +this configuration set, if we start up a Kata container, we can inspect our +topology and see the allocated PCI Express root ports and the hotplugged +devices. + +```sh +$ lspci -tv + -[0000:00]-+-00.0 Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller + +-01.0 Red Hat, Inc. Virtio console + +-02.0 Red Hat, Inc. Virtio SCSI + +-03.0 Red Hat, Inc. Virtio RNG + +-04.0-[01]----00.0 Mellanox Technologies MT42822 BlueField-2 integrated ConnectX-6 + +-05.0-[02]----00.0 Mellanox Technologies MT42822 BlueField-2 integrated ConnectX-6 + +-06.0-[03]----00.0 NVIDIA Corporation Device 20b8 + +-07.0-[04]----00.0 NVIDIA Corporation Device 20b8 + +-08.0-[05]-- + +-09.0-[06]-- + +-0a.0-[07]-- + +-0b.0-[08]-- + +-0c.0 Red Hat, Inc. Virtio socket + +-0d.0 Red Hat, Inc. 
Virtio file system + +-1f.0 Intel Corporation 82801IB (ICH9) LPC Interface Controller + +-1f.2 Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA Controller + \-1f.3 Intel Corporation 82801I (ICH9 Family) SMBus Controller +``` + +For devices with huge BARs (Base Address Registers) like the GPU (we need to +configure the PCI Express root port properly and allocate enough memory for +mapping), we have added a heuristic to Kata to deduce the right settings. Hence, +the BARs can be mapped correctly. This functionality is added to +[`nvidia/go-nvlib1](https://gitlab.com/nvidia/cloud-native/go-nvlib) which is part +of Kata now. + +```sh +$ sudo dmesg | grep BAR +[ 0.179960] pci 0000:00:04.0: BAR 7: assigned [io 0x1000-0x1fff] +[ 0.179962] pci 0000:00:05.0: BAR 7: assigned [io 0x2000-0x2fff] +[ 0.179963] pci 0000:00:06.0: BAR 7: assigned [io 0x3000-0x3fff] +[ 0.179964] pci 0000:00:07.0: BAR 7: assigned [io 0x4000-0x4fff] +[ 0.179966] pci 0000:00:08.0: BAR 7: assigned [io 0x5000-0x5fff] +[ 0.179967] pci 0000:00:09.0: BAR 7: assigned [io 0x6000-0x6fff] +[ 0.179968] pci 0000:00:0a.0: BAR 7: assigned [io 0x7000-0x7fff] +[ 0.179969] pci 0000:00:0b.0: BAR 7: assigned [io 0x8000-0x8fff] +[ 2.115912] pci 0000:01:00.0: BAR 0: assigned [mem 0x13000000000-0x13001ffffff 64bit pref] +[ 2.116203] pci 0000:01:00.0: BAR 2: assigned [mem 0x13002000000-0x130027fffff 64bit pref] +[ 2.683132] pci 0000:02:00.0: BAR 0: assigned [mem 0x12000000000-0x12001ffffff 64bit pref] +[ 2.683419] pci 0000:02:00.0: BAR 2: assigned [mem 0x12002000000-0x120027fffff 64bit pref] +[ 2.959155] pci 0000:03:00.0: BAR 1: assigned [mem 0x11000000000-0x117ffffffff 64bit pref] +[ 2.959345] pci 0000:03:00.0: BAR 3: assigned [mem 0x11800000000-0x11801ffffff 64bit pref] +[ 2.959523] pci 0000:03:00.0: BAR 0: assigned [mem 0xf9000000-0xf9ffffff] +[ 2.966119] pci 0000:04:00.0: BAR 1: assigned [mem 0x10000000000-0x107ffffffff 64bit pref] +[ 2.966295] pci 0000:04:00.0: BAR 3: assigned [mem 0x10800000000-0x10801ffffff 64bit pref] +[ 2.966472] pci 0000:04:00.0: BAR 0: assigned [mem 0xf7000000-0xf7ffffff] +``` + +The NVIDIA driver stack in this case would refuse to do P2P communication since +(1) the topology is not what it expects, (2)  we do not have a qualified +chipset. Since our P2P devices are not connected to a PCI Express switch port, +we need to provide additional information to support the P2P functionality. One +way of providing such meta information would be to annotate the container; most +of the settings in Kata's configuration file can be overridden via annotations, +but this limits the flexibility, and a user would need to update all the +containers that he wants to run with Kata. The goal is to make such things as +transparent as possible, so we also introduced +[CDI](https://github.com/container-orchestrated-devices/container-device-interface) +(Container Device Interface) to Kata. CDI is a[ +specification](https://github.com/container-orchestrated-devices/container-device-interface/blob/master/SPEC.md) +for container runtimes to support third-party devices. + +As written before, we can provide a clique ID for the devices that belong +together and are capable of doing P2P. This information is provided to the +hypervisor, which will set up things in the VM accordingly. Let's suppose the +user wanted to do GPUDirect RDMA with the first GPU and the NIC that reside on +the same DPU, one could provide the specification telling the hypervisor that +they belong to the same clique. 
+ +```yaml +# /etc/cdi/nvidia.yaml +cdiVersion: 0.4.0 +kind: nvidia.com/gpu +devices: +- name: gpu0 + annotations: + bdf: “41:00.0” + clique-id: “0” + containerEdits: + deviceNodes: + - path: “/dev/vfio/71" + +# /etc/cdi/mellanox.yaml +cdiVersion: 0.4.0 +kind: mellanox.com/nic +devices: +- name: nic0 + annotations: + bdf: “3d:00.0” + clique-id: “0” + attach-pci: “true” + containerEdits: + deviceNodes: + - path: "/dev/vfio/66" +``` + +Since this setting is bound to the device and not the container we do not need +to alter the container just allocate the right resource and GPUDirect RDMA would +be set up correctly. Rather than exposing them separately, an idea would be to +expose a GPUDirect RDMA device via NFD (Node Feature Discovery) that combines +both of them; this way, we could make sure that the right pair is allocated and +used more on  Kubernetes deployment in the next section. + +The GPU driver stack is leveraging the PCI Express virtual P2P approval +capability, but the NIC stack does not use this now. One of the action items is +to enable MOFED to read the P2P approval capability and enable ATS and ACS +settings as described above. + +This way, we could enable GPUDirect P2P and GPUDirect RDMA on any topology +presented to the VM application. It is the responsibility of the administrator +or infrastructure engineer to provide the right information either via +annotations or a CDI specification. + +## Host Topology Replication + +The other way to represent the PCI Express topology in the VM is to replicate a +subset of the topology needed to support the P2P use case inside the VM. Similar +to the configuration for the root ports, we can easily configure the usage of +PCI Express switch ports to hotplug the devices. + +```toml +# /etc/kata-containers/configuration.toml + +# VFIO devices are hotplugged on a bridge by default. +# Enable hot plugging on the root bus. This may be required for devices with +# a large PCI bar, as this is a current limitation with hot plugging on +# a bridge. +# Default “bridge-port” +hotplug_vfio = "switch-port" + +# Before hot plugging a PCIe device, you need to add a pcie_root_port device. +# Use this parameter when using some large PCI bar devices, such as Nvidia GPU +# The value means the number of pcie_root_port +# This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35" +# Default 0 +pcie_switch_port = 8 +``` + +Each device that is passed through is attached to a PCI Express downstream port +as illustrated below. We can even replicate the host’s two DPUs topologies with +added metadata through the CDI. Most of the time, a container only needs one +pair of GPU and NIC for GPUDirect RDMA. This is more of a showcase of what we +can do with the power of Kata and CDI. One could even think of adding groups of +devices that support P2P, even from different CPU sockets or NUMA nodes, into +one container; indeed, the first group is NUMA node 0 (red), and the second +group is NUMA node 1 (green). Since they are grouped correctly, P2P would be +enabled naturally inside a group, aka clique ID. + +```sh +$ lspci -tv + -[0000:00]-+-00.0 Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller + +-01.0 Red Hat, Inc. Virtio console + +-02.0 Red Hat, Inc. Virtio SCSI + +-03.0 Red Hat, Inc. 
Virtio RNG + +-04.0-[01-04]----00.0-[02-04]--+-00.0-[03]----00.0 NVIDIA Corporation Device 20b8 + | \-01.0-[04]----00.0 Mellanox Tech MT42822 BlueField-2 integrated ConnectX-6 Dx + +-05.0-[05-08]----00.0-[06-08]--+-00.0-[07]----00.0 Mellanox Tech MT42822 BlueField-2 integrated ConnectX-6 Dx + | \-01.0-[08]----00.0 NVIDIA Corporation Device 20b8 + +-06.0 Red Hat, Inc. Virtio socket + +-07.0 Red Hat, Inc. Virtio file system + +-1f.0 Intel Corporation 82801IB (ICH9) LPC Interface Controller + +-1f.2 Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] + \-1f.3 Intel Corporation 82801I (ICH9 Family) SMBus Controller + \-1f.3 Intel Corporation 82801I (ICH9 Family) SMBus Controller +``` + +The configuration of using either the root port or switch port can be applied on +a per Container or Pod basis, meaning we can switch PCI Express topologies on +each run of an application. + +## Hypervisor Resource Limits + +Every hypervisor will have resource limits in terms of how many PCI Express root +ports, switch ports, or bridge ports can be created, especially with devices +that need to reserve a 4K IO range per PCI specification. Each instance of root +or switch port will consume 4K IO of very limited capacity, 64k is the maximum. + +Simple math brings us to the conclusion that we can have a maximum of 16 PCI +Express root ports or 16 PCI Express switch ports in QEMU if devices with IO +BARs are used in the PCI Express hierarchy. + +Additionally, one can have 32 slots on the PCI root bus and a maximum of 256 +slots for the complete PCI(e) topology. + +Per default, QEMU will attach a multi-function device in the last slot on the +PCI root bus, + +```sh + +-1f.0 Intel Corporation 82801IB (ICH9) LPC Interface Controller + +-1f.2 Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] + \-1f.3 Intel Corporation 82801I (ICH9 Family) SMBus Controller +``` + +Kata will additionally add `virtio-xxx-pci` devices consuming (5 slots) plus a +PCIe-PCI-bridge (1 slot) and a DRAM controller (1 slot), meaning per default, we +have already eight slots used. This leaves us 24 slots for adding other devices +to the root bus. + +The problem that arises here is one use-case from a customer that uses recent +RTX GPUs with Kata. The user wanted to pass through eight of these GPUs into one +container and ran into issues. The problem is that those cards often consist of +four individual device nodes: GPU, Audio, and two USB controller devices (some +cards have a USB-C output). + +These devices are grouped into one IOMMU group. Since one needs to pass through +the complete IOMMU group into the VM, we need to allocate 32 PCI Express root +ports or 32 PCI Express switch ports, which is technically impossible due to the +resource limits outlined above. Since all the devices appear as PCI Express +devices, we need to hotplug those into a root or switch port. + +The solution to this problem is leveraging CDI. For each device, add the +information if it is going to be hotplugged as a PCI Express or PCI device, +which results in either using a PCI Express root/switch port or an ordinary PCI +bridge. PCI bridges are not affected by the limited IO range. This way, the GPU +is attached as a PCI Express device to a root/switch port and the other three +PCI devices to a PCI bridge, leaving enough resources to create the needed PCI +Express root/switch ports.  For example, we’re going to attach the GPUs to a PCI +Express root port and the NICs to a PCI bridge. 
+ +```jsonld +# /etc/cdi/mellanox.json +cdiVersion: 0.4.0 +kind: mellanox.com/nic +devices: +- name: nic0 + annotations: + bdf: “3d:00.0” + clique-id: “0” + attach-pci: “true” + containerEdits: + deviceNodes: + - path: "/dev/vfio/66" +- name: nic1 + annotations: + bdf: “3d:00.1” + clique-id: “1” + attach-pci: “true” + containerEdits: + deviceNodes: + - path: "/dev/vfio/67” +``` + +The configuration is set to use eight root ports for the GPUs and attach the +NICs to a PCI bridge which is connected to a PCI Express-PCI bridge which is the +preferred way of introducing a PCI topology in a PCI Express machine. + +```sh +$ lspci -tv +-[0000:00]-+-00.0 Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller + +-01.0 Red Hat, Inc. Virtio console + +-02.0 Red Hat, Inc. Virtio SCSI + +-03.0 Red Hat, Inc. Virtio RNG + +-04.0-[01]----00.0 NVIDIA Corporation Device 20b8 + +-05.0-[02]----00.0 NVIDIA Corporation Device 20b8 + +-06.0-[03]-- + +-07.0-[04]-- + +-08.0-[05]-- + +-09.0-[06]-- + +-0a.0-[07]-- + +-0b.0-[08]-- + +-0c.0-[09-0a]----00.0-[0a]--+-00.0 Mellanox Tech MT42822 BlueField-2 ConnectX-6 + | \-01.0 Mellanox Tech MT42822 BlueField-2 ConnectX-6 + +-0d.0 Red Hat, Inc. Virtio socket + +-0e.0 Red Hat, Inc. Virtio file system + +-1f.0 Intel Corporation 82801IB (ICH9) LPC Interface Controller + +-1f.2 Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA Controller + \-1f.3 Intel Corporation 82801I (ICH9 Family) SMBus Controller +``` + +The PCI devices will consume a slot of which we have 256 in the PCI(e) topology +and leave scarce resources for the needed PCI Express devices. diff --git a/docs/design/vcpu-handling.md b/docs/design/vcpu-handling-runtime-go.md similarity index 100% rename from docs/design/vcpu-handling.md rename to docs/design/vcpu-handling-runtime-go.md diff --git a/docs/design/vcpu-handling-runtime-rs.md b/docs/design/vcpu-handling-runtime-rs.md new file mode 100644 index 000000000000..44989ce4e683 --- /dev/null +++ b/docs/design/vcpu-handling-runtime-rs.md @@ -0,0 +1,51 @@ +# Virtual machine vCPU sizing in Kata Containers 3.0 + +> Preview: +> [Kubernetes(since 1.23)][1] and [Containerd(since 1.6.0-beta4)][2] will help calculate `Sandbox Size` info and pass it to Kata Containers through annotations. +> In order to adapt to this beneficial change and be compatible with the past, we have implemented the new vCPUs handling way in `runtime-rs`, which is slightly different from the original `runtime-go`'s design. + +## When do we need to handle vCPUs size? +vCPUs sizing should be determined by the container workloads. So throughout the life cycle of Kata Containers, there are several points in time when we need to think about how many vCPUs should be at the time. Mainly including the time points of `CreateVM`, `CreateContainer`, `UpdateContainer`, and `DeleteContainer`. +* `CreateVM`: When creating a sandbox, we need to know how many vCPUs to start the VM with. +* `CreateContainer`: When creating a new container in the VM, we may need to hot-plug the vCPUs according to the requirements in container's spec. +* `UpdateContainer`: When receiving the `UpdateContainer` request, we may need to update the vCPU resources according to the new requirements of the container. +* `DeleteContainer`: When a container is removed from the VM, we may need to hot-unplug the vCPUs to reclaim the vCPU resources introduced by the container. + +## On what basis do we calculate the number of vCPUs? 
+When Kata calculate the number of vCPUs, We have three data sources, the `default_vcpus` and `default_maxvcpus` specified in the configuration file (named `TomlConfig` later in the doc), the `io.kubernetes.cri.sandbox-cpu-quota` and `io.kubernetes.cri.sandbox-cpu-period` annotations passed by the upper layer runtime, and the corresponding CPU resource part in the container's spec for the container when `CreateContainer`/`UpdateContainer`/`DeleteContainer` is requested. + +Our understanding and priority of these resources are as follows, which will affect how we calculate the number of vCPUs later. + +* From `TomlConfig`: + * `default_vcpus`: default number of vCPUs when starting a VM. + * `default_maxvcpus`: maximum number of vCPUs. +* From `Annotation`: + * `InitialSize`: we call the size of the resource passed from the annotations as `InitialSize`. Kubernetes will calculate the sandbox size according to the Pod's statement, which is the `InitialSize` here. This size should be the size we want to prioritize. +* From `Container Spec`: + * The amount of CPU resources that the Container wants to use will be declared through the spec. Including the aforementioned annotations, we mainly consider `cpu quota` and `cpuset` when calculating the number of vCPUs. + * `cpu quota`: `cpu quota` is the most common way to declare the amount of CPU resources. The number of vCPUs introduced by `cpu quota` declared in a container's spec is: `vCPUs = ceiling( quota / period )`. + * `cpuset`: `cpuset` is often used to bind the CPUs that tasks can run on. The number of vCPUs may introduced by `cpuset` declared in a container's spec is the number of CPUs specified in the set that do not overlap with other containers. + + +## How to calculate and adjust the vCPUs size: +There are two types of vCPUs that we need to consider, one is the number of vCPUs when starting the VM (named `Boot Size` in the doc). The second is the number of vCPUs when `CreateContainer`/`UpdateContainer`/`DeleteContainer` request is received (`Real-time Size` in the doc). + +### `Boot Size` +The main considerations are `InitialSize` and `default_vcpus`. There are the following principles: +`InitialSize` has priority over `default_vcpus` declared in `TomlConfig`. +1. When there is such an annotation statement, the originally `default_vcpus` will be modified to the number of vCPUs in the `InitialSize` as the `Boot Size`. (Because not all runtimes support this annotation for the time being, we still keep the `default_cpus` in `TomlConfig`.) +2. When the specs of all containers are aggregated for sandbox size calculation, the method is consistent with the calculation method of `InitialSize` here. + +### `Real-time Size` +When we receive an OCI request, it may be for a single container. But what we have to consider is the number of vCPUs for the entire VM. So we will maintain a list. Every time there is a demand for adjustment, the entire list will be traversed to calculate a value for the number of vCPUs. In addition, there are the following principles: +1. Do not cut computing power and try to keep the number of vCPUs specified by `InitialSize`. + * So the number of vCPUs after will not be less than the `Boot Size`. +2. `cpu quota` takes precedence over `cpuset` and the setting history are took into account. + * We think quota describes the CPU time slice that a cgroup can use, and `cpuset` describes the actual CPU number that a cgroup can use. Quota can better describe the size of the CPU time slice that a cgroup actually wants to use. 
The `cpuset` only describes which CPUs the cgroup can run on; the cgroup may be allowed to use those CPUs yet consume only a small time slice, so quota takes precedence over `cpuset`.
   * On the one hand, when both `cpu quota` and `cpuset` are specified, we calculate the number of vCPUs based on `cpu quota` and ignore `cpuset`. On the other hand, if `cpu quota` was used to control the number of vCPUs in the past, and only `cpuset` was updated during `UpdateContainer`, we do not adjust the number of vCPUs at that time.
3. `StaticSandboxResourceMgmt` controls hotplug.
   * Some VMMs and the kernels of some architectures do not support hotplugging. We can accommodate this situation through `StaticSandboxResourceMgmt`. When `StaticSandboxResourceMgmt = true` is set, we do not make any further attempts to update the number of vCPUs after booting.


[1]: https://github.com/kubernetes/kubernetes/pull/104886
[2]: https://github.com/containerd/containerd/pull/6155
diff --git a/docs/design/vcpu-threads-pinning.md b/docs/design/vcpu-threads-pinning.md
new file mode 100644
index 000000000000..4de6ae986154
--- /dev/null
+++ b/docs/design/vcpu-threads-pinning.md
@@ -0,0 +1,37 @@
# Design Doc for Kata Containers' VCPUs Pinning Feature

## Background
Currently, the vCPU threads of Kata Containers are scheduled randomly across CPUs, and each pod requests a specific set of CPUs which we call the CPU set (in the Linux cgroups sense).

If the number of vCPU threads is equal to the number of CPUs claimed in the CPU set, we can pin each vCPU thread to one specified CPU, to reduce the cost of random scheduling.

## Detailed Design

### Passing Config Parameters
Two ways are provided to use this vCPU thread pinning feature: through the `QEMU` configuration file and through annotations. Finally, the pinning parameter is passed to `HypervisorConfig`.

### Related Linux Thread Scheduling API

| API Info | Value |
|-------------------|-----------------------------------------------------------|
| Package | `golang.org/x/sys/unix` |
| Method | `unix.SchedSetaffinity(thread_id, &unixCPUSet)` |
| Official Doc Page | https://pkg.go.dev/golang.org/x/sys/unix#SchedSetaffinity |

### When is VCPUs Pinning Checked?

As described in the Background section, when `num(vCPU threads) == num(CPUs in CPU set)`, we pin each vCPU thread to a specified CPU, and when this condition is broken, we revert to the original random scheduling pattern.
So when may `num(CPUs in CPU set)` change? There are 5 possible scenarios:

| Possible scenarios | Related Code |
|-----------------------------------|--------------------------------------------|
| when creating a container | File `Sandbox.go`, in method `CreateContainer` |
| when starting a container | File `Sandbox.go`, in method `StartContainer` |
| when deleting a container | File `Sandbox.go`, in method `DeleteContainer` |
| when updating a container | File `Sandbox.go`, in method `UpdateContainer` |
| when creating multiple containers | File `Sandbox.go`, in method `createContainers` |

### Core Pinning Logic

We can split the whole process into the following steps. The related methods are `checkVCPUsPinning` and `resetVCPUsPinning`, in file `Sandbox.go`.
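For intuition, the effect of pinning can be reproduced manually from the host with `taskset`, which drives the same `sched_setaffinity(2)` interface that `unix.SchedSetaffinity` wraps. This is only an illustrative sketch: the QEMU PID, thread name and CPU number below are hypothetical, and the runtime performs these steps automatically.

```bash
# Hypothetical example: pin the "CPU 0/KVM" vCPU thread of a QEMU process to host CPU 3.
qemu_pid=12345                                   # PID of the sandbox's QEMU process (made up)
vcpu_tid=$(ps -T -p "${qemu_pid}" -o spid=,comm= | awk '/CPU 0/ {print $1; exit}')
sudo taskset -pc 3 "${vcpu_tid}"                 # bind the vCPU thread to host CPU 3
taskset -pc "${vcpu_tid}"                        # show the resulting affinity list
```

The diagram below shows where the runtime itself performs the equivalent calls.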
+![](arch-images/vcpus-pinning-process.png) \ No newline at end of file diff --git a/docs/design/virtualization.md b/docs/design/virtualization.md index 3bd6ae5827d9..074ef14a4765 100644 --- a/docs/design/virtualization.md +++ b/docs/design/virtualization.md @@ -110,7 +110,7 @@ Devices and features used: - VFIO - hotplug - seccomp filters -- [HTTP OpenAPI](https://github.com/cloud-hypervisor/cloud-hypervisor/blob/master/vmm/src/api/openapi/cloud-hypervisor.yaml) +- [HTTP OpenAPI](https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/vmm/src/api/openapi/cloud-hypervisor.yaml) ### Summary diff --git a/docs/how-to/README.md b/docs/how-to/README.md index aa09b49c73c8..874efc01af89 100644 --- a/docs/how-to/README.md +++ b/docs/how-to/README.md @@ -42,4 +42,7 @@ - [How to setup swap devices in guest kernel](how-to-setup-swap-devices-in-guest-kernel.md) - [How to run rootless vmm](how-to-run-rootless-vmm.md) - [How to run Docker with Kata Containers](how-to-run-docker-with-kata.md) -- [How to run Kata Containers with `nydus`](how-to-use-virtio-fs-nydus-with-kata.md) \ No newline at end of file +- [How to run Kata Containers with `nydus`](how-to-use-virtio-fs-nydus-with-kata.md) +- [How to run Kata Containers with AMD SEV-SNP](how-to-run-kata-containers-with-SNP-VMs.md) +- [How to use EROFS to build rootfs in Kata Containers](how-to-use-erofs-build-rootfs.md) +- [How to run Kata Containers with kinds of Block Volumes](how-to-run-kata-containers-with-kinds-of-Block-Volumes.md) \ No newline at end of file diff --git a/docs/how-to/containerd-kata.md b/docs/how-to/containerd-kata.md index 2ac0613c6dbe..71efa6754d5a 100644 --- a/docs/how-to/containerd-kata.md +++ b/docs/how-to/containerd-kata.md @@ -40,7 +40,7 @@ use `RuntimeClass` instead of the deprecated annotations. ### Containerd Runtime V2 API: Shim V2 API The [`containerd-shim-kata-v2` (short as `shimv2` in this documentation)](../../src/runtime/cmd/containerd-shim-kata-v2/) -implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata. +implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/main/runtime/v2) for Kata. With `shimv2`, Kubernetes can launch Pod and OCI-compatible containers with one shim per Pod. Prior to `shimv2`, `2N+1` shims (i.e. a `containerd-shim` and a `kata-shim` for each container and the Pod sandbox itself) and no standalone `kata-proxy` process were used, even with VSOCK not available. @@ -77,8 +77,8 @@ $ command -v containerd You can manually install CNI plugins as follows: ```bash -$ go get github.com/containernetworking/plugins -$ pushd $GOPATH/src/github.com/containernetworking/plugins +$ git clone https://github.com/containernetworking/plugins.git +$ pushd plugins $ ./build_linux.sh $ sudo mkdir /opt/cni $ sudo cp -r bin /opt/cni/ @@ -93,8 +93,8 @@ $ popd You can install the `cri-tools` from source code: ```bash -$ go get github.com/kubernetes-sigs/cri-tools -$ pushd $GOPATH/src/github.com/kubernetes-sigs/cri-tools +$ git clone https://github.com/kubernetes-sigs/cri-tools.git +$ pushd cri-tools $ make $ sudo -E make install $ popd @@ -257,6 +257,48 @@ This launches a BusyBox container named `hello`, and it will be removed by `--rm The `--cni` flag enables CNI networking for the container. Without this flag, a container with just a loopback interface is created. 
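To double-check that such a container really runs inside a Kata VM rather than as a plain `runc` container, one quick informal test is to compare the kernel version reported inside the container with the host's; the image and runtime names below simply mirror the examples above.

```bash
# Kernel version on the host
uname -r
# Kernel version inside a Kata container -- it should differ from the host's,
# because the workload runs on the guest VM kernel.
sudo ctr image pull docker.io/library/busybox:latest
sudo ctr run --rm --runtime io.containerd.run.kata.v2 docker.io/library/busybox:latest kata-kernel-check uname -r
```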
### Launch containers using `ctr` command line with rootfs bundle

#### Get rootfs
Use the following script to create the rootfs:
```bash
ctr i pull quay.io/prometheus/busybox:latest
ctr i export rootfs.tar quay.io/prometheus/busybox:latest

rootfs_tar=rootfs.tar
bundle_dir="./bundle"
mkdir -p "${bundle_dir}"

# extract busybox rootfs
rootfs_dir="${bundle_dir}/rootfs"
mkdir -p "${rootfs_dir}"
layers_dir="$(mktemp -d)"
tar -C "${layers_dir}" -pxf "${rootfs_tar}"
for ((i=0;i<$(cat ${layers_dir}/manifest.json | jq -r ".[].Layers | length");i++)); do
  tar -C ${rootfs_dir} -xf ${layers_dir}/$(cat ${layers_dir}/manifest.json | jq -r ".[].Layers[${i}]")
done
```
#### Get `config.json`
Use `runc spec` to generate `config.json`:
```bash
cd ./bundle/rootfs
runc spec
mv config.json ../
```
Change the root `path` in `config.json` to the absolute path of the rootfs:

```JSON
"root":{
  "path":"/root/test/bundle/rootfs",
  "readonly": false
},
```

#### Run container
```bash
sudo ctr run -d --runtime io.containerd.run.kata.v2 --config bundle/config.json hello
sudo ctr t exec --exec-id ${ID} -t hello sh
```
### Launch Pods with `crictl` command line

With the `crictl` command line of `cri-tools`, you can specify runtime class with `-r` or `--runtime` flag.
diff --git a/docs/how-to/how-to-hotplug-memory-arm64.md b/docs/how-to/how-to-hotplug-memory-arm64.md
index 799367ad09d0..bc138b458b55 100644
--- a/docs/how-to/how-to-hotplug-memory-arm64.md
+++ b/docs/how-to/how-to-hotplug-memory-arm64.md
@@ -15,6 +15,18 @@
 $ sudo .ci/aarch64/install_rom_aarch64.sh
 $ popd
 ```

## Configure Kata QEMU

After executing the above script, two files will be generated under the directory `/usr/share/kata-containers/` by default, namely `kata-flash0.img` and `kata-flash1.img`. Next we need to change the `kata qemu` configuration file, which by default is `/opt/kata/share/defaults/kata-containers/configuration-qemu.toml`, and specify in it that the UEFI ROM installed above should be used. The path above corresponds to a `kata-deploy` installation; for a package management installation, use `kata-runtime env` to find the location of the configuration file. Refer to the following configuration.

```
[hypervisor.qemu]

# -pflash can add image file to VM. The arguments of it should be in format
# of ["/path/to/flash0.img", "/path/to/flash1.img"]
pflashes = ["/usr/share/kata-containers/kata-flash0.img", "/usr/share/kata-containers/kata-flash1.img"]
```

 ## Run for test

 Let's test if the memory hotplug is ready for Kata after install the UEFI ROM. Make sure containerd is ready to run Kata before test.
diff --git a/docs/how-to/how-to-run-kata-containers-with-SNP-VMs.md b/docs/how-to/how-to-run-kata-containers-with-SNP-VMs.md
new file mode 100644
index 000000000000..3a56cbba2725
--- /dev/null
+++ b/docs/how-to/how-to-run-kata-containers-with-SNP-VMs.md
@@ -0,0 +1,158 @@
# Kata Containers with AMD SEV-SNP VMs

## Disclaimer

This guide is designed for developers and, like the Developer Guide, is not intended for production systems or end users. It is advisable to only follow this guide on non-critical development systems.

## Prerequisites

To run Kata Containers in SNP-VMs, the following software stack is used.

![Kubernetes integration with shimv2](./images/SNP-stack.svg)

The host BIOS and kernel must be capable of supporting AMD SEV-SNP and configured accordingly.
For Kata Containers, the host kernel with branch [`sev-snp-iommu-avic_5.19-rc6_v3`](https://github.com/AMDESE/linux/tree/sev-snp-iommu-avic_5.19-rc6_v3) and commit [`3a88547`](https://github.com/AMDESE/linux/commit/3a885471cf89156ea555341f3b737ad2a8d9d3d0) is known to work in conjunction with SEV Firmware version 1.51.3 (0xh\_1.33.03) available on AMD's [SEV developer website](https://developer.amd.com/sev/). See [AMD's guide](https://github.com/AMDESE/AMDSEV/tree/sev-snp-devel) to configure the host accordingly. Verify that you are able to run SEV-SNP encrypted VMs first. The guest components required for Kata Containers are built as described below.

**Tip**: It is easiest to first have Kata Containers running on your system and then modify it to run containers in SNP-VMs. Follow the [Developer guide](../Developer-Guide.md#warning) and then follow the below steps. Nonetheless, you can just follow this guide from the start.

## How to build

Follow all of the below steps to install Kata Containers with SNP-support from scratch. These steps mostly follow the developer guide, with modifications to support SNP.

__Steps from the Developer Guide:__
- Get all the [required components](../Developer-Guide.md#requirements-to-build-individual-components) for building the kata-runtime
- [Build and install the kata-runtime](../Developer-Guide.md#build-and-install-the-kata-containers-runtime)
- [Build a custom agent](../Developer-Guide.md#build-a-custom-kata-agent---optional)
- [Create an initrd image](../Developer-Guide.md#create-an-initrd-image---optional) by first building a rootfs, then building the initrd based on the rootfs using a custom agent, and installing it. `ubuntu` works as the distribution of choice.
- Get the [required components](../../tools/packaging/kernel/README.md#requirements) to build a custom kernel

__SNP-specific steps:__
- Build the SNP-specific kernel as shown below (see this [guide](../../tools/packaging/kernel/README.md#build-kata-containers-kernel) for more information)
```bash
$ pushd kata-containers/tools/packaging/kernel/
$ ./build-kernel.sh -a x86_64 -x snp setup
$ ./build-kernel.sh -a x86_64 -x snp build
$ sudo -E PATH="${PATH}" ./build-kernel.sh -x snp install
$ popd
```
- Build a current OVMF capable of SEV-SNP:
```bash
$ pushd kata-containers/tools/packaging/static-build/ovmf
$ ./build.sh
$ tar -xvf edk2-x86_64.tar.gz
$ popd
```
- Build a custom QEMU:
```bash
$ source kata-containers/tools/packaging/scripts/lib.sh
$ qemu_url="$(get_from_kata_deps "assets.hypervisor.qemu-snp-experimental.url")"
$ qemu_tag="$(get_from_kata_deps "assets.hypervisor.qemu-snp-experimental.tag")"
$ git clone "${qemu_url}"
$ pushd qemu
$ git checkout "${qemu_tag}"
$ ./configure --enable-virtfs --target-list=x86_64-softmmu --enable-debug
$ make -j "$(nproc)"
$ popd
```

### Kata Containers Configuration for SNP

The configuration file located at `/etc/kata-containers/configuration.toml` must be adapted as follows to support SNP-VMs:
- Use the SNP-specific kernel for the guest VM (change path)
```toml
kernel = "/usr/share/kata-containers/vmlinuz-snp.container"
```
- Enable the use of an initrd (uncomment)
```toml
initrd = "/usr/share/kata-containers/kata-containers-initrd.img"
```
- Disable the use of a rootfs (comment out)
```toml
# image = "/usr/share/kata-containers/kata-containers.img"
```
- Use the custom QEMU capable of SNP (change path)
```toml
path = "/path/to/qemu/build/qemu-system-x86_64"
```
- Use `virtio-9p` device since
`virtio-fs` is unsupported due to bugs / shortcomings in QEMU version [`snp-v3`](https://github.com/AMDESE/qemu/tree/snp-v3) for SEV and SEV-SNP (change value) +```toml +shared_fs = "virtio-9p" +``` +- Disable `virtiofsd` since it is no longer required (comment out) +```toml +# virtio_fs_daemon = "/usr/libexec/virtiofsd" +``` +- Disable NVDIMM (uncomment) +```toml +disable_image_nvdimm = true +``` +- Disable shared memory (uncomment) +```toml +file_mem_backend = "" +``` +- Enable confidential guests (uncomment) +```toml +confidential_guest = true +``` +- Enable SNP-VMs (uncomment) +```toml +sev_snp_guest = true +``` + - Configure an OVMF (add path) +```toml +firmware = "/path/to/kata-containers/tools/packaging/static-build/ovmf/opt/kata/share/ovmf/OVMF.fd" +``` + +## Test Kata Containers with Containerd + +With Kata Containers configured to support SNP-VMs, we use containerd to test and deploy containers in these VMs. + +### Install Containerd +If not already present, follow [this guide](./containerd-kata.md#install) to install containerd and its related components including `CNI` and the `cri-tools` (skip Kata Containers since we already installed it) + +### Containerd Configuration + +Follow [this guide](./containerd-kata.md#configuration) to configure containerd to use Kata Containers + +## Run Kata Containers in SNP-VMs + +Run the below commands to start a container. See [this guide](./containerd-kata.md#run) for more information +```bash +$ sudo ctr image pull docker.io/library/busybox:latest +$ sudo ctr run --cni --runtime io.containerd.run.kata.v2 -t --rm docker.io/library/busybox:latest hello sh +``` + +### Check for active SNP: + +Inside the running container, run the following commands to check if SNP is active. It should look something like this: +``` +/ # dmesg | grep -i sev +[ 0.299242] Memory Encryption Features active: AMD SEV SEV-ES SEV-SNP +[ 0.472286] SEV: Using SNP CPUID table, 31 entries present. +[ 0.514574] SEV: SNP guest platform device initialized. +[ 0.885425] sev-guest sev-guest: Initialized SEV guest driver (using vmpck_id 0) +``` + +### Obtain an SNP Attestation Report + +To obtain an attestation report inside the container, the `/dev/sev-guest` must first be configured. As of now, the VM does not perform this step, however it can be performed inside the container, either in the terminal or in code. + +Example for shell: +``` +/ # SNP_MAJOR=$(cat /sys/devices/virtual/misc/sev-guest/dev | awk -F: '{print $1}') +/ # SNP_MINOR=$(cat /sys/devices/virtual/misc/sev-guest/dev | awk -F: '{print $2}') +/ # mknod -m 600 /dev/sev-guest c "${SNP_MAJOR}" "${SNP_MINOR}" +``` + +## Known Issues + +- Support for cgroups v2 is still [work in progress](https://github.com/kata-containers/kata-containers/issues/927). If issues occur due to cgroups v2 becoming the default in newer systems, one possible solution is to downgrade cgroups to v1: +```bash +sudo sed -i 's/^\(GRUB_CMDLINE_LINUX=".*\)"/\1 systemd.unified_cgroup_hierarchy=0"/' /etc/default/grub +sudo update-grub +sudo reboot +``` +- If both SEV and SEV-SNP are supported by the host, Kata Containers uses SEV-SNP by default. You can verify what features are enabled by checking `/sys/module/kvm_amd/parameters/sev` and `sev_snp`. This means that Kata Containers can not run both SEV-SNP-VMs and SEV-VMs at the same time. If SEV is to be used by Kata Containers instead, reload the `kvm_amd` kernel module without SNP-support, this will disable SNP-support for the entire platform. 
```bash
sudo rmmod kvm_amd && sudo modprobe kvm_amd sev_snp=0
```

diff --git a/docs/how-to/how-to-run-kata-containers-with-kinds-of-Block-Volumes.md b/docs/how-to/how-to-run-kata-containers-with-kinds-of-Block-Volumes.md
new file mode 100644
index 000000000000..e53a3b53450b
--- /dev/null
+++ b/docs/how-to/how-to-run-kata-containers-with-kinds-of-Block-Volumes.md
@@ -0,0 +1,226 @@
# A new way for Kata Containers to use Kinds of Block Volumes

> **Note:** This guide only applies to runtime-rs with the default hypervisor Dragonball.
> Support for other hypervisors is still in progress, and this guide will be updated when they're ready.


## Background

Currently, there is no widely applicable and convenient way for users to consume certain kinds of backend storage, such as file-on-host based block volumes, SPDK based volumes or VFIO device based volumes, with Kata Containers, so we adopt [Proposal: Direct Block Device Assignment](https://github.com/kata-containers/kata-containers/blob/main/docs/design/direct-blk-device-assignment.md) to address this.

## Solution

According to the proposal, the `kata-ctl direct-volume` command is used to add a directly assigned block volume device to the Kata Containers runtime.

Then, with the help of the [get_volume_mount_info](https://github.com/kata-containers/kata-containers/blob/099b4b0d0e3db31b9054e7240715f0d7f51f9a1c/src/libs/kata-types/src/mount.rs#L95) method, information is read from the JSON file (`mountinfo.json`) and parsed into the [Direct Volume Info](https://github.com/kata-containers/kata-containers/blob/099b4b0d0e3db31b9054e7240715f0d7f51f9a1c/src/libs/kata-types/src/mount.rs#L70) structure, which is used to save device-related information.

We only fill in the `mountinfo.json` fields, such as `device`, `volume_type`, `fs_type`, `metadata` and `options`, which correspond to the fields in [Direct Volume Info](https://github.com/kata-containers/kata-containers/blob/099b4b0d0e3db31b9054e7240715f0d7f51f9a1c/src/libs/kata-types/src/mount.rs#L70), to describe a device.

The JSON file `mountinfo.json` is placed in a sub-path, `/kubelet/kata-test-vol-001/volume001`, under the fixed path `/run/kata-containers/shared/direct-volumes/`.
The full path therefore looks like `/run/kata-containers/shared/direct-volumes/kubelet/kata-test-vol-001/volume001`, but for security reasons it is
encoded as `/run/kata-containers/shared/direct-volumes/L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx`.

Finally, when running a Kata container with `ctr run --mount type=X,src=Y,dst=Z,options=rbind:rw`, `type=X` should be set to the proprietary type designed for that kind of volume.

Now, supported types:

- `directvol` for direct volume
- `vfiovol` for VFIO device based volume
- `spdkvol` for SPDK/vhost-user based volume


## Setup Device and Run a Kata Container

### Direct Block Device Based Volume

#### create raw block based backend storage

> **Tip:** the raw block based backend storage MUST be formatted with `mkfs`.

```bash
$ sudo dd if=/dev/zero of=/tmp/stor/rawdisk01.20g bs=1M count=20480
$ sudo mkfs.ext4 /tmp/stor/rawdisk01.20g
```

#### setup direct block device for kata-containers

```json
{
  "device": "/tmp/stor/rawdisk01.20g",
  "volume_type": "directvol",
  "fs_type": "ext4",
  "metadata":"{}",
  "options": []
}
```

```bash
$ sudo kata-ctl direct-volume add /kubelet/kata-direct-vol-002/directvol002 "{\"device\": \"/tmp/stor/rawdisk01.20g\", \"volume_type\": \"directvol\", \"fs_type\": \"ext4\", \"metadata\":"{}", \"options\": []}"
$ # /kubelet/kata-direct-vol-002/directvol002 <==> /run/kata-containers/shared/direct-volumes/W1lMa2F0ZXQva2F0YS10a2F0DAxvbC0wMDEvdm9sdW1lMDAx
$ cat W1lMa2F0ZXQva2F0YS10a2F0DAxvbC0wMDEvdm9sdW1lMDAx/mountInfo.json
{"volume_type":"directvol","device":"/tmp/stor/rawdisk01.20g","fs_type":"ext4","metadata":{},"options":[]}
```

#### Run a Kata container with direct block device volume

```bash
$ # type=directvol,src=/kubelet/kata-direct-vol-002/directvol002,dst=/disk002,options=rbind:rw
$ sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=directvol,src=/kubelet/kata-direct-vol-002/directvol002,dst=/disk002,options=rbind:rw "$image" kata-direct-vol-xx05302045 /bin/bash
```


### VFIO Device Based Block Volume

#### create VFIO device based backend storage

> **Tip:** It only supports `vfio-pci` based PCI device passthrough mode.

In this scenario, the device's host kernel driver is replaced by `vfio-pci`, and an IOMMU group ID is generated.
Either the device's BDF or its VFIO IOMMU group ID under `/dev/vfio/` can be used as the "device" field in `mountinfo.json`.

```bash
$ lspci -nn -k -s 45:00.1
45:00.1 SCSI storage controller
...
Kernel driver in use: vfio-pci
...
$ ls /dev/vfio/110
/dev/vfio/110
$ ls /sys/kernel/iommu_groups/110/devices/
0000:45:00.1
```

#### setup VFIO device for kata-containers

First, configure the `mountinfo.json` as below:

- (1) device with `BB:DD:F`

```json
{
  "device": "45:00.1",
  "volume_type": "vfiovol",
  "fs_type": "ext4",
  "metadata":"{}",
  "options": []
}
```

- (2) device with `DDDD:BB:DD:F`

```json
{
  "device": "0000:45:00.1",
  "volume_type": "vfiovol",
  "fs_type": "ext4",
  "metadata":"{}",
  "options": []
}
```

- (3) device with `/dev/vfio/X`

```json
{
  "device": "/dev/vfio/110",
  "volume_type": "vfiovol",
  "fs_type": "ext4",
  "metadata":"{}",
  "options": []
}
```

Second, taking the device `/dev/vfio/110` as an example, add it for Kata Containers:

```bash
$ sudo kata-ctl direct-volume add /kubelet/kata-vfio-vol-003/vfiovol003 "{\"device\": \"/dev/vfio/110\", \"volume_type\": \"vfiovol\", \"fs_type\": \"ext4\", \"metadata\":"{}", \"options\": []}"
$ # /kubelet/kata-vfio-vol-003/vfiovol003 <==> /run/kata-containers/shared/direct-volumes/F0va22F0ZvaS12F0YS10a2F0DAxvbC0F0ZXvdm9sdF0Z0YSx
$ cat F0va22F0ZvaS12F0YS10a2F0DAxvbC0F0ZXvdm9sdF0Z0YSx/mountInfo.json
{"volume_type":"vfiovol","device":"/dev/vfio/110","fs_type":"ext4","metadata":{},"options":[]}
```

#### Run a Kata container with VFIO block device based volume

```bash
$ # type=vfiovol,src=/kubelet/kata-vfio-vol-003/vfiovol003,dst=/disk003,options=rbind:rw
$ sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=vfiovol,src=/kubelet/kata-vfio-vol-003/vfiovol003,dst=/disk003,options=rbind:rw "$image" kata-vfio-vol-xx05302245 /bin/bash
```


### SPDK Device Based Block Volume

For SPDK vhost-user devices in runtime-rs, unlike in the Go runtime, there is no need to
`mknod` a device node under `/dev/` any more.
Just use `kata-ctl direct-volume add ...` to create the mount info config.

#### Run SPDK vhost target and Expose vhost block device

As an example, run an SPDK vhost target and create a vhost-user block controller:

First, run the SPDK vhost target:

> **Tip:** If the `vfio-pci` driver is supported, you can run SPDK with `DRIVER_OVERRIDE=vfio-pci`.
> Otherwise, just run it without the override: `sudo HUGEMEM=4096 ./scripts/setup.sh`.

```bash
$ SPDK_DEVEL=/xx/spdk
$ VHU_UDS_PATH=/tmp/vhu-targets
$ RAW_DISKS=/xx/rawdisks
$ # Reset first
$ ${SPDK_DEVEL}/scripts/setup.sh reset
$ sudo sysctl -w vm.nr_hugepages=2048
$ #4G Huge Memory for spdk
$ sudo HUGEMEM=4096 DRIVER_OVERRIDE=vfio-pci ${SPDK_DEVEL}/scripts/setup.sh
$ sudo ${SPDK_DEVEL}/build/bin/spdk_tgt -S $VHU_UDS_PATH -s 1024 -m 0x3 &
```

Second, create a vhost controller:

```bash
$ sudo dd if=/dev/zero of=${RAW_DISKS}/rawdisk01.20g bs=1M count=20480
$ sudo ${SPDK_DEVEL}/scripts/rpc.py bdev_aio_create ${RAW_DISKS}/rawdisk01.20g vhu-rawdisk01.20g 512
$ sudo ${SPDK_DEVEL}/scripts/rpc.py vhost_create_blk_controller vhost-blk-rawdisk01.sock vhu-rawdisk01.20g
```

Here, a vhost controller `vhost-blk-rawdisk01.sock` is created, and the controller will
be passed to the hypervisor, such as Dragonball, Cloud Hypervisor, Firecracker or QEMU.


#### setup vhost-user block device for kata-containers


First, `mkdir` a sub-path `kubelet/kata-test-vol-001/` under `/run/kata-containers/shared/direct-volumes/`.

Second, fill in the fields of `mountinfo.json`, which looks like the following:
```json
{
  "device": "/tmp/vhu-targets/vhost-blk-rawdisk01.sock",
  "volume_type": "spdkvol",
  "fs_type": "ext4",
  "metadata":"{}",
  "options": []
}
```

Third, use `kata-ctl direct-volume add` to add the block device and generate `mountinfo.json`, then run a Kata container with `--mount`.

```bash
$ # kata-ctl direct-volume add
$ sudo kata-ctl direct-volume add /kubelet/kata-test-vol-001/volume001 "{\"device\": \"/tmp/vhu-targets/vhost-blk-rawdisk01.sock\", \"volume_type\":\"spdkvol\", \"fs_type\": \"ext4\", \"metadata\":"{}", \"options\": []}"
$ # /kubelet/kata-test-vol-001/volume001 <==> /run/kata-containers/shared/direct-volumes/L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx
$ cat L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx/mountInfo.json
$ {"volume_type":"spdkvol","device":"/tmp/vhu-targets/vhost-blk-rawdisk01.sock","fs_type":"ext4","metadata":{},"options":[]}
```

As `/run/kata-containers/shared/direct-volumes/` is a fixed path, we can run a Kata pod with `--mount` and set the
`src` sub-path. The `--mount` argument looks like: `--mount type=spdkvol,src=/kubelet/kata-test-vol-001/volume001,dst=/disk001`.


#### Run a Kata container with SPDK vhost-user block device


In this case, with `ctr run --mount type=X,src=source,dst=dest`, `X` is set to `spdkvol`, a proprietary type specifically designed for SPDK volumes.
+ +```bash +$ # ctr run with --mount type=spdkvol,src=/kubelet/kata-test-vol-001/volume001,dst=/disk001 +$ sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=spdkvol,src=/kubelet/kata-test-vol-001/volume001,dst=/disk001,options=rbind:rw "$image" kata-spdk-vol-xx0530 /bin/bash +``` diff --git a/docs/how-to/how-to-run-rootless-vmm.md b/docs/how-to/how-to-run-rootless-vmm.md index 3986de252bef..7711c1325010 100644 --- a/docs/how-to/how-to-run-rootless-vmm.md +++ b/docs/how-to/how-to-run-rootless-vmm.md @@ -1,5 +1,5 @@ ## Introduction -To improve security, Kata Container supports running the VMM process (currently only QEMU) as a non-`root` user. +To improve security, Kata Container supports running the VMM process (QEMU and cloud-hypervisor) as a non-`root` user. This document describes how to enable the rootless VMM mode and its limitations. ## Pre-requisites @@ -27,7 +27,7 @@ Another necessary change is to move the hypervisor runtime files (e.g. `vhost-fs ## Limitations 1. Only the VMM process is running as a non-root user. Other processes such as Kata Container shimv2 and `virtiofsd` still run as the root user. -2. Currently, this feature is only supported in QEMU. Still need to bring it to Firecracker and Cloud Hypervisor (see https://github.com/kata-containers/kata-containers/issues/2567). +2. Currently, this feature is only supported in QEMU and cloud-hypervisor. For firecracker, you can use jailer to run the VMM process with a non-root user. 3. Certain features will not work when rootless VMM is enabled, including: 1. Passing devices to the guest (`virtio-blk`, `virtio-scsi`) will not work if the non-privileged user does not have permission to access it (leading to a permission denied error). A more permissive permission (e.g. 666) may overcome this issue. However, you need to be aware of the potential security implications of reducing the security on such devices. 2. `vfio` device will also not work because of permission denied error. \ No newline at end of file diff --git a/docs/how-to/how-to-set-sandbox-config-kata.md b/docs/how-to/how-to-set-sandbox-config-kata.md index 9f612831c2d0..b8ac511cd42c 100644 --- a/docs/how-to/how-to-set-sandbox-config-kata.md +++ b/docs/how-to/how-to-set-sandbox-config-kata.md @@ -57,6 +57,7 @@ There are several kinds of Kata configurations and they are listed below. | `io.katacontainers.config.hypervisor.enable_iothreads` | `boolean`| enable IO to be processed in a separate thread. Supported currently for virtio-`scsi` driver | | `io.katacontainers.config.hypervisor.enable_mem_prealloc` | `boolean` | the memory space used for `nvdimm` device by the hypervisor | | `io.katacontainers.config.hypervisor.enable_vhost_user_store` | `boolean` | enable vhost-user storage device (QEMU) | +| `io.katacontainers.config.hypervisor.vhost_user_reconnect_timeout_sec` | `string`| the timeout for reconnecting vhost user socket (QEMU) | `io.katacontainers.config.hypervisor.enable_virtio_mem` | `boolean` | enable virtio-mem (QEMU) | | `io.katacontainers.config.hypervisor.entropy_source` (R) | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) | | `io.katacontainers.config.hypervisor.file_mem_backend` (R) | string | file based memory backend root directory | @@ -87,7 +88,7 @@ There are several kinds of Kata configurations and they are listed below. 
| `io.katacontainers.config.hypervisor.use_vsock` | `boolean` | specify use of `vsock` for agent communication |
 | `io.katacontainers.config.hypervisor.vhost_user_store_path` (R) | `string` | specify the directory path where vhost-user devices related folders, sockets and device nodes should be (QEMU) |
 | `io.katacontainers.config.hypervisor.virtio_fs_cache_size` | uint32 | virtio-fs DAX cache size in `MiB` |
-| `io.katacontainers.config.hypervisor.virtio_fs_cache` | string | the cache mode for virtio-fs, valid values are `always`, `auto` and `none` |
+| `io.katacontainers.config.hypervisor.virtio_fs_cache` | string | the cache mode for virtio-fs, valid values are `always`, `auto` and `never` |
 | `io.katacontainers.config.hypervisor.virtio_fs_daemon` | string | virtio-fs `vhost-user` daemon path |
 | `io.katacontainers.config.hypervisor.virtio_fs_extra_args` | string | extra options passed to `virtiofs` daemon |
 | `io.katacontainers.config.hypervisor.enable_guest_swap` | `boolean` | enable swap in the guest |
diff --git a/docs/how-to/how-to-setup-swap-devices-in-guest-kernel.md b/docs/how-to/how-to-setup-swap-devices-in-guest-kernel.md
index 8ab9e89e5dee..1ec90d760d4c 100644
--- a/docs/how-to/how-to-setup-swap-devices-in-guest-kernel.md
+++ b/docs/how-to/how-to-setup-swap-devices-in-guest-kernel.md
@@ -17,9 +17,9 @@
 $ sudo sed -i -e 's/^#enable_guest_swap.*$/enable_guest_swap = true/g' /etc/kata-containers/configuration.toml
 ```

-## Run a Kata Container utilizing swap device
+## Run Kata Containers utilizing a swap device

-Use following command to start a Kata Container with swappiness 60 and 1GB swap device (swap_in_bytes - memory_limit_in_bytes).
+Use the following command to start Kata Containers with swappiness 60 and a 1GB swap device (swap_in_bytes - memory_limit_in_bytes).
 ```
 $ pod_yaml=pod.yaml
 $ container_yaml=container.yaml
@@ -43,12 +43,12 @@
 command:
 - top
 EOF
 $ sudo crictl pull $image
-$ podid=$(sudo crictl runp $pod_yaml)
+$ podid=$(sudo crictl runp --runtime kata $pod_yaml)
 $ cid=$(sudo crictl create $podid $container_yaml $pod_yaml)
 $ sudo crictl start $cid
 ```

-Kata Container setups swap device for this container only when `io.katacontainers.container.resource.swappiness` is set.
+Kata Containers sets up a swap device for this container only when `io.katacontainers.container.resource.swappiness` is set.

 The following table shows the swap size how to decide if `io.katacontainers.container.resource.swappiness` is set.

 |`io.katacontainers.container.resource.swap_in_bytes`|`memory_limit_in_bytes`|swap size|
diff --git a/docs/how-to/how-to-use-erofs-build-rootfs.md b/docs/how-to/how-to-use-erofs-build-rootfs.md
new file mode 100644
index 000000000000..72bf6315f5b3
--- /dev/null
+++ b/docs/how-to/how-to-use-erofs-build-rootfs.md
@@ -0,0 +1,90 @@
# Configure Kata Containers to use EROFS to build the rootfs

## Introduction
For Kata Containers, the rootfs is used in a read-only way. EROFS can noticeably decrease metadata overhead.

`mkfs.erofs` can generate compressed and uncompressed EROFS images.

For uncompressed images, no files are compressed; optionally, the data blocks at the end of a file can be inlined with the metadata.

For compressed images, each file is compressed using the lz4 or lz4hc algorithm, and the tool checks whether compression actually saves space; a file is stored uncompressed if it does not.
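As a rough illustration of the two image flavours (this is not part of the Kata build flow described below, and the paths are placeholders), `mkfs.erofs` can be run directly against a prepared rootfs directory:

```shell
# Build an uncompressed EROFS image from a rootfs directory (placeholder paths).
$ mkfs.erofs rootfs-uncompressed.erofs.img ./rootfs
# Build an LZ4HC-compressed image from the same directory.
$ mkfs.erofs -zlz4hc rootfs-lz4hc.erofs.img ./rootfs
# Compare the resulting image sizes.
$ ls -lh rootfs-uncompressed.erofs.img rootfs-lz4hc.erofs.img
```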

## Performance comparison
| | EROFS | EXT4 | XFS |
|-----------------|-------| --- | --- |
| Image Size [MB] | 106 (uncompressed) | 256 | 126 |


## Guidance
### Install the `erofs-utils`
#### `apt/dnf` install
On newer `Ubuntu/Debian` systems, it can be installed directly using the `apt` command, and on `Fedora` it can be installed directly using the `dnf` command.

```shell
# Debian/Ubuntu
$ apt install erofs-utils
# Fedora
$ dnf install erofs-utils
```

#### Source install
[https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git](https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git)

##### Compile dependencies
If you need to enable the `Lz4` compression feature, `Lz4 1.8.0+` is required, and `Lz4 1.9.3+` is strongly recommended.

##### Compilation process
For some old lz4 versions (lz4-1.8.0~1.8.3), the lz4hc algorithm is not supported unless lz4-static is installed (for example with `apt install lz4-static.x86_64`). However, these versions have some compression bugs, and it is not recommended to use them directly.
If you use `lz4 1.9.0+`, you can directly use the following command to compile.

```shell
$ ./autogen.sh
$ ./configure
$ make
```

The compiled `mkfs.erofs` program will be saved in the `mkfs` directory. Afterwards, the generated tools can be installed to a system directory using `make install` (requires root privileges).

### Create a local rootfs
```shell
$ export distro="ubuntu"
$ export FS_TYPE="erofs"
$ export ROOTFS_DIR="$(realpath kata-containers/tools/osbuilder/rootfs-builder/rootfs)"
$ sudo rm -rf "${ROOTFS_DIR}"
$ pushd kata-containers/tools/osbuilder/rootfs-builder
$ script -fec 'sudo -E SECCOMP=no ./rootfs.sh "${distro}"'
$ popd
```

### Add a custom agent to the image - OPTIONAL
> Note:
> - You should only do this step if you are testing with the latest version of the agent.
+```shell +$ sudo install -o root -g root -m 0550 -t "${ROOTFS_DIR}/usr/bin" "${ROOTFS_DIR}/../../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent" +$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-agent.service" "${ROOTFS_DIR}/usr/lib/systemd/system/" +$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-containers.target" "${ROOTFS_DIR}/usr/lib/systemd/system/" +``` + +### Build a root image +```shell +$ pushd kata-containers/tools/osbuilder/image-builder +$ script -fec 'sudo -E ./image_builder.sh "${ROOTFS_DIR}"' +$ popd +``` + +### Install the rootfs image +```shell +$ pushd kata-containers/tools/osbuilder/image-builder +$ commit="$(git log --format=%h -1 HEAD)" +$ date="$(date +%Y-%m-%d-%T.%N%z)" +$ rootfs="erofs" +$ image="kata-containers-${rootfs}-${date}-${commit}" +$ sudo install -o root -g root -m 0640 -D kata-containers.img "/usr/share/kata-containers/${image}" +$ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers.img) +$ popd +``` + +### Use `EROFS` in the runtime +```shell +$ sudo sed -i -e 's/^# *\(rootfs_type\).*=.*$/\1 = erofs/g' /etc/kata-containers/configuration.toml +``` diff --git a/docs/how-to/how-to-use-k8s-with-containerd-and-kata.md b/docs/how-to/how-to-use-k8s-with-containerd-and-kata.md index de7a34ef6166..3cc1ebb12626 100644 --- a/docs/how-to/how-to-use-k8s-with-containerd-and-kata.md +++ b/docs/how-to/how-to-use-k8s-with-containerd-and-kata.md @@ -139,12 +139,12 @@ By default the CNI plugin binaries is installed under `/opt/cni/bin` (in package EOF ``` -## Allow pods to run in the master node +## Allow pods to run in the control-plane node -By default, the cluster will not schedule pods in the master node. To enable master node scheduling: +By default, the cluster will not schedule pods in the control-plane node. To enable control-plane node scheduling: ```bash -$ sudo -E kubectl taint nodes --all node-role.kubernetes.io/master- +$ sudo -E kubectl taint nodes --all node-role.kubernetes.io/control-plane- ``` ## Create runtime class for Kata Containers diff --git a/docs/how-to/how-to-use-kata-containers-with-firecracker.md b/docs/how-to/how-to-use-kata-containers-with-firecracker.md index 03f533ef786a..2dfc3b215705 100644 --- a/docs/how-to/how-to-use-kata-containers-with-firecracker.md +++ b/docs/how-to/how-to-use-kata-containers-with-firecracker.md @@ -19,12 +19,14 @@ This document requires the presence of Kata Containers on your system. Install u ## Install AWS Firecracker -Kata Containers only support AWS Firecracker v0.23.4 ([yet](https://github.com/kata-containers/kata-containers/pull/1519)). +For information about the supported version of Firecracker, see the Kata Containers +[`versions.yaml`](../../versions.yaml). 
+ To install Firecracker we need to get the `firecracker` and `jailer` binaries: ```bash $ release_url="https://github.com/firecracker-microvm/firecracker/releases" -$ version="v0.23.1" +$ version=$(yq read /versions.yaml assets.hypervisor.firecracker.version) $ arch=`uname -m` $ curl ${release_url}/download/${version}/firecracker-${version}-${arch} -o firecracker $ curl ${release_url}/download/${version}/jailer-${version}-${arch} -o jailer @@ -104,7 +106,7 @@ sudo dmsetup create "${POOL_NAME}" \ cat << EOF # -# Add this to your config.toml configuration file and restart `containerd` daemon +# Add this to your config.toml configuration file and restart containerd daemon # [plugins] [plugins.devmapper] @@ -212,7 +214,7 @@ Next, we need to configure containerd. Add a file in your path (e.g. `/usr/local ``` #!/bin/bash -KATA_CONF_FILE=/etc/containers/configuration-fc.toml /usr/local/bin/containerd-shim-kata-v2 $@ +KATA_CONF_FILE=/etc/kata-containers/configuration-fc.toml /usr/local/bin/containerd-shim-kata-v2 $@ ``` > **Note:** You may need to edit the paths of the configuration file and the `containerd-shim-kata-v2` to correspond to your setup. diff --git a/docs/how-to/images/SNP-stack.svg b/docs/how-to/images/SNP-stack.svg new file mode 100644 index 000000000000..9d32557f2017 --- /dev/null +++ b/docs/how-to/images/SNP-stack.svg @@ -0,0 +1,4 @@ + + + +
Linux Kernel / KVM
Linux Kernel / KVM
QEMU
QEMU
BIOS
BIOS
OVMF
OVMF
Linux Kernel
Linux Kernel
Host
Host
VM
VM
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/how-to/run-kata-with-k8s.md b/docs/how-to/run-kata-with-k8s.md index 4e5c58d5a2b5..ab700d2a4a6a 100644 --- a/docs/how-to/run-kata-with-k8s.md +++ b/docs/how-to/run-kata-with-k8s.md @@ -115,11 +115,11 @@ $ sudo kubeadm init --ignore-preflight-errors=all --config kubeadm-config.yaml $ export KUBECONFIG=/etc/kubernetes/admin.conf ``` -### Allow pods to run in the master node +### Allow pods to run in the control-plane node -By default, the cluster will not schedule pods in the master node. To enable master node scheduling: +By default, the cluster will not schedule pods in the control-plane node. To enable control-plane node scheduling: ```bash -$ sudo -E kubectl taint nodes --all node-role.kubernetes.io/master- +$ sudo -E kubectl taint nodes --all node-role.kubernetes.io/control-plane- ``` ### Create runtime class for Kata Containers diff --git a/docs/hypervisors.md b/docs/hypervisors.md index 02dd49aa126b..e380450b203d 100644 --- a/docs/hypervisors.md +++ b/docs/hypervisors.md @@ -33,6 +33,7 @@ are available, their default values and how each setting can be used. [Cloud Hypervisor] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) | `configuration-clh.toml` | [Firecracker] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) | `configuration-fc.toml` | [QEMU] | C | all | Type 2 ([KVM]) | `configuration-qemu.toml` | +[`Dragonball`] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) | `configuration-dragonball.toml` | ## Determine currently configured hypervisor @@ -52,6 +53,7 @@ the hypervisors: [Cloud Hypervisor] | Low latency, small memory footprint, small attack surface | Minimal | | excellent | excellent | High performance modern cloud workloads | | [Firecracker] | Very slimline | Extremely minimal | Doesn't support all device types | excellent | excellent | Serverless / FaaS | | [QEMU] | Lots of features | Lots | | good | good | Good option for most users | | All users | +[`Dragonball`] | Built-in VMM, low CPU and memory overhead| Minimal | | excellent | excellent | Optimized for most container workloads | `out-of-the-box` Kata Containers experience | For further details, see the [Virtualization in Kata Containers](design/virtualization.md) document and the official documentation for each hypervisor. @@ -60,3 +62,4 @@ For further details, see the [Virtualization in Kata Containers](design/virtuali [Firecracker]: https://github.com/firecracker-microvm/firecracker [KVM]: https://en.wikipedia.org/wiki/Kernel-based_Virtual_Machine [QEMU]: http://www.qemu-project.org +[`Dragonball`]: https://github.com/openanolis/dragonball-sandbox diff --git a/docs/install/README.md b/docs/install/README.md index 9ad55f0f2174..5fb1c3a03aef 100644 --- a/docs/install/README.md +++ b/docs/install/README.md @@ -19,7 +19,6 @@ Packaged installation methods uses your distribution's native package format (su |------------------------------------------------------|----------------------------------------------------------------------------------------------|-------------------|-----------------------------------------------------------------------------------------------| | [Using kata-deploy](#kata-deploy-installation) | The preferred way to deploy the Kata Containers distributed binaries on a Kubernetes cluster | **No!** | Best way to give it a try on kata-containers on an already up and running Kubernetes cluster. | | [Using official distro packages](#official-packages) | Kata packages provided by Linux distributions official repositories | yes | Recommended for most users. 
| -| [Using snap](#snap-installation) | Easy to install | yes | Good alternative to official distro packages. | | [Automatic](#automatic-installation) | Run a single command to install a full system | **No!** | For those wanting the latest release quickly. | | [Manual](#manual-installation) | Follow a guide step-by-step to install a working system | **No!** | For those who want the latest release with more control. | | [Build from source](#build-from-source-installation) | Build the software components manually | **No!** | Power users and developers only. | @@ -42,12 +41,6 @@ Kata packages are provided by official distribution repositories for: | [CentOS](centos-installation-guide.md) | 8 | | [Fedora](fedora-installation-guide.md) | 34 | -### Snap Installation - -The snap installation is available for all distributions which support `snapd`. - -[Use snap](snap-installation-guide.md) to install Kata Containers from https://snapcraft.io. - ### Automatic Installation [Use `kata-manager`](/utils/README.md) to automatically install a working Kata Containers system. @@ -79,3 +72,6 @@ versions. This is not recommended for normal users. * [upgrading document](../Upgrading.md) * [developer guide](../Developer-Guide.md) * [runtime documentation](../../src/runtime/README.md) + +## Kata Containers 3.0 rust runtime installation +* [installation guide](../install/kata-containers-3.0-rust-runtime-installation-guide.md) diff --git a/docs/install/aws-installation-guide.md b/docs/install/aws-installation-guide.md index ee8494d60bc6..d1bac643dd75 100644 --- a/docs/install/aws-installation-guide.md +++ b/docs/install/aws-installation-guide.md @@ -123,7 +123,7 @@ Refer to [this guide](https://docs.aws.amazon.com/cli/latest/userguide/cli-ec2-l SSH into the machine ```bash -$ ssh -i MyKeyPair.pen ubuntu@${IP} +$ ssh -i MyKeyPair.pem ubuntu@${IP} ``` Go onto the next step. diff --git a/docs/install/container-manager/containerd/containerd-install.md b/docs/install/container-manager/containerd/containerd-install.md index 15e1e332ebfb..2be3192d9860 100644 --- a/docs/install/container-manager/containerd/containerd-install.md +++ b/docs/install/container-manager/containerd/containerd-install.md @@ -19,12 +19,6 @@ > - If you decide to proceed and install a Kata Containers release, you can > still check for the latest version of Kata Containers by running > `kata-runtime check --only-list-releases`. -> -> - These instructions will not work for Fedora 31 and higher since those -> distribution versions only support cgroups version 2 by default. However, -> Kata Containers currently requires cgroups version 1 (on the host side). See -> https://github.com/kata-containers/kata-containers/issues/927 for further -> details. ## Install Kata Containers diff --git a/docs/install/kata-containers-3.0-rust-runtime-installation-guide.md b/docs/install/kata-containers-3.0-rust-runtime-installation-guide.md new file mode 100644 index 000000000000..bb1732292c37 --- /dev/null +++ b/docs/install/kata-containers-3.0-rust-runtime-installation-guide.md @@ -0,0 +1,99 @@ +# Kata Containers 3.0 rust runtime installation +The following is an overview of the different installation methods available. + +## Prerequisites + +Kata Containers 3.0 rust runtime requires nested virtualization or bare metal. Check +[hardware requirements](/src/runtime/README.md#hardware-requirements) to see if your system is capable of running Kata +Containers. 
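A quick, informal way to check this on an `x86_64` host is to confirm that the CPU exposes hardware virtualization and that `/dev/kvm` is available (adapt the check for other architectures):

```bash
# Count CPUs advertising Intel VT-x (vmx) or AMD-V (svm); the result should be non-zero.
grep -E -c '(vmx|svm)' /proc/cpuinfo
# The KVM device must exist for the VMM used by the rust runtime to create guests.
ls -l /dev/kvm
```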
+ +### Platform support + +Kata Containers 3.0 rust runtime currently runs on 64-bit systems supporting the following +architectures: + +> **Notes:** +> For other architectures, see https://github.com/kata-containers/kata-containers/issues/4320 + +| Architecture | Virtualization technology | +|-|-| +| `x86_64`| [Intel](https://www.intel.com) VT-x | +| `aarch64` ("`arm64`")| [ARM](https://www.arm.com) Hyp | + +## Packaged installation methods + +| Installation method | Description | Automatic updates | Use case | Availability +|------------------------------------------------------|----------------------------------------------------------------------------------------------|-------------------|-----------------------------------------------------------------------------------------------|----------- | +| [Using kata-deploy](#kata-deploy-installation) | The preferred way to deploy the Kata Containers distributed binaries on a Kubernetes cluster | **No!** | Best way to give it a try on kata-containers on an already up and running Kubernetes cluster. | Yes | +| [Using official distro packages](#official-packages) | Kata packages provided by Linux distributions official repositories | yes | Recommended for most users. | No | +| [Automatic](#automatic-installation) | Run a single command to install a full system | **No!** | For those wanting the latest release quickly. | No | +| [Manual](#manual-installation) | Follow a guide step-by-step to install a working system | **No!** | For those who want the latest release with more control. | No | +| [Build from source](#build-from-source-installation) | Build the software components manually | **No!** | Power users and developers only. | Yes | + +### Kata Deploy Installation + +Follow the [`kata-deploy`](../../tools/packaging/kata-deploy/README.md). +### Official packages +`ToDo` +### Automatic Installation +`ToDo` +### Manual Installation +`ToDo` + +## Build from source installation + +### Rust Environment Set Up + +* Download `Rustup` and install `Rust` + > **Notes:** + > For Rust version, please set `RUST_VERSION` to the value of `languages.rust.meta.newest-version key` in [`versions.yaml`](../../versions.yaml) or, if `yq` is available on your system, run `export RUST_VERSION=$(yq read versions.yaml languages.rust.meta.newest-version)`. + + Example for `x86_64` + ``` + $ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh + $ source $HOME/.cargo/env + $ rustup install ${RUST_VERSION} + $ rustup default ${RUST_VERSION}-x86_64-unknown-linux-gnu + ``` + +* Musl support for fully static binary + + Example for `x86_64` + ``` + $ rustup target add x86_64-unknown-linux-musl + ``` +* [Musl `libc`](http://musl.libc.org/) install + + Example for musl 1.2.3 + ``` + $ curl -O https://git.musl-libc.org/cgit/musl/snapshot/musl-1.2.3.tar.gz + $ tar vxf musl-1.2.3.tar.gz + $ cd musl-1.2.3/ + $ ./configure --prefix=/usr/local/ + $ make && sudo make install + ``` + + +### Install Kata 3.0 Rust Runtime Shim + +``` +$ git clone https://github.com/kata-containers/kata-containers.git +$ cd kata-containers/src/runtime-rs +$ make && sudo make install +``` +After running the command above, the default config file `configuration.toml` will be installed under `/usr/share/defaults/kata-containers/`, the binary file `containerd-shim-kata-v2` will be installed under `/usr/local/bin/` . + +### Build Kata Containers Kernel +Follow the [Kernel installation guide](/tools/packaging/kernel/README.md). 
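For reference, the packaging script from the guide linked above is typically driven as follows; this is only a sketch, and the linked README remains the authoritative source for per-architecture options:

```bash
# Sketch of the usual guest kernel build flow; see the kernel README for details.
git clone https://github.com/kata-containers/kata-containers.git   # skip if already cloned above
pushd kata-containers/tools/packaging/kernel/
./build-kernel.sh setup
./build-kernel.sh build
sudo -E PATH="${PATH}" ./build-kernel.sh install
popd
```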
+ +### Build Kata Rootfs +Follow the [Rootfs installation guide](../../tools/osbuilder/rootfs-builder/README.md). + +### Build Kata Image +Follow the [Image installation guide](../../tools/osbuilder/image-builder/README.md). + +### Install Containerd + +Follow the [Containerd installation guide](container-manager/containerd/containerd-install.md). + + diff --git a/docs/install/minikube-installation-guide.md b/docs/install/minikube-installation-guide.md index 7bc80b4b813a..6a946a28baad 100644 --- a/docs/install/minikube-installation-guide.md +++ b/docs/install/minikube-installation-guide.md @@ -55,11 +55,11 @@ Here are the features to set up a CRI-O based Minikube, and why you need them: | what | why | | ---- | --- | -| `--bootstrapper=kubeadm` | As recommended for [minikube CRI-o](https://kubernetes.io/docs/setup/minikube/#cri-o) | +| `--bootstrapper=kubeadm` | As recommended for [minikube CRI-O](https://minikube.sigs.k8s.io/docs/handbook/config/#runtime-configuration) | | `--container-runtime=cri-o` | Using CRI-O for Kata | -| `--enable-default-cni` | As recommended for [minikube CRI-o](https://kubernetes.io/docs/setup/minikube/#cri-o) | +| `--enable-default-cni` | As recommended for [minikube CRI-O](https://minikube.sigs.k8s.io/docs/handbook/config/#runtime-configuration) | | `--memory 6144` | Allocate sufficient memory, as Kata Containers default to 1 or 2Gb | -| `--network-plugin=cni` | As recommended for [minikube CRI-o](https://kubernetes.io/docs/setup/minikube/#cri-o) | +| `--network-plugin=cni` | As recommended for [minikube CRI-O](https://minikube.sigs.k8s.io/docs/handbook/config/#runtime-configuration) | | `--vm-driver kvm2` | The host VM driver | To use containerd, modify the `--container-runtime` argument: @@ -71,12 +71,6 @@ To use containerd, modify the `--container-runtime` argument: > **Notes:** > - Adjust the `--memory 6144` line to suit your environment and requirements. Kata Containers default to > requesting 2048MB per container. We recommended you supply more than that to the Minikube node. -> - Prior to Minikube/Kubernetes v1.14, the beta `RuntimeClass` feature also needed enabling with -> the following. -> -> | what | why | -> | ---- | --- | -> | `--feature-gates=RuntimeClass=true` | Kata needs to use the `RuntimeClass` Kubernetes feature | The full command is therefore: @@ -97,7 +91,7 @@ Before you install Kata Containers, check that your Minikube is operating. On yo $ kubectl get nodes ``` -You should see your `master` node listed as being `Ready`. +You should see your `control-plane` node listed as being `Ready`. Check you have virtualization enabled inside your Minikube. The following should return a number larger than `0` if you have either of the `vmx` or `svm` nested virtualization features @@ -138,17 +132,9 @@ $ kubectl -n kube-system exec ${podname} -- ps -ef | fgrep infinity ## Enabling Kata Containers -> **Note:** Only Minikube/Kubernetes versions <= 1.13 require this step. Since version -> v1.14, the `RuntimeClass` is enabled by default. Performing this step on Kubernetes > v1.14 is -> however benign. - Now you have installed the Kata Containers components in the Minikube node. Next, you need to configure Kubernetes `RuntimeClass` to know when to use Kata Containers to run a pod. -```sh -$ kubectl apply -f https://raw.githubusercontent.com/kubernetes/node-api/master/manifests/runtimeclass_crd.yaml > runtimeclass_crd.yaml -``` - ### Register the runtime Now register the `kata qemu` runtime with that class. 
This should result in no errors: diff --git a/docs/install/snap-installation-guide.md b/docs/install/snap-installation-guide.md deleted file mode 100644 index 0f22c221154f..000000000000 --- a/docs/install/snap-installation-guide.md +++ /dev/null @@ -1,52 +0,0 @@ -# Kata Containers snap package - -## Install Kata Containers - -Kata Containers can be installed in any Linux distribution that supports -[snapd](https://docs.snapcraft.io/installing-snapd). - -Run the following command to install **Kata Containers**: - -```sh -$ sudo snap install kata-containers --stable --classic -``` - -## Configure Kata Containers - -By default Kata Containers snap image is mounted at `/snap/kata-containers` as a -read-only file system, therefore default configuration file can not be edited. -Fortunately Kata Containers supports loading a configuration file from another -path than the default. - -```sh -$ sudo mkdir -p /etc/kata-containers -$ sudo cp /snap/kata-containers/current/usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers/ -$ $EDITOR /etc/kata-containers/configuration.toml -``` - -## Integration with shim v2 Container Engines - -The Container engine daemon (`cri-o`, `containerd`, etc) needs to be able to find the -`containerd-shim-kata-v2` binary to allow Kata Containers to be created. -Run the following command to create a symbolic link to the shim v2 binary. - -```sh -$ sudo ln -sf /snap/kata-containers/current/usr/bin/containerd-shim-kata-v2 /usr/local/bin/containerd-shim-kata-v2 -``` - -Once the symbolic link has been created and the engine daemon configured, `io.containerd.kata.v2` -can be used as runtime. - -Read the following documents to know how to run Kata Containers 2.x with `containerd`. - -* [How to use Kata Containers and Containerd](../how-to/containerd-kata.md) -* [Install Kata Containers with containerd](./container-manager/containerd/containerd-install.md) - - -## Remove Kata Containers snap package - -Run the following command to remove the Kata Containers snap: - -```sh -$ sudo snap remove kata-containers -``` diff --git a/docs/use-cases/NVIDIA-GPU-passthrough-and-Kata.md b/docs/use-cases/NVIDIA-GPU-passthrough-and-Kata.md index 18a75bde779f..e1b5af76e39d 100644 --- a/docs/use-cases/NVIDIA-GPU-passthrough-and-Kata.md +++ b/docs/use-cases/NVIDIA-GPU-passthrough-and-Kata.md @@ -545,6 +545,12 @@ Create the hook execution file for Kata: /usr/bin/nvidia-container-toolkit -debug $@ ``` +Make sure the hook shell is executable: + +```sh +chmod +x $ROOTFS_DIR/usr/share/oci/hooks/prestart/nvidia-container-toolkit.sh +``` + As the last step one can do some cleanup of files or package caches. Build the rootfs and configure it for use with Kata according to the development guide. diff --git a/docs/use-cases/using-Intel-QAT-and-kata.md b/docs/use-cases/using-Intel-QAT-and-kata.md index d029de367232..e83569fbed8a 100644 --- a/docs/use-cases/using-Intel-QAT-and-kata.md +++ b/docs/use-cases/using-Intel-QAT-and-kata.md @@ -49,7 +49,7 @@ the latest driver. 
$ export QAT_DRIVER_VER=qat1.7.l.4.14.0-00031.tar.gz $ export QAT_DRIVER_URL=https://downloadmirror.intel.com/30178/eng/${QAT_DRIVER_VER} $ export QAT_CONF_LOCATION=~/QAT_conf -$ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/master/demo/openssl-qat-engine/Dockerfile +$ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/main/demo/openssl-qat-engine/Dockerfile $ export QAT_SRC=~/src/QAT $ export GOPATH=~/src/go $ export KATA_KERNEL_LOCATION=~/kata diff --git a/docs/use-cases/using-Intel-SGX-and-kata.md b/docs/use-cases/using-Intel-SGX-and-kata.md index f45e3ed5bcfb..29635e6ae450 100644 --- a/docs/use-cases/using-Intel-SGX-and-kata.md +++ b/docs/use-cases/using-Intel-SGX-and-kata.md @@ -18,16 +18,13 @@ CONFIG_X86_SGX_KVM=y * Kubernetes cluster configured with: * [`kata-deploy`](../../tools/packaging/kata-deploy) based Kata Containers installation - * [Intel SGX Kubernetes device plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/main/cmd/sgx_plugin#deploying-with-pre-built-images) + * [Intel SGX Kubernetes device plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/main/cmd/sgx_plugin#deploying-with-pre-built-images) and associated components including [operator](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/main/cmd/operator/README.md) and dependencies > Note: Kata Containers supports creating VM sandboxes with Intel® SGX enabled > using [cloud-hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor/) and [QEMU](https://www.qemu.org/) VMMs only. ### Kata Containers Configuration -Before running a Kata Container make sure that your version of `crio` or `containerd` -supports annotations. - For `containerd` check in `/etc/containerd/config.toml` that the list of `pod_annotations` passed to the `sandbox` are: `["io.katacontainers.*", "sgx.intel.com/epc"]`. @@ -64,6 +61,9 @@ spec: name: eosgx-demo-job-1 image: oeciteam/oe-helloworld:latest imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /dev + name: dev-mount securityContext: readOnlyRootFilesystem: true capabilities: @@ -99,4 +99,4 @@ because socket passthrough is not supported. An alternative is to deploy the `ae container. * Projects like [Gramine Shielded Containers (GSC)](https://gramine-gsc.readthedocs.io/en/latest/) are also known to work. For GSC specifically, the Kata guest kernel needs to have the `CONFIG_NUMA=y` -enabled and at least one CPU online when running the GSC container. +enabled and at least one CPU online when running the GSC container. The Kata Containers guest kernel currently has `CONFIG_NUMA=y` enabled by default. diff --git a/docs/use-cases/using-SPDK-vhostuser-and-kata.md b/docs/use-cases/using-SPDK-vhostuser-and-kata.md index 4b091d420c71..ae75930aeb5f 100644 --- a/docs/use-cases/using-SPDK-vhostuser-and-kata.md +++ b/docs/use-cases/using-SPDK-vhostuser-and-kata.md @@ -197,11 +197,6 @@ vhost_user_store_path = "" > under `[hypervisor.qemu]` section. -For the subdirectories of `vhost_user_store_path`: `block` is used for block -device; `block/sockets` is where we expect UNIX domain sockets for vhost-user -block devices to live; `block/devices` is where simulated block device nodes -for vhost-user block devices are created. 
-
 For the subdirectories of `vhost_user_store_path`:
 - `block` is used for block device;
 - `block/sockets` is where we expect UNIX domain sockets for vhost-user
diff --git a/snap/local/README.md b/snap/local/README.md
deleted file mode 100644
index 4b449ef177de..000000000000
--- a/snap/local/README.md
+++ /dev/null
@@ -1,101 +0,0 @@
-# Kata Containers snap image
-
-This directory contains the resources needed to build the Kata Containers
-[snap][1] image.
-
-## Initial setup
-
-Kata Containers can be installed in any Linux distribution that supports
-[snapd](https://docs.snapcraft.io/installing-snapd). For this example, we
-assume Ubuntu as your base distro.
-```sh
-$ sudo apt-get --no-install-recommends install -y apt-utils ca-certificates snapd snapcraft
-```
-
-## Install snap
-
-You can install the Kata Containers snap from the [snapcraft store][8] or by running the following command:
-
-```sh
-$ sudo snap install kata-containers --classic
-```
-
-## Build and install snap image
-
-Run the command below which will use the packaging Makefile to build the snap image:
-
-```sh
-$ make -C tools/packaging snap
-```
-
-> **Warning:**
->
-> By default, `snapcraft` will create a clean virtual machine
-> environment to build the snap in using the `multipass` tool.
->
-> However, `multipass` is silently disabled when `--destructive-mode` is
-> used.
->
-> Since building the Kata Containers package currently requires
-> `--destructive-mode`, the snap will be built using the host
-> environment. To avoid parts of the build auto-detecting additional
-> features to enable (for example for QEMU), we recommend that you
-> only run the snap build in a minimal host environment.
-
-To install the resulting snap image, snap must be put in [classic mode][3] and the
-security confinement must be disabled (`--classic`). Also since the resulting snap
-has not been signed the verification of signature must be omitted (`--dangerous`).
-
-```sh
-$ sudo snap install --classic --dangerous "kata-containers_${version}_${arch}.snap"
-```
-
-Replace `${version}` with the current version of Kata Containers and `${arch}` with
-the system architecture.
-
-## Configure Kata Containers
-
-By default Kata Containers snap image is mounted at `/snap/kata-containers` as a
-read-only file system, therefore default configuration file can not be edited.
-Fortunately [`kata-runtime`][4] supports loading a configuration file from another
-path than the default.
-
-```sh
-$ sudo mkdir -p /etc/kata-containers
-$ sudo cp /snap/kata-containers/current/usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers/
-$ $EDITOR /etc/kata-containers/configuration.toml
-```
-
-## Integration with docker and Kubernetes
-
-The path to the runtime provided by the Kata Containers snap image is
-`/snap/kata-containers/current/usr/bin/kata-runtime`. You should use it to
-run Kata Containers with [docker][9] and [Kubernetes][10].
-
-## Remove snap
-
-You can remove the Kata Containers snap by running the following command:
-
-```sh
-$ sudo snap remove kata-containers
-```
-
-## Limitations
-
-The [miniOS image][2] is not included in the snap image as it is not possible for
-QEMU to open a guest RAM backing store on a read-only filesystem. Fortunately,
-you can start Kata Containers with a Linux initial RAM disk (initrd) that is
-included in the snap image. If you want to use the miniOS image instead of initrd,
-then a new configuration file can be [created](#configure-kata-containers)
-and [configured][7].
- -[1]: https://docs.snapcraft.io/snaps/intro -[2]: ../../docs/design/architecture/README.md#root-filesystem-image -[3]: https://docs.snapcraft.io/reference/confinement#classic -[4]: https://github.com/kata-containers/kata-containers/tree/main/src/runtime#configuration -[5]: https://docs.docker.com/engine/reference/commandline/dockerd -[6]: ../../docs/install/docker/ubuntu-docker-install.md -[7]: ../../docs/Developer-Guide.md#configure-to-use-initrd-or-rootfs-image -[8]: https://snapcraft.io/kata-containers -[9]: ../../docs/Developer-Guide.md#run-kata-containers-with-docker -[10]: ../../docs/Developer-Guide.md#run-kata-containers-with-kubernetes diff --git a/snap/local/snap-common.sh b/snap/local/snap-common.sh deleted file mode 100644 index 0a2a18e1519c..000000000000 --- a/snap/local/snap-common.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (c) 2022 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -# Description: Idempotent script to be sourced by all parts in a -# snapcraft config file. - -set -o errexit -set -o nounset -set -o pipefail - -# XXX: Bash-specific code. zsh doesn't support this option and that *does* -# matter if this script is run sourced... since it'll be using zsh! ;) -[ -n "$BASH_VERSION" ] && set -o errtrace - -[ -n "${DEBUG:-}" ] && set -o xtrace - -die() -{ - echo >&2 "ERROR: $0: $*" -} - -[ -n "${SNAPCRAFT_STAGE:-}" ] ||\ - die "must be sourced from a snapcraft config file" - -snap_yq_version=3.4.1 - -snap_common_install_yq() -{ - export yq="${SNAPCRAFT_STAGE}/bin/yq" - - local yq_pkg - yq_pkg="github.com/mikefarah/yq" - - local yq_url - yq_url="https://${yq_pkg}/releases/download/${snap_yq_version}/yq_${goos}_${goarch}" - curl -o "${yq}" -L "${yq_url}" - chmod +x "${yq}" -} - -# Function that should be called for each snap "part" in -# snapcraft.yaml. -snap_common_main() -{ - # Architecture - arch="$(uname -m)" - - case "${arch}" in - aarch64) - goarch="arm64" - qemu_arch="${arch}" - ;; - - ppc64le) - goarch="ppc64le" - qemu_arch="ppc64" - ;; - - s390x) - goarch="${arch}" - qemu_arch="${arch}" - ;; - - x86_64) - goarch="amd64" - qemu_arch="${arch}" - ;; - - *) die "unsupported architecture: ${arch}" ;; - esac - - dpkg_arch=$(dpkg --print-architecture) - - # golang - # - # We need the O/S name in golang format, but since we don't - # know if the godeps part has run, we don't know if golang is - # available yet, hence fall back to a standard system command. 
- goos="$(go env GOOS &>/dev/null || true)" - [ -z "$goos" ] && goos=$(uname -s|tr '[A-Z]' '[a-z]') - - export GOROOT="${SNAPCRAFT_STAGE}" - export GOPATH="${GOROOT}/gopath" - export GO111MODULE="auto" - - mkdir -p "${GOPATH}/bin" - export PATH="${GOPATH}/bin:${PATH}" - - # Proxy - export http_proxy="${http_proxy:-}" - export https_proxy="${https_proxy:-}" - - # Binaries - mkdir -p "${SNAPCRAFT_STAGE}/bin" - - export PATH="$PATH:${SNAPCRAFT_STAGE}/bin" - - # YAML query tool - export yq="${SNAPCRAFT_STAGE}/bin/yq" - - # Kata paths - export kata_dir=$(printf "%s/src/github.com/%s/%s" \ - "${GOPATH}" \ - "${SNAPCRAFT_PROJECT_NAME}" \ - "${SNAPCRAFT_PROJECT_NAME}") - - export versions_file="${kata_dir}/versions.yaml" - - [ -n "${yq:-}" ] && [ -x "${yq:-}" ] || snap_common_install_yq -} - -snap_common_main diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml deleted file mode 100644 index b462755080fc..000000000000 --- a/snap/snapcraft.yaml +++ /dev/null @@ -1,371 +0,0 @@ -name: kata-containers -website: https://github.com/kata-containers/kata-containers -summary: Build lightweight VMs that seamlessly plug into the containers ecosystem -description: | - Kata Containers is an open source project and community working to build a - standard implementation of lightweight Virtual Machines (VMs) that feel and - perform like containers, but provide the workload isolation and security - advantages of VMs -confinement: classic -adopt-info: metadata -base: core20 - -parts: - metadata: - plugin: nil - prime: - - -* - build-packages: - - git - - git-extras - override-pull: | - source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh" - - version="9999" - - if echo "${GITHUB_REF:-}" | grep -q -E "^refs/tags"; then - version=$(echo ${GITHUB_REF:-} | cut -d/ -f3) - git checkout ${version} - fi - - snapcraftctl set-grade "stable" - snapcraftctl set-version "${version}" - - mkdir -p $(dirname ${kata_dir}) - ln -sf $(realpath "${SNAPCRAFT_STAGE}/..") ${kata_dir} - - godeps: - after: [metadata] - plugin: nil - prime: - - -* - build-packages: - - curl - override-build: | - source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh" - - # put everything in stage - cd "${SNAPCRAFT_STAGE}" - - version="$(${yq} r ${kata_dir}/versions.yaml languages.golang.meta.newest-version)" - tarfile="go${version}.${goos}-${goarch}.tar.gz" - curl -LO https://golang.org/dl/${tarfile} - tar -xf ${tarfile} --strip-components=1 - - rustdeps: - after: [metadata] - plugin: nil - prime: - - -* - build-packages: - - curl - override-build: | - source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh" - - # put everything in stage - cd "${SNAPCRAFT_STAGE}" - - version="$(${yq} r ${kata_dir}/versions.yaml languages.rust.meta.newest-version)" - if ! 
command -v rustup > /dev/null; then - curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain ${version} - fi - - export PATH=${PATH}:${HOME}/.cargo/bin - rustup toolchain install ${version} - rustup default ${version} - if [ "${arch}" == "ppc64le" ] || [ "${arch}" == "s390x" ] ; then - [ "${arch}" == "ppc64le" ] && arch="powerpc64le" - rustup target add ${arch}-unknown-linux-gnu - else - rustup target add ${arch}-unknown-linux-musl - $([ "$(whoami)" != "root" ] && echo sudo) ln -sf /usr/bin/g++ /bin/musl-g++ - fi - rustup component add rustfmt - - image: - after: [godeps, qemu, kernel] - plugin: nil - build-packages: - - docker.io - - cpio - - git - - iptables - - software-properties-common - - uidmap - - gnupg2 - override-build: | - source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh" - - [ "${arch}" = "ppc64le" ] || [ "${arch}" = "s390x" ] && sudo apt-get --no-install-recommends install -y protobuf-compiler - - if [ -n "$http_proxy" ]; then - echo "Setting proxy $http_proxy" - sudo -E systemctl set-environment http_proxy="$http_proxy" || true - sudo -E systemctl set-environment https_proxy="$https_proxy" || true - fi - - # Copy yq binary. It's used in the container - cp -a "${yq}" "${GOPATH}/bin/" - - echo "Unmasking docker service" - sudo -E systemctl unmask docker.service || true - sudo -E systemctl unmask docker.socket || true - echo "Adding $USER into docker group" - sudo -E gpasswd -a $USER docker - echo "Starting docker" - sudo -E systemctl start docker || true - - cd "${kata_dir}/tools/osbuilder" - - # build image - export AGENT_INIT=yes - export USE_DOCKER=1 - export DEBUG=1 - initrd_distro=$(${yq} r -X ${kata_dir}/versions.yaml assets.initrd.architecture.${arch}.name) - image_distro=$(${yq} r -X ${kata_dir}/versions.yaml assets.image.architecture.${arch}.name) - case "$arch" in - x86_64) - # In some build systems it's impossible to build a rootfs image, try with the initrd image - sudo -E PATH=$PATH make image DISTRO="${image_distro}" || sudo -E PATH="$PATH" make initrd DISTRO="${initrd_distro}" - ;; - - aarch64|ppc64le|s390x) - sudo -E PATH="$PATH" make initrd DISTRO="${initrd_distro}" - ;; - - *) die "unsupported architecture: ${arch}" ;; - esac - - # Install image - kata_image_dir="${SNAPCRAFT_PART_INSTALL}/usr/share/kata-containers" - mkdir -p "${kata_image_dir}" - cp kata-containers*.img "${kata_image_dir}" - - runtime: - after: [godeps, image, cloud-hypervisor] - plugin: nil - build-attributes: [no-patchelf] - override-build: | - source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh" - - cd "${kata_dir}/src/runtime" - - qemu_cmd="qemu-system-${qemu_arch}" - - # build and install runtime - make \ - PREFIX="/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr" \ - SKIP_GO_VERSION_CHECK=1 \ - QEMUCMD="${qemu_cmd}" - - make install \ - PREFIX=/usr \ - DESTDIR="${SNAPCRAFT_PART_INSTALL}" \ - SKIP_GO_VERSION_CHECK=1 \ - QEMUCMD="${qemu_cmd}" - - if [ ! 
-f ${SNAPCRAFT_PART_INSTALL}/../../image/install/usr/share/kata-containers/kata-containers.img ]; then - sed -i -e "s|^image =.*|initrd = \"/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr/share/kata-containers/kata-containers-initrd.img\"|" \ - ${SNAPCRAFT_PART_INSTALL}/usr/share/defaults/${SNAPCRAFT_PROJECT_NAME}/configuration.toml - fi - - kernel: - after: [godeps] - plugin: nil - build-packages: - - libelf-dev - - curl - - build-essential - - bison - - flex - override-build: | - source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh" - - kernel_version="$(${yq} r $versions_file assets.kernel.version)" - #Remove extra 'v' - kernel_version="${kernel_version#v}" - - [ "${arch}" = "s390x" ] && sudo apt-get --no-install-recommends install -y libssl-dev - - cd "${kata_dir}/tools/packaging/kernel" - kernel_dir_prefix="kata-linux-" - - # Setup and build kernel - ./build-kernel.sh -v "${kernel_version}" -d setup - cd ${kernel_dir_prefix}* - make -j $(nproc ${CI:+--ignore 1}) EXTRAVERSION=".container" - - kernel_suffix="${kernel_version}.container" - kata_kernel_dir="${SNAPCRAFT_PART_INSTALL}/usr/share/kata-containers" - mkdir -p "${kata_kernel_dir}" - - # Install bz kernel - make install INSTALL_PATH="${kata_kernel_dir}" EXTRAVERSION=".container" || true - vmlinuz_name="vmlinuz-${kernel_suffix}" - ln -sf "${vmlinuz_name}" "${kata_kernel_dir}/vmlinuz.container" - - # Install raw kernel - vmlinux_path="vmlinux" - [ "${arch}" = "s390x" ] && vmlinux_path="arch/s390/boot/compressed/vmlinux" - vmlinux_name="vmlinux-${kernel_suffix}" - cp "${vmlinux_path}" "${kata_kernel_dir}/${vmlinux_name}" - ln -sf "${vmlinux_name}" "${kata_kernel_dir}/vmlinux.container" - - qemu: - plugin: make - after: [godeps] - build-packages: - - gcc - - python3 - - zlib1g-dev - - libcap-ng-dev - - libglib2.0-dev - - libpixman-1-dev - - libnuma-dev - - libltdl-dev - - libcap-dev - - libattr1-dev - - libfdt-dev - - curl - - libcapstone-dev - - bc - - libblkid-dev - - libffi-dev - - libmount-dev - - libseccomp-dev - - libselinux1-dev - - ninja-build - override-build: | - source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh" - - branch="$(${yq} r ${versions_file} assets.hypervisor.qemu.version)" - url="$(${yq} r ${versions_file} assets.hypervisor.qemu.url)" - commit="" - patches_dir="${kata_dir}/tools/packaging/qemu/patches/$(echo ${branch} | sed -e 's/.[[:digit:]]*$//' -e 's/^v//').x" - patches_version_dir="${kata_dir}/tools/packaging/qemu/patches/tag_patches/${branch}" - - # download source - qemu_dir="${SNAPCRAFT_STAGE}/qemu" - rm -rf "${qemu_dir}" - git clone --depth 1 --branch ${branch} --single-branch ${url} "${qemu_dir}" - cd "${qemu_dir}" - [ -z "${commit}" ] || git checkout "${commit}" - - [ -n "$(ls -A ui/keycodemapdb)" ] || git clone --depth 1 https://github.com/qemu/keycodemapdb ui/keycodemapdb/ - [ -n "$(ls -A capstone)" ] || git clone --depth 1 https://github.com/qemu/capstone capstone - - # Apply branch patches - [ -d "${patches_version_dir}" ] || mkdir "${patches_version_dir}" - ${kata_dir}/tools/packaging/scripts/apply_patches.sh "${patches_dir}" - ${kata_dir}/tools/packaging/scripts/apply_patches.sh "${patches_version_dir}" - - # Only x86_64 supports libpmem - [ "${arch}" = "x86_64" ] && sudo apt-get --no-install-recommends install -y apt-utils ca-certificates libpmem-dev - - configure_hypervisor="${kata_dir}/tools/packaging/scripts/configure-hypervisor.sh" - chmod +x "${configure_hypervisor}" - # static build. 
The --prefix, --libdir, --libexecdir, --datadir arguments are - # based on PREFIX and set by configure-hypervisor.sh - echo "$(PREFIX=/snap/${SNAPCRAFT_PROJECT_NAME}/current/usr ${configure_hypervisor} -s kata-qemu) \ - --disable-rbd " \ - | xargs ./configure - - # Copy QEMU configurations (Kconfigs) - case "${branch}" in - "v5.1.0") - cp -a "${kata_dir}"/tools/packaging/qemu/default-configs/* default-configs - ;; - - *) - cp -a "${kata_dir}"/tools/packaging/qemu/default-configs/* configs/devices/ - ;; - esac - - # build and install - make -j $(nproc ${CI:+--ignore 1}) - make install DESTDIR="${SNAPCRAFT_PART_INSTALL}" - prime: - - -snap/ - - -usr/bin/qemu-ga - - -usr/bin/qemu-pr-helper - - -usr/bin/virtfs-proxy-helper - - -usr/include/ - - -usr/share/applications/ - - -usr/share/icons/ - - -usr/var/ - - usr/* - - lib/* - organize: - # Hack: move qemu to / - "snap/kata-containers/current/": "./" - - virtiofsd: - plugin: nil - after: [godeps, rustdeps] - override-build: | - source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh" - - # Currently, powerpc makes use of the QEMU's C implementation. - # The other platforms make use of the new rust virtiofsd. - # - # See "tools/packaging/scripts/configure-hypervisor.sh". - if [ "${arch}" == "ppc64le" ] - then - echo "INFO: Building QEMU's C version of virtiofsd" - # Handled by the 'qemu' part, so nothing more to do here. - exit 0 - else - echo "INFO: Building rust version of virtiofsd" - fi - - cd "${kata_dir}" - - export PATH=${PATH}:${HOME}/.cargo/bin - # Download the rust implementation of virtiofsd - tools/packaging/static-build/virtiofsd/build-static-virtiofsd.sh - sudo install \ - --owner='root' \ - --group='root' \ - --mode=0755 \ - -D \ - --target-directory="${SNAPCRAFT_PART_INSTALL}/usr/libexec/" \ - virtiofsd/virtiofsd - - cloud-hypervisor: - plugin: nil - after: [godeps] - override-build: | - source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh" - - if [ "${arch}" == "aarch64" ] || [ "${arch}" == "x86_64" ]; then - sudo apt-get -y update - sudo apt-get -y install ca-certificates curl gnupg lsb-release - curl -fsSL https://download.docker.com/linux/ubuntu/gpg |\ - sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg - distro_codename=$(lsb_release -cs) - echo "deb [arch=${dpkg_arch} signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu ${distro_codename} stable" |\ - sudo tee /etc/apt/sources.list.d/docker.list > /dev/null - sudo apt-get -y update - sudo apt-get -y install docker-ce docker-ce-cli containerd.io - sudo systemctl start docker.socket - - cd "${SNAPCRAFT_PROJECT_DIR}" - sudo -E NO_TTY=true make cloud-hypervisor-tarball - - tarfile="${SNAPCRAFT_PROJECT_DIR}/tools/packaging/kata-deploy/local-build/build/kata-static-cloud-hypervisor.tar.xz" - tmpdir=$(mktemp -d) - - tar -xvJpf "${tarfile}" -C "${tmpdir}" - - install -D "${tmpdir}/opt/kata/bin/cloud-hypervisor" "${SNAPCRAFT_PART_INSTALL}/usr/bin/cloud-hypervisor" - - rm -rf "${tmpdir}" - fi - -apps: - runtime: - command: usr/bin/kata-runtime - shim: - command: usr/bin/containerd-shim-kata-v2 - collect-data: - command: usr/bin/kata-collect-data.sh diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index 8c9524c9c1c1..7a9ae2d960b1 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -10,13 +10,22 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aho-corasick" -version = "0.7.18" +version = "1.0.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783" dependencies = [ "memchr", ] +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "ansi_term" version = "0.12.1" @@ -38,6 +47,100 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f" +[[package]] +name = "async-broadcast" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c48ccdbf6ca6b121e0f586cbc0e73ae440e56c67c30fa0873b4e110d9c26d2b" +dependencies = [ + "event-listener", + "futures-core", +] + +[[package]] +name = "async-channel" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14485364214912d3b19cc3435dde4df66065127f05fa0d75c712f36f12c2f28" +dependencies = [ + "concurrent-queue 1.2.4", + "event-listener", + "futures-core", +] + +[[package]] +name = "async-executor" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fa3dc5f2a8564f07759c008b9109dc0d39de92a88d5588b8a5036d286383afb" +dependencies = [ + "async-lock", + "async-task", + "concurrent-queue 2.2.0", + "fastrand", + "futures-lite", + "slab", +] + +[[package]] +name = "async-fs" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "279cf904654eeebfa37ac9bb1598880884924aab82e290aa65c9e77a0e142e06" +dependencies = [ + "async-lock", + "autocfg", + "blocking", + "futures-lite", +] + +[[package]] +name = "async-io" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fc5b45d93ef0529756f812ca52e44c221b35341892d3dcc34132ac02f3dd2af" +dependencies = [ + "async-lock", + "autocfg", + "cfg-if 1.0.0", + "concurrent-queue 2.2.0", + "futures-lite", + "log", + "parking", + "polling", + "rustix", + "slab", + "socket2", + "waker-fn", +] + +[[package]] +name = "async-lock" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b" +dependencies = [ + "event-listener", +] + +[[package]] +name = "async-process" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a9d28b1d97e08915212e2e45310d47854eafa69600756fc735fb788f75199c9" +dependencies = [ + "async-io", + "async-lock", + "autocfg", + "blocking", + "cfg-if 1.0.0", + "event-listener", + "futures-lite", + "rustix", + "signal-hook", + "windows-sys 0.48.0", +] + [[package]] name = "async-recursion" version = "0.3.2" @@ -46,27 +149,50 @@ checksum = "d7d78656ba01f1b93024b7c3a0467f1608e4be67d725749fdcd7d2c7678fd7a2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", +] + +[[package]] +name = "async-recursion" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.16", ] +[[package]] +name = "async-task" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7a40729d2133846d9ed0ea60a8b9541bccddab49cd30f0715a1da672fe9a2524" + [[package]] name = "async-trait" -version = "0.1.56" +version = "0.1.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96cf8829f67d2eab0b2dfa42c5d0ef737e0724e4a82b01b3e292456202b19716" +checksum = "7b2d0f03b3640e3a630367e40c468cb7f309529c708ed1d88597047b0e7c6ef7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.16", ] +[[package]] +name = "atomic-waker" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1181e1e0d1fce796a03db1ae795d67167da795f9cf4a39c37589e85ef57f26d3" + [[package]] name = "atty" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -77,6 +203,18 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + +[[package]] +name = "base64" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" + [[package]] name = "bincode" version = "1.3.3" @@ -86,18 +224,64 @@ dependencies = [ "serde", ] +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitmask-enum" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd9e32d7420c85055e8107e5b2463c4eeefeaac18b52359fe9f9c08a18f342b2" +dependencies = [ + "quote", + "syn 1.0.109", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "blocking" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77231a1c8f801696fc0123ec6150ce92cffb8e164a02afb9c8ddee0e9b65ad65" +dependencies = [ + "async-channel", + "async-lock", + "async-task", + "atomic-waker", + "fastrand", + "futures-lite", + "log", +] + [[package]] name = "bumpalo" version = "3.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" +[[package]] +name = "byte-unit" +version = "3.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "415301c9de11005d4b92193c0eb7ac7adc37e5a49e0ac9bed0a42343512744b8" + [[package]] name = "byteorder" version = "1.4.3" @@ -120,6 +304,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" +[[package]] +name = "cache-padded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c1db59621ec70f09c5e9b597b220c7a2b43611f4710dc03ceb8748637775692c" + [[package]] name = "capctl" version = "0.2.1" @@ -137,16 +327,19 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61bf7211aad104ce2769ec05efcdfabf85ee84ac92461d142f22cf8badd0e54c" dependencies = [ - "errno", + "errno 0.2.8", "libc", "thiserror", ] [[package]] name = "cc" -version = "1.0.73" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +checksum = "6c6b2562119bf28c3439f7f02db99faf0aa1a8cdfe5772a2ee155d32227239f0" +dependencies = [ + "libc", +] [[package]] name = "cfg-if" @@ -162,26 +355,29 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.2.9" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdae996d9638ba03253ffa1c93345a585974a97abbdeab9176c77922f3efc1e8" +checksum = "5b098e7c3a70d03c288fa0a96ccf13e770eb3d78c4cc0e1549b3c13215d5f965" dependencies = [ "libc", "log", - "nix 0.23.1", + "nix 0.25.1", "regex", + "thiserror", ] [[package]] name = "chrono" -version = "0.4.19" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1" dependencies = [ - "libc", + "iana-time-zone", + "js-sys", "num-integer", "num-traits", "time 0.1.44", + "wasm-bindgen", "winapi", ] @@ -212,7 +408,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -224,12 +420,55 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "common-path" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2382f75942f4b3be3690fe4f86365e9c853c1587d6ee58212cebf6e2a9ccd101" + +[[package]] +name = "concurrent-queue" +version = "1.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af4780a44ab5696ea9e28294517f1fffb421a83a25af521333c838635509db9c" +dependencies = [ + "cache-padded", +] + +[[package]] +name = "concurrent-queue" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62ec6771ecfa0762d24683ee5a32ad78487a3d3afdc0fb8cae19d2c5deb50b7c" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "core-foundation" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +[[package]] +name = "cpufeatures" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.3.2" @@ -250,38 +489,67 @@ dependencies = [ ] [[package]] -name = "crossbeam-deque" -version = "0.8.1" +name = "crossbeam-utils" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +checksum = 
"5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" dependencies = [ "cfg-if 1.0.0", - "crossbeam-epoch", - "crossbeam-utils", ] [[package]] -name = "crossbeam-epoch" -version = "0.9.9" +name = "crypto-common" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ - "autocfg", - "cfg-if 1.0.0", - "crossbeam-utils", - "memoffset", - "once_cell", - "scopeguard", + "generic-array", + "typenum", ] [[package]] -name = "crossbeam-utils" -version = "0.8.10" +name = "darling" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83" +checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" dependencies = [ - "cfg-if 1.0.0", - "once_cell", + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "darling_macro" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" +dependencies = [ + "darling_core", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", ] [[package]] @@ -292,7 +560,38 @@ checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if 1.0.0", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", ] [[package]] @@ -301,6 +600,36 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +[[package]] +name = "encoding_rs" +version = "0.8.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "enumflags2" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c041f5090df68b32bcd905365fd51769c8b9d553fe87fde0b683534f10c01bd2" +dependencies = [ + "enumflags2_derive", + "serde", +] + +[[package]] +name = "enumflags2_derive" +version = 
"0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e9a1f9f7d83e59740248a6e14ecf93929ade55027844dfcea78beafccc15745" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.16", +] + [[package]] name = "errno" version = "0.2.8" @@ -312,6 +641,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "errno" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136526188508e25c6fef639d7927dfb3e0e3084488bf202267829cf7fc23dbdd" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "errno-dragonfly" version = "0.1.2" @@ -322,6 +662,23 @@ dependencies = [ "libc", ] +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + +[[package]] +name = "fail" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3245a0ca564e7f3c797d20d833a6870f57a728ac967d5225b3ffdef4465011" +dependencies = [ + "lazy_static", + "log", + "rand", +] + [[package]] name = "fastrand" version = "1.7.0" @@ -353,6 +710,31 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +dependencies = [ + "matches", + "percent-encoding", +] + [[package]] name = "futures" version = "0.3.21" @@ -370,9 +752,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.21" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", "futures-sink", @@ -380,9 +762,9 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.21" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" [[package]] name = "futures-executor" @@ -397,38 +779,53 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.21" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" + +[[package]] +name = "futures-lite" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694489acd39452c77daa48516b894c153f192c3578d5a839b62c58099fcbf48" +dependencies = [ + "fastrand", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + 
"waker-fn", +] [[package]] name = "futures-macro" -version = "0.3.21" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.16", ] [[package]] name = "futures-sink" -version = "0.3.21" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" [[package]] name = "futures-task" -version = "0.3.21" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" [[package]] name = "futures-util" -version = "0.3.21" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ "futures-channel", "futures-core", @@ -442,6 +839,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.7" @@ -453,6 +860,31 @@ dependencies = [ "wasi 0.11.0+wasi-snapshot-preview1", ] +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + +[[package]] +name = "h2" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97ec8491ebaf99c8eaa73058b045fe58073cd6be7f596ac993ced0b0a0c01049" +dependencies = [ + "bytes 1.1.0", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util 0.7.8", + "tracing", +] + [[package]] name = "hashbrown" version = "0.12.1" @@ -483,12 +915,119 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" + [[package]] name = "hex" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "http" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +dependencies = [ + "bytes 1.1.0", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +dependencies = [ + "bytes 1.1.0", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = 
"httpdate" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" + +[[package]] +name = "hyper" +version = "0.14.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" +dependencies = [ + "bytes 1.1.0", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes 1.1.0", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad2bfd338099682614d3ee3fe0cd72e0b6a41ca6a87f6a74a3bd593c91650501" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "js-sys", + "wasm-bindgen", + "winapi", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "indexmap" version = "1.9.1" @@ -530,6 +1069,17 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "io-lifetimes" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" +dependencies = [ + "hermit-abi 0.3.2", + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "iovec" version = "0.1.4" @@ -539,6 +1089,12 @@ dependencies = [ "libc", ] +[[package]] +name = "ipnet" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" + [[package]] name = "ipnetwork" version = "0.17.0" @@ -577,27 +1133,33 @@ name = "kata-agent" version = "0.1.0" dependencies = [ "anyhow", - "async-recursion", + "async-recursion 0.3.2", "async-trait", "capctl", + "cfg-if 1.0.0", "cgroups-rs", "clap", "futures", + "http", "ipnetwork", + "kata-sys-util", + "kata-types", "lazy_static", "libc", "log", "logging", "netlink-packet-utils", "netlink-sys", - "nix 0.23.1", + "nix 0.24.2", "oci", + "openssl", "opentelemetry", "procfs", "prometheus", - "protobuf", + "protobuf 3.2.0", "protocols", "regex", + "reqwest", "rtnetlink", "rustjail", "scan_fmt", @@ -608,8 +1170,9 @@ dependencies = [ "slog", "slog-scope", "slog-stdlog", - "sysinfo", + "slog-term", "tempfile", + "test-utils", "thiserror", "tokio", "tokio-vsock", @@ -619,6 +1182,54 @@ dependencies = [ "tracing-subscriber", "ttrpc", "vsock-exporter", + "which", +] + +[[package]] +name = "kata-sys-util" +version = "0.1.0" +dependencies = [ + "anyhow", + "byteorder", + "cgroups-rs", + "chrono", + "common-path", + "fail", + "kata-types", + "lazy_static", + "libc", + "nix 0.24.2", + "oci", + "once_cell", + "rand", + "serde_json", + "slog", + 
"slog-scope", + "subprocess", + "thiserror", +] + +[[package]] +name = "kata-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "base64 0.13.0", + "bitmask-enum", + "byte-unit", + "glob", + "lazy_static", + "num_cpus", + "oci", + "regex", + "safe-path", + "serde", + "serde-enum-str", + "serde_json", + "slog", + "slog-scope", + "thiserror", + "toml", ] [[package]] @@ -629,27 +1240,33 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.126" +version = "0.2.139" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" [[package]] name = "libseccomp" -version = "0.1.3" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36ad71a5b66ceef3acfe6a3178b29b4da063f8bcb2c36dab666d52a7a9cfdb86" +checksum = "21c57fd8981a80019807b7b68118618d29a87177c63d704fc96e6ecd003ae5b3" dependencies = [ + "bitflags", "libc", "libseccomp-sys", - "nix 0.17.0", "pkg-config", ] [[package]] name = "libseccomp-sys" -version = "0.1.1" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7cbbd4ad467251987c6e5b47d53b11a5a05add08f2447a9e2d70aef1e0d138" + +[[package]] +name = "linux-raw-sys" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "539912de229a4fc16e507e8df12a394038a524a5b5b6c92045ad344472aac475" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "lock_api" @@ -679,6 +1296,7 @@ dependencies = [ "slog-async", "slog-json", "slog-scope", + "slog-term", ] [[package]] @@ -687,9 +1305,15 @@ version = "0.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f099785f7595cc4b4553a174ce30dd7589ef93391ff414dbb67f62392b9e0ce1" dependencies = [ - "regex-automata", + "regex-automata 0.1.10", ] +[[package]] +name = "matches" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" + [[package]] name = "memchr" version = "2.5.0" @@ -705,6 +1329,21 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + [[package]] name = "miniz_oxide" version = "0.5.3" @@ -723,7 +1362,7 @@ dependencies = [ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys", + "windows-sys 0.36.1", ] [[package]] @@ -732,6 +1371,24 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +[[package]] +name = "native-tls" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + [[package]] name = 
"netlink-packet-core" version = "0.2.4" @@ -782,7 +1439,7 @@ dependencies = [ "netlink-packet-core", "netlink-sys", "tokio", - "tokio-util", + "tokio-util 0.6.10", ] [[package]] @@ -799,50 +1456,64 @@ dependencies = [ [[package]] name = "nix" -version = "0.17.0" +version = "0.22.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50e4785f2c3b7589a0d0c1dd60285e1188adac4006e8abd6dd578e1567027363" +checksum = "e4916f159ed8e5de0082076562152a76b7a1f64a01fd9d1e0fea002c37624faf" dependencies = [ "bitflags", "cc", - "cfg-if 0.1.10", + "cfg-if 1.0.0", "libc", - "void", + "memoffset 0.6.5", ] [[package]] name = "nix" -version = "0.22.3" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4916f159ed8e5de0082076562152a76b7a1f64a01fd9d1e0fea002c37624faf" +checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6" dependencies = [ "bitflags", "cc", "cfg-if 1.0.0", "libc", - "memoffset", + "memoffset 0.6.5", ] [[package]] name = "nix" -version = "0.23.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6" +checksum = "195cdbc1741b8134346d515b3a56a1c94b0912758009cfd53f99ea0f57b065fc" dependencies = [ "bitflags", - "cc", "cfg-if 1.0.0", "libc", - "memoffset", + "memoffset 0.6.5", ] [[package]] -name = "ntapi" -version = "0.3.7" +name = "nix" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28774a7fd2fbb4f0babd8237ce554b73af68021b5f695a3cebd6c59bac0980f" +checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" dependencies = [ - "winapi", + "autocfg", + "bitflags", + "cfg-if 1.0.0", + "libc", +] + +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags", + "cfg-if 1.0.0", + "libc", + "memoffset 0.7.1", ] [[package]] @@ -870,7 +1541,7 @@ version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", ] @@ -895,9 +1566,63 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.12.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" +checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" + +[[package]] +name = "openssl" +version = "0.10.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d" +dependencies = [ + "bitflags", + "cfg-if 1.0.0", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.16", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-src" +version = "111.27.0+1.1.1v" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "06e8f197c82d7511c5b014030c9b1efeda40d7d5f99d23b4ceed3524a5e63f02" +dependencies = [ + "cc", +] + +[[package]] +name = "openssl-sys" +version = "0.9.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6" +dependencies = [ + "cc", + "libc", + "openssl-src", + "pkg-config", + "vcpkg", +] [[package]] name = "opentelemetry" @@ -919,12 +1644,28 @@ dependencies = [ "tokio-stream", ] +[[package]] +name = "ordered-stream" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aa2b01e1d916879f73a53d01d1d6cee68adbb31d6d9177a8cfce093cced1d50" +dependencies = [ + "futures-core", + "pin-project-lite", +] + [[package]] name = "os_str_bytes" version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa" +[[package]] +name = "parking" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72" + [[package]] name = "parking_lot" version = "0.11.2" @@ -970,7 +1711,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-sys", + "windows-sys 0.36.1", ] [[package]] @@ -1031,7 +1772,7 @@ checksum = "744b6f092ba29c3650faf274db506afd39944f48420f6c86b17cfe0ee1cb36bb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1052,12 +1793,37 @@ version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" +[[package]] +name = "polling" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab4609a838d88b73d8238967b60dd115cc08d38e2bbaf51ee1e4b695f89122e2" +dependencies = [ + "autocfg", + "cfg-if 1.0.0", + "libc", + "log", + "wepoll-ffi", + "winapi", +] + [[package]] name = "ppv-lite86" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" +[[package]] +name = "proc-macro-crate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eda0fc3b0fb7c975631757e14d9049da17374063edb6ebbcbc54d880d4fe94e9" +dependencies = [ + "once_cell", + "thiserror", + "toml", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -1067,7 +1833,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "version_check", ] @@ -1084,9 +1850,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.40" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" +checksum = "fa1fb82fc0c281dd9671101b66b771ebbe1eaf967b96ac8740dcba4b70005ca8" dependencies = [ "unicode-ident", ] @@ -1119,7 +1885,7 @@ dependencies = [ "memchr", "parking_lot 0.12.1", "procfs", - "protobuf", + "protobuf 2.27.1", "thiserror", ] @@ -1161,7 +1927,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1176,31 +1942,68 @@ dependencies = [ [[package]] name = "protobuf" -version = "2.14.0" +version = "2.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cf7e6d18738ecd0902d30d1ad232c9125985a3422929b16c65517b38adc14f96" + +[[package]] +name = "protobuf" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e86d370532557ae7573551a1ec8235a0f8d6cb276c7c9e6aa490b511c447485" +checksum = "b55bad9126f378a853655831eb7363b7b01b81d19f8cb1218861086ca4a1a61e" dependencies = [ - "serde", - "serde_derive", + "once_cell", + "protobuf-support", + "thiserror", +] + +[[package]] +name = "protobuf-codegen" +version = "2.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aec1632b7c8f2e620343439a7dfd1f3c47b18906c4be58982079911482b5d707" +dependencies = [ + "protobuf 2.27.1", ] [[package]] name = "protobuf-codegen" -version = "2.14.0" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd418ac3c91caa4032d37cb80ff0d44e2ebe637b2fb243b6234bf89cdac4901" +dependencies = [ + "anyhow", + "once_cell", + "protobuf 3.2.0", + "protobuf-parse", + "regex", + "tempfile", + "thiserror", +] + +[[package]] +name = "protobuf-parse" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de113bba758ccf2c1ef816b127c958001b7831136c9bc3f8e9ec695ac4e82b0c" +checksum = "9d39b14605eaa1f6a340aec7f320b34064feb26c93aec35d6a9a2272a8ddfa49" dependencies = [ - "protobuf", + "anyhow", + "indexmap", + "log", + "protobuf 3.2.0", + "protobuf-support", + "tempfile", + "thiserror", + "which", ] [[package]] -name = "protobuf-codegen-pure" -version = "2.14.0" +name = "protobuf-support" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d1a4febc73bf0cada1d77c459a0c8e5973179f1cfd5b0f1ab789d45b17b6440" +checksum = "a5d4d7b8601c814cfb36bcebb79f0e61e45e1e93640cf778837833bbed05c372" dependencies = [ - "protobuf", - "protobuf-codegen", + "thiserror", ] [[package]] @@ -1208,16 +2011,19 @@ name = "protocols" version = "0.1.0" dependencies = [ "async-trait", - "protobuf", + "oci", + "protobuf 3.2.0", + "serde", + "serde_json", "ttrpc", "ttrpc-codegen", ] [[package]] name = "quote" -version = "1.0.20" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" +checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500" dependencies = [ "proc-macro2", ] @@ -1253,47 +2059,35 @@ dependencies = [ ] [[package]] -name = "rayon" -version = "1.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" -dependencies = [ - "autocfg", - "crossbeam-deque", - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.9.3" +name = "redox_syscall" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" +checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-utils", - "num_cpus", + "bitflags", ] [[package]] -name = "redox_syscall" -version = "0.2.13" +name = "redox_users" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ - "bitflags", + "getrandom", + "redox_syscall", + 
"thiserror", ] [[package]] name = "regex" -version = "1.5.6" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" +checksum = "12de2eff854e5fa4b1295edd650e227e9d8fb0c9e90b12e7f36d6a6811791a29" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-automata 0.3.7", + "regex-syntax 0.7.5", ] [[package]] @@ -1302,7 +2096,18 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" dependencies = [ - "regex-syntax", + "regex-syntax 0.6.26", +] + +[[package]] +name = "regex-automata" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49530408a136e16e5b486e883fbb6ba058e8e4e8ae6621a77b048b314336e629" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.5", ] [[package]] @@ -1311,6 +2116,12 @@ version = "0.6.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + [[package]] name = "remove_dir_all" version = "0.5.3" @@ -1320,6 +2131,43 @@ dependencies = [ "winapi", ] +[[package]] +name = "reqwest" +version = "0.11.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" +dependencies = [ + "base64 0.21.2", + "bytes 1.1.0", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + [[package]] name = "rlimit" version = "0.5.4" @@ -1344,25 +2192,41 @@ dependencies = [ "tokio", ] +[[package]] +name = "rustix" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b24138615de35e32031d041a09032ef3487a616d901ca4db224e7d557efae2" +dependencies = [ + "bitflags", + "errno 0.3.3", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys 0.45.0", +] + [[package]] name = "rustjail" version = "0.1.0" dependencies = [ "anyhow", "async-trait", + "bit-vec", "capctl", "caps", "cfg-if 0.1.10", "cgroups-rs", "futures", "inotify", + "kata-sys-util", "lazy_static", "libc", "libseccomp", - "nix 0.23.1", + "nix 0.24.2", "oci", "path-absolutize", - "protobuf", + "protobuf 3.2.0", "protocols", "regex", "rlimit", @@ -1375,15 +2239,31 @@ dependencies = [ "slog", "slog-scope", "tempfile", + "test-utils", "tokio", + "xattr", + "zbus", ] +[[package]] +name = "rustversion" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" + [[package]] name = "ryu" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" +[[package]] +name = "safe-path" +version = "0.1.0" +dependencies = [ + "libc", +] + [[package]] name = "scan_fmt" version = "0.2.6" @@ -1393,12 
+2273,44 @@ dependencies = [ "regex", ] +[[package]] +name = "schannel" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" +dependencies = [ + "windows-sys 0.48.0", +] + [[package]] name = "scopeguard" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "security-framework" +version = "2.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "serde" version = "1.0.137" @@ -1408,6 +2320,36 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-attributes" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eb8ec7724e4e524b2492b510e66957fe1a2c76c26a6975ec80823f2439da685" +dependencies = [ + "darling_core", + "serde-rename-rule", + "syn 1.0.109", +] + +[[package]] +name = "serde-enum-str" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26416dc95fcd46b0e4b12a3758043a229a6914050aaec2e8191949753ed4e9aa" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "serde-attributes", + "syn 1.0.109", +] + +[[package]] +name = "serde-rename-rule" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "794e44574226fc701e3be5c651feb7939038fc67fb73f6f4dd5c4ba90fd3be70" + [[package]] name = "serde_derive" version = "1.0.137" @@ -1416,7 +2358,7 @@ checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1430,6 +2372,29 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_repr" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fe39d9fbb0ebf5eb2c7cb7e2a47e4f462fad1379f1166b8ae49ad9eae89a7ca" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + [[package]] name = "serial_test" version = "0.5.1" @@ -1449,7 +2414,18 @@ checksum = "b2acd6defeddb41eb60bb468f8825d0cfd0c2a76bc03bfd235b6a1dc4f6a1ad5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", +] + +[[package]] +name = "sha1" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" +dependencies = [ + "cfg-if 1.0.0", + "cpufeatures", + "digest", ] [[package]] @@ -1461,6 +2437,16 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "signal-hook" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801" +dependencies = [ + "libc", + "signal-hook-registry", +] + [[package]] name = "signal-hook-registry" version = "1.4.0" @@ -1534,6 +2520,19 @@ dependencies = [ "slog-scope", ] +[[package]] +name = "slog-term" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87d29185c55b7b258b4f120eab00f48557d4d9bc814f41713f449d35b0f8977c" +dependencies = [ + "atty", + "slog", + "term", + "thread_local", + "time 0.3.11", +] + [[package]] name = "smallvec" version = "1.8.0" @@ -1542,25 +2541,41 @@ checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" [[package]] name = "socket2" -version = "0.4.4" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" +checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" dependencies = [ "libc", "winapi", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "subprocess" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2e86926081dda636c546d8c5e641661049d7562a68f5488be4a1f7f66f6086" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "syn" -version = "1.0.98" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", "quote", @@ -1568,18 +2583,14 @@ dependencies = [ ] [[package]] -name = "sysinfo" -version = "0.23.13" +name = "syn" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3977ec2e0520829be45c8a2df70db2bf364714d8a748316a10c3c35d4d2b01c9" +checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01" dependencies = [ - "cfg-if 1.0.0", - "core-foundation-sys", - "libc", - "ntapi", - "once_cell", - "rayon", - "winapi", + "proc-macro2", + "quote", + "unicode-ident", ] [[package]] @@ -1602,6 +2613,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + [[package]] name = "termcolor" version = "1.1.3" @@ -1611,6 +2633,13 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "test-utils" +version = "0.1.0" +dependencies = [ + "nix 0.24.2", +] + [[package]] name = "textwrap" version = "0.15.0" @@ -1634,7 +2663,7 @@ checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1666,37 +2695,68 @@ dependencies = [ "itoa", "libc", "num_threads", + "time-macros", +] + +[[package]] +name = "time-macros" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" + +[[package]] 
+name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", ] +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" -version = "1.19.2" +version = "1.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c51a52ed6686dd62c320f9b89299e9dfb46f730c7a48e635c19f21d116cb1439" +checksum = "0aa32867d44e6f2ce3385e89dceb990188b8bb0fb25b0cf576647a6f98ac5105" dependencies = [ + "autocfg", "bytes 1.1.0", "libc", - "memchr", "mio", "num_cpus", - "once_cell", "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", "socket2", "tokio-macros", - "winapi", + "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "1.8.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484" +checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.16", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", ] [[package]] @@ -1724,6 +2784,20 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-util" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" +dependencies = [ + "bytes 1.1.0", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", + "tracing", +] + [[package]] name = "tokio-vsock" version = "0.3.1" @@ -1746,11 +2820,17 @@ dependencies = [ "serde", ] +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + [[package]] name = "tracing" -version = "0.1.35" +version = "0.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a400e31aa60b9d44a52a8ee0343b5b18566b03a8321e0d321f695cf56e940160" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" dependencies = [ "cfg-if 1.0.0", "pin-project-lite", @@ -1760,20 +2840,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.21" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc6b8ad3567499f98a1db7a752b07a7c8c7c7c34c332ec00effb2b0027974b7c" +checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.16", ] [[package]] name = "tracing-core" -version = "0.1.28" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b7358be39f2f274f322d2aaed611acc57f382e8eb1e5b48cb9ae30933495ce7" +checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" dependencies = [ "once_cell", "valuable", @@ -1835,11 +2915,17 @@ dependencies = [ "tracing-serde", ] +[[package]] +name = "try-lock" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" + [[package]] name = "ttrpc" -version = "0.5.3" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c46d73bc2a74f2440921b6539afbed68064b48b2c4f194c637430d1c83d052ad" +checksum = "a35f22a2964bea14afee161665bb260b83cb48e665e0260ca06ec0e775c8b06c" dependencies = [ "async-trait", "byteorder", @@ -1847,8 +2933,8 @@ dependencies = [ "libc", "log", "nix 0.23.1", - "protobuf", - "protobuf-codegen-pure", + "protobuf 3.2.0", + "protobuf-codegen 3.2.0", "thiserror", "tokio", "tokio-vsock", @@ -1856,43 +2942,85 @@ dependencies = [ [[package]] name = "ttrpc-codegen" -version = "0.2.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809eda4e459820237104e4b61d6b41bbe6c9e1ce6adf4057955e6e6722a90408" +checksum = "94d7f7631d7a9ebed715a47cd4cb6072cbc7ae1d4ec01598971bbec0024340c2" dependencies = [ - "protobuf", - "protobuf-codegen", - "protobuf-codegen-pure", + "protobuf 2.27.1", + "protobuf-codegen 3.2.0", + "protobuf-support", "ttrpc-compiler", ] [[package]] name = "ttrpc-compiler" -version = "0.4.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2978ed3fa047d8fd55cbeb4d4a61d461fb3021a90c9618519c73ce7e5bb66c15" +checksum = "ec3cb5dbf1f0865a34fe3f722290fe776cacb16f50428610b779467b76ddf647" dependencies = [ "derive-new", "prost", "prost-build", "prost-types", - "protobuf", - "protobuf-codegen", + "protobuf 2.27.1", + "protobuf-codegen 2.27.1", + "tempfile", +] + +[[package]] +name = "typenum" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" + +[[package]] +name = "uds_windows" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce65604324d3cce9b966701489fbd0cf318cb1f7bd9dd07ac9a4ee6fb791930d" +dependencies = [ "tempfile", + "winapi", ] +[[package]] +name = "unicode-bidi" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + [[package]] name = "unicode-ident" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-segmentation" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" +[[package]] +name = "url" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22fe195a4f217c25b25cb5058ced57059824a678474874038dc88d211bf508d3" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + [[package]] name = "valuable" version = "0.1.0" @@ -1900,16 +3028,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" [[package]] -name = "version_check" -version = "0.9.4" +name = "vcpkg" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] -name = "void" -version = "1.0.2" +name = "version_check" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "vsock" @@ -1929,7 +3057,7 @@ dependencies = [ "bincode", "byteorder", "libc", - "nix 0.23.1", + "nix 0.24.2", "opentelemetry", "serde", "slog", @@ -1938,6 +3066,21 @@ dependencies = [ "tokio-vsock", ] +[[package]] +name = "waker-fn" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + [[package]] name = "wasi" version = "0.10.0+wasi-snapshot-preview1" @@ -1971,10 +3114,22 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de9a9cec1733468a8c657e57fa2413d2ae2c0129b95e87c5b72b8ace4d13f31f" +dependencies = [ + "cfg-if 1.0.0", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.81" @@ -1993,7 +3148,7 @@ checksum = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2004,15 +3159,34 @@ version = "0.2.81" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be" +[[package]] +name = "web-sys" +version = "0.3.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wepoll-ffi" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d743fdedc5c64377b5fc2bc036b01c7fd642205a0d96356034ae3404d49eb7fb" +dependencies = [ + "cc", +] + [[package]] name = "which" -version = "4.2.5" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c4fb54e6113b6a8772ee41c3404fb0301ac79604489467e0a9ce1f3e97c24ae" +checksum = "1c831fbbee9e129a8cf93e7747a82da9d95ba8e16621cae60ec2cdc849bacb7b" dependencies = [ "either", - "lazy_static", "libc", + "once_cell", ] [[package]] @@ -2052,39 +3226,303 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", + "windows_aarch64_msvc 0.36.1", + "windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", +] + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.0", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + [[package]] name = "windows_aarch64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + [[package]] name = "windows_i686_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + [[package]] name = "windows_i686_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" 
+ [[package]] name = "windows_x86_64_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + [[package]] name = "windows_x86_64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "winreg" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" +dependencies = [ + "winapi", +] + +[[package]] +name = "xattr" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc" +dependencies = [ + "libc", +] + +[[package]] +name = "xdg-home" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2769203cd13a0c6015d515be729c526d041e9cf2c0cc478d57faee85f40c6dcd" +dependencies = [ + "nix 0.26.4", + "winapi", +] + +[[package]] +name = "zbus" +version = "3.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31de390a2d872e4cd04edd71b425e29853f786dc99317ed72d73d6fcf5ebb948" +dependencies = [ + "async-broadcast", + "async-executor", + "async-fs", + "async-io", + "async-lock", + "async-process", + "async-recursion 1.0.5", + "async-task", + "async-trait", + "blocking", + "byteorder", + "derivative", + "enumflags2", + "event-listener", + "futures-core", + "futures-sink", + "futures-util", + "hex", + "nix 0.26.4", + "once_cell", + "ordered-stream", + "rand", + "serde", + "serde_repr", + "sha1", + "static_assertions", + "tracing", + "uds_windows", + "winapi", + "xdg-home", + "zbus_macros", + "zbus_names", + "zvariant", +] + +[[package]] +name = "zbus_macros" +version = "3.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d1794a946878c0e807f55a397187c11fc7a038ba5d868e7db4f3bd7760bc9d" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "syn 1.0.109", + "zvariant_utils", +] + +[[package]] +name = "zbus_names" +version = "2.6.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb80bb776dbda6e23d705cf0123c3b95df99c4ebeaec6c2599d4a5419902b4a9" +dependencies = [ + "serde", + "static_assertions", + "zvariant", +] + +[[package]] +name = "zvariant" +version = "3.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44b291bee0d960c53170780af148dca5fa260a63cdd24f1962fa82e03e53338c" +dependencies = [ + "byteorder", + "enumflags2", + "libc", + "serde", + "static_assertions", + "zvariant_derive", +] + +[[package]] +name = "zvariant_derive" +version = "3.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "934d7a7dfc310d6ee06c87ffe88ef4eca7d3e37bb251dece2ef93da8f17d8ecd" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 1.0.109", + "zvariant_utils", +] + +[[package]] +name = "zvariant_utils" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7234f0d811589db492d16893e3f21e8e2fd282e6d01b0cddee310322062cc200" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] diff --git a/src/agent/Cargo.toml b/src/agent/Cargo.toml index ae809bdaf781..47e1d378bcf5 100644 --- a/src/agent/Cargo.toml +++ b/src/agent/Cargo.toml @@ -3,24 +3,26 @@ name = "kata-agent" version = "0.1.0" authors = ["The Kata Containers community "] edition = "2018" +license = "Apache-2.0" [dependencies] oci = { path = "../libs/oci" } rustjail = { path = "rustjail" } -protocols = { path = "../libs/protocols" } +protocols = { path = "../libs/protocols", features = ["async", "with-serde"] } lazy_static = "1.3.0" -ttrpc = { version = "0.5.0", features = ["async", "protobuf-codec"], default-features = false } -protobuf = "=2.14.0" +ttrpc = { version = "0.7.1", features = ["async"], default-features = false } +protobuf = "3.2.0" libc = "0.2.58" -nix = "0.23.0" +nix = "0.24.2" capctl = "0.2.0" serde_json = "1.0.39" scan_fmt = "0.2.3" scopeguard = "1.0.0" thiserror = "1.0.26" -regex = "1.5.5" +regex = "1.5.6" serial_test = "0.5.1" -sysinfo = "0.23.0" +kata-sys-util = { path = "../libs/kata-sys-util" } +kata-types = { path = "../libs/kata-types" } # Async helpers async-trait = "0.1.42" @@ -28,7 +30,7 @@ async-recursion = "0.3.2" futures = "0.3.17" # Async runtime -tokio = { version = "1.14.0", features = ["full"] } +tokio = { version = "1.28.1", features = ["full"] } tokio-vsock = "0.3.1" netlink-sys = { version = "0.7.0", features = ["tokio_socket",]} @@ -41,15 +43,17 @@ ipnetwork = "0.17.0" logging = { path = "../libs/logging" } slog = "2.5.2" slog-scope = "4.1.2" +slog-term = "2.9.0" # Redirect ttrpc log calls slog-stdlog = "4.0.0" log = "0.4.11" +cfg-if = "1.0.0" prometheus = { version = "0.13.0", features = ["process"] } procfs = "0.12.0" anyhow = "1.0.32" -cgroups = { package = "cgroups-rs", version = "0.2.8" } +cgroups = { package = "cgroups-rs", version = "0.3.2" } # Tracing tracing = "0.1.26" @@ -63,8 +67,16 @@ serde = { version = "1.0.129", features = ["derive"] } toml = "0.5.8" clap = { version = "3.0.1", features = ["derive"] } +# Communication with the OPA service +http = { version = "0.2.8", optional = true } +reqwest = { version = "0.11.14", optional = true } +# The "vendored" feature for openssl is required for musl build +openssl = { version = "0.10.54", features = ["vendored"], optional = true } + [dev-dependencies] tempfile = "3.1.0" +test-utils = { path = "../libs/test-utils" } +which = "4.3.0" [workspace] members = [ @@ -77,6 +89,7 @@ lto = true [features] seccomp = ["rustjail/seccomp"] 
standard-oci-runtime = ["rustjail/standard-oci-runtime"] +agent-policy = ["http", "openssl", "reqwest"] [[bin]] name = "kata-agent" diff --git a/src/agent/Makefile b/src/agent/Makefile index 533411bee6b4..699b71ce1d21 100644 --- a/src/agent/Makefile +++ b/src/agent/Makefile @@ -26,13 +26,27 @@ export VERSION_COMMIT := $(if $(COMMIT),$(VERSION)-$(COMMIT),$(VERSION)) EXTRA_RUSTFEATURES := ##VAR SECCOMP=yes|no define if agent enables seccomp feature -SECCOMP := yes +SECCOMP ?= yes # Enable seccomp feature of rust build ifeq ($(SECCOMP),yes) override EXTRA_RUSTFEATURES += seccomp endif +##VAR AGENT_POLICY=yes|no define if agent enables the policy feature +AGENT_POLICY := no + +# Enable the policy feature of rust build +ifeq ($(AGENT_POLICY),yes) + override EXTRA_RUSTFEATURES += agent-policy +endif + +include ../../utils.mk + +ifeq ($(ARCH), ppc64le) + override ARCH = powerpc64le +endif + ##VAR STANDARD_OCI_RUNTIME=yes|no define if agent enables standard oci runtime feature STANDARD_OCI_RUNTIME := no @@ -45,8 +59,6 @@ ifneq ($(EXTRA_RUSTFEATURES),) override EXTRA_RUSTFEATURES := --features "$(EXTRA_RUSTFEATURES)" endif -include ../../utils.mk - TARGET_PATH = target/$(TRIPLE)/$(BUILD_TYPE)/$(TARGET) ##VAR DESTDIR= is a directory prepended to each installed target file @@ -107,10 +119,9 @@ endef ##TARGET default: build code default: $(TARGET) show-header -$(TARGET): $(GENERATED_CODE) logging-crate-tests $(TARGET_PATH) +static-checks-build: $(GENERATED_CODE) -logging-crate-tests: - make -C $(CWD)/../libs/logging +$(TARGET): $(GENERATED_CODE) $(TARGET_PATH) $(TARGET_PATH): show-summary @RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) $(if $(findstring release,$(BUILD_TYPE)),--release) $(EXTRA_RUSTFEATURES) @@ -137,7 +148,7 @@ vendor: #TARGET test: run cargo tests -test: +test: $(GENERATED_FILES) @cargo test --all --target $(TRIPLE) $(EXTRA_RUSTFEATURES) -- --nocapture ##TARGET check: run test @@ -203,7 +214,6 @@ codecov-html: check_tarpaulin .PHONY: \ help \ - logging-crate-tests \ optimize \ show-header \ show-summary \ diff --git a/src/agent/rustjail/Cargo.toml b/src/agent/rustjail/Cargo.toml index 78c0f962eb82..231fa353abfd 100644 --- a/src/agent/rustjail/Cargo.toml +++ b/src/agent/rustjail/Cargo.toml @@ -3,6 +3,7 @@ name = "rustjail" version = "0.1.0" authors = ["The Kata Containers community "] edition = "2018" +license = "Apache-2.0" [dependencies] serde = "1.0.91" @@ -10,32 +11,37 @@ serde_json = "1.0.39" serde_derive = "1.0.91" oci = { path = "../../libs/oci" } protocols = { path ="../../libs/protocols" } +kata-sys-util = { path = "../../libs/kata-sys-util" } caps = "0.5.0" -nix = "0.23.0" +nix = "0.24.2" scopeguard = "1.0.0" capctl = "0.2.0" lazy_static = "1.3.0" libc = "0.2.58" -protobuf = "=2.14.0" +protobuf = "3.2.0" slog = "2.5.2" slog-scope = "4.1.2" scan_fmt = "0.2.6" -regex = "1.5.5" +regex = "1.5.6" path-absolutize = "1.2.0" anyhow = "1.0.32" -cgroups = { package = "cgroups-rs", version = "0.2.8" } +cgroups = { package = "cgroups-rs", version = "0.3.2" } rlimit = "0.5.3" cfg-if = "0.1.0" -tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "macros"] } +tokio = { version = "1.28.1", features = ["sync", "io-util", "process", "time", "macros", "rt"] } futures = "0.3.17" async-trait = "0.1.31" inotify = "0.9.2" -libseccomp = { version = "0.1.3", optional = true } +libseccomp = { version = "0.3.0", optional = true } +zbus = "3.12.0" +bit-vec= "0.6.3" +xattr = "0.2.3" [dev-dependencies] serial_test = "0.5.0" tempfile = "3.1.0" 
+test-utils = { path = "../../libs/test-utils" } [features] seccomp = ["libseccomp"] diff --git a/src/agent/rustjail/src/cgroups/fs/mod.rs b/src/agent/rustjail/src/cgroups/fs/mod.rs index 84aa9bc50c50..6145f5f9c065 100644 --- a/src/agent/rustjail/src/cgroups/fs/mod.rs +++ b/src/agent/rustjail/src/cgroups/fs/mod.rs @@ -27,29 +27,28 @@ use oci::{ LinuxNetwork, LinuxPids, LinuxResources, }; -use protobuf::{CachedSize, RepeatedField, SingularPtrField, UnknownFields}; +use protobuf::MessageField; use protocols::agent::{ BlkioStats, BlkioStatsEntry, CgroupStats, CpuStats, CpuUsage, HugetlbStats, MemoryData, MemoryStats, PidsStats, ThrottlingData, }; +use std::any::Any; use std::collections::HashMap; use std::fs; use std::path::Path; const GUEST_CPUS_PATH: &str = "/sys/devices/system/cpu/online"; -// Convenience macro to obtain the scope logger -macro_rules! sl { - () => { - slog_scope::logger().new(o!("subsystem" => "cgroups")) - }; +// Convenience function to obtain the scope logger. +fn sl() -> slog::Logger { + slog_scope::logger().new(o!("subsystem" => "cgroups")) } macro_rules! get_controller_or_return_singular_none { ($cg:ident) => { match $cg.controller_of() { Some(c) => c, - None => return SingularPtrField::none(), + None => return MessageField::none(), } }; } @@ -75,13 +74,13 @@ macro_rules! set_resource { impl CgroupManager for Manager { fn apply(&self, pid: pid_t) -> Result<()> { - self.cgroup.add_task(CgroupPid::from(pid as u64))?; + self.cgroup.add_task_by_tgid(CgroupPid::from(pid as u64))?; Ok(()) } fn set(&self, r: &LinuxResources, update: bool) -> Result<()> { info!( - sl!(), + sl(), "cgroup manager set resources for container. Resources input {:?}", r ); @@ -119,7 +118,7 @@ impl CgroupManager for Manager { // set devices resources set_devices_resources(&self.cgroup, &r.devices, res); - info!(sl!(), "resources after processed {:?}", res); + info!(sl(), "resources after processed {:?}", res); // apply resources self.cgroup.apply(res)?; @@ -133,11 +132,10 @@ impl CgroupManager for Manager { let throttling_data = get_cpu_stats(&self.cgroup); - let cpu_stats = SingularPtrField::some(CpuStats { + let cpu_stats = MessageField::some(CpuStats { cpu_usage, throttling_data, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + ..Default::default() }); // Memorystats @@ -159,8 +157,7 @@ impl CgroupManager for Manager { pids_stats, blkio_stats, hugetlb_stats, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + ..Default::default() }) } @@ -174,7 +171,7 @@ impl CgroupManager for Manager { freezer_controller.freeze()?; } _ => { - return Err(anyhow!(nix::Error::EINVAL)); + return Err(anyhow!("Invalid FreezerState")); } } @@ -193,6 +190,83 @@ impl CgroupManager for Manager { Ok(result) } + + fn update_cpuset_path(&self, guest_cpuset: &str, container_cpuset: &str) -> Result<()> { + if guest_cpuset.is_empty() { + return Ok(()); + } + info!(sl(), "update_cpuset_path to: {}", guest_cpuset); + + let h = cgroups::hierarchies::auto(); + let root_cg = h.root_control_group(); + + let root_cpuset_controller: &CpuSetController = root_cg.controller_of().unwrap(); + let path = root_cpuset_controller.path(); + let root_path = Path::new(path); + info!(sl(), "root cpuset path: {:?}", &path); + + let container_cpuset_controller: &CpuSetController = self.cgroup.controller_of().unwrap(); + let path = container_cpuset_controller.path(); + let container_path = Path::new(path); + info!(sl(), "container cpuset path: {:?}", &path); + + let mut paths = vec![]; 
+ for ancestor in container_path.ancestors() { + if ancestor == root_path { + break; + } + paths.push(ancestor); + } + info!(sl(), "parent paths to update cpuset: {:?}", &paths); + + let mut i = paths.len(); + loop { + if i == 0 { + break; + } + i -= 1; + + // remove cgroup root from path + let r_path = &paths[i] + .to_str() + .unwrap() + .trim_start_matches(root_path.to_str().unwrap()); + info!(sl(), "updating cpuset for parent path {:?}", &r_path); + let cg = new_cgroup(cgroups::hierarchies::auto(), r_path)?; + let cpuset_controller: &CpuSetController = cg.controller_of().unwrap(); + cpuset_controller.set_cpus(guest_cpuset)?; + } + + if !container_cpuset.is_empty() { + info!( + sl(), + "updating cpuset for container path: {:?} cpuset: {}", + &container_path, + container_cpuset + ); + container_cpuset_controller.set_cpus(container_cpuset)?; + } + + Ok(()) + } + + fn get_cgroup_path(&self, cg: &str) -> Result { + if cgroups::hierarchies::is_cgroup2_unified_mode() { + let cg_path = format!("/sys/fs/cgroup/{}", self.cpath); + return Ok(cg_path); + } + + // for cgroup v1 + Ok(self.paths.get(cg).map(|s| s.to_string()).unwrap()) + } + + fn as_any(&self) -> Result<&dyn Any> { + Ok(self) + } + + fn name(&self) -> &str { + "cgroupfs" + } } fn set_network_resources( @@ -200,7 +274,7 @@ fn set_network_resources( network: &LinuxNetwork, res: &mut cgroups::Resources, ) { - info!(sl!(), "cgroup manager set network"); + info!(sl(), "cgroup manager set network"); // set classid // description can be found at https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/net_cls.html @@ -227,7 +301,7 @@ fn set_devices_resources( device_resources: &[LinuxDeviceCgroup], res: &mut cgroups::Resources, ) { - info!(sl!(), "cgroup manager set devices"); + info!(sl(), "cgroup manager set devices"); let mut devices = vec![]; for d in device_resources.iter() { @@ -252,19 +326,28 @@ fn set_devices_resources( } fn set_hugepages_resources( - _cg: &cgroups::Cgroup, + cg: &cgroups::Cgroup, hugepage_limits: &[LinuxHugepageLimit], res: &mut cgroups::Resources, ) { - info!(sl!(), "cgroup manager set hugepage"); + info!(sl(), "cgroup manager set hugepage"); let mut limits = vec![]; + let hugetlb_controller = cg.controller_of::(); for l in hugepage_limits.iter() { - let hr = HugePageResource { - size: l.page_size.clone(), - limit: l.limit, - }; - limits.push(hr); + if hugetlb_controller.is_some() && hugetlb_controller.unwrap().size_supported(&l.page_size) + { + let hr = HugePageResource { + size: l.page_size.clone(), + limit: l.limit, + }; + limits.push(hr); + } else { + warn!( + sl(), + "{} page size support cannot be verified, dropping requested limit", l.page_size + ); + } } res.hugepages.limits = limits; } @@ -274,7 +357,7 @@ fn set_block_io_resources( blkio: &LinuxBlockIo, res: &mut cgroups::Resources, ) { - info!(sl!(), "cgroup manager set block io"); + info!(sl(), "cgroup manager set block io"); res.blkio.weight = blkio.weight; res.blkio.leaf_weight = blkio.leaf_weight; @@ -302,13 +385,13 @@ fn set_block_io_resources( } fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCpu) -> Result<()> { - info!(sl!(), "cgroup manager set cpu"); + info!(sl(), "cgroup manager set cpu"); let cpuset_controller: &CpuSetController = cg.controller_of().unwrap(); if !cpu.cpus.is_empty() { if let Err(e) = cpuset_controller.set_cpus(&cpu.cpus) { - warn!(sl!(), "write cpuset failed: {:?}", e); + warn!(sl(), "write cpuset failed: {:?}", e); } } @@ -339,7 +422,7 @@ fn set_cpu_resources(cg: &cgroups::Cgroup, cpu: &LinuxCpu) -> Result<()> { } 
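// [Editor's note: illustrative sketch only, not part of this patch.] The hunks in this
// file migrate from rust-protobuf 2.x to 3.x: `SingularPtrField` is replaced by
// `protobuf::MessageField`, `RepeatedField` by a plain `Vec`, and the explicit
// `unknown_fields`/`cached_size` members by `..Default::default()`. A minimal example
// of the new pattern, assuming the generated `protocols::agent` types used in this
// file; `sample_cpu_stats` is a hypothetical helper, not an agent function.
use protobuf::MessageField;
use protocols::agent::{CpuStats, CpuUsage, ThrottlingData};

fn sample_cpu_stats() -> CpuStats {
    CpuStats {
        // Optional sub-messages are wrapped in MessageField instead of SingularPtrField.
        cpu_usage: MessageField::some(CpuUsage {
            total_usage: 0,
            percpu_usage: vec![],
            ..Default::default()
        }),
        throttling_data: MessageField::some(ThrottlingData::default()),
        // Fills the remaining generated members (e.g. special_fields) with defaults,
        // replacing the old explicit unknown_fields/cached_size initializers.
        ..Default::default()
    }
}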
fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool) -> Result<()> { - info!(sl!(), "cgroup manager set memory"); + info!(sl(), "cgroup manager set memory"); let mem_controller: &MemController = cg.controller_of().unwrap(); if !update { @@ -359,14 +442,14 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool let memstat = get_memory_stats(cg) .into_option() .ok_or_else(|| anyhow!("failed to get the cgroup memory stats"))?; - let memusage = memstat.get_usage(); + let memusage = memstat.usage(); // When update memory limit, the kernel would check the current memory limit // set against the new swap setting, if the current memory limit is large than // the new swap, then set limit first, otherwise the kernel would complain and // refused to set; on the other hand, if the current memory limit is smaller than // the new swap, then we should set the swap first and then set the memor limit. - if swap == -1 || memusage.get_limit() < swap as u64 { + if swap == -1 || memusage.limit() < swap as u64 { mem_controller.set_memswap_limit(swap)?; set_resource!(mem_controller, set_limit, memory, limit); } else { @@ -408,7 +491,7 @@ fn set_memory_resources(cg: &cgroups::Cgroup, memory: &LinuxMemory, update: bool } fn set_pids_resources(cg: &cgroups::Cgroup, pids: &LinuxPids) -> Result<()> { - info!(sl!(), "cgroup manager set pids"); + info!(sl(), "cgroup manager set pids"); let pid_controller: &PidController = cg.controller_of().unwrap(); let v = if pids.limit > 0 { MaxValue::Value(pids.limit) @@ -570,21 +653,20 @@ lazy_static! { }; } -fn get_cpu_stats(cg: &cgroups::Cgroup) -> SingularPtrField { +fn get_cpu_stats(cg: &cgroups::Cgroup) -> MessageField { let cpu_controller: &CpuController = get_controller_or_return_singular_none!(cg); let stat = cpu_controller.cpu().stat; let h = lines_to_map(&stat); - SingularPtrField::some(ThrottlingData { + MessageField::some(ThrottlingData { periods: *h.get("nr_periods").unwrap_or(&0), throttled_periods: *h.get("nr_throttled").unwrap_or(&0), throttled_time: *h.get("throttled_time").unwrap_or(&0), - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + ..Default::default() }) } -fn get_cpuacct_stats(cg: &cgroups::Cgroup) -> SingularPtrField { +fn get_cpuacct_stats(cg: &cgroups::Cgroup) -> MessageField { if let Some(cpuacct_controller) = cg.controller_of::() { let cpuacct = cpuacct_controller.cpuacct(); @@ -598,24 +680,12 @@ fn get_cpuacct_stats(cg: &cgroups::Cgroup) -> SingularPtrField { let percpu_usage = line_to_vec(&cpuacct.usage_percpu); - return SingularPtrField::some(CpuUsage { + return MessageField::some(CpuUsage { total_usage, percpu_usage, usage_in_kernelmode, usage_in_usermode, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), - }); - } - - if cg.v2() { - return SingularPtrField::some(CpuUsage { - total_usage: 0, - percpu_usage: vec![], - usage_in_kernelmode: 0, - usage_in_usermode: 0, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + ..Default::default() }); } @@ -628,17 +698,16 @@ fn get_cpuacct_stats(cg: &cgroups::Cgroup) -> SingularPtrField { let total_usage = *h.get("usage_usec").unwrap_or(&0); let percpu_usage = vec![]; - SingularPtrField::some(CpuUsage { + MessageField::some(CpuUsage { total_usage, percpu_usage, usage_in_kernelmode, usage_in_usermode, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + ..Default::default() }) } -fn get_memory_stats(cg: &cgroups::Cgroup) -> 
SingularPtrField { +fn get_memory_stats(cg: &cgroups::Cgroup) -> MessageField { let memory_controller: &MemController = get_controller_or_return_singular_none!(cg); // cache from memory stat @@ -649,53 +718,49 @@ fn get_memory_stats(cg: &cgroups::Cgroup) -> SingularPtrField { let value = memory.use_hierarchy; let use_hierarchy = value == 1; - // gte memory datas - let usage = SingularPtrField::some(MemoryData { + // get memory data + let usage = MessageField::some(MemoryData { usage: memory.usage_in_bytes, max_usage: memory.max_usage_in_bytes, failcnt: memory.fail_cnt, limit: memory.limit_in_bytes as u64, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + ..Default::default() }); // get swap usage let memswap = memory_controller.memswap(); - let swap_usage = SingularPtrField::some(MemoryData { + let swap_usage = MessageField::some(MemoryData { usage: memswap.usage_in_bytes, max_usage: memswap.max_usage_in_bytes, failcnt: memswap.fail_cnt, limit: memswap.limit_in_bytes as u64, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + ..Default::default() }); // get kernel usage let kmem_stat = memory_controller.kmem_stat(); - let kernel_usage = SingularPtrField::some(MemoryData { + let kernel_usage = MessageField::some(MemoryData { usage: kmem_stat.usage_in_bytes, max_usage: kmem_stat.max_usage_in_bytes, failcnt: kmem_stat.fail_cnt, limit: kmem_stat.limit_in_bytes as u64, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + ..Default::default() }); - SingularPtrField::some(MemoryStats { + MessageField::some(MemoryStats { cache, usage, swap_usage, kernel_usage, use_hierarchy, stats: memory.stat.raw, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + ..Default::default() }) } -fn get_pids_stats(cg: &cgroups::Cgroup) -> SingularPtrField { +fn get_pids_stats(cg: &cgroups::Cgroup) -> MessageField { let pid_controller: &PidController = get_controller_or_return_singular_none!(cg); let current = pid_controller.get_pid_current().unwrap_or(0); @@ -709,11 +774,10 @@ fn get_pids_stats(cg: &cgroups::Cgroup) -> SingularPtrField { }, } as u64; - SingularPtrField::some(PidsStats { + MessageField::some(PidsStats { current, limit, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + ..Default::default() }) } @@ -749,8 +813,8 @@ https://github.com/opencontainers/runc/blob/a5847db387ae28c0ca4ebe4beee1a76900c8 Total 0 */ -fn get_blkio_stat_blkiodata(blkiodata: &[BlkIoData]) -> RepeatedField { - let mut m = RepeatedField::new(); +fn get_blkio_stat_blkiodata(blkiodata: &[BlkIoData]) -> Vec { + let mut m = Vec::new(); if blkiodata.is_empty() { return m; } @@ -763,16 +827,15 @@ fn get_blkio_stat_blkiodata(blkiodata: &[BlkIoData]) -> RepeatedField RepeatedField { - let mut m = RepeatedField::new(); +fn get_blkio_stat_ioservice(services: &[IoService]) -> Vec { + let mut m = Vec::new(); if services.is_empty() { return m; @@ -796,17 +859,16 @@ fn build_blkio_stats_entry(major: i16, minor: i16, op: &str, value: u64) -> Blki minor: minor as u64, op: op.to_string(), value, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + ..Default::default() } } -fn get_blkio_stats_v2(cg: &cgroups::Cgroup) -> SingularPtrField { +fn get_blkio_stats_v2(cg: &cgroups::Cgroup) -> MessageField { let blkio_controller: &BlkIoController = get_controller_or_return_singular_none!(cg); let blkio = blkio_controller.blkio(); let mut resp = BlkioStats::new(); - let 
mut blkio_stats = RepeatedField::new(); + let mut blkio_stats = Vec::new(); let stat = blkio.io_stat; for s in stat { @@ -822,10 +884,10 @@ fn get_blkio_stats_v2(cg: &cgroups::Cgroup) -> SingularPtrField { resp.io_service_bytes_recursive = blkio_stats; - SingularPtrField::some(resp) + MessageField::some(resp) } -fn get_blkio_stats(cg: &cgroups::Cgroup) -> SingularPtrField { +fn get_blkio_stats(cg: &cgroups::Cgroup) -> MessageField { if cg.v2() { return get_blkio_stats_v2(cg); } @@ -858,7 +920,7 @@ fn get_blkio_stats(cg: &cgroups::Cgroup) -> SingularPtrField { m.sectors_recursive = get_blkio_stat_blkiodata(&blkio.sectors_recursive); } - SingularPtrField::some(m) + MessageField::some(m) } fn get_hugetlb_stats(cg: &cgroups::Cgroup) -> HashMap { @@ -882,8 +944,7 @@ fn get_hugetlb_stats(cg: &cgroups::Cgroup) -> HashMap { usage, max_usage, failcnt, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + ..Default::default() }, ); } @@ -899,7 +960,7 @@ pub fn get_paths() -> Result> { for l in fs::read_to_string(PATHS)?.lines() { let fl: Vec<&str> = l.split(':').collect(); if fl.len() != 3 { - info!(sl!(), "Corrupted cgroup data!"); + info!(sl(), "Corrupted cgroup data!"); continue; } @@ -911,9 +972,8 @@ pub fn get_paths() -> Result> { Ok(m) } -pub fn get_mounts() -> Result> { +pub fn get_mounts(paths: &HashMap) -> Result> { let mut m = HashMap::new(); - let paths = get_paths()?; for l in fs::read_to_string(MOUNTS)?.lines() { let p: Vec<&str> = l.splitn(2, " - ").collect(); @@ -921,7 +981,7 @@ pub fn get_mounts() -> Result> { let post: Vec<&str> = p[1].split(' ').collect(); if post.len() != 3 { - warn!(sl!(), "can't parse {} line {:?}", MOUNTS, l); + warn!(sl(), "can't parse {} line {:?}", MOUNTS, l); continue; } @@ -941,9 +1001,9 @@ pub fn get_mounts() -> Result> { Ok(m) } -fn new_cgroup(h: Box, path: &str) -> Cgroup { +fn new_cgroup(h: Box, path: &str) -> Result { let valid_path = path.trim_start_matches('/').to_string(); - cgroups::Cgroup::new(h, valid_path.as_str()) + cgroups::Cgroup::new(h, valid_path.as_str()).map_err(anyhow::Error::from) } impl Manager { @@ -951,7 +1011,7 @@ impl Manager { let mut m = HashMap::new(); let paths = get_paths()?; - let mounts = get_mounts()?; + let mounts = get_mounts(&paths)?; for key in paths.keys() { let mnt = mounts.get(key); @@ -965,83 +1025,16 @@ impl Manager { m.insert(key.to_string(), p); } + let cg = new_cgroup(cgroups::hierarchies::auto(), cpath)?; + Ok(Self { paths: m, mounts, // rels: paths, cpath: cpath.to_string(), - cgroup: new_cgroup(cgroups::hierarchies::auto(), cpath), + cgroup: cg, }) } - - pub fn update_cpuset_path(&self, guest_cpuset: &str, container_cpuset: &str) -> Result<()> { - if guest_cpuset.is_empty() { - return Ok(()); - } - info!(sl!(), "update_cpuset_path to: {}", guest_cpuset); - - let h = cgroups::hierarchies::auto(); - let root_cg = h.root_control_group(); - - let root_cpuset_controller: &CpuSetController = root_cg.controller_of().unwrap(); - let path = root_cpuset_controller.path(); - let root_path = Path::new(path); - info!(sl!(), "root cpuset path: {:?}", &path); - - let container_cpuset_controller: &CpuSetController = self.cgroup.controller_of().unwrap(); - let path = container_cpuset_controller.path(); - let container_path = Path::new(path); - info!(sl!(), "container cpuset path: {:?}", &path); - - let mut paths = vec![]; - for ancestor in container_path.ancestors() { - if ancestor == root_path { - break; - } - paths.push(ancestor); - } - info!(sl!(), "parent paths to update cpuset: {:?}", 
&paths); - - let mut i = paths.len(); - loop { - if i == 0 { - break; - } - i -= 1; - - // remove cgroup root from path - let r_path = &paths[i] - .to_str() - .unwrap() - .trim_start_matches(root_path.to_str().unwrap()); - info!(sl!(), "updating cpuset for parent path {:?}", &r_path); - let cg = new_cgroup(cgroups::hierarchies::auto(), r_path); - let cpuset_controller: &CpuSetController = cg.controller_of().unwrap(); - cpuset_controller.set_cpus(guest_cpuset)?; - } - - if !container_cpuset.is_empty() { - info!( - sl!(), - "updating cpuset for container path: {:?} cpuset: {}", - &container_path, - container_cpuset - ); - container_cpuset_controller.set_cpus(container_cpuset)?; - } - - Ok(()) - } - - pub fn get_cg_path(&self, cg: &str) -> Option { - if cgroups::hierarchies::is_cgroup2_unified_mode() { - let cg_path = format!("/sys/fs/cgroup/{}", self.cpath); - return Some(cg_path); - } - - // for cgroup v1 - self.paths.get(cg).map(|s| s.to_string()) - } } // get the guest's online cpus. diff --git a/src/agent/rustjail/src/cgroups/mock.rs b/src/agent/rustjail/src/cgroups/mock.rs index e1603c846807..8ac77c63b282 100644 --- a/src/agent/rustjail/src/cgroups/mock.rs +++ b/src/agent/rustjail/src/cgroups/mock.rs @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // -use protobuf::{CachedSize, SingularPtrField, UnknownFields}; +use protobuf::MessageField; use crate::cgroups::Manager as CgroupManager; use crate::protocols::agent::{BlkioStats, CgroupStats, CpuStats, MemoryStats, PidsStats}; @@ -11,6 +11,7 @@ use anyhow::Result; use cgroups::freezer::FreezerState; use libc::{self, pid_t}; use oci::LinuxResources; +use std::any::Any; use std::collections::HashMap; use std::string::String; @@ -32,13 +33,12 @@ impl CgroupManager for Manager { fn get_stats(&self) -> Result { Ok(CgroupStats { - cpu_stats: SingularPtrField::some(CpuStats::default()), - memory_stats: SingularPtrField::some(MemoryStats::new()), - pids_stats: SingularPtrField::some(PidsStats::new()), - blkio_stats: SingularPtrField::some(BlkioStats::new()), + cpu_stats: MessageField::some(CpuStats::default()), + memory_stats: MessageField::some(MemoryStats::new()), + pids_stats: MessageField::some(PidsStats::new()), + blkio_stats: MessageField::some(BlkioStats::new()), hugetlb_stats: HashMap::new(), - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + ..Default::default() }) } @@ -53,6 +53,22 @@ impl CgroupManager for Manager { fn get_pids(&self) -> Result> { Ok(Vec::new()) } + + fn update_cpuset_path(&self, _: &str, _: &str) -> Result<()> { + Ok(()) + } + + fn get_cgroup_path(&self, _: &str) -> Result { + Ok("".to_string()) + } + + fn as_any(&self) -> Result<&dyn Any> { + Ok(self) + } + + fn name(&self) -> &str { + "mock" + } } impl Manager { @@ -63,12 +79,4 @@ impl Manager { cpath: cpath.to_string(), }) } - - pub fn update_cpuset_path(&self, _: &str, _: &str) -> Result<()> { - Ok(()) - } - - pub fn get_cg_path(&self, _: &str) -> Option { - Some("".to_string()) - } } diff --git a/src/agent/rustjail/src/cgroups/mod.rs b/src/agent/rustjail/src/cgroups/mod.rs index 389ff79fafc7..c4e3b178b565 100644 --- a/src/agent/rustjail/src/cgroups/mod.rs +++ b/src/agent/rustjail/src/cgroups/mod.rs @@ -4,8 +4,10 @@ // use anyhow::{anyhow, Result}; +use core::fmt::Debug; use oci::LinuxResources; use protocols::agent::CgroupStats; +use std::any::Any; use cgroups::freezer::FreezerState; @@ -38,4 +40,24 @@ pub trait Manager { fn set(&self, _container: &LinuxResources, _update: bool) -> Result<()> { Err(anyhow!("not supported!")) 
} + + fn update_cpuset_path(&self, _: &str, _: &str) -> Result<()> { + Err(anyhow!("not supported!")) + } + + fn get_cgroup_path(&self, _: &str) -> Result { + Err(anyhow!("not supported!")) + } + + fn as_any(&self) -> Result<&dyn Any> { + Err(anyhow!("not supported!")) + } + + fn name(&self) -> &str; +} + +impl Debug for dyn Manager + Send + Sync { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{}", self.name()) + } } diff --git a/src/agent/rustjail/src/cgroups/notifier.rs b/src/agent/rustjail/src/cgroups/notifier.rs index 9f91b3584fa7..5260a3d3f29b 100644 --- a/src/agent/rustjail/src/cgroups/notifier.rs +++ b/src/agent/rustjail/src/cgroups/notifier.rs @@ -16,11 +16,9 @@ use inotify::{Inotify, WatchMask}; use tokio::io::AsyncReadExt; use tokio::sync::mpsc::{channel, Receiver}; -// Convenience macro to obtain the scope logger -macro_rules! sl { - () => { - slog_scope::logger().new(o!("subsystem" => "cgroups_notifier")) - }; +// Convenience function to obtain the scope logger. +fn sl() -> slog::Logger { + slog_scope::logger().new(o!("subsystem" => "cgroups_notifier")) } pub async fn notify_oom(cid: &str, cg_dir: String) -> Result> { @@ -38,7 +36,7 @@ pub async fn notify_oom(cid: &str, cg_dir: String) -> Result> { fn get_value_from_cgroup(path: &Path, key: &str) -> Result { let content = fs::read_to_string(path)?; info!( - sl!(), + sl(), "get_value_from_cgroup file: {:?}, content: {}", &path, &content ); @@ -67,11 +65,11 @@ async fn register_memory_event_v2( let event_control_path = Path::new(&cg_dir).join(memory_event_name); let cgroup_event_control_path = Path::new(&cg_dir).join(cgroup_event_name); info!( - sl!(), + sl(), "register_memory_event_v2 event_control_path: {:?}", &event_control_path ); info!( - sl!(), + sl(), "register_memory_event_v2 cgroup_event_control_path: {:?}", &cgroup_event_control_path ); @@ -82,8 +80,8 @@ async fn register_memory_event_v2( // Because no `unix.IN_DELETE|unix.IN_DELETE_SELF` event for cgroup file system, so watching all process exited let cg_wd = inotify.add_watch(&cgroup_event_control_path, WatchMask::MODIFY)?; - info!(sl!(), "ev_wd: {:?}", ev_wd); - info!(sl!(), "cg_wd: {:?}", cg_wd); + info!(sl(), "ev_wd: {:?}", ev_wd); + info!(sl(), "cg_wd: {:?}", cg_wd); let (sender, receiver) = channel(100); let containere_id = containere_id.to_string(); @@ -97,17 +95,17 @@ async fn register_memory_event_v2( while let Some(event_or_error) = stream.next().await { let event = event_or_error.unwrap(); info!( - sl!(), + sl(), "container[{}] get event for container: {:?}", &containere_id, &event ); // info!("is1: {}", event.wd == wd1); - info!(sl!(), "event.wd: {:?}", event.wd); + info!(sl(), "event.wd: {:?}", event.wd); if event.wd == ev_wd { let oom = get_value_from_cgroup(&event_control_path, "oom_kill"); if oom.unwrap_or(0) > 0 { let _ = sender.send(containere_id.clone()).await.map_err(|e| { - error!(sl!(), "send containere_id failed, error: {:?}", e); + error!(sl(), "send containere_id failed, error: {:?}", e); }); return; } @@ -171,13 +169,13 @@ async fn register_memory_event( let mut buf = [0u8; 8]; match eventfd_stream.read(&mut buf).await { Err(err) => { - warn!(sl!(), "failed to read from eventfd: {:?}", err); + warn!(sl(), "failed to read from eventfd: {:?}", err); return; } Ok(_) => { let content = fs::read_to_string(path.clone()); info!( - sl!(), + sl(), "cgroup event for container: {}, path: {:?}, content: {:?}", &containere_id, &path, @@ -193,7 +191,7 @@ async fn register_memory_event( } let _ = 
sender.send(containere_id.clone()).await.map_err(|e| { - error!(sl!(), "send containere_id failed, error: {:?}", e); + error!(sl(), "send containere_id failed, error: {:?}", e); }); } }); diff --git a/src/agent/rustjail/src/cgroups/systemd.rs b/src/agent/rustjail/src/cgroups/systemd.rs deleted file mode 100644 index 669f6d5bbb92..000000000000 --- a/src/agent/rustjail/src/cgroups/systemd.rs +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (c) 2019 Ant Financial -// -// SPDX-License-Identifier: Apache-2.0 -// - -use crate::cgroups::Manager as CgroupManager; - -pub struct Manager {} - -impl CgroupManager for Manager {} diff --git a/src/agent/rustjail/src/cgroups/systemd/cgroups_path.rs b/src/agent/rustjail/src/cgroups/systemd/cgroups_path.rs new file mode 100644 index 000000000000..696c0ece0c31 --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/cgroups_path.rs @@ -0,0 +1,95 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Result}; + +use super::common::{DEFAULT_SLICE, SCOPE_SUFFIX, SLICE_SUFFIX}; +use std::string::String; + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct CgroupsPath { + pub slice: String, + pub prefix: String, + pub name: String, +} + +impl CgroupsPath { + pub fn new(cgroups_path_str: &str) -> Result { + let path_vec: Vec<&str> = cgroups_path_str.split(':').collect(); + if path_vec.len() != 3 { + return Err(anyhow!("invalid cpath: {:?}", cgroups_path_str)); + } + + Ok(CgroupsPath { + slice: if path_vec[0].is_empty() { + DEFAULT_SLICE.to_string() + } else { + path_vec[0].to_owned() + }, + prefix: path_vec[1].to_owned(), + name: path_vec[2].to_owned(), + }) + } + + // ref: https://github.com/opencontainers/runc/blob/main/docs/systemd.md + // return: (parent_slice, unit_name) + pub fn parse(&self) -> Result<(String, String)> { + Ok(( + parse_parent(self.slice.to_owned())?, + get_unit_name(self.prefix.to_owned(), self.name.to_owned()), + )) + } +} + +fn parse_parent(slice: String) -> Result { + if !slice.ends_with(SLICE_SUFFIX) || slice.contains('/') { + return Err(anyhow!("invalid slice name: {}", slice)); + } else if slice == "-.slice" { + return Ok(String::new()); + } + + let mut slice_path = String::new(); + let mut prefix = String::new(); + for subslice in slice.trim_end_matches(SLICE_SUFFIX).split('-') { + if subslice.is_empty() { + return Err(anyhow!("invalid slice name: {}", slice)); + } + slice_path = format!("{}/{}{}{}", slice_path, prefix, subslice, SLICE_SUFFIX); + prefix = format!("{}{}-", prefix, subslice); + } + slice_path.remove(0); + Ok(slice_path) +} + +fn get_unit_name(prefix: String, name: String) -> String { + if name.ends_with(SLICE_SUFFIX) { + name + } else if prefix.is_empty() { + format!("{}{}", name, SCOPE_SUFFIX) + } else { + format!("{}-{}{}", prefix, name, SCOPE_SUFFIX) + } +} + +#[cfg(test)] +mod tests { + use super::CgroupsPath; + + #[test] + fn test_cgroup_path_parse() { + let slice = "system.slice"; + let prefix = "kata_agent"; + let name = "123"; + let cgroups_path = + CgroupsPath::new(format!("{}:{}:{}", slice, prefix, name).as_str()).unwrap(); + assert_eq!(slice, cgroups_path.slice.as_str()); + assert_eq!(prefix, cgroups_path.prefix.as_str()); + assert_eq!(name, cgroups_path.name.as_str()); + + let (parent_slice, unit_name) = cgroups_path.parse().unwrap(); + assert_eq!(format!("{}", slice), parent_slice); + assert_eq!(format!("{}-{}.scope", prefix, name), unit_name); + } +} diff --git a/src/agent/rustjail/src/cgroups/systemd/common.rs 
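// A minimal usage sketch, assuming the `CgroupsPath` type defined above; the
// cgroupsPath strings and the "123" container id are illustrative. The expected
// values follow directly from parse_parent()/get_unit_name() as written.
fn cgroups_path_demo() -> anyhow::Result<()> {
    // "<slice>:<prefix>:<name>" as passed through the OCI cgroupsPath field.
    let cg = CgroupsPath::new("system.slice:kata_agent:123")?;
    let (parent_slice, unit_name) = cg.parse()?;
    assert_eq!(parent_slice, "system.slice");
    assert_eq!(unit_name, "kata_agent-123.scope");

    // Dash-separated slice names expand into nested slice paths, following the
    // convention documented in runc's systemd.md referenced above.
    let nested = CgroupsPath::new("user-1000.slice:kata_agent:123")?;
    assert_eq!(nested.parse()?.0, "user.slice/user-1000.slice");
    Ok(())
}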
b/src/agent/rustjail/src/cgroups/systemd/common.rs new file mode 100644 index 000000000000..ec82b40c23bb --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/common.rs @@ -0,0 +1,20 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub const DEFAULT_SLICE: &str = "system.slice"; +pub const SLICE_SUFFIX: &str = ".slice"; +pub const SCOPE_SUFFIX: &str = ".scope"; +pub const WHO_ENUM_ALL: &str = "all"; +pub const SIGNAL_KILL: i32 = nix::sys::signal::SIGKILL as i32; +pub const UNIT_MODE_REPLACE: &str = "replace"; +pub const NO_SUCH_UNIT_ERROR: &str = "org.freedesktop.systemd1.NoSuchUnit"; + +pub type Properties<'a> = Vec<(&'a str, zbus::zvariant::Value<'a>)>; + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub enum CgroupHierarchy { + Legacy, + Unified, +} diff --git a/src/agent/rustjail/src/cgroups/systemd/dbus_client.rs b/src/agent/rustjail/src/cgroups/systemd/dbus_client.rs new file mode 100644 index 000000000000..3e1e3275c76e --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/dbus_client.rs @@ -0,0 +1,166 @@ +// Copyright 2021-2023 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::vec; + +use super::common::{ + CgroupHierarchy, Properties, NO_SUCH_UNIT_ERROR, SIGNAL_KILL, SLICE_SUFFIX, UNIT_MODE_REPLACE, + WHO_ENUM_ALL, +}; +use super::interface::system::ManagerProxyBlocking as SystemManager; +use anyhow::{anyhow, Context, Result}; +use zbus::zvariant::Value; + +pub trait SystemdInterface { + fn start_unit(&self, pid: i32, parent: &str, cg_hierarchy: &CgroupHierarchy) -> Result<()>; + fn set_properties(&self, properties: &Properties) -> Result<()>; + fn kill_unit(&self) -> Result<()>; + fn freeze_unit(&self) -> Result<()>; + fn thaw_unit(&self) -> Result<()>; + fn add_process(&self, pid: i32) -> Result<()>; + fn get_version(&self) -> Result; + fn unit_exists(&self) -> Result; +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct DBusClient { + unit_name: String, +} + +impl DBusClient { + pub fn new(unit_name: String) -> Self { + Self { unit_name } + } + + fn build_proxy(&self) -> Result> { + let connection = + zbus::blocking::Connection::system().context("Establishing a D-Bus connection")?; + let proxy = SystemManager::new(&connection).context("Building a D-Bus proxy manager")?; + + Ok(proxy) + } +} + +impl SystemdInterface for DBusClient { + fn start_unit(&self, pid: i32, parent: &str, cg_hierarchy: &CgroupHierarchy) -> Result<()> { + let proxy = self.build_proxy()?; + + // enable CPUAccounting & MemoryAccounting & (Block)IOAccounting by default + let mut properties: Properties = vec![ + ("CPUAccounting", Value::Bool(true)), + ("DefaultDependencies", Value::Bool(false)), + ("MemoryAccounting", Value::Bool(true)), + ("TasksAccounting", Value::Bool(true)), + ("Description", Value::Str("kata-agent container".into())), + ("PIDs", Value::Array(vec![pid as u32].into())), + ]; + + match *cg_hierarchy { + CgroupHierarchy::Legacy => properties.push(("IOAccounting", Value::Bool(true))), + CgroupHierarchy::Unified => properties.push(("BlockIOAccounting", Value::Bool(true))), + } + + if self.unit_name.ends_with(SLICE_SUFFIX) { + properties.push(("Wants", Value::Str(parent.into()))); + } else { + properties.push(("Slice", Value::Str(parent.into()))); + properties.push(("Delegate", Value::Bool(true))); + } + + proxy + .start_transient_unit(&self.unit_name, UNIT_MODE_REPLACE, &properties, &[]) + .context(format!("failed to start transient unit {}", self.unit_name))?; + + Ok(()) + } + + fn 
set_properties(&self, properties: &Properties) -> Result<()> { + let proxy = self.build_proxy()?; + + proxy + .set_unit_properties(&self.unit_name, true, properties) + .context(format!("failed to set unit {} properties", self.unit_name))?; + + Ok(()) + } + + fn kill_unit(&self) -> Result<()> { + let proxy = self.build_proxy()?; + + proxy + .kill_unit(&self.unit_name, WHO_ENUM_ALL, SIGNAL_KILL) + .or_else(|e| match e { + zbus::Error::MethodError(error_name, _, _) + if error_name.as_str() == NO_SUCH_UNIT_ERROR => + { + Ok(()) + } + _ => Err(e), + }) + .context(format!("failed to kill unit {}", self.unit_name))?; + + Ok(()) + } + + fn freeze_unit(&self) -> Result<()> { + let proxy = self.build_proxy()?; + + proxy + .freeze_unit(&self.unit_name) + .context(format!("failed to freeze unit {}", self.unit_name))?; + + Ok(()) + } + + fn thaw_unit(&self) -> Result<()> { + let proxy = self.build_proxy()?; + + proxy + .thaw_unit(&self.unit_name) + .context(format!("failed to thaw unit {}", self.unit_name))?; + + Ok(()) + } + + fn get_version(&self) -> Result { + let proxy = self.build_proxy()?; + + let systemd_version = proxy + .version() + .context("failed to get systemd version".to_string())?; + + Ok(systemd_version) + } + + fn unit_exists(&self) -> Result { + let proxy = self.build_proxy()?; + + match proxy.get_unit(&self.unit_name) { + Ok(_) => Ok(true), + Err(zbus::Error::MethodError(error_name, _, _)) + if error_name.as_str() == NO_SUCH_UNIT_ERROR => + { + Ok(false) + } + Err(e) => Err(anyhow!(format!( + "failed to check if unit {} exists: {:?}", + self.unit_name, e + ))), + } + } + + fn add_process(&self, pid: i32) -> Result<()> { + let proxy = self.build_proxy()?; + + proxy + .attach_processes_to_unit(&self.unit_name, "/", &[pid as u32]) + .context(format!( + "failed to add process into unit {}", + self.unit_name + ))?; + + Ok(()) + } +} diff --git a/src/agent/rustjail/src/cgroups/systemd/interface/mod.rs b/src/agent/rustjail/src/cgroups/systemd/interface/mod.rs new file mode 100644 index 000000000000..d0ac621116a6 --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/interface/mod.rs @@ -0,0 +1,7 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub(crate) mod session; +pub(crate) mod system; diff --git a/src/agent/rustjail/src/cgroups/systemd/interface/session.rs b/src/agent/rustjail/src/cgroups/systemd/interface/session.rs new file mode 100644 index 000000000000..648303ad3b40 --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/interface/session.rs @@ -0,0 +1,1004 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +#![allow(unknown_lints)] +#![allow(clippy::all)] + +//! # DBus interface proxy for: `org.freedesktop.systemd1.Manager` +//! +//! This code was generated by `zbus-xmlgen` `2.0.1` from DBus introspection data. +//! Source: `Interface '/org/freedesktop/systemd1' from service 'org.freedesktop.systemd1' on session bus`. +//! +//! You may prefer to adapt it, instead of using it verbatim. +//! +//! More information can be found in the +//! [Writing a client proxy](https://dbus.pages.freedesktop.org/zbus/client.html) +//! section of the zbus documentation. +//! +//! This DBus object implements +//! [standard DBus interfaces](https://dbus.freedesktop.org/doc/dbus-specification.html), +//! (`org.freedesktop.DBus.*`) for which the following zbus proxies can be used: +//! +//! * [`zbus::fdo::PeerProxy`] +//! * [`zbus::fdo::IntrospectableProxy`] +//! * [`zbus::fdo::PropertiesProxy`] +//! +//! 
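// A minimal call-site sketch, assuming the DBusClient defined above; the unit
// name, parent slice, and pid are illustrative. This mirrors how the systemd
// cgroup manager later in this change starts or joins a transient unit.
// (The SystemdInterface trait must be in scope for these methods.)
fn place_process(pid: i32) -> anyhow::Result<()> {
    let client = DBusClient::new("kata_agent-123.scope".to_string());
    if client.unit_exists()? {
        // The scope already exists: just attach the process to it.
        client.add_process(pid)?;
    } else {
        // Otherwise ask systemd to create the transient scope under the parent
        // slice and move the process into it in a single StartTransientUnit call.
        client.start_unit(pid, "system.slice", &CgroupHierarchy::Unified)?;
    }
    Ok(())
}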
…consequently `zbus-xmlgen` did not generate code for the above interfaces. + +use zbus::dbus_proxy; + +#[dbus_proxy( + interface = "org.freedesktop.systemd1.Manager", + default_service = "org.freedesktop.systemd1", + default_path = "/org/freedesktop/systemd1" +)] +trait Manager { + /// AbandonScope method + fn abandon_scope(&self, name: &str) -> zbus::Result<()>; + + /// AddDependencyUnitFiles method + fn add_dependency_unit_files( + &self, + files: &[&str], + target: &str, + type_: &str, + runtime: bool, + force: bool, + ) -> zbus::Result>; + + /// AttachProcessesToUnit method + fn attach_processes_to_unit( + &self, + unit_name: &str, + subcgroup: &str, + pids: &[u32], + ) -> zbus::Result<()>; + + /// BindMountUnit method + fn bind_mount_unit( + &self, + name: &str, + source: &str, + destination: &str, + read_only: bool, + mkdir: bool, + ) -> zbus::Result<()>; + + /// CancelJob method + fn cancel_job(&self, id: u32) -> zbus::Result<()>; + + /// CleanUnit method + fn clean_unit(&self, name: &str, mask: &[&str]) -> zbus::Result<()>; + + /// ClearJobs method + fn clear_jobs(&self) -> zbus::Result<()>; + + /// DisableUnitFiles method + fn disable_unit_files( + &self, + files: &[&str], + runtime: bool, + ) -> zbus::Result>; + + /// DisableUnitFilesWithFlags method + fn disable_unit_files_with_flags( + &self, + files: &[&str], + flags: u64, + ) -> zbus::Result>; + + /// Dump method + fn dump(&self) -> zbus::Result; + + /// DumpByFileDescriptor method + fn dump_by_file_descriptor(&self) -> zbus::Result; + + /// EnableUnitFiles method + fn enable_unit_files( + &self, + files: &[&str], + runtime: bool, + force: bool, + ) -> zbus::Result<(bool, Vec<(String, String, String)>)>; + + /// EnableUnitFilesWithFlags method + fn enable_unit_files_with_flags( + &self, + files: &[&str], + flags: u64, + ) -> zbus::Result<(bool, Vec<(String, String, String)>)>; + + /// EnqueueMarkedJobs method + fn enqueue_marked_jobs(&self) -> zbus::Result>; + + /// EnqueueUnitJob method + fn enqueue_unit_job( + &self, + name: &str, + job_type: &str, + job_mode: &str, + ) -> zbus::Result<( + u32, + zbus::zvariant::OwnedObjectPath, + String, + zbus::zvariant::OwnedObjectPath, + String, + Vec<( + u32, + zbus::zvariant::OwnedObjectPath, + String, + zbus::zvariant::OwnedObjectPath, + String, + )>, + )>; + + /// Exit method + fn exit(&self) -> zbus::Result<()>; + + /// FreezeUnit method + fn freeze_unit(&self, name: &str) -> zbus::Result<()>; + + /// GetDefaultTarget method + fn get_default_target(&self) -> zbus::Result; + + /// GetDynamicUsers method + fn get_dynamic_users(&self) -> zbus::Result>; + + /// GetJob method + fn get_job(&self, id: u32) -> zbus::Result; + + /// GetJobAfter method + fn get_job_after( + &self, + id: u32, + ) -> zbus::Result< + Vec<( + u32, + String, + String, + String, + zbus::zvariant::OwnedObjectPath, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// GetJobBefore method + fn get_job_before( + &self, + id: u32, + ) -> zbus::Result< + Vec<( + u32, + String, + String, + String, + zbus::zvariant::OwnedObjectPath, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// GetUnit method + fn get_unit(&self, name: &str) -> zbus::Result; + + /// GetUnitByControlGroup method + fn get_unit_by_control_group( + &self, + cgroup: &str, + ) -> zbus::Result; + + /// GetUnitByInvocationID method + fn get_unit_by_invocation_id( + &self, + invocation_id: &[u8], + ) -> zbus::Result; + + /// GetUnitByPID method + fn get_unit_by_pid(&self, pid: u32) -> zbus::Result; + + /// GetUnitFileLinks method + fn 
get_unit_file_links(&self, name: &str, runtime: bool) -> zbus::Result>; + + /// GetUnitFileState method + fn get_unit_file_state(&self, file: &str) -> zbus::Result; + + /// GetUnitProcesses method + fn get_unit_processes(&self, name: &str) -> zbus::Result>; + + /// Halt method + fn halt(&self) -> zbus::Result<()>; + + /// KExec method + fn kexec(&self) -> zbus::Result<()>; + + /// KillUnit method + fn kill_unit(&self, name: &str, whom: &str, signal: i32) -> zbus::Result<()>; + + /// LinkUnitFiles method + fn link_unit_files( + &self, + files: &[&str], + runtime: bool, + force: bool, + ) -> zbus::Result>; + + /// ListJobs method + fn list_jobs( + &self, + ) -> zbus::Result< + Vec<( + u32, + String, + String, + String, + zbus::zvariant::OwnedObjectPath, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// ListUnitFiles method + fn list_unit_files(&self) -> zbus::Result>; + + /// ListUnitFilesByPatterns method + fn list_unit_files_by_patterns( + &self, + states: &[&str], + patterns: &[&str], + ) -> zbus::Result>; + + /// ListUnits method + fn list_units( + &self, + ) -> zbus::Result< + Vec<( + String, + String, + String, + String, + String, + String, + zbus::zvariant::OwnedObjectPath, + u32, + String, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// ListUnitsByNames method + fn list_units_by_names( + &self, + names: &[&str], + ) -> zbus::Result< + Vec<( + String, + String, + String, + String, + String, + String, + zbus::zvariant::OwnedObjectPath, + u32, + String, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// ListUnitsByPatterns method + fn list_units_by_patterns( + &self, + states: &[&str], + patterns: &[&str], + ) -> zbus::Result< + Vec<( + String, + String, + String, + String, + String, + String, + zbus::zvariant::OwnedObjectPath, + u32, + String, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// ListUnitsFiltered method + fn list_units_filtered( + &self, + states: &[&str], + ) -> zbus::Result< + Vec<( + String, + String, + String, + String, + String, + String, + zbus::zvariant::OwnedObjectPath, + u32, + String, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// LoadUnit method + fn load_unit(&self, name: &str) -> zbus::Result; + + /// LookupDynamicUserByName method + fn lookup_dynamic_user_by_name(&self, name: &str) -> zbus::Result; + + /// LookupDynamicUserByUID method + fn lookup_dynamic_user_by_uid(&self, uid: u32) -> zbus::Result; + + /// MaskUnitFiles method + fn mask_unit_files( + &self, + files: &[&str], + runtime: bool, + force: bool, + ) -> zbus::Result>; + + /// MountImageUnit method + fn mount_image_unit( + &self, + name: &str, + source: &str, + destination: &str, + read_only: bool, + mkdir: bool, + options: &[(&str, &str)], + ) -> zbus::Result<()>; + + /// PowerOff method + fn power_off(&self) -> zbus::Result<()>; + + /// PresetAllUnitFiles method + fn preset_all_unit_files( + &self, + mode: &str, + runtime: bool, + force: bool, + ) -> zbus::Result>; + + /// PresetUnitFiles method + fn preset_unit_files( + &self, + files: &[&str], + runtime: bool, + force: bool, + ) -> zbus::Result<(bool, Vec<(String, String, String)>)>; + + /// PresetUnitFilesWithMode method + fn preset_unit_files_with_mode( + &self, + files: &[&str], + mode: &str, + runtime: bool, + force: bool, + ) -> zbus::Result<(bool, Vec<(String, String, String)>)>; + + /// Reboot method + fn reboot(&self) -> zbus::Result<()>; + + /// ReenableUnitFiles method + fn reenable_unit_files( + &self, + files: &[&str], + runtime: bool, + force: bool, + ) -> zbus::Result<(bool, Vec<(String, String, 
String)>)>; + + /// Reexecute method + fn reexecute(&self) -> zbus::Result<()>; + + /// RefUnit method + fn ref_unit(&self, name: &str) -> zbus::Result<()>; + + /// Reload method + fn reload(&self) -> zbus::Result<()>; + + /// ReloadOrRestartUnit method + fn reload_or_restart_unit( + &self, + name: &str, + mode: &str, + ) -> zbus::Result; + + /// ReloadOrTryRestartUnit method + fn reload_or_try_restart_unit( + &self, + name: &str, + mode: &str, + ) -> zbus::Result; + + /// ReloadUnit method + fn reload_unit(&self, name: &str, mode: &str) -> zbus::Result; + + /// ResetFailed method + fn reset_failed(&self) -> zbus::Result<()>; + + /// ResetFailedUnit method + fn reset_failed_unit(&self, name: &str) -> zbus::Result<()>; + + /// RestartUnit method + fn restart_unit(&self, name: &str, mode: &str) + -> zbus::Result; + + /// RevertUnitFiles method + fn revert_unit_files(&self, files: &[&str]) -> zbus::Result>; + + /// SetDefaultTarget method + fn set_default_target( + &self, + name: &str, + force: bool, + ) -> zbus::Result>; + + /// SetEnvironment method + fn set_environment(&self, assignments: &[&str]) -> zbus::Result<()>; + + /// SetExitCode method + fn set_exit_code(&self, number: u8) -> zbus::Result<()>; + + /// SetShowStatus method + fn set_show_status(&self, mode: &str) -> zbus::Result<()>; + + /// SetUnitProperties method + fn set_unit_properties( + &self, + name: &str, + runtime: bool, + properties: &[(&str, zbus::zvariant::Value<'_>)], + ) -> zbus::Result<()>; + + /// StartTransientUnit method + fn start_transient_unit( + &self, + name: &str, + mode: &str, + properties: &[(&str, zbus::zvariant::Value<'_>)], + aux: &[(&str, &[(&str, zbus::zvariant::Value<'_>)])], + ) -> zbus::Result; + + /// StartUnit method + fn start_unit(&self, name: &str, mode: &str) -> zbus::Result; + + /// StartUnitReplace method + fn start_unit_replace( + &self, + old_unit: &str, + new_unit: &str, + mode: &str, + ) -> zbus::Result; + + /// StopUnit method + fn stop_unit(&self, name: &str, mode: &str) -> zbus::Result; + + /// Subscribe method + fn subscribe(&self) -> zbus::Result<()>; + + /// SwitchRoot method + fn switch_root(&self, new_root: &str, init: &str) -> zbus::Result<()>; + + /// ThawUnit method + fn thaw_unit(&self, name: &str) -> zbus::Result<()>; + + /// TryRestartUnit method + fn try_restart_unit( + &self, + name: &str, + mode: &str, + ) -> zbus::Result; + + /// UnmaskUnitFiles method + fn unmask_unit_files( + &self, + files: &[&str], + runtime: bool, + ) -> zbus::Result>; + + /// UnrefUnit method + fn unref_unit(&self, name: &str) -> zbus::Result<()>; + + /// UnsetAndSetEnvironment method + fn unset_and_set_environment(&self, names: &[&str], assignments: &[&str]) -> zbus::Result<()>; + + /// UnsetEnvironment method + fn unset_environment(&self, names: &[&str]) -> zbus::Result<()>; + + /// Unsubscribe method + fn unsubscribe(&self) -> zbus::Result<()>; + + /// JobNew signal + #[dbus_proxy(signal)] + fn job_new(&self, id: u32, job: zbus::zvariant::ObjectPath<'_>, unit: &str) + -> zbus::Result<()>; + + /// JobRemoved signal + #[dbus_proxy(signal)] + fn job_removed( + &self, + id: u32, + job: zbus::zvariant::ObjectPath<'_>, + unit: &str, + result: &str, + ) -> zbus::Result<()>; + + /// Reloading signal + #[dbus_proxy(signal)] + fn reloading(&self, active: bool) -> zbus::Result<()>; + + /// StartupFinished signal + #[dbus_proxy(signal)] + fn startup_finished( + &self, + firmware: u64, + loader: u64, + kernel: u64, + initrd: u64, + userspace: u64, + total: u64, + ) -> zbus::Result<()>; + + /// 
UnitFilesChanged signal + #[dbus_proxy(signal)] + fn unit_files_changed(&self) -> zbus::Result<()>; + + /// UnitNew signal + #[dbus_proxy(signal)] + fn unit_new(&self, id: &str, unit: zbus::zvariant::ObjectPath<'_>) -> zbus::Result<()>; + + /// UnitRemoved signal + #[dbus_proxy(signal)] + fn unit_removed(&self, id: &str, unit: zbus::zvariant::ObjectPath<'_>) -> zbus::Result<()>; + + /// Architecture property + #[dbus_proxy(property)] + fn architecture(&self) -> zbus::Result; + + /// ConfirmSpawn property + #[dbus_proxy(property)] + fn confirm_spawn(&self) -> zbus::Result; + + /// ControlGroup property + #[dbus_proxy(property)] + fn control_group(&self) -> zbus::Result; + + /// CtrlAltDelBurstAction property + #[dbus_proxy(property)] + fn ctrl_alt_del_burst_action(&self) -> zbus::Result; + + /// DefaultBlockIOAccounting property + #[dbus_proxy(property)] + fn default_block_ioaccounting(&self) -> zbus::Result; + + /// DefaultCPUAccounting property + #[dbus_proxy(property)] + fn default_cpuaccounting(&self) -> zbus::Result; + + /// DefaultLimitAS property + #[dbus_proxy(property)] + fn default_limit_as(&self) -> zbus::Result; + + /// DefaultLimitASSoft property + #[dbus_proxy(property)] + fn default_limit_assoft(&self) -> zbus::Result; + + /// DefaultLimitCORE property + #[dbus_proxy(property)] + fn default_limit_core(&self) -> zbus::Result; + + /// DefaultLimitCORESoft property + #[dbus_proxy(property)] + fn default_limit_coresoft(&self) -> zbus::Result; + + /// DefaultLimitCPU property + #[dbus_proxy(property)] + fn default_limit_cpu(&self) -> zbus::Result; + + /// DefaultLimitCPUSoft property + #[dbus_proxy(property)] + fn default_limit_cpusoft(&self) -> zbus::Result; + + /// DefaultLimitDATA property + #[dbus_proxy(property)] + fn default_limit_data(&self) -> zbus::Result; + + /// DefaultLimitDATASoft property + #[dbus_proxy(property)] + fn default_limit_datasoft(&self) -> zbus::Result; + + /// DefaultLimitFSIZE property + #[dbus_proxy(property)] + fn default_limit_fsize(&self) -> zbus::Result; + + /// DefaultLimitFSIZESoft property + #[dbus_proxy(property)] + fn default_limit_fsizesoft(&self) -> zbus::Result; + + /// DefaultLimitLOCKS property + #[dbus_proxy(property)] + fn default_limit_locks(&self) -> zbus::Result; + + /// DefaultLimitLOCKSSoft property + #[dbus_proxy(property)] + fn default_limit_lockssoft(&self) -> zbus::Result; + + /// DefaultLimitMEMLOCK property + #[dbus_proxy(property)] + fn default_limit_memlock(&self) -> zbus::Result; + + /// DefaultLimitMEMLOCKSoft property + #[dbus_proxy(property)] + fn default_limit_memlocksoft(&self) -> zbus::Result; + + /// DefaultLimitMSGQUEUE property + #[dbus_proxy(property)] + fn default_limit_msgqueue(&self) -> zbus::Result; + + /// DefaultLimitMSGQUEUESoft property + #[dbus_proxy(property)] + fn default_limit_msgqueuesoft(&self) -> zbus::Result; + + /// DefaultLimitNICE property + #[dbus_proxy(property)] + fn default_limit_nice(&self) -> zbus::Result; + + /// DefaultLimitNICESoft property + #[dbus_proxy(property)] + fn default_limit_nicesoft(&self) -> zbus::Result; + + /// DefaultLimitNOFILE property + #[dbus_proxy(property)] + fn default_limit_nofile(&self) -> zbus::Result; + + /// DefaultLimitNOFILESoft property + #[dbus_proxy(property)] + fn default_limit_nofilesoft(&self) -> zbus::Result; + + /// DefaultLimitNPROC property + #[dbus_proxy(property)] + fn default_limit_nproc(&self) -> zbus::Result; + + /// DefaultLimitNPROCSoft property + #[dbus_proxy(property)] + fn default_limit_nprocsoft(&self) -> zbus::Result; + + /// 
DefaultLimitRSS property + #[dbus_proxy(property)] + fn default_limit_rss(&self) -> zbus::Result; + + /// DefaultLimitRSSSoft property + #[dbus_proxy(property)] + fn default_limit_rsssoft(&self) -> zbus::Result; + + /// DefaultLimitRTPRIO property + #[dbus_proxy(property)] + fn default_limit_rtprio(&self) -> zbus::Result; + + /// DefaultLimitRTPRIOSoft property + #[dbus_proxy(property)] + fn default_limit_rtpriosoft(&self) -> zbus::Result; + + /// DefaultLimitRTTIME property + #[dbus_proxy(property)] + fn default_limit_rttime(&self) -> zbus::Result; + + /// DefaultLimitRTTIMESoft property + #[dbus_proxy(property)] + fn default_limit_rttimesoft(&self) -> zbus::Result; + + /// DefaultLimitSIGPENDING property + #[dbus_proxy(property)] + fn default_limit_sigpending(&self) -> zbus::Result; + + /// DefaultLimitSIGPENDINGSoft property + #[dbus_proxy(property)] + fn default_limit_sigpendingsoft(&self) -> zbus::Result; + + /// DefaultLimitSTACK property + #[dbus_proxy(property)] + fn default_limit_stack(&self) -> zbus::Result; + + /// DefaultLimitSTACKSoft property + #[dbus_proxy(property)] + fn default_limit_stacksoft(&self) -> zbus::Result; + + /// DefaultMemoryAccounting property + #[dbus_proxy(property)] + fn default_memory_accounting(&self) -> zbus::Result; + + /// DefaultOOMPolicy property + #[dbus_proxy(property)] + fn default_oompolicy(&self) -> zbus::Result; + + /// DefaultRestartUSec property + #[dbus_proxy(property)] + fn default_restart_usec(&self) -> zbus::Result; + + /// DefaultStandardError property + #[dbus_proxy(property)] + fn default_standard_error(&self) -> zbus::Result; + + /// DefaultStandardOutput property + #[dbus_proxy(property)] + fn default_standard_output(&self) -> zbus::Result; + + /// DefaultStartLimitBurst property + #[dbus_proxy(property)] + fn default_start_limit_burst(&self) -> zbus::Result; + + /// DefaultStartLimitIntervalUSec property + #[dbus_proxy(property)] + fn default_start_limit_interval_usec(&self) -> zbus::Result; + + /// DefaultTasksAccounting property + #[dbus_proxy(property)] + fn default_tasks_accounting(&self) -> zbus::Result; + + /// DefaultTasksMax property + #[dbus_proxy(property)] + fn default_tasks_max(&self) -> zbus::Result; + + /// DefaultTimeoutAbortUSec property + #[dbus_proxy(property)] + fn default_timeout_abort_usec(&self) -> zbus::Result; + + /// DefaultTimeoutStartUSec property + #[dbus_proxy(property)] + fn default_timeout_start_usec(&self) -> zbus::Result; + + /// DefaultTimeoutStopUSec property + #[dbus_proxy(property)] + fn default_timeout_stop_usec(&self) -> zbus::Result; + + /// DefaultTimerAccuracyUSec property + #[dbus_proxy(property)] + fn default_timer_accuracy_usec(&self) -> zbus::Result; + + /// Environment property + #[dbus_proxy(property)] + fn environment(&self) -> zbus::Result>; + + /// ExitCode property + #[dbus_proxy(property)] + fn exit_code(&self) -> zbus::Result; + + /// Features property + #[dbus_proxy(property)] + fn features(&self) -> zbus::Result; + + /// FinishTimestamp property + #[dbus_proxy(property)] + fn finish_timestamp(&self) -> zbus::Result; + + /// FinishTimestampMonotonic property + #[dbus_proxy(property)] + fn finish_timestamp_monotonic(&self) -> zbus::Result; + + /// FirmwareTimestamp property + #[dbus_proxy(property)] + fn firmware_timestamp(&self) -> zbus::Result; + + /// FirmwareTimestampMonotonic property + #[dbus_proxy(property)] + fn firmware_timestamp_monotonic(&self) -> zbus::Result; + + /// GeneratorsFinishTimestamp property + #[dbus_proxy(property)] + fn generators_finish_timestamp(&self) 
-> zbus::Result; + + /// GeneratorsFinishTimestampMonotonic property + #[dbus_proxy(property)] + fn generators_finish_timestamp_monotonic(&self) -> zbus::Result; + + /// GeneratorsStartTimestamp property + #[dbus_proxy(property)] + fn generators_start_timestamp(&self) -> zbus::Result; + + /// GeneratorsStartTimestampMonotonic property + #[dbus_proxy(property)] + fn generators_start_timestamp_monotonic(&self) -> zbus::Result; + + /// InitRDGeneratorsFinishTimestamp property + #[dbus_proxy(property)] + fn init_rdgenerators_finish_timestamp(&self) -> zbus::Result; + + /// InitRDGeneratorsFinishTimestampMonotonic property + #[dbus_proxy(property)] + fn init_rdgenerators_finish_timestamp_monotonic(&self) -> zbus::Result; + + /// InitRDGeneratorsStartTimestamp property + #[dbus_proxy(property)] + fn init_rdgenerators_start_timestamp(&self) -> zbus::Result; + + /// InitRDGeneratorsStartTimestampMonotonic property + #[dbus_proxy(property)] + fn init_rdgenerators_start_timestamp_monotonic(&self) -> zbus::Result; + + /// InitRDSecurityFinishTimestamp property + #[dbus_proxy(property)] + fn init_rdsecurity_finish_timestamp(&self) -> zbus::Result; + + /// InitRDSecurityFinishTimestampMonotonic property + #[dbus_proxy(property)] + fn init_rdsecurity_finish_timestamp_monotonic(&self) -> zbus::Result; + + /// InitRDSecurityStartTimestamp property + #[dbus_proxy(property)] + fn init_rdsecurity_start_timestamp(&self) -> zbus::Result; + + /// InitRDSecurityStartTimestampMonotonic property + #[dbus_proxy(property)] + fn init_rdsecurity_start_timestamp_monotonic(&self) -> zbus::Result; + + /// InitRDTimestamp property + #[dbus_proxy(property)] + fn init_rdtimestamp(&self) -> zbus::Result; + + /// InitRDTimestampMonotonic property + #[dbus_proxy(property)] + fn init_rdtimestamp_monotonic(&self) -> zbus::Result; + + /// InitRDUnitsLoadFinishTimestamp property + #[dbus_proxy(property)] + fn init_rdunits_load_finish_timestamp(&self) -> zbus::Result; + + /// InitRDUnitsLoadFinishTimestampMonotonic property + #[dbus_proxy(property)] + fn init_rdunits_load_finish_timestamp_monotonic(&self) -> zbus::Result; + + /// InitRDUnitsLoadStartTimestamp property + #[dbus_proxy(property)] + fn init_rdunits_load_start_timestamp(&self) -> zbus::Result; + + /// InitRDUnitsLoadStartTimestampMonotonic property + #[dbus_proxy(property)] + fn init_rdunits_load_start_timestamp_monotonic(&self) -> zbus::Result; + + /// KExecWatchdogUSec property + #[dbus_proxy(property)] + fn kexec_watchdog_usec(&self) -> zbus::Result; + #[dbus_proxy(property)] + fn set_kexec_watchdog_usec(&self, value: u64) -> zbus::Result<()>; + + /// KernelTimestamp property + #[dbus_proxy(property)] + fn kernel_timestamp(&self) -> zbus::Result; + + /// KernelTimestampMonotonic property + #[dbus_proxy(property)] + fn kernel_timestamp_monotonic(&self) -> zbus::Result; + + /// LoaderTimestamp property + #[dbus_proxy(property)] + fn loader_timestamp(&self) -> zbus::Result; + + /// LoaderTimestampMonotonic property + #[dbus_proxy(property)] + fn loader_timestamp_monotonic(&self) -> zbus::Result; + + /// LogLevel property + #[dbus_proxy(property)] + fn log_level(&self) -> zbus::Result; + #[dbus_proxy(property)] + fn set_log_level(&self, value: &str) -> zbus::Result<()>; + + /// LogTarget property + #[dbus_proxy(property)] + fn log_target(&self) -> zbus::Result; + #[dbus_proxy(property)] + fn set_log_target(&self, value: &str) -> zbus::Result<()>; + + /// NFailedJobs property + #[dbus_proxy(property)] + fn nfailed_jobs(&self) -> zbus::Result; + + /// NFailedUnits property 
+ #[dbus_proxy(property)] + fn nfailed_units(&self) -> zbus::Result; + + /// NInstalledJobs property + #[dbus_proxy(property)] + fn ninstalled_jobs(&self) -> zbus::Result; + + /// NJobs property + #[dbus_proxy(property)] + fn njobs(&self) -> zbus::Result; + + /// NNames property + #[dbus_proxy(property)] + fn nnames(&self) -> zbus::Result; + + /// Progress property + #[dbus_proxy(property)] + fn progress(&self) -> zbus::Result; + + /// RebootWatchdogUSec property + #[dbus_proxy(property)] + fn reboot_watchdog_usec(&self) -> zbus::Result; + #[dbus_proxy(property)] + fn set_reboot_watchdog_usec(&self, value: u64) -> zbus::Result<()>; + + /// RuntimeWatchdogUSec property + #[dbus_proxy(property)] + fn runtime_watchdog_usec(&self) -> zbus::Result; + #[dbus_proxy(property)] + fn set_runtime_watchdog_usec(&self, value: u64) -> zbus::Result<()>; + + /// SecurityFinishTimestamp property + #[dbus_proxy(property)] + fn security_finish_timestamp(&self) -> zbus::Result; + + /// SecurityFinishTimestampMonotonic property + #[dbus_proxy(property)] + fn security_finish_timestamp_monotonic(&self) -> zbus::Result; + + /// SecurityStartTimestamp property + #[dbus_proxy(property)] + fn security_start_timestamp(&self) -> zbus::Result; + + /// SecurityStartTimestampMonotonic property + #[dbus_proxy(property)] + fn security_start_timestamp_monotonic(&self) -> zbus::Result; + + /// ServiceWatchdogs property + #[dbus_proxy(property)] + fn service_watchdogs(&self) -> zbus::Result; + #[dbus_proxy(property)] + fn set_service_watchdogs(&self, value: bool) -> zbus::Result<()>; + + /// ShowStatus property + #[dbus_proxy(property)] + fn show_status(&self) -> zbus::Result; + + /// SystemState property + #[dbus_proxy(property)] + fn system_state(&self) -> zbus::Result; + + /// Tainted property + #[dbus_proxy(property)] + fn tainted(&self) -> zbus::Result; + + /// TimerSlackNSec property + #[dbus_proxy(property)] + fn timer_slack_nsec(&self) -> zbus::Result; + + /// UnitPath property + #[dbus_proxy(property)] + fn unit_path(&self) -> zbus::Result>; + + /// UnitsLoadFinishTimestamp property + #[dbus_proxy(property)] + fn units_load_finish_timestamp(&self) -> zbus::Result; + + /// UnitsLoadFinishTimestampMonotonic property + #[dbus_proxy(property)] + fn units_load_finish_timestamp_monotonic(&self) -> zbus::Result; + + /// UnitsLoadStartTimestamp property + #[dbus_proxy(property)] + fn units_load_start_timestamp(&self) -> zbus::Result; + + /// UnitsLoadStartTimestampMonotonic property + #[dbus_proxy(property)] + fn units_load_start_timestamp_monotonic(&self) -> zbus::Result; + + /// UserspaceTimestamp property + #[dbus_proxy(property)] + fn userspace_timestamp(&self) -> zbus::Result; + + /// UserspaceTimestampMonotonic property + #[dbus_proxy(property)] + fn userspace_timestamp_monotonic(&self) -> zbus::Result; + + /// Version property + #[dbus_proxy(property)] + fn version(&self) -> zbus::Result; + + /// Virtualization property + #[dbus_proxy(property)] + fn virtualization(&self) -> zbus::Result; +} diff --git a/src/agent/rustjail/src/cgroups/systemd/interface/system.rs b/src/agent/rustjail/src/cgroups/systemd/interface/system.rs new file mode 100644 index 000000000000..a61563eaba92 --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/interface/system.rs @@ -0,0 +1,1002 @@ +// Copyright 2021-2023 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +#![allow(unknown_lints)] +#![allow(clippy::all)] + +//! # DBus interface proxy for: `org.freedesktop.systemd1.Manager` +//! +//! 
This code was generated by `zbus-xmlgen` `3.1.1` from DBus introspection data. +//! Source: `Interface '/org/freedesktop/systemd1' from service 'org.freedesktop.systemd1' on system bus`. +//! +//! You may prefer to adapt it, instead of using it verbatim. +//! +//! More information can be found in the +//! [Writing a client proxy](https://dbus.pages.freedesktop.org/zbus/client.html) +//! section of the zbus documentation. +//! +//! This DBus object implements +//! [standard DBus interfaces](https://dbus.freedesktop.org/doc/dbus-specification.html), +//! (`org.freedesktop.DBus.*`) for which the following zbus proxies can be used: +//! +//! * [`zbus::fdo::PeerProxy`] +//! * [`zbus::fdo::IntrospectableProxy`] +//! * [`zbus::fdo::PropertiesProxy`] +//! +//! …consequently `zbus-xmlgen` did not generate code for the above interfaces. + +use zbus::dbus_proxy; + +#[dbus_proxy( + interface = "org.freedesktop.systemd1.Manager", + default_service = "org.freedesktop.systemd1", + default_path = "/org/freedesktop/systemd1" +)] +trait Manager { + /// AbandonScope method + fn abandon_scope(&self, name: &str) -> zbus::Result<()>; + + /// AddDependencyUnitFiles method + fn add_dependency_unit_files( + &self, + files: &[&str], + target: &str, + type_: &str, + runtime: bool, + force: bool, + ) -> zbus::Result>; + + /// AttachProcessesToUnit method + fn attach_processes_to_unit( + &self, + unit_name: &str, + subcgroup: &str, + pids: &[u32], + ) -> zbus::Result<()>; + + /// BindMountUnit method + fn bind_mount_unit( + &self, + name: &str, + source: &str, + destination: &str, + read_only: bool, + mkdir: bool, + ) -> zbus::Result<()>; + + /// CancelJob method + fn cancel_job(&self, id: u32) -> zbus::Result<()>; + + /// CleanUnit method + fn clean_unit(&self, name: &str, mask: &[&str]) -> zbus::Result<()>; + + /// ClearJobs method + fn clear_jobs(&self) -> zbus::Result<()>; + + /// DisableUnitFiles method + fn disable_unit_files( + &self, + files: &[&str], + runtime: bool, + ) -> zbus::Result>; + + /// DisableUnitFilesWithFlags method + fn disable_unit_files_with_flags( + &self, + files: &[&str], + flags: u64, + ) -> zbus::Result>; + + /// Dump method + fn dump(&self) -> zbus::Result; + + /// DumpByFileDescriptor method + fn dump_by_file_descriptor(&self) -> zbus::Result; + + /// EnableUnitFiles method + fn enable_unit_files( + &self, + files: &[&str], + runtime: bool, + force: bool, + ) -> zbus::Result<(bool, Vec<(String, String, String)>)>; + + /// EnableUnitFilesWithFlags method + fn enable_unit_files_with_flags( + &self, + files: &[&str], + flags: u64, + ) -> zbus::Result<(bool, Vec<(String, String, String)>)>; + + /// EnqueueMarkedJobs method + fn enqueue_marked_jobs(&self) -> zbus::Result>; + + /// EnqueueUnitJob method + fn enqueue_unit_job( + &self, + name: &str, + job_type: &str, + job_mode: &str, + ) -> zbus::Result<( + u32, + zbus::zvariant::OwnedObjectPath, + String, + zbus::zvariant::OwnedObjectPath, + String, + Vec<( + u32, + zbus::zvariant::OwnedObjectPath, + String, + zbus::zvariant::OwnedObjectPath, + String, + )>, + )>; + + /// Exit method + fn exit(&self) -> zbus::Result<()>; + + /// FreezeUnit method + fn freeze_unit(&self, name: &str) -> zbus::Result<()>; + + /// GetDefaultTarget method + fn get_default_target(&self) -> zbus::Result; + + /// GetDynamicUsers method + fn get_dynamic_users(&self) -> zbus::Result>; + + /// GetJob method + fn get_job(&self, id: u32) -> zbus::Result; + + /// GetJobAfter method + fn get_job_after( + &self, + id: u32, + ) -> zbus::Result< + Vec<( + u32, + String, + 
String, + String, + zbus::zvariant::OwnedObjectPath, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// GetJobBefore method + fn get_job_before( + &self, + id: u32, + ) -> zbus::Result< + Vec<( + u32, + String, + String, + String, + zbus::zvariant::OwnedObjectPath, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// GetUnit method + fn get_unit(&self, name: &str) -> zbus::Result; + + /// GetUnitByControlGroup method + fn get_unit_by_control_group( + &self, + cgroup: &str, + ) -> zbus::Result; + + /// GetUnitByInvocationID method + #[dbus_proxy(name = "GetUnitByInvocationID")] + fn get_unit_by_invocation_id( + &self, + invocation_id: &[u8], + ) -> zbus::Result; + + /// GetUnitByPID method + #[dbus_proxy(name = "GetUnitByPID")] + fn get_unit_by_pid(&self, pid: u32) -> zbus::Result; + + /// GetUnitFileLinks method + fn get_unit_file_links(&self, name: &str, runtime: bool) -> zbus::Result>; + + /// GetUnitFileState method + fn get_unit_file_state(&self, file: &str) -> zbus::Result; + + /// GetUnitProcesses method + fn get_unit_processes(&self, name: &str) -> zbus::Result>; + + /// Halt method + fn halt(&self) -> zbus::Result<()>; + + /// KExec method + #[dbus_proxy(name = "KExec")] + fn kexec(&self) -> zbus::Result<()>; + + /// KillUnit method + fn kill_unit(&self, name: &str, whom: &str, signal: i32) -> zbus::Result<()>; + + /// LinkUnitFiles method + fn link_unit_files( + &self, + files: &[&str], + runtime: bool, + force: bool, + ) -> zbus::Result>; + + /// ListJobs method + fn list_jobs( + &self, + ) -> zbus::Result< + Vec<( + u32, + String, + String, + String, + zbus::zvariant::OwnedObjectPath, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// ListUnitFiles method + fn list_unit_files(&self) -> zbus::Result>; + + /// ListUnitFilesByPatterns method + fn list_unit_files_by_patterns( + &self, + states: &[&str], + patterns: &[&str], + ) -> zbus::Result>; + + /// ListUnits method + fn list_units( + &self, + ) -> zbus::Result< + Vec<( + String, + String, + String, + String, + String, + String, + zbus::zvariant::OwnedObjectPath, + u32, + String, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// ListUnitsByNames method + fn list_units_by_names( + &self, + names: &[&str], + ) -> zbus::Result< + Vec<( + String, + String, + String, + String, + String, + String, + zbus::zvariant::OwnedObjectPath, + u32, + String, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// ListUnitsByPatterns method + fn list_units_by_patterns( + &self, + states: &[&str], + patterns: &[&str], + ) -> zbus::Result< + Vec<( + String, + String, + String, + String, + String, + String, + zbus::zvariant::OwnedObjectPath, + u32, + String, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// ListUnitsFiltered method + fn list_units_filtered( + &self, + states: &[&str], + ) -> zbus::Result< + Vec<( + String, + String, + String, + String, + String, + String, + zbus::zvariant::OwnedObjectPath, + u32, + String, + zbus::zvariant::OwnedObjectPath, + )>, + >; + + /// LoadUnit method + fn load_unit(&self, name: &str) -> zbus::Result; + + /// LookupDynamicUserByName method + fn lookup_dynamic_user_by_name(&self, name: &str) -> zbus::Result; + + /// LookupDynamicUserByUID method + #[dbus_proxy(name = "LookupDynamicUserByUID")] + fn lookup_dynamic_user_by_uid(&self, uid: u32) -> zbus::Result; + + /// MaskUnitFiles method + fn mask_unit_files( + &self, + files: &[&str], + runtime: bool, + force: bool, + ) -> zbus::Result>; + + /// MountImageUnit method + fn mount_image_unit( + &self, + name: &str, + source: &str, + destination: 
&str, + read_only: bool, + mkdir: bool, + options: &[(&str, &str)], + ) -> zbus::Result<()>; + + /// PowerOff method + fn power_off(&self) -> zbus::Result<()>; + + /// PresetAllUnitFiles method + fn preset_all_unit_files( + &self, + mode: &str, + runtime: bool, + force: bool, + ) -> zbus::Result>; + + /// PresetUnitFiles method + fn preset_unit_files( + &self, + files: &[&str], + runtime: bool, + force: bool, + ) -> zbus::Result<(bool, Vec<(String, String, String)>)>; + + /// PresetUnitFilesWithMode method + fn preset_unit_files_with_mode( + &self, + files: &[&str], + mode: &str, + runtime: bool, + force: bool, + ) -> zbus::Result<(bool, Vec<(String, String, String)>)>; + + /// Reboot method + fn reboot(&self) -> zbus::Result<()>; + + /// ReenableUnitFiles method + fn reenable_unit_files( + &self, + files: &[&str], + runtime: bool, + force: bool, + ) -> zbus::Result<(bool, Vec<(String, String, String)>)>; + + /// Reexecute method + fn reexecute(&self) -> zbus::Result<()>; + + /// RefUnit method + fn ref_unit(&self, name: &str) -> zbus::Result<()>; + + /// Reload method + fn reload(&self) -> zbus::Result<()>; + + /// ReloadOrRestartUnit method + fn reload_or_restart_unit( + &self, + name: &str, + mode: &str, + ) -> zbus::Result; + + /// ReloadOrTryRestartUnit method + fn reload_or_try_restart_unit( + &self, + name: &str, + mode: &str, + ) -> zbus::Result; + + /// ReloadUnit method + fn reload_unit(&self, name: &str, mode: &str) -> zbus::Result; + + /// ResetFailed method + fn reset_failed(&self) -> zbus::Result<()>; + + /// ResetFailedUnit method + fn reset_failed_unit(&self, name: &str) -> zbus::Result<()>; + + /// RestartUnit method + fn restart_unit(&self, name: &str, mode: &str) + -> zbus::Result; + + /// RevertUnitFiles method + fn revert_unit_files(&self, files: &[&str]) -> zbus::Result>; + + /// SetDefaultTarget method + fn set_default_target( + &self, + name: &str, + force: bool, + ) -> zbus::Result>; + + /// SetEnvironment method + fn set_environment(&self, assignments: &[&str]) -> zbus::Result<()>; + + /// SetExitCode method + fn set_exit_code(&self, number: u8) -> zbus::Result<()>; + + /// SetShowStatus method + fn set_show_status(&self, mode: &str) -> zbus::Result<()>; + + /// SetUnitProperties method + fn set_unit_properties( + &self, + name: &str, + runtime: bool, + properties: &[(&str, zbus::zvariant::Value<'_>)], + ) -> zbus::Result<()>; + + /// StartTransientUnit method + fn start_transient_unit( + &self, + name: &str, + mode: &str, + properties: &[(&str, zbus::zvariant::Value<'_>)], + aux: &[(&str, &[(&str, zbus::zvariant::Value<'_>)])], + ) -> zbus::Result; + + /// StartUnit method + fn start_unit(&self, name: &str, mode: &str) -> zbus::Result; + + /// StartUnitReplace method + fn start_unit_replace( + &self, + old_unit: &str, + new_unit: &str, + mode: &str, + ) -> zbus::Result; + + /// StopUnit method + fn stop_unit(&self, name: &str, mode: &str) -> zbus::Result; + + /// Subscribe method + fn subscribe(&self) -> zbus::Result<()>; + + /// SwitchRoot method + fn switch_root(&self, new_root: &str, init: &str) -> zbus::Result<()>; + + /// ThawUnit method + fn thaw_unit(&self, name: &str) -> zbus::Result<()>; + + /// TryRestartUnit method + fn try_restart_unit( + &self, + name: &str, + mode: &str, + ) -> zbus::Result; + + /// UnmaskUnitFiles method + fn unmask_unit_files( + &self, + files: &[&str], + runtime: bool, + ) -> zbus::Result>; + + /// UnrefUnit method + fn unref_unit(&self, name: &str) -> zbus::Result<()>; + + /// UnsetAndSetEnvironment method + fn 
unset_and_set_environment(&self, names: &[&str], assignments: &[&str]) -> zbus::Result<()>; + + /// UnsetEnvironment method + fn unset_environment(&self, names: &[&str]) -> zbus::Result<()>; + + /// Unsubscribe method + fn unsubscribe(&self) -> zbus::Result<()>; + + /// JobNew signal + #[dbus_proxy(signal)] + fn job_new(&self, id: u32, job: zbus::zvariant::ObjectPath<'_>, unit: &str) + -> zbus::Result<()>; + + /// JobRemoved signal + #[dbus_proxy(signal)] + fn job_removed( + &self, + id: u32, + job: zbus::zvariant::ObjectPath<'_>, + unit: &str, + result: &str, + ) -> zbus::Result<()>; + + /// Reloading signal + #[dbus_proxy(signal)] + fn reloading(&self, active: bool) -> zbus::Result<()>; + + /// StartupFinished signal + #[dbus_proxy(signal)] + fn startup_finished( + &self, + firmware: u64, + loader: u64, + kernel: u64, + initrd: u64, + userspace: u64, + total: u64, + ) -> zbus::Result<()>; + + /// UnitFilesChanged signal + #[dbus_proxy(signal)] + fn unit_files_changed(&self) -> zbus::Result<()>; + + /// UnitNew signal + #[dbus_proxy(signal)] + fn unit_new(&self, id: &str, unit: zbus::zvariant::ObjectPath<'_>) -> zbus::Result<()>; + + /// UnitRemoved signal + #[dbus_proxy(signal)] + fn unit_removed(&self, id: &str, unit: zbus::zvariant::ObjectPath<'_>) -> zbus::Result<()>; + + /// Architecture property + #[dbus_proxy(property)] + fn architecture(&self) -> zbus::Result; + + /// ConfirmSpawn property + #[dbus_proxy(property)] + fn confirm_spawn(&self) -> zbus::Result; + + /// ControlGroup property + #[dbus_proxy(property)] + fn control_group(&self) -> zbus::Result; + + /// CtrlAltDelBurstAction property + #[dbus_proxy(property)] + fn ctrl_alt_del_burst_action(&self) -> zbus::Result; + + /// DefaultBlockIOAccounting property + #[dbus_proxy(property, name = "DefaultBlockIOAccounting")] + fn default_block_ioaccounting(&self) -> zbus::Result; + + /// DefaultCPUAccounting property + #[dbus_proxy(property, name = "DefaultCPUAccounting")] + fn default_cpuaccounting(&self) -> zbus::Result; + + /// DefaultLimitAS property + #[dbus_proxy(property, name = "DefaultLimitAS")] + fn default_limit_as(&self) -> zbus::Result; + + /// DefaultLimitASSoft property + #[dbus_proxy(property, name = "DefaultLimitASSoft")] + fn default_limit_assoft(&self) -> zbus::Result; + + /// DefaultLimitCORE property + #[dbus_proxy(property, name = "DefaultLimitCORE")] + fn default_limit_core(&self) -> zbus::Result; + + /// DefaultLimitCORESoft property + #[dbus_proxy(property, name = "DefaultLimitCORESoft")] + fn default_limit_coresoft(&self) -> zbus::Result; + + /// DefaultLimitCPU property + #[dbus_proxy(property, name = "DefaultLimitCPU")] + fn default_limit_cpu(&self) -> zbus::Result; + + /// DefaultLimitCPUSoft property + #[dbus_proxy(property, name = "DefaultLimitCPUSoft")] + fn default_limit_cpusoft(&self) -> zbus::Result; + + /// DefaultLimitDATA property + #[dbus_proxy(property, name = "DefaultLimitDATA")] + fn default_limit_data(&self) -> zbus::Result; + + /// DefaultLimitDATASoft property + #[dbus_proxy(property, name = "DefaultLimitDATASoft")] + fn default_limit_datasoft(&self) -> zbus::Result; + + /// DefaultLimitFSIZE property + #[dbus_proxy(property, name = "DefaultLimitFSIZE")] + fn default_limit_fsize(&self) -> zbus::Result; + + /// DefaultLimitFSIZESoft property + #[dbus_proxy(property, name = "DefaultLimitFSIZESoft")] + fn default_limit_fsizesoft(&self) -> zbus::Result; + + /// DefaultLimitLOCKS property + #[dbus_proxy(property, name = "DefaultLimitLOCKS")] + fn default_limit_locks(&self) -> zbus::Result; + + 
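// A brief sketch of how these generated blocking proxies are driven, following
// the same pattern as DBusClient::build_proxy()/get_version() earlier in this
// change; the anyhow-based error handling is illustrative.
fn query_systemd_version() -> anyhow::Result<String> {
    use anyhow::Context;

    let connection =
        zbus::blocking::Connection::system().context("Establishing a D-Bus connection")?;
    // zbus's #[dbus_proxy] macro emits a blocking variant of this trait,
    // ManagerProxyBlocking, alongside the async ManagerProxy.
    let proxy =
        ManagerProxyBlocking::new(&connection).context("Building a D-Bus proxy manager")?;
    Ok(proxy.version().context("failed to get systemd version")?)
}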
/// DefaultLimitLOCKSSoft property + #[dbus_proxy(property, name = "DefaultLimitLOCKSSoft")] + fn default_limit_lockssoft(&self) -> zbus::Result; + + /// DefaultLimitMEMLOCK property + #[dbus_proxy(property, name = "DefaultLimitMEMLOCK")] + fn default_limit_memlock(&self) -> zbus::Result; + + /// DefaultLimitMEMLOCKSoft property + #[dbus_proxy(property, name = "DefaultLimitMEMLOCKSoft")] + fn default_limit_memlocksoft(&self) -> zbus::Result; + + /// DefaultLimitMSGQUEUE property + #[dbus_proxy(property, name = "DefaultLimitMSGQUEUE")] + fn default_limit_msgqueue(&self) -> zbus::Result; + + /// DefaultLimitMSGQUEUESoft property + #[dbus_proxy(property, name = "DefaultLimitMSGQUEUESoft")] + fn default_limit_msgqueuesoft(&self) -> zbus::Result; + + /// DefaultLimitNICE property + #[dbus_proxy(property, name = "DefaultLimitNICE")] + fn default_limit_nice(&self) -> zbus::Result; + + /// DefaultLimitNICESoft property + #[dbus_proxy(property, name = "DefaultLimitNICESoft")] + fn default_limit_nicesoft(&self) -> zbus::Result; + + /// DefaultLimitNOFILE property + #[dbus_proxy(property, name = "DefaultLimitNOFILE")] + fn default_limit_nofile(&self) -> zbus::Result; + + /// DefaultLimitNOFILESoft property + #[dbus_proxy(property, name = "DefaultLimitNOFILESoft")] + fn default_limit_nofilesoft(&self) -> zbus::Result; + + /// DefaultLimitNPROC property + #[dbus_proxy(property, name = "DefaultLimitNPROC")] + fn default_limit_nproc(&self) -> zbus::Result; + + /// DefaultLimitNPROCSoft property + #[dbus_proxy(property, name = "DefaultLimitNPROCSoft")] + fn default_limit_nprocsoft(&self) -> zbus::Result; + + /// DefaultLimitRSS property + #[dbus_proxy(property, name = "DefaultLimitRSS")] + fn default_limit_rss(&self) -> zbus::Result; + + /// DefaultLimitRSSSoft property + #[dbus_proxy(property, name = "DefaultLimitRSSSoft")] + fn default_limit_rsssoft(&self) -> zbus::Result; + + /// DefaultLimitRTPRIO property + #[dbus_proxy(property, name = "DefaultLimitRTPRIO")] + fn default_limit_rtprio(&self) -> zbus::Result; + + /// DefaultLimitRTPRIOSoft property + #[dbus_proxy(property, name = "DefaultLimitRTPRIOSoft")] + fn default_limit_rtpriosoft(&self) -> zbus::Result; + + /// DefaultLimitRTTIME property + #[dbus_proxy(property, name = "DefaultLimitRTTIME")] + fn default_limit_rttime(&self) -> zbus::Result; + + /// DefaultLimitRTTIMESoft property + #[dbus_proxy(property, name = "DefaultLimitRTTIMESoft")] + fn default_limit_rttimesoft(&self) -> zbus::Result; + + /// DefaultLimitSIGPENDING property + #[dbus_proxy(property, name = "DefaultLimitSIGPENDING")] + fn default_limit_sigpending(&self) -> zbus::Result; + + /// DefaultLimitSIGPENDINGSoft property + #[dbus_proxy(property, name = "DefaultLimitSIGPENDINGSoft")] + fn default_limit_sigpendingsoft(&self) -> zbus::Result; + + /// DefaultLimitSTACK property + #[dbus_proxy(property, name = "DefaultLimitSTACK")] + fn default_limit_stack(&self) -> zbus::Result; + + /// DefaultLimitSTACKSoft property + #[dbus_proxy(property, name = "DefaultLimitSTACKSoft")] + fn default_limit_stacksoft(&self) -> zbus::Result; + + /// DefaultMemoryAccounting property + #[dbus_proxy(property)] + fn default_memory_accounting(&self) -> zbus::Result; + + /// DefaultOOMPolicy property + #[dbus_proxy(property, name = "DefaultOOMPolicy")] + fn default_oompolicy(&self) -> zbus::Result; + + /// DefaultRestartUSec property + #[dbus_proxy(property, name = "DefaultRestartUSec")] + fn default_restart_usec(&self) -> zbus::Result; + + /// DefaultStandardError property + #[dbus_proxy(property)] + fn 
default_standard_error(&self) -> zbus::Result; + + /// DefaultStandardOutput property + #[dbus_proxy(property)] + fn default_standard_output(&self) -> zbus::Result; + + /// DefaultStartLimitBurst property + #[dbus_proxy(property)] + fn default_start_limit_burst(&self) -> zbus::Result; + + /// DefaultStartLimitIntervalUSec property + #[dbus_proxy(property, name = "DefaultStartLimitIntervalUSec")] + fn default_start_limit_interval_usec(&self) -> zbus::Result; + + /// DefaultTasksAccounting property + #[dbus_proxy(property)] + fn default_tasks_accounting(&self) -> zbus::Result; + + /// DefaultTasksMax property + #[dbus_proxy(property)] + fn default_tasks_max(&self) -> zbus::Result; + + /// DefaultTimeoutAbortUSec property + #[dbus_proxy(property, name = "DefaultTimeoutAbortUSec")] + fn default_timeout_abort_usec(&self) -> zbus::Result; + + /// DefaultTimeoutStartUSec property + #[dbus_proxy(property, name = "DefaultTimeoutStartUSec")] + fn default_timeout_start_usec(&self) -> zbus::Result; + + /// DefaultTimeoutStopUSec property + #[dbus_proxy(property, name = "DefaultTimeoutStopUSec")] + fn default_timeout_stop_usec(&self) -> zbus::Result; + + /// DefaultTimerAccuracyUSec property + #[dbus_proxy(property, name = "DefaultTimerAccuracyUSec")] + fn default_timer_accuracy_usec(&self) -> zbus::Result; + + /// Environment property + #[dbus_proxy(property)] + fn environment(&self) -> zbus::Result>; + + /// ExitCode property + #[dbus_proxy(property)] + fn exit_code(&self) -> zbus::Result; + + /// Features property + #[dbus_proxy(property)] + fn features(&self) -> zbus::Result; + + /// FinishTimestamp property + #[dbus_proxy(property)] + fn finish_timestamp(&self) -> zbus::Result; + + /// FinishTimestampMonotonic property + #[dbus_proxy(property)] + fn finish_timestamp_monotonic(&self) -> zbus::Result; + + /// FirmwareTimestamp property + #[dbus_proxy(property)] + fn firmware_timestamp(&self) -> zbus::Result; + + /// FirmwareTimestampMonotonic property + #[dbus_proxy(property)] + fn firmware_timestamp_monotonic(&self) -> zbus::Result; + + /// GeneratorsFinishTimestamp property + #[dbus_proxy(property)] + fn generators_finish_timestamp(&self) -> zbus::Result; + + /// GeneratorsFinishTimestampMonotonic property + #[dbus_proxy(property)] + fn generators_finish_timestamp_monotonic(&self) -> zbus::Result; + + /// GeneratorsStartTimestamp property + #[dbus_proxy(property)] + fn generators_start_timestamp(&self) -> zbus::Result; + + /// GeneratorsStartTimestampMonotonic property + #[dbus_proxy(property)] + fn generators_start_timestamp_monotonic(&self) -> zbus::Result; + + /// InitRDGeneratorsFinishTimestamp property + #[dbus_proxy(property, name = "InitRDGeneratorsFinishTimestamp")] + fn init_rdgenerators_finish_timestamp(&self) -> zbus::Result; + + /// InitRDGeneratorsFinishTimestampMonotonic property + #[dbus_proxy(property, name = "InitRDGeneratorsFinishTimestampMonotonic")] + fn init_rdgenerators_finish_timestamp_monotonic(&self) -> zbus::Result; + + /// InitRDGeneratorsStartTimestamp property + #[dbus_proxy(property, name = "InitRDGeneratorsStartTimestamp")] + fn init_rdgenerators_start_timestamp(&self) -> zbus::Result; + + /// InitRDGeneratorsStartTimestampMonotonic property + #[dbus_proxy(property, name = "InitRDGeneratorsStartTimestampMonotonic")] + fn init_rdgenerators_start_timestamp_monotonic(&self) -> zbus::Result; + + /// InitRDSecurityFinishTimestamp property + #[dbus_proxy(property, name = "InitRDSecurityFinishTimestamp")] + fn init_rdsecurity_finish_timestamp(&self) -> zbus::Result; + + /// 
InitRDSecurityFinishTimestampMonotonic property + #[dbus_proxy(property, name = "InitRDSecurityFinishTimestampMonotonic")] + fn init_rdsecurity_finish_timestamp_monotonic(&self) -> zbus::Result; + + /// InitRDSecurityStartTimestamp property + #[dbus_proxy(property, name = "InitRDSecurityStartTimestamp")] + fn init_rdsecurity_start_timestamp(&self) -> zbus::Result; + + /// InitRDSecurityStartTimestampMonotonic property + #[dbus_proxy(property, name = "InitRDSecurityStartTimestampMonotonic")] + fn init_rdsecurity_start_timestamp_monotonic(&self) -> zbus::Result; + + /// InitRDTimestamp property + #[dbus_proxy(property, name = "InitRDTimestamp")] + fn init_rdtimestamp(&self) -> zbus::Result; + + /// InitRDTimestampMonotonic property + #[dbus_proxy(property, name = "InitRDTimestampMonotonic")] + fn init_rdtimestamp_monotonic(&self) -> zbus::Result; + + /// InitRDUnitsLoadFinishTimestamp property + #[dbus_proxy(property, name = "InitRDUnitsLoadFinishTimestamp")] + fn init_rdunits_load_finish_timestamp(&self) -> zbus::Result; + + /// InitRDUnitsLoadFinishTimestampMonotonic property + #[dbus_proxy(property, name = "InitRDUnitsLoadFinishTimestampMonotonic")] + fn init_rdunits_load_finish_timestamp_monotonic(&self) -> zbus::Result; + + /// InitRDUnitsLoadStartTimestamp property + #[dbus_proxy(property, name = "InitRDUnitsLoadStartTimestamp")] + fn init_rdunits_load_start_timestamp(&self) -> zbus::Result; + + /// InitRDUnitsLoadStartTimestampMonotonic property + #[dbus_proxy(property, name = "InitRDUnitsLoadStartTimestampMonotonic")] + fn init_rdunits_load_start_timestamp_monotonic(&self) -> zbus::Result; + + /// KExecWatchdogUSec property + #[dbus_proxy(property, name = "KExecWatchdogUSec")] + fn kexec_watchdog_usec(&self) -> zbus::Result; + fn set_kexec_watchdog_usec(&self, value: u64) -> zbus::Result<()>; + + /// KernelTimestamp property + #[dbus_proxy(property)] + fn kernel_timestamp(&self) -> zbus::Result; + + /// KernelTimestampMonotonic property + #[dbus_proxy(property)] + fn kernel_timestamp_monotonic(&self) -> zbus::Result; + + /// LoaderTimestamp property + #[dbus_proxy(property)] + fn loader_timestamp(&self) -> zbus::Result; + + /// LoaderTimestampMonotonic property + #[dbus_proxy(property)] + fn loader_timestamp_monotonic(&self) -> zbus::Result; + + /// LogLevel property + #[dbus_proxy(property)] + fn log_level(&self) -> zbus::Result; + fn set_log_level(&self, value: &str) -> zbus::Result<()>; + + /// LogTarget property + #[dbus_proxy(property)] + fn log_target(&self) -> zbus::Result; + fn set_log_target(&self, value: &str) -> zbus::Result<()>; + + /// NFailedJobs property + #[dbus_proxy(property, name = "NFailedJobs")] + fn nfailed_jobs(&self) -> zbus::Result; + + /// NFailedUnits property + #[dbus_proxy(property, name = "NFailedUnits")] + fn nfailed_units(&self) -> zbus::Result; + + /// NInstalledJobs property + #[dbus_proxy(property, name = "NInstalledJobs")] + fn ninstalled_jobs(&self) -> zbus::Result; + + /// NJobs property + #[dbus_proxy(property, name = "NJobs")] + fn njobs(&self) -> zbus::Result; + + /// NNames property + #[dbus_proxy(property, name = "NNames")] + fn nnames(&self) -> zbus::Result; + + /// Progress property + #[dbus_proxy(property)] + fn progress(&self) -> zbus::Result; + + /// RebootWatchdogUSec property + #[dbus_proxy(property, name = "RebootWatchdogUSec")] + fn reboot_watchdog_usec(&self) -> zbus::Result; + fn set_reboot_watchdog_usec(&self, value: u64) -> zbus::Result<()>; + + /// RuntimeWatchdogUSec property + #[dbus_proxy(property, name = 
"RuntimeWatchdogUSec")] + fn runtime_watchdog_usec(&self) -> zbus::Result; + fn set_runtime_watchdog_usec(&self, value: u64) -> zbus::Result<()>; + + /// SecurityFinishTimestamp property + #[dbus_proxy(property)] + fn security_finish_timestamp(&self) -> zbus::Result; + + /// SecurityFinishTimestampMonotonic property + #[dbus_proxy(property)] + fn security_finish_timestamp_monotonic(&self) -> zbus::Result; + + /// SecurityStartTimestamp property + #[dbus_proxy(property)] + fn security_start_timestamp(&self) -> zbus::Result; + + /// SecurityStartTimestampMonotonic property + #[dbus_proxy(property)] + fn security_start_timestamp_monotonic(&self) -> zbus::Result; + + /// ServiceWatchdogs property + #[dbus_proxy(property)] + fn service_watchdogs(&self) -> zbus::Result; + fn set_service_watchdogs(&self, value: bool) -> zbus::Result<()>; + + /// ShowStatus property + #[dbus_proxy(property)] + fn show_status(&self) -> zbus::Result; + + /// SystemState property + #[dbus_proxy(property)] + fn system_state(&self) -> zbus::Result; + + /// Tainted property + #[dbus_proxy(property)] + fn tainted(&self) -> zbus::Result; + + /// TimerSlackNSec property + #[dbus_proxy(property, name = "TimerSlackNSec")] + fn timer_slack_nsec(&self) -> zbus::Result; + + /// UnitPath property + #[dbus_proxy(property)] + fn unit_path(&self) -> zbus::Result>; + + /// UnitsLoadFinishTimestamp property + #[dbus_proxy(property)] + fn units_load_finish_timestamp(&self) -> zbus::Result; + + /// UnitsLoadFinishTimestampMonotonic property + #[dbus_proxy(property)] + fn units_load_finish_timestamp_monotonic(&self) -> zbus::Result; + + /// UnitsLoadStartTimestamp property + #[dbus_proxy(property)] + fn units_load_start_timestamp(&self) -> zbus::Result; + + /// UnitsLoadStartTimestampMonotonic property + #[dbus_proxy(property)] + fn units_load_start_timestamp_monotonic(&self) -> zbus::Result; + + /// UserspaceTimestamp property + #[dbus_proxy(property)] + fn userspace_timestamp(&self) -> zbus::Result; + + /// UserspaceTimestampMonotonic property + #[dbus_proxy(property)] + fn userspace_timestamp_monotonic(&self) -> zbus::Result; + + /// Version property + #[dbus_proxy(property)] + fn version(&self) -> zbus::Result; + + /// Virtualization property + #[dbus_proxy(property)] + fn virtualization(&self) -> zbus::Result; +} diff --git a/src/agent/rustjail/src/cgroups/systemd/manager.rs b/src/agent/rustjail/src/cgroups/systemd/manager.rs new file mode 100644 index 000000000000..19be1c96100b --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/manager.rs @@ -0,0 +1,132 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +use crate::cgroups::Manager as CgroupManager; +use crate::protocols::agent::CgroupStats; +use anyhow::{anyhow, Result}; +use cgroups::freezer::FreezerState; +use libc::{self, pid_t}; +use oci::LinuxResources; +use std::any::Any; +use std::collections::HashMap; +use std::convert::TryInto; +use std::string::String; +use std::vec; + +use super::super::fs::Manager as FsManager; + +use super::cgroups_path::CgroupsPath; +use super::common::{CgroupHierarchy, Properties}; +use super::dbus_client::{DBusClient, SystemdInterface}; +use super::subsystem::transformer::Transformer; +use super::subsystem::{cpu::Cpu, cpuset::CpuSet, memory::Memory, pids::Pids}; + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Manager { + pub paths: HashMap, + pub mounts: HashMap, + pub cgroups_path: CgroupsPath, + pub cpath: String, + // dbus client for set properties + dbus_client: DBusClient, + 
// fs manager for get properties + fs_manager: FsManager, + // cgroup version for different dbus properties + cg_hierarchy: CgroupHierarchy, +} + +impl CgroupManager for Manager { + fn apply(&self, pid: pid_t) -> Result<()> { + if self.dbus_client.unit_exists()? { + self.dbus_client.add_process(pid)?; + } else { + self.dbus_client.start_unit( + (pid as u32).try_into().unwrap(), + self.cgroups_path.slice.as_str(), + &self.cg_hierarchy, + )?; + } + + Ok(()) + } + + fn set(&self, r: &LinuxResources, _: bool) -> Result<()> { + let mut properties: Properties = vec![]; + + let systemd_version = self.dbus_client.get_version()?; + let systemd_version_str = systemd_version.as_str(); + + Cpu::apply(r, &mut properties, &self.cg_hierarchy, systemd_version_str)?; + Memory::apply(r, &mut properties, &self.cg_hierarchy, systemd_version_str)?; + Pids::apply(r, &mut properties, &self.cg_hierarchy, systemd_version_str)?; + CpuSet::apply(r, &mut properties, &self.cg_hierarchy, systemd_version_str)?; + + self.dbus_client.set_properties(&properties)?; + + Ok(()) + } + + fn get_stats(&self) -> Result { + self.fs_manager.get_stats() + } + + fn freeze(&self, state: FreezerState) -> Result<()> { + match state { + FreezerState::Thawed => self.dbus_client.thaw_unit(), + FreezerState::Frozen => self.dbus_client.freeze_unit(), + _ => Err(anyhow!("Invalid FreezerState")), + } + } + + fn destroy(&mut self) -> Result<()> { + self.dbus_client.kill_unit()?; + self.fs_manager.destroy() + } + + fn get_pids(&self) -> Result> { + self.fs_manager.get_pids() + } + + fn update_cpuset_path(&self, guest_cpuset: &str, container_cpuset: &str) -> Result<()> { + self.fs_manager + .update_cpuset_path(guest_cpuset, container_cpuset) + } + + fn get_cgroup_path(&self, cg: &str) -> Result { + self.fs_manager.get_cgroup_path(cg) + } + + fn as_any(&self) -> Result<&dyn Any> { + Ok(self) + } + + fn name(&self) -> &str { + "systemd" + } +} + +impl Manager { + pub fn new(cgroups_path_str: &str) -> Result { + let cgroups_path = CgroupsPath::new(cgroups_path_str)?; + let (parent_slice, unit_name) = cgroups_path.parse()?; + let cpath = parent_slice + "/" + &unit_name; + + let fs_manager = FsManager::new(cpath.as_str())?; + + Ok(Manager { + paths: fs_manager.paths.clone(), + mounts: fs_manager.mounts.clone(), + cgroups_path, + cpath, + dbus_client: DBusClient::new(unit_name), + fs_manager, + cg_hierarchy: if cgroups::hierarchies::is_cgroup2_unified_mode() { + CgroupHierarchy::Unified + } else { + CgroupHierarchy::Legacy + }, + }) + } +} diff --git a/src/agent/rustjail/src/cgroups/systemd/mod.rs b/src/agent/rustjail/src/cgroups/systemd/mod.rs new file mode 100644 index 000000000000..1bfb49a3646e --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/mod.rs @@ -0,0 +1,12 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub mod manager; + +mod cgroups_path; +mod common; +mod dbus_client; +mod interface; +mod subsystem; diff --git a/src/agent/rustjail/src/cgroups/systemd/subsystem/cpu.rs b/src/agent/rustjail/src/cgroups/systemd/subsystem/cpu.rs new file mode 100644 index 000000000000..7f7667fcd145 --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/subsystem/cpu.rs @@ -0,0 +1,139 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +use super::super::common::{CgroupHierarchy, Properties}; +use super::transformer::Transformer; + +use anyhow::Result; +use oci::{LinuxCpu, LinuxResources}; +use zbus::zvariant::Value; + +const BASIC_SYSTEMD_VERSION: &str = 
"242"; +const DEFAULT_CPUQUOTAPERIOD: u64 = 100 * 1000; +const SEC2MICROSEC: u64 = 1000 * 1000; +const BASIC_INTERVAL: u64 = 10 * 1000; + +pub struct Cpu {} + +impl Transformer for Cpu { + fn apply( + r: &LinuxResources, + properties: &mut Properties, + cgroup_hierarchy: &CgroupHierarchy, + systemd_version: &str, + ) -> Result<()> { + if let Some(cpu_resources) = &r.cpu { + match cgroup_hierarchy { + CgroupHierarchy::Legacy => { + Self::legacy_apply(cpu_resources, properties, systemd_version)? + } + CgroupHierarchy::Unified => { + Self::unified_apply(cpu_resources, properties, systemd_version)? + } + } + } + + Ok(()) + } +} + +impl Cpu { + // v1: + // cpu.shares <-> CPUShares + // cpu.period <-> CPUQuotaPeriodUSec + // cpu.period & cpu.quota <-> CPUQuotaPerSecUSec + fn legacy_apply( + cpu_resources: &LinuxCpu, + properties: &mut Properties, + systemd_version: &str, + ) -> Result<()> { + if let Some(shares) = cpu_resources.shares { + properties.push(("CPUShares", Value::U64(shares))); + } + + if let Some(period) = cpu_resources.period { + if period != 0 && systemd_version >= BASIC_SYSTEMD_VERSION { + properties.push(("CPUQuotaPeriodUSec", Value::U64(period))); + } + } + + if let Some(quota) = cpu_resources.quota { + let period = cpu_resources.period.unwrap_or(DEFAULT_CPUQUOTAPERIOD); + if period != 0 { + let cpu_quota_per_sec_usec = resolve_cpuquota(quota, period); + properties.push(("CPUQuotaPerSecUSec", Value::U64(cpu_quota_per_sec_usec))); + } + } + + Ok(()) + } + + // v2: + // cpu.shares <-> CPUWeight + // cpu.period <-> CPUQuotaPeriodUSec + // cpu.period & cpu.quota <-> CPUQuotaPerSecUSec + fn unified_apply( + cpu_resources: &LinuxCpu, + properties: &mut Properties, + systemd_version: &str, + ) -> Result<()> { + if let Some(shares) = cpu_resources.shares { + let weight = shares_to_weight(shares); + properties.push(("CPUWeight", Value::U64(weight))); + } + + if let Some(period) = cpu_resources.period { + if period != 0 && systemd_version >= BASIC_SYSTEMD_VERSION { + properties.push(("CPUQuotaPeriodUSec", Value::U64(period))); + } + } + + if let Some(quota) = cpu_resources.quota { + let period = cpu_resources.period.unwrap_or(DEFAULT_CPUQUOTAPERIOD); + if period != 0 { + let cpu_quota_per_sec_usec = resolve_cpuquota(quota, period); + properties.push(("CPUQuotaPerSecUSec", Value::U64(cpu_quota_per_sec_usec))); + } + } + + Ok(()) + } +} + +// ref: https://github.com/containers/crun/blob/main/crun.1.md#cgroup-v2 +// [2-262144] to [1-10000] +fn shares_to_weight(shares: u64) -> u64 { + if shares == 0 { + return 100; + } + + 1 + ((shares - 2) * 9999) / 262142 +} + +fn resolve_cpuquota(quota: i64, period: u64) -> u64 { + let mut cpu_quota_per_sec_usec = u64::MAX; + if quota > 0 { + cpu_quota_per_sec_usec = (quota as u64) * SEC2MICROSEC / period; + if cpu_quota_per_sec_usec % BASIC_INTERVAL != 0 { + cpu_quota_per_sec_usec = + ((cpu_quota_per_sec_usec / BASIC_INTERVAL) + 1) * BASIC_INTERVAL; + } + } + cpu_quota_per_sec_usec +} + +#[cfg(test)] +mod tests { + use crate::cgroups::systemd::subsystem::cpu::resolve_cpuquota; + + #[test] + fn test_unified_cpuquota() { + let quota: i64 = 1000000; + let period: u64 = 500000; + let cpu_quota_per_sec_usec = resolve_cpuquota(quota, period); + + assert_eq!(2000000, cpu_quota_per_sec_usec); + } +} diff --git a/src/agent/rustjail/src/cgroups/systemd/subsystem/cpuset.rs b/src/agent/rustjail/src/cgroups/systemd/subsystem/cpuset.rs new file mode 100644 index 000000000000..3f05cdc7c24f --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/subsystem/cpuset.rs 
@@ -0,0 +1,124 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +use super::super::common::{CgroupHierarchy, Properties}; + +use super::transformer::Transformer; + +use anyhow::{bail, Result}; +use bit_vec::BitVec; +use oci::{LinuxCpu, LinuxResources}; +use std::convert::{TryFrom, TryInto}; +use zbus::zvariant::Value; + +const BASIC_SYSTEMD_VERSION: &str = "244"; + +pub struct CpuSet {} + +impl Transformer for CpuSet { + fn apply( + r: &LinuxResources, + properties: &mut Properties, + _: &CgroupHierarchy, + systemd_version: &str, + ) -> Result<()> { + if let Some(cpuset_resources) = &r.cpu { + Self::apply(cpuset_resources, properties, systemd_version)?; + } + + Ok(()) + } +} + +// v1 & v2: +// cpuset.cpus <-> AllowedCPUs (v244) +// cpuset.mems <-> AllowedMemoryNodes (v244) +impl CpuSet { + fn apply( + cpuset_resources: &LinuxCpu, + properties: &mut Properties, + systemd_version: &str, + ) -> Result<()> { + if systemd_version < BASIC_SYSTEMD_VERSION { + return Ok(()); + } + + let cpus = cpuset_resources.cpus.as_str(); + if !cpus.is_empty() { + let cpus_vec: BitMask = cpus.try_into()?; + properties.push(("AllowedCPUs", Value::Array(cpus_vec.0.into()))); + } + + let mems = cpuset_resources.mems.as_str(); + if !mems.is_empty() { + let mems_vec: BitMask = mems.try_into()?; + properties.push(("AllowedMemoryNodes", Value::Array(mems_vec.0.into()))); + } + + Ok(()) + } +} + +struct BitMask(Vec); + +impl TryFrom<&str> for BitMask { + type Error = anyhow::Error; + + fn try_from(bitmask_str: &str) -> Result { + let mut bitmask_vec = BitVec::from_elem(8, false); + let bitmask_str_vec: Vec<&str> = bitmask_str.split(',').collect(); + for bitmask in bitmask_str_vec.iter() { + let range: Vec<&str> = bitmask.split('-').collect(); + match range.len() { + 1 => { + let idx: usize = range[0].parse()?; + while idx >= bitmask_vec.len() { + bitmask_vec.grow(8, false); + } + bitmask_vec.set(adjust_index(idx), true); + } + 2 => { + let left_index = range[0].parse()?; + let right_index = range[1].parse()?; + while right_index >= bitmask_vec.len() { + bitmask_vec.grow(8, false); + } + for idx in left_index..=right_index { + bitmask_vec.set(adjust_index(idx), true); + } + } + _ => bail!("invalid bitmask str {}", bitmask_str), + } + } + let mut result_vec = bitmask_vec.to_bytes(); + result_vec.reverse(); + + Ok(BitMask(result_vec)) + } +} + +#[inline(always)] +fn adjust_index(idx: usize) -> usize { + idx / 8 * 8 + 7 - idx % 8 +} + +#[cfg(test)] +mod tests { + use std::convert::TryInto; + + use crate::cgroups::systemd::subsystem::cpuset::BitMask; + + #[test] + fn test_bitmask_conversion() { + let cpus_vec: BitMask = "2-4".try_into().unwrap(); + assert_eq!(vec![0b11100 as u8], cpus_vec.0); + + let cpus_vec: BitMask = "1,7".try_into().unwrap(); + assert_eq!(vec![0b10000010 as u8], cpus_vec.0); + + let cpus_vec: BitMask = "0,2-3,7".try_into().unwrap(); + assert_eq!(vec![0b10001101 as u8], cpus_vec.0); + } +} diff --git a/src/agent/rustjail/src/cgroups/systemd/subsystem/memory.rs b/src/agent/rustjail/src/cgroups/systemd/subsystem/memory.rs new file mode 100644 index 000000000000..e2ec5343c0d6 --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/subsystem/memory.rs @@ -0,0 +1,117 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +use super::super::common::{CgroupHierarchy, Properties}; + +use super::transformer::Transformer; + +use anyhow::{bail, Result}; +use oci::{LinuxMemory, LinuxResources}; +use zbus::zvariant::Value; + +pub 
struct Memory {} + +impl Transformer for Memory { + fn apply( + r: &LinuxResources, + properties: &mut Properties, + cgroup_hierarchy: &CgroupHierarchy, + _: &str, + ) -> Result<()> { + if let Some(memory_resources) = &r.memory { + match cgroup_hierarchy { + CgroupHierarchy::Legacy => Self::legacy_apply(memory_resources, properties)?, + CgroupHierarchy::Unified => Self::unified_apply(memory_resources, properties)?, + } + } + + Ok(()) + } +} + +impl Memory { + // v1: + // memory.limit <-> MemoryLimit + fn legacy_apply(memory_resources: &LinuxMemory, properties: &mut Properties) -> Result<()> { + if let Some(limit) = memory_resources.limit { + let limit = match limit { + 1..=i64::MAX => limit as u64, + 0 => u64::MAX, + _ => bail!("invalid memory.limit"), + }; + properties.push(("MemoryLimit", Value::U64(limit))); + } + + Ok(()) + } + + // v2: + // memory.low <-> MemoryLow + // memory.max <-> MemoryMax + // memory.swap & memory.limit <-> MemorySwapMax + fn unified_apply(memory_resources: &LinuxMemory, properties: &mut Properties) -> Result<()> { + if let Some(limit) = memory_resources.limit { + let limit = match limit { + 1..=i64::MAX => limit as u64, + 0 => u64::MAX, + _ => bail!("invalid memory.limit: {}", limit), + }; + properties.push(("MemoryMax", Value::U64(limit))); + } + + if let Some(reservation) = memory_resources.reservation { + let reservation = match reservation { + 1..=i64::MAX => reservation as u64, + 0 => u64::MAX, + _ => bail!("invalid memory.reservation: {}", reservation), + }; + properties.push(("MemoryLow", Value::U64(reservation))); + } + + let swap = match memory_resources.swap { + Some(0) => u64::MAX, + Some(1..=i64::MAX) => match memory_resources.limit { + Some(1..=i64::MAX) => { + (memory_resources.limit.unwrap() - memory_resources.swap.unwrap()) as u64 + } + _ => bail!("invalid memory.limit when memory.swap specified"), + }, + None => u64::MAX, + _ => bail!("invalid memory.swap"), + }; + + properties.push(("MemorySwapMax", Value::U64(swap))); + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::Memory; + use super::Properties; + use super::Value; + + #[test] + fn test_unified_memory() { + let memory_resources = oci::LinuxMemory { + limit: Some(736870912), + reservation: Some(536870912), + swap: Some(536870912), + kernel: Some(0), + kernel_tcp: Some(0), + swappiness: Some(0), + disable_oom_killer: Some(false), + }; + let mut properties: Properties = vec![]; + + assert_eq!( + true, + Memory::unified_apply(&memory_resources, &mut properties).is_ok() + ); + + assert_eq!(Value::U64(200000000), properties[2].1); + } +} diff --git a/src/agent/rustjail/src/cgroups/systemd/subsystem/mod.rs b/src/agent/rustjail/src/cgroups/systemd/subsystem/mod.rs new file mode 100644 index 000000000000..6810040a2bd5 --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/subsystem/mod.rs @@ -0,0 +1,10 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub mod cpu; +pub mod cpuset; +pub mod memory; +pub mod pids; +pub mod transformer; diff --git a/src/agent/rustjail/src/cgroups/systemd/subsystem/pids.rs b/src/agent/rustjail/src/cgroups/systemd/subsystem/pids.rs new file mode 100644 index 000000000000..7ff1ee7c635f --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/subsystem/pids.rs @@ -0,0 +1,60 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +use super::super::common::{CgroupHierarchy, Properties}; + +use super::transformer::Transformer; + +use anyhow::Result; +use oci::{LinuxPids, 
LinuxResources}; +use zbus::zvariant::Value; + +pub struct Pids {} + +impl Transformer for Pids { + fn apply( + r: &LinuxResources, + properties: &mut Properties, + _: &CgroupHierarchy, + _: &str, + ) -> Result<()> { + if let Some(pids_resources) = &r.pids { + Self::apply(pids_resources, properties)?; + } + + Ok(()) + } +} + +// pids.limit <-> TasksMax +impl Pids { + fn apply(pids_resources: &LinuxPids, properties: &mut Properties) -> Result<()> { + let limit = if pids_resources.limit > 0 { + pids_resources.limit as u64 + } else { + u64::MAX + }; + + properties.push(("TasksMax", Value::U64(limit))); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::Pids; + use super::Properties; + use super::Value; + + #[test] + fn test_subsystem_workflow() { + let pids_resources = oci::LinuxPids { limit: 0 }; + let mut properties: Properties = vec![]; + + assert_eq!(true, Pids::apply(&pids_resources, &mut properties).is_ok()); + + assert_eq!(Value::U64(u64::MAX), properties[0].1); + } +} diff --git a/src/agent/rustjail/src/cgroups/systemd/subsystem/transformer.rs b/src/agent/rustjail/src/cgroups/systemd/subsystem/transformer.rs new file mode 100644 index 000000000000..952ed4dd2c06 --- /dev/null +++ b/src/agent/rustjail/src/cgroups/systemd/subsystem/transformer.rs @@ -0,0 +1,17 @@ +// Copyright 2021-2022 Kata Contributors +// +// SPDX-License-Identifier: Apache-2.0 +// + +use super::super::common::{CgroupHierarchy, Properties}; +use anyhow::Result; +use oci::LinuxResources; + +pub trait Transformer { + fn apply( + r: &LinuxResources, + properties: &mut Properties, + cgroup_hierarchy: &CgroupHierarchy, + systemd_version: &str, + ) -> Result<()>; +} diff --git a/src/agent/rustjail/src/console.rs b/src/agent/rustjail/src/console.rs index 52e33f392931..3ac351357eec 100644 --- a/src/agent/rustjail/src/console.rs +++ b/src/agent/rustjail/src/console.rs @@ -6,8 +6,9 @@ use anyhow::{anyhow, Result}; use nix::errno::Errno; use nix::pty; -use nix::sys::{socket, uio}; +use nix::sys::socket; use nix::unistd::{self, dup2}; +use std::io::IoSlice; use std::os::unix::io::{AsRawFd, RawFd}; use std::path::Path; @@ -23,10 +24,7 @@ pub fn setup_console_socket(csocket_path: &str) -> Result> { None, )?; - match socket::connect( - socket_fd, - &socket::SockAddr::Unix(socket::UnixAddr::new(Path::new(csocket_path))?), - ) { + match socket::connect(socket_fd, &socket::UnixAddr::new(Path::new(csocket_path))?) 
{ Ok(()) => Ok(Some(socket_fd)), Err(errno) => Err(anyhow!("failed to open console fd: {}", errno)), } @@ -36,11 +34,11 @@ pub fn setup_master_console(socket_fd: RawFd) -> Result<()> { let pseudo = pty::openpty(None, None)?; let pty_name: &[u8] = b"/dev/ptmx"; - let iov = [uio::IoVec::from_slice(pty_name)]; + let iov = [IoSlice::new(pty_name)]; let fds = [pseudo.master]; let cmsg = socket::ControlMessage::ScmRights(&fds); - socket::sendmsg(socket_fd, &iov, &[cmsg], socket::MsgFlags::empty(), None)?; + socket::sendmsg::<()>(socket_fd, &iov, &[cmsg], socket::MsgFlags::empty(), None)?; unistd::setsid()?; let ret = unsafe { libc::ioctl(pseudo.slave, libc::TIOCSCTTY) }; diff --git a/src/agent/rustjail/src/container.rs b/src/agent/rustjail/src/container.rs index f95aaffd8138..b4a0152b8110 100644 --- a/src/agent/rustjail/src/container.rs +++ b/src/agent/rustjail/src/container.rs @@ -6,7 +6,7 @@ use anyhow::{anyhow, Context, Result}; use libc::pid_t; use oci::{ContainerState, LinuxDevice, LinuxIdMapping}; -use oci::{Hook, Linux, LinuxNamespace, LinuxResources, Spec}; +use oci::{Linux, LinuxNamespace, LinuxResources, Spec}; use std::clone::Clone; use std::ffi::CString; use std::fmt::Display; @@ -22,6 +22,7 @@ use crate::capabilities; use crate::cgroups::fs::Manager as FsManager; #[cfg(test)] use crate::cgroups::mock::Manager as FsManager; +use crate::cgroups::systemd::manager::Manager as SystemdManager; use crate::cgroups::Manager; #[cfg(feature = "standard-oci-runtime")] use crate::console; @@ -29,6 +30,7 @@ use crate::log_child; use crate::process::Process; #[cfg(feature = "seccomp")] use crate::seccomp; +use crate::selinux; use crate::specconv::CreateOpts; use crate::{mount, validator}; @@ -46,9 +48,10 @@ use nix::unistd::{self, fork, ForkResult, Gid, Pid, Uid, User}; use std::os::unix::fs::MetadataExt; use std::os::unix::io::AsRawFd; -use protobuf::SingularPtrField; +use protobuf::MessageField; use oci::State as OCIState; +use regex::Regex; use std::collections::HashMap; use std::os::unix::io::FromRawFd; use std::str::FromStr; @@ -64,6 +67,9 @@ use rlimit::{setrlimit, Resource, Rlim}; use tokio::io::AsyncBufReadExt; use tokio::sync::Mutex; +use kata_sys_util::hooks::HookStates; +use kata_sys_util::validate::valid_env; + pub const EXEC_FIFO_FILENAME: &str = "exec.fifo"; const INIT: &str = "INIT"; @@ -74,6 +80,7 @@ const CLOG_FD: &str = "CLOG_FD"; const FIFO_FD: &str = "FIFO_FD"; const HOME_ENV_KEY: &str = "HOME"; const PIDNS_FD: &str = "PIDNS_FD"; +const PIDNS_ENABLED: &str = "PIDNS_ENABLED"; const CONSOLE_SOCKET_FD: &str = "CONSOLE_SOCKET_FD"; #[derive(Debug)] @@ -106,6 +113,10 @@ impl Default for ContainerStatus { } } +// We might want to change this to thiserror in the future +const MissingLinux: &str = "no linux config"; +const InvalidNamespace: &str = "invalid namespace type"; + pub type Config = CreateOpts; type NamespaceType = String; @@ -196,6 +207,8 @@ lazy_static! 
{ }, ] }; + + pub static ref SYSTEMD_CGROUP_PATH_FORMAT:Regex = Regex::new(r"^[\w\-.]*:[\w\-.]*:[\w\-.]*$").unwrap(); } #[derive(Serialize, Deserialize, Debug)] @@ -234,7 +247,7 @@ pub struct LinuxContainer { pub id: String, pub root: String, pub config: Config, - pub cgroup_manager: Option, + pub cgroup_manager: Box, pub init_process_pid: pid_t, pub init_process_start_time: u64, pub uid_map_path: String, @@ -268,6 +281,17 @@ pub struct SyncPc { pid: pid_t, } +#[derive(Debug, Clone)] +pub struct PidNs { + enabled: bool, + fd: Option, +} +impl PidNs { + pub fn new(enabled: bool, fd: Option) -> Self { + Self { enabled, fd } + } +} + pub trait Container: BaseContainer { fn pause(&mut self) -> Result<()>; fn resume(&mut self) -> Result<()>; @@ -283,16 +307,11 @@ impl Container for LinuxContainer { )); } - if self.cgroup_manager.is_some() { - self.cgroup_manager - .as_ref() - .unwrap() - .freeze(FreezerState::Frozen)?; + self.cgroup_manager.as_ref().freeze(FreezerState::Frozen)?; - self.status.transition(ContainerState::Paused); - return Ok(()); - } - Err(anyhow!("failed to get container's cgroup manager")) + self.status.transition(ContainerState::Paused); + + Ok(()) } fn resume(&mut self) -> Result<()> { @@ -301,16 +320,11 @@ impl Container for LinuxContainer { return Err(anyhow!("container status is: {:?}, not paused", status)); } - if self.cgroup_manager.is_some() { - self.cgroup_manager - .as_ref() - .unwrap() - .freeze(FreezerState::Thawed)?; + self.cgroup_manager.as_ref().freeze(FreezerState::Thawed)?; - self.status.transition(ContainerState::Running); - return Ok(()); - } - Err(anyhow!("failed to get container's cgroup manager")) + self.status.transition(ContainerState::Running); + + Ok(()) } } @@ -337,16 +351,20 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { let crfd = std::env::var(CRFD_FD)?.parse::().unwrap(); let cfd_log = std::env::var(CLOG_FD)?.parse::().unwrap(); - // get the pidns fd from parent, if parent had passed the pidns fd, - // then get it and join in this pidns; otherwise, create a new pidns - // by unshare from the parent pidns. - match std::env::var(PIDNS_FD) { - Ok(fd) => { - let pidns_fd = fd.parse::().context("get parent pidns fd")?; - sched::setns(pidns_fd, CloneFlags::CLONE_NEWPID).context("failed to join pidns")?; - let _ = unistd::close(pidns_fd); + if std::env::var(PIDNS_ENABLED)?.eq(format!("{}", true).as_str()) { + // get the pidns fd from parent, if parent had passed the pidns fd, + // then get it and join in this pidns; otherwise, create a new pidns + // by unshare from the parent pidns. 
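+ // PIDNS_ENABLED is set by the parent from PidNs.enabled; when it is "false" the
+ // child neither joins nor unshares a pid namespace and simply stays in the
+ // agent's pid namespace.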
+ match std::env::var(PIDNS_FD) { + Ok(fd) => { + let pidns_fd = fd.parse::().context("get parent pidns fd")?; + sched::setns(pidns_fd, CloneFlags::CLONE_NEWPID).context("failed to join pidns")?; + let _ = unistd::close(pidns_fd); + } + Err(_e) => { + sched::unshare(CloneFlags::CLONE_NEWPID)?; + } } - Err(_e) => sched::unshare(CloneFlags::CLONE_NEWPID)?, } match unsafe { fork() } { @@ -372,20 +390,27 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { let buf = read_sync(crfd)?; let spec_str = std::str::from_utf8(&buf)?; let spec: oci::Spec = serde_json::from_str(spec_str)?; - log_child!(cfd_log, "notify parent to send oci process"); write_sync(cwfd, SYNC_SUCCESS, "")?; let buf = read_sync(crfd)?; let process_str = std::str::from_utf8(&buf)?; let oci_process: oci::Process = serde_json::from_str(process_str)?; + log_child!(cfd_log, "notify parent to send oci state"); + write_sync(cwfd, SYNC_SUCCESS, "")?; + + let buf = read_sync(crfd)?; + let state_str = std::str::from_utf8(&buf)?; + let mut state: oci::State = serde_json::from_str(state_str)?; log_child!(cfd_log, "notify parent to send cgroup manager"); write_sync(cwfd, SYNC_SUCCESS, "")?; let buf = read_sync(crfd)?; let cm_str = std::str::from_utf8(&buf)?; - let cm: FsManager = serde_json::from_str(cm_str)?; + // deserialize cm_str into FsManager and SystemdManager separately + let fs_cm: Result = serde_json::from_str(cm_str); + let systemd_cm: Result = serde_json::from_str(cm_str); #[cfg(feature = "standard-oci-runtime")] let csocket_fd = console::setup_console_socket(&std::env::var(CONSOLE_SOCKET_FD)?)?; @@ -397,7 +422,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { }; if spec.linux.is_none() { - return Err(anyhow!("no linux config")); + return Err(anyhow!(MissingLinux)); } let linux = spec.linux.as_ref().unwrap(); @@ -411,7 +436,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { for ns in &nses { let s = NAMESPACES.get(&ns.r#type.as_str()); if s.is_none() { - return Err(anyhow!("invalid ns type")); + return Err(anyhow!(InvalidNamespace)); } let s = s.unwrap(); @@ -526,6 +551,8 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { } } + let selinux_enabled = selinux::is_enabled()?; + sched::unshare(to_new & !CloneFlags::CLONE_NEWUSER)?; if userns { @@ -543,7 +570,18 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { if to_new.contains(CloneFlags::CLONE_NEWNS) { // setup rootfs - mount::init_rootfs(cfd_log, &spec, &cm.paths, &cm.mounts, bind_device)?; + if let Ok(systemd_cm) = systemd_cm { + mount::init_rootfs( + cfd_log, + &spec, + &systemd_cm.paths, + &systemd_cm.mounts, + bind_device, + )?; + } else { + let fs_cm = fs_cm.unwrap(); + mount::init_rootfs(cfd_log, &spec, &fs_cm.paths, &fs_cm.mounts, bind_device)?; + } } if init { @@ -616,6 +654,18 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { capctl::prctl::set_no_new_privs().map_err(|_| anyhow!("cannot set no new privileges"))?; } + // Set SELinux label + if !oci_process.selinux_label.is_empty() { + if !selinux_enabled { + return Err(anyhow!( + "SELinux label for the process is provided but SELinux is not enabled on the running kernel" + )); + } + + log_child!(cfd_log, "Set SELinux label to the container process"); + selinux::set_exec_label(&oci_process.selinux_label)?; + } + // Log unknown seccomp system calls in advance before the log file descriptor closes. 
#[cfg(feature = "seccomp")] if let Some(ref scmp) = linux.seccomp { @@ -714,6 +764,19 @@ fn do_init_child(cwfd: RawFd) -> Result<()> { unistd::read(fd, buf)?; } + if init { + // StartContainer Hooks: + // * should be run in container namespace + // * should be run after container is created and before container is started (before user-specific command is executed) + // * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#startcontainer-hooks + state.pid = std::process::id() as i32; + state.status = oci::ContainerState::Created; + if let Some(hooks) = spec.hooks.as_ref() { + let mut start_container_states = HookStates::new(); + start_container_states.execute_hooks(&hooks.start_container, Some(state))?; + } + } + // With NoNewPrivileges, we should set seccomp as close to // do_exec as possible in order to reduce the amount of // system calls in the seccomp profiles. @@ -825,22 +888,17 @@ impl BaseContainer for LinuxContainer { } fn stats(&self) -> Result { - let mut r = StatsContainerResponse::default(); - - if self.cgroup_manager.is_some() { - r.cgroup_stats = - SingularPtrField::some(self.cgroup_manager.as_ref().unwrap().get_stats()?); - } - // what about network interface stats? - Ok(r) + Ok(StatsContainerResponse { + cgroup_stats: MessageField::some(self.cgroup_manager.as_ref().get_stats()?), + ..Default::default() + }) } fn set(&mut self, r: LinuxResources) -> Result<()> { - if self.cgroup_manager.is_some() { - self.cgroup_manager.as_ref().unwrap().set(&r, true)?; - } + self.cgroup_manager.as_ref().set(&r, true)?; + self.config .spec .as_mut() @@ -941,9 +999,13 @@ impl BaseContainer for LinuxContainer { } let pidns = get_pid_namespace(&self.logger, linux)?; + #[cfg(not(feature = "standard-oci-runtime"))] + if !pidns.enabled { + return Err(anyhow!("cannot find the pid ns")); + } - defer!(if let Some(pid) = pidns { - let _ = unistd::close(pid); + defer!(if let Some(fd) = pidns.fd { + let _ = unistd::close(fd); }); let exec_path = std::env::current_exe()?; @@ -966,14 +1028,15 @@ impl BaseContainer for LinuxContainer { .env(CRFD_FD, format!("{}", crfd)) .env(CWFD_FD, format!("{}", cwfd)) .env(CLOG_FD, format!("{}", cfd_log)) - .env(CONSOLE_SOCKET_FD, console_name); + .env(CONSOLE_SOCKET_FD, console_name) + .env(PIDNS_ENABLED, format!("{}", pidns.enabled)); if p.init { child = child.env(FIFO_FD, format!("{}", fifofd)); } - if pidns.is_some() { - child = child.env(PIDNS_FD, format!("{}", pidns.unwrap())); + if pidns.fd.is_some() { + child = child.env(PIDNS_FD, format!("{}", pidns.fd.unwrap())); } child.spawn()?; @@ -1013,7 +1076,8 @@ impl BaseContainer for LinuxContainer { &logger, spec, &p, - self.cgroup_manager.as_ref().unwrap(), + self.cgroup_manager.as_ref(), + self.config.use_systemd_cgroup, &st, &mut pipe_w, &mut pipe_r, @@ -1076,12 +1140,14 @@ impl BaseContainer for LinuxContainer { } } - if spec.hooks.is_some() { - info!(self.logger, "poststop"); - let hooks = spec.hooks.as_ref().unwrap(); - for h in hooks.poststop.iter() { - execute_hook(&self.logger, h, &st).await?; - } + // guest Poststop hook + // * should be executed after the container is deleted but before the delete operation returns + // * the executable file is in agent namespace + // * should also be executed in agent namespace. 
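+ // HookStates comes from kata_sys_util::hooks and replaces the execute_hook()
+ // helper removed further down in this patch; execute_hooks() runs every hook in
+ // the list, passing the container's OCI state (Some(st)).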
+ if let Some(hooks) = spec.hooks.as_ref() { + info!(self.logger, "guest Poststop hook"); + let mut hook_states = HookStates::new(); + hook_states.execute_hooks(&hooks.poststop, Some(st))?; } self.status.transition(ContainerState::Stopped); @@ -1091,9 +1157,19 @@ impl BaseContainer for LinuxContainer { )?; fs::remove_dir_all(&self.root)?; - if let Some(cgm) = self.cgroup_manager.as_mut() { - cgm.destroy().context("destroy cgroups")?; + let cgm = self.cgroup_manager.as_mut(); + // Kill all of the processes created in this container to prevent + // the leak of some daemon process when this container shared pidns + // with the sandbox. + let pids = cgm.get_pids().context("get cgroup pids")?; + for i in pids { + if let Err(e) = signal::kill(Pid::from_raw(i), Signal::SIGKILL) { + warn!(self.logger, "kill the process {} error: {:?}", i, e); + } } + + cgm.destroy().context("destroy cgroups")?; + Ok(()) } @@ -1117,16 +1193,14 @@ impl BaseContainer for LinuxContainer { .ok_or_else(|| anyhow!("OCI spec was not found"))?; let st = self.oci_state()?; - // run poststart hook - if spec.hooks.is_some() { - info!(self.logger, "poststart hook"); - let hooks = spec - .hooks - .as_ref() - .ok_or_else(|| anyhow!("OCI hooks were not found"))?; - for h in hooks.poststart.iter() { - execute_hook(&self.logger, h, &st).await?; - } + // guest Poststart hook + // * should be executed after the container is started but before the delete operation returns + // * the executable file is in agent namespace + // * should also be executed in agent namespace. + if let Some(hooks) = spec.hooks.as_ref() { + info!(self.logger, "guest Poststart hook"); + let mut hook_states = HookStates::new(); + hook_states.execute_hooks(&hooks.poststart, Some(st))?; } unistd::close(fd)?; @@ -1196,11 +1270,11 @@ pub fn update_namespaces(logger: &Logger, spec: &mut Spec, init_pid: RawFd) -> R Ok(()) } -fn get_pid_namespace(logger: &Logger, linux: &Linux) -> Result> { +fn get_pid_namespace(logger: &Logger, linux: &Linux) -> Result { for ns in &linux.namespaces { if ns.r#type == "pid" { if ns.path.is_empty() { - return Ok(None); + return Ok(PidNs::new(true, None)); } let fd = @@ -1216,11 +1290,11 @@ fn get_pid_namespace(logger: &Logger, linux: &Linux) -> Result> { e })?; - return Ok(Some(fd)); + return Ok(PidNs::new(true, Some(fd))); } } - Err(anyhow!("cannot find the pid ns")) + Ok(PidNs::new(false, None)) } fn is_userns_enabled(linux: &Linux) -> bool { @@ -1265,11 +1339,13 @@ pub fn setup_child_logger(fd: RawFd, child_logger: Logger) -> tokio::task::JoinH }) } +#[allow(clippy::too_many_arguments)] async fn join_namespaces( logger: &Logger, spec: &Spec, p: &Process, - cm: &FsManager, + cm: &(dyn Manager + Send + Sync), + use_systemd_cgroup: bool, st: &OCIState, pipe_w: &mut PipeStream, pipe_r: &mut PipeStream, @@ -1286,7 +1362,6 @@ async fn join_namespaces( write_async(pipe_w, SYNC_DATA, spec_str.as_str()).await?; info!(logger, "wait child received oci spec"); - read_async(pipe_r).await?; info!(logger, "send oci process from parent to child"); @@ -1296,7 +1371,18 @@ async fn join_namespaces( info!(logger, "wait child received oci process"); read_async(pipe_r).await?; - let cm_str = serde_json::to_string(cm)?; + info!(logger, "try to send state from parent to child"); + let state_str = serde_json::to_string(st)?; + write_async(pipe_w, SYNC_DATA, state_str.as_str()).await?; + + info!(logger, "wait child received oci state"); + read_async(pipe_r).await?; + + let cm_str = if use_systemd_cgroup { + 
serde_json::to_string(cm.as_any()?.downcast_ref::().unwrap()) + } else { + serde_json::to_string(cm.as_any()?.downcast_ref::().unwrap()) + }?; write_async(pipe_w, SYNC_DATA, cm_str.as_str()).await?; // wait child setup user namespace @@ -1319,15 +1405,18 @@ async fn join_namespaces( } // apply cgroups - if p.init && res.is_some() { - info!(logger, "apply cgroups!"); - cm.set(res.unwrap(), false)?; - } - + // For FsManger, it's no matter about the order of apply and set. + // For SystemdManger, apply must be precede set because we can only create a systemd unit with specific processes(pids). if res.is_some() { + info!(logger, "apply processes to cgroups!"); cm.apply(p.pid)?; } + if p.init && res.is_some() { + info!(logger, "set properties to cgroups!"); + cm.set(res.unwrap(), false)?; + } + info!(logger, "notify child to continue"); // notify child to continue write_async(pipe_w, SYNC_SUCCESS, "").await?; @@ -1338,13 +1427,14 @@ async fn join_namespaces( info!(logger, "get ready to run prestart hook!"); - // run prestart hook - if spec.hooks.is_some() { - info!(logger, "prestart hook"); - let hooks = spec.hooks.as_ref().unwrap(); - for h in hooks.prestart.iter() { - execute_hook(&logger, h, st).await?; - } + // guest Prestart hook + // * should be executed during the start operation, and before the container command is executed + // * the executable file is in agent namespace + // * should also be executed in agent namespace. + if let Some(hooks) = spec.hooks.as_ref() { + info!(logger, "guest Prestart hook"); + let mut hook_states = HookStates::new(); + hook_states.execute_hooks(&hooks.prestart, Some(st.clone()))?; } // notify child run prestart hooks completed @@ -1427,33 +1517,44 @@ impl LinuxContainer { Some(unistd::getuid()), Some(unistd::getgid()), ) - .context(format!("cannot change onwer of container {} root", id))?; - - if config.spec.is_none() { - return Err(anyhow!(nix::Error::EINVAL)); - } + .context(format!("Cannot change owner of container {} root", id))?; let spec = config.spec.as_ref().unwrap(); - - if spec.linux.is_none() { - return Err(anyhow!(nix::Error::EINVAL)); - } - let linux = spec.linux.as_ref().unwrap(); - - let cpath = if linux.cgroups_path.is_empty() { + let cpath = if config.use_systemd_cgroup { + if linux.cgroups_path.len() == 2 { + format!("system.slice:kata_agent:{}", id.as_str()) + } else { + linux.cgroups_path.clone() + } + } else if linux.cgroups_path.is_empty() { format!("/{}", id.as_str()) } else { - linux.cgroups_path.clone() + // if we have a systemd cgroup path we need to convert it to a fs cgroup path + linux.cgroups_path.replace(':', "/") }; - let cgroup_manager = FsManager::new(cpath.as_str())?; + let cgroup_manager: Box = if config.use_systemd_cgroup { + Box::new(SystemdManager::new(cpath.as_str()).map_err(|e| { + anyhow!(format!( + "fail to create cgroup manager with path {}: {:}", + cpath, e + )) + })?) + } else { + Box::new(FsManager::new(cpath.as_str()).map_err(|e| { + anyhow!(format!( + "fail to create cgroup manager with path {}: {:}", + cpath, e + )) + })?) 
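+ // (Either backend ends up boxed behind the shared Manager trait, so callers use
+ // cgroup_manager uniformly regardless of use_systemd_cgroup.)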
+ }; info!(logger, "new cgroup_manager {:?}", &cgroup_manager); Ok(LinuxContainer { id: id.clone(), root, - cgroup_manager: Some(cgroup_manager), + cgroup_manager, status: ContainerStatus::new(), uid_map_path: String::from(""), gid_map_path: "".to_string(), @@ -1505,266 +1606,19 @@ fn set_sysctls(sysctls: &HashMap) -> Result<()> { Ok(()) } -use std::process::Stdio; -use std::time::Duration; -use tokio::io::{AsyncReadExt, AsyncWriteExt}; - -pub async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> { - let logger = logger.new(o!("action" => "execute-hook")); - - let binary = PathBuf::from(h.path.as_str()); - let path = binary.canonicalize()?; - if !path.exists() { - return Err(anyhow!(nix::Error::EINVAL)); - } - - let mut args = h.args.clone(); - // the hook.args[0] is the hook binary name which shouldn't be included - // in the Command.args - if args.len() > 1 { - args.remove(0); - } - - // all invalid envs will be omitted, only valid envs will be passed to hook. - let env: HashMap<&str, &str> = h.env.iter().filter_map(|e| valid_env(e)).collect(); - - // Avoid the exit signal to be reaped by the global reaper. - let _wait_locker = WAIT_PID_LOCKER.lock().await; - let mut child = tokio::process::Command::new(path) - .args(args.iter()) - .envs(env.iter()) - .kill_on_drop(true) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn()?; - - // default timeout 10s - let mut timeout: u64 = 10; - - // if timeout is set if hook, then use the specified value - if let Some(t) = h.timeout { - if t > 0 { - timeout = t as u64; - } - } - - let state = serde_json::to_string(st)?; - let path = h.path.clone(); - - let join_handle = tokio::spawn(async move { - if let Some(mut stdin) = child.stdin.take() { - match stdin.write_all(state.as_bytes()).await { - Ok(_) => {} - Err(e) => { - info!(logger, "write to child stdin failed: {:?}", e); - } - } - } - - // read something from stdout and stderr for debug - if let Some(stdout) = child.stdout.as_mut() { - let mut out = String::new(); - match stdout.read_to_string(&mut out).await { - Ok(_) => { - info!(logger, "child stdout: {}", out.as_str()); - } - Err(e) => { - info!(logger, "read from child stdout failed: {:?}", e); - } - } - } - - let mut err = String::new(); - if let Some(stderr) = child.stderr.as_mut() { - match stderr.read_to_string(&mut err).await { - Ok(_) => { - info!(logger, "child stderr: {}", err.as_str()); - } - Err(e) => { - info!(logger, "read from child stderr failed: {:?}", e); - } - } - } - - match child.wait().await { - Ok(exit) => { - let code = exit - .code() - .ok_or_else(|| anyhow!("hook exit status has no status code"))?; - - if code != 0 { - error!( - logger, - "hook {} exit status is {}, error message is {}", &path, code, err - ); - return Err(anyhow!(nix::Error::UnknownErrno)); - } - - debug!(logger, "hook {} exit status is 0", &path); - Ok(()) - } - Err(e) => Err(anyhow!( - "wait child error: {} {}", - e, - e.raw_os_error().unwrap() - )), - } - }); - - match tokio::time::timeout(Duration::new(timeout, 0), join_handle).await { - Ok(r) => r.unwrap(), - Err(_) => Err(anyhow!(nix::Error::ETIMEDOUT)), - } -} - -// valid environment variables according to https://doc.rust-lang.org/std/env/fn.set_var.html#panics -fn valid_env(e: &str) -> Option<(&str, &str)> { - // wherther key or value will contain NULL char. 
- if e.as_bytes().contains(&b'\0') { - return None; - } - - let v: Vec<&str> = e.splitn(2, '=').collect(); - - // key can't hold an `equal` sign, but value can - if v.len() != 2 { - return None; - } - - let (key, value) = (v[0].trim(), v[1].trim()); - - // key can't be empty - if key.is_empty() { - return None; - } - - Some((key, value)) -} - #[cfg(test)] mod tests { use super::*; use crate::process::Process; - use crate::skip_if_not_root; use nix::unistd::Uid; use std::fs; use std::os::unix::fs::MetadataExt; use std::os::unix::io::AsRawFd; use tempfile::tempdir; - use tokio::process::Command; - - macro_rules! sl { - () => { - slog_scope::logger() - }; - } - - async fn which(cmd: &str) -> String { - let output: std::process::Output = Command::new("which") - .arg(cmd) - .output() - .await - .expect("which command failed to run"); + use test_utils::skip_if_not_root; - match String::from_utf8(output.stdout) { - Ok(v) => v.trim_end_matches('\n').to_string(), - Err(e) => panic!("Invalid UTF-8 sequence: {}", e), - } - } - - #[tokio::test] - async fn test_execute_hook() { - let temp_file = "/tmp/test_execute_hook"; - - let touch = which("touch").await; - - defer!(fs::remove_file(temp_file).unwrap();); - let invalid_str = vec![97, b'\0', 98]; - let invalid_string = std::str::from_utf8(&invalid_str).unwrap(); - let invalid_env = format!("{}=value", invalid_string); - - execute_hook( - &slog_scope::logger(), - &Hook { - path: touch, - args: vec!["touch".to_string(), temp_file.to_string()], - env: vec![invalid_env], - timeout: Some(10), - }, - &OCIState { - version: "1.2.3".to_string(), - id: "321".to_string(), - status: ContainerState::Running, - pid: 2, - bundle: "".to_string(), - annotations: Default::default(), - }, - ) - .await - .unwrap(); - - assert_eq!(Path::new(&temp_file).exists(), true); - } - - #[tokio::test] - async fn test_execute_hook_with_error() { - let ls = which("ls").await; - - let res = execute_hook( - &slog_scope::logger(), - &Hook { - path: ls, - args: vec!["ls".to_string(), "/tmp/not-exist".to_string()], - env: vec![], - timeout: None, - }, - &OCIState { - version: "1.2.3".to_string(), - id: "321".to_string(), - status: ContainerState::Running, - pid: 2, - bundle: "".to_string(), - annotations: Default::default(), - }, - ) - .await; - - let expected_err = nix::Error::UnknownErrno; - assert_eq!( - res.unwrap_err().downcast::().unwrap(), - expected_err - ); - } - - #[tokio::test] - async fn test_execute_hook_with_timeout() { - let sleep = which("sleep").await; - - let res = execute_hook( - &slog_scope::logger(), - &Hook { - path: sleep, - args: vec!["sleep".to_string(), "2".to_string()], - env: vec![], - timeout: Some(1), - }, - &OCIState { - version: "1.2.3".to_string(), - id: "321".to_string(), - status: ContainerState::Running, - pid: 2, - bundle: "".to_string(), - annotations: Default::default(), - }, - ) - .await; - - let expected_err = nix::Error::ETIMEDOUT; - assert_eq!( - res.unwrap_err().downcast::().unwrap(), - expected_err - ); + fn sl() -> slog::Logger { + slog_scope::logger() } #[test] @@ -1919,20 +1773,12 @@ mod tests { assert!(format!("{:?}", ret).contains("failed to pause container")) } - #[test] - fn test_linuxcontainer_pause_cgroupmgr_is_none() { - let ret = new_linux_container_and_then(|mut c: LinuxContainer| { - c.cgroup_manager = None; - c.pause().map_err(|e| anyhow!(e)) - }); - - assert!(ret.is_err(), "Expecting error, Got {:?}", ret); - } - #[test] fn test_linuxcontainer_pause() { let ret = new_linux_container_and_then(|mut c: LinuxContainer| { - 
c.cgroup_manager = FsManager::new("").ok(); + c.cgroup_manager = Box::new(FsManager::new("").map_err(|e| { + anyhow!(format!("fail to create cgroup manager with path: {:}", e)) + })?); c.pause().map_err(|e| anyhow!(e)) }); @@ -1951,21 +1797,12 @@ mod tests { assert!(format!("{:?}", ret).contains("not paused")) } - #[test] - fn test_linuxcontainer_resume_cgroupmgr_is_none() { - let ret = new_linux_container_and_then(|mut c: LinuxContainer| { - c.status.transition(ContainerState::Paused); - c.cgroup_manager = None; - c.resume().map_err(|e| anyhow!(e)) - }); - - assert!(ret.is_err(), "Expecting error, Got {:?}", ret); - } - #[test] fn test_linuxcontainer_resume() { let ret = new_linux_container_and_then(|mut c: LinuxContainer| { - c.cgroup_manager = FsManager::new("").ok(); + c.cgroup_manager = Box::new(FsManager::new("").map_err(|e| { + anyhow!(format!("fail to create cgroup manager with path: {:}", e)) + })?); // Change status to paused, this way we can resume it c.status.transition(ContainerState::Paused); c.resume().map_err(|e| anyhow!(e)) @@ -2036,7 +1873,7 @@ mod tests { let _ = new_linux_container_and_then(|mut c: LinuxContainer| { c.processes.insert( 1, - Process::new(&sl!(), &oci::Process::default(), "123", true, 1).unwrap(), + Process::new(&sl(), &oci::Process::default(), "123", true, 1).unwrap(), ); let p = c.get_process("123"); assert!(p.is_ok(), "Expecting Ok, Got {:?}", p); @@ -2063,7 +1900,7 @@ mod tests { let (c, _dir) = new_linux_container(); let ret = c .unwrap() - .start(Process::new(&sl!(), &oci::Process::default(), "123", true, 1).unwrap()) + .start(Process::new(&sl(), &oci::Process::default(), "123", true, 1).unwrap()) .await; assert!(ret.is_err(), "Expecting Err, Got {:?}", ret); } @@ -2073,7 +1910,7 @@ mod tests { let (c, _dir) = new_linux_container(); let ret = c .unwrap() - .run(Process::new(&sl!(), &oci::Process::default(), "123", true, 1).unwrap()) + .run(Process::new(&sl(), &oci::Process::default(), "123", true, 1).unwrap()) .await; assert!(ret.is_err(), "Expecting Err, Got {:?}", ret); } @@ -2098,49 +1935,4 @@ mod tests { let ret = do_init_child(std::io::stdin().as_raw_fd()); assert!(ret.is_err(), "Expecting Err, Got {:?}", ret); } - - #[test] - fn test_valid_env() { - let env = valid_env("a=b=c"); - assert_eq!(Some(("a", "b=c")), env); - - let env = valid_env("a=b"); - assert_eq!(Some(("a", "b")), env); - let env = valid_env("a =b"); - assert_eq!(Some(("a", "b")), env); - - let env = valid_env(" a =b"); - assert_eq!(Some(("a", "b")), env); - - let env = valid_env("a= b"); - assert_eq!(Some(("a", "b")), env); - - let env = valid_env("a=b "); - assert_eq!(Some(("a", "b")), env); - let env = valid_env("a=b c "); - assert_eq!(Some(("a", "b c")), env); - - let env = valid_env("=b"); - assert_eq!(None, env); - - let env = valid_env("a="); - assert_eq!(Some(("a", "")), env); - - let env = valid_env("a=="); - assert_eq!(Some(("a", "=")), env); - - let env = valid_env("a"); - assert_eq!(None, env); - - let invalid_str = vec![97, b'\0', 98]; - let invalid_string = std::str::from_utf8(&invalid_str).unwrap(); - - let invalid_env = format!("{}=value", invalid_string); - let env = valid_env(&invalid_env); - assert_eq!(None, env); - - let invalid_env = format!("key={}", invalid_string); - let env = valid_env(&invalid_env); - assert_eq!(None, env); - } } diff --git a/src/agent/rustjail/src/lib.rs b/src/agent/rustjail/src/lib.rs index dafac6381e86..d3647f42eb32 100644 --- a/src/agent/rustjail/src/lib.rs +++ b/src/agent/rustjail/src/lib.rs @@ -38,6 +38,7 @@ pub mod pipestream; 
pub mod process; #[cfg(feature = "seccomp")] pub mod seccomp; +pub mod selinux; pub mod specconv; pub mod sync; pub mod sync_with_async; @@ -81,11 +82,11 @@ pub fn process_grpc_to_oci(p: &grpc::Process) -> oci::Process { let cap = p.Capabilities.as_ref().unwrap(); Some(oci::LinuxCapabilities { - bounding: cap.Bounding.clone().into_vec(), - effective: cap.Effective.clone().into_vec(), - inheritable: cap.Inheritable.clone().into_vec(), - permitted: cap.Permitted.clone().into_vec(), - ambient: cap.Ambient.clone().into_vec(), + bounding: cap.Bounding.clone(), + effective: cap.Effective.clone(), + inheritable: cap.Inheritable.clone(), + permitted: cap.Permitted.clone(), + ambient: cap.Ambient.clone(), }) } else { None @@ -107,8 +108,8 @@ pub fn process_grpc_to_oci(p: &grpc::Process) -> oci::Process { terminal: p.Terminal, console_size, user, - args: p.Args.clone().into_vec(), - env: p.Env.clone().into_vec(), + args: p.Args.clone(), + env: p.Env.clone(), cwd: p.Cwd.clone(), capabilities, rlimits, @@ -129,9 +130,9 @@ fn root_grpc_to_oci(root: &grpc::Root) -> oci::Root { fn mount_grpc_to_oci(m: &grpc::Mount) -> oci::Mount { oci::Mount { destination: m.destination.clone(), - r#type: m.field_type.clone(), + r#type: m.type_.clone(), source: m.source.clone(), - options: m.options.clone().into_vec(), + options: m.options.clone(), } } @@ -142,8 +143,8 @@ fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec { for e in h.iter() { r.push(oci::Hook { path: e.Path.clone(), - args: e.Args.clone().into_vec(), - env: e.Env.clone().into_vec(), + args: e.Args.clone(), + env: e.Env.clone(), timeout: Some(e.Timeout as i32), }); } @@ -152,13 +153,17 @@ fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec { fn hooks_grpc_to_oci(h: &grpc::Hooks) -> oci::Hooks { let prestart = hook_grpc_to_oci(h.Prestart.as_ref()); - + let create_runtime = hook_grpc_to_oci(h.CreateRuntime.as_ref()); + let create_container = hook_grpc_to_oci(h.CreateContainer.as_ref()); + let start_container = hook_grpc_to_oci(h.StartContainer.as_ref()); let poststart = hook_grpc_to_oci(h.Poststart.as_ref()); - let poststop = hook_grpc_to_oci(h.Poststop.as_ref()); oci::Hooks { prestart, + create_runtime, + create_container, + start_container, poststart, poststop, } @@ -354,7 +359,7 @@ fn seccomp_grpc_to_oci(sec: &grpc::LinuxSeccomp) -> oci::LinuxSeccomp { let mut args = Vec::new(); let errno_ret: u32 = if sys.has_errnoret() { - sys.get_errnoret() + sys.errnoret() } else { libc::EPERM as u32 }; @@ -369,7 +374,7 @@ fn seccomp_grpc_to_oci(sec: &grpc::LinuxSeccomp) -> oci::LinuxSeccomp { } r.push(oci::LinuxSyscall { - names: sys.Names.clone().into_vec(), + names: sys.Names.clone(), action: sys.Action.clone(), errno_ret, args, @@ -380,8 +385,8 @@ fn seccomp_grpc_to_oci(sec: &grpc::LinuxSeccomp) -> oci::LinuxSeccomp { oci::LinuxSeccomp { default_action: sec.DefaultAction.clone(), - architectures: sec.Architectures.clone().into_vec(), - flags: sec.Flags.clone().into_vec(), + architectures: sec.Architectures.clone(), + flags: sec.Flags.clone(), syscalls, } } @@ -418,12 +423,18 @@ fn linux_grpc_to_oci(l: &grpc::Linux) -> oci::Linux { let mut r = Vec::new(); for d in l.Devices.iter() { + // if the filemode for the device is 0 (unset), use a default value as runc does + let filemode = if d.FileMode != 0 { + Some(d.FileMode) + } else { + Some(0o666) + }; r.push(oci::LinuxDevice { path: d.Path.clone(), r#type: d.Type.clone(), major: d.Major, minor: d.Minor, - file_mode: Some(d.FileMode), + file_mode: filemode, uid: Some(d.UID), gid: Some(d.GID), }); @@ -451,8 +462,8 @@ fn 
linux_grpc_to_oci(l: &grpc::Linux) -> oci::Linux { devices, seccomp, rootfs_propagation: l.RootfsPropagation.clone(), - masked_paths: l.MaskedPaths.clone().into_vec(), - readonly_paths: l.ReadonlyPaths.clone().into_vec(), + masked_paths: l.MaskedPaths.clone(), + readonly_paths: l.ReadonlyPaths.clone(), mount_label: l.MountLabel.clone(), intel_rdt, } @@ -514,15 +525,6 @@ pub fn grpc_to_oci(grpc: &grpc::Spec) -> oci::Spec { #[cfg(test)] mod tests { use super::*; - #[macro_export] - macro_rules! skip_if_not_root { - () => { - if !nix::unistd::Uid::effective().is_root() { - println!("INFO: skipping {} which needs root", module_path!()); - return; - } - }; - } // Parameters: // @@ -562,35 +564,30 @@ mod tests { // All fields specified grpcproc: grpc::Process { Terminal: true, - ConsoleSize: protobuf::SingularPtrField::::some(grpc::Box { + ConsoleSize: protobuf::MessageField::::some(grpc::Box { Height: 123, Width: 456, ..Default::default() }), - User: protobuf::SingularPtrField::::some(grpc::User { + User: protobuf::MessageField::::some(grpc::User { UID: 1234, GID: 5678, AdditionalGids: Vec::from([910, 1112]), Username: String::from("username"), ..Default::default() }), - Args: protobuf::RepeatedField::from(Vec::from([ - String::from("arg1"), - String::from("arg2"), - ])), - Env: protobuf::RepeatedField::from(Vec::from([String::from("env")])), + Args: Vec::from([String::from("arg1"), String::from("arg2")]), + Env: Vec::from([String::from("env")]), Cwd: String::from("cwd"), - Capabilities: protobuf::SingularPtrField::some(grpc::LinuxCapabilities { - Bounding: protobuf::RepeatedField::from(Vec::from([String::from("bnd")])), - Effective: protobuf::RepeatedField::from(Vec::from([String::from("eff")])), - Inheritable: protobuf::RepeatedField::from(Vec::from([String::from( - "inher", - )])), - Permitted: protobuf::RepeatedField::from(Vec::from([String::from("perm")])), - Ambient: protobuf::RepeatedField::from(Vec::from([String::from("amb")])), + Capabilities: protobuf::MessageField::some(grpc::LinuxCapabilities { + Bounding: Vec::from([String::from("bnd")]), + Effective: Vec::from([String::from("eff")]), + Inheritable: Vec::from([String::from("inher")]), + Permitted: Vec::from([String::from("perm")]), + Ambient: Vec::from([String::from("amb")]), ..Default::default() }), - Rlimits: protobuf::RepeatedField::from(Vec::from([ + Rlimits: Vec::from([ grpc::POSIXRlimit { Type: String::from("r#type"), Hard: 123, @@ -603,7 +600,7 @@ mod tests { Soft: 1011, ..Default::default() }, - ])), + ]), NoNewPrivileges: true, ApparmorProfile: String::from("apparmor profile"), OOMScoreAdj: 123456, @@ -653,7 +650,7 @@ mod tests { TestData { // None ConsoleSize grpcproc: grpc::Process { - ConsoleSize: protobuf::SingularPtrField::::none(), + ConsoleSize: protobuf::MessageField::::none(), OOMScoreAdj: 0, ..Default::default() }, @@ -666,7 +663,7 @@ mod tests { TestData { // None User grpcproc: grpc::Process { - User: protobuf::SingularPtrField::::none(), + User: protobuf::MessageField::::none(), OOMScoreAdj: 0, ..Default::default() }, @@ -684,7 +681,7 @@ mod tests { TestData { // None Capabilities grpcproc: grpc::Process { - Capabilities: protobuf::SingularPtrField::none(), + Capabilities: protobuf::MessageField::none(), OOMScoreAdj: 0, ..Default::default() }, @@ -785,60 +782,57 @@ mod tests { TestData { // All specified grpchooks: grpc::Hooks { - Prestart: protobuf::RepeatedField::from(Vec::from([ + Prestart: Vec::from([ grpc::Hook { Path: String::from("prestartpath"), - Args: protobuf::RepeatedField::from(Vec::from([ - 
String::from("arg1"), - String::from("arg2"), - ])), - Env: protobuf::RepeatedField::from(Vec::from([ - String::from("env1"), - String::from("env2"), - ])), + Args: Vec::from([String::from("arg1"), String::from("arg2")]), + Env: Vec::from([String::from("env1"), String::from("env2")]), Timeout: 10, ..Default::default() }, grpc::Hook { Path: String::from("prestartpath2"), - Args: protobuf::RepeatedField::from(Vec::from([ - String::from("arg3"), - String::from("arg4"), - ])), - Env: protobuf::RepeatedField::from(Vec::from([ - String::from("env3"), - String::from("env4"), - ])), + Args: Vec::from([String::from("arg3"), String::from("arg4")]), + Env: Vec::from([String::from("env3"), String::from("env4")]), Timeout: 25, ..Default::default() }, - ])), - Poststart: protobuf::RepeatedField::from(Vec::from([grpc::Hook { + ]), + Poststart: Vec::from([grpc::Hook { Path: String::from("poststartpath"), - Args: protobuf::RepeatedField::from(Vec::from([ - String::from("arg1"), - String::from("arg2"), - ])), - Env: protobuf::RepeatedField::from(Vec::from([ - String::from("env1"), - String::from("env2"), - ])), + Args: Vec::from([String::from("arg1"), String::from("arg2")]), + Env: Vec::from([String::from("env1"), String::from("env2")]), Timeout: 10, ..Default::default() - }])), - Poststop: protobuf::RepeatedField::from(Vec::from([grpc::Hook { + }]), + Poststop: Vec::from([grpc::Hook { Path: String::from("poststoppath"), - Args: protobuf::RepeatedField::from(Vec::from([ - String::from("arg1"), - String::from("arg2"), - ])), - Env: protobuf::RepeatedField::from(Vec::from([ - String::from("env1"), - String::from("env2"), - ])), + Args: Vec::from([String::from("arg1"), String::from("arg2")]), + Env: Vec::from([String::from("env1"), String::from("env2")]), + Timeout: 10, + ..Default::default() + }]), + CreateRuntime: Vec::from([grpc::Hook { + Path: String::from("createruntimepath"), + Args: Vec::from([String::from("arg1"), String::from("arg2")]), + Env: Vec::from([String::from("env1"), String::from("env2")]), + Timeout: 10, + ..Default::default() + }]), + CreateContainer: Vec::from([grpc::Hook { + Path: String::from("createcontainerpath"), + Args: Vec::from([String::from("arg1"), String::from("arg2")]), + Env: Vec::from([String::from("env1"), String::from("env2")]), + Timeout: 10, + ..Default::default() + }]), + StartContainer: Vec::from([grpc::Hook { + Path: String::from("startcontainerpath"), + Args: Vec::from([String::from("arg1"), String::from("arg2")]), + Env: Vec::from([String::from("env1"), String::from("env2")]), Timeout: 10, ..Default::default() - }])), + }]), ..Default::default() }, result: oci::Hooks { @@ -868,38 +862,65 @@ mod tests { env: Vec::from([String::from("env1"), String::from("env2")]), timeout: Some(10), }]), + create_runtime: Vec::from([oci::Hook { + path: String::from("createruntimepath"), + args: Vec::from([String::from("arg1"), String::from("arg2")]), + env: Vec::from([String::from("env1"), String::from("env2")]), + timeout: Some(10), + }]), + create_container: Vec::from([oci::Hook { + path: String::from("createcontainerpath"), + args: Vec::from([String::from("arg1"), String::from("arg2")]), + env: Vec::from([String::from("env1"), String::from("env2")]), + timeout: Some(10), + }]), + start_container: Vec::from([oci::Hook { + path: String::from("startcontainerpath"), + args: Vec::from([String::from("arg1"), String::from("arg2")]), + env: Vec::from([String::from("env1"), String::from("env2")]), + timeout: Some(10), + }]), }, }, TestData { // Prestart empty grpchooks: grpc::Hooks { - 
Prestart: protobuf::RepeatedField::from(Vec::from([])), - Poststart: protobuf::RepeatedField::from(Vec::from([grpc::Hook { + Prestart: Vec::from([]), + Poststart: Vec::from([grpc::Hook { Path: String::from("poststartpath"), - Args: protobuf::RepeatedField::from(Vec::from([ - String::from("arg1"), - String::from("arg2"), - ])), - Env: protobuf::RepeatedField::from(Vec::from([ - String::from("env1"), - String::from("env2"), - ])), + Args: Vec::from([String::from("arg1"), String::from("arg2")]), + Env: Vec::from([String::from("env1"), String::from("env2")]), Timeout: 10, ..Default::default() - }])), - Poststop: protobuf::RepeatedField::from(Vec::from([grpc::Hook { + }]), + Poststop: Vec::from([grpc::Hook { Path: String::from("poststoppath"), - Args: protobuf::RepeatedField::from(Vec::from([ - String::from("arg1"), - String::from("arg2"), - ])), - Env: protobuf::RepeatedField::from(Vec::from([ - String::from("env1"), - String::from("env2"), - ])), + Args: Vec::from([String::from("arg1"), String::from("arg2")]), + Env: Vec::from([String::from("env1"), String::from("env2")]), Timeout: 10, ..Default::default() - }])), + }]), + CreateRuntime: Vec::from([grpc::Hook { + Path: String::from("createruntimepath"), + Args: Vec::from([String::from("arg1"), String::from("arg2")]), + Env: Vec::from([String::from("env1"), String::from("env2")]), + Timeout: 10, + ..Default::default() + }]), + CreateContainer: Vec::from([grpc::Hook { + Path: String::from("createcontainerpath"), + Args: Vec::from([String::from("arg1"), String::from("arg2")]), + Env: Vec::from([String::from("env1"), String::from("env2")]), + Timeout: 10, + ..Default::default() + }]), + StartContainer: Vec::from([grpc::Hook { + Path: String::from("startcontainerpath"), + Args: Vec::from([String::from("arg1"), String::from("arg2")]), + Env: Vec::from([String::from("env1"), String::from("env2")]), + Timeout: 10, + ..Default::default() + }]), ..Default::default() }, result: oci::Hooks { @@ -916,6 +937,24 @@ mod tests { env: Vec::from([String::from("env1"), String::from("env2")]), timeout: Some(10), }]), + create_runtime: Vec::from([oci::Hook { + path: String::from("createruntimepath"), + args: Vec::from([String::from("arg1"), String::from("arg2")]), + env: Vec::from([String::from("env1"), String::from("env2")]), + timeout: Some(10), + }]), + create_container: Vec::from([oci::Hook { + path: String::from("createcontainerpath"), + args: Vec::from([String::from("arg1"), String::from("arg2")]), + env: Vec::from([String::from("env1"), String::from("env2")]), + timeout: Some(10), + }]), + start_container: Vec::from([oci::Hook { + path: String::from("startcontainerpath"), + args: Vec::from([String::from("arg1"), String::from("arg2")]), + env: Vec::from([String::from("env1"), String::from("env2")]), + timeout: Some(10), + }]), }, }, ]; @@ -953,11 +992,8 @@ mod tests { grpcmount: grpc::Mount { destination: String::from("destination"), source: String::from("source"), - field_type: String::from("fieldtype"), - options: protobuf::RepeatedField::from(Vec::from([ - String::from("option1"), - String::from("option2"), - ])), + type_: String::from("fieldtype"), + options: Vec::from([String::from("option1"), String::from("option2")]), ..Default::default() }, result: oci::Mount { @@ -971,8 +1007,8 @@ mod tests { grpcmount: grpc::Mount { destination: String::from("destination"), source: String::from("source"), - field_type: String::from("fieldtype"), - options: protobuf::RepeatedField::from(Vec::new()), + type_: String::from("fieldtype"), + options: Vec::new(), 
..Default::default() }, result: oci::Mount { @@ -986,8 +1022,8 @@ mod tests { grpcmount: grpc::Mount { destination: String::new(), source: String::from("source"), - field_type: String::from("fieldtype"), - options: protobuf::RepeatedField::from(Vec::from([String::from("option1")])), + type_: String::from("fieldtype"), + options: Vec::from([String::from("option1")]), ..Default::default() }, result: oci::Mount { @@ -1001,8 +1037,8 @@ mod tests { grpcmount: grpc::Mount { destination: String::from("destination"), source: String::from("source"), - field_type: String::new(), - options: protobuf::RepeatedField::from(Vec::from([String::from("option1")])), + type_: String::new(), + options: Vec::from([String::from("option1")]), ..Default::default() }, result: oci::Mount { @@ -1062,27 +1098,15 @@ mod tests { grpchook: &[ grpc::Hook { Path: String::from("path"), - Args: protobuf::RepeatedField::from(Vec::from([ - String::from("arg1"), - String::from("arg2"), - ])), - Env: protobuf::RepeatedField::from(Vec::from([ - String::from("env1"), - String::from("env2"), - ])), + Args: Vec::from([String::from("arg1"), String::from("arg2")]), + Env: Vec::from([String::from("env1"), String::from("env2")]), Timeout: 10, ..Default::default() }, grpc::Hook { Path: String::from("path2"), - Args: protobuf::RepeatedField::from(Vec::from([ - String::from("arg3"), - String::from("arg4"), - ])), - Env: protobuf::RepeatedField::from(Vec::from([ - String::from("env3"), - String::from("env4"), - ])), + Args: Vec::from([String::from("arg3"), String::from("arg4")]), + Env: Vec::from([String::from("env3"), String::from("env4")]), Timeout: 20, ..Default::default() }, diff --git a/src/agent/rustjail/src/mount.rs b/src/agent/rustjail/src/mount.rs index 74742c0ffe47..b822736dcf23 100644 --- a/src/agent/rustjail/src/mount.rs +++ b/src/agent/rustjail/src/mount.rs @@ -25,6 +25,7 @@ use std::fs::File; use std::io::{BufRead, BufReader}; use crate::container::DEFAULT_DEVICES; +use crate::selinux; use crate::sync::write_count; use std::string::ToString; @@ -34,7 +35,7 @@ use crate::log_child; // struct is populated from the content in the /proc//mountinfo file. #[derive(std::fmt::Debug, PartialEq)] pub struct Info { - mount_point: String, + pub mount_point: String, optional: String, fstype: String, } @@ -181,6 +182,8 @@ pub fn init_rootfs( None => flags |= MsFlags::MS_SLAVE, } + let label = &linux.mount_label; + let root = spec .root .as_ref() @@ -244,7 +247,7 @@ pub fn init_rootfs( } } - mount_from(cfd_log, m, rootfs, flags, &data, "")?; + mount_from(cfd_log, m, rootfs, flags, &data, label)?; // bind mount won't change mount options, we need remount to make mount options // effective. 
// first check that we have non-default options required before attempting a @@ -524,7 +527,6 @@ pub fn pivot_rootfs(path: &P) -> Result<( fn rootfs_parent_mount_private(path: &str) -> Result<()> { let mount_infos = parse_mount_table(MOUNTINFO_PATH)?; - let mut max_len = 0; let mut mount_point = String::from(""); let mut options = String::from(""); @@ -551,7 +553,7 @@ fn rootfs_parent_mount_private(path: &str) -> Result<()> { // Parse /proc/self/mountinfo because comparing Dev and ino does not work from // bind mounts -fn parse_mount_table(mountinfo_path: &str) -> Result> { +pub fn parse_mount_table(mountinfo_path: &str) -> Result> { let file = File::open(mountinfo_path)?; let reader = BufReader::new(file); let mut infos = Vec::new(); @@ -767,9 +769,9 @@ fn mount_from( rootfs: &str, flags: MsFlags, data: &str, - _label: &str, + label: &str, ) -> Result<()> { - let d = String::from(data); + let mut d = String::from(data); let dest = secure_join(rootfs, &m.destination); let src = if m.r#type.as_str() == "bind" { @@ -780,18 +782,31 @@ fn mount_from( Path::new(&dest).parent().unwrap() }; - let _ = fs::create_dir_all(&dir).map_err(|e| { + fs::create_dir_all(dir).map_err(|e| { log_child!( cfd_log, "create dir {}: {}", dir.to_str().unwrap(), e.to_string() - ) - }); + ); + e + })?; // make sure file exists so we can bind over it if !src.is_dir() { - let _ = OpenOptions::new().create(true).write(true).open(&dest); + let _ = OpenOptions::new() + .create(true) + .write(true) + .open(&dest) + .map_err(|e| { + log_child!( + cfd_log, + "open/create dest error. {}: {:?}", + dest.as_str(), + e + ); + e + })?; } src.to_str().unwrap().to_string() } else { @@ -804,8 +819,41 @@ fn mount_from( } }; - let _ = stat::stat(dest.as_str()) - .map_err(|e| log_child!(cfd_log, "dest stat error. {}: {:?}", dest.as_str(), e)); + let _ = stat::stat(dest.as_str()).map_err(|e| { + log_child!(cfd_log, "dest stat error. {}: {:?}", dest.as_str(), e); + e + })?; + + // Set the SELinux context for the mounts + let mut use_xattr = false; + if !label.is_empty() { + if selinux::is_enabled()? { + let device = Path::new(&m.source) + .file_name() + .ok_or_else(|| anyhow!("invalid device source path: {}", &m.source))? + .to_str() + .ok_or_else(|| anyhow!("failed to convert device source path: {}", &m.source))?; + + match device { + // SELinux does not support labeling of /proc or /sys + "proc" | "sysfs" => (), + // SELinux does not support mount labeling against /dev/mqueue, + // so we use setxattr instead + "mqueue" => { + use_xattr = true; + } + _ => { + log_child!(cfd_log, "add SELinux mount label to {}", dest.as_str()); + selinux::add_mount_label(&mut d, label); + } + } + } else { + log_child!( + cfd_log, + "SELinux label for the mount is provided but SELinux is not enabled on the running kernel" + ); + } + } mount( Some(src.as_str()), @@ -819,6 +867,10 @@ fn mount_from( e })?; + if !label.is_empty() && selinux::is_enabled()? 
&& use_xattr { + xattr::set(dest.as_str(), "security.selinux", label.as_bytes())?; + } + if flags.contains(MsFlags::MS_BIND) && flags.intersects( !(MsFlags::MS_REC @@ -1005,9 +1057,7 @@ pub fn finish_rootfs(cfd_log: RawFd, spec: &Spec, process: &Process) -> Result<( } fn mask_path(path: &str) -> Result<()> { - if !path.starts_with('/') || path.contains("..") { - return Err(anyhow!(nix::Error::EINVAL)); - } + check_paths(path)?; match mount( Some("/dev/null"), @@ -1025,9 +1075,7 @@ fn mask_path(path: &str) -> Result<()> { } fn readonly_path(path: &str) -> Result<()> { - if !path.starts_with('/') || path.contains("..") { - return Err(anyhow!(nix::Error::EINVAL)); - } + check_paths(path)?; if let Err(e) = mount( Some(&path[1..]), @@ -1053,18 +1101,29 @@ fn readonly_path(path: &str) -> Result<()> { Ok(()) } +fn check_paths(path: &str) -> Result<()> { + if !path.starts_with('/') || path.contains("..") { + return Err(anyhow!( + "Cannot mount {} (path does not start with '/' or contains '..').", + path + )); + } + Ok(()) +} + #[cfg(test)] mod tests { use super::*; use crate::assert_result; - use crate::skip_if_not_root; use std::fs::create_dir; use std::fs::create_dir_all; use std::fs::remove_dir_all; + use std::fs::remove_file; use std::io; use std::os::unix::fs; use std::os::unix::io::AsRawFd; use tempfile::tempdir; + use test_utils::skip_if_not_root; #[test] #[serial(chdir)] @@ -1275,14 +1334,9 @@ mod tests { fn test_mknod_dev() { skip_if_not_root!(); - let tempdir = tempdir().unwrap(); - - let olddir = unistd::getcwd().unwrap(); - defer!(let _ = unistd::chdir(&olddir);); - let _ = unistd::chdir(tempdir.path()); - + let path = "/dev/fifo-test"; let dev = oci::LinuxDevice { - path: "/fifo".to_string(), + path: path.to_string(), r#type: "c".to_string(), major: 0, minor: 0, @@ -1290,13 +1344,16 @@ mod tests { uid: Some(unistd::getuid().as_raw()), gid: Some(unistd::getgid().as_raw()), }; - let path = Path::new("fifo"); - let ret = mknod_dev(&dev, path); + let ret = mknod_dev(&dev, Path::new(path)); assert!(ret.is_ok(), "Should pass. Got: {:?}", ret); let ret = stat::stat(path); assert!(ret.is_ok(), "Should pass. Got: {:?}", ret); + + // clear test device node + let ret = remove_file(path); + assert!(ret.is_ok(), "Should pass, Got: {:?}", ret); } #[test] @@ -1405,6 +1462,55 @@ mod tests { } } + #[test] + fn test_check_paths() { + #[derive(Debug)] + struct TestData<'a> { + name: &'a str, + path: &'a str, + result: Result<()>, + } + + let tests = &[ + TestData { + name: "valid path", + path: "/foo/bar", + result: Ok(()), + }, + TestData { + name: "does not starts with /", + path: "foo/bar", + result: Err(anyhow!( + "Cannot mount foo/bar (path does not start with '/' or contains '..')." + )), + }, + TestData { + name: "contains ..", + path: "../foo/bar", + result: Err(anyhow!( + "Cannot mount ../foo/bar (path does not start with '/' or contains '..')." 
+ )), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d.name); + + let result = check_paths(d.path); + + let msg = format!("{}: result: {:?}", msg, result); + + if d.result.is_ok() { + assert!(result.is_ok()); + continue; + } + + let expected_error = format!("{}", d.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } + } + #[test] fn test_check_proc_mount() { let mount = oci::Mount { diff --git a/src/agent/rustjail/src/process.rs b/src/agent/rustjail/src/process.rs index d94b595cc174..cdecae130882 100644 --- a/src/agent/rustjail/src/process.rs +++ b/src/agent/rustjail/src/process.rs @@ -28,7 +28,6 @@ macro_rules! close_process_stream { ($self: ident, $stream:ident, $stream_type: ident) => { if $self.$stream.is_some() { $self.close_stream(StreamType::$stream_type); - let _ = unistd::close($self.$stream.unwrap()); $self.$stream = None; } }; @@ -162,7 +161,7 @@ impl Process { pub fn notify_term_close(&mut self) { let notify = self.term_exit_notifier.clone(); - notify.notify_one(); + notify.notify_waiters(); } pub fn close_stdin(&mut self) { @@ -225,7 +224,7 @@ impl Process { Some(writer) } - pub fn close_stream(&mut self, stream_type: StreamType) { + fn close_stream(&mut self, stream_type: StreamType) { let _ = self.readers.remove(&stream_type); let _ = self.writers.remove(&stream_type); } diff --git a/src/agent/rustjail/src/seccomp.rs b/src/agent/rustjail/src/seccomp.rs index 3496a45d8aa6..4b0c89515431 100644 --- a/src/agent/rustjail/src/seccomp.rs +++ b/src/agent/rustjail/src/seccomp.rs @@ -26,12 +26,15 @@ fn get_rule_conditions(args: &[LinuxSeccompArg]) -> Result> return Err(anyhow!("seccomp opreator is required")); } - let cond = ScmpArgCompare::new( - arg.index, - ScmpCompareOp::from_str(&arg.op)?, - arg.value, - Some(arg.value_two), - ); + let mut op = ScmpCompareOp::from_str(&arg.op)?; + let mut value = arg.value; + // For SCMP_CMP_MASKED_EQ, arg.value is the mask and arg.value_two is the value + if op == ScmpCompareOp::MaskedEqual(u64::default()) { + op = ScmpCompareOp::MaskedEqual(arg.value); + value = arg.value_two; + } + + let cond = ScmpArgCompare::new(arg.index, op, value); conditions.push(cond); } @@ -44,7 +47,7 @@ pub fn get_unknown_syscalls(scmp: &LinuxSeccomp) -> Option> { for syscall in &scmp.syscalls { for name in &syscall.names { - if get_syscall_from_name(name, None).is_err() { + if ScmpSyscall::from_name(name).is_err() { unknown_syscalls.push(name.to_string()); } } @@ -60,7 +63,7 @@ pub fn get_unknown_syscalls(scmp: &LinuxSeccomp) -> Option> { // init_seccomp creates a seccomp filter and loads it for the current process // including all the child processes. 
pub fn init_seccomp(scmp: &LinuxSeccomp) -> Result<()> { - let def_action = ScmpAction::from_str(scmp.default_action.as_str(), Some(libc::EPERM as u32))?; + let def_action = ScmpAction::from_str(scmp.default_action.as_str(), Some(libc::EPERM))?; // Create a new filter context let mut filter = ScmpFilterContext::new_filter(def_action)?; @@ -72,7 +75,7 @@ pub fn init_seccomp(scmp: &LinuxSeccomp) -> Result<()> { } // Unset no new privileges bit - filter.set_no_new_privs_bit(false)?; + filter.set_ctl_nnp(false)?; // Add a rule for each system call for syscall in &scmp.syscalls { @@ -80,13 +83,13 @@ pub fn init_seccomp(scmp: &LinuxSeccomp) -> Result<()> { return Err(anyhow!("syscall name is required")); } - let action = ScmpAction::from_str(&syscall.action, Some(syscall.errno_ret))?; + let action = ScmpAction::from_str(&syscall.action, Some(syscall.errno_ret as i32))?; if action == def_action { continue; } for name in &syscall.names { - let syscall_num = match get_syscall_from_name(name, None) { + let syscall_num = match ScmpSyscall::from_name(name) { Ok(num) => num, Err(_) => { // If we cannot resolve the given system call, we assume it is not supported @@ -96,10 +99,10 @@ pub fn init_seccomp(scmp: &LinuxSeccomp) -> Result<()> { }; if syscall.args.is_empty() { - filter.add_rule(action, syscall_num, None)?; + filter.add_rule(action, syscall_num)?; } else { let conditions = get_rule_conditions(&syscall.args)?; - filter.add_rule(action, syscall_num, Some(&conditions))?; + filter.add_rule_conditional(action, syscall_num, &conditions)?; } } } @@ -119,10 +122,10 @@ pub fn init_seccomp(scmp: &LinuxSeccomp) -> Result<()> { #[cfg(test)] mod tests { use super::*; - use crate::skip_if_not_root; use libc::{dup3, process_vm_readv, EPERM, O_CLOEXEC}; use std::io::Error; use std::ptr::null; + use test_utils::skip_if_not_root; macro_rules! 
syscall_assert { ($e1: expr, $e2: expr) => { diff --git a/src/agent/rustjail/src/selinux.rs b/src/agent/rustjail/src/selinux.rs new file mode 100644 index 000000000000..5a647e3cc46f --- /dev/null +++ b/src/agent/rustjail/src/selinux.rs @@ -0,0 +1,80 @@ +// Copyright 2022 Sony Group Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{Context, Result}; +use nix::unistd::gettid; +use std::fs::{self, OpenOptions}; +use std::io::prelude::*; +use std::path::Path; + +pub fn is_enabled() -> Result { + let buf = fs::read_to_string("/proc/mounts")?; + let enabled = buf.contains("selinuxfs"); + + Ok(enabled) +} + +pub fn add_mount_label(data: &mut String, label: &str) { + if data.is_empty() { + let context = format!("context=\"{}\"", label); + data.push_str(&context); + } else { + let context = format!(",context=\"{}\"", label); + data.push_str(&context); + } +} + +pub fn set_exec_label(label: &str) -> Result<()> { + let mut attr_path = Path::new("/proc/thread-self/attr/exec").to_path_buf(); + if !attr_path.exists() { + // Fall back to the old convention + attr_path = Path::new("/proc/self/task") + .join(gettid().to_string()) + .join("attr/exec") + } + + let mut file = OpenOptions::new() + .write(true) + .truncate(true) + .open(attr_path)?; + file.write_all(label.as_bytes()) + .with_context(|| "failed to apply SELinux label")?; + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + const TEST_LABEL: &str = "system_u:system_r:unconfined_t:s0"; + + #[test] + fn test_is_enabled() { + let ret = is_enabled(); + assert!(ret.is_ok(), "Expecting Ok, Got {:?}", ret); + } + + #[test] + fn test_add_mount_label() { + let mut data = String::new(); + add_mount_label(&mut data, TEST_LABEL); + assert_eq!(data, format!("context=\"{}\"", TEST_LABEL)); + + let mut data = String::from("defaults"); + add_mount_label(&mut data, TEST_LABEL); + assert_eq!(data, format!("defaults,context=\"{}\"", TEST_LABEL)); + } + + #[test] + fn test_set_exec_label() { + let ret = set_exec_label(TEST_LABEL); + if is_enabled().unwrap() { + assert!(ret.is_ok(), "Expecting Ok, Got {:?}", ret); + } else { + assert!(ret.is_err(), "Expecting error, Got {:?}", ret); + } + } +} diff --git a/src/agent/rustjail/src/validator.rs b/src/agent/rustjail/src/validator.rs index 3b5aeb3619e5..4955fbf46669 100644 --- a/src/agent/rustjail/src/validator.rs +++ b/src/agent/rustjail/src/validator.rs @@ -4,17 +4,16 @@ // use crate::container::Config; -use anyhow::{anyhow, Context, Error, Result}; +use anyhow::{anyhow, Context, Result}; use oci::{Linux, LinuxIdMapping, LinuxNamespace, Spec}; +use regex::Regex; use std::collections::HashMap; use std::path::{Component, PathBuf}; -fn einval() -> Error { - anyhow!(nix::Error::EINVAL) -} - fn get_linux(oci: &Spec) -> Result<&Linux> { - oci.linux.as_ref().ok_or_else(einval) + oci.linux + .as_ref() + .ok_or_else(|| anyhow!("Unable to get Linux section from Spec")) } fn contain_namespace(nses: &[LinuxNamespace], key: &str) -> bool { @@ -31,7 +30,10 @@ fn rootfs(root: &str) -> Result<()> { let path = PathBuf::from(root); // not absolute path or not exists if !path.exists() || !path.is_absolute() { - return Err(einval()); + return Err(anyhow!( + "Path from {:?} does not exist or is not absolute", + root + )); } // symbolic link? ..? 
@@ -49,7 +51,7 @@ fn rootfs(root: &str) -> Result<()> { if let Some(v) = c.as_os_str().to_str() { stack.push(v.to_string()); } else { - return Err(einval()); + return Err(anyhow!("Invalid path component (unable to convert to str)")); } } @@ -58,10 +60,13 @@ fn rootfs(root: &str) -> Result<()> { cleaned.push(e); } - let canon = path.canonicalize().context("canonicalize")?; + let canon = path.canonicalize().context("failed to canonicalize path")?; if cleaned != canon { // There is symbolic in path - return Err(einval()); + return Err(anyhow!( + "There may be illegal symbols in the path name. Cleaned ({:?}) and canonicalized ({:?}) paths do not match", + cleaned, + canon)); } Ok(()) @@ -74,7 +79,7 @@ fn hostname(oci: &Spec) -> Result<()> { let linux = get_linux(oci)?; if !contain_namespace(&linux.namespaces, "uts") { - return Err(einval()); + return Err(anyhow!("Linux namespace does not contain uts")); } Ok(()) @@ -82,17 +87,32 @@ fn hostname(oci: &Spec) -> Result<()> { fn security(oci: &Spec) -> Result<()> { let linux = get_linux(oci)?; + let label_pattern = r".*_u:.*_r:.*_t:s[0-9]|1[0-5].*"; + let label_regex = Regex::new(label_pattern)?; + + if let Some(ref process) = oci.process { + if !process.selinux_label.is_empty() && !label_regex.is_match(&process.selinux_label) { + return Err(anyhow!( + "SELinux label for the process is invalid format: {}", + &process.selinux_label + )); + } + } + if !linux.mount_label.is_empty() && !label_regex.is_match(&linux.mount_label) { + return Err(anyhow!( + "SELinux label for the mount is invalid format: {}", + &linux.mount_label + )); + } if linux.masked_paths.is_empty() && linux.readonly_paths.is_empty() { return Ok(()); } if !contain_namespace(&linux.namespaces, "mount") { - return Err(einval()); + return Err(anyhow!("Linux namespace does not contain mount")); } - // don't care about selinux at present - Ok(()) } @@ -103,7 +123,7 @@ fn idmapping(maps: &[LinuxIdMapping]) -> Result<()> { } } - Err(einval()) + Err(anyhow!("No idmap has size > 0")) } fn usernamespace(oci: &Spec) -> Result<()> { @@ -121,7 +141,7 @@ fn usernamespace(oci: &Spec) -> Result<()> { } else { // no user namespace but idmap if !linux.uid_mappings.is_empty() || !linux.gid_mappings.is_empty() { - return Err(einval()); + return Err(anyhow!("No user namespace, but uid or gid mapping exists")); } } @@ -163,7 +183,7 @@ fn sysctl(oci: &Spec) -> Result<()> { if contain_namespace(&linux.namespaces, "ipc") { continue; } else { - return Err(einval()); + return Err(anyhow!("Linux namespace does not contain ipc")); } } @@ -178,11 +198,11 @@ fn sysctl(oci: &Spec) -> Result<()> { } if key == "kernel.hostname" { - return Err(einval()); + return Err(anyhow!("Kernel hostname specfied in Spec")); } } - return Err(einval()); + return Err(anyhow!("Sysctl config contains invalid settings")); } Ok(()) } @@ -191,12 +211,13 @@ fn rootless_euid_mapping(oci: &Spec) -> Result<()> { let linux = get_linux(oci)?; if !contain_namespace(&linux.namespaces, "user") { - return Err(einval()); + return Err(anyhow!("Linux namespace is missing user")); } if linux.uid_mappings.is_empty() || linux.gid_mappings.is_empty() { - // rootless containers requires at least one UID/GID mapping - return Err(einval()); + return Err(anyhow!( + "Rootless containers require at least one UID/GID mapping" + )); } Ok(()) @@ -220,7 +241,7 @@ fn rootless_euid_mount(oci: &Spec) -> Result<()> { let fields: Vec<&str> = opt.split('=').collect(); if fields.len() != 2 { - return Err(einval()); + return Err(anyhow!("Options has invalid field: {:?}", 
fields));
                }

                let id = fields[1]
                    .parse::<u32>()
                    .context(format!("parse field {}", &fields[1]))?;

                if opt.starts_with("uid=") && !has_idmapping(&linux.uid_mappings, id) {
-                    return Err(einval());
+                    return Err(anyhow!("uid of {} does not have a valid mapping", id));
                }

                if opt.starts_with("gid=") && !has_idmapping(&linux.gid_mappings, id) {
-                    return Err(einval());
+                    return Err(anyhow!("gid of {} does not have a valid mapping", id));
                }
            }
        }
    }
@@ -249,15 +270,18 @@ fn rootless_euid(oci: &Spec) -> Result<()> {
 pub fn validate(conf: &Config) -> Result<()> {
     lazy_static::initialize(&SYSCTLS);

-    let oci = conf.spec.as_ref().ok_or_else(einval)?;
+    let oci = conf
+        .spec
+        .as_ref()
+        .ok_or_else(|| anyhow!("Invalid config spec"))?;

     if oci.linux.is_none() {
-        return Err(einval());
+        return Err(anyhow!("oci Linux is none"));
     }

     let root = match oci.root.as_ref() {
         Some(v) => v.path.as_str(),
-        None => return Err(einval()),
+        None => return Err(anyhow!("oci root is none")),
     };

     rootfs(root).context("rootfs")?;
@@ -277,7 +301,7 @@ pub fn validate(conf: &Config) -> Result<()> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use oci::Mount;
+    use oci::{Mount, Process};

     #[test]
     fn test_namespace() {
@@ -380,6 +404,29 @@ mod tests {
         ];
         spec.linux = Some(linux);
         security(&spec).unwrap();
+
+        // SELinux
+        let valid_label = "system_u:system_r:container_t:s0:c123,c456";
+        let mut process = Process::default();
+        process.selinux_label = valid_label.to_string();
+        spec.process = Some(process);
+        security(&spec).unwrap();
+
+        let mut linux = Linux::default();
+        linux.mount_label = valid_label.to_string();
+        spec.linux = Some(linux);
+        security(&spec).unwrap();
+
+        let invalid_label = "system_u:system_r:container_t";
+        let mut process = Process::default();
+        process.selinux_label = invalid_label.to_string();
+        spec.process = Some(process);
+        security(&spec).unwrap_err();
+
+        let mut linux = Linux::default();
+        linux.mount_label = invalid_label.to_string();
+        spec.linux = Some(linux);
+        security(&spec).unwrap_err();
     }

     #[test]
diff --git a/src/agent/src/ap.rs b/src/agent/src/ap.rs
new file mode 100644
index 000000000000..202d3306408c
--- /dev/null
+++ b/src/agent/src/ap.rs
@@ -0,0 +1,79 @@
+// Copyright (c) IBM Corp. 2023
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+use std::fmt;
+use std::str::FromStr;
+
+use anyhow::{anyhow, Context};
+
+// IBM Adjunct Processor (AP) is used for cryptographic operations
+// by IBM Crypto Express hardware security modules on IBM zSystem & LinuxONE (s390x).
+// In Linux, virtual cryptographic devices are called AP queues.
+// The name of an AP queue respects a format <xx>.<xxxx> in hexadecimal notation [1, p.467]:
+// - <xx> is an adapter ID
+// - <xxxx> is an adapter domain ID
+// [1] https://www.ibm.com/docs/en/linuxonibm/pdf/lku5dd05.pdf
+
+#[derive(Debug)]
+pub struct Address {
+    pub adapter_id: u8,
+    pub adapter_domain: u16,
+}
+
+impl Address {
+    pub fn new(adapter_id: u8, adapter_domain: u16) -> Address {
+        Address {
+            adapter_id,
+            adapter_domain,
+        }
+    }
+}
+
+impl FromStr for Address {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> anyhow::Result<Self> {
+        let split: Vec<&str> = s.split('.').collect();
+        if split.len() != 2 {
+            return Err(anyhow!(
+                "Wrong AP bus format. It needs to be in the form <xx>.<xxxx> (e.g. 0a.003f), got {:?}",
+                s
+            ));
+        }
+
+        let adapter_id = u8::from_str_radix(split[0], 16).context(format!(
+            "Wrong AP bus format. AP ID needs to be in the form <xx> (e.g. 
0a), got {:?}", + split[0] + ))?; + let adapter_domain = u16::from_str_radix(split[1], 16).context(format!( + "Wrong AP bus format. AP domain needs to be in the form (e.g. 003f), got {:?}", + split[1] + ))?; + + Ok(Address::new(adapter_id, adapter_domain)) + } +} + +impl fmt::Display for Address { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{:02x}.{:04x}", self.adapter_id, self.adapter_domain) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_from_str() { + let device = Address::from_str("a.1").unwrap(); + assert_eq!(format!("{}", device), "0a.0001"); + + assert!(Address::from_str("").is_err()); + assert!(Address::from_str(".").is_err()); + assert!(Address::from_str("0.0.0").is_err()); + assert!(Address::from_str("0g.0000").is_err()); + assert!(Address::from_str("0a.10000").is_err()); + } +} diff --git a/src/agent/src/config.rs b/src/agent/src/config.rs index 0bdf20c2dca6..36bdf66c8668 100644 --- a/src/agent/src/config.rs +++ b/src/agent/src/config.rs @@ -12,6 +12,8 @@ use std::str::FromStr; use std::time; use tracing::instrument; +use kata_types::config::default::DEFAULT_AGENT_VSOCK_PORT; + const DEBUG_CONSOLE_FLAG: &str = "agent.debug_console"; const DEV_MODE_FLAG: &str = "agent.devmode"; const TRACE_MODE_OPTION: &str = "agent.trace"; @@ -28,7 +30,6 @@ const DEFAULT_LOG_LEVEL: slog::Level = slog::Level::Info; const DEFAULT_HOTPLUG_TIMEOUT: time::Duration = time::Duration::from_secs(3); const DEFAULT_CONTAINER_PIPE_SIZE: i32 = 0; const VSOCK_ADDR: &str = "vsock://-1"; -const VSOCK_PORT: u16 = 1024; // Environment variables used for development and testing const SERVER_ADDR_ENV_VAR: &str = "KATA_AGENT_SERVER_ADDR"; @@ -147,7 +148,7 @@ impl Default for AgentConfig { debug_console_vport: 0, log_vport: 0, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, - server_addr: format!("{}:{}", VSOCK_ADDR, VSOCK_PORT), + server_addr: format!("{}:{}", VSOCK_ADDR, DEFAULT_AGENT_VSOCK_PORT), unified_cgroup_hierarchy: false, tracing: false, endpoints: Default::default(), @@ -194,12 +195,13 @@ impl FromStr for AgentConfig { impl AgentConfig { #[instrument] + #[allow(clippy::redundant_closure_call)] pub fn from_cmdline(file: &str, args: Vec) -> Result { // If config file specified in the args, generate our config from it let config_position = args.iter().position(|a| a == "--config" || a == "-c"); if let Some(config_position) = config_position { if let Some(config_file) = args.get(config_position + 1) { - return AgentConfig::from_config_file(config_file); + return AgentConfig::from_config_file(config_file).context("AgentConfig from args"); } else { panic!("The config argument wasn't formed properly: {:?}", args); } @@ -215,7 +217,8 @@ impl AgentConfig { // or if it can't be parsed properly. 
if param.starts_with(format!("{}=", CONFIG_FILE).as_str()) { let config_file = get_string_value(param)?; - return AgentConfig::from_config_file(&config_file); + return AgentConfig::from_config_file(&config_file) + .context("AgentConfig from kernel cmdline"); } // parse cmdline flags @@ -303,7 +306,8 @@ impl AgentConfig { #[instrument] pub fn from_config_file(file: &str) -> Result { - let config = fs::read_to_string(file)?; + let config = fs::read_to_string(file) + .with_context(|| format!("Failed to read config file {}", file))?; AgentConfig::from_str(&config) } @@ -432,7 +436,7 @@ fn get_container_pipe_size(param: &str) -> Result { #[cfg(test)] mod tests { - use crate::assert_result; + use test_utils::assert_result; use super::*; use anyhow::anyhow; @@ -1389,7 +1393,7 @@ Caused by: assert_eq!(config.server_addr, "vsock://8:2048"); assert_eq!( config.endpoints.allowed, - vec!["CreateContainer".to_string(), "StartContainer".to_string()] + ["CreateContainer".to_string(), "StartContainer".to_string()] .iter() .cloned() .collect() diff --git a/src/agent/src/console.rs b/src/agent/src/console.rs index c705af1b7173..8f1ae5ff32af 100644 --- a/src/agent/src/console.rs +++ b/src/agent/src/console.rs @@ -9,7 +9,7 @@ use anyhow::{anyhow, Result}; use nix::fcntl::{self, FcntlArg, FdFlag, OFlag}; use nix::libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO}; use nix::pty::{openpty, OpenptyResult}; -use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType}; +use nix::sys::socket::{self, AddressFamily, SockFlag, SockType, VsockAddr}; use nix::sys::stat::Mode; use nix::sys::wait; use nix::unistd::{self, close, dup2, fork, setsid, ForkResult, Pid}; @@ -67,7 +67,7 @@ pub async fn debug_console_handler( SockFlag::SOCK_CLOEXEC, None, )?; - let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, port); + let addr = VsockAddr::new(libc::VMADDR_CID_ANY, port); socket::bind(listenfd, &addr)?; socket::listen(listenfd, 1)?; diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 7d89d0124560..4e91bc1759f8 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -16,44 +16,49 @@ use std::str::FromStr; use std::sync::Arc; use tokio::sync::Mutex; -#[cfg(target_arch = "s390x")] -use crate::ccw; use crate::linux_abi::*; use crate::pci; use crate::sandbox::Sandbox; use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher}; use anyhow::{anyhow, Context, Result}; +use cfg_if::cfg_if; use oci::{LinuxDeviceCgroup, LinuxResources, Spec}; use protocols::agent::Device; use tracing::instrument; -// Convenience macro to obtain the scope logger -macro_rules! sl { - () => { - slog_scope::logger().new(o!("subsystem" => "device")) - }; +// Convenience function to obtain the scope logger. 
+fn sl() -> slog::Logger { + slog_scope::logger().new(o!("subsystem" => "device")) } const VM_ROOTFS: &str = "/"; - +const BLOCK: &str = "block"; pub const DRIVER_9P_TYPE: &str = "9p"; pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs"; -pub const DRIVER_BLK_TYPE: &str = "blk"; +pub const DRIVER_BLK_PCI_TYPE: &str = "blk"; pub const DRIVER_BLK_CCW_TYPE: &str = "blk-ccw"; -pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk"; +pub const DRIVER_BLK_MMIO_TYPE: &str = "mmioblk"; pub const DRIVER_SCSI_TYPE: &str = "scsi"; pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm"; pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral"; pub const DRIVER_LOCAL_TYPE: &str = "local"; pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind"; -// VFIO device to be bound to a guest kernel driver -pub const DRIVER_VFIO_GK_TYPE: &str = "vfio-gk"; -// VFIO device to be bound to vfio-pci and made available inside the +// VFIO PCI device to be bound to a guest kernel driver +pub const DRIVER_VFIO_PCI_GK_TYPE: &str = "vfio-pci-gk"; +// VFIO PCI device to be bound to vfio-pci and made available inside the // container as a VFIO device node -pub const DRIVER_VFIO_TYPE: &str = "vfio"; +pub const DRIVER_VFIO_PCI_TYPE: &str = "vfio-pci"; +pub const DRIVER_VFIO_AP_TYPE: &str = "vfio-ap"; pub const DRIVER_OVERLAYFS_TYPE: &str = "overlayfs"; pub const FS_TYPE_HUGETLB: &str = "hugetlbfs"; +cfg_if! { + if #[cfg(target_arch = "s390x")] { + use crate::ap; + use crate::ccw; + } +} + #[instrument] pub fn online_device(path: &str) -> Result<()> { fs::write(path, "1")?; @@ -71,7 +76,7 @@ where { let syspci = Path::new(&syspci); let drv = drv.as_ref(); - info!(sl!(), "rebind_pci_driver: {} => {:?}", dev, drv); + info!(sl(), "rebind_pci_driver: {} => {:?}", dev, drv); let devpath = syspci.join("devices").join(dev.to_string()); let overridepath = &devpath.join("driver_override"); @@ -197,7 +202,7 @@ impl ScsiBlockMatcher { impl UeventMatcher for ScsiBlockMatcher { fn is_match(&self, uev: &Uevent) -> bool { - uev.subsystem == "block" && uev.devpath.contains(&self.search) && !uev.devname.is_empty() + uev.subsystem == BLOCK && uev.devpath.contains(&self.search) && !uev.devname.is_empty() } } @@ -231,7 +236,7 @@ impl VirtioBlkPciMatcher { impl UeventMatcher for VirtioBlkPciMatcher { fn is_match(&self, uev: &Uevent) -> bool { - uev.subsystem == "block" && self.rex.is_match(&uev.devpath) && !uev.devname.is_empty() + uev.subsystem == BLOCK && self.rex.is_match(&uev.devpath) && !uev.devname.is_empty() } } @@ -280,7 +285,7 @@ pub async fn get_virtio_blk_ccw_device_name( sandbox: &Arc>, device: &ccw::Device, ) -> Result { - let matcher = VirtioBlkCCWMatcher::new(&create_ccw_root_bus_path(), device); + let matcher = VirtioBlkCCWMatcher::new(CCW_ROOT_BUS_PATH, device); let uev = wait_for_uevent(sandbox, matcher).await?; let devname = uev.devname; return match Path::new(SYSTEM_DEV_PATH).join(&devname).to_str() { @@ -304,7 +309,7 @@ impl PmemBlockMatcher { impl UeventMatcher for PmemBlockMatcher { fn is_match(&self, uev: &Uevent) -> bool { - uev.subsystem == "block" + uev.subsystem == BLOCK && uev.devpath.starts_with(ACPI_DEV_PATH) && uev.devpath.ends_with(&self.suffix) && !uev.devname.is_empty() @@ -401,6 +406,81 @@ async fn get_vfio_device_name(sandbox: &Arc>, grp: IommuGroup) -> Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname)) } +#[cfg(target_arch = "s390x")] +#[derive(Debug)] +struct ApMatcher { + syspath: String, +} + +#[cfg(target_arch = "s390x")] +impl ApMatcher { + fn new(address: ap::Address) -> ApMatcher { + ApMatcher { + syspath: format!( + 
"{}/card{:02x}/{}", + AP_ROOT_BUS_PATH, address.adapter_id, address + ), + } + } +} + +#[cfg(target_arch = "s390x")] +impl UeventMatcher for ApMatcher { + fn is_match(&self, uev: &Uevent) -> bool { + uev.action == "add" && uev.devpath == self.syspath + } +} + +#[cfg(target_arch = "s390x")] +#[instrument] +async fn wait_for_ap_device(sandbox: &Arc>, address: ap::Address) -> Result<()> { + let matcher = ApMatcher::new(address); + wait_for_uevent(sandbox, matcher).await?; + Ok(()) +} + +#[derive(Debug)] +struct MmioBlockMatcher { + suffix: String, +} + +impl MmioBlockMatcher { + fn new(devname: &str) -> MmioBlockMatcher { + MmioBlockMatcher { + suffix: format!(r"/block/{}", devname), + } + } +} + +impl UeventMatcher for MmioBlockMatcher { + fn is_match(&self, uev: &Uevent) -> bool { + uev.subsystem == BLOCK && uev.devpath.ends_with(&self.suffix) && !uev.devname.is_empty() + } +} + +#[instrument] +pub async fn get_virtio_mmio_device_name( + sandbox: &Arc>, + devpath: &str, +) -> Result<()> { + let devname = devpath + .strip_prefix("/dev/") + .ok_or_else(|| anyhow!("Storage source '{}' must start with /dev/", devpath))?; + + let matcher = MmioBlockMatcher::new(devname); + let uev = wait_for_uevent(sandbox, matcher) + .await + .context("failed to wait for uevent")?; + if uev.devname != devname { + return Err(anyhow!( + "Unexpected device name {} for mmio device (expected {})", + uev.devname, + devname + )); + } + Ok(()) +} + /// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN) #[instrument] fn scan_scsi_bus(scsi_addr: &str) -> Result<()> { @@ -414,7 +494,7 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> { // Scan scsi host passing in the channel, SCSI id and LUN. // Channel is always 0 because we have only one SCSI controller. - let scan_data = format!("0 {} {}", tokens[0], tokens[1]); + let scan_data = &format!("0 {} {}", tokens[0], tokens[1]); for entry in fs::read_dir(SYSFS_SCSI_HOST_PATH)? 
{ let host = entry?.file_name(); @@ -428,7 +508,7 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> { let scan_path = PathBuf::from(&format!("{}/{}/{}", SYSFS_SCSI_HOST_PATH, host_str, "scan")); - fs::write(scan_path, &scan_data)?; + fs::write(scan_path, scan_data)?; } Ok(()) @@ -524,7 +604,7 @@ fn update_spec_devices(spec: &mut Spec, mut updates: HashMap<&str, DevUpdate>) - let host_minor = specdev.minor; info!( - sl!(), + sl(), "update_spec_devices() updating device"; "container_path" => &specdev.path, "type" => &specdev.r#type, @@ -575,7 +655,7 @@ fn update_spec_devices(spec: &mut Spec, mut updates: HashMap<&str, DevUpdate>) - if let Some(update) = res_updates.get(&(r.r#type.as_str(), host_major, host_minor)) { info!( - sl!(), + sl(), "update_spec_devices() updating resource"; "type" => &r.r#type, "host_major" => host_major, @@ -636,12 +716,18 @@ pub fn update_env_pci( #[instrument] async fn virtiommio_blk_device_handler( device: &Device, - _sandbox: &Arc>, + sandbox: &Arc>, ) -> Result { if device.vm_path.is_empty() { return Err(anyhow!("Invalid path for virtio mmio blk device")); } + if !Path::new(&device.vm_path).exists() { + get_virtio_mmio_device_name(sandbox, &device.vm_path.to_string()) + .await + .context("failed to get mmio device name")?; + } + Ok(DevNumUpdate::from_vm_path(&device.vm_path)?.into()) } @@ -699,7 +785,7 @@ async fn virtio_nvdimm_device_handler( Ok(DevNumUpdate::from_vm_path(&device.vm_path)?.into()) } -fn split_vfio_option(opt: &str) -> Option<(&str, &str)> { +fn split_vfio_pci_option(opt: &str) -> Option<(&str, &str)> { let mut tokens = opt.split('='); let hostbdf = tokens.next()?; let path = tokens.next()?; @@ -714,14 +800,18 @@ fn split_vfio_option(opt: &str) -> Option<(&str, &str)> { // Each option should have the form "DDDD:BB:DD.F=" // DDDD:BB:DD.F is the device's PCI address in the host // is a PCI path to the device in the guest (see pci.rs) -async fn vfio_device_handler(device: &Device, sandbox: &Arc>) -> Result { - let vfio_in_guest = device.field_type != DRIVER_VFIO_GK_TYPE; +#[instrument] +async fn vfio_pci_device_handler( + device: &Device, + sandbox: &Arc>, +) -> Result { + let vfio_in_guest = device.type_ != DRIVER_VFIO_PCI_GK_TYPE; let mut pci_fixups = Vec::<(pci::Address, pci::Address)>::new(); let mut group = None; for opt in device.options.iter() { - let (host, pcipath) = - split_vfio_option(opt).ok_or_else(|| anyhow!("Malformed VFIO option {:?}", opt))?; + let (host, pcipath) = split_vfio_pci_option(opt) + .ok_or_else(|| anyhow!("Malformed VFIO PCI option {:?}", opt))?; let host = pci::Address::from_str(host).context("Bad host PCI address in VFIO option {:?}")?; let pcipath = pci::Path::from_str(pcipath)?; @@ -763,6 +853,28 @@ async fn vfio_device_handler(device: &Device, sandbox: &Arc>) -> }) } +// The VFIO AP (Adjunct Processor) device handler takes all the APQNs provided as device options +// and awaits them. It sets the minimum AP rescan time of 5 seconds and temporarily adds that +// amount to the hotplug timeout. 
+#[cfg(target_arch = "s390x")] +#[instrument] +async fn vfio_ap_device_handler( + device: &Device, + sandbox: &Arc>, +) -> Result { + // Force AP bus rescan + fs::write(AP_SCANS_PATH, "1")?; + for apqn in device.options.iter() { + wait_for_ap_device(sandbox, ap::Address::from_str(apqn)?).await?; + } + Ok(Default::default()) +} + +#[cfg(not(target_arch = "s390x"))] +async fn vfio_ap_device_handler(_: &Device, _: &Arc>) -> Result { + Err(anyhow!("AP is only supported on s390x")) +} + #[instrument] pub async fn add_devices( devices: &[Device], @@ -807,10 +919,10 @@ pub async fn add_devices( #[instrument] async fn add_device(device: &Device, sandbox: &Arc>) -> Result { // log before validation to help with debugging gRPC protocol version differences. - info!(sl!(), "device-id: {}, device-type: {}, device-vm-path: {}, device-container-path: {}, device-options: {:?}", - device.id, device.field_type, device.vm_path, device.container_path, device.options); + info!(sl(), "device-id: {}, device-type: {}, device-vm-path: {}, device-container-path: {}, device-options: {:?}", + device.id, device.type_, device.vm_path, device.container_path, device.options); - if device.field_type.is_empty() { + if device.type_.is_empty() { return Err(anyhow!("invalid type for device {:?}", device)); } @@ -822,14 +934,17 @@ async fn add_device(device: &Device, sandbox: &Arc>) -> Result virtio_blk_device_handler(device, sandbox).await, + match device.type_.as_str() { + DRIVER_BLK_PCI_TYPE => virtio_blk_device_handler(device, sandbox).await, DRIVER_BLK_CCW_TYPE => virtio_blk_ccw_device_handler(device, sandbox).await, - DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, sandbox).await, + DRIVER_BLK_MMIO_TYPE => virtiommio_blk_device_handler(device, sandbox).await, DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, sandbox).await, DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, sandbox).await, - DRIVER_VFIO_GK_TYPE | DRIVER_VFIO_TYPE => vfio_device_handler(device, sandbox).await, - _ => Err(anyhow!("Unknown device type {}", device.field_type)), + DRIVER_VFIO_PCI_GK_TYPE | DRIVER_VFIO_PCI_TYPE => { + vfio_pci_device_handler(device, sandbox).await + } + DRIVER_VFIO_AP_TYPE => vfio_ap_device_handler(device, sandbox).await, + _ => Err(anyhow!("Unknown device type {}", device.type_)), } } @@ -1325,7 +1440,7 @@ mod tests { let mut uev = crate::uevent::Uevent::default(); uev.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string(); - uev.subsystem = "block".to_string(); + uev.subsystem = BLOCK.to_string(); uev.devpath = devpath.clone(); uev.devname = devname.to_string(); @@ -1352,6 +1467,7 @@ mod tests { } #[tokio::test] + #[allow(clippy::redundant_clone)] async fn test_virtio_blk_matcher() { let root_bus = create_pci_root_bus_path(); let devname = "vda"; @@ -1359,7 +1475,7 @@ mod tests { let mut uev_a = crate::uevent::Uevent::default(); let relpath_a = "/0000:00:0a.0"; uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string(); - uev_a.subsystem = "block".to_string(); + uev_a.subsystem = BLOCK.to_string(); uev_a.devname = devname.to_string(); uev_a.devpath = format!("{}{}/virtio4/block/{}", root_bus, relpath_a, devname); let matcher_a = VirtioBlkPciMatcher::new(relpath_a); @@ -1378,7 +1494,7 @@ mod tests { #[cfg(target_arch = "s390x")] #[tokio::test] async fn test_virtio_blk_ccw_matcher() { - let root_bus = create_ccw_root_bus_path(); + let root_bus = CCW_ROOT_BUS_PATH; let subsystem = "block"; let devname = "vda"; let relpath = "0.0.0002"; @@ -1436,6 +1552,7 @@ mod tests { } #[tokio::test] + 
#[allow(clippy::redundant_clone)] async fn test_scsi_block_matcher() { let root_bus = create_pci_root_bus_path(); let devname = "sda"; @@ -1443,7 +1560,7 @@ mod tests { let mut uev_a = crate::uevent::Uevent::default(); let addr_a = "0:0"; uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string(); - uev_a.subsystem = "block".to_string(); + uev_a.subsystem = BLOCK.to_string(); uev_a.devname = devname.to_string(); uev_a.devpath = format!( "{}/0000:00:00.0/virtio0/host0/target0:0:0/0:0:{}/block/sda", @@ -1466,6 +1583,7 @@ mod tests { } #[tokio::test] + #[allow(clippy::redundant_clone)] async fn test_vfio_matcher() { let grpa = IommuGroup(1); let grpb = IommuGroup(22); @@ -1486,14 +1604,42 @@ mod tests { assert!(!matcher_a.is_match(&uev_b)); } + #[tokio::test] + #[allow(clippy::redundant_clone)] + async fn test_mmio_block_matcher() { + let devname_a = "vda"; + let devname_b = "vdb"; + let mut uev_a = crate::uevent::Uevent::default(); + uev_a.action = crate::linux_abi::U_EVENT_ACTION_ADD.to_string(); + uev_a.subsystem = BLOCK.to_string(); + uev_a.devname = devname_a.to_string(); + uev_a.devpath = format!( + "/sys/devices/virtio-mmio-cmdline/virtio-mmio.0/virtio0/block/{}", + devname_a + ); + let matcher_a = MmioBlockMatcher::new(devname_a); + + let mut uev_b = uev_a.clone(); + uev_b.devpath = format!( + "/sys/devices/virtio-mmio-cmdline/virtio-mmio.4/virtio4/block/{}", + devname_b + ); + let matcher_b = MmioBlockMatcher::new(devname_b); + + assert!(matcher_a.is_match(&uev_a)); + assert!(matcher_b.is_match(&uev_b)); + assert!(!matcher_b.is_match(&uev_a)); + assert!(!matcher_a.is_match(&uev_b)); + } + #[test] - fn test_split_vfio_option() { + fn test_split_vfio_pci_option() { assert_eq!( - split_vfio_option("0000:01:00.0=02/01"), + split_vfio_pci_option("0000:01:00.0=02/01"), Some(("0000:01:00.0", "02/01")) ); - assert_eq!(split_vfio_option("0000:01:00.0=02/01=rubbish"), None); - assert_eq!(split_vfio_option("0000:01:00.0"), None); + assert_eq!(split_vfio_pci_option("0000:01:00.0=02/01=rubbish"), None); + assert_eq!(split_vfio_pci_option("0000:01:00.0"), None); } #[test] @@ -1531,7 +1677,7 @@ mod tests { pci_driver_override(syspci, dev0, "drv_b").unwrap(); assert_eq!(fs::read_to_string(&dev0override).unwrap(), "drv_b"); assert_eq!(fs::read_to_string(&probepath).unwrap(), dev0.to_string()); - assert_eq!(fs::read_to_string(&drvaunbind).unwrap(), dev0.to_string()); + assert_eq!(fs::read_to_string(drvaunbind).unwrap(), dev0.to_string()); } #[test] @@ -1543,7 +1689,7 @@ mod tests { let dev0 = pci::Address::new(0, 0, pci::SlotFn::new(0, 0).unwrap()); let dev0path = syspci.join("devices").join(dev0.to_string()); - fs::create_dir_all(&dev0path).unwrap(); + fs::create_dir_all(dev0path).unwrap(); // Test dev0 assert!(pci_iommu_group(&syspci, dev0).unwrap().is_none()); @@ -1554,7 +1700,7 @@ mod tests { let dev1group = dev1path.join("iommu_group"); fs::create_dir_all(&dev1path).unwrap(); - std::os::unix::fs::symlink("../../../kernel/iommu_groups/12", &dev1group).unwrap(); + std::os::unix::fs::symlink("../../../kernel/iommu_groups/12", dev1group).unwrap(); // Test dev1 assert_eq!( @@ -1567,9 +1713,40 @@ mod tests { let dev2path = syspci.join("devices").join(dev2.to_string()); let dev2group = dev2path.join("iommu_group"); - fs::create_dir_all(&dev2group).unwrap(); + fs::create_dir_all(dev2group).unwrap(); // Test dev2 assert!(pci_iommu_group(&syspci, dev2).is_err()); } + + #[cfg(target_arch = "s390x")] + #[tokio::test] + async fn test_vfio_ap_matcher() { + let subsystem = "ap"; + let card = "0a"; + let 
relpath = format!("{}.0001", card); + + let mut uev = Uevent::default(); + uev.action = U_EVENT_ACTION_ADD.to_string(); + uev.subsystem = subsystem.to_string(); + uev.devpath = format!("{}/card{}/{}", AP_ROOT_BUS_PATH, card, relpath); + + let ap_address = ap::Address::from_str(&relpath).unwrap(); + let matcher = ApMatcher::new(ap_address); + + assert!(matcher.is_match(&uev)); + + let mut uev_remove = uev.clone(); + uev_remove.action = U_EVENT_ACTION_REMOVE.to_string(); + assert!(!matcher.is_match(&uev_remove)); + + let mut uev_other_device = uev.clone(); + uev_other_device.devpath = format!( + "{}/card{}/{}", + AP_ROOT_BUS_PATH, + card, + format!("{}.0002", card) + ); + assert!(!matcher.is_match(&uev_other_device)); + } } diff --git a/src/agent/src/linux_abi.rs b/src/agent/src/linux_abi.rs index c4d304d28051..b87da3ceb6b3 100644 --- a/src/agent/src/linux_abi.rs +++ b/src/agent/src/linux_abi.rs @@ -3,6 +3,8 @@ // SPDX-License-Identifier: Apache-2.0 // +use cfg_if::cfg_if; + /// Linux ABI related constants. #[cfg(target_arch = "aarch64")] @@ -31,7 +33,7 @@ pub fn create_pci_root_bus_path() -> String { // check if there is pci bus path for acpi acpi_sysfs_dir.push_str(&acpi_root_bus_path); - if let Ok(_) = fs::metadata(&acpi_sysfs_dir) { + if fs::metadata(&acpi_sysfs_dir).is_ok() { return acpi_root_bus_path; } @@ -64,10 +66,14 @@ pub fn create_pci_root_bus_path() -> String { ret } -#[cfg(target_arch = "s390x")] -pub fn create_ccw_root_bus_path() -> String { - String::from("/devices/css0") +cfg_if! { + if #[cfg(target_arch = "s390x")] { + pub const CCW_ROOT_BUS_PATH: &str = "/devices/css0"; + pub const AP_ROOT_BUS_PATH: &str = "/devices/ap"; + pub const AP_SCANS_PATH: &str = "/sys/bus/ap/scans"; + } } + // From https://www.kernel.org/doc/Documentation/acpi/namespace.txt // The Linux kernel's core ACPI subsystem creates struct acpi_device // objects for ACPI namespace objects representing devices, power resources @@ -75,7 +81,8 @@ pub fn create_ccw_root_bus_path() -> String { // sysfs as directories in the subtree under /sys/devices/LNXSYSTM:00 pub const ACPI_DEV_PATH: &str = "/devices/LNXSYSTM"; -pub const SYSFS_CPU_ONLINE_PATH: &str = "/sys/devices/system/cpu"; +pub const SYSFS_CPU_PATH: &str = "/sys/devices/system/cpu"; +pub const SYSFS_CPU_ONLINE_PATH: &str = "/sys/devices/system/cpu/online"; pub const SYSFS_MEMORY_BLOCK_SIZE_PATH: &str = "/sys/devices/system/memory/block_size_bytes"; pub const SYSFS_MEMORY_HOTPLUG_PROBE_PATH: &str = "/sys/devices/system/memory/probe"; diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs index 6a023e093e70..cb3abbe1e2b3 100644 --- a/src/agent/src/main.rs +++ b/src/agent/src/main.rs @@ -20,9 +20,10 @@ extern crate scopeguard; extern crate slog; use anyhow::{anyhow, Context, Result}; +use cfg_if::cfg_if; use clap::{AppSettings, Parser}; use nix::fcntl::OFlag; -use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType}; +use nix::sys::socket::{self, AddressFamily, SockFlag, SockType, VsockAddr}; use nix::unistd::{self, dup, Pid}; use std::env; use std::ffi::OsStr; @@ -34,8 +35,6 @@ use std::process::exit; use std::sync::Arc; use tracing::{instrument, span}; -#[cfg(target_arch = "s390x")] -mod ccw; mod config; mod console; mod device; @@ -49,8 +48,7 @@ mod pci; pub mod random; mod sandbox; mod signal; -#[cfg(test)] -mod test_utils; +mod storage; mod uevent; mod util; mod version; @@ -68,7 +66,7 @@ use tokio::{ io::AsyncWrite, sync::{ watch::{channel, Receiver}, - Mutex, RwLock, + Mutex, }, task::JoinHandle, }; @@ -76,15 +74,29 @@ use 
tokio::{ mod rpc; mod tracer; +#[cfg(feature = "agent-policy")] +mod policy; + +cfg_if! { + if #[cfg(target_arch = "s390x")] { + mod ap; + mod ccw; + } +} + const NAME: &str = "kata-agent"; lazy_static! { - static ref AGENT_CONFIG: Arc> = Arc::new(RwLock::new( + static ref AGENT_CONFIG: AgentConfig = // Note: We can't do AgentOpts.parse() here to send through the processed arguments to AgentConfig // clap::Parser::parse() greedily process all command line input including cargo test parameters, // so should only be used inside main. - AgentConfig::from_cmdline("/proc/cmdline", env::args().collect()).unwrap() - )); + AgentConfig::from_cmdline("/proc/cmdline", env::args().collect()).unwrap(); +} + +#[cfg(feature = "agent-policy")] +lazy_static! { + static ref AGENT_POLICY: Mutex = Mutex::new(AgentPolicy::new()); } #[derive(Parser)] @@ -110,10 +122,6 @@ enum SubCommand { fn announce(logger: &Logger, config: &AgentConfig) { info!(logger, "announce"; "agent-commit" => version::VERSION_COMMIT, - - // Avoid any possibility of confusion with the old agent - "agent-type" => "rust", - "agent-version" => version::AGENT_VERSION, "api-version" => version::API_VERSION, "config" => format!("{:?}", config), @@ -132,7 +140,7 @@ async fn create_logger_task(rfd: RawFd, vsock_port: u32, shutdown: Receiver std::result::Result<(), Box> { lazy_static::initialize(&AGENT_CONFIG); - init_agent_as_init(&logger, AGENT_CONFIG.read().await.unified_cgroup_hierarchy)?; + init_agent_as_init(&logger, AGENT_CONFIG.unified_cgroup_hierarchy)?; drop(logger_async_guard); } else { lazy_static::initialize(&AGENT_CONFIG); } - let config = AGENT_CONFIG.read().await; + let config = &AGENT_CONFIG; let log_vport = config.log_vport as u32; let log_handle = tokio::spawn(create_logger_task(rfd, log_vport, shutdown_rx.clone())); @@ -200,7 +208,7 @@ async fn real_main() -> std::result::Result<(), Box> { let (logger, logger_async_guard) = logging::create_logger(NAME, "agent", config.log_level, writer); - announce(&logger, &config); + announce(&logger, config); // This variable is required as it enables the global (and crucially static) logger, // which is required to satisfy the the lifetime constraints of the auto-generated gRPC code. @@ -213,7 +221,7 @@ async fn real_main() -> std::result::Result<(), Box> { if config.log_level == slog::Level::Trace { // Redirect ttrpc log calls to slog iff full debug requested - ttrpc_log_guard = Ok(slog_stdlog::init().map_err(|e| e)?); + ttrpc_log_guard = Ok(slog_stdlog::init()?); } if config.tracing { @@ -228,7 +236,7 @@ async fn real_main() -> std::result::Result<(), Box> { let span_guard = root_span.enter(); // Start the sandbox and wait for its ttRPC server to end - start_sandbox(&logger, &config, init_mode, &mut tasks, shutdown_rx.clone()).await?; + start_sandbox(&logger, config, init_mode, &mut tasks, shutdown_rx.clone()).await?; // Install a NOP logger for the remainder of the shutdown sequence // to ensure any log calls made by local crates using the scope logger @@ -327,6 +335,19 @@ async fn start_sandbox( s.rtnl.handle_localhost().await?; } + // - When init_mode is true, enabling the localhost link during the + // handle_localhost call above is required before starting OPA with the + // initialize_policy call below. + // - When init_mode is false, the Policy could be initialized earlier, + // because initialize_policy doesn't start OPA. OPA is started by + // systemd after localhost has been enabled. 
+ #[cfg(feature = "agent-policy")] + if let Err(e) = initialize_policy(init_mode).await { + error!(logger, "Failed to initialize agent policy: {:?}", e); + // Continuing execution without a security policy could be dangerous. + std::process::abort(); + } + let sandbox = Arc::new(Mutex::new(s)); let signal_handler_task = tokio::spawn(setup_signal_handler( @@ -345,7 +366,7 @@ async fn start_sandbox( sandbox.lock().await.sender = Some(tx); // vsock:///dev/vsock, port - let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str())?; + let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str(), init_mode)?; server.start().await?; rx.await?; @@ -388,6 +409,18 @@ fn init_agent_as_init(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result Ok(()) } +#[cfg(feature = "agent-policy")] +async fn initialize_policy(init_mode: bool) -> Result<()> { + let opa_addr = "localhost:8181"; + let agent_policy_path = "/agent_policy"; + let default_agent_policy = "/etc/kata-opa/default-policy.rego"; + AGENT_POLICY + .lock() + .await + .initialize(init_mode, opa_addr, agent_policy_path, default_agent_policy) + .await +} + // The Rust standard library had suppressed the default SIGPIPE behavior, // see https://github.com/rust-lang/rust/pull/13158. // Since the parent's signal handler would be inherited by it's child process, @@ -402,10 +435,14 @@ fn reset_sigpipe() { use crate::config::AgentConfig; use std::os::unix::io::{FromRawFd, RawFd}; +#[cfg(feature = "agent-policy")] +use crate::policy::AgentPolicy; + #[cfg(test)] mod tests { use super::*; - use crate::test_utils::test_utils::TestUserType; + use test_utils::TestUserType; + use test_utils::{assert_result, skip_if_not_root, skip_if_root}; #[tokio::test] async fn test_create_logger_task() { @@ -441,9 +478,8 @@ mod tests { let msg = format!("test[{}]: {:?}", i, d); let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap(); defer!({ - // rfd is closed by the use of PipeStream in the crate_logger_task function, - // but we will attempt to close in case of a failure - let _ = unistd::close(rfd); + // XXX: Never try to close rfd, because it will be closed by PipeStream in + // create_logger_task() and it's not safe to close the same fd twice time. unistd::close(wfd).unwrap(); }); diff --git a/src/agent/src/metrics.rs b/src/agent/src/metrics.rs index 508d7d2c2aab..d7fc4d12bd13 100644 --- a/src/agent/src/metrics.rs +++ b/src/agent/src/metrics.rs @@ -5,73 +5,87 @@ extern crate procfs; -use prometheus::{Encoder, Gauge, GaugeVec, IntCounter, TextEncoder}; +use prometheus::{Encoder, Gauge, GaugeVec, IntCounter, Opts, Registry, TextEncoder}; -use anyhow::Result; +use anyhow::{anyhow, Result}; use slog::warn; +use std::sync::Mutex; use tracing::instrument; const NAMESPACE_KATA_AGENT: &str = "kata_agent"; const NAMESPACE_KATA_GUEST: &str = "kata_guest"; -// Convenience macro to obtain the scope logger -macro_rules! sl { - () => { - slog_scope::logger().new(o!("subsystem" => "metrics")) - }; +// Convenience function to obtain the scope logger. +fn sl() -> slog::Logger { + slog_scope::logger().new(o!("subsystem" => "metrics")) } lazy_static! 
{ - static ref AGENT_SCRAPE_COUNT: IntCounter = - prometheus::register_int_counter!(format!("{}_{}",NAMESPACE_KATA_AGENT,"scrape_count"), "Metrics scrape count").unwrap(); + static ref REGISTERED: Mutex = Mutex::new(false); + + // custom registry + static ref REGISTRY: Registry = Registry::new(); + + static ref AGENT_SCRAPE_COUNT: IntCounter = + IntCounter::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"scrape_count"), "Metrics scrape count").unwrap(); - static ref AGENT_THREADS: Gauge = - prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"threads"), "Agent process threads").unwrap(); + // agent metrics + static ref AGENT_THREADS: Gauge = + Gauge::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"threads"), "Agent process threads").unwrap(); - static ref AGENT_TOTAL_TIME: Gauge = - prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_time"), "Agent process total time").unwrap(); + static ref AGENT_TOTAL_TIME: Gauge = + Gauge::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_time"), "Agent process total time").unwrap(); - static ref AGENT_TOTAL_VM: Gauge = - prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_vm"), "Agent process total VM size").unwrap(); + static ref AGENT_TOTAL_VM: Gauge = + Gauge::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_vm"), "Agent process total VM size").unwrap() ; - static ref AGENT_TOTAL_RSS: Gauge = - prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_rss"), "Agent process total RSS size").unwrap(); + static ref AGENT_TOTAL_RSS: Gauge = + Gauge::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_rss"), "Agent process total RSS size").unwrap(); - static ref AGENT_PROC_STATUS: GaugeVec = - prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_AGENT,"proc_status"), "Agent process status.", &["item"]).unwrap(); + static ref AGENT_PROC_STATUS: GaugeVec = + GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"proc_status"), "Agent process status."), &["item"]).unwrap(); - static ref AGENT_IO_STAT: GaugeVec = - prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_AGENT,"io_stat"), "Agent process IO statistics.", &["item"]).unwrap(); + static ref AGENT_IO_STAT: GaugeVec = + GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"io_stat"), "Agent process IO statistics."), &["item"]).unwrap(); - static ref AGENT_PROC_STAT: GaugeVec = - prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_AGENT,"proc_stat"), "Agent process statistics.", &["item"]).unwrap(); + static ref AGENT_PROC_STAT: GaugeVec = + GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"proc_stat"), "Agent process statistics."), &["item"]).unwrap(); // guest os metrics - static ref GUEST_LOAD: GaugeVec = - prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"load") , "Guest system load.", &["item"]).unwrap(); + static ref GUEST_LOAD: GaugeVec = + GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"load"), "Guest system load."), &["item"]).unwrap(); - static ref GUEST_TASKS: GaugeVec = - prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"tasks") , "Guest system load.", &["item"]).unwrap(); + static ref GUEST_TASKS: GaugeVec = + GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"tasks"), "Guest system load."), &["item"]).unwrap(); - static ref GUEST_CPU_TIME: GaugeVec = - prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"cpu_time") , "Guest CPU statistics.", &["cpu","item"]).unwrap(); + static ref GUEST_CPU_TIME: 
GaugeVec = + GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"cpu_time"), "Guest CPU statistics."), &["cpu","item"]).unwrap(); - static ref GUEST_VM_STAT: GaugeVec = - prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"vm_stat") , "Guest virtual memory statistics.", &["item"]).unwrap(); + static ref GUEST_VM_STAT: GaugeVec = + GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"vm_stat"), "Guest virtual memory statistics."), &["item"]).unwrap(); - static ref GUEST_NETDEV_STAT: GaugeVec = - prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"netdev_stat") , "Guest net devices statistics.", &["interface","item"]).unwrap(); + static ref GUEST_NETDEV_STAT: GaugeVec = + GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"netdev_stat"), "Guest net devices statistics."), &["interface","item"]).unwrap(); - static ref GUEST_DISKSTAT: GaugeVec = - prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"diskstat") , "Disks statistics in system.", &["disk","item"]).unwrap(); + static ref GUEST_DISKSTAT: GaugeVec = + GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"diskstat"), "Disks statistics in system."), &["disk","item"]).unwrap(); - static ref GUEST_MEMINFO: GaugeVec = - prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"meminfo") , "Statistics about memory usage in the system.", &["item"]).unwrap(); + static ref GUEST_MEMINFO: GaugeVec = + GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"meminfo"), "Statistics about memory usage in the system."), &["item"]).unwrap(); } #[instrument] pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result { + let mut registered = REGISTERED + .lock() + .map_err(|e| anyhow!("failed to check agent metrics register status {:?}", e))?; + + if !(*registered) { + register_metrics()?; + *registered = true; + } + AGENT_SCRAPE_COUNT.inc(); // update agent process metrics @@ -81,7 +95,7 @@ pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result { update_guest_metrics(); // gather all metrics and return as a String - let metric_families = prometheus::gather(); + let metric_families = REGISTRY.gather(); let mut buffer = Vec::new(); let encoder = TextEncoder::new(); @@ -90,6 +104,31 @@ pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result { Ok(String::from_utf8(buffer)?) 
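The metrics rework above moves from the crate-global default registry (the register_*! macros plus prometheus::gather()) to a private Registry that is populated once, on the first scrape, guarded by a Mutex<bool>. A condensed sketch of that pattern with a single counter (assumes the prometheus and lazy_static dependencies; this is not the agent's full metric set):

    use lazy_static::lazy_static;
    use prometheus::{Encoder, IntCounter, Registry, TextEncoder};
    use std::sync::Mutex;

    lazy_static! {
        // Private registry instead of the prometheus default registry.
        static ref REGISTRY: Registry = Registry::new();
        static ref REGISTERED: Mutex<bool> = Mutex::new(false);
        static ref SCRAPE_COUNT: IntCounter =
            IntCounter::new("scrape_count", "Metrics scrape count").unwrap();
    }

    fn gather_metrics() -> String {
        // Register collectors only once, on the first scrape.
        let mut registered = REGISTERED.lock().unwrap();
        if !*registered {
            REGISTRY
                .register(Box::new(SCRAPE_COUNT.clone()))
                .expect("register metric");
            *registered = true;
        }
        drop(registered);

        SCRAPE_COUNT.inc();

        // Gather from the private registry rather than prometheus::gather().
        let mut buffer = Vec::new();
        TextEncoder::new()
            .encode(&REGISTRY.gather(), &mut buffer)
            .expect("encode metrics");
        String::from_utf8(buffer).expect("metrics are valid UTF-8")
    }

    fn main() {
        println!("{}", gather_metrics());
    }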
} +#[instrument] +fn register_metrics() -> Result<()> { + REGISTRY.register(Box::new(AGENT_SCRAPE_COUNT.clone()))?; + + // agent metrics + REGISTRY.register(Box::new(AGENT_THREADS.clone()))?; + REGISTRY.register(Box::new(AGENT_TOTAL_TIME.clone()))?; + REGISTRY.register(Box::new(AGENT_TOTAL_VM.clone()))?; + REGISTRY.register(Box::new(AGENT_TOTAL_RSS.clone()))?; + REGISTRY.register(Box::new(AGENT_PROC_STATUS.clone()))?; + REGISTRY.register(Box::new(AGENT_IO_STAT.clone()))?; + REGISTRY.register(Box::new(AGENT_PROC_STAT.clone()))?; + + // guest metrics + REGISTRY.register(Box::new(GUEST_LOAD.clone()))?; + REGISTRY.register(Box::new(GUEST_TASKS.clone()))?; + REGISTRY.register(Box::new(GUEST_CPU_TIME.clone()))?; + REGISTRY.register(Box::new(GUEST_VM_STAT.clone()))?; + REGISTRY.register(Box::new(GUEST_NETDEV_STAT.clone()))?; + REGISTRY.register(Box::new(GUEST_DISKSTAT.clone()))?; + REGISTRY.register(Box::new(GUEST_MEMINFO.clone()))?; + + Ok(()) +} + #[instrument] fn update_agent_metrics() -> Result<()> { let me = procfs::process::Process::myself(); @@ -98,7 +137,7 @@ fn update_agent_metrics() -> Result<()> { Ok(p) => p, Err(e) => { // FIXME: return Ok for all errors? - warn!(sl!(), "failed to create process instance: {:?}", e); + warn!(sl(), "failed to create process instance: {:?}", e); return Ok(()); } @@ -119,7 +158,7 @@ fn update_agent_metrics() -> Result<()> { // io match me.io() { Err(err) => { - info!(sl!(), "failed to get process io stat: {:?}", err); + info!(sl(), "failed to get process io stat: {:?}", err); } Ok(io) => { set_gauge_vec_proc_io(&AGENT_IO_STAT, &io); @@ -128,7 +167,7 @@ fn update_agent_metrics() -> Result<()> { match me.stat() { Err(err) => { - info!(sl!(), "failed to get process stat: {:?}", err); + info!(sl(), "failed to get process stat: {:?}", err); } Ok(stat) => { set_gauge_vec_proc_stat(&AGENT_PROC_STAT, &stat); @@ -136,7 +175,7 @@ fn update_agent_metrics() -> Result<()> { } match me.status() { - Err(err) => error!(sl!(), "failed to get process status: {:?}", err), + Err(err) => error!(sl(), "failed to get process status: {:?}", err), Ok(status) => set_gauge_vec_proc_status(&AGENT_PROC_STATUS, &status), } @@ -148,7 +187,7 @@ fn update_guest_metrics() { // try get load and task info match procfs::LoadAverage::new() { Err(err) => { - info!(sl!(), "failed to get guest LoadAverage: {:?}", err); + info!(sl(), "failed to get guest LoadAverage: {:?}", err); } Ok(load) => { GUEST_LOAD @@ -168,7 +207,7 @@ fn update_guest_metrics() { // try to get disk stats match procfs::diskstats() { Err(err) => { - info!(sl!(), "failed to get guest diskstats: {:?}", err); + info!(sl(), "failed to get guest diskstats: {:?}", err); } Ok(diskstats) => { for diskstat in diskstats { @@ -180,7 +219,7 @@ fn update_guest_metrics() { // try to get vm stats match procfs::vmstat() { Err(err) => { - info!(sl!(), "failed to get guest vmstat: {:?}", err); + info!(sl(), "failed to get guest vmstat: {:?}", err); } Ok(vmstat) => { for (k, v) in vmstat { @@ -192,7 +231,7 @@ fn update_guest_metrics() { // cpu stat match procfs::KernelStats::new() { Err(err) => { - info!(sl!(), "failed to get guest KernelStats: {:?}", err); + info!(sl(), "failed to get guest KernelStats: {:?}", err); } Ok(kernel_stats) => { set_gauge_vec_cpu_time(&GUEST_CPU_TIME, "total", &kernel_stats.total); @@ -205,7 +244,7 @@ fn update_guest_metrics() { // try to get net device stats match procfs::net::dev_status() { Err(err) => { - info!(sl!(), "failed to get guest net::dev_status: {:?}", err); + info!(sl(), "failed to get guest 
net::dev_status: {:?}", err); } Ok(devs) => { // netdev: map[string]procfs::net::DeviceStatus @@ -218,7 +257,7 @@ fn update_guest_metrics() { // get statistics about memory from /proc/meminfo match procfs::Meminfo::new() { Err(err) => { - info!(sl!(), "failed to get guest Meminfo: {:?}", err); + info!(sl(), "failed to get guest Meminfo: {:?}", err); } Ok(meminfo) => { set_gauge_vec_meminfo(&GUEST_MEMINFO, &meminfo); diff --git a/src/agent/src/mount.rs b/src/agent/src/mount.rs index 568ea6aef241..e7bbf96f8db7 100644 --- a/src/agent/src/mount.rs +++ b/src/agent/src/mount.rs @@ -4,91 +4,23 @@ // use std::collections::HashMap; -use std::fs; -use std::fs::{File, OpenOptions}; -use std::io::{BufRead, BufReader, Write}; -use std::iter; -use std::os::unix::fs::{MetadataExt, PermissionsExt}; +use std::fmt::Debug; +use std::fs::{self, File}; +use std::io::{BufRead, BufReader}; +use std::ops::Deref; use std::path::Path; -use std::str::FromStr; -use std::sync::Arc; - -use tokio::sync::Mutex; +use anyhow::{anyhow, Context, Result}; +use kata_sys_util::mount::{get_linux_mount_info, parse_mount_options}; use nix::mount::MsFlags; -use nix::unistd::{Gid, Uid}; - use regex::Regex; - -use crate::device::{ - get_scsi_device_name, get_virtio_blk_pci_device_name, online_device, wait_for_pmem_device, - DRIVER_9P_TYPE, DRIVER_BLK_CCW_TYPE, DRIVER_BLK_TYPE, DRIVER_EPHEMERAL_TYPE, DRIVER_LOCAL_TYPE, - DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_OVERLAYFS_TYPE, DRIVER_SCSI_TYPE, - DRIVER_VIRTIOFS_TYPE, DRIVER_WATCHABLE_BIND_TYPE, FS_TYPE_HUGETLB, -}; -use crate::linux_abi::*; -use crate::pci; -use crate::protocols::agent::Storage; -use crate::protocols::types::FSGroupChangePolicy; -use crate::Sandbox; -#[cfg(target_arch = "s390x")] -use crate::{ccw, device::get_virtio_blk_ccw_device_name}; -use anyhow::{anyhow, Context, Result}; use slog::Logger; use tracing::instrument; -pub const TYPE_ROOTFS: &str = "rootfs"; -const SYS_FS_HUGEPAGES_PREFIX: &str = "/sys/kernel/mm/hugepages"; -pub const MOUNT_GUEST_TAG: &str = "kataShared"; - -// Allocating an FSGroup that owns the pod's volumes -const FS_GID: &str = "fsgid"; - -const RW_MASK: u32 = 0o660; -const RO_MASK: u32 = 0o440; -const EXEC_MASK: u32 = 0o110; -const MODE_SETGID: u32 = 0o2000; +use crate::device::online_device; +use crate::linux_abi::*; -#[rustfmt::skip] -lazy_static! 
{ - pub static ref FLAGS: HashMap<&'static str, (bool, MsFlags)> = { - let mut m = HashMap::new(); - m.insert("defaults", (false, MsFlags::empty())); - m.insert("ro", (false, MsFlags::MS_RDONLY)); - m.insert("rw", (true, MsFlags::MS_RDONLY)); - m.insert("suid", (true, MsFlags::MS_NOSUID)); - m.insert("nosuid", (false, MsFlags::MS_NOSUID)); - m.insert("dev", (true, MsFlags::MS_NODEV)); - m.insert("nodev", (false, MsFlags::MS_NODEV)); - m.insert("exec", (true, MsFlags::MS_NOEXEC)); - m.insert("noexec", (false, MsFlags::MS_NOEXEC)); - m.insert("sync", (false, MsFlags::MS_SYNCHRONOUS)); - m.insert("async", (true, MsFlags::MS_SYNCHRONOUS)); - m.insert("dirsync", (false, MsFlags::MS_DIRSYNC)); - m.insert("remount", (false, MsFlags::MS_REMOUNT)); - m.insert("mand", (false, MsFlags::MS_MANDLOCK)); - m.insert("nomand", (true, MsFlags::MS_MANDLOCK)); - m.insert("atime", (true, MsFlags::MS_NOATIME)); - m.insert("noatime", (false, MsFlags::MS_NOATIME)); - m.insert("diratime", (true, MsFlags::MS_NODIRATIME)); - m.insert("nodiratime", (false, MsFlags::MS_NODIRATIME)); - m.insert("bind", (false, MsFlags::MS_BIND)); - m.insert("rbind", (false, MsFlags::MS_BIND | MsFlags::MS_REC)); - m.insert("unbindable", (false, MsFlags::MS_UNBINDABLE)); - m.insert("runbindable", (false, MsFlags::MS_UNBINDABLE | MsFlags::MS_REC)); - m.insert("private", (false, MsFlags::MS_PRIVATE)); - m.insert("rprivate", (false, MsFlags::MS_PRIVATE | MsFlags::MS_REC)); - m.insert("shared", (false, MsFlags::MS_SHARED)); - m.insert("rshared", (false, MsFlags::MS_SHARED | MsFlags::MS_REC)); - m.insert("slave", (false, MsFlags::MS_SLAVE)); - m.insert("rslave", (false, MsFlags::MS_SLAVE | MsFlags::MS_REC)); - m.insert("relatime", (false, MsFlags::MS_RELATIME)); - m.insert("norelatime", (true, MsFlags::MS_RELATIME)); - m.insert("strictatime", (false, MsFlags::MS_STRICTATIME)); - m.insert("nostrictatime", (true, MsFlags::MS_STRICTATIME)); - m - }; -} +pub const TYPE_ROOTFS: &str = "rootfs"; #[derive(Debug, PartialEq)] pub struct InitMount<'a> { @@ -131,19 +63,6 @@ lazy_static! 
{ ]; } -pub const STORAGE_HANDLER_LIST: &[&str] = &[ - DRIVER_BLK_TYPE, - DRIVER_9P_TYPE, - DRIVER_VIRTIOFS_TYPE, - DRIVER_EPHEMERAL_TYPE, - DRIVER_OVERLAYFS_TYPE, - DRIVER_MMIO_BLK_TYPE, - DRIVER_LOCAL_TYPE, - DRIVER_SCSI_TYPE, - DRIVER_NVDIMM_TYPE, - DRIVER_WATCHABLE_BIND_TYPE, -]; - #[instrument] pub fn baremount( source: &Path, @@ -167,13 +86,22 @@ pub fn baremount( return Err(anyhow!("need mount FS type")); } + let destination_str = destination.to_string_lossy(); + if let Ok(m) = get_linux_mount_info(destination_str.deref()) { + if m.fs_type == fs_type { + slog_info!(logger, "{source:?} is already mounted at {destination:?}"); + return Ok(()); + } + } + info!( logger, - "mount source={:?}, dest={:?}, fs_type={:?}, options={:?}", + "baremount source={:?}, dest={:?}, fs_type={:?}, options={:?}, flags={:?}", source, destination, fs_type, - options + options, + flags ); nix::mount::mount( @@ -185,635 +113,41 @@ pub fn baremount( ) .map_err(|e| { anyhow!( - "failed to mount {:?} to {:?}, with error: {}", - source, - destination, + "failed to mount {} to {}, with error: {}", + source.display(), + destination.display(), e ) }) } -#[instrument] -async fn ephemeral_storage_handler( - logger: &Logger, - storage: &Storage, - sandbox: Arc>, -) -> Result { - // hugetlbfs - if storage.fstype == FS_TYPE_HUGETLB { - return handle_hugetlbfs_storage(logger, storage).await; - } - - // normal ephemeral storage - fs::create_dir_all(Path::new(&storage.mount_point))?; - - // By now we only support one option field: "fsGroup" which - // isn't an valid mount option, thus we should remove it when - // do mount. - if storage.options.len() > 0 { - // ephemeral_storage didn't support mount options except fsGroup. - let mut new_storage = storage.clone(); - new_storage.options = protobuf::RepeatedField::default(); - common_storage_handler(logger, &new_storage)?; - - let opts_vec: Vec = storage.options.to_vec(); - - let opts = parse_options(opts_vec); - - if let Some(fsgid) = opts.get(FS_GID) { - let gid = fsgid.parse::()?; - - nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; - - let meta = fs::metadata(&storage.mount_point)?; - let mut permission = meta.permissions(); - - let o_mode = meta.mode() | MODE_SETGID; - permission.set_mode(o_mode); - fs::set_permissions(&storage.mount_point, permission)?; - } - } else { - common_storage_handler(logger, storage)?; - } - - Ok("".to_string()) -} - -#[instrument] -async fn overlayfs_storage_handler( - logger: &Logger, - storage: &Storage, - _sandbox: Arc>, -) -> Result { - common_storage_handler(logger, storage) -} - -#[instrument] -async fn local_storage_handler( - _logger: &Logger, - storage: &Storage, - sandbox: Arc>, -) -> Result { - fs::create_dir_all(&storage.mount_point).context(format!( - "failed to create dir all {:?}", - &storage.mount_point - ))?; - - let opts_vec: Vec = storage.options.to_vec(); - - let opts = parse_options(opts_vec); - - let mut need_set_fsgid = false; - if let Some(fsgid) = opts.get(FS_GID) { - let gid = fsgid.parse::()?; - - nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; - need_set_fsgid = true; - } - - if let Some(mode) = opts.get("mode") { - let mut permission = fs::metadata(&storage.mount_point)?.permissions(); - - let mut o_mode = u32::from_str_radix(mode, 8)?; - - if need_set_fsgid { - // set SetGid mode mask. 
- o_mode |= MODE_SETGID; - } - permission.set_mode(o_mode); - - fs::set_permissions(&storage.mount_point, permission)?; - } - - Ok("".to_string()) -} - -#[instrument] -async fn virtio9p_storage_handler( - logger: &Logger, - storage: &Storage, - _sandbox: Arc>, -) -> Result { - common_storage_handler(logger, storage) -} - -#[instrument] -async fn handle_hugetlbfs_storage(logger: &Logger, storage: &Storage) -> Result { - info!(logger, "handle hugetlbfs storage"); - // Allocate hugepages before mount - // /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages - // /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages - // options eg "pagesize=2097152,size=524288000"(2M, 500M) - allocate_hugepages(logger, &storage.options.to_vec()).context("allocate hugepages")?; - - common_storage_handler(logger, storage)?; - - // hugetlbfs return empty string as ephemeral_storage_handler do. - // this is a sandbox level storage, but not a container-level mount. - Ok("".to_string()) -} - -// Allocate hugepages by writing to sysfs -fn allocate_hugepages(logger: &Logger, options: &[String]) -> Result<()> { - info!(logger, "mounting hugePages storage options: {:?}", options); - - let (pagesize, size) = get_pagesize_and_size_from_option(options) - .context(format!("parse mount options: {:?}", &options))?; - - info!( - logger, - "allocate hugepages. pageSize: {}, size: {}", pagesize, size - ); - - // sysfs entry is always of the form hugepages-${pagesize}kB - // Ref: https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt - let path = Path::new(SYS_FS_HUGEPAGES_PREFIX) - .join(format!("hugepages-{}kB", pagesize / 1024)) - .join("nr_hugepages"); - - // write numpages to nr_hugepages file. - let numpages = format!("{}", size / pagesize); - info!(logger, "write {} pages to {:?}", &numpages, &path); - - let mut file = OpenOptions::new() - .write(true) - .open(&path) - .context(format!("open nr_hugepages directory {:?}", &path))?; - - file.write_all(numpages.as_bytes()) - .context(format!("write nr_hugepages failed: {:?}", &path))?; - - // Even if the write succeeds, the kernel isn't guaranteed to be - // able to allocate all the pages we requested. Verify that it - // did. 
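For the removed allocate_hugepages() path, the option string cited in its comment ("pagesize=2097152,size=524288000", i.e. 2 MiB pages and a 500 MiB pool) translates into a single sysfs write. A small worked example of the arithmetic only, not the removed implementation:

    fn main() {
        // Values taken from the option string cited above: 2 MiB pages, 500 MiB total.
        let pagesize: u64 = 2_097_152; // "pagesize=2097152"
        let size: u64 = 524_288_000; // "size=524288000"

        // sysfs entries are named after the page size in KiB:
        //   /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
        let sysfs_entry = format!(
            "/sys/kernel/mm/hugepages/hugepages-{}kB/nr_hugepages",
            pagesize / 1024
        );

        // Number of pages written to nr_hugepages: 524288000 / 2097152 = 250.
        let numpages = size / pagesize;

        println!("write {} to {}", numpages, sysfs_entry);
    }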
- let verify = fs::read_to_string(&path).context(format!("reading {:?}", &path))?; - let allocated = verify - .trim_end() - .parse::() - .map_err(|_| anyhow!("Unexpected text {:?} in {:?}", &verify, &path))?; - if allocated != size / pagesize { - return Err(anyhow!( - "Only allocated {} of {} hugepages of size {}", - allocated, - numpages, - pagesize - )); - } - - Ok(()) -} - -// Parse filesystem options string to retrieve hugepage details -// options eg "pagesize=2048,size=107374182" -fn get_pagesize_and_size_from_option(options: &[String]) -> Result<(u64, u64)> { - let mut pagesize_str: Option<&str> = None; - let mut size_str: Option<&str> = None; - - for option in options { - let vars: Vec<&str> = option.trim().split(',').collect(); - - for var in vars { - if let Some(stripped) = var.strip_prefix("pagesize=") { - pagesize_str = Some(stripped); - } else if let Some(stripped) = var.strip_prefix("size=") { - size_str = Some(stripped); - } - - if pagesize_str.is_some() && size_str.is_some() { - break; - } - } - } - - if pagesize_str.is_none() || size_str.is_none() { - return Err(anyhow!("no pagesize/size options found")); - } - - let pagesize = pagesize_str - .unwrap() - .parse::() - .context(format!("parse pagesize: {:?}", &pagesize_str))?; - let size = size_str - .unwrap() - .parse::() - .context(format!("parse size: {:?}", &pagesize_str))?; - - Ok((pagesize, size)) -} - -// virtiommio_blk_storage_handler handles the storage for mmio blk driver. -#[instrument] -async fn virtiommio_blk_storage_handler( - logger: &Logger, - storage: &Storage, - sandbox: Arc>, -) -> Result { - //The source path is VmPath - common_storage_handler(logger, storage) -} - -// virtiofs_storage_handler handles the storage for virtio-fs. -#[instrument] -async fn virtiofs_storage_handler( - logger: &Logger, - storage: &Storage, - _sandbox: Arc>, -) -> Result { - common_storage_handler(logger, storage) -} - -// virtio_blk_storage_handler handles the storage for blk driver. 
-#[instrument] -async fn virtio_blk_storage_handler( - logger: &Logger, - storage: &Storage, - sandbox: Arc>, -) -> Result { - let mut storage = storage.clone(); - // If hot-plugged, get the device node path based on the PCI path - // otherwise use the virt path provided in Storage Source - if storage.source.starts_with("/dev") { - let metadata = fs::metadata(&storage.source) - .context(format!("get metadata on file {:?}", &storage.source))?; - - let mode = metadata.permissions().mode(); - if mode & libc::S_IFBLK == 0 { - return Err(anyhow!("Invalid device {}", &storage.source)); - } - } else { - let pcipath = pci::Path::from_str(&storage.source)?; - let dev_path = get_virtio_blk_pci_device_name(&sandbox, &pcipath).await?; - storage.source = dev_path; - } - - common_storage_handler(logger, &storage) -} - -// virtio_blk_ccw_storage_handler handles storage for the blk-ccw driver (s390x) -#[cfg(target_arch = "s390x")] -#[instrument] -async fn virtio_blk_ccw_storage_handler( - logger: &Logger, - storage: &Storage, - sandbox: Arc>, -) -> Result { - let mut storage = storage.clone(); - let ccw_device = ccw::Device::from_str(&storage.source)?; - let dev_path = get_virtio_blk_ccw_device_name(&sandbox, &ccw_device).await?; - storage.source = dev_path; - common_storage_handler(logger, &storage) -} - -#[cfg(not(target_arch = "s390x"))] -#[instrument] -async fn virtio_blk_ccw_storage_handler( - _: &Logger, - _: &Storage, - _: Arc>, -) -> Result { - Err(anyhow!("CCW is only supported on s390x")) -} - -// virtio_scsi_storage_handler handles the storage for scsi driver. -#[instrument] -async fn virtio_scsi_storage_handler( - logger: &Logger, - storage: &Storage, - sandbox: Arc>, -) -> Result { - let mut storage = storage.clone(); - - // Retrieve the device path from SCSI address. - let dev_path = get_scsi_device_name(&sandbox, &storage.source).await?; - storage.source = dev_path; - - common_storage_handler(logger, &storage) -} - -#[instrument] -fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result { - // Mount the storage device. - let mount_point = storage.mount_point.to_string(); - - mount_storage(logger, storage)?; - set_ownership(logger, storage)?; - Ok(mount_point) -} - -// nvdimm_storage_handler handles the storage for NVDIMM driver. -#[instrument] -async fn nvdimm_storage_handler( - logger: &Logger, - storage: &Storage, - sandbox: Arc>, -) -> Result { - let storage = storage.clone(); - - // Retrieve the device path from NVDIMM address. - wait_for_pmem_device(&sandbox, &storage.source).await?; - - common_storage_handler(logger, &storage) -} - -async fn bind_watcher_storage_handler( - logger: &Logger, - storage: &Storage, - sandbox: Arc>, - cid: Option, -) -> Result<()> { - let mut locked = sandbox.lock().await; - - if let Some(cid) = cid { - locked - .bind_watcher - .add_container(cid, iter::once(storage.clone()), logger) - .await - } else { - Ok(()) - } -} - -// mount_storage performs the mount described by the storage structure. -#[instrument] -fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> { - let logger = logger.new(o!("subsystem" => "mount")); - - // Check share before attempting to mount to see if the destination is already a mount point. - // If so, skip doing the mount. This facilitates mounting the sharedfs automatically - // in the guest before the agent service starts. - if storage.source == MOUNT_GUEST_TAG && is_mounted(&storage.mount_point)? 
{ - warn!( - logger, - "{} already mounted on {}, ignoring...", MOUNT_GUEST_TAG, &storage.mount_point - ); - return Ok(()); - } - - let mount_path = Path::new(&storage.mount_point); - let src_path = Path::new(&storage.source); - if storage.fstype == "bind" && !src_path.is_dir() { - ensure_destination_file_exists(mount_path) - } else { - fs::create_dir_all(mount_path).map_err(anyhow::Error::from) - } - .context("Could not create mountpoint")?; - - let options_vec = storage.options.to_vec(); - let options_vec = options_vec.iter().map(String::as_str).collect(); - let (flags, options) = parse_mount_flags_and_options(options_vec); - - let source = Path::new(&storage.source); - - info!(logger, "mounting storage"; - "mount-source" => source.display(), - "mount-destination" => mount_path.display(), - "mount-fstype" => storage.fstype.as_str(), - "mount-options" => options.as_str(), - ); - - baremount( - source, - mount_path, - storage.fstype.as_str(), - flags, - options.as_str(), - &logger, - ) -} - -#[instrument] -pub fn set_ownership(logger: &Logger, storage: &Storage) -> Result<()> { - let logger = logger.new(o!("subsystem" => "mount", "fn" => "set_ownership")); - - // If fsGroup is not set, skip performing ownership change - if storage.fs_group.is_none() { - return Ok(()); - } - let fs_group = storage.get_fs_group(); - - let mut read_only = false; - let opts_vec: Vec = storage.options.to_vec(); - if opts_vec.contains(&String::from("ro")) { - read_only = true; - } - - let mount_path = Path::new(&storage.mount_point); - let metadata = mount_path.metadata().map_err(|err| { - error!(logger, "failed to obtain metadata for mount path"; - "mount-path" => mount_path.to_str(), - "error" => err.to_string(), - ); - err - })?; - - if fs_group.group_change_policy == FSGroupChangePolicy::OnRootMismatch - && metadata.gid() == fs_group.group_id - { - let mut mask = if read_only { RO_MASK } else { RW_MASK }; - mask |= EXEC_MASK; - - // With fsGroup change policy to OnRootMismatch, if the current - // gid of the mount path root directory matches the desired gid - // and the current permission of mount path root directory is correct, - // then ownership change will be skipped. - let current_mode = metadata.permissions().mode(); - if (mask & current_mode == mask) && (current_mode & MODE_SETGID != 0) { - info!(logger, "skipping ownership change for volume"; - "mount-path" => mount_path.to_str(), - "fs-group" => fs_group.group_id.to_string(), - ); - return Ok(()); - } - } - - info!(logger, "performing recursive ownership change"; - "mount-path" => mount_path.to_str(), - "fs-group" => fs_group.group_id.to_string(), - ); - recursive_ownership_change( - mount_path, - None, - Some(Gid::from_raw(fs_group.group_id)), - read_only, - ) -} - -#[instrument] -pub fn recursive_ownership_change( - path: &Path, - uid: Option, - gid: Option, - read_only: bool, -) -> Result<()> { - let mut mask = if read_only { RO_MASK } else { RW_MASK }; - if path.is_dir() { - for entry in fs::read_dir(&path)? { - recursive_ownership_change(entry?.path().as_path(), uid, gid, read_only)?; - } - mask |= EXEC_MASK; - mask |= MODE_SETGID; - } - nix::unistd::chown(path, uid, gid)?; - - if gid.is_some() { - let metadata = path.metadata()?; - let mut permission = metadata.permissions(); - let target_mode = metadata.mode() | mask; - permission.set_mode(target_mode); - fs::set_permissions(path, permission)?; - } - - Ok(()) -} - /// Looks for `mount_point` entry in the /proc/mounts. 
#[instrument] pub fn is_mounted(mount_point: &str) -> Result { let mount_point = mount_point.trim_end_matches('/'); - let found = fs::metadata(mount_point).is_ok() - // Looks through /proc/mounts and check if the mount exists - && fs::read_to_string("/proc/mounts")? - .lines() - .any(|line| { - // The 2nd column reveals the mount point. - line.split_whitespace() - .nth(1) - .map(|target| mount_point.eq(target)) - .unwrap_or(false) - }); - + let found = fs::metadata(mount_point).is_ok() && get_linux_mount_info(mount_point).is_ok(); Ok(found) } -#[instrument] -fn parse_mount_flags_and_options(options_vec: Vec<&str>) -> (MsFlags, String) { - let mut flags = MsFlags::empty(); - let mut options: String = "".to_string(); - - for opt in options_vec { - if !opt.is_empty() { - match FLAGS.get(opt) { - Some(x) => { - let (clear, f) = *x; - if clear { - flags &= !f; - } else { - flags |= f; - } - } - None => { - if !options.is_empty() { - options.push_str(format!(",{}", opt).as_str()); - } else { - options.push_str(opt.to_string().as_str()); - } - } - }; - } - } - (flags, options) -} - -// add_storages takes a list of storages passed by the caller, and perform the -// associated operations such as waiting for the device to show up, and mount -// it to a specific location, according to the type of handler chosen, and for -// each storage. -#[instrument] -pub async fn add_storages( - logger: Logger, - storages: Vec, - sandbox: Arc>, - cid: Option, -) -> Result> { - let mut mount_list = Vec::new(); - - for storage in storages { - let handler_name = storage.driver.clone(); - let logger = logger.new(o!( - "subsystem" => "storage", - "storage-type" => handler_name.to_owned())); - - { - let mut sb = sandbox.lock().await; - let new_storage = sb.set_sandbox_storage(&storage.mount_point); - if !new_storage { - continue; - } - } - - let res = match handler_name.as_str() { - DRIVER_BLK_TYPE => virtio_blk_storage_handler(&logger, &storage, sandbox.clone()).await, - DRIVER_BLK_CCW_TYPE => { - virtio_blk_ccw_storage_handler(&logger, &storage, sandbox.clone()).await - } - DRIVER_9P_TYPE => virtio9p_storage_handler(&logger, &storage, sandbox.clone()).await, - DRIVER_VIRTIOFS_TYPE => { - virtiofs_storage_handler(&logger, &storage, sandbox.clone()).await - } - DRIVER_EPHEMERAL_TYPE => { - ephemeral_storage_handler(&logger, &storage, sandbox.clone()).await - } - DRIVER_OVERLAYFS_TYPE => { - overlayfs_storage_handler(&logger, &storage, sandbox.clone()).await - } - DRIVER_MMIO_BLK_TYPE => { - virtiommio_blk_storage_handler(&logger, &storage, sandbox.clone()).await - } - DRIVER_LOCAL_TYPE => local_storage_handler(&logger, &storage, sandbox.clone()).await, - DRIVER_SCSI_TYPE => { - virtio_scsi_storage_handler(&logger, &storage, sandbox.clone()).await - } - DRIVER_NVDIMM_TYPE => nvdimm_storage_handler(&logger, &storage, sandbox.clone()).await, - DRIVER_WATCHABLE_BIND_TYPE => { - bind_watcher_storage_handler(&logger, &storage, sandbox.clone(), cid.clone()) - .await?; - // Don't register watch mounts, they're handled separately by the watcher. - Ok(String::new()) - } - _ => { - return Err(anyhow!( - "Failed to find the storage handler {}", - storage.driver.to_owned() - )); - } - }; - - // Todo need to rollback the mounted storage if err met. 
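The removed parse_mount_flags_and_options() above, like the parse_mount_options() helper from kata_sys_util that replaces it, splits an option list into mount(2) flag bits plus a data string passed through to the filesystem. A minimal sketch of that split with a hand-rolled table covering only a few options (assumes the nix crate; the real tables cover many more entries):

    use nix::mount::MsFlags;

    // Options that map to mount(2) flag bits are folded into MsFlags;
    // everything else becomes the comma-separated data string.
    fn split_options(options: &[&str]) -> (MsFlags, String) {
        let mut flags = MsFlags::empty();
        let mut data: Vec<&str> = Vec::new();

        for opt in options {
            match *opt {
                "ro" => flags |= MsFlags::MS_RDONLY,
                "nosuid" => flags |= MsFlags::MS_NOSUID,
                "nodev" => flags |= MsFlags::MS_NODEV,
                "noexec" => flags |= MsFlags::MS_NOEXEC,
                other => data.push(other),
            }
        }
        (flags, data.join(","))
    }

    fn main() {
        let (flags, data) = split_options(&["ro", "nosuid", "mode=755"]);
        // Prints something like: flags=MS_RDONLY | MS_NOSUID, data=mode=755
        println!("flags={:?}, data={}", flags, data);
    }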
- let mount_point = res?; - - if !mount_point.is_empty() { - mount_list.push(mount_point); - } - } - - Ok(mount_list) -} - #[instrument] fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> { - let options_vec: Vec<&str> = m.options.clone(); - - let (flags, options) = parse_mount_flags_and_options(options_vec); - - fs::create_dir_all(Path::new(m.dest)).context("could not create directory")?; + fs::create_dir_all(m.dest).context("could not create directory")?; + let (flags, options) = parse_mount_options(&m.options)?; let source = Path::new(m.src); let dest = Path::new(m.dest); baremount(source, dest, m.fstype, flags, &options, logger).or_else(|e| { - if m.src != "dev" { - return Err(e); + if m.src == "dev" { + error!( + logger, + "Could not mount filesystem from {} to {}", m.src, m.dest + ); + Ok(()) + } else { + Err(e) } - - error!( - logger, - "Could not mount filesystem from {} to {}", m.src, m.dest - ); - - Ok(()) - })?; - - Ok(()) + }) } #[instrument] @@ -833,26 +167,24 @@ pub fn get_mount_fs_type(mount_point: &str) -> Result { } // get_mount_fs_type_from_file returns the FS type corresponding to the passed mount point and -// any error ecountered. +// any error encountered. #[instrument] pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Result { if mount_point.is_empty() { return Err(anyhow!("Invalid mount point {}", mount_point)); } - let content = fs::read_to_string(mount_file)?; + let content = fs::read_to_string(mount_file) + .map_err(|e| anyhow!("read mount file {}: {}", mount_file, e))?; let re = Regex::new(format!("device .+ mounted on {} with fstype (.+)", mount_point).as_str())?; // Read the file line by line using the lines() iterator from std::io::BufRead. for (_index, line) in content.lines().enumerate() { - let capes = match re.captures(line) { - Some(c) => c, - None => continue, - }; - - if capes.len() > 1 { - return Ok(capes[1].to_string()); + if let Some(capes) = re.captures(line) { + if capes.len() > 1 { + return Ok(capes[1].to_string()); + } } } @@ -880,7 +212,7 @@ pub fn get_cgroup_mounts( }]); } - let file = File::open(&cg_path)?; + let file = File::open(cg_path)?; let reader = BufReader::new(file); let mut has_device_cgroup = false; @@ -967,64 +299,56 @@ pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result< // Enable memory hierarchical account. 
// For more information see https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt - online_device("/sys/fs/cgroup/memory/memory.use_hierarchy")?; - Ok(()) + online_device("/sys/fs/cgroup/memory/memory.use_hierarchy") } #[instrument] -pub fn remove_mounts(mounts: &[String]) -> Result<()> { +pub fn remove_mounts + std::fmt::Debug>(mounts: &[P]) -> Result<()> { for m in mounts.iter() { - nix::mount::umount(m.as_str()).context(format!("failed to umount {:?}", m))?; - } - Ok(()) -} - -#[instrument] -fn ensure_destination_file_exists(path: &Path) -> Result<()> { - if path.is_file() { - return Ok(()); - } else if path.exists() { - return Err(anyhow!("{:?} exists but is not a regular file", path)); + nix::mount::umount(m.as_ref()).context(format!("failed to umount {:?}", m.as_ref()))?; } - - let dir = path - .parent() - .ok_or_else(|| anyhow!("failed to find parent path for {:?}", path))?; - - fs::create_dir_all(dir).context(format!("create_dir_all {:?}", dir))?; - - fs::File::create(path).context(format!("create empty file {:?}", path))?; - Ok(()) } -#[instrument] -fn parse_options(option_list: Vec) -> HashMap { - let mut options = HashMap::new(); - for opt in option_list.iter() { - let fields: Vec<&str> = opt.split('=').collect(); - if fields.len() != 2 { - continue; - } - - options.insert(fields[0].to_string(), fields[1].to_string()); - } - - options -} - #[cfg(test)] mod tests { use super::*; - use crate::test_utils::test_utils::TestUserType; - use crate::{skip_if_not_root, skip_loop_by_user, skip_loop_if_not_root, skip_loop_if_root}; - use protobuf::RepeatedField; - use protocols::agent::FSGroup; + use slog::Drain; use std::fs::File; use std::fs::OpenOptions; use std::io::Write; + use std::os::unix::fs::PermissionsExt; use std::path::PathBuf; use tempfile::tempdir; + use test_utils::TestUserType; + use test_utils::{ + skip_if_not_root, skip_loop_by_user, skip_loop_if_not_root, skip_loop_if_root, + }; + + #[test] + fn test_already_baremounted() { + let plain = slog_term::PlainSyncDecorator::new(std::io::stdout()); + let logger = Logger::root(slog_term::FullFormat::new(plain).build().fuse(), o!()); + + let test_cases = [ + ("dev", "/dev", "devtmpfs"), + ("udev", "/dev", "devtmpfs"), + ("proc", "/proc", "proc"), + ("sysfs", "/sys", "sysfs"), + ]; + + for &(source, destination, fs_type) in &test_cases { + let source = Path::new(source); + let destination = Path::new(destination); + let flags = MsFlags::MS_RDONLY; + let options = "mode=755"; + println!( + "testing if already mounted baremount({:?} {:?} {:?})", + source, destination, fs_type + ); + assert!(baremount(source, destination, fs_type, flags, options, &logger).is_ok()); + } + } #[test] fn test_mount() { @@ -1563,145 +887,6 @@ mod tests { } } - #[test] - fn test_ensure_destination_file_exists() { - let dir = tempdir().expect("failed to create tmpdir"); - - let mut testfile = dir.into_path(); - testfile.push("testfile"); - - let result = ensure_destination_file_exists(&testfile); - - assert!(result.is_ok()); - assert!(testfile.exists()); - - let result = ensure_destination_file_exists(&testfile); - assert!(result.is_ok()); - - assert!(testfile.is_file()); - } - - #[test] - fn test_mount_storage() { - #[derive(Debug)] - struct TestData<'a> { - test_user: TestUserType, - storage: Storage, - error_contains: &'a str, - - make_source_dir: bool, - make_mount_dir: bool, - deny_mount_permission: bool, - } - - impl Default for TestData<'_> { - fn default() -> Self { - TestData { - test_user: TestUserType::Any, - storage: Storage { - 
mount_point: "mnt".to_string(), - source: "src".to_string(), - fstype: "tmpfs".to_string(), - ..Default::default() - }, - make_source_dir: true, - make_mount_dir: false, - deny_mount_permission: false, - error_contains: "", - } - } - } - - let tests = &[ - TestData { - test_user: TestUserType::NonRootOnly, - error_contains: "EPERM: Operation not permitted", - ..Default::default() - }, - TestData { - test_user: TestUserType::RootOnly, - ..Default::default() - }, - TestData { - storage: Storage { - mount_point: "mnt".to_string(), - source: "src".to_string(), - fstype: "bind".to_string(), - ..Default::default() - }, - make_source_dir: false, - make_mount_dir: true, - error_contains: "Could not create mountpoint", - ..Default::default() - }, - TestData { - test_user: TestUserType::NonRootOnly, - deny_mount_permission: true, - error_contains: "Could not create mountpoint", - ..Default::default() - }, - ]; - - for (i, d) in tests.iter().enumerate() { - let msg = format!("test[{}]: {:?}", i, d); - - skip_loop_by_user!(msg, d.test_user); - - let drain = slog::Discard; - let logger = slog::Logger::root(drain, o!()); - - let tempdir = tempdir().unwrap(); - - let source = tempdir.path().join(&d.storage.source); - let mount_point = tempdir.path().join(&d.storage.mount_point); - - let storage = Storage { - source: source.to_str().unwrap().to_string(), - mount_point: mount_point.to_str().unwrap().to_string(), - ..d.storage.clone() - }; - - if d.make_source_dir { - fs::create_dir_all(&storage.source).unwrap(); - } - if d.make_mount_dir { - fs::create_dir_all(&storage.mount_point).unwrap(); - } - - if d.deny_mount_permission { - fs::set_permissions( - mount_point.parent().unwrap(), - fs::Permissions::from_mode(0o000), - ) - .unwrap(); - } - - let result = mount_storage(&logger, &storage); - - // restore permissions so tempdir can be cleaned up - if d.deny_mount_permission { - fs::set_permissions( - mount_point.parent().unwrap(), - fs::Permissions::from_mode(0o755), - ) - .unwrap(); - } - - if result.is_ok() { - nix::mount::umount(&mount_point).unwrap(); - } - - let msg = format!("{}: result: {:?}", msg, result); - if d.error_contains.is_empty() { - assert!(result.is_ok(), "{}", msg); - } else { - assert!(result.is_err(), "{}", msg); - let error_msg = format!("{}", result.unwrap_err()); - assert!(error_msg.contains(d.error_contains), "{}", msg); - } - } - } - #[test] fn test_mount_to_rootfs() { #[derive(Debug)] @@ -1761,7 +946,7 @@ mod tests { let tempdir = tempdir().unwrap(); let src = if d.mask_src { - tempdir.path().join(&d.src) + tempdir.path().join(d.src) } else { Path::new(d.src).to_path_buf() }; @@ -1801,62 +986,6 @@ mod tests { } } - #[test] - fn test_get_pagesize_and_size_from_option() { - let expected_pagesize = 2048; - let expected_size = 107374182; - let expected = (expected_pagesize, expected_size); - - let data = vec![ - // (input, expected, is_ok) - ("size-1=107374182,pagesize-1=2048", expected, false), - ("size-1=107374182,pagesize=2048", expected, false), - ("size=107374182,pagesize-1=2048", expected, false), - ("size=107374182,pagesize=abc", expected, false), - ("size=abc,pagesize=2048", expected, false), - ("size=,pagesize=2048", expected, false), - ("size=107374182,pagesize=", expected, false), - ("size=107374182,pagesize=2048", expected, true), - ("pagesize=2048,size=107374182", expected, true), - ("foo=bar,pagesize=2048,size=107374182", expected, true), - ( - "foo=bar,pagesize=2048,foo1=bar1,size=107374182", - expected, - true, - ), - ( - 
"pagesize=2048,foo1=bar1,foo=bar,size=107374182", - expected, - true, - ), - ( - "foo=bar,pagesize=2048,foo1=bar1,size=107374182,foo2=bar2", - expected, - true, - ), - ( - "foo=bar,size=107374182,foo1=bar1,pagesize=2048", - expected, - true, - ), - ]; - - for case in data { - let input = case.0; - let r = get_pagesize_and_size_from_option(&[input.to_string()]); - - let is_ok = case.2; - if is_ok { - let expected = case.1; - let (pagesize, size) = r.unwrap(); - assert_eq!(expected.0, pagesize); - assert_eq!(expected.1, size); - } else { - assert!(r.is_err()); - } - } - } - #[test] fn test_parse_mount_flags_and_options() { #[derive(Debug)] @@ -1899,7 +1028,7 @@ mod tests { for (i, d) in tests.iter().enumerate() { let msg = format!("test[{}]: {:?}", i, d); - let result = parse_mount_flags_and_options(d.options_vec.clone()); + let result = parse_mount_options(&d.options_vec).unwrap(); let msg = format!("{}: result: {:?}", msg, result); @@ -1907,212 +1036,4 @@ mod tests { assert_eq!(expected_result, result, "{}", msg); } } - - #[test] - fn test_set_ownership() { - skip_if_not_root!(); - - let logger = slog::Logger::root(slog::Discard, o!()); - - #[derive(Debug)] - struct TestData<'a> { - mount_path: &'a str, - fs_group: Option, - read_only: bool, - expected_group_id: u32, - expected_permission: u32, - } - - let tests = &[ - TestData { - mount_path: "foo", - fs_group: None, - read_only: false, - expected_group_id: 0, - expected_permission: 0, - }, - TestData { - mount_path: "rw_mount", - fs_group: Some(FSGroup { - group_id: 3000, - group_change_policy: FSGroupChangePolicy::Always, - unknown_fields: Default::default(), - cached_size: Default::default(), - }), - read_only: false, - expected_group_id: 3000, - expected_permission: RW_MASK | EXEC_MASK | MODE_SETGID, - }, - TestData { - mount_path: "ro_mount", - fs_group: Some(FSGroup { - group_id: 3000, - group_change_policy: FSGroupChangePolicy::OnRootMismatch, - unknown_fields: Default::default(), - cached_size: Default::default(), - }), - read_only: true, - expected_group_id: 3000, - expected_permission: RO_MASK | EXEC_MASK | MODE_SETGID, - }, - ]; - - let tempdir = tempdir().expect("failed to create tmpdir"); - - for (i, d) in tests.iter().enumerate() { - let msg = format!("test[{}]: {:?}", i, d); - - let mount_dir = tempdir.path().join(d.mount_path); - fs::create_dir(&mount_dir) - .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg)); - - let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode(); - let mut storage_data = Storage::new(); - if d.read_only { - storage_data.set_options(RepeatedField::from_slice(&[ - "foo".to_string(), - "ro".to_string(), - ])); - } - if let Some(fs_group) = d.fs_group.clone() { - storage_data.set_fs_group(fs_group); - } - storage_data.mount_point = mount_dir.clone().into_os_string().into_string().unwrap(); - - let result = set_ownership(&logger, &storage_data); - assert!(result.is_ok()); - - assert_eq!( - mount_dir.as_path().metadata().unwrap().gid(), - d.expected_group_id - ); - assert_eq!( - mount_dir.as_path().metadata().unwrap().permissions().mode(), - (directory_mode | d.expected_permission) - ); - } - } - - #[test] - fn test_recursive_ownership_change() { - skip_if_not_root!(); - - const COUNT: usize = 5; - - #[derive(Debug)] - struct TestData<'a> { - // Directory where the recursive ownership change should be performed on - path: &'a str, - - // User ID for ownership change - uid: u32, - - // Group ID for ownership change - gid: u32, - - // Set when the permission 
should be read-only - read_only: bool, - - // The expected permission of all directories after ownership change - expected_permission_directory: u32, - - // The expected permission of all files after ownership change - expected_permission_file: u32, - } - - let tests = &[ - TestData { - path: "no_gid_change", - uid: 0, - gid: 0, - read_only: false, - expected_permission_directory: 0, - expected_permission_file: 0, - }, - TestData { - path: "rw_gid_change", - uid: 0, - gid: 3000, - read_only: false, - expected_permission_directory: RW_MASK | EXEC_MASK | MODE_SETGID, - expected_permission_file: RW_MASK, - }, - TestData { - path: "ro_gid_change", - uid: 0, - gid: 3000, - read_only: true, - expected_permission_directory: RO_MASK | EXEC_MASK | MODE_SETGID, - expected_permission_file: RO_MASK, - }, - ]; - - let tempdir = tempdir().expect("failed to create tmpdir"); - - for (i, d) in tests.iter().enumerate() { - let msg = format!("test[{}]: {:?}", i, d); - - let mount_dir = tempdir.path().join(d.path); - fs::create_dir(&mount_dir) - .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg)); - - let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode(); - let mut file_mode: u32 = 0; - - // create testing directories and files - for n in 1..COUNT { - let nest_dir = mount_dir.join(format!("nested{}", n)); - fs::create_dir(&nest_dir) - .unwrap_or_else(|_| panic!("{}: failed to create nest directory", msg)); - - for f in 1..COUNT { - let filename = nest_dir.join(format!("file{}", f)); - File::create(&filename) - .unwrap_or_else(|_| panic!("{}: failed to create file", msg)); - file_mode = filename.as_path().metadata().unwrap().permissions().mode(); - } - } - - let uid = if d.uid > 0 { - Some(Uid::from_raw(d.uid)) - } else { - None - }; - let gid = if d.gid > 0 { - Some(Gid::from_raw(d.gid)) - } else { - None - }; - let result = recursive_ownership_change(&mount_dir, uid, gid, d.read_only); - - assert!(result.is_ok()); - - assert_eq!(mount_dir.as_path().metadata().unwrap().gid(), d.gid); - assert_eq!( - mount_dir.as_path().metadata().unwrap().permissions().mode(), - (directory_mode | d.expected_permission_directory) - ); - - for n in 1..COUNT { - let nest_dir = mount_dir.join(format!("nested{}", n)); - for f in 1..COUNT { - let filename = nest_dir.join(format!("file{}", f)); - let file = Path::new(&filename); - - assert_eq!(file.metadata().unwrap().gid(), d.gid); - assert_eq!( - file.metadata().unwrap().permissions().mode(), - (file_mode | d.expected_permission_file) - ); - } - - let dir = Path::new(&nest_dir); - assert_eq!(dir.metadata().unwrap().gid(), d.gid); - assert_eq!( - dir.metadata().unwrap().permissions().mode(), - (directory_mode | d.expected_permission_directory) - ); - } - } - } } diff --git a/src/agent/src/namespace.rs b/src/agent/src/namespace.rs index dc6ebe63b382..bf24cd1048d2 100644 --- a/src/agent/src/namespace.rs +++ b/src/agent/src/namespace.rs @@ -7,14 +7,14 @@ use anyhow::{anyhow, Result}; use nix::mount::MsFlags; use nix::sched::{unshare, CloneFlags}; use nix::unistd::{getpid, gettid}; +use slog::Logger; use std::fmt; use std::fs; use std::fs::File; use std::path::{Path, PathBuf}; use tracing::instrument; -use crate::mount::{baremount, FLAGS}; -use slog::Logger; +use crate::mount::baremount; const PERSISTENT_NS_DIR: &str = "/var/run/sandbox-ns"; pub const NSTYPEIPC: &str = "ipc"; @@ -78,6 +78,7 @@ impl Namespace { // setup creates persistent namespace without switching to it. // Note, pid namespaces cannot be persisted. 
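As the comment below notes, setup() persists a namespace without switching the caller into it: the namespace is created with unshare(2) on a helper thread and its /proc ns entry is bind-mounted onto a file so it outlives that thread, which is also why a plain MS_BIND | MS_REC now replaces the old FLAGS["rbind"] lookup. A rough sketch of the idea using a UTS namespace (requires CAP_SYS_ADMIN, assumes the nix and anyhow dependencies, and is not the agent's implementation):

    use nix::mount::{mount, MsFlags};
    use nix::sched::{unshare, CloneFlags};
    use nix::unistd::{getpid, gettid};
    use std::fs::File;

    fn persist_uts_namespace(persist_path: &str) -> anyhow::Result<()> {
        // The bind-mount target must already exist as a file.
        File::create(persist_path)?;

        // Create a new UTS namespace for the calling thread (the agent does this
        // on a dedicated thread so its main thread never switches namespace).
        unshare(CloneFlags::CLONE_NEWUTS)?;

        // Bind-mount the thread's namespace entry onto the persistent file so
        // the namespace stays alive after the thread exits.
        let origin = format!("/proc/{}/task/{}/ns/uts", getpid(), gettid());
        mount(
            Some(origin.as_str()),
            persist_path,
            None::<&str>,
            MsFlags::MS_BIND | MsFlags::MS_REC,
            None::<&str>,
        )?;

        Ok(())
    }

    fn main() -> anyhow::Result<()> {
        persist_uts_namespace("/tmp/persistent-uts-ns")
    }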
#[instrument] + #[allow(clippy::question_mark)] pub async fn setup(mut self) -> Result { fs::create_dir_all(&self.persistent_ns_dir)?; @@ -88,7 +89,7 @@ impl Namespace { } let logger = self.logger.clone(); - let new_ns_path = ns_path.join(&ns_type.get()); + let new_ns_path = ns_path.join(ns_type.get()); File::create(new_ns_path.as_path())?; @@ -102,7 +103,7 @@ impl Namespace { let source = Path::new(&origin_ns_path); let destination = new_ns_path.as_path(); - File::open(&source)?; + File::open(source)?; // Create a new netns on the current thread. let cf = ns_type.get_flags(); @@ -115,15 +116,7 @@ impl Namespace { // Bind mount the new namespace from the current thread onto the mount point to persist it. let mut flags = MsFlags::empty(); - - if let Some(x) = FLAGS.get("rbind") { - let (clear, f) = *x; - if clear { - flags &= !f; - } else { - flags |= f; - } - }; + flags |= MsFlags::MS_BIND | MsFlags::MS_REC; baremount(source, destination, "none", flags, "", &logger).map_err(|e| { anyhow!( @@ -187,9 +180,10 @@ impl fmt::Debug for NamespaceType { #[cfg(test)] mod tests { use super::{Namespace, NamespaceType}; - use crate::{mount::remove_mounts, skip_if_not_root}; + use crate::mount::remove_mounts; use nix::sched::CloneFlags; use tempfile::Builder; + use test_utils::skip_if_not_root; #[tokio::test] async fn test_setup_persistent_ns() { diff --git a/src/agent/src/netlink.rs b/src/agent/src/netlink.rs index 1de4ef6920d6..280b0d95a60e 100644 --- a/src/agent/src/netlink.rs +++ b/src/agent/src/netlink.rs @@ -7,7 +7,6 @@ use anyhow::{anyhow, Context, Result}; use futures::{future, StreamExt, TryStreamExt}; use ipnetwork::{IpNetwork, Ipv4Network, Ipv6Network}; use nix::errno::Errno; -use protobuf::RepeatedField; use protocols::types::{ARPNeighbor, IPAddress, IPFamily, Interface, Route}; use rtnetlink::{new_connection, packet, IpVersion}; use std::convert::{TryFrom, TryInto}; @@ -64,7 +63,7 @@ impl Handle { pub async fn update_interface(&mut self, iface: &Interface) -> Result<()> { // The reliable way to find link is using hardware address // as filter. However, hardware filter might not be supported - // by netlink, we may have to dump link list and the find the + // by netlink, we may have to dump link list and then find the // target link. filter using name or family is supported, but // we cannot use that to find target link. // let's try if hardware address filter works. -_- @@ -83,13 +82,34 @@ impl Handle { // Add new ip addresses from request for ip_address in &iface.IPAddresses { - let ip = IpAddr::from_str(ip_address.get_address())?; - let mask = ip_address.get_mask().parse::()?; + let ip = IpAddr::from_str(ip_address.address())?; + let mask = ip_address.mask().parse::()?; self.add_addresses(link.index(), std::iter::once(IpNetwork::new(ip, mask)?)) .await?; } + // we need to update the link's interface name, thus we should rename the existed link whose name + // is the same with the link's request name, otherwise, it would update the link failed with the + // name conflicted. + let mut new_link = None; + if link.name() != iface.name { + if let Ok(link) = self.find_link(LinkFilter::Name(iface.name.as_str())).await { + // update the existing interface name with a temporary name, otherwise + // it would failed to udpate this interface with an existing name. 
+ let mut request = self.handle.link().set(link.index()); + request.message_mut().header = link.header.clone(); + + request + .name(format!("{}_temp", link.name())) + .up() + .execute() + .await?; + + new_link = Some(link); + } + } + // Update link let mut request = self.handle.link().set(link.index()); request.message_mut().header = link.header.clone(); @@ -102,6 +122,14 @@ impl Handle { .execute() .await?; + // swap the updated iface's name. + if let Some(nlink) = new_link { + let mut request = self.handle.link().set(nlink.index()); + request.message_mut().header = nlink.header.clone(); + + request.name(link.name()).up().execute().await?; + } + Ok(()) } @@ -152,7 +180,7 @@ impl Handle { .map(|p| p.try_into()) .collect::>>()?; - iface.IPAddresses = RepeatedField::from_vec(ips); + iface.IPAddresses = ips; list.push(iface); } @@ -178,7 +206,7 @@ impl Handle { .with_context(|| format!("Failed to parse MAC address: {}", addr))?; // Hardware filter might not be supported by netlink, - // we may have to dump link list and the find the target link. + // we may have to dump link list and then find the target link. stream .try_filter(|f| { let result = f.nlas.iter().any(|n| match n { @@ -334,7 +362,7 @@ impl Handle { // `rtnetlink` offers a separate request builders for different IP versions (IP v4 and v6). // This if branch is a bit clumsy because it does almost the same. - if route.get_family() == IPFamily::v6 { + if route.family() == IPFamily::v6 { let dest_addr = if !route.dest.is_empty() { Ipv6Network::from_str(&route.dest)? } else { @@ -368,9 +396,9 @@ impl Handle { if Errno::from_i32(message.code.abs()) != Errno::EEXIST { return Err(anyhow!( "Failed to add IP v6 route (src: {}, dst: {}, gtw: {},Err: {})", - route.get_source(), - route.get_dest(), - route.get_gateway(), + route.source(), + route.dest(), + route.gateway(), message )); } @@ -409,9 +437,9 @@ impl Handle { if Errno::from_i32(message.code.abs()) != Errno::EEXIST { return Err(anyhow!( "Failed to add IP v4 route (src: {}, dst: {}, gtw: {},Err: {})", - route.get_source(), - route.get_dest(), - route.get_gateway(), + route.source(), + route.dest(), + route.gateway(), message )); } @@ -506,7 +534,7 @@ impl Handle { self.add_arp_neighbor(&neigh).await.map_err(|err| { anyhow!( "Failed to add ARP neighbor {}: {:?}", - neigh.get_toIPAddress().get_address(), + neigh.toIPAddress().address(), err ) })?; @@ -523,13 +551,15 @@ impl Handle { .as_ref() .map(|to| to.address.as_str()) // Extract address field .and_then(|addr| if addr.is_empty() { None } else { Some(addr) }) // Make sure it's not empty - .ok_or_else(|| anyhow!(nix::Error::EINVAL))?; + .ok_or_else(|| anyhow!("Unable to determine ip address of ARP neighbor"))?; let ip = IpAddr::from_str(ip_address) .map_err(|e| anyhow!("Failed to parse IP {}: {:?}", ip_address, e))?; // Import rtnetlink objects that make sense only for this function - use packet::constants::{NDA_UNSPEC, NLM_F_ACK, NLM_F_CREATE, NLM_F_EXCL, NLM_F_REQUEST}; + use packet::constants::{ + NDA_UNSPEC, NLM_F_ACK, NLM_F_CREATE, NLM_F_REPLACE, NLM_F_REQUEST, + }; use packet::neighbour::{NeighbourHeader, NeighbourMessage}; use packet::nlas::neighbour::Nla; use packet::{NetlinkMessage, NetlinkPayload, RtnlMessage}; @@ -572,7 +602,7 @@ impl Handle { // Send request and ACK let mut req = NetlinkMessage::from(RtnlMessage::NewNeighbour(message)); - req.header.flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + req.header.flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_REPLACE; let mut response = 
self.handle.request(req)?; while let Some(message) = response.next().await { @@ -612,7 +642,12 @@ fn parse_mac_address(addr: &str) -> Result<[u8; 6]> { // Parse single Mac address block let mut parse_next = || -> Result { - let v = u8::from_str_radix(split.next().ok_or_else(|| anyhow!(nix::Error::EINVAL))?, 16)?; + let v = u8::from_str_radix( + split + .next() + .ok_or_else(|| anyhow!("Invalid MAC address {}", addr))?, + 16, + )?; Ok(v) }; @@ -718,7 +753,7 @@ impl TryFrom
for IPAddress { let mask = format!("{}", value.0.header.prefix_len); Ok(IPAddress { - family, + family: family.into(), address, mask, ..Default::default() @@ -770,10 +805,10 @@ impl Address { #[cfg(test)] mod tests { use super::*; - use crate::skip_if_not_root; use rtnetlink::packet; use std::iter; use std::process::Command; + use test_utils::skip_if_not_root; #[tokio::test] async fn find_link_by_name() { @@ -939,13 +974,13 @@ mod tests { fn clean_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) { // ip link delete dummy Command::new("ip") - .args(&["link", "delete", dummy_name]) + .args(["link", "delete", dummy_name]) .output() .expect("prepare: failed to delete dummy"); // ip neigh del dev dummy ip Command::new("ip") - .args(&["neigh", "del", dummy_name, ip]) + .args(["neigh", "del", dummy_name, ip]) .output() .expect("prepare: failed to delete neigh"); } @@ -960,19 +995,19 @@ mod tests { // ip link add dummy type dummy Command::new("ip") - .args(&["link", "add", dummy_name, "type", "dummy"]) + .args(["link", "add", dummy_name, "type", "dummy"]) .output() .expect("failed to add dummy interface"); // ip addr add 192.168.0.2/16 dev dummy Command::new("ip") - .args(&["addr", "add", "192.168.0.2/16", "dev", dummy_name]) + .args(["addr", "add", "192.168.0.2/16", "dev", dummy_name]) .output() .expect("failed to add ip for dummy"); // ip link set dummy up; Command::new("ip") - .args(&["link", "set", dummy_name, "up"]) + .args(["link", "set", dummy_name, "up"]) .output() .expect("failed to up dummy"); } @@ -1004,7 +1039,7 @@ mod tests { // ip neigh show dev dummy ip let stdout = Command::new("ip") - .args(&["neigh", "show", "dev", dummy_name, to_ip]) + .args(["neigh", "show", "dev", dummy_name, to_ip]) .output() .expect("failed to show neigh") .stdout; diff --git a/src/agent/src/network.rs b/src/agent/src/network.rs index 1152fce917ec..37e329f35145 100644 --- a/src/agent/src/network.rs +++ b/src/agent/src/network.rs @@ -7,6 +7,7 @@ use anyhow::{anyhow, Result}; use nix::mount::{self, MsFlags}; use slog::Logger; use std::fs; +use std::path; const KATA_GUEST_SANDBOX_DNS_FILE: &str = "/run/kata-containers/sandbox/resolv.conf"; const GUEST_DNS_FILE: &str = "/etc/resolv.conf"; @@ -28,7 +29,7 @@ impl Network { } } -pub fn setup_guest_dns(logger: Logger, dns_list: Vec) -> Result<()> { +pub fn setup_guest_dns(logger: Logger, dns_list: &[String]) -> Result<()> { do_setup_guest_dns( logger, dns_list, @@ -37,7 +38,7 @@ pub fn setup_guest_dns(logger: Logger, dns_list: Vec) -> Result<()> { ) } -fn do_setup_guest_dns(logger: Logger, dns_list: Vec, src: &str, dst: &str) -> Result<()> { +fn do_setup_guest_dns(logger: Logger, dns_list: &[String], src: &str, dst: &str) -> Result<()> { let logger = logger.new(o!( "subsystem" => "network")); if dns_list.is_empty() { @@ -64,7 +65,13 @@ fn do_setup_guest_dns(logger: Logger, dns_list: Vec, src: &str, dst: &st .map(|x| x.trim()) .collect::>() .join("\n"); - fs::write(src, &content)?; + + // make sure the src file's parent path exist. 
+ let file_path = path::Path::new(src); + if let Some(p) = file_path.parent() { + fs::create_dir_all(p)?; + } + fs::write(src, content)?; // bind mount to /etc/resolv.conf mount::mount(Some(src), dst, Some("bind"), MsFlags::MS_BIND, None::<&str>) @@ -76,11 +83,11 @@ fn do_setup_guest_dns(logger: Logger, dns_list: Vec, src: &str, dst: &st #[cfg(test)] mod tests { use super::*; - use crate::skip_if_not_root; use nix::mount; use std::fs::File; use std::io::Write; use tempfile::tempdir; + use test_utils::skip_if_not_root; #[test] fn test_setup_guest_dns() { @@ -117,7 +124,7 @@ mod tests { .expect("failed to write file contents"); // call do_setup_guest_dns - let result = do_setup_guest_dns(logger, dns.clone(), src_filename, dst_filename); + let result = do_setup_guest_dns(logger, &dns, src_filename, dst_filename); assert!(result.is_ok(), "result should be ok, but {:?}", result); diff --git a/src/agent/src/policy.rs b/src/agent/src/policy.rs new file mode 100644 index 000000000000..0202510240fc --- /dev/null +++ b/src/agent/src/policy.rs @@ -0,0 +1,267 @@ +// Copyright (c) 2023 Microsoft Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{bail, Result}; +use serde::{Deserialize, Serialize}; +use slog::Drain; +use tokio::io::AsyncWriteExt; +use tokio::time::{sleep, Duration}; + +static EMPTY_JSON_INPUT: &str = "{\"input\":{}}"; + +static OPA_DATA_PATH: &str = "/data"; +static OPA_POLICIES_PATH: &str = "/policies"; + +static POLICY_LOG_FILE: &str = "/tmp/policy.txt"; + +/// Convenience macro to obtain the scope logger +macro_rules! sl { + () => { + slog_scope::logger() + }; +} + +/// Example of HTTP response from OPA: {"result":true} +#[derive(Debug, Serialize, Deserialize)] +struct AllowResponse { + result: bool, +} + +/// Singleton policy object. +#[derive(Debug, Default)] +pub struct AgentPolicy { + /// When true policy errors are ignored, for debug purposes. + allow_failures: bool, + + /// OPA path used to query if an Agent gRPC request should be allowed. + /// The request name (e.g., CreateContainerRequest) must be added to + /// this path. + query_path: String, + + /// OPA path used to add or delete a rego format Policy. + policy_path: String, + + /// Client used to connect a single time to the OPA service and reused + /// for all the future communication with OPA. + opa_client: Option, + + /// "/tmp/policy.txt" log file for policy activity. + log_file: Option, +} + +impl AgentPolicy { + /// Create AgentPolicy object. + pub fn new() -> Self { + Self { + allow_failures: false, + ..Default::default() + } + } + + /// Wait for OPA to start and connect to it. + pub async fn initialize( + &mut self, + launch_opa: bool, + opa_addr: &str, + policy_name: &str, + default_policy: &str, + ) -> Result<()> { + if sl!().is_enabled(slog::Level::Debug) { + self.log_file = Some( + tokio::fs::OpenOptions::new() + .write(true) + .truncate(true) + .create(true) + .open(POLICY_LOG_FILE) + .await?, + ); + debug!(sl!(), "policy: log file: {}", POLICY_LOG_FILE); + } + + if launch_opa { + start_opa(opa_addr)?; + } + + let opa_uri = format!("http://{opa_addr}/v1"); + self.query_path = format!("{opa_uri}{OPA_DATA_PATH}{policy_name}/"); + self.policy_path = format!("{opa_uri}{OPA_POLICIES_PATH}{policy_name}"); + let opa_client = reqwest::Client::builder().http1_only().build()?; + let policy = tokio::fs::read_to_string(default_policy).await?; + + // This loop is necessary to get the opa_client connected to the + // OPA service while that service is starting. 
Future requests to + // OPA are expected to work without retrying, after connecting + // successfully for the first time. + for i in 0..50 { + if i > 0 { + sleep(Duration::from_millis(100)).await; + debug!(sl!(), "policy initialize: PUT failed, retrying"); + } + + // Set-up the default policy. + if opa_client + .put(&self.policy_path) + .body(policy.clone()) + .send() + .await + .is_ok() + { + self.opa_client = Some(opa_client); + + // Check if requests causing policy errors should actually + // be allowed. That is an insecure configuration but is + // useful for allowing insecure pods to start, then connect to + // them and inspect Guest logs for the root cause of a failure. + // + // Note that post_query returns Ok(false) in case + // AllowRequestsFailingPolicy was not defined in the policy. + self.allow_failures = self + .post_query("AllowRequestsFailingPolicy", EMPTY_JSON_INPUT) + .await?; + return Ok(()); + } + } + bail!("Failed to connect to OPA") + } + + /// Ask OPA to check if an API call should be allowed or not. + pub async fn is_allowed_endpoint(&mut self, ep: &str, request: &str) -> bool { + let post_input = format!("{{\"input\":{request}}}"); + self.log_opa_input(ep, &post_input).await; + match self.post_query(ep, &post_input).await { + Err(e) => { + debug!( + sl!(), + "policy: failed to query endpoint {}: {:?}. Returning false.", ep, e + ); + false + } + Ok(allowed) => allowed, + } + } + + /// Replace the Policy in OPA. + pub async fn set_policy(&mut self, policy: &str) -> Result<()> { + if let Some(opa_client) = &mut self.opa_client { + // Delete the old rules. + opa_client.delete(&self.policy_path).send().await?; + + // Put the new rules. + opa_client + .put(&self.policy_path) + .body(policy.to_string()) + .send() + .await?; + + // Check if requests causing policy errors should actually be allowed. + // That is an insecure configuration but is useful for allowing insecure + // pods to start, then connect to them and inspect Guest logs for the + // root cause of a failure. + // + // Note that post_query returns Ok(false) in case + // AllowRequestsFailingPolicy was not defined in the policy. + self.allow_failures = self + .post_query("AllowRequestsFailingPolicy", EMPTY_JSON_INPUT) + .await?; + + Ok(()) + } else { + bail!("Agent Policy is not initialized") + } + } + + // Post query to OPA. + async fn post_query(&mut self, ep: &str, post_input: &str) -> Result { + debug!(sl!(), "policy check: {ep}"); + + if let Some(opa_client) = &mut self.opa_client { + let uri = format!("{}{ep}", &self.query_path); + let response = opa_client + .post(uri) + .body(post_input.to_string()) + .send() + .await?; + + if response.status() != http::StatusCode::OK { + bail!("policy: POST {} response status {}", ep, response.status()); + } + + let http_response = response.text().await?; + let opa_response: serde_json::Result = + serde_json::from_str(&http_response); + + match opa_response { + Ok(resp) => { + if !resp.result { + if self.allow_failures { + warn!( + sl!(), + "policy: POST {} response <{}>. Ignoring error!", ep, http_response + ); + return Ok(true); + } else { + error!(sl!(), "policy: POST {} response <{}>", ep, http_response); + } + } + Ok(resp.result) + } + Err(_) => { + warn!( + sl!(), + "policy: endpoint {} not found in policy. 
Returning false.", ep, + ); + Ok(false) + } + } + } else { + bail!("Agent Policy is not initialized") + } + } + + async fn log_opa_input(&mut self, ep: &str, input: &str) { + if let Some(log_file) = &mut self.log_file { + match ep { + "StatsContainerRequest" | "ReadStreamRequest" | "SetPolicyRequest" => { + // - StatsContainerRequest and ReadStreamRequest are called + // relatively often, so we're not logging them, to avoid + // growing this log file too much. + // - Confidential Containers Policy documents are relatively + // large, so we're not logging them here, for SetPolicyRequest. + // The Policy text can be obtained directly from the pod YAML. + } + _ => { + let log_entry = format!("[\"ep\":\"{ep}\",{input}],\n\n"); + + if let Err(e) = log_file.write_all(log_entry.as_bytes()).await { + warn!(sl!(), "policy: log_opa_input: write_all failed: {}", e); + } else if let Err(e) = log_file.flush().await { + warn!(sl!(), "policy: log_opa_input: flush failed: {}", e); + } + } + } + } + } +} + +fn start_opa(opa_addr: &str) -> Result<()> { + let bin_dirs = vec!["/bin", "/usr/bin", "/usr/local/bin"]; + for bin_dir in &bin_dirs { + let opa_path = bin_dir.to_string() + "/opa"; + if std::fs::metadata(&opa_path).is_ok() { + // args copied from kata-opa.service.in. + std::process::Command::new(&opa_path) + .arg("run") + .arg("--server") + .arg("--disable-telemetry") + .arg("--addr") + .arg(opa_addr) + .arg("--log-level") + .arg("info") + .spawn()?; + return Ok(()); + } + } + bail!("OPA binary not found in {:?}", &bin_dirs); +} diff --git a/src/agent/src/random.rs b/src/agent/src/random.rs index 1c83f03f0d47..f97f0f0334b4 100644 --- a/src/agent/src/random.rs +++ b/src/agent/src/random.rs @@ -53,9 +53,9 @@ pub fn reseed_rng(data: &[u8]) -> Result<()> { #[cfg(test)] mod tests { use super::*; - use crate::skip_if_not_root; use std::fs::File; use std::io::prelude::*; + use test_utils::skip_if_not_root; #[test] fn test_reseed_rng() { diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 3004be5c0e0c..b7f49753c435 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -8,8 +8,10 @@ use rustjail::{pipestream::PipeStream, process::StreamType}; use tokio::io::{AsyncReadExt, AsyncWriteExt, ReadHalf}; use tokio::sync::Mutex; -use std::ffi::CString; +use std::ffi::{CString, OsStr}; +use std::fmt::Debug; use std::io; +use std::os::unix::ffi::OsStrExt; use std::path::Path; use std::sync::Arc; use ttrpc::{ @@ -21,21 +23,26 @@ use ttrpc::{ use anyhow::{anyhow, Context, Result}; use cgroups::freezer::FreezerState; use oci::{LinuxNamespace, Root, Spec}; -use protobuf::{Message, RepeatedField, SingularPtrField}; +use protobuf::{MessageDyn, MessageField}; use protocols::agent::{ AddSwapRequest, AgentDetails, CopyFileRequest, GetIPTablesRequest, GetIPTablesResponse, GuestDetailsResponse, Interfaces, Metrics, OOMEvent, ReadStreamResponse, Routes, SetIPTablesRequest, SetIPTablesResponse, StatsContainerResponse, VolumeStatsRequest, WaitProcessResponse, WriteStreamResponse, }; -use protocols::csi::{VolumeCondition, VolumeStatsResponse, VolumeUsage, VolumeUsage_Unit}; +use protocols::csi::{ + volume_usage::Unit as VolumeUsage_Unit, VolumeCondition, VolumeStatsResponse, VolumeUsage, +}; use protocols::empty::Empty; use protocols::health::{ - HealthCheckResponse, HealthCheckResponse_ServingStatus, VersionCheckResponse, + health_check_response::ServingStatus as HealthCheckResponse_ServingStatus, HealthCheckResponse, + VersionCheckResponse, }; use protocols::types::Interface; +use protocols::{agent_ttrpc_async as 
agent_ttrpc, health_ttrpc_async as health_ttrpc}; use rustjail::cgroups::notifier; -use rustjail::container::{BaseContainer, Container, LinuxContainer}; +use rustjail::container::{BaseContainer, Container, LinuxContainer, SYSTEMD_CGROUP_PATH_FORMAT}; +use rustjail::mount::parse_mount_table; use rustjail::process::Process; use rustjail::specconv::CreateOpts; @@ -43,7 +50,6 @@ use nix::errno::Errno; use nix::mount::MsFlags; use nix::sys::{stat, statfs}; use nix::unistd::{self, Pid}; -use rustjail::cgroups::Manager; use rustjail::process::ProcessOperations; use crate::device::{ @@ -51,17 +57,22 @@ use crate::device::{ }; use crate::linux_abi::*; use crate::metrics::get_metrics; -use crate::mount::{add_storages, baremount, STORAGE_HANDLER_LIST}; +use crate::mount::baremount; use crate::namespace::{NSTYPEIPC, NSTYPEPID, NSTYPEUTS}; use crate::network::setup_guest_dns; use crate::pci; use crate::random; use crate::sandbox::Sandbox; +use crate::storage::{add_storages, update_ephemeral_mounts, STORAGE_HANDLERS}; use crate::version::{AGENT_VERSION, API_VERSION}; use crate::AGENT_CONFIG; use crate::trace_rpc_call; use crate::tracer::extract_carrier_from_ttrpc; + +#[cfg(feature = "agent-policy")] +use crate::AGENT_POLICY; + use opentelemetry::global; use tracing::span; use tracing_opentelemetry::OpenTelemetrySpanExt; @@ -80,13 +91,20 @@ use std::io::{BufRead, BufReader, Write}; use std::os::unix::fs::FileExt; use std::path::PathBuf; -const CONTAINER_BASE: &str = "/run/kata-containers"; +pub const CONTAINER_BASE: &str = "/run/kata-containers"; const MODPROBE_PATH: &str = "/sbin/modprobe"; +/// the iptables seriers binaries could appear either in /sbin +/// or /usr/sbin, we need to check both of them +const USR_IPTABLES_SAVE: &str = "/usr/sbin/iptables-save"; const IPTABLES_SAVE: &str = "/sbin/iptables-save"; +const USR_IPTABLES_RESTORE: &str = "/usr/sbin/iptables-store"; const IPTABLES_RESTORE: &str = "/sbin/iptables-restore"; +const USR_IP6TABLES_SAVE: &str = "/usr/sbin/ip6tables-save"; const IP6TABLES_SAVE: &str = "/sbin/ip6tables-save"; +const USR_IP6TABLES_RESTORE: &str = "/usr/sbin/ip6tables-save"; const IP6TABLES_RESTORE: &str = "/sbin/ip6tables-restore"; +const KATA_GUEST_SHARE_DIR: &str = "/run/kata-containers/shared/containers/"; const ERR_CANNOT_GET_WRITER: &str = "Cannot get writer"; const ERR_INVALID_BLOCK_SIZE: &str = "Invalid block size"; @@ -99,64 +117,92 @@ const ERR_NO_SANDBOX_PIDNS: &str = "Sandbox does not have sandbox_pidns"; // not available. const IPTABLES_RESTORE_WAIT_SEC: u64 = 5; -// Convenience macro to obtain the scope logger -macro_rules! sl { - () => { - slog_scope::logger() - }; +// Convenience function to obtain the scope logger. +fn sl() -> slog::Logger { + slog_scope::logger() } -// Convenience macro to wrap an error and response to ttrpc client -macro_rules! ttrpc_error { - ($code:path, $err:expr $(,)?) => { - get_rpc_status($code, format!("{:?}", $err)) - }; +// Convenience function to wrap an error and response to ttrpc client +fn ttrpc_error(code: ttrpc::Code, err: impl Debug) -> ttrpc::Error { + get_rpc_status(code, format!("{:?}", err)) } -macro_rules! 
is_allowed { - ($req:ident) => { - if !AGENT_CONFIG - .read() - .await - .is_allowed_endpoint($req.descriptor().name()) - { - return Err(ttrpc_error!( - ttrpc::Code::UNIMPLEMENTED, - format!("{} is blocked", $req.descriptor().name()), - )); - } - }; +fn config_allows(req: &impl MessageDyn) -> ttrpc::Result<()> { + if !AGENT_CONFIG.is_allowed_endpoint(req.descriptor_dyn().name()) { + Err(ttrpc_error( + ttrpc::Code::UNIMPLEMENTED, + format!("{} is blocked", req.descriptor_dyn().name()), + )) + } else { + Ok(()) + } } -#[derive(Clone, Debug)] -pub struct AgentService { - sandbox: Arc>, +#[cfg(feature = "agent-policy")] +async fn policy_allows(req: &(impl MessageDyn + serde::Serialize)) -> ttrpc::Result<()> { + let request = serde_json::to_string(req).unwrap(); + let mut policy = AGENT_POLICY.lock().await; + if !policy + .is_allowed_endpoint(req.descriptor_dyn().name(), &request) + .await + { + warn!(sl(), "{} is blocked by policy", req.descriptor_dyn().name()); + Err(ttrpc_error( + ttrpc::Code::PERMISSION_DENIED, + format!("{} is blocked by policy", req.descriptor_dyn().name()), + )) + } else { + Ok(()) + } } -// A container ID must match this regex: -// -// ^[a-zA-Z0-9][a-zA-Z0-9_.-]+$ -// -fn verify_cid(id: &str) -> Result<()> { - let mut chars = id.chars(); - - let valid = match chars.next() { - Some(first) - if first.is_alphanumeric() - && id.len() > 1 - && chars.all(|c| c.is_alphanumeric() || ['.', '-', '_'].contains(&c)) => - { - true - } - _ => false, - }; +async fn is_allowed(req: &(impl MessageDyn + serde::Serialize)) -> ttrpc::Result<()> { + let res = config_allows(req); + + #[cfg(feature = "agent-policy")] + if res.is_ok() { + return policy_allows(req).await; + } + + res +} + +fn same(e: E) -> E { + e +} + +trait ResultToTtrpcResult: Sized { + fn map_ttrpc_err(self, msg_builder: impl FnOnce(E) -> R) -> ttrpc::Result; + fn map_ttrpc_err_do(self, doer: impl FnOnce(&E)) -> ttrpc::Result { + self.map_ttrpc_err(|e| { + doer(&e); + e + }) + } +} + +impl ResultToTtrpcResult for Result { + fn map_ttrpc_err(self, msg_builder: impl FnOnce(E) -> R) -> ttrpc::Result { + self.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, msg_builder(e))) + } +} + +trait OptionToTtrpcResult: Sized { + fn map_ttrpc_err(self, code: ttrpc::Code, msg: &str) -> ttrpc::Result; +} - match valid { - true => Ok(()), - false => Err(anyhow!("invalid container ID: {:?}", id)), +impl OptionToTtrpcResult for Option { + fn map_ttrpc_err(self, code: ttrpc::Code, msg: &str) -> ttrpc::Result { + self.ok_or_else(|| ttrpc_error(code, msg)) } } +#[derive(Clone, Debug)] +pub struct AgentService { + sandbox: Arc>, + init_mode: bool, +} + impl AgentService { #[instrument] async fn do_create_container( @@ -165,25 +211,22 @@ impl AgentService { ) -> Result<()> { let cid = req.container_id.clone(); - verify_cid(&cid)?; + kata_sys_util::validate::verify_id(&cid)?; let mut oci_spec = req.OCI.clone(); - let use_sandbox_pidns = req.get_sandbox_pidns(); - - let sandbox; - let mut s; + let use_sandbox_pidns = req.sandbox_pidns(); let mut oci = match oci_spec.as_mut() { Some(spec) => rustjail::grpc_to_oci(spec), None => { - error!(sl!(), "no oci spec in the create container request!"); + error!(sl(), "no oci spec in the create container request!"); return Err(anyhow!(nix::Error::EINVAL)); } }; - info!(sl!(), "receive createcontainer, spec: {:?}", &oci); + info!(sl(), "receive createcontainer, spec: {:?}", &oci); info!( - sl!(), + sl(), "receive createcontainer, storages: {:?}", &req.storages ); @@ -192,7 +235,7 @@ impl AgentService { // updates 
the devices listed in the OCI spec, so that they actually // match real devices inside the VM. This step is necessary since we // cannot predict everything from the caller. - add_devices(&req.devices.to_vec(), &mut oci, &self.sandbox).await?; + add_devices(&req.devices, &mut oci, &self.sandbox).await?; // Both rootfs and volumes (invoked with --volume for instance) will // be processed the same way. The idea is to always mount any provided @@ -201,18 +244,10 @@ impl AgentService { // After all those storages have been processed, no matter the order // here, the agent will rely on rustjail (using the oci.Mounts // list) to bind mount all of them inside the container. - let m = add_storages( - sl!(), - req.storages.to_vec(), - self.sandbox.clone(), - Some(req.container_id.clone()), - ) - .await?; - { - sandbox = self.sandbox.clone(); - s = sandbox.lock().await; - s.container_mounts.insert(cid.clone(), m); - } + let m = add_storages(sl(), req.storages, &self.sandbox, Some(req.container_id)).await?; + + let mut s = self.sandbox.lock().await; + s.container_mounts.insert(cid.clone(), m); update_container_namespaces(&s, &mut oci, use_sandbox_pidns)?; @@ -228,9 +263,20 @@ impl AgentService { // restore the cwd for kata-agent process. defer!(unistd::chdir(&olddir).unwrap()); + // determine which cgroup driver to take and then assign to use_systemd_cgroup + // systemd: "[slice]:[prefix]:[name]" + // fs: "/path_a/path_b" + // If agent is init we can't use systemd cgroup mode, no matter what the host tells us + let cgroups_path = oci.linux.as_ref().map_or("", |linux| &linux.cgroups_path); + let use_systemd_cgroup = if self.init_mode { + false + } else { + SYSTEMD_CGROUP_PATH_FORMAT.is_match(cgroups_path) + }; + let opts = CreateOpts { cgroup_name: "".to_string(), - use_systemd_cgroup: false, + use_systemd_cgroup, no_pivot_root: s.no_pivot_root, no_new_keyring: false, spec: Some(oci.clone()), @@ -239,36 +285,46 @@ impl AgentService { }; let mut ctr: LinuxContainer = - LinuxContainer::new(cid.as_str(), CONTAINER_BASE, opts, &sl!())?; + LinuxContainer::new(cid.as_str(), CONTAINER_BASE, opts, &sl())?; - let pipe_size = AGENT_CONFIG.read().await.container_pipe_size; + let pipe_size = AGENT_CONFIG.container_pipe_size; let p = if let Some(p) = oci.process { - Process::new(&sl!(), &p, cid.as_str(), true, pipe_size)? + Process::new(&sl(), &p, cid.as_str(), true, pipe_size)? } else { - info!(sl!(), "no process configurations!"); + info!(sl(), "no process configurations!"); return Err(anyhow!(nix::Error::EINVAL)); }; - ctr.start(p).await?; + + // if starting container failed, we will do some rollback work + // to ensure no resources are leaked. 
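The comment above introduces the rollback that the next hunk implements: if `ctr.start()` fails, the container is destroyed and its sandbox-side resources are removed before the original error is returned. A rough sketch of that cleanup-on-failure shape, using stub `Container`/`Sandbox` types rather than the agent's real `LinuxContainer` and sandbox state (tokio and anyhow are assumed as dependencies):

```rust
use anyhow::{anyhow, Result};

// Stubs so the sketch compiles on its own; the real agent uses rustjail's
// LinuxContainer and the shared sandbox state instead.
struct Container {
    healthy: bool,
}
struct Sandbox;

impl Container {
    async fn start(&mut self) -> Result<()> {
        if self.healthy {
            Ok(())
        } else {
            Err(anyhow!("start failed"))
        }
    }
    async fn destroy(&mut self) -> Result<()> {
        Ok(())
    }
}

impl Sandbox {
    async fn remove_container_resources(&mut self, _cid: &str) -> Result<()> {
        Ok(())
    }
}

async fn start_or_rollback(ctr: &mut Container, sandbox: &mut Sandbox, cid: &str) -> Result<()> {
    if let Err(start_err) = ctr.start().await {
        // Best-effort cleanup: log secondary failures, but keep and return
        // the original start error.
        if let Err(e) = ctr.destroy().await {
            eprintln!("failed to destroy container {cid}: {e:?}");
        }
        if let Err(e) = sandbox.remove_container_resources(cid).await {
            eprintln!("failed to remove resources for {cid}: {e:?}");
        }
        return Err(start_err);
    }
    Ok(())
}

#[tokio::main]
async fn main() -> Result<()> {
    let mut sandbox = Sandbox;
    let mut ctr = Container { healthy: false };
    assert!(start_or_rollback(&mut ctr, &mut sandbox, "cid-1").await.is_err());
    Ok(())
}
```

The important detail, mirrored in the diff below, is that cleanup failures are only logged; the error reported to the caller is still the one from `start()`.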
+ if let Err(err) = ctr.start(p).await { + error!(sl(), "failed to start container: {:?}", err); + if let Err(e) = ctr.destroy().await { + error!(sl(), "failed to destroy container: {:?}", e); + } + if let Err(e) = remove_container_resources(&mut s, &cid).await { + error!(sl(), "failed to remove container resources: {:?}", e); + } + return Err(err); + } + s.update_shared_pidns(&ctr)?; s.add_container(ctr); - info!(sl!(), "created container!"); + info!(sl(), "created container!"); Ok(()) } #[instrument] async fn do_start_container(&self, req: protocols::agent::StartContainerRequest) -> Result<()> { - let cid = req.container_id; - - let sandbox = self.sandbox.clone(); - let mut s = sandbox.lock().await; + let mut s = self.sandbox.lock().await; let sid = s.id.clone(); + let cid = req.container_id; let ctr = s .get_container(&cid) .ok_or_else(|| anyhow!("Invalid container id"))?; - ctr.exec().await?; if sid == cid { @@ -276,14 +332,9 @@ impl AgentService { } // start oom event loop - if let Some(ref ctr) = ctr.cgroup_manager { - let cg_path = ctr.get_cg_path("memory"); - - if let Some(cg_path) = cg_path { - let rx = notifier::notify_oom(cid.as_str(), cg_path.to_string()).await?; - - s.run_oom_event_monitor(rx, cid.clone()).await; - } + if let Ok(cg_path) = ctr.cgroup_manager.as_ref().get_cgroup_path("memory") { + let rx = notifier::notify_oom(cid.as_str(), cg_path.to_string()).await?; + s.run_oom_event_monitor(rx, cid).await; } Ok(()) @@ -294,126 +345,84 @@ impl AgentService { &self, req: protocols::agent::RemoveContainerRequest, ) -> Result<()> { - let cid = req.container_id.clone(); - let mut cmounts: Vec = vec![]; - - let mut remove_container_resources = |sandbox: &mut Sandbox| -> Result<()> { - // Find the sandbox storage used by this container - let mounts = sandbox.container_mounts.get(&cid); - if let Some(mounts) = mounts { - for m in mounts.iter() { - if sandbox.storages.get(m).is_some() { - cmounts.push(m.to_string()); - } - } - } - - for m in cmounts.iter() { - sandbox.unset_and_remove_sandbox_storage(m)?; - } - - sandbox.container_mounts.remove(cid.as_str()); - sandbox.containers.remove(cid.as_str()); - Ok(()) - }; + let cid = req.container_id; if req.timeout == 0 { - let s = Arc::clone(&self.sandbox); - let mut sandbox = s.lock().await; - + let mut sandbox = self.sandbox.lock().await; sandbox.bind_watcher.remove_container(&cid).await; - sandbox .get_container(&cid) .ok_or_else(|| anyhow!("Invalid container id"))? .destroy() .await?; - - remove_container_resources(&mut sandbox)?; - + remove_container_resources(&mut sandbox, &cid).await?; return Ok(()); } // timeout != 0 let s = self.sandbox.clone(); let cid2 = cid.clone(); - let (tx, rx) = tokio::sync::oneshot::channel::(); - let handle = tokio::spawn(async move { let mut sandbox = s.lock().await; - if let Some(ctr) = sandbox.get_container(&cid2) { - ctr.destroy().await.unwrap(); - sandbox.bind_watcher.remove_container(&cid2).await; - tx.send(1).unwrap(); - }; + sandbox.bind_watcher.remove_container(&cid2).await; + sandbox + .get_container(&cid2) + .ok_or_else(|| anyhow!("Invalid container id"))? 
+ .destroy() + .await }); - if tokio::time::timeout(Duration::from_secs(req.timeout.into()), rx) + let to = Duration::from_secs(req.timeout.into()); + tokio::time::timeout(to, handle) .await - .is_err() - { - return Err(anyhow!(nix::Error::ETIME)); - } - - if handle.await.is_err() { - return Err(anyhow!(nix::Error::UnknownErrno)); - } - - let s = self.sandbox.clone(); - let mut sandbox = s.lock().await; - - remove_container_resources(&mut sandbox)?; + .map_err(|_| anyhow!(nix::Error::ETIME))???; - Ok(()) + remove_container_resources(&mut *self.sandbox.lock().await, &cid).await } #[instrument] async fn do_exec_process(&self, req: protocols::agent::ExecProcessRequest) -> Result<()> { - let cid = req.container_id.clone(); - let exec_id = req.exec_id.clone(); - - info!(sl!(), "do_exec_process cid: {} eid: {}", cid, exec_id); + let cid = req.container_id; + let exec_id = req.exec_id; - let s = self.sandbox.clone(); - let mut sandbox = s.lock().await; + info!(sl(), "do_exec_process cid: {} eid: {}", cid, exec_id); + let mut sandbox = self.sandbox.lock().await; let mut process = req .process .into_option() - .ok_or_else(|| anyhow!(nix::Error::EINVAL))?; + .ok_or_else(|| anyhow!("Unable to parse process from ExecProcessRequest"))?; // Apply any necessary corrections for PCI addresses update_env_pci(&mut process.Env, &sandbox.pcimap)?; - let pipe_size = AGENT_CONFIG.read().await.container_pipe_size; + let pipe_size = AGENT_CONFIG.container_pipe_size; let ocip = rustjail::process_grpc_to_oci(&process); - let p = Process::new(&sl!(), &ocip, exec_id.as_str(), false, pipe_size)?; + let p = Process::new(&sl(), &ocip, exec_id.as_str(), false, pipe_size)?; let ctr = sandbox .get_container(&cid) .ok_or_else(|| anyhow!("Invalid container id"))?; - ctr.run(p).await?; - - Ok(()) + ctr.run(p).await } #[instrument] async fn do_signal_process(&self, req: protocols::agent::SignalProcessRequest) -> Result<()> { - let cid = req.container_id.clone(); - let eid = req.exec_id.clone(); - let s = self.sandbox.clone(); + let cid = req.container_id; + let eid = req.exec_id; info!( - sl!(), + sl(), "signal process"; - "container-id" => cid.clone(), - "exec-id" => eid.clone(), + "container-id" => &cid, + "exec-id" => &eid, + "signal" => req.signal, ); let mut sig: libc::c_int = req.signal as libc::c_int; { - let mut sandbox = s.lock().await; + let mut sandbox = self.sandbox.lock().await; let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?; // For container initProcess, if it hasn't installed handler for "SIGTERM" signal, // it will ignore the "SIGTERM" signal sent to it, thus send it "SIGKILL" signal @@ -422,24 +431,38 @@ impl AgentService { if p.init && sig == libc::SIGTERM && !is_signal_handled(&proc_status_file, sig as u32) { sig = libc::SIGKILL; } - p.signal(sig)?; - } + + match p.signal(sig) { + Err(Errno::ESRCH) => { + info!( + sl(), + "signal encounter ESRCH, continue"; + "container-id" => &cid, + "exec-id" => &eid, + "pid" => p.pid, + "signal" => sig, + ); + } + Err(err) => return Err(anyhow!(err)), + Ok(()) => (), + } + }; if eid.is_empty() { // eid is empty, signal all the remaining processes in the container cgroup info!( - sl!(), + sl(), "signal all the remaining processes"; - "container-id" => cid.clone(), - "exec-id" => eid.clone(), + "container-id" => &cid, + "exec-id" => &eid, ); if let Err(err) = self.freeze_cgroup(&cid, FreezerState::Frozen).await { warn!( - sl!(), + sl(), "freeze cgroup failed"; - "container-id" => cid.clone(), - "exec-id" => eid.clone(), + "container-id" => &cid, + 
"exec-id" => &eid, "error" => format!("{:?}", err), ); } @@ -449,10 +472,10 @@ impl AgentService { let res = unsafe { libc::kill(*pid, sig) }; if let Err(err) = Errno::result(res).map(drop) { warn!( - sl!(), + sl(), "signal failed"; - "container-id" => cid.clone(), - "exec-id" => eid.clone(), + "container-id" => &cid, + "exec-id" => &eid, "pid" => pid, "error" => format!("{:?}", err), ); @@ -460,43 +483,32 @@ impl AgentService { } if let Err(err) = self.freeze_cgroup(&cid, FreezerState::Thawed).await { warn!( - sl!(), + sl(), "unfreeze cgroup failed"; - "container-id" => cid.clone(), - "exec-id" => eid.clone(), + "container-id" => &cid, + "exec-id" => &eid, "error" => format!("{:?}", err), ); } } + Ok(()) } async fn freeze_cgroup(&self, cid: &str, state: FreezerState) -> Result<()> { - let s = self.sandbox.clone(); - let mut sandbox = s.lock().await; + let mut sandbox = self.sandbox.lock().await; let ctr = sandbox .get_container(cid) .ok_or_else(|| anyhow!("Invalid container id {}", cid))?; - let cm = ctr - .cgroup_manager - .as_ref() - .ok_or_else(|| anyhow!("cgroup manager not exist"))?; - cm.freeze(state)?; - Ok(()) + ctr.cgroup_manager.as_ref().freeze(state) } async fn get_pids(&self, cid: &str) -> Result> { - let s = self.sandbox.clone(); - let mut sandbox = s.lock().await; + let mut sandbox = self.sandbox.lock().await; let ctr = sandbox .get_container(cid) .ok_or_else(|| anyhow!("Invalid container id {}", cid))?; - let cm = ctr - .cgroup_manager - .as_ref() - .ok_or_else(|| anyhow!("cgroup manager not exist"))?; - let pids = cm.get_pids()?; - Ok(pids) + ctr.cgroup_manager.as_ref().get_pids() } #[instrument] @@ -504,23 +516,21 @@ impl AgentService { &self, req: protocols::agent::WaitProcessRequest, ) -> Result { - let cid = req.container_id.clone(); + let cid = req.container_id; let eid = req.exec_id; - let s = self.sandbox.clone(); let mut resp = WaitProcessResponse::new(); - let pid: pid_t; - - let (exit_send, mut exit_recv) = tokio::sync::mpsc::channel(100); info!( - sl!(), + sl(), "wait process"; - "container-id" => cid.clone(), - "exec-id" => eid.clone() + "container-id" => &cid, + "exec-id" => &eid ); + let pid: pid_t; + let (exit_send, mut exit_recv) = tokio::sync::mpsc::channel(100); let exit_rx = { - let mut sandbox = s.lock().await; + let mut sandbox = self.sandbox.lock().await; let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?; p.exit_watchers.push(exit_send); @@ -530,12 +540,12 @@ impl AgentService { }; if let Some(mut exit_rx) = exit_rx { - info!(sl!(), "cid {} eid {} waiting for exit signal", &cid, &eid); + info!(sl(), "cid {} eid {} waiting for exit signal", &cid, &eid); while exit_rx.changed().await.is_ok() {} - info!(sl!(), "cid {} eid {} received exit signal", &cid, &eid); + info!(sl(), "cid {} eid {} received exit signal", &cid, &eid); } - let mut sandbox = s.lock().await; + let mut sandbox = self.sandbox.lock().await; let ctr = sandbox .get_container(&cid) .ok_or_else(|| anyhow!("Invalid container id"))?; @@ -573,12 +583,11 @@ impl AgentService { &self, req: protocols::agent::WriteStreamRequest, ) -> Result { - let cid = req.container_id.clone(); - let eid = req.exec_id.clone(); + let cid = req.container_id; + let eid = req.exec_id; let writer = { - let s = self.sandbox.clone(); - let mut sandbox = s.lock().await; + let mut sandbox = self.sandbox.lock().await; let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?; // use ptmx io @@ -607,15 +616,14 @@ impl AgentService { let cid = req.container_id; let eid = req.exec_id; - let mut 
term_exit_notifier = Arc::new(tokio::sync::Notify::new()); + let term_exit_notifier; let reader = { - let s = self.sandbox.clone(); - let mut sandbox = s.lock().await; - + let mut sandbox = self.sandbox.lock().await; let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?; + term_exit_notifier = p.term_exit_notifier.clone(); + if p.term_master.is_some() { - term_exit_notifier = p.term_exit_notifier.clone(); p.get_reader(StreamType::TermMaster) } else if stdout { if p.parent_stdout.is_some() { @@ -628,40 +636,40 @@ impl AgentService { } }; - if reader.is_none() { - return Err(anyhow!(nix::Error::EINVAL)); - } - let reader = reader.ok_or_else(|| anyhow!("cannot get stream reader"))?; tokio::select! { - _ = term_exit_notifier.notified() => { - Err(anyhow!("eof")) - } - v = read_stream(reader, req.len as usize) => { + // Poll the futures in the order they appear from top to bottom + // it is very important to avoid data loss. If there is still + // data in the buffer and read_stream branch will return + // Poll::Ready so that the term_exit_notifier will never polled + // before all data were read. + biased; + v = read_stream(&reader, req.len as usize) => { let vector = v?; let mut resp = ReadStreamResponse::new(); resp.set_data(vector); Ok(resp) } + _ = term_exit_notifier.notified() => { + Err(anyhow!("eof")) + } } } } #[async_trait] -impl protocols::agent_ttrpc::AgentService for AgentService { +impl agent_ttrpc::AgentService for AgentService { async fn create_container( &self, ctx: &TtrpcContext, req: protocols::agent::CreateContainerRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "create_container", req); - is_allowed!(req); - match self.do_create_container(req).await { - Err(e) => Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)), - Ok(_) => Ok(Empty::new()), - } + is_allowed(&req).await?; + self.do_create_container(req).await.map_ttrpc_err(same)?; + Ok(Empty::new()) } async fn start_container( @@ -670,11 +678,9 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::StartContainerRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "start_container", req); - is_allowed!(req); - match self.do_start_container(req).await { - Err(e) => Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)), - Ok(_) => Ok(Empty::new()), - } + is_allowed(&req).await?; + self.do_start_container(req).await.map_ttrpc_err(same)?; + Ok(Empty::new()) } async fn remove_container( @@ -683,12 +689,9 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::RemoveContainerRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "remove_container", req); - is_allowed!(req); - - match self.do_remove_container(req).await { - Err(e) => Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)), - Ok(_) => Ok(Empty::new()), - } + is_allowed(&req).await?; + self.do_remove_container(req).await.map_ttrpc_err(same)?; + Ok(Empty::new()) } async fn exec_process( @@ -697,11 +700,9 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::ExecProcessRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "exec_process", req); - is_allowed!(req); - match self.do_exec_process(req).await { - Err(e) => Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)), - Ok(_) => Ok(Empty::new()), - } + is_allowed(&req).await?; + self.do_exec_process(req).await.map_ttrpc_err(same)?; + Ok(Empty::new()) } async fn signal_process( @@ -710,11 +711,9 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::SignalProcessRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, 
"signal_process", req); - is_allowed!(req); - match self.do_signal_process(req).await { - Err(e) => Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)), - Ok(_) => Ok(Empty::new()), - } + is_allowed(&req).await?; + self.do_signal_process(req).await.map_ttrpc_err(same)?; + Ok(Empty::new()) } async fn wait_process( @@ -723,10 +722,8 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::WaitProcessRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "wait_process", req); - is_allowed!(req); - self.do_wait_process(req) - .await - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e)) + is_allowed(&req).await?; + self.do_wait_process(req).await.map_ttrpc_err(same) } async fn update_container( @@ -735,34 +732,18 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::UpdateContainerRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "update_container", req); - is_allowed!(req); - let cid = req.container_id.clone(); - let res = req.resources; - - let s = Arc::clone(&self.sandbox); - let mut sandbox = s.lock().await; - - let ctr = sandbox.get_container(&cid).ok_or_else(|| { - ttrpc_error!( - ttrpc::Code::INVALID_ARGUMENT, - "invalid container id".to_string(), - ) - })?; - - let resp = Empty::new(); + is_allowed(&req).await?; - if let Some(res) = res.as_ref() { + let mut sandbox = self.sandbox.lock().await; + let ctr = sandbox + .get_container(&req.container_id) + .map_ttrpc_err(ttrpc::Code::INVALID_ARGUMENT, "invalid container id")?; + if let Some(res) = req.resources.as_ref() { let oci_res = rustjail::resources_grpc_to_oci(res); - match ctr.set(oci_res) { - Err(e) => { - return Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)); - } - - Ok(_) => return Ok(resp), - } + ctr.set(oci_res).map_ttrpc_err(same)?; } - Ok(resp) + Ok(Empty::new()) } async fn stats_container( @@ -771,20 +752,13 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::StatsContainerRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "stats_container", req); - is_allowed!(req); - let cid = req.container_id; - let s = Arc::clone(&self.sandbox); - let mut sandbox = s.lock().await; - - let ctr = sandbox.get_container(&cid).ok_or_else(|| { - ttrpc_error!( - ttrpc::Code::INVALID_ARGUMENT, - "invalid container id".to_string(), - ) - })?; + is_allowed(&req).await?; - ctr.stats() - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e)) + let mut sandbox = self.sandbox.lock().await; + let ctr = sandbox + .get_container(&req.container_id) + .map_ttrpc_err(ttrpc::Code::INVALID_ARGUMENT, "invalid container id")?; + ctr.stats().map_ttrpc_err(same) } async fn pause_container( @@ -793,21 +767,13 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::PauseContainerRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "pause_container", req); - is_allowed!(req); - let cid = req.get_container_id(); - let s = Arc::clone(&self.sandbox); - let mut sandbox = s.lock().await; - - let ctr = sandbox.get_container(cid).ok_or_else(|| { - ttrpc_error!( - ttrpc::Code::INVALID_ARGUMENT, - "invalid container id".to_string(), - ) - })?; - - ctr.pause() - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?; + is_allowed(&req).await?; + let mut sandbox = self.sandbox.lock().await; + let ctr = sandbox + .get_container(&req.container_id) + .map_ttrpc_err(ttrpc::Code::INVALID_ARGUMENT, "invalid container id")?; + ctr.pause().map_ttrpc_err(same)?; Ok(Empty::new()) } @@ -817,20 +783,34 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: 
protocols::agent::ResumeContainerRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "resume_container", req); - is_allowed!(req); - let cid = req.get_container_id(); - let s = Arc::clone(&self.sandbox); - let mut sandbox = s.lock().await; + is_allowed(&req).await?; - let ctr = sandbox.get_container(cid).ok_or_else(|| { - ttrpc_error!( - ttrpc::Code::INVALID_ARGUMENT, - "invalid container id".to_string(), - ) - })?; + let mut sandbox = self.sandbox.lock().await; + let ctr = sandbox + .get_container(&req.container_id) + .map_ttrpc_err(ttrpc::Code::INVALID_ARGUMENT, "invalid container id")?; + ctr.resume().map_ttrpc_err(same)?; + Ok(Empty::new()) + } - ctr.resume() - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?; + async fn remove_stale_virtiofs_share_mounts( + &self, + ctx: &TtrpcContext, + req: protocols::agent::RemoveStaleVirtiofsShareMountsRequest, + ) -> ttrpc::Result { + trace_rpc_call!(ctx, "remove_stale_virtiofs_share_mounts", req); + is_allowed(&req).await?; + let mount_infos = parse_mount_table("/proc/self/mountinfo").map_ttrpc_err(same)?; + for m in &mount_infos { + if m.mount_point.starts_with(KATA_GUEST_SHARE_DIR) { + // stat the mount point, virtiofs daemon will remove the stale cache and release the fds if the mount point doesn't exist any more. + // More details in https://github.com/kata-containers/kata-containers/issues/6455#issuecomment-1477137277 + match stat::stat(Path::new(&m.mount_point)) { + Ok(_) => info!(sl(), "stat {} success", m.mount_point), + Err(e) => info!(sl(), "stat {} failed: {}", m.mount_point, e), + } + } + } Ok(Empty::new()) } @@ -840,10 +820,8 @@ impl protocols::agent_ttrpc::AgentService for AgentService { _ctx: &TtrpcContext, req: protocols::agent::WriteStreamRequest, ) -> ttrpc::Result { - is_allowed!(req); - self.do_write_stream(req) - .await - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e)) + is_allowed(&req).await?; + self.do_write_stream(req).await.map_ttrpc_err(same) } async fn read_stdout( @@ -851,10 +829,8 @@ impl protocols::agent_ttrpc::AgentService for AgentService { _ctx: &TtrpcContext, req: protocols::agent::ReadStreamRequest, ) -> ttrpc::Result { - is_allowed!(req); - self.do_read_stream(req, true) - .await - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e)) + is_allowed(&req).await?; + self.do_read_stream(req, true).await.map_ttrpc_err(same) } async fn read_stderr( @@ -862,10 +838,8 @@ impl protocols::agent_ttrpc::AgentService for AgentService { _ctx: &TtrpcContext, req: protocols::agent::ReadStreamRequest, ) -> ttrpc::Result { - is_allowed!(req); - self.do_read_stream(req, false) - .await - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e)) + is_allowed(&req).await?; + self.do_read_stream(req, false).await.map_ttrpc_err(same) } async fn close_stdin( @@ -874,17 +848,16 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::CloseStdinRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "close_stdin", req); - is_allowed!(req); + is_allowed(&req).await?; - let cid = req.container_id.clone(); + let cid = req.container_id; let eid = req.exec_id; - let s = Arc::clone(&self.sandbox); - let mut sandbox = s.lock().await; + let mut sandbox = self.sandbox.lock().await; let p = sandbox .find_container_process(cid.as_str(), eid.as_str()) .map_err(|e| { - ttrpc_error!( + ttrpc_error( ttrpc::Code::INVALID_ARGUMENT, format!("invalid argument: {:?}", e), ) @@ -901,38 +874,32 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::TtyWinResizeRequest, ) -> ttrpc::Result { 
trace_rpc_call!(ctx, "tty_win_resize", req); - is_allowed!(req); + is_allowed(&req).await?; - let cid = req.container_id.clone(); - let eid = req.exec_id.clone(); - let s = Arc::clone(&self.sandbox); - let mut sandbox = s.lock().await; + let mut sandbox = self.sandbox.lock().await; let p = sandbox - .find_container_process(cid.as_str(), eid.as_str()) + .find_container_process(req.container_id(), req.exec_id()) .map_err(|e| { - ttrpc_error!( + ttrpc_error( ttrpc::Code::UNAVAILABLE, format!("invalid argument: {:?}", e), ) })?; - if let Some(fd) = p.term_master { - unsafe { - let win = winsize { - ws_row: req.row as c_ushort, - ws_col: req.column as c_ushort, - ws_xpixel: 0, - ws_ypixel: 0, - }; + let fd = p + .term_master + .map_ttrpc_err(ttrpc::Code::UNAVAILABLE, "no tty")?; + let win = winsize { + ws_row: req.row as c_ushort, + ws_col: req.column as c_ushort, + ws_xpixel: 0, + ws_ypixel: 0, + }; - let err = libc::ioctl(fd, TIOCSWINSZ, &win); - Errno::result(err).map(drop).map_err(|e| { - ttrpc_error!(ttrpc::Code::INTERNAL, format!("ioctl error: {:?}", e)) - })?; - } - } else { - return Err(ttrpc_error!(ttrpc::Code::UNAVAILABLE, "no tty".to_string())); - } + let err = unsafe { libc::ioctl(fd, TIOCSWINSZ, &win) }; + Errno::result(err) + .map(drop) + .map_ttrpc_err(|e| format!("ioctl error: {:?}", e))?; Ok(Empty::new()) } @@ -943,14 +910,12 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::UpdateInterfaceRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "update_interface", req); - is_allowed!(req); + is_allowed(&req).await?; - let interface = req.interface.into_option().ok_or_else(|| { - ttrpc_error!( - ttrpc::Code::INVALID_ARGUMENT, - "empty update interface request".to_string(), - ) - })?; + let interface = req.interface.into_option().map_ttrpc_err( + ttrpc::Code::INVALID_ARGUMENT, + "empty update interface request", + )?; self.sandbox .lock() @@ -958,9 +923,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService { .rtnl .update_interface(&interface) .await - .map_err(|e| { - ttrpc_error!(ttrpc::Code::INTERNAL, format!("update interface: {:?}", e)) - })?; + .map_ttrpc_err(|e| format!("update interface: {:?}", e))?; Ok(interface) } @@ -971,68 +934,82 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::UpdateRoutesRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "update_routes", req); - is_allowed!(req); + is_allowed(&req).await?; let new_routes = req .routes .into_option() - .map(|r| r.Routes.into_vec()) - .ok_or_else(|| { - ttrpc_error!( - ttrpc::Code::INVALID_ARGUMENT, - "empty update routes request".to_string(), - ) - })?; + .map(|r| r.Routes) + .map_ttrpc_err(ttrpc::Code::INVALID_ARGUMENT, "empty update routes request")?; let mut sandbox = self.sandbox.lock().await; - sandbox.rtnl.update_routes(new_routes).await.map_err(|e| { - ttrpc_error!( - ttrpc::Code::INTERNAL, - format!("Failed to update routes: {:?}", e), - ) - })?; + sandbox + .rtnl + .update_routes(new_routes) + .await + .map_ttrpc_err(|e| format!("Failed to update routes: {:?}", e))?; - let list = sandbox.rtnl.list_routes().await.map_err(|e| { - ttrpc_error!( - ttrpc::Code::INTERNAL, - format!("Failed to list routes after update: {:?}", e), - ) - })?; + let list = sandbox + .rtnl + .list_routes() + .await + .map_ttrpc_err(|e| format!("Failed to list routes after update: {:?}", e))?; Ok(protocols::agent::Routes { - Routes: RepeatedField::from_vec(list), + Routes: list, ..Default::default() }) } + async fn update_ephemeral_mounts( + &self, + ctx: 
&TtrpcContext, + req: protocols::agent::UpdateEphemeralMountsRequest, + ) -> ttrpc::Result { + trace_rpc_call!(ctx, "update_mounts", req); + is_allowed(&req).await?; + + update_ephemeral_mounts(sl(), &req.storages, &self.sandbox) + .await + .map_ttrpc_err(|e| format!("Failed to update mounts: {:?}", e))?; + Ok(Empty::new()) + } + async fn get_ip_tables( &self, ctx: &TtrpcContext, req: GetIPTablesRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "get_iptables", req); - is_allowed!(req); + is_allowed(&req).await?; - info!(sl!(), "get_ip_tables: request received"); + info!(sl(), "get_ip_tables: request received"); + // the binary could exists in either /usr/sbin or /sbin + // here check both of the places and return the one exists + // if none exists, return the /sbin one, and the rpc will + // returns an internal error let cmd = if req.is_ipv6 { - IP6TABLES_SAVE + if Path::new(USR_IP6TABLES_SAVE).exists() { + USR_IP6TABLES_SAVE + } else { + IP6TABLES_SAVE + } + } else if Path::new(USR_IPTABLES_SAVE).exists() { + USR_IPTABLES_SAVE } else { IPTABLES_SAVE } .to_string(); - match Command::new(cmd.clone()).output() { - Ok(output) => Ok(GetIPTablesResponse { - data: output.stdout, - ..Default::default() - }), - Err(e) => { - warn!(sl!(), "failed to run {}: {:?}", cmd, e.kind()); - return Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)); - } - } + let output = Command::new(cmd.clone()) + .output() + .map_ttrpc_err_do(|e| warn!(sl(), "failed to run {}: {:?}", cmd, e.kind()))?; + Ok(GetIPTablesResponse { + data: output.stdout, + ..Default::default() + }) } async fn set_ip_tables( @@ -1041,39 +1018,43 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: SetIPTablesRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "set_iptables", req); - is_allowed!(req); + is_allowed(&req).await?; - info!(sl!(), "set_ip_tables request received"); + info!(sl(), "set_ip_tables request received"); + // the binary could exists in both /usr/sbin and /sbin + // here check both of the places and return the one exists + // if none exists, return the /sbin one, and the rpc will + // returns an internal error let cmd = if req.is_ipv6 { - IP6TABLES_RESTORE + if Path::new(USR_IP6TABLES_RESTORE).exists() { + USR_IP6TABLES_RESTORE + } else { + IP6TABLES_RESTORE + } + } else if Path::new(USR_IPTABLES_RESTORE).exists() { + USR_IPTABLES_RESTORE } else { IPTABLES_RESTORE } .to_string(); - let mut child = match Command::new(cmd.clone()) + let mut child = Command::new(cmd.clone()) .arg("--wait") .arg(IPTABLES_RESTORE_WAIT_SEC.to_string()) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .stderr(Stdio::piped()) .spawn() - { - Ok(child) => child, - Err(e) => { - warn!(sl!(), "failure to spawn {}: {:?}", cmd, e.kind()); - return Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)); - } - }; + .map_ttrpc_err_do(|e| warn!(sl(), "failure to spawn {}: {:?}", cmd, e.kind()))?; let mut stdin = match child.stdin.take() { Some(si) => si, None => { println!("failed to get stdin from child"); - return Err(ttrpc_error!( + return Err(ttrpc_error( ttrpc::Code::INTERNAL, - "failed to take stdin from child".to_string() + "failed to take stdin from child", )); } }; @@ -1083,54 +1064,41 @@ impl protocols::agent_ttrpc::AgentService for AgentService { let _ = match stdin.write_all(&req.data) { Ok(o) => o, Err(e) => { - warn!(sl!(), "error writing stdin: {:?}", e.kind()); + warn!(sl(), "error writing stdin: {:?}", e.kind()); return; } }; if tx.send(1).is_err() { - warn!(sl!(), "stdin writer thread receiver dropped"); + warn!(sl(), "stdin writer thread 
receiver dropped"); }; }); - if tokio::time::timeout(Duration::from_secs(IPTABLES_RESTORE_WAIT_SEC), rx) + let _ = tokio::time::timeout(Duration::from_secs(IPTABLES_RESTORE_WAIT_SEC), rx) .await - .is_err() - { - return Err(ttrpc_error!( - ttrpc::Code::INTERNAL, - "timeout waiting for stdin writer to complete".to_string() - )); - } + .map_ttrpc_err(|_| "timeout waiting for stdin writer to complete")?; - if handle.await.is_err() { - return Err(ttrpc_error!( - ttrpc::Code::INTERNAL, - "stdin writer thread failure".to_string() - )); - } - - let output = match child.wait_with_output() { - Ok(o) => o, - Err(e) => { - warn!( - sl!(), - "failure waiting for spawned {} to complete: {:?}", - cmd, - e.kind() - ); - return Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)); - } - }; + handle + .await + .map_ttrpc_err(|_| "stdin writer thread failure")?; + + let output = child.wait_with_output().map_ttrpc_err_do(|e| { + warn!( + sl(), + "failure waiting for spawned {} to complete: {:?}", + cmd, + e.kind() + ) + })?; if !output.status.success() { - warn!(sl!(), "{} failed: {:?}", cmd, output.stderr); - return Err(ttrpc_error!( + warn!(sl(), "{} failed: {:?}", cmd, output.stderr); + return Err(ttrpc_error( ttrpc::Code::INTERNAL, format!( "{} failed: {:?}", cmd, String::from_utf8_lossy(&output.stderr) - ) + ), )); } @@ -1146,7 +1114,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::ListInterfacesRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "list_interfaces", req); - is_allowed!(req); + is_allowed(&req).await?; let list = self .sandbox @@ -1155,15 +1123,10 @@ impl protocols::agent_ttrpc::AgentService for AgentService { .rtnl .list_interfaces() .await - .map_err(|e| { - ttrpc_error!( - ttrpc::Code::INTERNAL, - format!("Failed to list interfaces: {:?}", e), - ) - })?; + .map_ttrpc_err(|e| format!("Failed to list interfaces: {:?}", e))?; Ok(protocols::agent::Interfaces { - Interfaces: RepeatedField::from_vec(list), + Interfaces: list, ..Default::default() }) } @@ -1174,7 +1137,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::ListRoutesRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "list_routes", req); - is_allowed!(req); + is_allowed(&req).await?; let list = self .sandbox @@ -1183,10 +1146,10 @@ impl protocols::agent_ttrpc::AgentService for AgentService { .rtnl .list_routes() .await - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, format!("list routes: {:?}", e)))?; + .map_ttrpc_err(|e| format!("list routes: {:?}", e))?; Ok(protocols::agent::Routes { - Routes: RepeatedField::from_vec(list), + Routes: list, ..Default::default() }) } @@ -1197,11 +1160,10 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::CreateSandboxRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "create_sandbox", req); - is_allowed!(req); + is_allowed(&req).await?; { - let sandbox = self.sandbox.clone(); - let mut s = sandbox.lock().await; + let mut s = self.sandbox.lock().await; let _ = fs::remove_dir_all(CONTAINER_BASE); let _ = fs::create_dir_all(CONTAINER_BASE); @@ -1212,7 +1174,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService { if !req.guest_hook_path.is_empty() { let _ = s.add_hooks(&req.guest_hook_path).map_err(|e| { error!( - sl!(), + sl(), "add guest hook {} failed: {:?}", req.guest_hook_path, e ); }); @@ -1223,35 +1185,24 @@ impl protocols::agent_ttrpc::AgentService for AgentService { } for m in req.kernel_modules.iter() { - load_kernel_module(m).map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, 
e))?; + load_kernel_module(m).map_ttrpc_err(same)?; } - s.setup_shared_namespaces() - .await - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?; + s.setup_shared_namespaces().await.map_ttrpc_err(same)?; } - match add_storages(sl!(), req.storages.to_vec(), self.sandbox.clone(), None).await { - Ok(m) => { - let sandbox = self.sandbox.clone(); - let mut s = sandbox.lock().await; - s.mounts = m - } - Err(e) => return Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)), - }; + let m = add_storages(sl(), req.storages, &self.sandbox, None) + .await + .map_ttrpc_err(same)?; + self.sandbox.lock().await.mounts = m; - match setup_guest_dns(sl!(), req.dns.to_vec()) { - Ok(_) => { - let sandbox = self.sandbox.clone(); - let mut s = sandbox.lock().await; - let _dns = req - .dns - .to_vec() - .iter() - .map(|dns| s.network.set_dns(dns.to_string())); + setup_guest_dns(sl(), &req.dns).map_ttrpc_err(same)?; + { + let mut s = self.sandbox.lock().await; + for dns in req.dns { + s.network.set_dns(dns); } - Err(e) => return Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)), - }; + } Ok(Empty::new()) } @@ -1262,31 +1213,24 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::DestroySandboxRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "destroy_sandbox", req); - is_allowed!(req); + is_allowed(&req).await?; - let s = Arc::clone(&self.sandbox); - let mut sandbox = s.lock().await; - // destroy all containers, clean up, notify agent to exit - // etc. - sandbox - .destroy() - .await - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?; + let mut sandbox = self.sandbox.lock().await; + // destroy all containers, clean up, notify agent to exit etc. + sandbox.destroy().await.map_ttrpc_err(same)?; // Close get_oom_event connection, // otherwise it will block the shutdown of ttrpc. - sandbox.event_tx.take(); + drop(sandbox.event_tx.take()); sandbox .sender .take() - .ok_or_else(|| { - ttrpc_error!( - ttrpc::Code::INTERNAL, - "failed to get sandbox sender channel".to_string(), - ) - })? + .map_ttrpc_err( + ttrpc::Code::INTERNAL, + "failed to get sandbox sender channel", + )? 
.send(1) - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?; + .map_ttrpc_err(same)?; Ok(Empty::new()) } @@ -1297,18 +1241,16 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::AddARPNeighborsRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "add_arp_neighbors", req); - is_allowed!(req); + is_allowed(&req).await?; let neighs = req .neighbors .into_option() - .map(|n| n.ARPNeighbors.into_vec()) - .ok_or_else(|| { - ttrpc_error!( - ttrpc::Code::INVALID_ARGUMENT, - "empty add arp neighbours request".to_string(), - ) - })?; + .map(|n| n.ARPNeighbors) + .map_ttrpc_err( + ttrpc::Code::INVALID_ARGUMENT, + "empty add arp neighbours request", + )?; self.sandbox .lock() @@ -1316,12 +1258,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService { .rtnl .add_arp_neighbors(neighs) .await - .map_err(|e| { - ttrpc_error!( - ttrpc::Code::INTERNAL, - format!("Failed to add ARP neighbours: {:?}", e), - ) - })?; + .map_ttrpc_err(|e| format!("Failed to add ARP neighbours: {:?}", e))?; Ok(Empty::new()) } @@ -1331,14 +1268,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService { ctx: &TtrpcContext, req: protocols::agent::OnlineCPUMemRequest, ) -> ttrpc::Result { - is_allowed!(req); - let s = Arc::clone(&self.sandbox); - let sandbox = s.lock().await; trace_rpc_call!(ctx, "online_cpu_mem", req); + is_allowed(&req).await?; + let sandbox = self.sandbox.lock().await; - sandbox - .online_cpu_memory(&req) - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?; + sandbox.online_cpu_memory(&req).map_ttrpc_err(same)?; Ok(Empty::new()) } @@ -1349,10 +1283,9 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::ReseedRandomDevRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "reseed_random_dev", req); - is_allowed!(req); + is_allowed(&req).await?; - random::reseed_rng(req.data.as_slice()) - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?; + random::reseed_rng(req.data.as_slice()).map_ttrpc_err(same)?; Ok(Empty::new()) } @@ -1363,30 +1296,25 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::GuestDetailsRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "get_guest_details", req); - is_allowed!(req); + is_allowed(&req).await?; - info!(sl!(), "get guest details!"); + info!(sl(), "get guest details!"); let mut resp = GuestDetailsResponse::new(); // to get memory block size - match get_memory_info( + let (u, v) = get_memory_info( req.mem_block_size, req.mem_hotplug_probe, SYSFS_MEMORY_BLOCK_SIZE_PATH, SYSFS_MEMORY_HOTPLUG_PROBE_PATH, - ) { - Ok((u, v)) => { - resp.mem_block_size_bytes = u; - resp.support_mem_hotplug_probe = v; - } - Err(e) => { - info!(sl!(), "fail to get memory info!"); - return Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)); - } - } + ) + .map_ttrpc_err_do(|_| info!(sl(), "fail to get memory info!"))?; + + resp.mem_block_size_bytes = u; + resp.support_mem_hotplug_probe = v; // to get agent details let detail = get_agent_details(); - resp.agent_details = SingularPtrField::some(detail); + resp.agent_details = MessageField::some(detail); Ok(resp) } @@ -1397,10 +1325,9 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::MemHotplugByProbeRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "mem_hotplug_by_probe", req); - is_allowed!(req); + is_allowed(&req).await?; - do_mem_hotplug_by_probe(&req.memHotplugProbeAddr) - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?; + do_mem_hotplug_by_probe(&req.memHotplugProbeAddr).map_ttrpc_err(same)?; 
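Nearly every handler above now funnels failures through the `map_ttrpc_err` helper this patch introduces, instead of repeating the old `ttrpc_error!(ttrpc::Code::INTERNAL, e)` pattern. A self-contained sketch of that extension-trait idea, with `RpcError`/`RpcCode` standing in for `ttrpc::Error`/`ttrpc::Code` so it compiles on its own:

```rust
use std::fmt::Debug;

// Stand-ins for ttrpc::Code / ttrpc::Error; the real crate types differ.
#[derive(Debug)]
enum RpcCode {
    Internal,
}

#[derive(Debug)]
struct RpcError {
    code: RpcCode,
    msg: String,
}

type RpcResult<T> = Result<T, RpcError>;

// Extension trait: turn any Result<T, E> into an "internal" RPC error,
// letting each call site decide how to describe the failure.
trait ResultToRpcResult<T, E: Debug>: Sized {
    fn map_rpc_err<R: Debug>(self, msg_builder: impl FnOnce(E) -> R) -> RpcResult<T>;
}

impl<T, E: Debug> ResultToRpcResult<T, E> for Result<T, E> {
    fn map_rpc_err<R: Debug>(self, msg_builder: impl FnOnce(E) -> R) -> RpcResult<T> {
        self.map_err(|e| RpcError {
            code: RpcCode::Internal,
            msg: format!("{:?}", msg_builder(e)),
        })
    }
}

fn main() {
    let bad: Result<u32, _> = "not-a-number".parse::<u32>();
    let rpc = bad.map_rpc_err(|e| format!("failed to parse: {e:?}"));
    if let Err(e) = rpc {
        println!("code={:?} msg={}", e.code, e.msg);
    }
}
```

The trait keeps each handler to a single chained call while still letting it attach its own context message, which is what the `|e| format!(...)` closures in the hunks above do.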
Ok(Empty::new()) } @@ -1411,10 +1338,9 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::SetGuestDateTimeRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "set_guest_date_time", req); - is_allowed!(req); + is_allowed(&req).await?; - do_set_guest_date_time(req.Sec, req.Usec) - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?; + do_set_guest_date_time(req.Sec, req.Usec).map_ttrpc_err(same)?; Ok(Empty::new()) } @@ -1425,9 +1351,9 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::CopyFileRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "copy_file", req); - is_allowed!(req); + is_allowed(&req).await?; - do_copy_file(&req).map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?; + do_copy_file(&req).map_ttrpc_err(same)?; Ok(Empty::new()) } @@ -1438,16 +1364,12 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::GetMetricsRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "get_metrics", req); - is_allowed!(req); - - match get_metrics(&req) { - Err(e) => Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)), - Ok(s) => { - let mut metrics = Metrics::new(); - metrics.set_metrics(s); - Ok(metrics) - } - } + is_allowed(&req).await?; + + let s = get_metrics(&req).map_ttrpc_err(same)?; + let mut metrics = Metrics::new(); + metrics.set_metrics(s); + Ok(metrics) } async fn get_oom_event( @@ -1455,24 +1377,22 @@ impl protocols::agent_ttrpc::AgentService for AgentService { _ctx: &TtrpcContext, req: protocols::agent::GetOOMEventRequest, ) -> ttrpc::Result { - is_allowed!(req); - let sandbox = self.sandbox.clone(); - let s = sandbox.lock().await; + is_allowed(&req).await?; + let s = self.sandbox.lock().await; let event_rx = &s.event_rx.clone(); let mut event_rx = event_rx.lock().await; drop(s); - drop(sandbox); - if let Some(container_id) = event_rx.recv().await { - info!(sl!(), "get_oom_event return {}", &container_id); - - let mut resp = OOMEvent::new(); - resp.container_id = container_id; + let container_id = event_rx + .recv() + .await + .map_ttrpc_err(ttrpc::Code::INTERNAL, "")?; - return Ok(resp); - } + info!(sl(), "get_oom_event return {}", &container_id); - Err(ttrpc_error!(ttrpc::Code::INTERNAL, "")) + let mut resp = OOMEvent::new(); + resp.container_id = container_id; + Ok(resp) } async fn get_volume_stats( @@ -1481,38 +1401,32 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: VolumeStatsRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "get_volume_stats", req); - is_allowed!(req); + is_allowed(&req).await?; - info!(sl!(), "get volume stats!"); + info!(sl(), "get volume stats!"); let mut resp = VolumeStatsResponse::new(); - let mut condition = VolumeCondition::new(); - match File::open(&req.volume_guest_path) { - Ok(_) => { - condition.abnormal = false; - condition.message = String::from("OK"); - } - Err(e) => { - info!(sl!(), "failed to open the volume"); - return Err(ttrpc_error!(ttrpc::Code::INTERNAL, e)); - } - }; + File::open(&req.volume_guest_path) + .map_ttrpc_err_do(|_| info!(sl(), "failed to open the volume"))?; + + condition.abnormal = false; + condition.message = String::from("OK"); let mut usage_vec = Vec::new(); // to get volume capacity stats get_volume_capacity_stats(&req.volume_guest_path) .map(|u| usage_vec.push(u)) - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?; + .map_ttrpc_err(same)?; // to get volume inode stats get_volume_inode_stats(&req.volume_guest_path) .map(|u| usage_vec.push(u)) - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?; + 
.map_ttrpc_err(same)?; - resp.usage = RepeatedField::from_vec(usage_vec); - resp.volume_condition = SingularPtrField::some(condition); + resp.usage = usage_vec; + resp.volume_condition = MessageField::some(condition); Ok(resp) } @@ -1522,11 +1436,28 @@ impl protocols::agent_ttrpc::AgentService for AgentService { req: protocols::agent::AddSwapRequest, ) -> ttrpc::Result { trace_rpc_call!(ctx, "add_swap", req); - is_allowed!(req); + is_allowed(&req).await?; + + do_add_swap(&self.sandbox, &req).await.map_ttrpc_err(same)?; + + Ok(Empty::new()) + } - do_add_swap(&self.sandbox, &req) + #[cfg(feature = "agent-policy")] + async fn set_policy( + &self, + ctx: &TtrpcContext, + req: protocols::agent::SetPolicyRequest, + ) -> ttrpc::Result { + trace_rpc_call!(ctx, "set_policy", req); + is_allowed(&req).await?; + + AGENT_POLICY + .lock() + .await + .set_policy(&req.policy) .await - .map_err(|e| ttrpc_error!(ttrpc::Code::INTERNAL, e))?; + .map_ttrpc_err(same)?; Ok(Empty::new()) } @@ -1536,7 +1467,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService { struct HealthService; #[async_trait] -impl protocols::health_ttrpc::Health for HealthService { +impl health_ttrpc::Health for HealthService { async fn check( &self, _ctx: &TtrpcContext, @@ -1553,7 +1484,7 @@ impl protocols::health_ttrpc::Health for HealthService { _ctx: &TtrpcContext, req: protocols::health::CheckRequest, ) -> ttrpc::Result { - info!(sl!(), "version {:?}", req); + info!(sl(), "version {:?}", req); let mut rep = protocols::health::VersionCheckResponse::new(); rep.agent_version = AGENT_VERSION.to_string(); rep.grpc_version = API_VERSION.to_string(); @@ -1574,17 +1505,17 @@ fn get_memory_info( match fs::read_to_string(block_size_path) { Ok(v) => { if v.is_empty() { - warn!(sl!(), "file {} is empty", block_size_path); + warn!(sl(), "file {} is empty", block_size_path); return Err(anyhow!(ERR_INVALID_BLOCK_SIZE)); } size = u64::from_str_radix(v.trim(), 16).map_err(|_| { - warn!(sl!(), "failed to parse the str {} to hex", size); + warn!(sl(), "failed to parse the str {} to hex", size); anyhow!(ERR_INVALID_BLOCK_SIZE) })?; } Err(e) => { - warn!(sl!(), "memory block size error: {:?}", e.kind()); + warn!(sl(), "memory block size error: {:?}", e.kind()); if e.kind() != std::io::ErrorKind::NotFound { return Err(anyhow!(e)); } @@ -1596,7 +1527,7 @@ fn get_memory_info( match stat::stat(hotplug_probe_path) { Ok(_) => plug = true, Err(e) => { - warn!(sl!(), "hotplug memory error: {:?}", e); + warn!(sl(), "hotplug memory error: {:?}", e); match e { nix::Error::ENOENT => plug = false, _ => return Err(anyhow!(e)), @@ -1616,7 +1547,7 @@ fn get_volume_capacity_stats(path: &str) -> Result { usage.total = stat.blocks() * block_size; usage.available = stat.blocks_free() * block_size; usage.used = usage.total - usage.available; - usage.unit = VolumeUsage_Unit::BYTES; + usage.unit = VolumeUsage_Unit::BYTES.into(); Ok(usage) } @@ -1628,7 +1559,7 @@ fn get_volume_inode_stats(path: &str) -> Result { usage.total = stat.files(); usage.available = stat.files_free(); usage.used = usage.total - usage.available; - usage.unit = VolumeUsage_Unit::INODES; + usage.unit = VolumeUsage_Unit::INODES.into(); Ok(usage) } @@ -1648,19 +1579,13 @@ fn get_agent_details() -> AgentDetails { detail.set_supports_seccomp(have_seccomp()); detail.init_daemon = unistd::getpid() == Pid::from_raw(1); - detail.device_handlers = RepeatedField::new(); - detail.storage_handlers = RepeatedField::from_vec( - STORAGE_HANDLER_LIST - .to_vec() - .iter() - .map(|x| x.to_string()) - .collect(), - 
); + detail.device_handlers = Vec::new(); + detail.storage_handlers = STORAGE_HANDLERS.get_handlers(); detail } -async fn read_stream(reader: Arc>>, l: usize) -> Result> { +async fn read_stream(reader: &Mutex>, l: usize) -> Result> { let mut content = vec![0u8; l]; let mut reader = reader.lock().await; @@ -1674,26 +1599,22 @@ async fn read_stream(reader: Arc>>, l: usize) -> Resu Ok(content) } -pub fn start(s: Arc>, server_address: &str) -> Result { - let agent_service = Box::new(AgentService { sandbox: s }) - as Box; - - let agent_worker = Arc::new(agent_service); +pub fn start(s: Arc>, server_address: &str, init_mode: bool) -> Result { + let agent_service = Box::new(AgentService { + sandbox: s, + init_mode, + }) as Box; + let aservice = agent_ttrpc::create_agent_service(Arc::new(agent_service)); - let health_service = - Box::new(HealthService {}) as Box; - let health_worker = Arc::new(health_service); - - let aservice = protocols::agent_ttrpc::create_agent_service(agent_worker); - - let hservice = protocols::health_ttrpc::create_health(health_worker); + let health_service = Box::new(HealthService {}) as Box; + let hservice = health_ttrpc::create_health(Arc::new(health_service)); let server = TtrpcServer::new() .bind(server_address)? .register_service(aservice) .register_service(hservice); - info!(sl!(), "ttRPC server started"; "address" => server_address); + info!(sl(), "ttRPC server started"; "address" => server_address); Ok(server) } @@ -1730,6 +1651,7 @@ fn update_container_namespaces( continue; } } + // update pid namespace let mut pid_ns = LinuxNamespace { r#type: NSTYPEPID.to_string(), @@ -1752,6 +1674,35 @@ fn update_container_namespaces( Ok(()) } +async fn remove_container_resources(sandbox: &mut Sandbox, cid: &str) -> Result<()> { + let mut cmounts: Vec = vec![]; + + // Find the sandbox storage used by this container + let mounts = sandbox.container_mounts.get(cid); + if let Some(mounts) = mounts { + for m in mounts.iter() { + if sandbox.storages.contains_key(m) { + cmounts.push(m.to_string()); + } + } + } + + for m in cmounts.iter() { + if let Err(err) = sandbox.remove_sandbox_storage(m).await { + error!( + sl(), + "failed to unset_and_remove_sandbox_storage for container {}, error: {:?}", + cid, + err + ); + } + } + + sandbox.container_mounts.remove(cid); + sandbox.containers.remove(cid); + Ok(()) +} + fn append_guest_hooks(s: &Sandbox, oci: &mut Spec) -> Result<()> { if let Some(ref guest_hooks) = s.hooks { let mut hooks = oci.hooks.take().unwrap_or_default(); @@ -1775,7 +1726,7 @@ fn is_signal_handled(proc_status_file: &str, signum: u32) -> bool { return fs::metadata(proc_status_file).is_ok(); } else if signum > 64 { // Ensure invalid signum won't break bit shift logic - warn!(sl!(), "received invalid signum {}", signum); + warn!(sl(), "received invalid signum {}", signum); return false; } else { (signum - 1).into() @@ -1785,7 +1736,7 @@ fn is_signal_handled(proc_status_file: &str, signum: u32) -> bool { let file = match File::open(proc_status_file) { Ok(f) => f, Err(_) => { - warn!(sl!(), "failed to open file {}", proc_status_file); + warn!(sl(), "failed to open file {}", proc_status_file); return false; } }; @@ -1843,26 +1794,57 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> { let path = PathBuf::from(req.path.as_str()); if !path.starts_with(CONTAINER_BASE) { - return Err(anyhow!(nix::Error::EINVAL)); + return Err(anyhow!( + "Path {:?} does not start with {}", + path, + CONTAINER_BASE + )); } - let parent = path.parent(); + if let Some(parent) = path.parent() { + 
if !parent.exists() { + let dir = parent.to_path_buf(); + if let Err(e) = fs::create_dir_all(&dir) { + if e.kind() != std::io::ErrorKind::AlreadyExists { + return Err(e.into()); + } + } else { + std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(req.dir_mode))?; + } + } + } - let dir = if let Some(parent) = parent { - parent.to_path_buf() - } else { - PathBuf::from("/") - }; + let sflag = stat::SFlag::from_bits_truncate(req.file_mode); - fs::create_dir_all(&dir).or_else(|e| { - if e.kind() != std::io::ErrorKind::AlreadyExists { - return Err(e); - } + if sflag.contains(stat::SFlag::S_IFDIR) { + fs::create_dir(&path).or_else(|e| { + if e.kind() != std::io::ErrorKind::AlreadyExists { + return Err(e); + } + Ok(()) + })?; - Ok(()) - })?; + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(req.file_mode))?; + + unistd::chown( + &path, + Some(Uid::from_raw(req.uid as u32)), + Some(Gid::from_raw(req.gid as u32)), + )?; - std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(req.dir_mode))?; + return Ok(()); + } + + if sflag.contains(stat::SFlag::S_IFLNK) { + let src = PathBuf::from(OsStr::from_bytes(&req.data)); + unistd::symlinkat(&src, None, &path)?; + let path_str = CString::new(path.as_os_str().as_bytes())?; + + let ret = unsafe { libc::lchown(path_str.as_ptr(), req.uid as u32, req.gid as u32) }; + Errno::result(ret).map(drop)?; + + return Ok(()); + } let mut tmpfile = path.clone(); tmpfile.set_extension("tmp"); @@ -1939,7 +1921,7 @@ pub fn setup_bundle(cid: &str, spec: &mut Spec) -> Result { "bind", MsFlags::MS_BIND, "", - &sl!(), + &sl(), )?; let rootfs_path_name = rootfs_path @@ -1974,14 +1956,14 @@ fn load_kernel_module(module: &protocols::agent::KernelModule) -> Result<()> { } info!( - sl!(), + sl(), "load_kernel_module {}: {:?}", module.name, module.parameters ); - let mut args = vec!["-v".to_string(), module.name.clone()]; + let mut args = vec!["-v", &module.name]; - if module.parameters.len() > 0 { - args.extend(module.parameters.to_vec()) + if !module.parameters.is_empty() { + args.extend(module.parameters.iter().map(String::as_str)); } let output = Command::new(MODPROBE_PATH) @@ -2011,15 +1993,18 @@ fn load_kernel_module(module: &protocols::agent::KernelModule) -> Result<()> { #[cfg(test)] mod tests { use super::*; - use crate::{ - assert_result, namespace::Namespace, protocols::agent_ttrpc::AgentService as _, - skip_if_not_root, - }; + use crate::{namespace::Namespace, protocols::agent_ttrpc_async::AgentService as _}; use nix::mount; use nix::sched::{unshare, CloneFlags}; use oci::{Hook, Hooks, Linux, LinuxNamespace}; use tempfile::{tempdir, TempDir}; + use test_utils::{assert_result, skip_if_not_root}; use ttrpc::{r#async::TtrpcContext, MessageHeader}; + use which::which; + + fn check_command(cmd: &str) -> bool { + which(cmd).is_ok() + } fn mk_ttrpc_context() -> TtrpcContext { TtrpcContext { @@ -2084,6 +2069,7 @@ mod tests { let result = load_kernel_module(&m); assert!(result.is_err(), "load module should failed"); + skip_if_not_root!(); // case 3: normal module. // normally this module should eixsts... 
m.name = "bridge".to_string(); @@ -2116,6 +2102,7 @@ mod tests { let agent_service = Box::new(AgentService { sandbox: Arc::new(Mutex::new(sandbox)), + init_mode: true, }); let req = protocols::agent::UpdateInterfaceRequest::default(); @@ -2133,6 +2120,7 @@ mod tests { let agent_service = Box::new(AgentService { sandbox: Arc::new(Mutex::new(sandbox)), + init_mode: true, }); let req = protocols::agent::UpdateRoutesRequest::default(); @@ -2150,6 +2138,7 @@ mod tests { let agent_service = Box::new(AgentService { sandbox: Arc::new(Mutex::new(sandbox)), + init_mode: true, }); let req = protocols::agent::AddARPNeighborsRequest::default(); @@ -2262,6 +2251,7 @@ mod tests { if d.has_fd { Some(wfd) } else { + unistd::close(wfd).unwrap(); None } }; @@ -2282,6 +2272,7 @@ mod tests { let agent_service = Box::new(AgentService { sandbox: Arc::new(Mutex::new(sandbox)), + init_mode: true, }); let result = agent_service @@ -2296,13 +2287,14 @@ mod tests { if !d.break_pipe { unistd::close(rfd).unwrap(); } - unistd::close(wfd).unwrap(); + // XXX: Do not close wfd. + // the fd will be closed on Process's dropping. + // unistd::close(wfd).unwrap(); let msg = format!("{}, result: {:?}", msg, result); assert_result!(d.result, result, msg); } } - #[tokio::test] async fn test_update_container_namespaces() { #[derive(Debug)] @@ -2670,233 +2662,6 @@ OtherField:other } } - #[tokio::test] - async fn test_verify_cid() { - #[derive(Debug)] - struct TestData<'a> { - id: &'a str, - expect_error: bool, - } - - let tests = &[ - TestData { - // Cannot be blank - id: "", - expect_error: true, - }, - TestData { - // Cannot be a space - id: " ", - expect_error: true, - }, - TestData { - // Must start with an alphanumeric - id: ".", - expect_error: true, - }, - TestData { - // Must start with an alphanumeric - id: "-", - expect_error: true, - }, - TestData { - // Must start with an alphanumeric - id: "_", - expect_error: true, - }, - TestData { - // Must start with an alphanumeric - id: " a", - expect_error: true, - }, - TestData { - // Must start with an alphanumeric - id: ".a", - expect_error: true, - }, - TestData { - // Must start with an alphanumeric - id: "-a", - expect_error: true, - }, - TestData { - // Must start with an alphanumeric - id: "_a", - expect_error: true, - }, - TestData { - // Must start with an alphanumeric - id: "..", - expect_error: true, - }, - TestData { - // Too short - id: "a", - expect_error: true, - }, - TestData { - // Too short - id: "z", - expect_error: true, - }, - TestData { - // Too short - id: "A", - expect_error: true, - }, - TestData { - // Too short - id: "Z", - expect_error: true, - }, - TestData { - // Too short - id: "0", - expect_error: true, - }, - TestData { - // Too short - id: "9", - expect_error: true, - }, - TestData { - // Must start with an alphanumeric - id: "-1", - expect_error: true, - }, - TestData { - id: "/", - expect_error: true, - }, - TestData { - id: "a/", - expect_error: true, - }, - TestData { - id: "a/../", - expect_error: true, - }, - TestData { - id: "../a", - expect_error: true, - }, - TestData { - id: "../../a", - expect_error: true, - }, - TestData { - id: "../../../a", - expect_error: true, - }, - TestData { - id: "foo/../bar", - expect_error: true, - }, - TestData { - id: "foo bar", - expect_error: true, - }, - TestData { - id: "a.", - expect_error: false, - }, - TestData { - id: "a..", - expect_error: false, - }, - TestData { - id: "aa", - expect_error: false, - }, - TestData { - id: "aa.", - expect_error: false, - }, - TestData { - id: "hello..world", - 
expect_error: false, - }, - TestData { - id: "hello/../world", - expect_error: true, - }, - TestData { - id: "aa1245124sadfasdfgasdga.", - expect_error: false, - }, - TestData { - id: "aAzZ0123456789_.-", - expect_error: false, - }, - TestData { - id: "abcdefghijklmnopqrstuvwxyz0123456789.-_", - expect_error: false, - }, - TestData { - id: "0123456789abcdefghijklmnopqrstuvwxyz.-_", - expect_error: false, - }, - TestData { - id: " abcdefghijklmnopqrstuvwxyz0123456789.-_", - expect_error: true, - }, - TestData { - id: ".abcdefghijklmnopqrstuvwxyz0123456789.-_", - expect_error: true, - }, - TestData { - id: "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_", - expect_error: false, - }, - TestData { - id: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ.-_", - expect_error: false, - }, - TestData { - id: " ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_", - expect_error: true, - }, - TestData { - id: ".ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_", - expect_error: true, - }, - TestData { - id: "/a/b/c", - expect_error: true, - }, - TestData { - id: "a/b/c", - expect_error: true, - }, - TestData { - id: "foo/../../../etc/passwd", - expect_error: true, - }, - TestData { - id: "../../../../../../etc/motd", - expect_error: true, - }, - TestData { - id: "/etc/passwd", - expect_error: true, - }, - ]; - - for (i, d) in tests.iter().enumerate() { - let msg = format!("test[{}]: {:?}", i, d); - - let result = verify_cid(d.id); - - let msg = format!("{}, result: {:?}", msg, result); - - if result.is_ok() { - assert!(!d.expect_error, "{}", msg); - } else { - assert!(d.expect_error, "{}", msg); - } - } - } - #[tokio::test] async fn test_volume_capacity_stats() { skip_if_not_root!(); @@ -2963,10 +2728,32 @@ OtherField:other async fn test_ip_tables() { skip_if_not_root!(); + let iptables_cmd_list = [ + USR_IPTABLES_SAVE, + USR_IP6TABLES_SAVE, + USR_IPTABLES_RESTORE, + USR_IP6TABLES_RESTORE, + IPTABLES_SAVE, + IP6TABLES_SAVE, + IPTABLES_RESTORE, + IP6TABLES_RESTORE, + ]; + + for cmd in iptables_cmd_list { + if !check_command(cmd) { + warn!( + sl(), + "one or more commands for ip tables test are missing, skip it" + ); + return; + } + } + let logger = slog::Logger::root(slog::Discard, o!()); let sandbox = Sandbox::new(&logger).unwrap(); let agent_service = Box::new(AgentService { sandbox: Arc::new(Mutex::new(sandbox)), + init_mode: true, }); let ctx = mk_ttrpc_context(); @@ -3072,7 +2859,7 @@ COMMIT .unwrap(); assert!(!result.data.is_empty(), "we should have non-zero output:"); assert!( - std::str::from_utf8(&*result.data).unwrap().contains( + std::str::from_utf8(&result.data).unwrap().contains( "PREROUTING -d 192.168.103.153/32 -j DNAT --to-destination 192.168.188.153" ), "We should see the resulting rule" @@ -3110,7 +2897,7 @@ COMMIT .unwrap(); assert!(!result.data.is_empty(), "we should have non-zero output:"); assert!( - std::str::from_utf8(&*result.data) + std::str::from_utf8(&result.data) .unwrap() .contains("INPUT -s 2001:db8:100::1/128 -i sit+ -p tcp -m tcp --sport 512:65535"), "We should see the resulting rule" diff --git a/src/agent/src/sandbox.rs b/src/agent/src/sandbox.rs index 36861fc831ba..b4331ed4becd 100644 --- a/src/agent/src/sandbox.rs +++ b/src/agent/src/sandbox.rs @@ -3,15 +3,20 @@ // SPDX-License-Identifier: Apache-2.0 // -use crate::linux_abi::*; -use crate::mount::{get_mount_fs_type, remove_mounts, TYPE_ROOTFS}; -use crate::namespace::Namespace; -use crate::netlink::Handle; -use crate::network::Network; -use crate::pci; -use crate::uevent::{Uevent, UeventMatcher}; -use crate::watcher::BindWatcher; +use 
std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::fmt::{Debug, Formatter}; +use std::fs; +use std::os::unix::fs::PermissionsExt; +use std::path::Path; +use std::str::FromStr; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::Arc; +use std::{thread, time}; + use anyhow::{anyhow, Context, Result}; +use kata_types::cpu::CpuSet; +use kata_types::mount::StorageDevice; use libc::pid_t; use oci::{Hook, Hooks}; use protocols::agent::OnlineCPUMemRequest; @@ -21,21 +26,69 @@ use rustjail::container::BaseContainer; use rustjail::container::LinuxContainer; use rustjail::process::Process; use slog::Logger; -use std::collections::HashMap; -use std::fs; -use std::os::unix::fs::PermissionsExt; -use std::path::Path; -use std::sync::Arc; -use std::{thread, time}; use tokio::sync::mpsc::{channel, Receiver, Sender}; use tokio::sync::oneshot; use tokio::sync::Mutex; use tracing::instrument; +use crate::linux_abi::*; +use crate::mount::{get_mount_fs_type, TYPE_ROOTFS}; +use crate::namespace::Namespace; +use crate::netlink::Handle; +use crate::network::Network; +use crate::pci; +use crate::storage::StorageDeviceGeneric; +use crate::uevent::{Uevent, UeventMatcher}; +use crate::watcher::BindWatcher; + pub const ERR_INVALID_CONTAINER_ID: &str = "Invalid container id"; type UeventWatcher = (Box, oneshot::Sender); +#[derive(Clone)] +pub struct StorageState { + count: Arc, + device: Arc, +} + +impl Debug for StorageState { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("StorageState").finish() + } +} + +impl StorageState { + fn new() -> Self { + StorageState { + count: Arc::new(AtomicU32::new(1)), + device: Arc::new(StorageDeviceGeneric::default()), + } + } + + pub fn from_device(device: Arc) -> Self { + Self { + count: Arc::new(AtomicU32::new(1)), + device, + } + } + + pub fn path(&self) -> Option<&str> { + self.device.path() + } + + pub async fn ref_count(&self) -> u32 { + self.count.load(Ordering::Relaxed) + } + + async fn inc_ref_count(&self) { + self.count.fetch_add(1, Ordering::Acquire); + } + + async fn dec_and_test_ref_count(&self) -> bool { + self.count.fetch_sub(1, Ordering::AcqRel) == 1 + } +} + #[derive(Debug)] pub struct Sandbox { pub logger: Logger, @@ -50,7 +103,7 @@ pub struct Sandbox { pub shared_utsns: Namespace, pub shared_ipcns: Namespace, pub sandbox_pidns: Option, - pub storages: HashMap, + pub storages: HashMap, pub running: bool, pub no_pivot_root: bool, pub sender: Option>, @@ -96,83 +149,58 @@ impl Sandbox { }) } - // set_sandbox_storage sets the sandbox level reference - // counter for the sandbox storage. - // This method also returns a boolean to let - // callers know if the storage already existed or not. - // It will return true if storage is new. - // - // It's assumed that caller is calling this method after - // acquiring a lock on sandbox. + /// Add a new storage object or increase reference count of existing one. + /// The caller may detect new storage object by checking `StorageState.refcount == 1`. 
#[instrument] - pub fn set_sandbox_storage(&mut self, path: &str) -> bool { - match self.storages.get_mut(path) { - None => { - self.storages.insert(path.to_string(), 1); - true + pub async fn add_sandbox_storage(&mut self, path: &str) -> StorageState { + match self.storages.entry(path.to_string()) { + Entry::Occupied(e) => { + let state = e.get().clone(); + state.inc_ref_count().await; + state } - Some(count) => { - *count += 1; - false + Entry::Vacant(e) => { + let state = StorageState::new(); + e.insert(state.clone()); + state } } } - // unset_sandbox_storage will decrement the sandbox storage - // reference counter. If there aren't any containers using - // that sandbox storage, this method will remove the - // storage reference from the sandbox and return 'true' to - // let the caller know that they can clean up the storage - // related directories by calling remove_sandbox_storage - // - // It's assumed that caller is calling this method after - // acquiring a lock on sandbox. - #[instrument] - pub fn unset_sandbox_storage(&mut self, path: &str) -> Result { - match self.storages.get_mut(path) { - None => Err(anyhow!("Sandbox storage with path {} not found", path)), - Some(count) => { - *count -= 1; - if *count < 1 { - self.storages.remove(path); - return Ok(true); - } - Ok(false) - } + /// Update the storage device associated with a path. + pub fn update_sandbox_storage( + &mut self, + path: &str, + device: Arc, + ) -> std::result::Result, Arc> { + if !self.storages.contains_key(path) { + return Err(device); } - } - // remove_sandbox_storage removes the sandbox storage if no - // containers are using that storage. - // - // It's assumed that caller is calling this method after - // acquiring a lock on sandbox. - #[instrument] - pub fn remove_sandbox_storage(&self, path: &str) -> Result<()> { - let mounts = vec![path.to_string()]; - remove_mounts(&mounts)?; - // "remove_dir" will fail if the mount point is backed by a read-only filesystem. - // This is the case with the device mapper snapshotter, where we mount the block device directly - // at the underlying sandbox path which was provided from the base RO kataShared path from the host. - if let Err(err) = fs::remove_dir(path) { - warn!(self.logger, "failed to remove dir {}, {:?}", path, err); - } - Ok(()) + let state = StorageState::from_device(device); + // Safe to unwrap() because we have just ensured existence of entry. + let state = self.storages.insert(path.to_string(), state).unwrap(); + Ok(state.device) } - // unset_and_remove_sandbox_storage unsets the storage from sandbox - // and if there are no containers using this storage it will - // remove it from the sandbox. - // - // It's assumed that caller is calling this method after - // acquiring a lock on sandbox. + /// Decrease reference count and destroy the storage object if reference count reaches zero. + /// Returns `Ok(true)` if the reference count has reached zero and the storage object has been + /// removed. #[instrument] - pub fn unset_and_remove_sandbox_storage(&mut self, path: &str) -> Result<()> { - if self.unset_sandbox_storage(path)? 
{ - return self.remove_sandbox_storage(path); + pub async fn remove_sandbox_storage(&mut self, path: &str) -> Result { + match self.storages.get(path) { + None => Err(anyhow!("Sandbox storage with path {} not found", path)), + Some(state) => { + if state.dec_and_test_ref_count().await { + if let Some(storage) = self.storages.remove(path) { + storage.device.cleanup()?; + } + Ok(true) + } else { + Ok(false) + } + } } - - Ok(()) } #[instrument] @@ -182,22 +210,18 @@ impl Sandbox { .get_ipc() .setup() .await - .context("Failed to setup persistent IPC namespace")?; + .context("setup persistent IPC namespace")?; // // Set up shared UTS namespace self.shared_utsns = Namespace::new(&self.logger) .get_uts(self.hostname.as_str()) .setup() .await - .context("Failed to setup persistent UTS namespace")?; + .context("setup persistent UTS namespace")?; Ok(true) } - pub fn add_container(&mut self, c: LinuxContainer) { - self.containers.insert(c.id.clone(), c); - } - #[instrument] pub fn update_shared_pidns(&mut self, c: &LinuxContainer) -> Result<()> { // Populate the shared pid path only if this is an infra container and @@ -222,14 +246,18 @@ impl Sandbox { Ok(()) } + pub fn add_container(&mut self, c: LinuxContainer) { + self.containers.insert(c.id.clone(), c); + } + pub fn get_container(&mut self, id: &str) -> Option<&mut LinuxContainer> { self.containers.get_mut(id) } pub fn find_process(&mut self, pid: pid_t) -> Option<&mut Process> { for (_, c) in self.containers.iter_mut() { - if c.processes.get(&pid).is_some() { - return c.processes.get_mut(&pid); + if let Some(p) = c.processes.get_mut(&pid) { + return Some(p); } } @@ -263,12 +291,12 @@ impl Sandbox { pub fn online_cpu_memory(&self, req: &OnlineCPUMemRequest) -> Result<()> { if req.nb_cpus > 0 { // online cpus - online_cpus(&self.logger, req.nb_cpus as i32)?; + online_cpus(&self.logger, req.nb_cpus as i32).context("online cpus")?; } if !req.cpu_only { // online memory - online_memory(&self.logger)?; + online_memory(&self.logger).context("online memory")?; } if req.nb_cpus == 0 { @@ -278,26 +306,17 @@ impl Sandbox { let guest_cpuset = rustjail_cgroups::fs::get_guest_cpuset()?; for (_, ctr) in self.containers.iter() { - let cpu = ctr - .config - .spec - .as_ref() - .unwrap() - .linux - .as_ref() - .unwrap() - .resources - .as_ref() - .unwrap() - .cpu - .as_ref(); - let container_cpust = if let Some(c) = cpu { &c.cpus } else { "" }; - - info!(self.logger, "updating {}", ctr.id.as_str()); - ctr.cgroup_manager - .as_ref() - .unwrap() - .update_cpuset_path(guest_cpuset.as_str(), container_cpust)?; + if let Some(spec) = ctr.config.spec.as_ref() { + if let Some(linux) = spec.linux.as_ref() { + if let Some(resources) = linux.resources.as_ref() { + if let Some(cpus) = resources.cpu.as_ref() { + info!(self.logger, "updating {}", ctr.id.as_str()); + ctr.cgroup_manager + .update_cpuset_path(guest_cpuset.as_str(), &cpus.cpus)?; + } + } + } + } } Ok(()) @@ -327,7 +346,7 @@ impl Sandbox { // Reject non-file, symlinks and non-executable files if !entry.file_type()?.is_file() || entry.file_type()?.is_symlink() - || entry.metadata()?.permissions().mode() & 0o777 & 0o111 == 0 + || entry.metadata()?.permissions().mode() & 0o111 == 0 { continue; } @@ -359,31 +378,28 @@ impl Sandbox { #[instrument] pub async fn run_oom_event_monitor(&self, mut rx: Receiver, container_id: String) { let logger = self.logger.clone(); - - if self.event_tx.is_none() { - error!( - logger, - "sandbox.event_tx not found in run_oom_event_monitor" - ); - return; - } - - let tx = 
self.event_tx.as_ref().unwrap().clone(); + let tx = match self.event_tx.as_ref() { + Some(v) => v.clone(), + None => { + error!( + logger, + "sandbox.event_tx not found in run_oom_event_monitor" + ); + return; + } + }; tokio::spawn(async move { loop { let event = rx.recv().await; - // None means the container has exited, - // and sender in OOM notifier is dropped. + // None means the container has exited, and sender in OOM notifier is dropped. if event.is_none() { return; } info!(logger, "got an OOM event {:?}", event); - - let _ = tx - .send(container_id.clone()) - .await - .map_err(|e| error!(logger, "failed to send message: {:?}", e)); + if let Err(e) = tx.send(container_id.clone()).await { + error!(logger, "failed to send message: {:?}", e); + } } }); } @@ -396,63 +412,70 @@ fn online_resources(logger: &Logger, path: &str, pattern: &str, num: i32) -> Res for e in fs::read_dir(path)? { let entry = e?; - let tmpname = entry.file_name(); - let name = tmpname.to_str().unwrap(); - let p = entry.path(); - - if re.is_match(name) { - let file = format!("{}/{}", p.to_str().unwrap(), SYSFS_ONLINE_FILE); - info!(logger, "{}", file.as_str()); - - let c = fs::read_to_string(file.as_str()); - if c.is_err() { - continue; - } - let c = c.unwrap(); - - if c.trim().contains('0') { - let r = fs::write(file.as_str(), "1"); - if r.is_err() { + // Skip direntry which doesn't match the pattern. + match entry.file_name().to_str() { + None => continue, + Some(v) => { + if !re.is_match(v) { continue; } - count += 1; + } + }; - if num > 0 && count == num { + let p = entry.path().join(SYSFS_ONLINE_FILE); + if let Ok(c) = fs::read_to_string(&p) { + // Try to online the object in offline state. + if c.trim().contains('0') && fs::write(&p, "1").is_ok() && num > 0 { + count += 1; + if count == num { break; } } } } - if num > 0 { - return Ok(count); - } + Ok(count) +} - Ok(0) +#[instrument] +fn online_memory(logger: &Logger) -> Result<()> { + online_resources(logger, SYSFS_MEMORY_ONLINE_PATH, r"memory[0-9]+", -1) + .context("online memory resource")?; + Ok(()) } // max wait for all CPUs to online will use 50 * 100 = 5 seconds. 
-const ONLINE_CPUMEM_WATI_MILLIS: u64 = 50; -const ONLINE_CPUMEM_MAX_RETRIES: u32 = 100; +const ONLINE_CPUMEM_WAIT_MILLIS: u64 = 50; +const ONLINE_CPUMEM_MAX_RETRIES: i32 = 100; #[instrument] fn online_cpus(logger: &Logger, num: i32) -> Result { - let mut onlined_count: i32 = 0; + let mut onlined_cpu_count = onlined_cpus().context("onlined cpu count")?; + // for some vmms, like dragonball, they will online cpus for us + // so check first whether agent need to do the online operation + if onlined_cpu_count >= num { + return Ok(num); + } for i in 0..ONLINE_CPUMEM_MAX_RETRIES { - let r = online_resources( + // online num resources + online_resources( logger, - SYSFS_CPU_ONLINE_PATH, + SYSFS_CPU_PATH, r"cpu[0-9]+", - num - onlined_count, - ); + num - onlined_cpu_count, + ) + .context("online cpu resource")?; - onlined_count += r?; - if onlined_count == num { - info!(logger, "online {} CPU(s) after {} retries", num, i); + onlined_cpu_count = onlined_cpus().context("onlined cpu count")?; + if onlined_cpu_count >= num { + info!( + logger, + "Currently {} onlined CPU(s) after {} retries", onlined_cpu_count, i + ); return Ok(num); } - thread::sleep(time::Duration::from_millis(ONLINE_CPUMEM_WATI_MILLIS)); + thread::sleep(time::Duration::from_millis(ONLINE_CPUMEM_WAIT_MILLIS)); } Err(anyhow!( @@ -462,16 +485,17 @@ fn online_cpus(logger: &Logger, num: i32) -> Result { )) } -#[instrument] -fn online_memory(logger: &Logger) -> Result<()> { - online_resources(logger, SYSFS_MEMORY_ONLINE_PATH, r"memory[0-9]+", -1)?; - Ok(()) +fn onlined_cpus() -> Result { + let content = + fs::read_to_string(SYSFS_CPU_ONLINE_PATH).context("read sysfs cpu online file")?; + let online_cpu_set = CpuSet::from_str(content.trim())?; + Ok(online_cpu_set.len() as i32) } #[cfg(test)] mod tests { use super::*; - use crate::{mount::baremount, skip_if_not_root}; + use crate::mount::baremount; use anyhow::{anyhow, Error}; use nix::mount::MsFlags; use oci::{Linux, Root, Spec}; @@ -484,6 +508,7 @@ mod tests { use std::os::unix::fs::PermissionsExt; use std::path::Path; use tempfile::{tempdir, Builder, TempDir}; + use test_utils::skip_if_not_root; fn bind_mount(src: &str, dst: &str, logger: &Logger) -> Result<(), Error> { let src_path = Path::new(src); @@ -504,24 +529,22 @@ mod tests { let tmpdir_path = tmpdir.path().to_str().unwrap(); // Add a new sandbox storage - let new_storage = s.set_sandbox_storage(tmpdir_path); + let new_storage = s.add_sandbox_storage(tmpdir_path).await; // Check the reference counter - let ref_count = s.storages[tmpdir_path]; + let ref_count = new_storage.ref_count().await; assert_eq!( ref_count, 1, "Invalid refcount, got {} expected 1.", ref_count ); - assert!(new_storage); // Use the existing sandbox storage - let new_storage = s.set_sandbox_storage(tmpdir_path); - assert!(!new_storage, "Should be false as already exists."); + let new_storage = s.add_sandbox_storage(tmpdir_path).await; // Since we are using existing storage, the reference counter // should be 2 by now. 
- let ref_count = s.storages[tmpdir_path]; + let ref_count = new_storage.ref_count().await; assert_eq!( ref_count, 2, "Invalid refcount, got {} expected 2.", @@ -529,52 +552,6 @@ mod tests { ); } - #[tokio::test] - #[serial] - async fn remove_sandbox_storage() { - skip_if_not_root!(); - - let logger = slog::Logger::root(slog::Discard, o!()); - let s = Sandbox::new(&logger).unwrap(); - - let tmpdir = Builder::new().tempdir().unwrap(); - let tmpdir_path = tmpdir.path().to_str().unwrap(); - - let srcdir = Builder::new() - .prefix("src") - .tempdir_in(tmpdir_path) - .unwrap(); - let srcdir_path = srcdir.path().to_str().unwrap(); - - let destdir = Builder::new() - .prefix("dest") - .tempdir_in(tmpdir_path) - .unwrap(); - let destdir_path = destdir.path().to_str().unwrap(); - - let emptydir = Builder::new() - .prefix("empty") - .tempdir_in(tmpdir_path) - .unwrap(); - - assert!( - s.remove_sandbox_storage(srcdir_path).is_err(), - "Expect Err as the directory is not a mountpoint" - ); - - assert!(s.remove_sandbox_storage("").is_err()); - - let invalid_dir = emptydir.path().join("invalid"); - - assert!(s - .remove_sandbox_storage(invalid_dir.to_str().unwrap()) - .is_err()); - - assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok()); - - assert!(s.remove_sandbox_storage(destdir_path).is_ok()); - } - #[tokio::test] #[serial] async fn unset_and_remove_sandbox_storage() { @@ -584,8 +561,7 @@ mod tests { let mut s = Sandbox::new(&logger).unwrap(); assert!( - s.unset_and_remove_sandbox_storage("/tmp/testEphePath") - .is_err(), + s.remove_sandbox_storage("/tmp/testEphePath").await.is_err(), "Should fail because sandbox storage doesn't exist" ); @@ -606,8 +582,12 @@ mod tests { assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok()); - assert!(s.set_sandbox_storage(destdir_path)); - assert!(s.unset_and_remove_sandbox_storage(destdir_path).is_ok()); + s.add_sandbox_storage(destdir_path).await; + let storage = StorageDeviceGeneric::new(destdir_path.to_string()); + assert!(s + .update_sandbox_storage(destdir_path, Arc::new(storage)) + .is_ok()); + assert!(s.remove_sandbox_storage(destdir_path).await.is_ok()); let other_dir_str; { @@ -620,10 +600,14 @@ mod tests { let other_dir_path = other_dir.path().to_str().unwrap(); other_dir_str = other_dir_path.to_string(); - assert!(s.set_sandbox_storage(other_dir_path)); + s.add_sandbox_storage(other_dir_path).await; + let storage = StorageDeviceGeneric::new(other_dir_path.to_string()); + assert!(s + .update_sandbox_storage(other_dir_path, Arc::new(storage)) + .is_ok()); } - assert!(s.unset_and_remove_sandbox_storage(&other_dir_str).is_err()); + assert!(s.remove_sandbox_storage(&other_dir_str).await.is_ok()); } #[tokio::test] @@ -635,28 +619,30 @@ mod tests { let storage_path = "/tmp/testEphe"; // Add a new sandbox storage - assert!(s.set_sandbox_storage(storage_path)); + s.add_sandbox_storage(storage_path).await; // Use the existing sandbox storage + let state = s.add_sandbox_storage(storage_path).await; assert!( - !s.set_sandbox_storage(storage_path), + state.ref_count().await > 1, "Expects false as the storage is not new." ); assert!( - !s.unset_sandbox_storage(storage_path).unwrap(), + !s.remove_sandbox_storage(storage_path).await.unwrap(), "Expects false as there is still a storage." ); // Reference counter should decrement to 1. 
- let ref_count = s.storages[storage_path]; + let storage = &s.storages[storage_path]; + let refcount = storage.ref_count().await; assert_eq!( - ref_count, 1, + refcount, 1, "Invalid refcount, got {} expected 1.", - ref_count + refcount ); assert!( - s.unset_sandbox_storage(storage_path).unwrap(), + s.remove_sandbox_storage(storage_path).await.unwrap(), "Expects true as there is still a storage." ); @@ -672,7 +658,7 @@ mod tests { // If no container is using the sandbox storage, the reference // counter for it should not exist. assert!( - s.unset_sandbox_storage(storage_path).is_err(), + s.remove_sandbox_storage(storage_path).await.is_err(), "Expects false as the reference counter should no exist." ); } @@ -1072,7 +1058,7 @@ mod tests { fs::create_dir(&subdir_path).unwrap(); for file in j.files { let subfile_path = format!("{}/{}", subdir_path, file.name); - let mut subfile = File::create(&subfile_path).unwrap(); + let mut subfile = File::create(subfile_path).unwrap(); subfile.write_all(file.content.as_bytes()).unwrap(); } } diff --git a/src/agent/src/signal.rs b/src/agent/src/signal.rs index 79dea3b08bfe..401ded953fd1 100644 --- a/src/agent/src/signal.rs +++ b/src/agent/src/signal.rs @@ -24,7 +24,7 @@ async fn handle_sigchild(logger: Logger, sandbox: Arc>) -> Result loop { // Avoid reaping the undesirable child's signal, e.g., execute_hook's // The lock should be released immediately. - rustjail::container::WAIT_PID_LOCKER.lock().await; + let _locker = rustjail::container::WAIT_PID_LOCKER.lock().await; let result = wait::waitpid( Some(Pid::from_raw(-1)), Some(WaitPidFlag::WNOHANG | WaitPidFlag::__WALL), @@ -57,7 +57,7 @@ async fn handle_sigchild(logger: Logger, sandbox: Arc>) -> Result continue; } - let mut p = process.unwrap(); + let p = process.unwrap(); let ret: i32 = match wait_status { WaitStatus::Exited(_, c) => c, diff --git a/src/agent/src/storage/bind_watcher_handler.rs b/src/agent/src/storage/bind_watcher_handler.rs new file mode 100644 index 000000000000..3b50327d1278 --- /dev/null +++ b/src/agent/src/storage/bind_watcher_handler.rs @@ -0,0 +1,37 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::Result; +use kata_types::mount::StorageDevice; +use protocols::agent::Storage; +use std::iter; +use std::sync::Arc; +use tracing::instrument; + +use crate::storage::{new_device, StorageContext, StorageHandler}; + +#[derive(Debug)] +pub struct BindWatcherHandler {} + +#[async_trait::async_trait] +impl StorageHandler for BindWatcherHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result> { + if let Some(cid) = ctx.cid { + ctx.sandbox + .lock() + .await + .bind_watcher + .add_container(cid.to_string(), iter::once(storage.clone()), ctx.logger) + .await?; + } + new_device("".to_string()) + } +} diff --git a/src/agent/src/storage/block_handler.rs b/src/agent/src/storage/block_handler.rs new file mode 100644 index 000000000000..60330253ce45 --- /dev/null +++ b/src/agent/src/storage/block_handler.rs @@ -0,0 +1,146 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fs; +use std::os::unix::fs::PermissionsExt; +use std::path::Path; +use std::str::FromStr; +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use kata_types::mount::StorageDevice; +use protocols::agent::Storage; +use tracing::instrument; + +use crate::device::{ + get_scsi_device_name, 
get_virtio_blk_pci_device_name, get_virtio_mmio_device_name, + wait_for_pmem_device, +}; +use crate::pci; +use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler}; +#[cfg(target_arch = "s390x")] +use crate::{ccw, device::get_virtio_blk_ccw_device_name}; + +#[derive(Debug)] +pub struct VirtioBlkMmioHandler {} + +#[async_trait::async_trait] +impl StorageHandler for VirtioBlkMmioHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result> { + if !Path::new(&storage.source).exists() { + get_virtio_mmio_device_name(ctx.sandbox, &storage.source) + .await + .context("failed to get mmio device name")?; + } + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +pub struct VirtioBlkPciHandler {} + +#[async_trait::async_trait] +impl StorageHandler for VirtioBlkPciHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result> { + // If hot-plugged, get the device node path based on the PCI path + // otherwise use the virt path provided in Storage Source + if storage.source.starts_with("/dev") { + let metadata = fs::metadata(&storage.source) + .context(format!("get metadata on file {:?}", &storage.source))?; + let mode = metadata.permissions().mode(); + if mode & libc::S_IFBLK == 0 { + return Err(anyhow!("Invalid device {}", &storage.source)); + } + } else { + let pcipath = pci::Path::from_str(&storage.source)?; + let dev_path = get_virtio_blk_pci_device_name(ctx.sandbox, &pcipath).await?; + storage.source = dev_path; + } + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +pub struct VirtioBlkCcwHandler {} + +#[async_trait::async_trait] +impl StorageHandler for VirtioBlkCcwHandler { + #[cfg(target_arch = "s390x")] + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result> { + let ccw_device = ccw::Device::from_str(&storage.source)?; + let dev_path = get_virtio_blk_ccw_device_name(ctx.sandbox, &ccw_device).await?; + storage.source = dev_path; + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } + + #[cfg(not(target_arch = "s390x"))] + #[instrument] + async fn create_device( + &self, + _storage: Storage, + _ctx: &mut StorageContext, + ) -> Result> { + Err(anyhow!("CCW is only supported on s390x")) + } +} + +#[derive(Debug)] +pub struct ScsiHandler {} + +#[async_trait::async_trait] +impl StorageHandler for ScsiHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result> { + // Retrieve the device path from SCSI address. 
+ let dev_path = get_scsi_device_name(ctx.sandbox, &storage.source).await?; + storage.source = dev_path; + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +pub struct PmemHandler {} + +#[async_trait::async_trait] +impl StorageHandler for PmemHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result> { + // Retrieve the device for pmem storage + wait_for_pmem_device(ctx.sandbox, &storage.source).await?; + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} diff --git a/src/agent/src/storage/ephemeral_handler.rs b/src/agent/src/storage/ephemeral_handler.rs new file mode 100644 index 000000000000..8fc70f6959ac --- /dev/null +++ b/src/agent/src/storage/ephemeral_handler.rs @@ -0,0 +1,293 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fs; +use std::fs::OpenOptions; +use std::io::Write; +use std::os::unix::fs::{MetadataExt, PermissionsExt}; +use std::path::Path; +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use kata_sys_util::mount::parse_mount_options; +use kata_types::mount::{StorageDevice, KATA_MOUNT_OPTION_FS_GID}; +use nix::unistd::Gid; +use protocols::agent::Storage; +use slog::Logger; +use tokio::sync::Mutex; +use tracing::instrument; + +use crate::device::{DRIVER_EPHEMERAL_TYPE, FS_TYPE_HUGETLB}; +use crate::mount::baremount; +use crate::sandbox::Sandbox; +use crate::storage::{ + common_storage_handler, new_device, parse_options, StorageContext, StorageHandler, MODE_SETGID, +}; + +const FS_GID_EQ: &str = "fsgid="; +const SYS_FS_HUGEPAGES_PREFIX: &str = "/sys/kernel/mm/hugepages"; + +#[derive(Debug)] +pub struct EphemeralHandler {} + +#[async_trait::async_trait] +impl StorageHandler for EphemeralHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result> { + // hugetlbfs + if storage.fstype == FS_TYPE_HUGETLB { + info!(ctx.logger, "handle hugetlbfs storage"); + // Allocate hugepages before mount + // /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages + // /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + // options eg "pagesize=2097152,size=524288000"(2M, 500M) + Self::allocate_hugepages(ctx.logger, &storage.options.to_vec()) + .context("allocate hugepages")?; + common_storage_handler(ctx.logger, &storage)?; + } else if !storage.options.is_empty() { + // By now we only support one option field: "fsGroup" which + // isn't an valid mount option, thus we should remove it when + // do mount. + let opts = parse_options(&storage.options); + storage.options = Default::default(); + common_storage_handler(ctx.logger, &storage)?; + + // ephemeral_storage didn't support mount options except fsGroup. 
+ if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { + let gid = fsgid.parse::()?; + + nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; + + let meta = fs::metadata(&storage.mount_point)?; + let mut permission = meta.permissions(); + + let o_mode = meta.mode() | MODE_SETGID; + permission.set_mode(o_mode); + fs::set_permissions(&storage.mount_point, permission)?; + } + } else { + common_storage_handler(ctx.logger, &storage)?; + } + + new_device("".to_string()) + } +} + +impl EphemeralHandler { + // Allocate hugepages by writing to sysfs + fn allocate_hugepages(logger: &Logger, options: &[String]) -> Result<()> { + info!(logger, "mounting hugePages storage options: {:?}", options); + + let (pagesize, size) = Self::get_pagesize_and_size_from_option(options) + .context(format!("parse mount options: {:?}", &options))?; + + info!( + logger, + "allocate hugepages. pageSize: {}, size: {}", pagesize, size + ); + + // sysfs entry is always of the form hugepages-${pagesize}kB + // Ref: https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt + let path = Path::new(SYS_FS_HUGEPAGES_PREFIX) + .join(format!("hugepages-{}kB", pagesize / 1024)) + .join("nr_hugepages"); + + // write numpages to nr_hugepages file. + let numpages = format!("{}", size / pagesize); + info!(logger, "write {} pages to {:?}", &numpages, &path); + + let mut file = OpenOptions::new() + .write(true) + .open(&path) + .context(format!("open nr_hugepages directory {:?}", &path))?; + + file.write_all(numpages.as_bytes()) + .context(format!("write nr_hugepages failed: {:?}", &path))?; + + // Even if the write succeeds, the kernel isn't guaranteed to be + // able to allocate all the pages we requested. Verify that it + // did. + let verify = fs::read_to_string(&path).context(format!("reading {:?}", &path))?; + let allocated = verify + .trim_end() + .parse::() + .map_err(|_| anyhow!("Unexpected text {:?} in {:?}", &verify, &path))?; + if allocated != size / pagesize { + return Err(anyhow!( + "Only allocated {} of {} hugepages of size {}", + allocated, + numpages, + pagesize + )); + } + + Ok(()) + } + + // Parse filesystem options string to retrieve hugepage details + // options eg "pagesize=2048,size=107374182" + fn get_pagesize_and_size_from_option(options: &[String]) -> Result<(u64, u64)> { + let mut pagesize_str: Option<&str> = None; + let mut size_str: Option<&str> = None; + + for option in options { + let vars: Vec<&str> = option.trim().split(',').collect(); + + for var in vars { + if let Some(stripped) = var.strip_prefix("pagesize=") { + pagesize_str = Some(stripped); + } else if let Some(stripped) = var.strip_prefix("size=") { + size_str = Some(stripped); + } + + if pagesize_str.is_some() && size_str.is_some() { + break; + } + } + } + + if pagesize_str.is_none() || size_str.is_none() { + return Err(anyhow!("no pagesize/size options found")); + } + + let pagesize = pagesize_str + .unwrap() + .parse::() + .context(format!("parse pagesize: {:?}", &pagesize_str))?; + let size = size_str + .unwrap() + .parse::() + .context(format!("parse size: {:?}", &pagesize_str))?; + + Ok((pagesize, size)) + } +} + +// update_ephemeral_mounts takes a list of ephemeral mounts and remounts them +// with mount options passed by the caller +#[instrument] +pub async fn update_ephemeral_mounts( + logger: Logger, + storages: &[Storage], + _sandbox: &Arc>, +) -> Result<()> { + for storage in storages { + let handler_name = &storage.driver; + let logger = logger.new(o!( + "msg" => "updating tmpfs storage", + 
"subsystem" => "storage", + "storage-type" => handler_name.to_owned())); + + match handler_name.as_str() { + DRIVER_EPHEMERAL_TYPE => { + fs::create_dir_all(&storage.mount_point)?; + + if storage.options.is_empty() { + continue; + } else { + // assume that fsGid has already been set + let mount_path = Path::new(&storage.mount_point); + let src_path = Path::new(&storage.source); + let opts: Vec<&String> = storage + .options + .iter() + .filter(|&opt| !opt.starts_with(FS_GID_EQ)) + .collect(); + let (flags, options) = parse_mount_options(&opts)?; + + info!(logger, "mounting storage"; + "mount-source" => src_path.display(), + "mount-destination" => mount_path.display(), + "mount-fstype" => storage.fstype.as_str(), + "mount-options" => options.as_str(), + ); + + baremount( + src_path, + mount_path, + storage.fstype.as_str(), + flags, + options.as_str(), + &logger, + )?; + } + } + _ => { + return Err(anyhow!( + "Unsupported storage type for syncing mounts {}. Only ephemeral storage update is supported", + storage.driver + )); + } + }; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_pagesize_and_size_from_option() { + let expected_pagesize = 2048; + let expected_size = 107374182; + let expected = (expected_pagesize, expected_size); + + let data = vec![ + // (input, expected, is_ok) + ("size-1=107374182,pagesize-1=2048", expected, false), + ("size-1=107374182,pagesize=2048", expected, false), + ("size=107374182,pagesize-1=2048", expected, false), + ("size=107374182,pagesize=abc", expected, false), + ("size=abc,pagesize=2048", expected, false), + ("size=,pagesize=2048", expected, false), + ("size=107374182,pagesize=", expected, false), + ("size=107374182,pagesize=2048", expected, true), + ("pagesize=2048,size=107374182", expected, true), + ("foo=bar,pagesize=2048,size=107374182", expected, true), + ( + "foo=bar,pagesize=2048,foo1=bar1,size=107374182", + expected, + true, + ), + ( + "pagesize=2048,foo1=bar1,foo=bar,size=107374182", + expected, + true, + ), + ( + "foo=bar,pagesize=2048,foo1=bar1,size=107374182,foo2=bar2", + expected, + true, + ), + ( + "foo=bar,size=107374182,foo1=bar1,pagesize=2048", + expected, + true, + ), + ]; + + for case in data { + let input = case.0; + let r = EphemeralHandler::get_pagesize_and_size_from_option(&[input.to_string()]); + + let is_ok = case.2; + if is_ok { + let expected = case.1; + let (pagesize, size) = r.unwrap(); + assert_eq!(expected.0, pagesize); + assert_eq!(expected.1, size); + } else { + assert!(r.is_err()); + } + } + } +} diff --git a/src/agent/src/storage/fs_handler.rs b/src/agent/src/storage/fs_handler.rs new file mode 100644 index 000000000000..fce59c0b14a3 --- /dev/null +++ b/src/agent/src/storage/fs_handler.rs @@ -0,0 +1,89 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fs; +use std::path::Path; +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use kata_types::mount::StorageDevice; +use protocols::agent::Storage; +use tracing::instrument; + +use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler}; + +#[derive(Debug)] +pub struct OverlayfsHandler {} + +#[async_trait::async_trait] +impl StorageHandler for OverlayfsHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result> { + if storage + .options + .iter() + .any(|e| e == "io.katacontainers.fs-opt.overlay-rw") + { + let cid = ctx + .cid + .clone() + 
.ok_or_else(|| anyhow!("No container id in rw overlay"))?; + let cpath = Path::new(crate::rpc::CONTAINER_BASE).join(cid); + let work = cpath.join("work"); + let upper = cpath.join("upper"); + + fs::create_dir_all(&work).context("Creating overlay work directory")?; + fs::create_dir_all(&upper).context("Creating overlay upper directory")?; + + storage.fstype = "overlay".into(); + storage + .options + .push(format!("upperdir={}", upper.to_string_lossy())); + storage + .options + .push(format!("workdir={}", work.to_string_lossy())); + } + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +pub struct Virtio9pHandler {} + +#[async_trait::async_trait] +impl StorageHandler for Virtio9pHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result> { + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +pub struct VirtioFsHandler {} + +#[async_trait::async_trait] +impl StorageHandler for VirtioFsHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result> { + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} diff --git a/src/agent/src/storage/local_handler.rs b/src/agent/src/storage/local_handler.rs new file mode 100644 index 000000000000..5bcee2d01f98 --- /dev/null +++ b/src/agent/src/storage/local_handler.rs @@ -0,0 +1,61 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fs; +use std::os::unix::fs::PermissionsExt; +use std::sync::Arc; + +use anyhow::{Context, Result}; +use kata_types::mount::{StorageDevice, KATA_MOUNT_OPTION_FS_GID}; +use nix::unistd::Gid; +use protocols::agent::Storage; +use tracing::instrument; + +use crate::storage::{new_device, parse_options, StorageContext, StorageHandler, MODE_SETGID}; + +#[derive(Debug)] +pub struct LocalHandler {} + +#[async_trait::async_trait] +impl StorageHandler for LocalHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + _ctx: &mut StorageContext, + ) -> Result> { + fs::create_dir_all(&storage.mount_point).context(format!( + "failed to create dir all {:?}", + &storage.mount_point + ))?; + + let opts = parse_options(&storage.options); + + let mut need_set_fsgid = false; + if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { + let gid = fsgid.parse::()?; + + nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; + need_set_fsgid = true; + } + + if let Some(mode) = opts.get("mode") { + let mut permission = fs::metadata(&storage.mount_point)?.permissions(); + + let mut o_mode = u32::from_str_radix(mode, 8)?; + + if need_set_fsgid { + // set SetGid mode mask. 
+                o_mode |= MODE_SETGID;
+            }
+            permission.set_mode(o_mode);
+
+            fs::set_permissions(&storage.mount_point, permission)?;
+        }
+
+        new_device("".to_string())
+    }
+}
diff --git a/src/agent/src/storage/mod.rs b/src/agent/src/storage/mod.rs
new file mode 100644
index 000000000000..f312bbd83be2
--- /dev/null
+++ b/src/agent/src/storage/mod.rs
@@ -0,0 +1,789 @@
+// Copyright (c) 2019 Ant Financial
+// Copyright (c) 2023 Alibaba Cloud
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::collections::HashMap;
+use std::fs;
+use std::os::unix::fs::{MetadataExt, PermissionsExt};
+use std::path::Path;
+use std::sync::Arc;
+
+use anyhow::{anyhow, Context, Result};
+use kata_sys_util::mount::{create_mount_destination, parse_mount_options};
+use kata_types::mount::{StorageDevice, StorageHandlerManager, KATA_SHAREDFS_GUEST_PREMOUNT_TAG};
+use nix::unistd::{Gid, Uid};
+use protocols::agent::Storage;
+use protocols::types::FSGroupChangePolicy;
+use slog::Logger;
+use tokio::sync::Mutex;
+use tracing::instrument;
+
+use self::bind_watcher_handler::BindWatcherHandler;
+use self::block_handler::{PmemHandler, ScsiHandler, VirtioBlkMmioHandler, VirtioBlkPciHandler};
+use self::ephemeral_handler::EphemeralHandler;
+use self::fs_handler::{OverlayfsHandler, Virtio9pHandler, VirtioFsHandler};
+use self::local_handler::LocalHandler;
+use crate::device::{
+    DRIVER_9P_TYPE, DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE, DRIVER_EPHEMERAL_TYPE,
+    DRIVER_LOCAL_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_OVERLAYFS_TYPE, DRIVER_SCSI_TYPE,
+    DRIVER_VIRTIOFS_TYPE, DRIVER_WATCHABLE_BIND_TYPE,
+};
+use crate::mount::{baremount, is_mounted, remove_mounts};
+use crate::sandbox::Sandbox;
+
+pub use self::ephemeral_handler::update_ephemeral_mounts;
+
+mod bind_watcher_handler;
+mod block_handler;
+mod ephemeral_handler;
+mod fs_handler;
+mod local_handler;
+
+const RW_MASK: u32 = 0o660;
+const RO_MASK: u32 = 0o440;
+const EXEC_MASK: u32 = 0o110;
+const MODE_SETGID: u32 = 0o2000;
+
+#[derive(Debug)]
+pub struct StorageContext<'a> {
+    cid: &'a Option<String>,
+    logger: &'a Logger,
+    sandbox: &'a Arc<Mutex<Sandbox>>,
+}
+
+/// An implementation of generic storage device.
+#[derive(Default, Debug)]
+pub struct StorageDeviceGeneric {
+    path: Option<String>,
+}
+
+impl StorageDeviceGeneric {
+    /// Create a new instance of `StorageDeviceGeneric`.
+    pub fn new(path: String) -> Self {
+        StorageDeviceGeneric { path: Some(path) }
+    }
+}
+
+impl StorageDevice for StorageDeviceGeneric {
+    fn path(&self) -> Option<&str> {
+        self.path.as_deref()
+    }
+
+    fn cleanup(&self) -> Result<()> {
+        let path = match self.path() {
+            None => return Ok(()),
+            Some(v) => {
+                if v.is_empty() {
+                    // TODO: Bind watch, local and ephemeral volumes have an empty path, which will get leaked.
+                    return Ok(());
+                } else {
+                    v
+                }
+            }
+        };
+        if !Path::new(path).exists() {
+            return Ok(());
+        }
+
+        if matches!(is_mounted(path), Ok(true)) {
+            let mounts = vec![path.to_string()];
+            remove_mounts(&mounts)?;
+        }
+        if matches!(is_mounted(path), Ok(true)) {
+            return Err(anyhow!("failed to umount mountpoint {}", path));
+        }
+
+        let p = Path::new(path);
+        if p.is_dir() {
+            let is_empty = p.read_dir()?.next().is_none();
+            if !is_empty {
+                return Err(anyhow!("directory is not empty when clean up storage"));
+            }
+            // "remove_dir" will fail if the mount point is backed by a read-only filesystem.
+            // This is the case with the device mapper snapshotter, where we mount the block device
+            // directly at the underlying sandbox path which was provided from the base RO kataShared
+            // path from the host.
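+            // Ignore the error here: leaving behind an empty directory is harmless.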
+            let _ = fs::remove_dir(p);
+        } else if !p.is_file() {
+            // TODO: should we remove the file for bind mount?
+            return Err(anyhow!(
+                "storage path {} is neither directory nor file",
+                path
+            ));
+        }
+
+        Ok(())
+    }
+}
+
+/// Trait object to handle storage device.
+#[async_trait::async_trait]
+pub trait StorageHandler: Send + Sync {
+    /// Create a new storage device.
+    async fn create_device(
+        &self,
+        storage: Storage,
+        ctx: &mut StorageContext,
+    ) -> Result<Arc<dyn StorageDevice>>;
+}
+
+#[rustfmt::skip]
+lazy_static! {
+    pub static ref STORAGE_HANDLERS: StorageHandlerManager<Arc<dyn StorageHandler>> = {
+        let mut manager: StorageHandlerManager<Arc<dyn StorageHandler>> = StorageHandlerManager::new();
+        manager.add_handler(DRIVER_9P_TYPE, Arc::new(Virtio9pHandler{})).unwrap();
+        #[cfg(target_arch = "s390x")]
+        manager.add_handler(crate::device::DRIVER_BLK_CCW_TYPE, Arc::new(self::block_handler::VirtioBlkCcwHandler{})).unwrap();
+        manager.add_handler(DRIVER_BLK_MMIO_TYPE, Arc::new(VirtioBlkMmioHandler{})).unwrap();
+        manager.add_handler(DRIVER_BLK_PCI_TYPE, Arc::new(VirtioBlkPciHandler{})).unwrap();
+        manager.add_handler(DRIVER_EPHEMERAL_TYPE, Arc::new(EphemeralHandler{})).unwrap();
+        manager.add_handler(DRIVER_LOCAL_TYPE, Arc::new(LocalHandler{})).unwrap();
+        manager.add_handler(DRIVER_NVDIMM_TYPE, Arc::new(PmemHandler{})).unwrap();
+        manager.add_handler(DRIVER_OVERLAYFS_TYPE, Arc::new(OverlayfsHandler{})).unwrap();
+        manager.add_handler(DRIVER_SCSI_TYPE, Arc::new(ScsiHandler{})).unwrap();
+        manager.add_handler(DRIVER_VIRTIOFS_TYPE, Arc::new(VirtioFsHandler{})).unwrap();
+        manager.add_handler(DRIVER_WATCHABLE_BIND_TYPE, Arc::new(BindWatcherHandler{})).unwrap();
+        manager
+    };
+}
+
+// add_storages takes a list of storages passed by the caller, and performs the
+// associated operations such as waiting for the device to show up and mounting
+// it to a specific location, according to the type of handler chosen, for
+// each storage.
+#[instrument]
+pub async fn add_storages(
+    logger: Logger,
+    storages: Vec<Storage>,
+    sandbox: &Arc<Mutex<Sandbox>>,
+    cid: Option<String>,
+) -> Result<Vec<String>> {
+    let mut mount_list = Vec::new();
+
+    for storage in storages {
+        let path = storage.mount_point.clone();
+        let state = sandbox.lock().await.add_sandbox_storage(&path).await;
+        if state.ref_count().await > 1 {
+            if let Some(path) = state.path() {
+                if !path.is_empty() {
+                    mount_list.push(path.to_string());
+                }
+            }
+            // The device already exists.
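+            // Reuse it and skip mounting the same storage again.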
+            continue;
+        }
+
+        if let Some(handler) = STORAGE_HANDLERS.handler(&storage.driver) {
+            let logger =
+                logger.new(o!( "subsystem" => "storage", "storage-type" => storage.driver.clone()));
+            let mut ctx = StorageContext {
+                cid: &cid,
+                logger: &logger,
+                sandbox,
+            };
+
+            match handler.create_device(storage, &mut ctx).await {
+                Ok(device) => {
+                    match sandbox
+                        .lock()
+                        .await
+                        .update_sandbox_storage(&path, device.clone())
+                    {
+                        Ok(d) => {
+                            if let Some(path) = device.path() {
+                                if !path.is_empty() {
+                                    mount_list.push(path.to_string());
+                                }
+                            }
+                            drop(d);
+                        }
+                        Err(device) => {
+                            error!(logger, "failed to update device for storage");
+                            if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await
+                            {
+                                warn!(logger, "failed to remove dummy sandbox storage {:?}", e);
+                            }
+                            if let Err(e) = device.cleanup() {
+                                error!(
+                                    logger,
+                                    "failed to clean state for storage device {}, {}", path, e
+                                );
+                            }
+                            return Err(anyhow!("failed to update device for storage"));
+                        }
+                    }
+                }
+                Err(e) => {
+                    error!(logger, "failed to create device for storage, error: {e:?}");
+                    if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await {
+                        warn!(logger, "failed to remove dummy sandbox storage {e:?}");
+                    }
+                    return Err(e);
+                }
+            }
+        } else {
+            return Err(anyhow!(
+                "Failed to find the storage handler {}",
+                storage.driver
+            ));
+        }
+    }
+
+    Ok(mount_list)
+}
+
+pub(crate) fn new_device(path: String) -> Result<Arc<dyn StorageDevice>> {
+    let device = StorageDeviceGeneric::new(path);
+    Ok(Arc::new(device))
+}
+
+#[instrument]
+pub(crate) fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String> {
+    mount_storage(logger, storage)?;
+    set_ownership(logger, storage)?;
+    Ok(storage.mount_point.clone())
+}
+
+// mount_storage performs the mount described by the storage structure.
+#[instrument]
+fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "mount"));
+
+    // There's a special mechanism to create mountpoint from a `sharedfs` instance before
+    // starting the kata-agent. Check for such cases.
+    if storage.source == KATA_SHAREDFS_GUEST_PREMOUNT_TAG && is_mounted(&storage.mount_point)?
+    {
+        warn!(
+            logger,
+            "{} already mounted on {}, ignoring...",
+            KATA_SHAREDFS_GUEST_PREMOUNT_TAG,
+            &storage.mount_point
+        );
+        return Ok(());
+    }
+
+    let (flags, options) = parse_mount_options(&storage.options)?;
+    let mount_path = Path::new(&storage.mount_point);
+    let src_path = Path::new(&storage.source);
+    create_mount_destination(src_path, mount_path, "", &storage.fstype)
+        .context("Could not create mountpoint")?;
+
+    info!(logger, "mounting storage";
+        "mount-source" => src_path.display(),
+        "mount-destination" => mount_path.display(),
+        "mount-fstype" => storage.fstype.as_str(),
+        "mount-options" => options.as_str(),
+    );
+
+    baremount(
+        src_path,
+        mount_path,
+        storage.fstype.as_str(),
+        flags,
+        options.as_str(),
+        &logger,
+    )
+}
+
+#[instrument]
+pub(crate) fn parse_options(option_list: &[String]) -> HashMap<String, String> {
+    let mut options = HashMap::new();
+    for opt in option_list {
+        let fields: Vec<&str> = opt.split('=').collect();
+        if fields.len() == 2 {
+            options.insert(fields[0].to_string(), fields[1].to_string());
+        }
+    }
+    options
+}
+
+#[instrument]
+pub fn set_ownership(logger: &Logger, storage: &Storage) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "mount", "fn" => "set_ownership"));
+
+    // If fsGroup is not set, skip performing ownership change
+    if storage.fs_group.is_none() {
+        return Ok(());
+    }
+
+    let fs_group = storage.fs_group();
+    let read_only = storage.options.contains(&String::from("ro"));
+    let mount_path = Path::new(&storage.mount_point);
+    let metadata = mount_path.metadata().map_err(|err| {
+        error!(logger, "failed to obtain metadata for mount path";
+            "mount-path" => mount_path.to_str(),
+            "error" => err.to_string(),
+        );
+        err
+    })?;
+
+    if fs_group.group_change_policy == FSGroupChangePolicy::OnRootMismatch.into()
+        && metadata.gid() == fs_group.group_id
+    {
+        let mut mask = if read_only { RO_MASK } else { RW_MASK };
+        mask |= EXEC_MASK;
+
+        // With fsGroup change policy to OnRootMismatch, if the current
+        // gid of the mount path root directory matches the desired gid
+        // and the current permission of mount path root directory is correct,
+        // then ownership change will be skipped.
+        let current_mode = metadata.permissions().mode();
+        if (mask & current_mode == mask) && (current_mode & MODE_SETGID != 0) {
+            info!(logger, "skipping ownership change for volume";
+                "mount-path" => mount_path.to_str(),
+                "fs-group" => fs_group.group_id.to_string(),
+            );
+            return Ok(());
+        }
+    }
+
+    info!(logger, "performing recursive ownership change";
+        "mount-path" => mount_path.to_str(),
+        "fs-group" => fs_group.group_id.to_string(),
+    );
+    recursive_ownership_change(
+        mount_path,
+        None,
+        Some(Gid::from_raw(fs_group.group_id)),
+        read_only,
+    )
+}
+
+#[instrument]
+pub fn recursive_ownership_change(
+    path: &Path,
+    uid: Option<Uid>,
+    gid: Option<Gid>,
+    read_only: bool,
+) -> Result<()> {
+    let mut mask = if read_only { RO_MASK } else { RW_MASK };
+    if path.is_dir() {
+        for entry in fs::read_dir(path)? {
+            recursive_ownership_change(entry?.path().as_path(), uid, gid, read_only)?;
+        }
+        mask |= EXEC_MASK;
+        mask |= MODE_SETGID;
+    }
+
+    // We do not want to change the permission of the underlying file
+    // using symlink. Hence we skip symlinks from recursive ownership
+    // and permission changes.
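+    // Note that chown(2) follows symlinks, so it would affect the link target instead.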
+ if path.is_symlink() { + return Ok(()); + } + + nix::unistd::chown(path, uid, gid)?; + + if gid.is_some() { + let metadata = path.metadata()?; + let mut permission = metadata.permissions(); + let target_mode = metadata.mode() | mask; + permission.set_mode(target_mode); + fs::set_permissions(path, permission)?; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use anyhow::Error; + use nix::mount::MsFlags; + use protocols::agent::FSGroup; + use std::fs::File; + use tempfile::{tempdir, Builder}; + use test_utils::{ + skip_if_not_root, skip_loop_by_user, skip_loop_if_not_root, skip_loop_if_root, TestUserType, + }; + + #[test] + fn test_mount_storage() { + #[derive(Debug)] + struct TestData<'a> { + test_user: TestUserType, + storage: Storage, + error_contains: &'a str, + + make_source_dir: bool, + make_mount_dir: bool, + deny_mount_permission: bool, + } + + impl Default for TestData<'_> { + fn default() -> Self { + TestData { + test_user: TestUserType::Any, + storage: Storage { + mount_point: "mnt".to_string(), + source: "src".to_string(), + fstype: "tmpfs".to_string(), + ..Default::default() + }, + make_source_dir: true, + make_mount_dir: false, + deny_mount_permission: false, + error_contains: "", + } + } + } + + let tests = &[ + TestData { + test_user: TestUserType::NonRootOnly, + error_contains: "EPERM: Operation not permitted", + ..Default::default() + }, + TestData { + test_user: TestUserType::RootOnly, + ..Default::default() + }, + TestData { + storage: Storage { + mount_point: "mnt".to_string(), + source: "src".to_string(), + fstype: "bind".to_string(), + ..Default::default() + }, + make_source_dir: false, + make_mount_dir: true, + error_contains: "Could not create mountpoint", + ..Default::default() + }, + TestData { + test_user: TestUserType::NonRootOnly, + deny_mount_permission: true, + error_contains: "Could not create mountpoint", + ..Default::default() + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + skip_loop_by_user!(msg, d.test_user); + + let drain = slog::Discard; + let logger = slog::Logger::root(drain, o!()); + + let tempdir = tempdir().unwrap(); + + let source = tempdir.path().join(&d.storage.source); + let mount_point = tempdir.path().join(&d.storage.mount_point); + + let storage = Storage { + source: source.to_str().unwrap().to_string(), + mount_point: mount_point.to_str().unwrap().to_string(), + ..d.storage.clone() + }; + + if d.make_source_dir { + fs::create_dir_all(&storage.source).unwrap(); + } + if d.make_mount_dir { + fs::create_dir_all(&storage.mount_point).unwrap(); + } + + if d.deny_mount_permission { + fs::set_permissions( + mount_point.parent().unwrap(), + fs::Permissions::from_mode(0o000), + ) + .unwrap(); + } + + let result = mount_storage(&logger, &storage); + + // restore permissions so tempdir can be cleaned up + if d.deny_mount_permission { + fs::set_permissions( + mount_point.parent().unwrap(), + fs::Permissions::from_mode(0o755), + ) + .unwrap(); + } + + if result.is_ok() { + nix::mount::umount(&mount_point).unwrap(); + } + + let msg = format!("{}: result: {:?}", msg, result); + if d.error_contains.is_empty() { + assert!(result.is_ok(), "{}", msg); + } else { + assert!(result.is_err(), "{}", msg); + let error_msg = format!("{}", result.unwrap_err()); + assert!(error_msg.contains(d.error_contains), "{}", msg); + } + } + } + + #[test] + fn test_set_ownership() { + skip_if_not_root!(); + + let logger = slog::Logger::root(slog::Discard, o!()); + + #[derive(Debug)] + struct TestData<'a> 
{ + mount_path: &'a str, + fs_group: Option, + read_only: bool, + expected_group_id: u32, + expected_permission: u32, + } + + let tests = &[ + TestData { + mount_path: "foo", + fs_group: None, + read_only: false, + expected_group_id: 0, + expected_permission: 0, + }, + TestData { + mount_path: "rw_mount", + fs_group: Some(FSGroup { + group_id: 3000, + group_change_policy: FSGroupChangePolicy::Always.into(), + ..Default::default() + }), + read_only: false, + expected_group_id: 3000, + expected_permission: RW_MASK | EXEC_MASK | MODE_SETGID, + }, + TestData { + mount_path: "ro_mount", + fs_group: Some(FSGroup { + group_id: 3000, + group_change_policy: FSGroupChangePolicy::OnRootMismatch.into(), + ..Default::default() + }), + read_only: true, + expected_group_id: 3000, + expected_permission: RO_MASK | EXEC_MASK | MODE_SETGID, + }, + ]; + + let tempdir = tempdir().expect("failed to create tmpdir"); + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let mount_dir = tempdir.path().join(d.mount_path); + fs::create_dir(&mount_dir) + .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg)); + + let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode(); + let mut storage_data = Storage::new(); + if d.read_only { + storage_data.set_options(vec!["foo".to_string(), "ro".to_string()]); + } + if let Some(fs_group) = d.fs_group.clone() { + storage_data.set_fs_group(fs_group); + } + storage_data.mount_point = mount_dir.clone().into_os_string().into_string().unwrap(); + + let result = set_ownership(&logger, &storage_data); + assert!(result.is_ok()); + + assert_eq!( + mount_dir.as_path().metadata().unwrap().gid(), + d.expected_group_id + ); + assert_eq!( + mount_dir.as_path().metadata().unwrap().permissions().mode(), + (directory_mode | d.expected_permission) + ); + } + } + + #[test] + fn test_recursive_ownership_change() { + skip_if_not_root!(); + + const COUNT: usize = 5; + + #[derive(Debug)] + struct TestData<'a> { + // Directory where the recursive ownership change should be performed on + path: &'a str, + + // User ID for ownership change + uid: u32, + + // Group ID for ownership change + gid: u32, + + // Set when the permission should be read-only + read_only: bool, + + // The expected permission of all directories after ownership change + expected_permission_directory: u32, + + // The expected permission of all files after ownership change + expected_permission_file: u32, + } + + let tests = &[ + TestData { + path: "no_gid_change", + uid: 0, + gid: 0, + read_only: false, + expected_permission_directory: 0, + expected_permission_file: 0, + }, + TestData { + path: "rw_gid_change", + uid: 0, + gid: 3000, + read_only: false, + expected_permission_directory: RW_MASK | EXEC_MASK | MODE_SETGID, + expected_permission_file: RW_MASK, + }, + TestData { + path: "ro_gid_change", + uid: 0, + gid: 3000, + read_only: true, + expected_permission_directory: RO_MASK | EXEC_MASK | MODE_SETGID, + expected_permission_file: RO_MASK, + }, + ]; + + let tempdir = tempdir().expect("failed to create tmpdir"); + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let mount_dir = tempdir.path().join(d.path); + fs::create_dir(&mount_dir) + .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg)); + + let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode(); + let mut file_mode: u32 = 0; + + // create testing directories and files + for n in 1..COUNT { + let nest_dir = 
mount_dir.join(format!("nested{}", n)); + fs::create_dir(&nest_dir) + .unwrap_or_else(|_| panic!("{}: failed to create nest directory", msg)); + + for f in 1..COUNT { + let filename = nest_dir.join(format!("file{}", f)); + File::create(&filename) + .unwrap_or_else(|_| panic!("{}: failed to create file", msg)); + file_mode = filename.as_path().metadata().unwrap().permissions().mode(); + } + } + + let uid = if d.uid > 0 { + Some(Uid::from_raw(d.uid)) + } else { + None + }; + let gid = if d.gid > 0 { + Some(Gid::from_raw(d.gid)) + } else { + None + }; + let result = recursive_ownership_change(&mount_dir, uid, gid, d.read_only); + + assert!(result.is_ok()); + + assert_eq!(mount_dir.as_path().metadata().unwrap().gid(), d.gid); + assert_eq!( + mount_dir.as_path().metadata().unwrap().permissions().mode(), + (directory_mode | d.expected_permission_directory) + ); + + for n in 1..COUNT { + let nest_dir = mount_dir.join(format!("nested{}", n)); + for f in 1..COUNT { + let filename = nest_dir.join(format!("file{}", f)); + let file = Path::new(&filename); + + assert_eq!(file.metadata().unwrap().gid(), d.gid); + assert_eq!( + file.metadata().unwrap().permissions().mode(), + (file_mode | d.expected_permission_file) + ); + } + + let dir = Path::new(&nest_dir); + assert_eq!(dir.metadata().unwrap().gid(), d.gid); + assert_eq!( + dir.metadata().unwrap().permissions().mode(), + (directory_mode | d.expected_permission_directory) + ); + } + } + } + + #[tokio::test] + #[serial_test::serial] + async fn cleanup_storage() { + skip_if_not_root!(); + + let logger = slog::Logger::root(slog::Discard, o!()); + + let tmpdir = Builder::new().tempdir().unwrap(); + let tmpdir_path = tmpdir.path().to_str().unwrap(); + + let srcdir = Builder::new() + .prefix("src") + .tempdir_in(tmpdir_path) + .unwrap(); + let srcdir_path = srcdir.path().to_str().unwrap(); + let empty_file = Path::new(srcdir_path).join("emptyfile"); + fs::write(&empty_file, "test").unwrap(); + + let destdir = Builder::new() + .prefix("dest") + .tempdir_in(tmpdir_path) + .unwrap(); + let destdir_path = destdir.path().to_str().unwrap(); + + let emptydir = Builder::new() + .prefix("empty") + .tempdir_in(tmpdir_path) + .unwrap(); + + let s = StorageDeviceGeneric::default(); + assert!(s.cleanup().is_ok()); + + let s = StorageDeviceGeneric::new("".to_string()); + assert!(s.cleanup().is_ok()); + + let invalid_dir = emptydir + .path() + .join("invalid") + .to_str() + .unwrap() + .to_string(); + let s = StorageDeviceGeneric::new(invalid_dir); + assert!(s.cleanup().is_ok()); + + assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok()); + + let s = StorageDeviceGeneric::new(destdir_path.to_string()); + assert!(s.cleanup().is_ok()); + + // fail to remove non-empty directory + let s = StorageDeviceGeneric::new(srcdir_path.to_string()); + s.cleanup().unwrap_err(); + + // remove a directory without umount + fs::remove_file(&empty_file).unwrap(); + s.cleanup().unwrap(); + } + + fn bind_mount(src: &str, dst: &str, logger: &Logger) -> Result<(), Error> { + let src_path = Path::new(src); + let dst_path = Path::new(dst); + + baremount(src_path, dst_path, "bind", MsFlags::MS_BIND, "", logger) + } +} diff --git a/src/agent/src/test_utils.rs b/src/agent/src/test_utils.rs deleted file mode 100644 index d25eb129d163..000000000000 --- a/src/agent/src/test_utils.rs +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (c) 2019 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -// -#![allow(clippy::module_inception)] - -#[cfg(test)] -pub mod test_utils { - #[derive(Debug, 
PartialEq)] - pub enum TestUserType { - RootOnly, - NonRootOnly, - Any, - } - - #[macro_export] - macro_rules! skip_if_root { - () => { - if nix::unistd::Uid::effective().is_root() { - println!("INFO: skipping {} which needs non-root", module_path!()); - return; - } - }; - } - - #[macro_export] - macro_rules! skip_if_not_root { - () => { - if !nix::unistd::Uid::effective().is_root() { - println!("INFO: skipping {} which needs root", module_path!()); - return; - } - }; - } - - #[macro_export] - macro_rules! skip_loop_if_root { - ($msg:expr) => { - if nix::unistd::Uid::effective().is_root() { - println!( - "INFO: skipping loop {} in {} which needs non-root", - $msg, - module_path!() - ); - continue; - } - }; - } - - #[macro_export] - macro_rules! skip_loop_if_not_root { - ($msg:expr) => { - if !nix::unistd::Uid::effective().is_root() { - println!( - "INFO: skipping loop {} in {} which needs root", - $msg, - module_path!() - ); - continue; - } - }; - } - - // Parameters: - // - // 1: expected Result - // 2: actual Result - // 3: string used to identify the test on error - #[macro_export] - macro_rules! assert_result { - ($expected_result:expr, $actual_result:expr, $msg:expr) => { - if $expected_result.is_ok() { - let expected_value = $expected_result.as_ref().unwrap(); - let actual_value = $actual_result.unwrap(); - assert!(*expected_value == actual_value, "{}", $msg); - } else { - assert!($actual_result.is_err(), "{}", $msg); - - let expected_error = $expected_result.as_ref().unwrap_err(); - let expected_error_msg = format!("{:?}", expected_error); - - let actual_error_msg = format!("{:?}", $actual_result.unwrap_err()); - - assert!(expected_error_msg == actual_error_msg, "{}", $msg); - } - }; - } - - #[macro_export] - macro_rules! skip_loop_by_user { - ($msg:expr, $user:expr) => { - if $user == TestUserType::RootOnly { - skip_loop_if_not_root!($msg); - } else if $user == TestUserType::NonRootOnly { - skip_loop_if_root!($msg); - } - }; - } -} diff --git a/src/agent/src/tracer.rs b/src/agent/src/tracer.rs index 1854876da292..1199b601c9ba 100644 --- a/src/agent/src/tracer.rs +++ b/src/agent/src/tracer.rs @@ -69,6 +69,8 @@ macro_rules! trace_rpc_call { propagator.extract(&extract_carrier_from_ttrpc($ctx)) }); + info!(sl(), "rpc call from shim to agent: {:?}", $name); + // generate tracing span let rpc_span = span!(tracing::Level::INFO, $name, "mod"="rpc.rs", req=?$req); diff --git a/src/agent/src/uevent.rs b/src/agent/src/uevent.rs index 5d1f554940c5..53b7c103dc2d 100644 --- a/src/agent/src/uevent.rs +++ b/src/agent/src/uevent.rs @@ -19,11 +19,9 @@ use tokio::sync::watch::Receiver; use tokio::sync::Mutex; use tracing::instrument; -// Convenience macro to obtain the scope logger -macro_rules! sl { - () => { - slog_scope::logger().new(o!("subsystem" => "uevent")) - }; +// Convenience function to obtain the scope logger. 
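+// Each call returns a child logger tagged with the "uevent" subsystem.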
+fn sl() -> slog::Logger { + slog_scope::logger().new(o!("subsystem" => "uevent")) } #[derive(Debug, Default, Clone, PartialEq, Eq)] @@ -120,11 +118,11 @@ pub async fn wait_for_uevent( ) -> Result { let logprefix = format!("Waiting for {:?}", &matcher); - info!(sl!(), "{}", logprefix); + info!(sl(), "{}", logprefix); let mut sb = sandbox.lock().await; for uev in sb.uevent_map.values() { if matcher.is_match(uev) { - info!(sl!(), "{}: found {:?} in uevent map", logprefix, &uev); + info!(sl(), "{}: found {:?} in uevent map", logprefix, &uev); return Ok(uev.clone()); } } @@ -139,9 +137,9 @@ pub async fn wait_for_uevent( sb.uevent_watchers.push(Some((Box::new(matcher), tx))); drop(sb); // unlock - info!(sl!(), "{}: waiting on channel", logprefix); + info!(sl(), "{}: waiting on channel", logprefix); - let hotplug_timeout = AGENT_CONFIG.read().await.hotplug_timeout; + let hotplug_timeout = AGENT_CONFIG.hotplug_timeout; let uev = match tokio::time::timeout(hotplug_timeout, rx).await { Ok(v) => v?, @@ -157,7 +155,7 @@ pub async fn wait_for_uevent( } }; - info!(sl!(), "{}: found {:?} on channel", logprefix, &uev); + info!(sl(), "{}: found {:?} on channel", logprefix, &uev); Ok(uev) } diff --git a/src/agent/src/watcher.rs b/src/agent/src/watcher.rs index e423126613d2..a6cf4113b43e 100644 --- a/src/agent/src/watcher.rs +++ b/src/agent/src/watcher.rs @@ -11,7 +11,7 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::SystemTime; -use anyhow::{ensure, Context, Result}; +use anyhow::{anyhow, ensure, Context, Result}; use async_recursion::async_recursion; use nix::mount::{umount, MsFlags}; use nix::unistd::{Gid, Uid}; @@ -34,9 +34,13 @@ const MAX_SIZE_PER_WATCHABLE_MOUNT: u64 = 1024 * 1024; /// How often to check for modified files. const WATCH_INTERVAL_SECS: u64 = 2; -/// Destination path for tmpfs +/// Destination path for tmpfs, which used by the golang runtime const WATCH_MOUNT_POINT_PATH: &str = "/run/kata-containers/shared/containers/watchable/"; +/// Destination path for tmpfs for runtime-rs passthrough file sharing +const WATCH_MOUNT_POINT_PATH_PASSTHROUGH: &str = + "/run/kata-containers/shared/containers/passthrough/watchable/"; + /// Represents a single watched storage entry which may have multiple files to watch. #[derive(Default, Debug, Clone)] struct Storage { @@ -120,7 +124,7 @@ impl Storage { // if we are creating a directory: just create it, nothing more to do if metadata.file_type().is_dir() { - let dest_file_path = self.make_target_path(&source_file_path)?; + let dest_file_path = self.make_target_path(source_file_path)?; fs::create_dir_all(&dest_file_path) .await @@ -148,7 +152,7 @@ impl Storage { // Assume target mount is a file path self.target_mount_point.clone() } else { - let dest_file_path = self.make_target_path(&source_file_path)?; + let dest_file_path = self.make_target_path(source_file_path)?; if let Some(path) = dest_file_path.parent() { debug!(logger, "Creating destination directory: {}", path.display()); @@ -451,7 +455,7 @@ impl BindWatcher { ) -> Result<()> { if self.watch_thread.is_none() { // Virtio-fs shared path is RO by default, so we back the target-mounts by tmpfs. - self.mount(logger).await?; + self.mount(logger).await.context("mount watch directory")?; // Spawn background thread to monitor changes self.watch_thread = Some(Self::spawn_watcher( @@ -500,16 +504,28 @@ impl BindWatcher { } async fn mount(&self, logger: &Logger) -> Result<()> { - fs::create_dir_all(WATCH_MOUNT_POINT_PATH).await?; + // the watchable directory is created on the host side. 
+ // here we can only check if it exist. + // first we will check the default WATCH_MOUNT_POINT_PATH, + // and then check WATCH_MOUNT_POINT_PATH_PASSTHROUGH + // in turn which are introduced by runtime-rs file sharing. + let watchable_dir = if Path::new(WATCH_MOUNT_POINT_PATH).is_dir() { + WATCH_MOUNT_POINT_PATH + } else if Path::new(WATCH_MOUNT_POINT_PATH_PASSTHROUGH).is_dir() { + WATCH_MOUNT_POINT_PATH_PASSTHROUGH + } else { + return Err(anyhow!("watchable mount source not found")); + }; baremount( Path::new("tmpfs"), - Path::new(WATCH_MOUNT_POINT_PATH), + Path::new(watchable_dir), "tmpfs", MsFlags::empty(), "", logger, - )?; + ) + .context("baremount watchable mount path")?; Ok(()) } @@ -520,7 +536,12 @@ impl BindWatcher { handle.abort(); } - let _ = umount(WATCH_MOUNT_POINT_PATH); + // try umount watchable mount path in turn + if Path::new(WATCH_MOUNT_POINT_PATH).is_dir() { + let _ = umount(WATCH_MOUNT_POINT_PATH); + } else if Path::new(WATCH_MOUNT_POINT_PATH_PASSTHROUGH).is_dir() { + let _ = umount(WATCH_MOUNT_POINT_PATH_PASSTHROUGH); + } } } @@ -528,10 +549,11 @@ impl BindWatcher { mod tests { use super::*; use crate::mount::is_mounted; - use crate::skip_if_not_root; use nix::unistd::{Gid, Uid}; + use scopeguard::defer; use std::fs; use std::thread; + use test_utils::skip_if_not_root; async fn create_test_storage(dir: &Path, id: &str) -> Result<(protos::Storage, PathBuf)> { let src_path = dir.join(format!("src{}", id)); @@ -756,7 +778,7 @@ mod tests { 22 ); assert_eq!( - fs::read_to_string(&entries.0[0].target_mount_point.as_path().join("1.txt")).unwrap(), + fs::read_to_string(entries.0[0].target_mount_point.as_path().join("1.txt")).unwrap(), "updated" ); @@ -801,7 +823,7 @@ mod tests { 2 ); assert_eq!( - fs::read_to_string(&entries.0[1].target_mount_point.as_path().join("foo.txt")).unwrap(), + fs::read_to_string(entries.0[1].target_mount_point.as_path().join("foo.txt")).unwrap(), "updated" ); @@ -978,7 +1000,7 @@ mod tests { // create a path we'll remove later fs::create_dir_all(source_dir.path().join("tmp")).unwrap(); - fs::write(&source_dir.path().join("tmp/test-file"), "foo").unwrap(); + fs::write(source_dir.path().join("tmp/test-file"), "foo").unwrap(); assert_eq!(entry.scan(&logger).await.unwrap(), 3); // root, ./tmp, test-file // Verify expected directory, file: @@ -1269,19 +1291,26 @@ mod tests { #[tokio::test] #[serial] + #[cfg(not(target_arch = "aarch64"))] async fn create_tmpfs() { skip_if_not_root!(); let logger = slog::Logger::root(slog::Discard, o!()); let mut watcher = BindWatcher::default(); - watcher.mount(&logger).await.unwrap(); - assert!(is_mounted(WATCH_MOUNT_POINT_PATH).unwrap()); + for mount_point in [WATCH_MOUNT_POINT_PATH, WATCH_MOUNT_POINT_PATH_PASSTHROUGH] { + fs::create_dir_all(mount_point).unwrap(); + // ensure the watchable directory is deleted. + defer!(fs::remove_dir_all(mount_point).unwrap()); - thread::sleep(Duration::from_millis(20)); + watcher.mount(&logger).await.unwrap(); + assert!(is_mounted(mount_point).unwrap()); + + thread::sleep(Duration::from_millis(20)); - watcher.cleanup(); - assert!(!is_mounted(WATCH_MOUNT_POINT_PATH).unwrap()); + watcher.cleanup(); + assert!(!is_mounted(mount_point).unwrap()); + } } #[tokio::test] @@ -1289,6 +1318,10 @@ mod tests { async fn spawn_thread() { skip_if_not_root!(); + fs::create_dir_all(WATCH_MOUNT_POINT_PATH).unwrap(); + // ensure the watchable directory is deleted. 
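+        // (defer! from scopeguard runs this cleanup when the test scope exits.)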
+ defer!(fs::remove_dir_all(WATCH_MOUNT_POINT_PATH).unwrap()); + let source_dir = tempfile::tempdir().unwrap(); fs::write(source_dir.path().join("1.txt"), "one").unwrap(); @@ -1319,6 +1352,10 @@ mod tests { async fn verify_container_cleanup_watching() { skip_if_not_root!(); + fs::create_dir_all(WATCH_MOUNT_POINT_PATH).unwrap(); + // ensure the watchable directory is deleted. + defer!(fs::remove_dir_all(WATCH_MOUNT_POINT_PATH).unwrap()); + let source_dir = tempfile::tempdir().unwrap(); fs::write(source_dir.path().join("1.txt"), "one").unwrap(); diff --git a/src/agent/vsock-exporter/Cargo.toml b/src/agent/vsock-exporter/Cargo.toml index 87e66ed99947..7bec1d87acf8 100644 --- a/src/agent/vsock-exporter/Cargo.toml +++ b/src/agent/vsock-exporter/Cargo.toml @@ -3,11 +3,12 @@ name = "vsock-exporter" version = "0.1.0" authors = ["James O. D. Hunt "] edition = "2018" +license = "Apache-2.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -nix = "0.23.0" +nix = "0.24.2" libc = "0.2.94" thiserror = "1.0.26" opentelemetry = { version = "0.14.0", features=["serialize"] } @@ -17,4 +18,4 @@ bincode = "1.3.3" byteorder = "1.4.3" slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug"] } async-trait = "0.1.50" -tokio = "1.2.0" +tokio = "1.28.1" diff --git a/src/dragonball/.gitignore b/src/dragonball/.gitignore new file mode 100644 index 000000000000..c5078494edbc --- /dev/null +++ b/src/dragonball/.gitignore @@ -0,0 +1,2 @@ +target +.idea diff --git a/src/dragonball/Cargo.lock b/src/dragonball/Cargo.lock new file mode 100644 index 000000000000..cec250d497ad --- /dev/null +++ b/src/dragonball/Cargo.lock @@ -0,0 +1,1833 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" + +[[package]] +name = "arc-swap" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" + +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "backtrace" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + +[[package]] +name = "blake3" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "digest", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bumpalo" +version = "3.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "bytes" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" + +[[package]] +name = "caps" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "190baaad529bcfbde9e1a19022c42781bdb6ff9de25721abdb8fd98c0807730b" +dependencies = [ + "libc", + "thiserror", +] + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "jobserver", + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "defd4e7873dbddba6c7c91e199c7fcb946abc4a6a4ac3195400bcfb01b5de877" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets 0.48.5", +] + +[[package]] +name = "cmake" +version = "0.1.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +dependencies = [ + "cc", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + +[[package]] +name = "core-foundation-sys" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" + +[[package]] +name = "cpufeatures" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "dbs-address-space" +version = "0.3.0" +dependencies = [ + "arc-swap", + "lazy_static", + "libc", + "nix 0.23.2", + "thiserror", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "dbs-allocator" 
+version = "0.1.1" +dependencies = [ + "thiserror", +] + +[[package]] +name = "dbs-arch" +version = "0.2.3" +dependencies = [ + "kvm-bindings", + "kvm-ioctls", + "libc", + "memoffset", + "thiserror", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "dbs-boot" +version = "0.4.0" +dependencies = [ + "dbs-arch", + "kvm-bindings", + "kvm-ioctls", + "lazy_static", + "libc", + "thiserror", + "vm-fdt", + "vm-memory", +] + +[[package]] +name = "dbs-device" +version = "0.2.0" +dependencies = [ + "thiserror", +] + +[[package]] +name = "dbs-interrupt" +version = "0.2.2" +dependencies = [ + "dbs-arch", + "dbs-device", + "kvm-bindings", + "kvm-ioctls", + "libc", + "vmm-sys-util", +] + +[[package]] +name = "dbs-legacy-devices" +version = "0.1.1" +dependencies = [ + "dbs-device", + "dbs-utils", + "libc", + "log", + "serde", + "vm-superio", + "vmm-sys-util", +] + +[[package]] +name = "dbs-upcall" +version = "0.3.0" +dependencies = [ + "anyhow", + "dbs-utils", + "dbs-virtio-devices", + "log", + "thiserror", + "timerfd", +] + +[[package]] +name = "dbs-utils" +version = "0.2.1" +dependencies = [ + "anyhow", + "event-manager", + "libc", + "log", + "serde", + "thiserror", + "timerfd", + "vmm-sys-util", +] + +[[package]] +name = "dbs-virtio-devices" +version = "0.3.1" +dependencies = [ + "byteorder", + "caps", + "dbs-device", + "dbs-interrupt", + "dbs-utils", + "epoll", + "fuse-backend-rs", + "io-uring", + "kvm-bindings", + "kvm-ioctls", + "libc", + "log", + "nix 0.24.3", + "nydus-api", + "nydus-rafs", + "nydus-storage", + "rlimit", + "sendfd", + "serde", + "serde_json", + "thiserror", + "threadpool", + "virtio-bindings", + "virtio-queue", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "deranged" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + +[[package]] +name = "dragonball" +version = "0.1.0" +dependencies = [ + "anyhow", + "arc-swap", + "bytes", + "crossbeam-channel", + "dbs-address-space", + "dbs-allocator", + "dbs-arch", + "dbs-boot", + "dbs-device", + "dbs-interrupt", + "dbs-legacy-devices", + "dbs-upcall", + "dbs-utils", + "dbs-virtio-devices", + "fuse-backend-rs", + "kvm-bindings", + "kvm-ioctls", + "lazy_static", + "libc", + "linux-loader", + "log", + "nix 0.24.3", + "procfs 0.12.0", + "prometheus", + "seccompiler", + "serde", + "serde_derive", + "serde_json", + "slog", + "slog-async", + "slog-scope", + "slog-term", + "test-utils", + "thiserror", + "virtio-queue", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "epoll" +version = "4.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20df693c700404f7e19d4d6fae6b15215d2913c27955d2b9d6f2c0f537511cd0" 
+dependencies = [ + "bitflags 1.3.2", + "libc", +] + +[[package]] +name = "errno" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136526188508e25c6fef639d7927dfb3e0e3084488bf202267829cf7fc23dbdd" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "event-manager" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "377fa591135fbe23396a18e2655a6d5481bf7c5823cdfa3cc81b01a229cbe640" +dependencies = [ + "libc", + "vmm-sys-util", +] + +[[package]] +name = "filetime" +version = "0.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4029edd3e734da6fe05b6cd7bd2960760a616bd2ddd0d59a0124746d6272af0" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.3.5", + "windows-sys 0.48.0", +] + +[[package]] +name = "flate2" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +dependencies = [ + "crc32fast", + "libz-sys", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "fuse-backend-rs" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f85357722be4bf3d0b7548bedf7499686c77628c2c61cb99c6519463f7a9e5f0" +dependencies = [ + "arc-swap", + "bitflags 1.3.2", + "caps", + "core-foundation-sys", + "lazy_static", + "libc", + "log", + "mio", + "nix 0.24.3", + "virtio-queue", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "hermit-abi" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "iana-time-zone" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "io-lifetimes" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" +dependencies = [ + "hermit-abi 0.3.2", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "io-uring" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd1e1a01cfb924fd8c5c43b6827965db394f5a3a16c599ce03452266e1cf984c" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "jobserver" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "kvm-bindings" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efe70e65a5b092161d17f5005b66e5eefe7a94a70c332e755036fc4af78c4e79" +dependencies = [ + "vmm-sys-util", +] + +[[package]] +name = "kvm-ioctls" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3a321cabd827642499c77e27314f388dd83a717a5ca716b86476fb947f73ae4" +dependencies = [ + "kvm-bindings", + "libc", + "vmm-sys-util", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.147" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" + +[[package]] +name = "libz-sys" +version = "1.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" +dependencies = [ + "cc", + "cmake", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-loader" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "b9259ddbfbb52cc918f6bbc60390004ddd0228cf1d85f402009ff2b3d95de83f" +dependencies = [ + "vm-memory", +] + +[[package]] +name = "linux-raw-sys" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" + +[[package]] +name = "linux-raw-sys" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" + +[[package]] +name = "lock_api" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "lz4" +version = "1.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" +dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "memchr" +version = "2.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +dependencies = [ + "libc", + "log", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] +name = "nix" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f3790c00a0150112de0f4cd161e3d7fc4b2d8a5542ffc35f099a2562aecb35c" +dependencies = [ + "bitflags 1.3.2", + "cc", + "cfg-if", + "libc", + "memoffset", +] + +[[package]] +name = "nix" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", + "memoffset", +] + +[[package]] +name = "num-traits" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.2", + "libc", +] + +[[package]] +name = "num_threads" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +dependencies = [ + "libc", +] + +[[package]] +name = "nydus-api" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64c62d8a36c10b654b87246a39861b2c05f68e96ab3b2f002f5a54f406d5e0e" +dependencies = [ + "libc", + "log", + "serde", + "serde_json", + "toml", +] + +[[package]] +name = "nydus-rafs" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adde865ef71c91c5f139c4c05ca5aedb6fbd53f530d646b13409ac5220b85467" +dependencies = [ + "anyhow", + "arc-swap", + "bitflags 1.3.2", + "fuse-backend-rs", + "lazy_static", + "libc", + "log", + "nix 0.24.3", + "nydus-api", + "nydus-storage", + "nydus-utils", + "serde", + "serde_json", + "vm-memory", +] + +[[package]] +name = "nydus-storage" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4023f15303dbbda47797d07e9acd2045862ce82c7e28cd66f70b09bda5584cbb" +dependencies = [ + "arc-swap", + "bitflags 1.3.2", + "fuse-backend-rs", + "hex", + "lazy_static", + "libc", + "log", + "nix 0.24.3", + "nydus-api", + "nydus-utils", + "serde", + "serde_json", + "tar", + "tokio", + "vm-memory", +] + +[[package]] +name = "nydus-utils" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1f7bcde0f3906cf49101f2d40e485b0155eee97e3358eefd4783448c4f69c96" +dependencies = [ + "blake3", + "flate2", + "httpdate", + "lazy_static", + "libc", + "libz-sys", + "log", + "lz4", + "lz4-sys", + "nix 0.24.3", + "nydus-api", + "openssl", + "serde", + "serde_json", + "sha2", + "tokio", + "zstd", +] + +[[package]] +name = "object" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "openssl" +version = "0.10.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c" +dependencies = [ + "bitflags 2.4.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "openssl-src" +version = "300.1.3+3.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd2c101a165fff9935e34def4669595ab1c7847943c42be86e21503e482be107" +dependencies = [ + "cc", +] + +[[package]] +name = "openssl-sys" +version = "0.9.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" +dependencies = [ + "cc", + "libc", + "openssl-src", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.3.5", + "smallvec", + "windows-targets 0.48.5", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pkg-config" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" + +[[package]] +name = "proc-macro2" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "procfs" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0941606b9934e2d98a3677759a971756eb821f75764d0e0d26946d08e74d9104" +dependencies = [ + "bitflags 1.3.2", + "byteorder", + "chrono", + "flate2", + "hex", + "lazy_static", + "libc", +] + +[[package]] +name = "procfs" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1de8dacb0873f77e6aefc6d71e044761fcc68060290f5b1089fcdf84626bb69" +dependencies = [ + "bitflags 1.3.2", + "byteorder", + "hex", + "lazy_static", + "rustix 0.36.15", +] + +[[package]] +name = "prometheus" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "449811d15fbdf5ceb5c1144416066429cf82316e2ec8ce0c1f6f8a02e7bbcf8c" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "libc", + "memchr", + "parking_lot", + "procfs 0.14.2", + "protobuf", + "thiserror", +] + +[[package]] +name = "protobuf" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_users" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +dependencies = [ + "getrandom", + "redox_syscall 0.2.16", + "thiserror", +] + +[[package]] +name = "rlimit" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "347703a5ae47adf1e693144157be231dde38c72bd485925cae7407ad3e52480b" +dependencies = [ + "libc", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + +[[package]] +name = "rustix" +version = "0.36.15" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c37f1bd5ef1b5422177b7646cba67430579cfe2ace80f284fee876bca52ad941" +dependencies = [ + "bitflags 1.3.2", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys 0.1.4", + "windows-sys 0.45.0", +] + +[[package]] +name = "rustix" +version = "0.37.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06" +dependencies = [ + "bitflags 1.3.2", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys 0.3.8", + "windows-sys 0.48.0", +] + +[[package]] +name = "rustversion" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "seccompiler" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01d1292a1131b22ccea49f30bd106f1238b5ddeec1a98d39268dcc31d540e68" +dependencies = [ + "libc", +] + +[[package]] +name = "sendfd" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "604b71b8fc267e13bb3023a2c901126c8f349393666a6d98ac1ae5729b701798" +dependencies = [ + "libc", +] + +[[package]] +name = "serde" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cc66a619ed80bf7a0f6b17dd063a84b88f6dea1813737cf469aef1d081142c2" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha2" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "slog" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8347046d4ebd943127157b94d63abb990fcf729dc4e9978927fdf4ac3c998d06" + +[[package]] +name = "slog-async" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72c8038f898a2c79507940990f05386455b3a317d8f18d4caea7cbc3d5096b84" +dependencies = [ + "crossbeam-channel", + "slog", + "take_mut", + "thread_local", +] + +[[package]] +name = "slog-scope" +version = "4.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f95a4b4c3274cd2869549da82b57ccc930859bdbf5bcea0424bc5f140b3c786" +dependencies = [ + "arc-swap", + "lazy_static", + "slog", +] + +[[package]] +name = "slog-term" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"87d29185c55b7b258b4f120eab00f48557d4d9bc814f41713f449d35b0f8977c" +dependencies = [ + "atty", + "slog", + "term", + "thread_local", + "time", +] + +[[package]] +name = "smallvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" + +[[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + +[[package]] +name = "syn" +version = "2.0.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "take_mut" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" + +[[package]] +name = "tar" +version = "0.4.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + +[[package]] +name = "test-utils" +version = "0.1.0" +dependencies = [ + "nix 0.24.3", +] + +[[package]] +name = "thiserror" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "threadpool" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" +dependencies = [ + "num_cpus", +] + +[[package]] +name = "time" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17f6bb557fd245c28e6411aa56b6403c689ad95061f50e4be16c274e70a17e48" +dependencies = [ + "deranged", + "itoa", + "libc", + "num_threads", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" + +[[package]] +name = "time-macros" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a942f44339478ef67935ab2bbaec2fb0322496cf3cbe84b261e06ac3814c572" +dependencies = [ + "time-core", +] + +[[package]] +name = "timerfd" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3fd47d83ad0b5c7be2e8db0b9d712901ef6ce5afbcc6f676761004f5104ea2" +dependencies = [ + 
"rustix 0.37.23", +] + +[[package]] +name = "tokio" +version = "1.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" +dependencies = [ + "backtrace", + "num_cpus", + "pin-project-lite", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + +[[package]] +name = "typenum" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" + +[[package]] +name = "unicode-ident" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "virtio-bindings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff512178285488516ed85f15b5d0113a7cdb89e9e8a760b269ae4f02b84bd6b" + +[[package]] +name = "virtio-queue" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ba81e2bcc21c0d2fc5e6683e79367e26ad219197423a498df801d79d5ba77bd" +dependencies = [ + "log", + "virtio-bindings", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "vm-fdt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43fb5a6bd1a7d423ad72802801036719b7546cf847a103f8fe4575f5b0d45a6" + +[[package]] +name = "vm-memory" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "688a70366615b45575a424d9c665561c1b5ab2224d494f706b6a6812911a827c" +dependencies = [ + "arc-swap", + "libc", + "winapi", +] + +[[package]] +name = "vm-superio" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4b5231d334edbc03b22704caa1a022e4c07491d6df736593f26094df8b04a51" + +[[package]] +name = "vmm-sys-util" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd64fe09d8e880e600c324e7d664760a17f56e9672b7495a86381b49e4f72f46" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.87" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "xattr" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4686009f71ff3e5c4dbcf1a282d0a44db3f021ba69350cd42086b3e5f1c6985" +dependencies = [ + "libc", +] + +[[package]] +name = "zstd" +version = "0.11.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "5.0.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" 
+dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.8+zstd.1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" +dependencies = [ + "cc", + "libc", + "pkg-config", +] diff --git a/src/dragonball/Cargo.toml b/src/dragonball/Cargo.toml new file mode 100644 index 000000000000..bbb5166f2bb0 --- /dev/null +++ b/src/dragonball/Cargo.toml @@ -0,0 +1,63 @@ +[package] +name = "dragonball" +version = "0.1.0" +authors = ["The Kata Containers community "] +description = "A secure sandbox for Kata Containers" +keywords = ["kata-containers", "sandbox", "vmm", "dragonball"] +homepage = "https://katacontainers.io/" +repository = "https://github.com/kata-containers/kata-containers.git" +license = "Apache-2.0" +edition = "2018" + +[dependencies] +anyhow = "1.0.32" +arc-swap = "1.5.0" +bytes = "1.1.0" +dbs-address-space = { path = "./src/dbs_address_space" } +dbs-allocator = { path = "./src/dbs_allocator" } +dbs-arch = { path = "./src/dbs_arch" } +dbs-boot = { path = "./src/dbs_boot" } +dbs-device = { path = "./src/dbs_device" } +dbs-interrupt = { path = "./src/dbs_interrupt", features = ["kvm-irq"] } +dbs-legacy-devices = { path = "./src/dbs_legacy_devices" } +dbs-upcall = { path = "./src/dbs_upcall" , optional = true } +dbs-utils = { path = "./src/dbs_utils" } +dbs-virtio-devices = { path = "./src/dbs_virtio_devices", optional = true, features = ["virtio-mmio"] } +kvm-bindings = "0.6.0" +kvm-ioctls = "0.12.0" +lazy_static = "1.2" +libc = "0.2.39" +linux-loader = "0.8.0" +log = "0.4.14" +nix = "0.24.2" +procfs = "0.12.0" +prometheus = { version = "0.13.0", features = ["process"] } +seccompiler = "0.2.0" +serde = "1.0.27" +serde_derive = "1.0.27" +serde_json = "1.0.9" +slog = "2.5.2" +slog-scope = "4.4.0" +thiserror = "1" +vmm-sys-util = "0.11.0" +virtio-queue = { version = "0.7.0", optional = true } +vm-memory = { version = "0.10.0", features = ["backend-mmap"] } +crossbeam-channel = "0.5.6" +fuse-backend-rs = "0.10.5" + +[dev-dependencies] +slog-async = "2.7.0" +slog-term = "2.9.0" +test-utils = { path = "../libs/test-utils" } + +[features] +acpi = [] +atomic-guest-memory = ["vm-memory/backend-atomic"] +hotplug = ["virtio-vsock"] +virtio-vsock = ["dbs-virtio-devices/virtio-vsock", "virtio-queue"] +virtio-blk = ["dbs-virtio-devices/virtio-blk", "virtio-queue"] +virtio-net = ["dbs-virtio-devices/virtio-net", "virtio-queue"] +# virtio-fs only work on atomic-guest-memory +virtio-fs = ["dbs-virtio-devices/virtio-fs", "virtio-queue", "atomic-guest-memory"] +virtio-mem = ["dbs-virtio-devices/virtio-mem", "virtio-queue", "atomic-guest-memory"] +virtio-balloon = ["dbs-virtio-devices/virtio-balloon", "virtio-queue"] diff --git a/src/dragonball/LICENSE b/src/dragonball/LICENSE new file mode 120000 index 000000000000..30cff7403da0 --- /dev/null +++ b/src/dragonball/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/dragonball/Makefile b/src/dragonball/Makefile new file mode 100644 index 000000000000..68ee3bd46d8e --- /dev/null +++ b/src/dragonball/Makefile @@ -0,0 +1,53 @@ +# Copyright (c) 2019-2022 Alibaba Cloud. All rights reserved. +# Copyright (c) 2019-2022 Ant Group. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +include ../../utils.mk + +ifeq ($(ARCH), s390x) +default build check test clippy: + @echo "s390x not support currently" + exit 0 +else + +default: build + +build: + @echo "INFO: cargo build..." 
+ cargo build --all-features --target $(TRIPLE) + +static-checks-build: + @echo "INFO: static-checks-build do nothing.." + +check: clippy format + +clippy: + @echo "INFO: cargo clippy..." + cargo clippy --all-targets --all-features \ + -- \ + -D warnings + +vendor: + @echo "INFO: vendor do nothing.." + +format: + @echo "INFO: cargo fmt..." + cargo fmt -- --check + +clean: + cargo clean + +test: +ifdef SUPPORT_VIRTUALIZATION + RUST_BACKTRACE=1 cargo test --all-features --target $(TRIPLE) -- --nocapture --test-threads=1 +else + @echo "INFO: skip testing dragonball, it need virtualization support." + exit 0 +endif + +coverage: + RUST_BACKTRACE=1 cargo llvm-cov --all-features --target $(TRIPLE) -- --nocapture --test-threads=1 + +endif # ifeq ($(ARCH), s390x) + +.DEFAULT_GOAL := default diff --git a/src/dragonball/README.md b/src/dragonball/README.md new file mode 100644 index 000000000000..767b9af47dfe --- /dev/null +++ b/src/dragonball/README.md @@ -0,0 +1,53 @@ +# Introduction +`Dragonball Sandbox` is a light-weight virtual machine manager (VMM) based on Linux Kernel-based Virtual Machine (KVM), +which is optimized for container workloads with: +- container image management and acceleration service +- flexible and high-performance virtual device drivers +- low CPU and memory overhead +- minimal startup time +- optimized concurrent startup speed + +`Dragonball Sandbox` aims to provide a simple solution for the Kata Containers community. It is integrated into Kata 3.0 +runtime as a built-in VMM and gives users an out-of-the-box Kata Containers experience without complex environment setup +and configuration process. + +# Getting Started +[TODO](https://github.com/kata-containers/kata-containers/issues/4302) + +# Documentation + +- Device: [Device Document](docs/device.md) +- vCPU: [vCPU Document](docs/vcpu.md) +- API: [API Document](docs/api.md) +- `Upcall`: [`Upcall` Document](docs/upcall.md) +- `dbs_acpi`: [`dbs_acpi` Document](src/dbs_acpi/README.md) +- `dbs_address_space`: [`dbs_address_space` Document](src/dbs_address_space/README.md) +- `dbs_allocator`: [`dbs_allocator` Document](src/dbs_allocator/README.md) +- `dbs_arch`: [`dbs_arch` Document](src/dbs_arch/README.md) +- `dbs_boot`: [`dbs_boot` Document](src/dbs_boot/README.md) +- `dbs_device`: [`dbs_device` Document](src/dbs_device/README.md) +- `dbs_interrupt`: [`dbs_interrput` Document](src/dbs_interrupt/README.md) +- `dbs_legacy_devices`: [`dbs_legacy_devices` Document](src/dbs_legacy_devices/README.md) +- `dbs_tdx`: [`dbs_tdx` Document](src/dbs_tdx/README.md) +- `dbs_upcall`: [`dbs_upcall` Document](src/dbs_upcall/README.md) +- `dbs_utils`: [`dbs_utils` Document](src/dbs_utils/README.md) +- `dbs_virtio_devices`: [`dbs_virtio_devices` Document](src/dbs_virtio_devices/README.md) + +Currently, the documents are still actively adding. +You could see the [official documentation](docs/) page for more details. + +# Supported Architectures +- x86-64 +- aarch64 + +# Supported Kernel +[TODO](https://github.com/kata-containers/kata-containers/issues/4303) + +# Acknowledgement +Part of the code is based on the [Cloud Hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor) project, [`crosvm`](https://github.com/google/crosvm) project and [Firecracker](https://github.com/firecracker-microvm/firecracker) project. They are all rust written virtual machine managers with advantages on safety and security. 
+ +`Dragonball sandbox` is designed to be a VMM that is customized for Kata Containers and we will focus on optimizing container workloads for Kata ecosystem. The focus on the Kata community is what differentiates us from other rust written virtual machines. + +# License + +`Dragonball` is licensed under [Apache License](http://www.apache.org/licenses/LICENSE-2.0), Version 2.0. \ No newline at end of file diff --git a/src/dragonball/THIRD-PARTY b/src/dragonball/THIRD-PARTY new file mode 100644 index 000000000000..c3069125a350 --- /dev/null +++ b/src/dragonball/THIRD-PARTY @@ -0,0 +1,27 @@ +// Copyright 2017 The Chromium OS Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/src/dragonball/docs/api.md b/src/dragonball/docs/api.md new file mode 100644 index 000000000000..dab49835ae24 --- /dev/null +++ b/src/dragonball/docs/api.md @@ -0,0 +1,27 @@ +# API + +We provide plenty API for Kata runtime to interact with `Dragonball` virtual machine manager. +This document provides the introduction for each of them. + +## `ConfigureBootSource` +Configure the boot source of the VM using `BootSourceConfig`. This action can only be called before the VM has booted. + +### Boot Source Config +1. `kernel_path`: Path of the kernel image. `Dragonball` only supports compressed kernel image for now. +2. `initrd_path`: Path of the initrd (could be None) +3. `boot_args`: Boot arguments passed to the kernel (could be None) + +## `SetVmConfiguration` +Set virtual machine configuration using `VmConfigInfo` to initialize VM. + +### VM Config Info +1. `vcpu_count`: Number of vCPU to start. Currently we only support up to 255 vCPUs. +2. `max_vcpu_count`: Max number of vCPU can be added through CPU hotplug. +3. `cpu_pm`: CPU power management. +4. `cpu_topology`: CPU topology information (including `threads_per_core`, `cores_per_die`, `dies_per_socket` and `sockets`). +5. `vpmu_feature`: `vPMU` feature level. +6. `mem_type`: Memory type that can be either `hugetlbfs` or `shmem`, default is `shmem`. +7. 
`mem_file_path` : Memory file path. +8. `mem_size_mib`: The memory size in MiB. The maximum memory size is 1TB. +9. `serial_path`: Optional sock path. + diff --git a/src/dragonball/docs/device.md b/src/dragonball/docs/device.md new file mode 100644 index 000000000000..ab2e078e7b85 --- /dev/null +++ b/src/dragonball/docs/device.md @@ -0,0 +1,20 @@ +# Device + +## Device Manager + +Currently we have following device manager: +| Name | Description | +| --- | --- | +| [address space manager](../src/address_space_manager.rs) | abstracts virtual machine's physical management and provide mapping for guest virtual memory and MMIO ranges of emulated virtual devices, pass-through devices and vCPU | +| [config manager](../src/config_manager.rs) | provides abstractions for configuration information | +| [console manager](../src/device_manager/console_manager.rs) | provides management for all console devices | +| [resource manager](../src/resource_manager.rs) |provides resource management for `legacy_irq_pool`, `msi_irq_pool`, `pio_pool`, `mmio_pool`, `mem_pool`, `kvm_mem_slot_pool` with builder `ResourceManagerBuilder` | +| [VSOCK device manager](../src/device_manager/vsock_dev_mgr.rs) | provides configuration info for `VIRTIO-VSOCK` and management for all VSOCK devices | + + +## Device supported +`VIRTIO-VSOCK` +`i8042` +`COM1` +`COM2` + diff --git a/src/dragonball/docs/images/upcall-architecture.svg b/src/dragonball/docs/images/upcall-architecture.svg new file mode 100644 index 000000000000..a74c37f45539 --- /dev/null +++ b/src/dragonball/docs/images/upcall-architecture.svg @@ -0,0 +1,177 @@ + + + + + + + + + + + + + + + + + Canvas 1 + + + Layer 1 + + + + + + Guest User + + + + + Guest Kernel + + + + + + + + Hypervisor + + + + + + + + socket + + + + + + Device + Manager + Service + + + + + bind + + + + + listen + + + + + accept + + + + + new kthread + + + + + + virtio-vsocket + + + + + + virtio-vsocket backend + + + + + + + + + + + + + + + + + + Port + + + + + + Device + Manager + Backend + + + + + + + + + Service B + + + + + + Backend B + + + + + + + + …… + + + + + …… + + + + + Upcall Server + + + + + + + + Service handler + + + + + + Conn + + + + + + Conn + + + + + + + + + + + diff --git a/src/dragonball/docs/upcall.md b/src/dragonball/docs/upcall.md new file mode 100644 index 000000000000..292b33d2703d --- /dev/null +++ b/src/dragonball/docs/upcall.md @@ -0,0 +1,30 @@ +# `Upcall` + +## What is `Upcall`? + +`Upcall` is a direct communication tool between VMM and guest developed upon `vsock`. The server side of the `upcall` is a driver in guest kernel (kernel patches are needed for this feature) and it'll start to serve the requests after the kernel starts. And the client side is in Dragonball VMM , it'll be a thread that communicates with `vsock` through `uds`. + +We want to keep the lightweight of the VM through the implementation of the `upcall`. + +![architecture overview](images/upcall-architecture.svg) +## What can `upcall` do? + +We define specific operations in the device manager service (one of the services in `upcall` we developed) to perform device hotplug / hot-unplug including vCPU hotplug, `virtio-mmio` hotplug, and memory hotplug. We have accomplished device hotplug / hot-unplug directly through `upcall` in order to avoid the virtualization of ACPI to minimize virtual machines overhead. And there could be many other uses if other services are implemented. + +## How to enable `upcall`? 
+
+`Upcall` needs a server in the guest kernel, which consists of several kernel patches for the `upcall` server itself and for the different services registered in the `upcall` server. It's currently tested on upstream Linux kernel 5.10.
+
+To make it easy for users, we have open-sourced the `upcall` guest patches in [Dragonball experimental guest patches](../../../tools/packaging/kernel/patches/5.10.x/dragonball-experimental) and developed `upcall` support in the [Kata guest kernel building script](../../../tools/packaging/kernel/build-kernel.sh).
+
+You can use the following command to download the upstream kernel (currently Dragonball uses 5.10.25) and apply the `upcall` patches and other Kata patches to the kernel code.
+
+`sh build-kernel.sh -e -t dragonball -f setup`
+
+`-e` means experimental, mainly because the `upcall` patches are not in the upstream Linux kernel.
+`-t dragonball` specifies the hypervisor type.
+`-f` generates the `.config` file.
+
+After this command, the kernel code with `upcall` support and the related `.config` file are set up in the directory `kata-linux-dragonball-experimental-5.10.25-[config version]`. You can either compile the kernel manually with the `make` command or follow the [Document for build-kernel.sh](../../../tools/packaging/kernel/README.md) to build and use this guest kernel.
+
+A client side is also needed in the VMM. Dragonball open-sources the client-side implementation of `upcall`, and building Dragonball with the `dbs-upcall` feature enables the client side.
\ No newline at end of file
diff --git a/src/dragonball/docs/vcpu.md b/src/dragonball/docs/vcpu.md
new file mode 100644
index 000000000000..e2be8037b60f
--- /dev/null
+++ b/src/dragonball/docs/vcpu.md
@@ -0,0 +1,42 @@
+# vCPU
+
+## vCPU Manager
+The vCPU manager manages all vCPU-related actions; we will dive into some of the important structure members in this doc.
+
+For now, aarch64 vCPU support is still under development; we'll introduce it when we merge `runtime-rs` into the master branch. (issue: #4445)
+
+### vCPU config
+`VcpuConfig` is used to configure the guest's overall CPU info.
+
+`boot_vcpu_count` is used to define the initial vCPU number.
+
+`max_vcpu_count` is used to define the maximum vCPU number and is the upper boundary for the CPU hotplug feature.
+
+`thread_per_core`, `cores_per_die`, `dies_per_socket` and `socket` are used to define the CPU topology.
+
+`vpmu_feature` is used to define the `vPMU` feature level.
+If the `vPMU` feature is `Disabled`, the `vPMU` feature is off (by default).
+If the `vPMU` feature is `LimitedlyEnabled`, minimal `vPMU` counters are supported (cycles and instructions).
+If the `vPMU` feature is `FullyEnabled`, all `vPMU` counters are supported.
+
+## vCPU State
+
+There are four states in the vCPU state machine: `running`, `paused`, `waiting_exit`, `exited`. A state machine maintains the task flow.
+
+When a vCPU is created, it turns to the `paused` state. After the vCPU resource is ready in the VMM, the VMM sends a `Resume` event to the vCPU thread, and the vCPU state changes to `running`.
+
+In the `running` state, the VMM catches vCPU exits and executes different logic according to the exit reason.
+
+If the VMM catches an exit reason that it cannot handle, the state changes to `waiting_exit` and the VMM stops the virtual machine.
+When the state switches to `waiting_exit`, an exit event is sent to the vCPU `exit_evt`, the event manager detects the change in `exit_evt` and sets the VMM `exit_evt_flag` to 1. 
A thread serving for VMM event loop will check `exit_evt_flag` and if the flag is 1, it'll stop the VMM. + +When the VMM is stopped / destroyed, the state will change to `exited`. + +## vCPU Hot plug +Since `Dragonball Sandbox` doesn't support virtualization of ACPI system, we use [`upcall`](https://github.com/openanolis/dragonball-sandbox/tree/main/crates/dbs-upcall) to establish a direct communication channel between `Dragonball` and Guest in order to trigger vCPU hotplug. + +To use `upcall`, kernel patches are needed, you can get the patches from [`upcall`](https://github.com/openanolis/dragonball-sandbox/tree/main/crates/dbs-upcall) page, and we'll provide a ready-to-use guest kernel binary for you to try. + +vCPU hot plug / hot unplug range is [1, `max_vcpu_count`]. Operations not in this range will be invalid. + + diff --git a/src/dragonball/src/address_space_manager.rs b/src/dragonball/src/address_space_manager.rs new file mode 100644 index 000000000000..6efdb027ad6b --- /dev/null +++ b/src/dragonball/src/address_space_manager.rs @@ -0,0 +1,897 @@ +// Copyright (C) 2019-2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Address space abstraction to manage virtual machine's physical address space. +//! +//! The AddressSpace abstraction is introduced to manage virtual machine's physical address space. +//! The regions in virtual machine's physical address space may be used to: +//! 1) map guest virtual memory +//! 2) map MMIO ranges for emulated virtual devices, such as virtio-fs DAX window. +//! 3) map MMIO ranges for pass-through devices, such as PCI device BARs. +//! 4) map MMIO ranges for to vCPU, such as local APIC. +//! 5) not used/available +//! +//! A related abstraction, vm_memory::GuestMemory, is used to access guest virtual memory only. +//! In other words, AddressSpace is the resource owner, and GuestMemory is an accessor for guest +//! virtual memory. + +use std::collections::{BTreeMap, HashMap}; +use std::fs::File; +use std::os::unix::io::{AsRawFd, FromRawFd}; +use std::sync::atomic::{AtomicBool, AtomicU8, Ordering}; +use std::sync::{Arc, Mutex}; +use std::thread; + +use dbs_address_space::{ + AddressSpace, AddressSpaceError, AddressSpaceLayout, AddressSpaceRegion, + AddressSpaceRegionType, NumaNode, NumaNodeInfo, MPOL_MF_MOVE, MPOL_PREFERRED, +}; +use dbs_allocator::Constraint; +use kvm_bindings::kvm_userspace_memory_region; +use kvm_ioctls::VmFd; +use log::{debug, error, info, warn}; +use nix::sys::mman; +use nix::unistd::dup; +#[cfg(feature = "atomic-guest-memory")] +use vm_memory::GuestMemoryAtomic; +use vm_memory::{ + address::Address, FileOffset, GuestAddress, GuestAddressSpace, GuestMemoryMmap, + GuestMemoryRegion, GuestRegionMmap, GuestUsize, MemoryRegionAddress, MmapRegion, +}; + +use crate::resource_manager::ResourceManager; +use crate::vm::NumaRegionInfo; + +#[cfg(not(feature = "atomic-guest-memory"))] +/// Concrete GuestAddressSpace type used by the VMM. +pub type GuestAddressSpaceImpl = Arc; + +#[cfg(feature = "atomic-guest-memory")] +/// Concrete GuestAddressSpace type used by the VMM. +pub type GuestAddressSpaceImpl = GuestMemoryAtomic; + +/// Concrete GuestMemory type used by the VMM. +pub type GuestMemoryImpl = as GuestAddressSpace>::M; +/// Concrete GuestRegion type used by the VMM. +pub type GuestRegionImpl = GuestRegionMmap; + +// Maximum number of working threads for memory pre-allocation. +const MAX_PRE_ALLOC_THREAD: u64 = 16; + +// Control the actual number of pre-allocating threads. 
After several performance tests, we decide to use one thread to do pre-allocating for every 4G memory. +const PRE_ALLOC_GRANULARITY: u64 = 32; + +// We don't have plan to support mainframe computer and only focus on PC servers. +// 64 as max nodes should be enough for now. +const MAX_NODE: u32 = 64; + +// We will split the memory region if it conflicts with the MMIO hole. +// But if the space below the MMIO hole is smaller than the MINIMAL_SPLIT_SPACE, we won't split the memory region in order to enhance performance. +const MINIMAL_SPLIT_SPACE: u64 = 128 << 20; + +/// Errors associated with virtual machine address space management. +#[derive(Debug, thiserror::Error)] +pub enum AddressManagerError { + /// Invalid address space operation. + #[error("invalid address space operation")] + InvalidOperation, + + /// Invalid address range. + #[error("invalid address space region (0x{0:x}, 0x{1:x})")] + InvalidAddressRange(u64, GuestUsize), + + /// No available mem address. + #[error("no available mem address")] + NoAvailableMemAddress, + + /// No available kvm slotse. + #[error("no available kvm slots")] + NoAvailableKvmSlot, + + /// Address manager failed to create memfd to map anonymous memory. + #[error("address manager failed to create memfd to map anonymous memory")] + CreateMemFd(#[source] nix::Error), + + /// Address manager failed to open memory file. + #[error("address manager failed to open memory file")] + OpenFile(#[source] std::io::Error), + + /// Memory file provided is invalid due to empty file path, non-existent file path and other possible mistakes. + #[error("memory file provided to address manager {0} is invalid")] + FileInvalid(String), + + /// Memory file provided is invalid due to empty memory type + #[error("memory type provided to address manager {0} is invalid")] + TypeInvalid(String), + + /// Failed to set size for memory file. + #[error("address manager failed to set size for memory file")] + SetFileSize(#[source] std::io::Error), + + /// Failed to unlink memory file. + #[error("address manager failed to unlink memory file")] + UnlinkFile(#[source] nix::Error), + + /// Failed to duplicate fd of memory file. + #[error("address manager failed to duplicate memory file descriptor")] + DupFd(#[source] nix::Error), + + /// Failure in accessing the memory located at some address. + #[error("address manager failed to access guest memory located at 0x{0:x}")] + AccessGuestMemory(u64, #[source] vm_memory::mmap::Error), + + /// Failed to create GuestMemory + #[error("address manager failed to create guest memory object")] + CreateGuestMemory(#[source] vm_memory::Error), + + /// Failure in initializing guest memory. + #[error("address manager failed to initialize guest memory")] + GuestMemoryNotInitialized, + + /// Failed to mmap() guest memory + #[error("address manager failed to mmap() guest memory into current process")] + MmapGuestMemory(#[source] vm_memory::mmap::MmapRegionError), + + /// Failed to set KVM memory slot. 
+ #[error("address manager failed to configure KVM memory slot")] + KvmSetMemorySlot(#[source] kvm_ioctls::Error), + + /// Failed to set madvise on AddressSpaceRegion + #[error("address manager failed to set madvice() on guest memory region")] + Madvise(#[source] nix::Error), + + /// join threads fail + #[error("address manager failed to join threads")] + JoinFail, + + /// Failed to create Address Space Region + #[error("address manager failed to create Address Space Region {0}")] + CreateAddressSpaceRegion(#[source] AddressSpaceError), +} + +type Result = std::result::Result; + +/// Parameters to configure address space creation operations. +pub struct AddressSpaceMgrBuilder<'a> { + mem_type: &'a str, + mem_file: &'a str, + mem_index: u32, + mem_suffix: bool, + mem_prealloc: bool, + dirty_page_logging: bool, + vmfd: Option>, +} + +impl<'a> AddressSpaceMgrBuilder<'a> { + /// Create a new [`AddressSpaceMgrBuilder`] object. + pub fn new(mem_type: &'a str, mem_file: &'a str) -> Result { + if mem_type.is_empty() { + return Err(AddressManagerError::TypeInvalid(mem_type.to_string())); + } + Ok(AddressSpaceMgrBuilder { + mem_type, + mem_file, + mem_index: 0, + mem_suffix: true, + mem_prealloc: false, + dirty_page_logging: false, + vmfd: None, + }) + } + + /// Enable/disable adding numbered suffix to memory file path. + /// This feature could be useful to generate hugetlbfs files with number suffix. (e.g. shmem0, shmem1) + pub fn toggle_file_suffix(&mut self, enabled: bool) { + self.mem_suffix = enabled; + } + + /// Enable/disable memory pre-allocation. + /// Enable this feature could improve performance stability at the start of workload by avoiding page fault. + /// Disable this feature may influence performance stability but the cpu resource consumption and start-up time will decrease. + pub fn toggle_prealloc(&mut self, prealloc: bool) { + self.mem_prealloc = prealloc; + } + + /// Enable/disable KVM dirty page logging. + pub fn toggle_dirty_page_logging(&mut self, logging: bool) { + self.dirty_page_logging = logging; + } + + /// Set KVM [`VmFd`] handle to configure memory slots. + pub fn set_kvm_vm_fd(&mut self, vmfd: Arc) -> Option> { + let mut existing_vmfd = None; + if self.vmfd.is_some() { + existing_vmfd = self.vmfd.clone(); + } + self.vmfd = Some(vmfd); + existing_vmfd + } + + /// Build a ['AddressSpaceMgr'] using the configured parameters. + pub fn build( + self, + res_mgr: &ResourceManager, + numa_region_infos: &[NumaRegionInfo], + ) -> Result { + let mut mgr = AddressSpaceMgr::default(); + mgr.create_address_space(res_mgr, numa_region_infos, self)?; + Ok(mgr) + } + + fn get_next_mem_file(&mut self) -> String { + if self.mem_suffix { + let path = format!("{}{}", self.mem_file, self.mem_index); + self.mem_index += 1; + path + } else { + self.mem_file.to_string() + } + } +} + +/// Struct to manage virtual machine's physical address space. +pub struct AddressSpaceMgr { + address_space: Option, + vm_as: Option, + base_to_slot: Arc>>, + prealloc_handlers: Vec>, + prealloc_exit: Arc, + numa_nodes: BTreeMap, +} + +impl AddressSpaceMgr { + /// Query address space manager is initialized or not + pub fn is_initialized(&self) -> bool { + self.address_space.is_some() + } + + /// Gets address space. + pub fn address_space(&self) -> Option<&AddressSpace> { + self.address_space.as_ref() + } + + /// Get the guest memory. + pub fn vm_memory(&self) -> Option<::T> { + self.get_vm_as().map(|m| m.memory()) + } + + /// Create the address space for a virtual machine. 
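+    /// Guest RAM regions are laid out from `dbs_boot::layout::GUEST_MEM_START` and are split
+    /// around the low MMIO hole when a region would otherwise overlap it.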
+ /// + /// This method is designed to be called when starting up a virtual machine instead of at + /// runtime, so it's expected the virtual machine will be tore down and no strict error recover. + pub fn create_address_space( + &mut self, + res_mgr: &ResourceManager, + numa_region_infos: &[NumaRegionInfo], + mut param: AddressSpaceMgrBuilder, + ) -> Result<()> { + let mut regions = Vec::new(); + let mut start_addr = dbs_boot::layout::GUEST_MEM_START; + + // Create address space regions. + for info in numa_region_infos.iter() { + info!("numa_region_info {:?}", info); + // convert size_in_mib to bytes + let size = info + .size + .checked_shl(20) + .ok_or(AddressManagerError::InvalidOperation)?; + + // Guest memory does not intersect with the MMIO hole. + // TODO: make it work for ARM (issue #4307) + if start_addr > dbs_boot::layout::MMIO_LOW_END + || start_addr + size <= dbs_boot::layout::MMIO_LOW_START + { + let region = self.create_region(start_addr, size, info, &mut param)?; + regions.push(region); + start_addr = start_addr + .checked_add(size) + .ok_or(AddressManagerError::InvalidOperation)?; + } else { + // Add guest memory below the MMIO hole, avoid splitting the memory region + // if the available address region is small than MINIMAL_SPLIT_SPACE MiB. + let mut below_size = dbs_boot::layout::MMIO_LOW_START + .checked_sub(start_addr) + .ok_or(AddressManagerError::InvalidOperation)?; + if below_size < (MINIMAL_SPLIT_SPACE) { + below_size = 0; + } else { + let region = self.create_region(start_addr, below_size, info, &mut param)?; + regions.push(region); + } + + // Add guest memory above the MMIO hole + let above_start = dbs_boot::layout::MMIO_LOW_END + 1; + let above_size = size + .checked_sub(below_size) + .ok_or(AddressManagerError::InvalidOperation)?; + let region = self.create_region(above_start, above_size, info, &mut param)?; + regions.push(region); + start_addr = above_start + .checked_add(above_size) + .ok_or(AddressManagerError::InvalidOperation)?; + } + } + + // Create GuestMemory object + let mut vm_memory = GuestMemoryMmap::new(); + for reg in regions.iter() { + // Allocate used guest memory addresses. + // These addresses are statically allocated, resource allocation/update should not fail. 
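+            // The constraint below pins the request to exactly [start_addr, last_addr] of this
+            // region, so the allocator has no freedom and simply records the reservation.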
+ let constraint = Constraint::new(reg.len()) + .min(reg.start_addr().raw_value()) + .max(reg.last_addr().raw_value()); + let _key = res_mgr + .allocate_mem_address(&constraint) + .ok_or(AddressManagerError::NoAvailableMemAddress)?; + let mmap_reg = self.create_mmap_region(reg.clone())?; + + vm_memory = vm_memory + .insert_region(mmap_reg.clone()) + .map_err(AddressManagerError::CreateGuestMemory)?; + self.map_to_kvm(res_mgr, ¶m, reg, mmap_reg)?; + } + + #[cfg(feature = "atomic-guest-memory")] + { + self.vm_as = Some(AddressSpace::convert_into_vm_as(vm_memory)); + } + #[cfg(not(feature = "atomic-guest-memory"))] + { + self.vm_as = Some(Arc::new(vm_memory)); + } + + let layout = AddressSpaceLayout::new( + *dbs_boot::layout::GUEST_PHYS_END, + dbs_boot::layout::GUEST_MEM_START, + *dbs_boot::layout::GUEST_MEM_END, + ); + self.address_space = Some(AddressSpace::from_regions(regions, layout)); + + Ok(()) + } + + // size unit: Byte + fn create_region( + &mut self, + start_addr: u64, + size_bytes: u64, + info: &NumaRegionInfo, + param: &mut AddressSpaceMgrBuilder, + ) -> Result> { + let mem_file_path = param.get_next_mem_file(); + let region = AddressSpaceRegion::create_default_memory_region( + GuestAddress(start_addr), + size_bytes, + info.host_numa_node_id, + param.mem_type, + &mem_file_path, + param.mem_prealloc, + false, + ) + .map_err(AddressManagerError::CreateAddressSpaceRegion)?; + let region = Arc::new(region); + + self.insert_into_numa_nodes( + ®ion, + info.guest_numa_node_id.unwrap_or(0), + &info.vcpu_ids, + ); + info!( + "create new region: guest addr 0x{:x}-0x{:x} size {}", + start_addr, + start_addr + size_bytes, + size_bytes + ); + + Ok(region) + } + + fn map_to_kvm( + &mut self, + res_mgr: &ResourceManager, + param: &AddressSpaceMgrBuilder, + reg: &Arc, + mmap_reg: Arc, + ) -> Result<()> { + // Build mapping between GPA <-> HVA, by adding kvm memory slot. + let slot = res_mgr + .allocate_kvm_mem_slot(1, None) + .ok_or(AddressManagerError::NoAvailableKvmSlot)?; + + if let Some(vmfd) = param.vmfd.as_ref() { + let host_addr = mmap_reg + .get_host_address(MemoryRegionAddress(0)) + .map_err(|_e| AddressManagerError::InvalidOperation)?; + let flags = 0u32; + + let mem_region = kvm_userspace_memory_region { + slot, + guest_phys_addr: reg.start_addr().raw_value(), + memory_size: reg.len(), + userspace_addr: host_addr as u64, + flags, + }; + + info!( + "VM: guest memory region {:x} starts at {:x?}", + reg.start_addr().raw_value(), + host_addr + ); + // Safe because the guest regions are guaranteed not to overlap. + unsafe { vmfd.set_user_memory_region(mem_region) } + .map_err(AddressManagerError::KvmSetMemorySlot)?; + } + + self.base_to_slot + .lock() + .unwrap() + .insert(reg.start_addr().raw_value(), slot); + + Ok(()) + } + + /// Mmap the address space region into current process. + pub fn create_mmap_region( + &mut self, + region: Arc, + ) -> Result> { + // Special check for 32bit host with 64bit virtual machines. + if region.len() > usize::MAX as u64 { + return Err(AddressManagerError::InvalidAddressRange( + region.start_addr().raw_value(), + region.len(), + )); + } + // The device MMIO regions may not be backed by memory files, so refuse to mmap them. + if region.region_type() == AddressSpaceRegionType::DeviceMemory { + return Err(AddressManagerError::InvalidOperation); + } + + // The GuestRegionMmap/MmapRegion will take ownership of the FileOffset object, + // so we have to duplicate the fd here. It's really a dirty design. 
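+        // dup() returns a new descriptor sharing the same open file description, so the
+        // AddressSpaceRegion keeps its original fd while the new File owns only the duplicate.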
+ let file_offset = match region.file_offset().as_ref() { + Some(fo) => { + let fd = dup(fo.file().as_raw_fd()).map_err(AddressManagerError::DupFd)?; + // Safe because we have just duplicated the raw fd. + let file = unsafe { File::from_raw_fd(fd) }; + let file_offset = FileOffset::new(file, fo.start()); + Some(file_offset) + } + None => None, + }; + let perm_flags = if (region.perm_flags() & libc::MAP_POPULATE) != 0 && region.is_hugepage() + { + // mmap(MAP_POPULATE) conflicts with madive(MADV_HUGEPAGE) because mmap(MAP_POPULATE) + // will pre-fault in all memory with normal pages before madive(MADV_HUGEPAGE) gets + // called. So remove the MAP_POPULATE flag and memory will be faulted in by working + // threads. + region.perm_flags() & (!libc::MAP_POPULATE) + } else { + region.perm_flags() + }; + let mmap_reg = MmapRegion::build( + file_offset, + region.len() as usize, + libc::PROT_READ | libc::PROT_WRITE, + perm_flags, + ) + .map_err(AddressManagerError::MmapGuestMemory)?; + + if region.is_anonpage() { + self.configure_anon_mem(&mmap_reg)?; + } + if let Some(node_id) = region.host_numa_node_id() { + self.configure_numa(&mmap_reg, node_id)?; + } + if region.is_hugepage() { + self.configure_thp_and_prealloc(®ion, &mmap_reg)?; + } + + let reg = GuestRegionImpl::new(mmap_reg, region.start_addr()) + .map_err(AddressManagerError::CreateGuestMemory)?; + Ok(Arc::new(reg)) + } + + fn configure_anon_mem(&self, mmap_reg: &MmapRegion) -> Result<()> { + unsafe { + mman::madvise( + mmap_reg.as_ptr() as *mut libc::c_void, + mmap_reg.size(), + mman::MmapAdvise::MADV_DONTFORK, + ) + } + .map_err(AddressManagerError::Madvise) + } + + fn configure_numa(&self, mmap_reg: &MmapRegion, node_id: u32) -> Result<()> { + let nodemask = 1_u64 + .checked_shl(node_id) + .ok_or(AddressManagerError::InvalidOperation)?; + let res = unsafe { + libc::syscall( + libc::SYS_mbind, + mmap_reg.as_ptr() as *mut libc::c_void, + mmap_reg.size(), + MPOL_PREFERRED, + &nodemask as *const u64, + MAX_NODE, + MPOL_MF_MOVE, + ) + }; + if res < 0 { + warn!( + "failed to mbind memory to host_numa_node_id {}: this may affect performance", + node_id + ); + } + Ok(()) + } + + // We set Transparent Huge Page (THP) through mmap to increase performance. + // In order to reduce the impact of page fault on performance, we start several threads (up to MAX_PRE_ALLOC_THREAD) to touch every 4k page of the memory region to manually do memory pre-allocation. + // The reason why we don't use mmap to enable THP and pre-alloction is that THP setting won't take effect in this operation (tested in kernel 4.9) + fn configure_thp_and_prealloc( + &mut self, + region: &Arc, + mmap_reg: &MmapRegion, + ) -> Result<()> { + debug!( + "Setting MADV_HUGEPAGE on AddressSpaceRegion addr {:x?} len {:x?}", + mmap_reg.as_ptr(), + mmap_reg.size() + ); + + // Safe because we just create the MmapRegion + unsafe { + mman::madvise( + mmap_reg.as_ptr() as *mut libc::c_void, + mmap_reg.size(), + mman::MmapAdvise::MADV_HUGEPAGE, + ) + } + .map_err(AddressManagerError::Madvise)?; + + if region.perm_flags() & libc::MAP_POPULATE > 0 { + // Touch every 4k page to trigger allocation. The step is 4K instead of 2M to ensure + // pre-allocation when running out of huge pages. + const PAGE_SIZE: u64 = 4096; + const PAGE_SHIFT: u32 = 12; + let addr = mmap_reg.as_ptr() as u64; + // Here we use >> PAGE_SHIFT to calculate how many 4K pages in the memory region. 
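+            // The region size shifted by PRE_ALLOC_GRANULARITY then decides how many worker
+            // threads to spawn (roughly one per 4 GiB), capped at MAX_PRE_ALLOC_THREAD below.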
+ let npage = (mmap_reg.size() as u64) >> PAGE_SHIFT; + + let mut touch_thread = ((mmap_reg.size() as u64) >> PRE_ALLOC_GRANULARITY) + 1; + if touch_thread > MAX_PRE_ALLOC_THREAD { + touch_thread = MAX_PRE_ALLOC_THREAD; + } + + let per_npage = npage / touch_thread; + for n in 0..touch_thread { + let start_npage = per_npage * n; + let end_npage = if n == (touch_thread - 1) { + npage + } else { + per_npage * (n + 1) + }; + let mut per_addr = addr + (start_npage * PAGE_SIZE); + let should_stop = self.prealloc_exit.clone(); + + let handler = thread::Builder::new() + .name("PreallocThread".to_string()) + .spawn(move || { + info!("PreallocThread start start_npage: {:?}, end_npage: {:?}, per_addr: {:?}, thread_number: {:?}", + start_npage, end_npage, per_addr, touch_thread ); + for _ in start_npage..end_npage { + if should_stop.load(Ordering::Acquire) { + info!("PreallocThread stop start_npage: {:?}, end_npage: {:?}, per_addr: {:?}, thread_number: {:?}", + start_npage, end_npage, per_addr, touch_thread); + break; + } + + // Reading from a THP page may be served by the zero page, so only + // write operation could ensure THP memory allocation. So use + // the compare_exchange(old_val, old_val) trick to trigger allocation. + let addr_ptr = per_addr as *mut u8; + let read_byte = unsafe { std::ptr::read_volatile(addr_ptr) }; + let atomic_u8 : &AtomicU8 = unsafe {&*(addr_ptr as *mut AtomicU8)}; + let _ = atomic_u8.compare_exchange(read_byte, read_byte, Ordering::SeqCst, Ordering::SeqCst); + per_addr += PAGE_SIZE; + } + + info!("PreallocThread done start_npage: {:?}, end_npage: {:?}, per_addr: {:?}, thread_number: {:?}", + start_npage, end_npage, per_addr, touch_thread ); + }); + + match handler { + Err(e) => error!( + "Failed to create working thread for async pre-allocation, {:?}. This may affect performance stability at the start of the workload.", + e + ), + Ok(hdl) => self.prealloc_handlers.push(hdl), + } + } + } + + Ok(()) + } + + /// Get the address space object + pub fn get_address_space(&self) -> Option<&AddressSpace> { + self.address_space.as_ref() + } + + /// Get the default guest memory object, which will be used to access virtual machine's default + /// guest memory. + pub fn get_vm_as(&self) -> Option<&GuestAddressSpaceImpl> { + self.vm_as.as_ref() + } + + /// Get the base to slot map + pub fn get_base_to_slot_map(&self) -> Arc>> { + self.base_to_slot.clone() + } + + /// get numa nodes infos from address space manager. + pub fn get_numa_nodes(&self) -> &BTreeMap { + &self.numa_nodes + } + + /// add cpu and memory numa informations to BtreeMap + fn insert_into_numa_nodes( + &mut self, + region: &Arc, + guest_numa_node_id: u32, + vcpu_ids: &[u32], + ) { + let node = self + .numa_nodes + .entry(guest_numa_node_id) + .or_insert_with(NumaNode::new); + node.add_info(&NumaNodeInfo { + base: region.start_addr(), + size: region.len(), + }); + node.add_vcpu_ids(vcpu_ids); + } + + /// get address space layout from address space manager. + pub fn get_layout(&self) -> Result { + self.address_space + .as_ref() + .map(|v| v.layout()) + .ok_or(AddressManagerError::GuestMemoryNotInitialized) + } + + /// Wait for the pre-allocation working threads to finish work. + /// + /// Force all working threads to exit if `stop` is true. 
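+ ///
+ /// Hedged usage sketch (an `as_mgr: AddressSpaceMgr` built with pre-allocation
+ /// enabled is assumed):
+ ///
+ /// ```ignore
+ /// // Block until the background page-touching threads have finished:
+ /// as_mgr.wait_prealloc(false)?;
+ /// // Or request an early stop and join the threads:
+ /// as_mgr.wait_prealloc(true)?;
+ /// ```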
+ pub fn wait_prealloc(&mut self, stop: bool) -> Result<()> { + if stop { + self.prealloc_exit.store(true, Ordering::Release); + } + while let Some(handlers) = self.prealloc_handlers.pop() { + if let Err(e) = handlers.join() { + error!("wait_prealloc join fail {:?}", e); + return Err(AddressManagerError::JoinFail); + } + } + Ok(()) + } +} + +impl Default for AddressSpaceMgr { + /// Create a new empty AddressSpaceMgr + fn default() -> Self { + AddressSpaceMgr { + address_space: None, + vm_as: None, + base_to_slot: Arc::new(Mutex::new(HashMap::new())), + prealloc_handlers: Vec::new(), + prealloc_exit: Arc::new(AtomicBool::new(false)), + numa_nodes: BTreeMap::new(), + } + } +} + +#[cfg(test)] +mod tests { + use dbs_boot::layout::GUEST_MEM_START; + use std::ops::Deref; + + use vm_memory::{Bytes, GuestAddressSpace, GuestMemory, GuestMemoryRegion}; + use vmm_sys_util::tempfile::TempFile; + + use super::*; + + #[test] + fn test_create_address_space() { + let res_mgr = ResourceManager::new(None); + let mem_size = 128 << 20; + let numa_region_infos = vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: vec![1, 2], + }]; + let builder = AddressSpaceMgrBuilder::new("shmem", "").unwrap(); + let as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + let vm_as = as_mgr.get_vm_as().unwrap(); + let guard = vm_as.memory(); + let gmem = guard.deref(); + assert_eq!(gmem.num_regions(), 1); + + let reg = gmem + .find_region(GuestAddress(GUEST_MEM_START + mem_size - 1)) + .unwrap(); + assert_eq!(reg.start_addr(), GuestAddress(GUEST_MEM_START)); + assert_eq!(reg.len(), mem_size); + assert!(gmem + .find_region(GuestAddress(GUEST_MEM_START + mem_size)) + .is_none()); + assert!(reg.file_offset().is_some()); + + let buf = [0x1u8, 0x2u8, 0x3u8, 0x4u8, 0x5u8]; + gmem.write_slice(&buf, GuestAddress(GUEST_MEM_START)) + .unwrap(); + + // Update middle of mapped memory region + let mut val = 0xa5u8; + gmem.write_obj(val, GuestAddress(GUEST_MEM_START + 0x1)) + .unwrap(); + val = gmem.read_obj(GuestAddress(GUEST_MEM_START + 0x1)).unwrap(); + assert_eq!(val, 0xa5); + val = gmem.read_obj(GuestAddress(GUEST_MEM_START)).unwrap(); + assert_eq!(val, 1); + val = gmem.read_obj(GuestAddress(GUEST_MEM_START + 0x2)).unwrap(); + assert_eq!(val, 3); + val = gmem.read_obj(GuestAddress(GUEST_MEM_START + 0x5)).unwrap(); + assert_eq!(val, 0); + + // Read ahead of mapped memory region + assert!(gmem + .read_obj::(GuestAddress(GUEST_MEM_START + mem_size)) + .is_err()); + + let res_mgr = ResourceManager::new(None); + let mem_size = dbs_boot::layout::MMIO_LOW_START + (1 << 30); + let numa_region_infos = vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: vec![1, 2], + }]; + let builder = AddressSpaceMgrBuilder::new("shmem", "").unwrap(); + let as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + let vm_as = as_mgr.get_vm_as().unwrap(); + let guard = vm_as.memory(); + let gmem = guard.deref(); + #[cfg(target_arch = "x86_64")] + assert_eq!(gmem.num_regions(), 2); + #[cfg(target_arch = "aarch64")] + assert_eq!(gmem.num_regions(), 1); + + // Test dropping GuestMemoryMmap object releases all resources. 
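+ // Note on how this check works (an assumption of the test, not an API guarantee):
+ // every iteration below opens a new shmem backing file, so a leaked fd or mapping
+ // would quickly exhaust the typical default per-process fd limit (often 1024).
+ // The TempFile opened afterwards therefore asserts that its raw fd is still small.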
+ for _ in 0..10000 { + let res_mgr = ResourceManager::new(None); + let mem_size = 1 << 20; + let numa_region_infos = vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: vec![1, 2], + }]; + let builder = AddressSpaceMgrBuilder::new("shmem", "").unwrap(); + let _as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + } + let file = TempFile::new().unwrap().into_file(); + let fd = file.as_raw_fd(); + // fd should be small enough if there's no leaking of fds. + assert!(fd < 1000); + } + + #[test] + fn test_address_space_mgr_get_boundary() { + let layout = AddressSpaceLayout::new( + *dbs_boot::layout::GUEST_PHYS_END, + dbs_boot::layout::GUEST_MEM_START, + *dbs_boot::layout::GUEST_MEM_END, + ); + let res_mgr = ResourceManager::new(None); + let mem_size = 128 << 20; + let numa_region_infos = vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: vec![1, 2], + }]; + let builder = AddressSpaceMgrBuilder::new("shmem", "").unwrap(); + let as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + assert_eq!(as_mgr.get_layout().unwrap(), layout); + } + + #[test] + fn test_address_space_mgr_get_numa_nodes() { + let res_mgr = ResourceManager::new(None); + let mem_size = 128 << 20; + let cpu_vec = vec![1, 2]; + let numa_region_infos = vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: cpu_vec.clone(), + }]; + let builder = AddressSpaceMgrBuilder::new("shmem", "").unwrap(); + let as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + let mut numa_node = NumaNode::new(); + numa_node.add_info(&NumaNodeInfo { + base: GuestAddress(GUEST_MEM_START), + size: mem_size, + }); + numa_node.add_vcpu_ids(&cpu_vec); + + assert_eq!(*as_mgr.get_numa_nodes().get(&0).unwrap(), numa_node); + } + + #[test] + fn test_address_space_mgr_async_prealloc() { + let res_mgr = ResourceManager::new(None); + let mem_size = 2 << 20; + let cpu_vec = vec![1, 2]; + let numa_region_infos = vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: cpu_vec, + }]; + let mut builder = AddressSpaceMgrBuilder::new("hugeshmem", "").unwrap(); + builder.toggle_prealloc(true); + let mut as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + as_mgr.wait_prealloc(false).unwrap(); + } + + #[test] + fn test_address_space_mgr_builder() { + let mut builder = AddressSpaceMgrBuilder::new("shmem", "/tmp/shmem").unwrap(); + + assert_eq!(builder.mem_type, "shmem"); + assert_eq!(builder.mem_file, "/tmp/shmem"); + assert_eq!(builder.mem_index, 0); + assert!(builder.mem_suffix); + assert!(!builder.mem_prealloc); + assert!(!builder.dirty_page_logging); + assert!(builder.vmfd.is_none()); + + assert_eq!(&builder.get_next_mem_file(), "/tmp/shmem0"); + assert_eq!(&builder.get_next_mem_file(), "/tmp/shmem1"); + assert_eq!(&builder.get_next_mem_file(), "/tmp/shmem2"); + assert_eq!(builder.mem_index, 3); + + builder.toggle_file_suffix(false); + assert_eq!(&builder.get_next_mem_file(), "/tmp/shmem"); + assert_eq!(&builder.get_next_mem_file(), "/tmp/shmem"); + assert_eq!(builder.mem_index, 3); + + builder.toggle_prealloc(true); + builder.toggle_dirty_page_logging(true); + assert!(builder.mem_prealloc); + assert!(builder.dirty_page_logging); + } + + #[test] + fn test_configure_invalid_numa() { + let res_mgr = ResourceManager::new(None); + let mem_size = 128 << 20; + let numa_region_infos = 
vec![NumaRegionInfo { + size: mem_size >> 20, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids: vec![1, 2], + }]; + let builder = AddressSpaceMgrBuilder::new("shmem", "").unwrap(); + let as_mgr = builder.build(&res_mgr, &numa_region_infos).unwrap(); + let mmap_reg = MmapRegion::new(8).unwrap(); + + assert!(as_mgr.configure_numa(&mmap_reg, u32::MAX).is_err()); + } +} diff --git a/src/dragonball/src/api/mod.rs b/src/dragonball/src/api/mod.rs new file mode 100644 index 000000000000..75ca6af690a0 --- /dev/null +++ b/src/dragonball/src/api/mod.rs @@ -0,0 +1,6 @@ +// Copyright (C) 2019-2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! API related data structures to configure the vmm. + +pub mod v1; diff --git a/src/dragonball/src/api/v1/boot_source.rs b/src/dragonball/src/api/v1/boot_source.rs new file mode 100644 index 000000000000..612de04a1819 --- /dev/null +++ b/src/dragonball/src/api/v1/boot_source.rs @@ -0,0 +1,55 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use serde_derive::{Deserialize, Serialize}; + +/// Default guest kernel command line: +/// - `reboot=k` shutdown the guest on reboot, instead of well... rebooting; +/// - `panic=1` on panic, reboot after 1 second; +/// - `pci=off` do not scan for PCI devices (ser boot time); +/// - `nomodules` disable loadable kernel module support; +/// - `8250.nr_uarts=0` disable 8250 serial interface; +/// - `i8042.noaux` do not probe the i8042 controller for an attached mouse (ser boot time); +/// - `i8042.nomux` do not probe i8042 for a multiplexing controller (ser boot time); +/// - `i8042.nopnp` do not use ACPIPnP to discover KBD/AUX controllers (ser boot time); +/// - `i8042.dumbkbd` do not attempt to control kbd state via the i8042 (ser boot time). +pub const DEFAULT_KERNEL_CMDLINE: &str = "reboot=k panic=1 pci=off nomodules 8250.nr_uarts=0 \ + i8042.noaux i8042.nomux i8042.nopnp i8042.dumbkbd"; + +/// Strongly typed data structure used to configure the boot source of the microvm. +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize, Default)] +#[serde(deny_unknown_fields)] +pub struct BootSourceConfig { + /// Path of the kernel image. + /// We only support uncompressed kernel for Dragonball. + pub kernel_path: String, + /// Path of the initrd, if there is one. + /// ps. rootfs is set in BlockDeviceConfigInfo + pub initrd_path: Option, + /// The boot arguments to pass to the kernel. + #[serde(skip_serializing_if = "Option::is_none")] + pub boot_args: Option, +} + +/// Errors associated with actions on `BootSourceConfig`. +#[derive(Debug, thiserror::Error)] +pub enum BootSourceConfigError { + /// The kernel file cannot be opened. + #[error( + "the kernel file cannot be opened due to invalid kernel path or invalid permissions: {0}" + )] + InvalidKernelPath(#[source] std::io::Error), + + /// The initrd file cannot be opened. + #[error("the initrd file cannot be opened due to invalid path or invalid permissions: {0}")] + InvalidInitrdPath(#[source] std::io::Error), + + /// The kernel command line is invalid. + #[error("the kernel command line is invalid: {0}")] + InvalidKernelCommandLine(#[source] linux_loader::cmdline::Error), + + /// The boot source cannot be update post boot. 
+ #[error("the update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, +} diff --git a/src/dragonball/src/api/v1/instance_info.rs b/src/dragonball/src/api/v1/instance_info.rs new file mode 100644 index 000000000000..45d03b414489 --- /dev/null +++ b/src/dragonball/src/api/v1/instance_info.rs @@ -0,0 +1,92 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use serde_derive::{Deserialize, Serialize}; + +/// The microvm state. +/// +/// When Dragonball starts, the instance state is Uninitialized. Once start_microvm method is +/// called, the state goes from Uninitialized to Starting. The state is changed to Running until +/// the start_microvm method ends. Halting and Halted are currently unsupported. +#[derive(Copy, Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] +pub enum InstanceState { + /// Microvm is not initialized. + Uninitialized, + /// Microvm is starting. + Starting, + /// Microvm is running. + Running, + /// Microvm is Paused. + Paused, + /// Microvm received a halt instruction. + Halting, + /// Microvm is halted. + Halted, + /// Microvm exit instead of process exit. + Exited(i32), +} + +/// The state of async actions +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)] +pub enum AsyncState { + /// Uninitialized + Uninitialized, + /// Success + Success, + /// Failure + Failure, +} + +/// The strongly typed that contains general information about the microVM. +#[derive(Debug, Deserialize, Serialize)] +pub struct InstanceInfo { + /// The ID of the microVM. + pub id: String, + /// The state of the microVM. + pub state: InstanceState, + /// The version of the VMM that runs the microVM. + pub vmm_version: String, + /// The pid of the current VMM process. + pub pid: u32, + /// The tid of the current VMM master thread. + pub master_tid: u32, + /// The state of async actions. + pub async_state: AsyncState, + /// List of tids of vcpu threads (vcpu index, tid) + pub tids: Vec<(u8, u32)>, + /// Last instance downtime + pub last_instance_downtime: u64, +} + +impl InstanceInfo { + /// create instance info object with given id, version, and platform type + pub fn new(id: String, vmm_version: String) -> Self { + InstanceInfo { + id, + state: InstanceState::Uninitialized, + vmm_version, + pid: std::process::id(), + master_tid: 0, + async_state: AsyncState::Uninitialized, + tids: Vec::new(), + last_instance_downtime: 0, + } + } +} + +impl Default for InstanceInfo { + fn default() -> Self { + InstanceInfo { + id: String::from(""), + state: InstanceState::Uninitialized, + vmm_version: env!("CARGO_PKG_VERSION").to_string(), + pid: std::process::id(), + master_tid: 0, + async_state: AsyncState::Uninitialized, + tids: Vec::new(), + last_instance_downtime: 0, + } + } +} diff --git a/src/dragonball/src/api/v1/machine_config.rs b/src/dragonball/src/api/v1/machine_config.rs new file mode 100644 index 000000000000..7d78f3e443e2 --- /dev/null +++ b/src/dragonball/src/api/v1/machine_config.rs @@ -0,0 +1,86 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +/// We only support this number of vcpus for now. Mostly because we have set all vcpu related metrics as u8 +/// and breaking u8 will take extra efforts. 
+pub const MAX_SUPPORTED_VCPUS: u8 = 254; + +/// Memory hotplug value should have alignment in this size (unit: MiB) +pub const MEMORY_HOTPLUG_ALIGHMENT: u8 = 64; + +/// Errors associated with configuring the microVM. +#[derive(Debug, PartialEq, Eq, thiserror::Error)] +pub enum VmConfigError { + /// Cannot update the configuration of the microvm post boot. + #[error("update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, + + /// The max vcpu count is invalid. + #[error("the vCPU number shouldn't large than {}", MAX_SUPPORTED_VCPUS)] + VcpuCountExceedsMaximum, + + /// The vcpu count is invalid. When hyperthreading is enabled, the `cpu_count` must be either + /// 1 or an even number. + #[error( + "the vCPU number '{0}' can only be 1 or an even number when hyperthreading is enabled" + )] + InvalidVcpuCount(u8), + + /// The threads_per_core is invalid. It should be either 1 or 2. + #[error("the threads_per_core number '{0}' can only be 1 or 2")] + InvalidThreadsPerCore(u8), + + /// The cores_per_die is invalid. It should be larger than 0. + #[error("the cores_per_die number '{0}' can only be larger than 0")] + InvalidCoresPerDie(u8), + + /// The dies_per_socket is invalid. It should be larger than 0. + #[error("the dies_per_socket number '{0}' can only be larger than 0")] + InvalidDiesPerSocket(u8), + + /// The socket number is invalid. It should be either 1 or 2. + #[error("the socket number '{0}' can only be 1 or 2")] + InvalidSocket(u8), + + /// max vcpu count inferred from cpu topology(threads_per_core * cores_per_die * dies_per_socket * sockets) should be larger or equal to vcpu_count + #[error("the max vcpu count inferred from cpu topology '{0}' (threads_per_core * cores_per_die * dies_per_socket * sockets) should be larger or equal to vcpu_count")] + InvalidCpuTopology(u8), + + /// The max vcpu count is invalid. + #[error( + "the max vCPU number '{0}' shouldn't less than vCPU count and can only be 1 or an even number when hyperthreading is enabled" + )] + InvalidMaxVcpuCount(u8), + + /// The memory size is invalid. The memory can only be an unsigned integer. + #[error("the memory size 0x{0:x}MiB is invalid")] + InvalidMemorySize(usize), + + /// The hotplug memory size is invalid. The memory can only be an unsigned integer. + #[error( + "the hotplug memory size '{0}' (MiB) is invalid, must be multiple of {}", + MEMORY_HOTPLUG_ALIGHMENT + )] + InvalidHotplugMemorySize(usize), + + /// The memory type is invalid. + #[error("the memory type '{0}' is invalid")] + InvalidMemType(String), + + /// The memory file path is invalid. + #[error("the memory file path is invalid")] + InvalidMemFilePath(String), + + /// NUMA region memory size is invalid + #[error("Total size of memory in NUMA regions: {0}, should matches memory size in config")] + InvalidNumaRegionMemorySize(usize), + + /// NUMA region vCPU count is invalid + #[error("Total counts of vCPUs in NUMA regions: {0}, should matches max vcpu count in config")] + InvalidNumaRegionCpuCount(u16), + + /// NUMA region vCPU count is invalid + #[error("Max id of vCPUs in NUMA regions: {0}, should matches max vcpu count in config")] + InvalidNumaRegionCpuMaxId(u16), +} diff --git a/src/dragonball/src/api/v1/mod.rs b/src/dragonball/src/api/v1/mod.rs new file mode 100644 index 000000000000..99e3075ebb41 --- /dev/null +++ b/src/dragonball/src/api/v1/mod.rs @@ -0,0 +1,19 @@ +// Copyright (C) 2019-2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! 
API Version 1 related data structures to configure the vmm. + +mod vmm_action; +pub use self::vmm_action::*; + +/// Wrapper for configuring the microVM boot source. +mod boot_source; +pub use self::boot_source::{BootSourceConfig, BootSourceConfigError, DEFAULT_KERNEL_CMDLINE}; + +/// Wrapper over the microVM general information. +mod instance_info; +pub use self::instance_info::{InstanceInfo, InstanceState}; + +/// Wrapper for configuring the memory and CPU of the microVM. +mod machine_config; +pub use self::machine_config::{VmConfigError, MAX_SUPPORTED_VCPUS}; diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs new file mode 100644 index 000000000000..f8914f10d9f9 --- /dev/null +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -0,0 +1,1649 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::fs::File; + +use crossbeam_channel::{Receiver, Sender, TryRecvError}; +use log::{debug, error, info, warn}; + +use crate::error::{Result, StartMicroVmError, StopMicrovmError}; +use crate::event_manager::EventManager; +use crate::vm::{CpuTopology, KernelConfigInfo, VmConfigInfo}; +use crate::vmm::Vmm; + +use crate::hypervisor_metrics::get_hypervisor_metrics; + +use self::VmConfigError::*; +use self::VmmActionError::MachineConfig; + +#[cfg(feature = "virtio-balloon")] +pub use crate::device_manager::balloon_dev_mgr::{BalloonDeviceConfigInfo, BalloonDeviceError}; +#[cfg(feature = "virtio-blk")] +pub use crate::device_manager::blk_dev_mgr::{ + BlockDeviceConfigInfo, BlockDeviceConfigUpdateInfo, BlockDeviceError, BlockDeviceMgr, +}; +#[cfg(feature = "virtio-fs")] +pub use crate::device_manager::fs_dev_mgr::{ + FsDeviceConfigInfo, FsDeviceConfigUpdateInfo, FsDeviceError, FsDeviceMgr, FsMountConfigInfo, +}; +#[cfg(feature = "virtio-mem")] +pub use crate::device_manager::mem_dev_mgr::{MemDeviceConfigInfo, MemDeviceError}; +#[cfg(feature = "virtio-net")] +pub use crate::device_manager::virtio_net_dev_mgr::{ + VirtioNetDeviceConfigInfo, VirtioNetDeviceConfigUpdateInfo, VirtioNetDeviceError, + VirtioNetDeviceMgr, +}; +#[cfg(feature = "virtio-vsock")] +pub use crate::device_manager::vsock_dev_mgr::{VsockDeviceConfigInfo, VsockDeviceError}; +#[cfg(feature = "hotplug")] +pub use crate::vcpu::{VcpuResizeError, VcpuResizeInfo}; + +use super::*; + +/// Wrapper for all errors associated with VMM actions. +#[derive(Debug, thiserror::Error)] +pub enum VmmActionError { + /// Invalid virtual machine instance ID. + #[error("the virtual machine instance ID is invalid")] + InvalidVMID, + + /// VM doesn't exist and can't get VM information. + #[error("VM doesn't exist and can't get VM information")] + VmNotExist, + + /// Failed to hotplug, due to Upcall not ready. + #[error("Upcall not ready, can't hotplug device.")] + UpcallServerNotReady, + + /// Error when get prometheus metrics. + /// Currently does not distinguish between error types for metrics. + #[error("failed to get hypervisor metrics")] + GetHypervisorMetrics, + + /// The action `ConfigureBootSource` failed either because of bad user input or an internal + /// error. 
+ #[error("failed to configure boot source for VM: {0}")] + BootSource(#[source] BootSourceConfigError), + + /// The action `StartMicroVm` failed either because of bad user input or an internal error. + #[error("failed to boot the VM: {0}")] + StartMicroVm(#[source] StartMicroVmError), + + /// The action `StopMicroVm` failed either because of bad user input or an internal error. + #[error("failed to shutdown the VM: {0}")] + StopMicrovm(#[source] StopMicrovmError), + + /// One of the actions `GetVmConfiguration` or `SetVmConfiguration` failed either because of bad + /// input or an internal error. + #[error("failed to set configuration for the VM: {0}")] + MachineConfig(#[source] VmConfigError), + + #[cfg(feature = "virtio-vsock")] + /// The action `InsertVsockDevice` failed either because of bad user input or an internal error. + #[error("failed to add virtio-vsock device: {0}")] + Vsock(#[source] VsockDeviceError), + + #[cfg(feature = "virtio-blk")] + /// Block device related errors. + #[error("virtio-blk device error: {0}")] + Block(#[source] BlockDeviceError), + + #[cfg(feature = "virtio-net")] + /// Net device related errors. + #[error("virtio-net device error: {0}")] + VirtioNet(#[source] VirtioNetDeviceError), + + #[cfg(feature = "virtio-fs")] + /// The action `InsertFsDevice` failed either because of bad user input or an internal error. + #[error("virtio-fs device error: {0}")] + FsDevice(#[source] FsDeviceError), + + #[cfg(feature = "hotplug")] + /// The action `ResizeVcpu` Failed + #[error("vcpu resize error : {0}")] + ResizeVcpu(#[source] VcpuResizeError), + + /// Cannot access address space. + #[error("Cannot access address space.")] + AddressSpaceNotInitialized, + + #[cfg(feature = "virtio-mem")] + /// Mem device related errors. + #[error("virtio-mem device error: {0}")] + Mem(#[source] MemDeviceError), + + #[cfg(feature = "virtio-balloon")] + /// Balloon device related errors. + #[error("virtio-balloon device error: {0}")] + Balloon(#[source] BalloonDeviceError), +} + +/// This enum represents the public interface of the VMM. Each action contains various +/// bits of information (ids, paths, etc.). +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum VmmAction { + /// Configure the boot source of the microVM using `BootSourceConfig`. + /// This action can only be called before the microVM has booted. + ConfigureBootSource(BootSourceConfig), + + /// Launch the microVM. This action can only be called before the microVM has booted. + StartMicroVm, + + /// Shutdown the vmicroVM. This action can only be called after the microVM has booted. + /// When vmm is used as the crate by the other process, which is need to + /// shutdown the vcpu threads and destory all of the object. + ShutdownMicroVm, + + /// Get the configuration of the microVM. + GetVmConfiguration, + + /// Get Prometheus Metrics. + GetHypervisorMetrics, + + /// Set the microVM configuration (memory & vcpu) using `VmConfig` as input. This + /// action can only be called before the microVM has booted. + SetVmConfiguration(VmConfigInfo), + + #[cfg(feature = "virtio-vsock")] + /// Add a new vsock device or update one that already exists using the + /// `VsockDeviceConfig` as input. This action can only be called before the microVM has + /// booted. The response is sent using the `OutcomeSender`. + InsertVsockDevice(VsockDeviceConfigInfo), + + #[cfg(feature = "virtio-blk")] + /// Add a new block device or update one that already exists using the `BlockDeviceConfig` as + /// input. 
This action can only be called before the microVM has booted. + InsertBlockDevice(BlockDeviceConfigInfo), + + #[cfg(feature = "virtio-blk")] + /// Remove a new block device for according to given drive_id + RemoveBlockDevice(String), + + #[cfg(feature = "virtio-blk")] + /// Update a block device, after microVM start. Currently, the only updatable properties + /// are the RX and TX rate limiters. + UpdateBlockDevice(BlockDeviceConfigUpdateInfo), + + #[cfg(feature = "virtio-net")] + /// Add a new network interface config or update one that already exists using the + /// `NetworkInterfaceConfig` as input. This action can only be called before the microVM has + /// booted. The response is sent using the `OutcomeSender`. + InsertNetworkDevice(VirtioNetDeviceConfigInfo), + + #[cfg(feature = "virtio-net")] + /// Update a network interface, after microVM start. Currently, the only updatable properties + /// are the RX and TX rate limiters. + UpdateNetworkInterface(VirtioNetDeviceConfigUpdateInfo), + + #[cfg(feature = "virtio-fs")] + /// Add a new shared fs device or update one that already exists using the + /// `FsDeviceConfig` as input. This action can only be called before the microVM has + /// booted. + InsertFsDevice(FsDeviceConfigInfo), + + #[cfg(feature = "virtio-fs")] + /// Attach a new virtiofs Backend fs or detach an existing virtiofs Backend fs using the + /// `FsMountConfig` as input. This action can only be called _after_ the microVM has + /// booted. + ManipulateFsBackendFs(FsMountConfigInfo), + + #[cfg(feature = "virtio-fs")] + /// Update fs rate limiter, after microVM start. + UpdateFsDevice(FsDeviceConfigUpdateInfo), + + #[cfg(feature = "hotplug")] + /// Resize Vcpu number in the guest. + ResizeVcpu(VcpuResizeInfo), + + #[cfg(feature = "virtio-mem")] + /// Add a new mem device or update one that already exists using the `MemDeviceConfig` as input. + InsertMemDevice(MemDeviceConfigInfo), + + #[cfg(feature = "virtio-balloon")] + /// Add a new balloon device or update one that already exists using the `BalloonDeviceConfig` + /// as input. + InsertBalloonDevice(BalloonDeviceConfigInfo), +} + +/// The enum represents the response sent by the VMM in case of success. The response is either +/// empty, when no data needs to be sent, or an internal VMM structure. +#[derive(Debug)] +pub enum VmmData { + /// No data is sent on the channel. + Empty, + /// The microVM configuration represented by `VmConfigInfo`. + MachineConfiguration(Box), + /// Prometheus Metrics represented by String. + HypervisorMetrics(String), +} + +/// Request data type used to communicate between the API and the VMM. +pub type VmmRequest = Box; + +/// Data type used to communicate between the API and the VMM. +pub type VmmRequestResult = std::result::Result; + +/// Response data type used to communicate between the API and the VMM. +pub type VmmResponse = Box; + +/// VMM Service to handle requests from the API server. +/// +/// There are two levels of API servers as below: +/// API client <--> VMM API Server <--> VMM Core +pub struct VmmService { + from_api: Receiver, + to_api: Sender, + machine_config: VmConfigInfo, +} + +impl VmmService { + /// Create a new VMM API server instance. + pub fn new(from_api: Receiver, to_api: Sender) -> Self { + VmmService { + from_api, + to_api, + machine_config: VmConfigInfo::default(), + } + } + + /// Handle requests from the HTTP API Server and send back replies. 
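+ ///
+ /// Hedged usage sketch, mirroring the unit tests below; the `vmm` and `event_mgr`
+ /// values are assumed to come from the surrounding VMM setup:
+ ///
+ /// ```ignore
+ /// let (to_vmm, from_api) = crossbeam_channel::unbounded();
+ /// let (to_api, from_vmm) = crossbeam_channel::unbounded();
+ /// let mut service = VmmService::new(from_api, to_api);
+ ///
+ /// to_vmm.send(Box::new(VmmAction::GetVmConfiguration)).unwrap();
+ /// service.run_vmm_action(&mut vmm, &mut event_mgr).unwrap();
+ /// let response: VmmRequestResult = *from_vmm.recv().unwrap();
+ /// ```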
+ pub fn run_vmm_action(&mut self, vmm: &mut Vmm, event_mgr: &mut EventManager) -> Result<()> { + let request = match self.from_api.try_recv() { + Ok(t) => *t, + Err(TryRecvError::Empty) => { + warn!("Got a spurious notification from api thread"); + return Ok(()); + } + Err(TryRecvError::Disconnected) => { + panic!("The channel's sending half was disconnected. Cannot receive data."); + } + }; + debug!("receive vmm action: {:?}", request); + + let response = match request { + VmmAction::ConfigureBootSource(boot_source_body) => { + self.configure_boot_source(vmm, boot_source_body) + } + VmmAction::StartMicroVm => self.start_microvm(vmm, event_mgr), + VmmAction::ShutdownMicroVm => self.shutdown_microvm(vmm), + VmmAction::GetVmConfiguration => Ok(VmmData::MachineConfiguration(Box::new( + self.machine_config.clone(), + ))), + VmmAction::GetHypervisorMetrics => self.get_hypervisor_metrics(), + VmmAction::SetVmConfiguration(machine_config) => { + self.set_vm_configuration(vmm, machine_config) + } + #[cfg(feature = "virtio-vsock")] + VmmAction::InsertVsockDevice(vsock_cfg) => self.add_vsock_device(vmm, vsock_cfg), + #[cfg(feature = "virtio-blk")] + VmmAction::InsertBlockDevice(block_device_config) => { + self.add_block_device(vmm, event_mgr, block_device_config) + } + #[cfg(feature = "virtio-blk")] + VmmAction::UpdateBlockDevice(blk_update) => { + self.update_blk_rate_limiters(vmm, blk_update) + } + #[cfg(feature = "virtio-blk")] + VmmAction::RemoveBlockDevice(drive_id) => { + self.remove_block_device(vmm, event_mgr, &drive_id) + } + #[cfg(feature = "virtio-net")] + VmmAction::InsertNetworkDevice(virtio_net_cfg) => { + self.add_virtio_net_device(vmm, event_mgr, virtio_net_cfg) + } + #[cfg(feature = "virtio-net")] + VmmAction::UpdateNetworkInterface(netif_update) => { + self.update_net_rate_limiters(vmm, netif_update) + } + #[cfg(feature = "virtio-fs")] + VmmAction::InsertFsDevice(fs_cfg) => self.add_fs_device(vmm, fs_cfg), + + #[cfg(feature = "virtio-fs")] + VmmAction::ManipulateFsBackendFs(fs_mount_cfg) => { + self.manipulate_fs_backend_fs(vmm, fs_mount_cfg) + } + #[cfg(feature = "virtio-fs")] + VmmAction::UpdateFsDevice(fs_update_cfg) => { + self.update_fs_rate_limiters(vmm, fs_update_cfg) + } + #[cfg(feature = "hotplug")] + VmmAction::ResizeVcpu(vcpu_resize_cfg) => self.resize_vcpu(vmm, vcpu_resize_cfg), + #[cfg(feature = "virtio-mem")] + VmmAction::InsertMemDevice(mem_cfg) => self.add_mem_device(vmm, event_mgr, mem_cfg), + #[cfg(feature = "virtio-balloon")] + VmmAction::InsertBalloonDevice(balloon_cfg) => { + self.add_balloon_device(vmm, event_mgr, balloon_cfg) + } + }; + + debug!("send vmm response: {:?}", response); + self.send_response(response) + } + + fn send_response(&self, result: VmmRequestResult) -> Result<()> { + self.to_api + .send(Box::new(result)) + .map_err(|_| ()) + .expect("vmm: one-shot API result channel has been closed"); + + Ok(()) + } + + fn configure_boot_source( + &self, + vmm: &mut Vmm, + boot_source_config: BootSourceConfig, + ) -> VmmRequestResult { + use super::BootSourceConfigError::{ + InvalidInitrdPath, InvalidKernelCommandLine, InvalidKernelPath, + UpdateNotAllowedPostBoot, + }; + use super::VmmActionError::BootSource; + + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + if vm.is_vm_initialized() { + return Err(BootSource(UpdateNotAllowedPostBoot)); + } + + let kernel_file = File::open(&boot_source_config.kernel_path) + .map_err(|e| BootSource(InvalidKernelPath(e)))?; + + let initrd_file = match boot_source_config.initrd_path { + None => None, + 
Some(ref path) => Some(File::open(path).map_err(|e| BootSource(InvalidInitrdPath(e)))?), + }; + + let mut cmdline = linux_loader::cmdline::Cmdline::new(dbs_boot::layout::CMDLINE_MAX_SIZE) + .map_err(|err| BootSource(InvalidKernelCommandLine(err)))?; + let boot_args = boot_source_config + .boot_args + .unwrap_or_else(|| String::from(DEFAULT_KERNEL_CMDLINE)); + cmdline + .insert_str(boot_args) + .map_err(|e| BootSource(InvalidKernelCommandLine(e)))?; + + let kernel_config = KernelConfigInfo::new(kernel_file, initrd_file, cmdline); + vm.set_kernel_config(kernel_config); + + Ok(VmmData::Empty) + } + + fn start_microvm(&mut self, vmm: &mut Vmm, event_mgr: &mut EventManager) -> VmmRequestResult { + use self::StartMicroVmError::MicroVMAlreadyRunning; + use self::VmmActionError::StartMicroVm; + + let vmm_seccomp_filter = vmm.vmm_seccomp_filter(); + let vcpu_seccomp_filter = vmm.vcpu_seccomp_filter(); + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + if vm.is_vm_initialized() { + return Err(StartMicroVm(MicroVMAlreadyRunning)); + } + + vm.start_microvm(event_mgr, vmm_seccomp_filter, vcpu_seccomp_filter) + .map(|_| VmmData::Empty) + .map_err(StartMicroVm) + } + + fn shutdown_microvm(&mut self, vmm: &mut Vmm) -> VmmRequestResult { + vmm.event_ctx.exit_evt_triggered = true; + + Ok(VmmData::Empty) + } + + /// Get prometheus metrics. + fn get_hypervisor_metrics(&self) -> VmmRequestResult { + get_hypervisor_metrics() + .map_err(|_| VmmActionError::GetHypervisorMetrics) + .map(VmmData::HypervisorMetrics) + } + + /// Set virtual machine configuration. + pub fn set_vm_configuration( + &mut self, + vmm: &mut Vmm, + machine_config: VmConfigInfo, + ) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + if vm.is_vm_initialized() { + return Err(MachineConfig(UpdateNotAllowedPostBoot)); + } + + // If the check is successful, set it up together. + let mut config = vm.vm_config().clone(); + if config.vcpu_count != machine_config.vcpu_count { + let vcpu_count = machine_config.vcpu_count; + // Check that the vcpu_count value is >=1. + if vcpu_count == 0 { + return Err(MachineConfig(InvalidVcpuCount(vcpu_count))); + } + config.vcpu_count = vcpu_count; + } + + if config.cpu_topology != machine_config.cpu_topology { + let cpu_topology = &machine_config.cpu_topology; + config.cpu_topology = handle_cpu_topology(cpu_topology, config.vcpu_count)?.clone(); + } else { + // the same default + let mut default_cpu_topology = CpuTopology { + threads_per_core: 1, + cores_per_die: config.vcpu_count, + dies_per_socket: 1, + sockets: 1, + }; + if machine_config.max_vcpu_count > config.vcpu_count { + default_cpu_topology.cores_per_die = machine_config.max_vcpu_count; + } + config.cpu_topology = default_cpu_topology; + } + let cpu_topology = &config.cpu_topology; + let max_vcpu_from_topo = cpu_topology.threads_per_core + * cpu_topology.cores_per_die + * cpu_topology.dies_per_socket + * cpu_topology.sockets; + // If the max_vcpu_count inferred by cpu_topology is not equal to + // max_vcpu_count, max_vcpu_count will be changed. currently, max vcpu size + // is used when cpu_topology is not defined and help define the cores_per_die + // for the default cpu topology. 
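+ // Worked example (illustrative numbers only): threads_per_core = 2, cores_per_die = 4,
+ // dies_per_socket = 1 and sockets = 1 imply 2 * 4 * 1 * 1 = 8 vCPUs, so a requested
+ // max_vcpu_count of 6 would be overridden to 8 below (after first checking that it is
+ // not smaller than vcpu_count).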
+ let mut max_vcpu_count = machine_config.max_vcpu_count; + if max_vcpu_count < config.vcpu_count { + return Err(MachineConfig(InvalidMaxVcpuCount(max_vcpu_count))); + } + if max_vcpu_from_topo != max_vcpu_count { + max_vcpu_count = max_vcpu_from_topo; + info!("Since max_vcpu_count is not equal to cpu topo information, we have changed the max vcpu count to {}", max_vcpu_from_topo); + } + config.max_vcpu_count = max_vcpu_count; + + config.cpu_pm = machine_config.cpu_pm; + config.mem_type = machine_config.mem_type; + + let mem_size_mib_value = machine_config.mem_size_mib; + // Support 1TB memory at most, 2MB aligned for huge page. + if mem_size_mib_value == 0 || mem_size_mib_value > 0x10_0000 || mem_size_mib_value % 2 != 0 + { + return Err(MachineConfig(InvalidMemorySize(mem_size_mib_value))); + } + config.mem_size_mib = mem_size_mib_value; + + config.mem_file_path = machine_config.mem_file_path.clone(); + + if config.mem_type == "hugetlbfs" && config.mem_file_path.is_empty() { + return Err(MachineConfig(InvalidMemFilePath("".to_owned()))); + } + config.vpmu_feature = machine_config.vpmu_feature; + + // If serial_path is: + // - None, legacy_manager will create_stdio_console. + // - Some(path), legacy_manager will create_socket_console on that path. + config.serial_path = machine_config.serial_path; + + vm.set_vm_config(config.clone()); + self.machine_config = config; + + Ok(VmmData::Empty) + } + + #[cfg(feature = "virtio-vsock")] + fn add_vsock_device(&self, vmm: &mut Vmm, config: VsockDeviceConfigInfo) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + if vm.is_vm_initialized() { + return Err(VmmActionError::Vsock( + VsockDeviceError::UpdateNotAllowedPostBoot, + )); + } + + // VMADDR_CID_ANY (-1U) means any address for binding; + // VMADDR_CID_HYPERVISOR (0) is reserved for services built into the hypervisor; + // VMADDR_CID_RESERVED (1) must not be used; + // VMADDR_CID_HOST (2) is the well-known address of the host. + if config.guest_cid <= 2 { + return Err(VmmActionError::Vsock(VsockDeviceError::GuestCIDInvalid( + config.guest_cid, + ))); + } + + info!("add_vsock_device: {:?}", config); + let ctx = vm.create_device_op_context(None).map_err(|e| { + info!("create device op context error: {:?}", e); + VmmActionError::Vsock(VsockDeviceError::UpdateNotAllowedPostBoot) + })?; + + vm.device_manager_mut() + .vsock_manager + .insert_device(ctx, config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::Vsock) + } + + #[cfg(feature = "virtio-blk")] + // Only call this function as part of the API. + // If the drive_id does not exist, a new Block Device Config is added to the list. + fn add_block_device( + &mut self, + vmm: &mut Vmm, + event_mgr: &mut EventManager, + config: BlockDeviceConfigInfo, + ) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + let ctx = vm + .create_device_op_context(Some(event_mgr.epoll_manager())) + .map_err(|e| { + if let StartMicroVmError::UpcallServerNotReady = e { + return VmmActionError::UpcallServerNotReady; + } + VmmActionError::Block(BlockDeviceError::UpdateNotAllowedPostBoot) + })?; + + vm.device_manager_mut() + .block_manager + .insert_device(ctx, config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::Block) + } + + #[cfg(feature = "virtio-blk")] + /// Updates configuration for an emulated net device as described in `config`. 
+ fn update_blk_rate_limiters( + &mut self, + vmm: &mut Vmm, + config: BlockDeviceConfigUpdateInfo, + ) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + + vm.device_manager_mut() + .block_manager + .update_device_ratelimiters(config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::Block) + } + + #[cfg(feature = "virtio-blk")] + // Remove the device + fn remove_block_device( + &mut self, + vmm: &mut Vmm, + event_mgr: &mut EventManager, + drive_id: &str, + ) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + let ctx = vm + .create_device_op_context(Some(event_mgr.epoll_manager())) + .map_err(|_| VmmActionError::Block(BlockDeviceError::UpdateNotAllowedPostBoot))?; + + vm.device_manager_mut() + .block_manager + .remove_device(ctx, drive_id) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::Block) + } + + #[cfg(feature = "virtio-net")] + fn add_virtio_net_device( + &mut self, + vmm: &mut Vmm, + event_mgr: &mut EventManager, + config: VirtioNetDeviceConfigInfo, + ) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + let ctx = vm + .create_device_op_context(Some(event_mgr.epoll_manager())) + .map_err(|e| { + if let StartMicroVmError::MicroVMAlreadyRunning = e { + VmmActionError::VirtioNet(VirtioNetDeviceError::UpdateNotAllowedPostBoot) + } else if let StartMicroVmError::UpcallServerNotReady = e { + VmmActionError::UpcallServerNotReady + } else { + VmmActionError::StartMicroVm(e) + } + })?; + + vm.device_manager_mut() + .virtio_net_manager + .insert_device(ctx, config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::VirtioNet) + } + + #[cfg(feature = "virtio-net")] + fn update_net_rate_limiters( + &mut self, + vmm: &mut Vmm, + config: VirtioNetDeviceConfigUpdateInfo, + ) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + + vm.device_manager_mut() + .virtio_net_manager + .update_device_ratelimiters(config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::VirtioNet) + } + + #[cfg(feature = "virtio-fs")] + fn add_fs_device(&mut self, vmm: &mut Vmm, config: FsDeviceConfigInfo) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + let hotplug = vm.is_vm_initialized(); + if !cfg!(feature = "hotplug") && hotplug { + return Err(VmmActionError::FsDevice( + FsDeviceError::UpdateNotAllowedPostBoot, + )); + } + + let ctx = vm.create_device_op_context(None).map_err(|e| { + info!("create device op context error: {:?}", e); + VmmActionError::FsDevice(FsDeviceError::UpdateNotAllowedPostBoot) + })?; + FsDeviceMgr::insert_device(vm.device_manager_mut(), ctx, config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::FsDevice) + } + + #[cfg(feature = "virtio-fs")] + fn manipulate_fs_backend_fs( + &self, + vmm: &mut Vmm, + config: FsMountConfigInfo, + ) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + + if !vm.is_vm_initialized() { + return Err(VmmActionError::FsDevice(FsDeviceError::MicroVMNotRunning)); + } + + FsDeviceMgr::manipulate_backend_fs(vm.device_manager_mut(), config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::FsDevice) + } + + #[cfg(feature = "virtio-fs")] + fn update_fs_rate_limiters( + &self, + vmm: &mut Vmm, + config: FsDeviceConfigUpdateInfo, + ) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + + if !vm.is_vm_initialized() { + return Err(VmmActionError::FsDevice(FsDeviceError::MicroVMNotRunning)); + } + 
+ FsDeviceMgr::update_device_ratelimiters(vm.device_manager_mut(), config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::FsDevice) + } + + #[cfg(feature = "hotplug")] + fn resize_vcpu(&mut self, vmm: &mut Vmm, config: VcpuResizeInfo) -> VmmRequestResult { + if !cfg!(feature = "dbs-upcall") { + warn!("We only support cpu resize through upcall server in the guest kernel now, please enable dbs-upcall feature."); + return Ok(VmmData::Empty); + } + + let vm = vmm.get_vm_mut().ok_or(VmmActionError::VmNotExist)?; + + if !vm.is_vm_initialized() { + return Err(VmmActionError::ResizeVcpu( + VcpuResizeError::UpdateNotAllowedPreBoot, + )); + } + + #[cfg(feature = "dbs-upcall")] + vm.resize_vcpu(config, None).map_err(|e| { + if let VcpuResizeError::UpcallServerNotReady = e { + return VmmActionError::UpcallServerNotReady; + } + VmmActionError::ResizeVcpu(e) + })?; + + Ok(VmmData::Empty) + } + + #[cfg(feature = "virtio-mem")] + fn add_mem_device( + &mut self, + vmm: &mut Vmm, + event_mgr: &mut EventManager, + config: MemDeviceConfigInfo, + ) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + + let ctx = vm + .create_device_op_context(Some(event_mgr.epoll_manager())) + .map_err(|e| { + if let StartMicroVmError::UpcallServerNotReady = e { + VmmActionError::UpcallServerNotReady + } else { + VmmActionError::StartMicroVm(e) + } + })?; + + vm.device_manager_mut() + .mem_manager + .insert_or_update_device(ctx, config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::Mem) + } + + #[cfg(feature = "virtio-balloon")] + fn add_balloon_device( + &mut self, + vmm: &mut Vmm, + event_mgr: &mut EventManager, + config: BalloonDeviceConfigInfo, + ) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; + + if config.size_mib != 0 { + info!("add_balloon_device: wait prealloc"); + vm.stop_prealloc().map_err(VmmActionError::StartMicroVm)?; + } + let ctx = vm + .create_device_op_context(Some(event_mgr.epoll_manager())) + .map_err(|e| { + if let StartMicroVmError::UpcallServerNotReady = e { + VmmActionError::UpcallServerNotReady + } else { + VmmActionError::StartMicroVm(e) + } + })?; + + vm.device_manager_mut() + .balloon_manager + .insert_or_update_device(ctx, config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::Balloon) + } +} + +fn handle_cpu_topology( + cpu_topology: &CpuTopology, + vcpu_count: u8, +) -> std::result::Result<&CpuTopology, VmmActionError> { + // Check if dies_per_socket, cores_per_die, threads_per_core and socket number is valid + if cpu_topology.threads_per_core < 1 || cpu_topology.threads_per_core > 2 { + return Err(MachineConfig(InvalidThreadsPerCore( + cpu_topology.threads_per_core, + ))); + } + let vcpu_count_from_topo = cpu_topology + .sockets + .checked_mul(cpu_topology.dies_per_socket) + .ok_or(MachineConfig(VcpuCountExceedsMaximum))? + .checked_mul(cpu_topology.cores_per_die) + .ok_or(MachineConfig(VcpuCountExceedsMaximum))? 
+ .checked_mul(cpu_topology.threads_per_core) + .ok_or(MachineConfig(VcpuCountExceedsMaximum))?; + if vcpu_count_from_topo > MAX_SUPPORTED_VCPUS { + return Err(MachineConfig(VcpuCountExceedsMaximum)); + } + if vcpu_count_from_topo < vcpu_count { + return Err(MachineConfig(InvalidCpuTopology(vcpu_count_from_topo))); + } + + Ok(cpu_topology) +} + +#[cfg(test)] +mod tests { + use std::sync::{Arc, Mutex}; + + use crossbeam_channel::unbounded; + use dbs_utils::epoll_manager::EpollManager; + use test_utils::skip_if_not_root; + use vmm_sys_util::tempfile::TempFile; + + use super::*; + use crate::vmm::tests::create_vmm_instance; + + struct TestData<'a> { + req: Option, + vm_state: InstanceState, + f: &'a dyn Fn(VmmRequestResult), + } + + impl<'a> TestData<'a> { + fn new(req: VmmAction, vm_state: InstanceState, f: &'a dyn Fn(VmmRequestResult)) -> Self { + Self { + req: Some(req), + vm_state, + f, + } + } + + fn check_request(&mut self) { + let (to_vmm, from_api) = unbounded(); + let (to_api, from_vmm) = unbounded(); + + let epoll_mgr = EpollManager::default(); + let vmm = Arc::new(Mutex::new(create_vmm_instance(epoll_mgr.clone()))); + let mut vservice = VmmService::new(from_api, to_api); + + let mut event_mgr = EventManager::new(&vmm, epoll_mgr).unwrap(); + let mut v = vmm.lock().unwrap(); + + let vm = v.get_vm_mut().unwrap(); + vm.set_instance_state(self.vm_state); + + to_vmm.send(Box::new(self.req.take().unwrap())).unwrap(); + assert!(vservice.run_vmm_action(&mut v, &mut event_mgr).is_ok()); + + let response = from_vmm.try_recv(); + assert!(response.is_ok()); + (self.f)(*response.unwrap()); + } + } + + #[test] + fn test_vmm_action_receive_unknown() { + skip_if_not_root!(); + + let (_to_vmm, from_api) = unbounded(); + let (to_api, _from_vmm) = unbounded(); + let epoll_mgr = EpollManager::default(); + let vmm = Arc::new(Mutex::new(create_vmm_instance(epoll_mgr.clone()))); + let mut vservice = VmmService::new(from_api, to_api); + let mut event_mgr = EventManager::new(&vmm, epoll_mgr).unwrap(); + let mut v = vmm.lock().unwrap(); + + assert!(vservice.run_vmm_action(&mut v, &mut event_mgr).is_ok()); + } + + #[should_panic] + #[test] + fn test_vmm_action_disconnected() { + let (to_vmm, from_api) = unbounded(); + let (to_api, _from_vmm) = unbounded(); + let epoll_mgr = EpollManager::default(); + let vmm = Arc::new(Mutex::new(create_vmm_instance(epoll_mgr.clone()))); + let mut vservice = VmmService::new(from_api, to_api); + let mut event_mgr = EventManager::new(&vmm, epoll_mgr).unwrap(); + let mut v = vmm.lock().unwrap(); + + drop(to_vmm); + vservice.run_vmm_action(&mut v, &mut event_mgr).unwrap(); + } + + #[test] + fn test_vmm_action_config_boot_source() { + skip_if_not_root!(); + + let kernel_file = TempFile::new().unwrap(); + + let tests = &mut [ + // invalid state + TestData::new( + VmmAction::ConfigureBootSource(BootSourceConfig::default()), + InstanceState::Running, + &|result| { + if let Err(VmmActionError::BootSource( + BootSourceConfigError::UpdateNotAllowedPostBoot, + )) = result + { + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to configure boot source for VM: \ + the update operation is not allowed after boot", + ); + assert_eq!(err_string, expected_err); + } else { + panic!(); + } + }, + ), + // invalid kernel file path + TestData::new( + VmmAction::ConfigureBootSource(BootSourceConfig::default()), + InstanceState::Uninitialized, + &|result| { + if let Err(VmmActionError::BootSource( + BootSourceConfigError::InvalidKernelPath(_), + )) = 
result + { + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to configure boot source for VM: \ + the kernel file cannot be opened due to invalid kernel path or invalid permissions: \ + No such file or directory (os error 2)"); + assert_eq!(err_string, expected_err); + } else { + panic!(); + } + }, + ), + //success + TestData::new( + VmmAction::ConfigureBootSource(BootSourceConfig { + kernel_path: kernel_file.as_path().to_str().unwrap().to_string(), + ..Default::default() + }), + InstanceState::Uninitialized, + &|result| { + assert!(result.is_ok()); + }, + ), + ]; + + for t in tests.iter_mut() { + t.check_request(); + } + } + + #[test] + fn test_vmm_action_set_vm_configuration() { + skip_if_not_root!(); + + let tests = &mut [ + // invalid state + TestData::new( + VmmAction::SetVmConfiguration(VmConfigInfo::default()), + InstanceState::Running, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::MachineConfig( + VmConfigError::UpdateNotAllowedPostBoot + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to set configuration for the VM: \ + update operation is not allowed after boot", + ); + assert_eq!(err_string, expected_err); + }, + ), + // invalid cpu count (0) + TestData::new( + VmmAction::SetVmConfiguration(VmConfigInfo { + vcpu_count: 0, + ..Default::default() + }), + InstanceState::Uninitialized, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::MachineConfig( + VmConfigError::InvalidVcpuCount(0) + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to set configuration for the VM: \ + the vCPU number '0' can only be 1 or an even number when hyperthreading is enabled"); + assert_eq!(err_string, expected_err); + }, + ), + // invalid max cpu count (too small) + TestData::new( + VmmAction::SetVmConfiguration(VmConfigInfo { + vcpu_count: 4, + max_vcpu_count: 2, + ..Default::default() + }), + InstanceState::Uninitialized, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::MachineConfig( + VmConfigError::InvalidMaxVcpuCount(2) + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to set configuration for the VM: \ + the max vCPU number '2' shouldn't less than vCPU count and can only be 1 or an even number when hyperthreading is enabled"); + assert_eq!(err_string, expected_err); + }, + ), + // invalid cpu topology (larger than 254) + TestData::new( + VmmAction::SetVmConfiguration(VmConfigInfo { + vcpu_count: 254, + cpu_topology: CpuTopology { + threads_per_core: 2, + cores_per_die: 128, + dies_per_socket: 1, + sockets: 1, + }, + ..Default::default() + }), + InstanceState::Uninitialized, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::MachineConfig( + VmConfigError::VcpuCountExceedsMaximum + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to set configuration for the VM: \ + the vCPU number shouldn't large than 254", + ); + + assert_eq!(err_string, expected_err) + }, + ), + // cpu topology and max_vcpu_count are not matched - success + TestData::new( + VmmAction::SetVmConfiguration(VmConfigInfo { + vcpu_count: 16, + max_vcpu_count: 32, + cpu_topology: CpuTopology { + threads_per_core: 1, + cores_per_die: 128, + dies_per_socket: 1, + sockets: 1, + }, + ..Default::default() + }), + InstanceState::Uninitialized, + &|result| { + result.unwrap(); + }, + ), 
+ // invalid threads_per_core + TestData::new( + VmmAction::SetVmConfiguration(VmConfigInfo { + vcpu_count: 4, + max_vcpu_count: 4, + cpu_topology: CpuTopology { + threads_per_core: 4, + cores_per_die: 1, + dies_per_socket: 1, + sockets: 1, + }, + ..Default::default() + }), + InstanceState::Uninitialized, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::MachineConfig( + VmConfigError::InvalidThreadsPerCore(4) + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to set configuration for the VM: \ + the threads_per_core number '4' can only be 1 or 2", + ); + + assert_eq!(err_string, expected_err) + }, + ), + // invalid mem size + TestData::new( + VmmAction::SetVmConfiguration(VmConfigInfo { + mem_size_mib: 3, + ..Default::default() + }), + InstanceState::Uninitialized, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::MachineConfig( + VmConfigError::InvalidMemorySize(3) + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to set configuration for the VM: \ + the memory size 0x3MiB is invalid", + ); + assert_eq!(err_string, expected_err); + }, + ), + // invalid mem path + TestData::new( + VmmAction::SetVmConfiguration(VmConfigInfo { + mem_type: String::from("hugetlbfs"), + mem_file_path: String::from(""), + ..Default::default() + }), + InstanceState::Uninitialized, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::MachineConfig( + VmConfigError::InvalidMemFilePath(_) + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to set configuration for the VM: \ + the memory file path is invalid", + ); + assert_eq!(err_string, expected_err); + }, + ), + // success + TestData::new( + VmmAction::SetVmConfiguration(VmConfigInfo::default()), + InstanceState::Uninitialized, + &|result| { + assert!(result.is_ok()); + }, + ), + ]; + + for t in tests.iter_mut() { + t.check_request(); + } + } + + #[test] + fn test_vmm_action_start_microvm() { + skip_if_not_root!(); + + let tests = &mut [ + // invalid state (running) + TestData::new(VmmAction::StartMicroVm, InstanceState::Running, &|result| { + assert!(matches!( + result, + Err(VmmActionError::StartMicroVm( + StartMicroVmError::MicroVMAlreadyRunning + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to boot the VM: \ + the virtual machine is already running", + ); + assert_eq!(err_string, expected_err); + }), + // no kernel configuration + TestData::new( + VmmAction::StartMicroVm, + InstanceState::Uninitialized, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::StartMicroVm( + StartMicroVmError::MissingKernelConfig + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to boot the VM: \ + cannot start the virtual machine without kernel configuration", + ); + assert_eq!(err_string, expected_err); + }, + ), + ]; + + for t in tests.iter_mut() { + t.check_request(); + } + } + + #[test] + fn test_vmm_action_shutdown_microvm() { + skip_if_not_root!(); + + let tests = &mut [ + // success + TestData::new( + VmmAction::ShutdownMicroVm, + InstanceState::Uninitialized, + &|result| { + assert!(result.is_ok()); + }, + ), + ]; + + for t in tests.iter_mut() { + t.check_request(); + } + } + + #[cfg(feature = "virtio-blk")] + #[test] + fn test_vmm_action_insert_block_device() { + skip_if_not_root!(); + + let dummy_file = 
TempFile::new().unwrap(); + let dummy_path = dummy_file.as_path().to_owned(); + + let tests = &mut [ + // invalid state + TestData::new( + VmmAction::InsertBlockDevice(BlockDeviceConfigInfo::default()), + InstanceState::Running, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::Block( + BlockDeviceError::UpdateNotAllowedPostBoot + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "virtio-blk device error: \ + block device does not support runtime update", + ); + assert_eq!(err_string, expected_err); + }, + ), + // success + TestData::new( + VmmAction::InsertBlockDevice(BlockDeviceConfigInfo { + path_on_host: dummy_path, + device_type: crate::device_manager::blk_dev_mgr::BlockDeviceType::RawBlock, + is_root_device: true, + part_uuid: None, + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: String::from("1"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 256, + use_shared_irq: None, + use_generic_irq: None, + }), + InstanceState::Uninitialized, + &|result| { + assert!(result.is_ok()); + }, + ), + ]; + + for t in tests.iter_mut() { + t.check_request(); + } + } + + #[cfg(feature = "virtio-blk")] + #[test] + fn test_vmm_action_update_block_device() { + skip_if_not_root!(); + + let tests = &mut [ + // invalid id + TestData::new( + VmmAction::UpdateBlockDevice(BlockDeviceConfigUpdateInfo { + drive_id: String::from("1"), + rate_limiter: None, + }), + InstanceState::Running, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::Block(BlockDeviceError::InvalidDeviceId(_))) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "virtio-blk device error: \ + invalid block device id '1'", + ); + assert_eq!(err_string, expected_err); + }, + ), + ]; + + for t in tests.iter_mut() { + t.check_request(); + } + } + + #[cfg(feature = "virtio-blk")] + #[test] + fn test_vmm_action_remove_block_device() { + skip_if_not_root!(); + + let tests = &mut [ + // invalid state + TestData::new( + VmmAction::RemoveBlockDevice(String::from("1")), + InstanceState::Running, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::Block( + BlockDeviceError::UpdateNotAllowedPostBoot + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "virtio-blk device error: \ + block device does not support runtime update", + ); + assert_eq!(err_string, expected_err); + }, + ), + // invalid id + TestData::new( + VmmAction::RemoveBlockDevice(String::from("1")), + InstanceState::Uninitialized, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::Block(BlockDeviceError::InvalidDeviceId(_))) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "virtio-blk device error: \ + invalid block device id '1'", + ); + assert_eq!(err_string, expected_err); + }, + ), + ]; + + for t in tests.iter_mut() { + t.check_request(); + } + } + + #[cfg(feature = "virtio-fs")] + #[test] + fn test_vmm_action_insert_fs_device() { + skip_if_not_root!(); + + let tests = &mut [ + // invalid state + TestData::new( + VmmAction::InsertFsDevice(FsDeviceConfigInfo::default()), + InstanceState::Running, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::FsDevice( + FsDeviceError::UpdateNotAllowedPostBoot + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "virtio-fs device error: \ + update operation is not 
allowed after boot", + ); + assert_eq!(err_string, expected_err); + }, + ), + // success + TestData::new( + VmmAction::InsertFsDevice(FsDeviceConfigInfo::default()), + InstanceState::Uninitialized, + &|result| { + assert!(result.is_ok()); + }, + ), + ]; + + for t in tests.iter_mut() { + t.check_request(); + } + } + + #[cfg(feature = "virtio-fs")] + #[test] + fn test_vmm_action_manipulate_fs_device() { + skip_if_not_root!(); + + let tests = &mut [ + // invalid state + TestData::new( + VmmAction::ManipulateFsBackendFs(FsMountConfigInfo::default()), + InstanceState::Uninitialized, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::FsDevice(FsDeviceError::MicroVMNotRunning)) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "virtio-fs device error: \ + vm is not running when attaching a backend fs", + ); + assert_eq!(err_string, expected_err); + }, + ), + // invalid backend + TestData::new( + VmmAction::ManipulateFsBackendFs(FsMountConfigInfo::default()), + InstanceState::Running, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::FsDevice( + FsDeviceError::AttachBackendFailed(_) + )) + )); + let err_string = format!("{}", result.unwrap_err()); + println!("{}", err_string); + let expected_err = String::from( + "virtio-fs device error: \ + Fs device attach a backend fs failed", + ); + assert_eq!(err_string, expected_err); + }, + ), + ]; + for t in tests.iter_mut() { + t.check_request(); + } + } + + #[cfg(feature = "virtio-net")] + #[test] + fn test_vmm_action_insert_network_device() { + skip_if_not_root!(); + + let tests = &mut [ + // hotplug unready + TestData::new( + VmmAction::InsertNetworkDevice(VirtioNetDeviceConfigInfo::default()), + InstanceState::Running, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::StartMicroVm( + StartMicroVmError::UpcallMissVsock + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to boot the VM: \ + the upcall client needs a virtio-vsock device for communication", + ); + assert_eq!(err_string, expected_err); + }, + ), + // success + TestData::new( + VmmAction::InsertNetworkDevice(VirtioNetDeviceConfigInfo::default()), + InstanceState::Uninitialized, + &|result| { + assert!(result.is_ok()); + }, + ), + ]; + + for t in tests.iter_mut() { + t.check_request(); + } + } + + #[cfg(feature = "virtio-net")] + #[test] + fn test_vmm_action_update_network_interface() { + skip_if_not_root!(); + + let tests = &mut [ + // invalid id + TestData::new( + VmmAction::UpdateNetworkInterface(VirtioNetDeviceConfigUpdateInfo { + iface_id: String::from("1"), + rx_rate_limiter: None, + tx_rate_limiter: None, + }), + InstanceState::Running, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::VirtioNet( + VirtioNetDeviceError::InvalidIfaceId(_) + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "virtio-net device error: \ + invalid virtio-net iface id '1'", + ); + assert_eq!(err_string, expected_err); + }, + ), + ]; + + for t in tests.iter_mut() { + t.check_request(); + } + } + + #[cfg(feature = "virtio-vsock")] + #[test] + fn test_vmm_action_insert_vsock_device() { + skip_if_not_root!(); + + let tests = &mut [ + // invalid state + TestData::new( + VmmAction::InsertVsockDevice(VsockDeviceConfigInfo::default()), + InstanceState::Running, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::Vsock( + VsockDeviceError::UpdateNotAllowedPostBoot + )) + )); + let 
err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to add virtio-vsock device: \ + update operation is not allowed after boot", + ); + assert_eq!(err_string, expected_err); + }, + ), + // invalid guest_cid + TestData::new( + VmmAction::InsertVsockDevice(VsockDeviceConfigInfo::default()), + InstanceState::Uninitialized, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::Vsock(VsockDeviceError::GuestCIDInvalid(0))) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to add virtio-vsock device: \ + the guest CID 0 is invalid", + ); + assert_eq!(err_string, expected_err); + }, + ), + // success + TestData::new( + VmmAction::InsertVsockDevice(VsockDeviceConfigInfo { + guest_cid: 3, + ..Default::default() + }), + InstanceState::Uninitialized, + &|result| { + assert!(result.is_ok()); + }, + ), + ]; + + for t in tests.iter_mut() { + t.check_request(); + } + } + + #[cfg(feature = "virtio-mem")] + #[test] + fn test_vmm_action_insert_mem_device() { + skip_if_not_root!(); + + let tests = &mut [ + // hotplug unready + TestData::new( + VmmAction::InsertMemDevice(MemDeviceConfigInfo::default()), + InstanceState::Running, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::StartMicroVm( + StartMicroVmError::UpcallMissVsock + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to boot the VM: \ + the upcall client needs a virtio-vsock device for communication", + ); + assert_eq!(err_string, expected_err); + }, + ), + // success + TestData::new( + VmmAction::InsertMemDevice(MemDeviceConfigInfo::default()), + InstanceState::Uninitialized, + &|result| { + assert!(result.is_ok()); + }, + ), + ]; + + for t in tests.iter_mut() { + t.check_request(); + } + } + + #[cfg(feature = "virtio-balloon")] + #[test] + fn test_vmm_action_insert_balloon_device() { + skip_if_not_root!(); + + let tests = &mut [ + // hotplug unready + TestData::new( + VmmAction::InsertBalloonDevice(BalloonDeviceConfigInfo::default()), + InstanceState::Running, + &|result| { + assert!(matches!( + result, + Err(VmmActionError::StartMicroVm( + StartMicroVmError::UpcallMissVsock + )) + )); + let err_string = format!("{}", result.unwrap_err()); + let expected_err = String::from( + "failed to boot the VM: \ + the upcall client needs a virtio-vsock device for communication", + ); + assert_eq!(err_string, expected_err); + }, + ), + // success + TestData::new( + VmmAction::InsertBalloonDevice(BalloonDeviceConfigInfo::default()), + InstanceState::Uninitialized, + &|result| { + assert!(result.is_ok()); + }, + ), + ]; + + for t in tests.iter_mut() { + t.check_request(); + } + } +} diff --git a/src/dragonball/src/config_manager.rs b/src/dragonball/src/config_manager.rs new file mode 100644 index 000000000000..ff74fb925a57 --- /dev/null +++ b/src/dragonball/src/config_manager.rs @@ -0,0 +1,828 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::convert::TryInto; +use std::io; +use std::ops::{Index, IndexMut}; +use std::sync::Arc; + +use dbs_device::DeviceIo; +use dbs_utils::rate_limiter::{RateLimiter, TokenBucket}; +use serde_derive::{Deserialize, Serialize}; + +/// Get bucket update for rate limiter. +#[macro_export] +macro_rules! 
get_bucket_update { + ($self:ident, $rate_limiter: ident, $metric: ident) => {{ + match &$self.$rate_limiter { + Some(rl_cfg) => { + let tb_cfg = &rl_cfg.$metric; + dbs_utils::rate_limiter::RateLimiter::make_bucket( + tb_cfg.size, + tb_cfg.one_time_burst, + tb_cfg.refill_time, + ) + // Updated active rate-limiter. + .map(dbs_utils::rate_limiter::BucketUpdate::Update) + // Updated/deactivated rate-limiter + .unwrap_or(dbs_utils::rate_limiter::BucketUpdate::Disabled) + } + // No update to the rate-limiter. + None => dbs_utils::rate_limiter::BucketUpdate::None, + } + }}; +} + +/// Trait for generic configuration information. +pub trait ConfigItem { + /// Related errors. + type Err; + + /// Get the unique identifier of the configuration item. + fn id(&self) -> &str; + + /// Check whether current configuration item conflicts with another one. + fn check_conflicts(&self, other: &Self) -> std::result::Result<(), Self::Err>; +} + +/// Struct to manage a group of configuration items. +#[derive(Debug, Default, Deserialize, PartialEq, Eq, Serialize)] +pub struct ConfigInfos +where + T: ConfigItem + Clone, +{ + configs: Vec, +} + +impl ConfigInfos +where + T: ConfigItem + Clone + Default, +{ + /// Constructor + pub fn new() -> Self { + ConfigInfos::default() + } + + /// Insert a configuration item in the group. + pub fn insert(&mut self, config: T) -> std::result::Result<(), T::Err> { + for item in self.configs.iter() { + config.check_conflicts(item)?; + } + self.configs.push(config); + + Ok(()) + } + + /// Update a configuration item in the group. + pub fn update(&mut self, config: T, err: T::Err) -> std::result::Result<(), T::Err> { + match self.get_index_by_id(&config) { + None => Err(err), + Some(index) => { + for (idx, item) in self.configs.iter().enumerate() { + if idx != index { + config.check_conflicts(item)?; + } + } + self.configs[index] = config; + Ok(()) + } + } + } + + /// Insert or update a configuration item in the group. + pub fn insert_or_update(&mut self, config: T) -> std::result::Result<(), T::Err> { + match self.get_index_by_id(&config) { + None => { + for item in self.configs.iter() { + config.check_conflicts(item)?; + } + + self.configs.push(config) + } + Some(index) => { + for (idx, item) in self.configs.iter().enumerate() { + if idx != index { + config.check_conflicts(item)?; + } + } + self.configs[index] = config; + } + } + + Ok(()) + } + + /// Remove the matching configuration entry. + pub fn remove(&mut self, config: &T) -> Option { + if let Some(index) = self.get_index_by_id(config) { + Some(self.configs.remove(index)) + } else { + None + } + } + + /// Returns an immutable iterator over the config items + pub fn iter(&self) -> ::std::slice::Iter { + self.configs.iter() + } + + /// Get the configuration entry with matching ID. + pub fn get_by_id(&self, item: &T) -> Option<&T> { + let id = item.id(); + + self.configs.iter().rfind(|cfg| cfg.id() == id) + } + + fn get_index_by_id(&self, item: &T) -> Option { + let id = item.id(); + self.configs.iter().position(|cfg| cfg.id() == id) + } +} + +impl Clone for ConfigInfos +where + T: ConfigItem + Clone, +{ + fn clone(&self) -> Self { + ConfigInfos { + configs: self.configs.clone(), + } + } +} + +/// Struct to maintain configuration information for a device. +pub struct DeviceConfigInfo +where + T: ConfigItem + Clone, +{ + /// Configuration information for the device object. + pub config: T, + /// The associated device object. 
+ pub device: Option>, +} + +impl DeviceConfigInfo +where + T: ConfigItem + Clone, +{ + /// Create a new instance of ['DeviceInfoGroup']. + pub fn new(config: T) -> Self { + DeviceConfigInfo { + config, + device: None, + } + } + + /// Create a new instance of ['DeviceInfoGroup'] with optional device. + pub fn new_with_device(config: T, device: Option>) -> Self { + DeviceConfigInfo { config, device } + } + + /// Set the device object associated with the configuration. + pub fn set_device(&mut self, device: Arc) { + self.device = Some(device); + } +} + +impl Clone for DeviceConfigInfo +where + T: ConfigItem + Clone, +{ + fn clone(&self) -> Self { + DeviceConfigInfo::new_with_device(self.config.clone(), self.device.clone()) + } +} + +/// Struct to maintain configuration information for a group of devices. +pub struct DeviceConfigInfos +where + T: ConfigItem + Clone, +{ + info_list: Vec>, +} + +impl Default for DeviceConfigInfos +where + T: ConfigItem + Clone, +{ + fn default() -> Self { + Self::new() + } +} + +impl DeviceConfigInfos +where + T: ConfigItem + Clone, +{ + /// Create a new instance of ['DeviceConfigInfos']. + pub fn new() -> Self { + DeviceConfigInfos { + info_list: Vec::new(), + } + } + + /// Insert or update configuration information for a device. + pub fn insert_or_update(&mut self, config: &T) -> std::result::Result { + let device_info = DeviceConfigInfo::new(config.clone()); + Ok(match self.get_index_by_id(config) { + Some(index) => { + for (idx, info) in self.info_list.iter().enumerate() { + if idx != index { + info.config.check_conflicts(config)?; + } + } + self.info_list[index].config = config.clone(); + index + } + None => { + for info in self.info_list.iter() { + info.config.check_conflicts(config)?; + } + self.info_list.push(device_info); + self.info_list.len() - 1 + } + }) + } + + /// Remove a device configuration information object. + pub fn remove(&mut self, index: usize) -> Option> { + if self.info_list.len() > index { + Some(self.info_list.remove(index)) + } else { + None + } + } + + /// Get number of device configuration information objects. + pub fn len(&self) -> usize { + self.info_list.len() + } + + /// Returns true if the device configuration information objects is empty. + pub fn is_empty(&self) -> bool { + self.info_list.len() == 0 + } + + /// Add a device configuration information object at the tail. + pub fn push(&mut self, info: DeviceConfigInfo) { + self.info_list.push(info); + } + + /// Iterator for configuration information objects. + pub fn iter(&self) -> std::slice::Iter> { + self.info_list.iter() + } + + /// Mutable iterator for configuration information objects. + pub fn iter_mut(&mut self) -> std::slice::IterMut> { + self.info_list.iter_mut() + } + + /// Remove the last device config info from the `info_list`. 
+ pub fn pop(&mut self) -> Option> { + self.info_list.pop() + } + + fn get_index_by_id(&self, config: &T) -> Option { + self.info_list + .iter() + .position(|info| info.config.id().eq(config.id())) + } +} + +impl Index for DeviceConfigInfos +where + T: ConfigItem + Clone, +{ + type Output = DeviceConfigInfo; + fn index(&self, idx: usize) -> &Self::Output { + &self.info_list[idx] + } +} + +impl IndexMut for DeviceConfigInfos +where + T: ConfigItem + Clone, +{ + fn index_mut(&mut self, idx: usize) -> &mut Self::Output { + &mut self.info_list[idx] + } +} + +impl Clone for DeviceConfigInfos +where + T: ConfigItem + Clone, +{ + fn clone(&self) -> Self { + DeviceConfigInfos { + info_list: self.info_list.clone(), + } + } +} + +/// Configuration information for RateLimiter token bucket. +#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq, Serialize)] +pub struct TokenBucketConfigInfo { + /// The size for the token bucket. A TokenBucket of `size` total capacity will take `refill_time` + /// milliseconds to go from zero tokens to total capacity. + pub size: u64, + /// Number of free initial tokens, that can be consumed at no cost. + pub one_time_burst: u64, + /// Complete refill time in milliseconds. + pub refill_time: u64, +} + +impl TokenBucketConfigInfo { + fn resize(&mut self, n: u64) { + if n != 0 { + self.size /= n; + self.one_time_burst /= n; + } + } +} + +impl From for TokenBucket { + fn from(t: TokenBucketConfigInfo) -> TokenBucket { + (&t).into() + } +} + +impl From<&TokenBucketConfigInfo> for TokenBucket { + fn from(t: &TokenBucketConfigInfo) -> TokenBucket { + TokenBucket::new(t.size, t.one_time_burst, t.refill_time) + } +} + +/// Configuration information for RateLimiter objects. +#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq, Serialize)] +pub struct RateLimiterConfigInfo { + /// Data used to initialize the RateLimiter::bandwidth bucket. + pub bandwidth: TokenBucketConfigInfo, + /// Data used to initialize the RateLimiter::ops bucket. + pub ops: TokenBucketConfigInfo, +} + +impl RateLimiterConfigInfo { + /// Update the bandwidth budget configuration. + pub fn update_bandwidth(&mut self, new_config: TokenBucketConfigInfo) { + self.bandwidth = new_config; + } + + /// Update the ops budget configuration. + pub fn update_ops(&mut self, new_config: TokenBucketConfigInfo) { + self.ops = new_config; + } + + /// resize the limiter to its 1/n. 
+ pub fn resize(&mut self, n: u64) { + self.bandwidth.resize(n); + self.ops.resize(n); + } +} + +impl TryInto for &RateLimiterConfigInfo { + type Error = io::Error; + + fn try_into(self) -> Result { + RateLimiter::new( + self.bandwidth.size, + self.bandwidth.one_time_burst, + self.bandwidth.refill_time, + self.ops.size, + self.ops.one_time_burst, + self.ops.refill_time, + ) + } +} + +impl TryInto for RateLimiterConfigInfo { + type Error = io::Error; + + fn try_into(self) -> Result { + RateLimiter::new( + self.bandwidth.size, + self.bandwidth.one_time_burst, + self.bandwidth.refill_time, + self.ops.size, + self.ops.one_time_burst, + self.ops.refill_time, + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, thiserror::Error)] + pub enum DummyError { + #[error("configuration entry exists")] + Exist, + } + + #[derive(Clone, Debug, Default)] + pub struct DummyConfigInfo { + id: String, + content: String, + } + + impl ConfigItem for DummyConfigInfo { + type Err = DummyError; + + fn id(&self) -> &str { + &self.id + } + + fn check_conflicts(&self, other: &Self) -> Result<(), DummyError> { + if self.id == other.id || self.content == other.content { + Err(DummyError::Exist) + } else { + Ok(()) + } + } + } + + type DummyConfigInfos = ConfigInfos; + + #[test] + fn test_insert_config_info() { + let mut configs = DummyConfigInfos::new(); + + let config1 = DummyConfigInfo { + id: "1".to_owned(), + content: "a".to_owned(), + }; + configs.insert(config1).unwrap(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "a"); + + // Test case: cannot insert new item with the same id. + let config2 = DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }; + configs.insert(config2).unwrap_err(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "a"); + + let config3 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.insert(config3).unwrap(); + assert_eq!(configs.configs.len(), 2); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "a"); + assert_eq!(configs.configs[1].id, "2"); + assert_eq!(configs.configs[1].content, "c"); + + // Test case: cannot insert new item with the same content. 
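+        // (`config4` reuses the content "c" already held by the entry with id "2", so
+        // `check_conflicts` rejects the insert and the stored configs stay unchanged.)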
+ let config4 = DummyConfigInfo { + id: "3".to_owned(), + content: "c".to_owned(), + }; + configs.insert(config4).unwrap_err(); + assert_eq!(configs.configs.len(), 2); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "a"); + assert_eq!(configs.configs[1].id, "2"); + assert_eq!(configs.configs[1].content, "c"); + } + + #[test] + fn test_update_config_info() { + let mut configs = DummyConfigInfos::new(); + + let config1 = DummyConfigInfo { + id: "1".to_owned(), + content: "a".to_owned(), + }; + configs.insert(config1).unwrap(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "a"); + + // Test case: succeed to update an existing entry + let config2 = DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }; + configs.update(config2, DummyError::Exist).unwrap(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "b"); + + // Test case: cannot update a non-existing entry + let config3 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.update(config3, DummyError::Exist).unwrap_err(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "b"); + + // Test case: cannot update an entry with conflicting content + let config4 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.insert(config4).unwrap(); + let config5 = DummyConfigInfo { + id: "1".to_owned(), + content: "c".to_owned(), + }; + configs.update(config5, DummyError::Exist).unwrap_err(); + } + + #[test] + fn test_insert_or_update_config_info() { + let mut configs = DummyConfigInfos::new(); + + let config1 = DummyConfigInfo { + id: "1".to_owned(), + content: "a".to_owned(), + }; + configs.insert_or_update(config1).unwrap(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "a"); + + // Test case: succeed to update an existing entry + let config2 = DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }; + configs.insert_or_update(config2.clone()).unwrap(); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "b"); + + // Add a second entry + let config3 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.insert_or_update(config3.clone()).unwrap(); + assert_eq!(configs.configs.len(), 2); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "b"); + assert_eq!(configs.configs[1].id, "2"); + assert_eq!(configs.configs[1].content, "c"); + + // Lookup the first entry + let config4 = configs + .get_by_id(&DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }) + .unwrap(); + assert_eq!(config4.id, config2.id); + assert_eq!(config4.content, config2.content); + + // Lookup the second entry + let config5 = configs + .get_by_id(&DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }) + .unwrap(); + assert_eq!(config5.id, config3.id); + assert_eq!(config5.content, config3.content); + + // Test case: can't insert an entry with conflicting content + let config6 = DummyConfigInfo { + id: "3".to_owned(), + content: "c".to_owned(), + }; + configs.insert_or_update(config6).unwrap_err(); + assert_eq!(configs.configs.len(), 2); + assert_eq!(configs.configs[0].id, "1"); + 
assert_eq!(configs.configs[0].content, "b"); + assert_eq!(configs.configs[1].id, "2"); + assert_eq!(configs.configs[1].content, "c"); + } + + #[test] + fn test_remove_config_info() { + let mut configs = DummyConfigInfos::new(); + + let config1 = DummyConfigInfo { + id: "1".to_owned(), + content: "a".to_owned(), + }; + configs.insert_or_update(config1).unwrap(); + let config2 = DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }; + configs.insert_or_update(config2.clone()).unwrap(); + let config3 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.insert_or_update(config3.clone()).unwrap(); + assert_eq!(configs.configs.len(), 2); + assert_eq!(configs.configs[0].id, "1"); + assert_eq!(configs.configs[0].content, "b"); + assert_eq!(configs.configs[1].id, "2"); + assert_eq!(configs.configs[1].content, "c"); + + let config4 = configs + .remove(&DummyConfigInfo { + id: "1".to_owned(), + content: "no value".to_owned(), + }) + .unwrap(); + assert_eq!(config4.id, config2.id); + assert_eq!(config4.content, config2.content); + assert_eq!(configs.configs.len(), 1); + assert_eq!(configs.configs[0].id, "2"); + assert_eq!(configs.configs[0].content, "c"); + + let config5 = configs + .remove(&DummyConfigInfo { + id: "2".to_owned(), + content: "no value".to_owned(), + }) + .unwrap(); + assert_eq!(config5.id, config3.id); + assert_eq!(config5.content, config3.content); + assert_eq!(configs.configs.len(), 0); + } + + type DummyDeviceInfoList = DeviceConfigInfos; + + #[test] + fn test_insert_or_update_device_info() { + let mut configs = DummyDeviceInfoList::new(); + + let config1 = DummyConfigInfo { + id: "1".to_owned(), + content: "a".to_owned(), + }; + configs.insert_or_update(&config1).unwrap(); + assert_eq!(configs.len(), 1); + assert_eq!(configs[0].config.id, "1"); + assert_eq!(configs[0].config.content, "a"); + + // Test case: succeed to update an existing entry + let config2 = DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }; + configs.insert_or_update(&config2 /* */).unwrap(); + assert_eq!(configs.len(), 1); + assert_eq!(configs[0].config.id, "1"); + assert_eq!(configs[0].config.content, "b"); + + // Add a second entry + let config3 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.insert_or_update(&config3).unwrap(); + assert_eq!(configs.len(), 2); + assert_eq!(configs[0].config.id, "1"); + assert_eq!(configs[0].config.content, "b"); + assert_eq!(configs[1].config.id, "2"); + assert_eq!(configs[1].config.content, "c"); + + // Lookup the first entry + let config4_id = configs + .get_index_by_id(&DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }) + .unwrap(); + let config4 = &configs[config4_id].config; + assert_eq!(config4.id, config2.id); + assert_eq!(config4.content, config2.content); + + // Lookup the second entry + let config5_id = configs + .get_index_by_id(&DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }) + .unwrap(); + let config5 = &configs[config5_id].config; + assert_eq!(config5.id, config3.id); + assert_eq!(config5.content, config3.content); + + // Test case: can't insert an entry with conflicting content + let config6 = DummyConfigInfo { + id: "3".to_owned(), + content: "c".to_owned(), + }; + configs.insert_or_update(&config6).unwrap_err(); + assert_eq!(configs.len(), 2); + assert_eq!(configs[0].config.id, "1"); + assert_eq!(configs[0].config.content, "b"); + assert_eq!(configs[1].config.id, "2"); + assert_eq!(configs[1].config.content, 
"c"); + } + + #[test] + fn test_remove_device_info() { + let mut configs = DummyDeviceInfoList::new(); + + let config1 = DummyConfigInfo { + id: "1".to_owned(), + content: "a".to_owned(), + }; + configs.insert_or_update(&config1).unwrap(); + let config2 = DummyConfigInfo { + id: "1".to_owned(), + content: "b".to_owned(), + }; + configs.insert_or_update(&config2).unwrap(); + let config3 = DummyConfigInfo { + id: "2".to_owned(), + content: "c".to_owned(), + }; + configs.insert_or_update(&config3).unwrap(); + assert_eq!(configs.len(), 2); + assert_eq!(configs[0].config.id, "1"); + assert_eq!(configs[0].config.content, "b"); + assert_eq!(configs[1].config.id, "2"); + assert_eq!(configs[1].config.content, "c"); + + let config4 = configs.remove(0).unwrap().config; + assert_eq!(config4.id, config2.id); + assert_eq!(config4.content, config2.content); + assert_eq!(configs.len(), 1); + assert_eq!(configs[0].config.id, "2"); + assert_eq!(configs[0].config.content, "c"); + + let config5 = configs.remove(0).unwrap().config; + assert_eq!(config5.id, config3.id); + assert_eq!(config5.content, config3.content); + assert_eq!(configs.len(), 0); + } + + #[test] + fn test_rate_limiter_configs() { + const SIZE: u64 = 1024 * 1024; + const ONE_TIME_BURST: u64 = 1024; + const REFILL_TIME: u64 = 1000; + + let c: TokenBucketConfigInfo = TokenBucketConfigInfo { + size: SIZE, + one_time_burst: ONE_TIME_BURST, + refill_time: REFILL_TIME, + }; + let b: TokenBucket = c.into(); + assert_eq!(b.capacity(), SIZE); + assert_eq!(b.one_time_burst(), ONE_TIME_BURST); + assert_eq!(b.refill_time_ms(), REFILL_TIME); + + let mut rlc = RateLimiterConfigInfo { + bandwidth: TokenBucketConfigInfo { + size: SIZE, + one_time_burst: ONE_TIME_BURST, + refill_time: REFILL_TIME, + }, + ops: TokenBucketConfigInfo { + size: SIZE * 2, + one_time_burst: 0, + refill_time: REFILL_TIME * 2, + }, + }; + let rl: RateLimiter = (&rlc).try_into().unwrap(); + assert_eq!(rl.bandwidth().unwrap().capacity(), SIZE); + assert_eq!(rl.bandwidth().unwrap().one_time_burst(), ONE_TIME_BURST); + assert_eq!(rl.bandwidth().unwrap().refill_time_ms(), REFILL_TIME); + assert_eq!(rl.ops().unwrap().capacity(), SIZE * 2); + assert_eq!(rl.ops().unwrap().one_time_burst(), 0); + assert_eq!(rl.ops().unwrap().refill_time_ms(), REFILL_TIME * 2); + + let bandwidth = TokenBucketConfigInfo { + size: SIZE * 2, + one_time_burst: ONE_TIME_BURST * 2, + refill_time: REFILL_TIME * 2, + }; + rlc.update_bandwidth(bandwidth); + assert_eq!(rlc.bandwidth.size, SIZE * 2); + assert_eq!(rlc.bandwidth.one_time_burst, ONE_TIME_BURST * 2); + assert_eq!(rlc.bandwidth.refill_time, REFILL_TIME * 2); + assert_eq!(rlc.ops.size, SIZE * 2); + assert_eq!(rlc.ops.one_time_burst, 0); + assert_eq!(rlc.ops.refill_time, REFILL_TIME * 2); + + let ops = TokenBucketConfigInfo { + size: SIZE * 3, + one_time_burst: ONE_TIME_BURST * 3, + refill_time: REFILL_TIME * 3, + }; + rlc.update_ops(ops); + assert_eq!(rlc.bandwidth.size, SIZE * 2); + assert_eq!(rlc.bandwidth.one_time_burst, ONE_TIME_BURST * 2); + assert_eq!(rlc.bandwidth.refill_time, REFILL_TIME * 2); + assert_eq!(rlc.ops.size, SIZE * 3); + assert_eq!(rlc.ops.one_time_burst, ONE_TIME_BURST * 3); + assert_eq!(rlc.ops.refill_time, REFILL_TIME * 3); + } +} diff --git a/src/dragonball/src/dbs_acpi/Cargo.toml b/src/dragonball/src/dbs_acpi/Cargo.toml new file mode 100644 index 000000000000..df5e7867a6b7 --- /dev/null +++ b/src/dragonball/src/dbs_acpi/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "dbs-acpi" +version = "0.1.0" +authors = ["Alibaba Dragonball Team"] 
+description = "acpi definitions for virtual machines." +license = "Apache-2.0" +edition = "2018" +homepage = "https://github.com/openanolis/dragonball-sandbox" +repository = "https://github.com/openanolis/dragonball-sandbox" +keywords = ["dragonball", "acpi", "vmm", "secure-sandbox"] +readme = "README.md" + +[dependencies] +vm-memory = "0.9.0" \ No newline at end of file diff --git a/src/dragonball/src/dbs_acpi/README.md b/src/dragonball/src/dbs_acpi/README.md new file mode 100644 index 000000000000..cc2b497545d7 --- /dev/null +++ b/src/dragonball/src/dbs_acpi/README.md @@ -0,0 +1,11 @@ +# dbs-acpi + +`dbs-acpi` provides ACPI data structures for VMM to emulate ACPI behavior. + +## Acknowledgement + +Part of the code is derived from the [Cloud Hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor) project. + +## License + +This project is licensed under [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). diff --git a/src/dragonball/src/dbs_acpi/src/lib.rs b/src/dragonball/src/dbs_acpi/src/lib.rs new file mode 100644 index 000000000000..a3094e30965c --- /dev/null +++ b/src/dragonball/src/dbs_acpi/src/lib.rs @@ -0,0 +1,29 @@ +// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +pub mod rsdp; +pub mod sdt; + +fn generate_checksum(data: &[u8]) -> u8 { + (255 - data.iter().fold(0u8, |acc, x| acc.wrapping_add(*x))).wrapping_add(1) +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_generate_checksum() { + let mut buf = [0x00; 8]; + let sum = generate_checksum(&buf); + assert_eq!(sum, 0); + buf[0] = 0xff; + let sum = generate_checksum(&buf); + assert_eq!(sum, 1); + buf[0] = 0xaa; + buf[1] = 0xcc; + buf[4] = generate_checksum(&buf); + let sum = buf.iter().fold(0u8, |s, v| s.wrapping_add(*v)); + assert_eq!(sum, 0); + } +} diff --git a/src/dragonball/src/dbs_acpi/src/rsdp.rs b/src/dragonball/src/dbs_acpi/src/rsdp.rs new file mode 100644 index 000000000000..05c36f809146 --- /dev/null +++ b/src/dragonball/src/dbs_acpi/src/rsdp.rs @@ -0,0 +1,60 @@ +// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// RSDP (Root System Description Pointer) is a data structure used in the ACPI programming interface. 
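+// Layout notes (informational, based on the ACPI specification): `signature` must be
+// "RSD PTR " so that firmware/OS scanners can locate the table; `checksum` covers the
+// ACPI 1.0 portion of the structure, while `extended_checksum` covers the whole table.
+// `Rsdp::new()` fills in both fields so that the covered bytes sum to zero modulo 256,
+// which is the invariant exercised by the unit test at the bottom of this file.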
+use vm_memory::ByteValued; + +#[repr(packed)] +#[derive(Clone, Copy, Default)] +pub struct Rsdp { + pub signature: [u8; 8], + pub checksum: u8, + pub oem_id: [u8; 6], + pub revision: u8, + _rsdt_addr: u32, + pub length: u32, + pub xsdt_addr: u64, + pub extended_checksum: u8, + _reserved: [u8; 3], +} + +// SAFETY: Rsdp only contains a series of integers +unsafe impl ByteValued for Rsdp {} + +impl Rsdp { + pub fn new(xsdt_addr: u64) -> Self { + let mut rsdp = Rsdp { + signature: *b"RSD PTR ", + checksum: 0, + oem_id: *b"ALICLD", + revision: 1, + _rsdt_addr: 0, + length: std::mem::size_of::() as u32, + xsdt_addr, + extended_checksum: 0, + _reserved: [0; 3], + }; + rsdp.checksum = super::generate_checksum(&rsdp.as_slice()[0..19]); + rsdp.extended_checksum = super::generate_checksum(rsdp.as_slice()); + rsdp + } + + pub fn len() -> usize { + std::mem::size_of::() + } +} +#[cfg(test)] +mod tests { + use super::Rsdp; + use vm_memory::bytes::ByteValued; + #[test] + fn test_rsdp() { + let rsdp = Rsdp::new(0xa0000); + let sum = rsdp + .as_slice() + .iter() + .fold(0u8, |acc, x| acc.wrapping_add(*x)); + assert_eq!(sum, 0); + } +} diff --git a/src/dragonball/src/dbs_acpi/src/sdt.rs b/src/dragonball/src/dbs_acpi/src/sdt.rs new file mode 100644 index 000000000000..f6a79f5761b4 --- /dev/null +++ b/src/dragonball/src/dbs_acpi/src/sdt.rs @@ -0,0 +1,137 @@ +// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +#[repr(packed)] +pub struct GenericAddress { + pub address_space_id: u8, + pub register_bit_width: u8, + pub register_bit_offset: u8, + pub access_size: u8, + pub address: u64, +} + +impl GenericAddress { + pub fn io_port_address(address: u16) -> Self { + GenericAddress { + address_space_id: 1, + register_bit_width: 8 * std::mem::size_of::() as u8, + register_bit_offset: 0, + access_size: std::mem::size_of::() as u8, + address: u64::from(address), + } + } + + pub fn mmio_address(address: u64) -> Self { + GenericAddress { + address_space_id: 0, + register_bit_width: 8 * std::mem::size_of::() as u8, + register_bit_offset: 0, + access_size: std::mem::size_of::() as u8, + address, + } + } +} + +pub struct Sdt { + data: Vec, +} + +#[allow(clippy::len_without_is_empty)] +impl Sdt { + pub fn new(signature: [u8; 4], length: u32, revision: u8) -> Self { + assert!(length >= 36); + const OEM_ID: [u8; 6] = *b"ALICLD"; + const OEM_TABLE: [u8; 8] = *b"RUND "; + const CREATOR_ID: [u8; 4] = *b"ALIC"; + let mut data = Vec::with_capacity(length as usize); + data.extend_from_slice(&signature); + data.extend_from_slice(&length.to_le_bytes()); + data.push(revision); + data.push(0); // checksum + data.extend_from_slice(&OEM_ID); // oem id u32 + data.extend_from_slice(&OEM_TABLE); // oem table + data.extend_from_slice(&1u32.to_le_bytes()); // oem revision u32 + data.extend_from_slice(&CREATOR_ID); // creator id u32 + data.extend_from_slice(&1u32.to_le_bytes()); // creator revison u32 + assert_eq!(data.len(), 36); + data.resize(length as usize, 0); + let mut sdt = Sdt { data }; + sdt.update_checksum(); + sdt + } + + pub fn update_checksum(&mut self) { + self.data[9] = 0; + let checksum = super::generate_checksum(self.data.as_slice()); + self.data[9] = checksum + } + + pub fn as_slice(&self) -> &[u8] { + self.data.as_slice() + } + + pub fn append(&mut self, value: T) { + let orig_length = self.data.len(); + let new_length = orig_length + std::mem::size_of::(); + self.data.resize(new_length, 0); + self.write_u32(4, new_length as u32); + self.write(orig_length, 
value); + } + + pub fn append_slice(&mut self, data: &[u8]) { + let orig_length = self.data.len(); + let new_length = orig_length + data.len(); + self.write_u32(4, new_length as u32); + self.data.extend_from_slice(data); + self.update_checksum(); + } + + /// Write a value at the given offset + pub fn write(&mut self, offset: usize, value: T) { + assert!((offset + (std::mem::size_of::() - 1)) < self.data.len()); + unsafe { + *(((self.data.as_mut_ptr() as usize) + offset) as *mut T) = value; + } + self.update_checksum(); + } + + pub fn write_u8(&mut self, offset: usize, val: u8) { + self.write(offset, val); + } + + pub fn write_u16(&mut self, offset: usize, val: u16) { + self.write(offset, val); + } + + pub fn write_u32(&mut self, offset: usize, val: u32) { + self.write(offset, val); + } + + pub fn write_u64(&mut self, offset: usize, val: u64) { + self.write(offset, val); + } + + pub fn len(&self) -> usize { + self.data.len() + } +} +#[cfg(test)] +mod tests { + use super::Sdt; + #[test] + fn test_sdt() { + let mut sdt = Sdt::new(*b"TEST", 40, 1); + let sum: u8 = sdt + .as_slice() + .iter() + .fold(0u8, |acc, x| acc.wrapping_add(*x)); + assert_eq!(sum, 0); + sdt.write_u32(36, 0x12345678); + let sum: u8 = sdt + .as_slice() + .iter() + .fold(0u8, |acc, x| acc.wrapping_add(*x)); + assert_eq!(sum, 0); + } +} diff --git a/src/dragonball/src/dbs_address_space/Cargo.toml b/src/dragonball/src/dbs_address_space/Cargo.toml new file mode 100644 index 000000000000..2ebd84fe6d4b --- /dev/null +++ b/src/dragonball/src/dbs_address_space/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "dbs-address-space" +version = "0.3.0" +authors = ["Alibaba Dragonball Team"] +description = "address space manager for virtual machines." +license = "Apache-2.0" +edition = "2018" +homepage = "https://github.com/openanolis/dragonball-sandbox" +repository = "https://github.com/openanolis/dragonball-sandbox" +keywords = ["dragonball", "address", "vmm", "secure-sandbox"] +readme = "README.md" + +[dependencies] +arc-swap = ">=0.4.8" +libc = "0.2.39" +nix = "0.23.1" +lazy_static = "1" +thiserror = "1" +vmm-sys-util = "0.11.0" +vm-memory = { version = "0.10", features = ["backend-mmap", "backend-atomic"] } diff --git a/src/dragonball/src/dbs_address_space/LICENSE b/src/dragonball/src/dbs_address_space/LICENSE new file mode 120000 index 000000000000..30cff7403da0 --- /dev/null +++ b/src/dragonball/src/dbs_address_space/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/dragonball/src/dbs_address_space/README.md b/src/dragonball/src/dbs_address_space/README.md new file mode 100644 index 000000000000..e3ea81d4c52b --- /dev/null +++ b/src/dragonball/src/dbs_address_space/README.md @@ -0,0 +1,80 @@ +# dbs-address-space + +## Design + +The `dbs-address-space` crate is an address space manager for virtual machines, which manages memory and MMIO resources resident in the guest physical address space. + +Main components are: +- `AddressSpaceRegion`: Struct to maintain configuration information about a guest address region. +```rust +#[derive(Debug, Clone)] +pub struct AddressSpaceRegion { + /// Type of address space regions. + pub ty: AddressSpaceRegionType, + /// Base address of the region in virtual machine's physical address space. + pub base: GuestAddress, + /// Size of the address space region. + pub size: GuestUsize, + /// Host NUMA node ids assigned to this region. + pub host_numa_node_id: Option, + + /// File/offset tuple to back the memory allocation. + file_offset: Option, + /// Mmap permission flags. 
+ perm_flags: i32, + /// Hugepage madvise hint. + /// + /// It needs 'advise' or 'always' policy in host shmem config. + is_hugepage: bool, + /// Hotplug hint. + is_hotplug: bool, + /// Anonymous memory hint. + /// + /// It should be true for regions with the MADV_DONTFORK flag enabled. + is_anon: bool, +} +``` +- `AddressSpaceBase`: Base implementation to manage guest physical address space, without support of region hotplug. +```rust +#[derive(Clone)] +pub struct AddressSpaceBase { + regions: Vec>, + layout: AddressSpaceLayout, +} +``` +- `AddressSpaceBase`: An address space implementation with region hotplug capability. +```rust +/// The `AddressSpace` is a wrapper over [AddressSpaceBase] to support hotplug of +/// address space regions. +#[derive(Clone)] +pub struct AddressSpace { + state: Arc>, +} +``` + +## Usage +```rust +// 1. create several memory regions +let reg = Arc::new( + AddressSpaceRegion::create_default_memory_region( + GuestAddress(0x100000), + 0x100000, + None, + "shmem", + "", + false, + false, + false, + ) + .unwrap() +); +let regions = vec![reg]; +// 2. create layout (depending on archs) +let layout = AddressSpaceLayout::new(GUEST_PHYS_END, GUEST_MEM_START, GUEST_MEM_END); +// 3. create address space from regions and layout +let address_space = AddressSpace::from_regions(regions, layout.clone()); +``` + +## License + +This project is licensed under [Apache License](http://www.apache.org/licenses/LICENSE-2.0), Version 2.0. diff --git a/src/dragonball/src/dbs_address_space/src/address_space.rs b/src/dragonball/src/dbs_address_space/src/address_space.rs new file mode 100644 index 000000000000..35bfab66d985 --- /dev/null +++ b/src/dragonball/src/dbs_address_space/src/address_space.rs @@ -0,0 +1,830 @@ +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Physical address space manager for virtual machines. + +use std::sync::Arc; + +use arc_swap::ArcSwap; +use vm_memory::{GuestAddress, GuestMemoryMmap}; + +use crate::{AddressSpaceError, AddressSpaceLayout, AddressSpaceRegion, AddressSpaceRegionType}; + +/// Base implementation to manage guest physical address space, without support of region hotplug. +#[derive(Clone)] +pub struct AddressSpaceBase { + regions: Vec>, + layout: AddressSpaceLayout, +} + +impl AddressSpaceBase { + /// Create an instance of `AddressSpaceBase` from an `AddressSpaceRegion` array. + /// + /// To achieve better performance by using binary search algorithm, the `regions` vector + /// will gotten sorted by guest physical address. + /// + /// Note, panicking if some regions intersects with each other. + /// + /// # Arguments + /// * `regions` - prepared regions to managed by the address space instance. + /// * `layout` - prepared address space layout configuration. + pub fn from_regions( + mut regions: Vec>, + layout: AddressSpaceLayout, + ) -> Self { + regions.sort_unstable_by_key(|v| v.base); + for region in regions.iter() { + if !layout.is_region_valid(region) { + panic!( + "Invalid region {:?} for address space layout {:?}", + region, layout + ); + } + } + for idx in 1..regions.len() { + if regions[idx].intersect_with(®ions[idx - 1]) { + panic!("address space regions intersect with each other"); + } + } + AddressSpaceBase { regions, layout } + } + + /// Insert a new address space region into the address space. + /// + /// # Arguments + /// * `region` - the new region to be inserted. 
+ pub fn insert_region( + &mut self, + region: Arc, + ) -> Result<(), AddressSpaceError> { + if !self.layout.is_region_valid(®ion) { + return Err(AddressSpaceError::InvalidAddressRange( + region.start_addr().0, + region.len(), + )); + } + for idx in 0..self.regions.len() { + if self.regions[idx].intersect_with(®ion) { + return Err(AddressSpaceError::InvalidAddressRange( + region.start_addr().0, + region.len(), + )); + } + } + self.regions.push(region); + Ok(()) + } + + /// Enumerate all regions in the address space. + /// + /// # Arguments + /// * `cb` - the callback function to apply to each region. + pub fn walk_regions(&self, mut cb: F) -> Result<(), AddressSpaceError> + where + F: FnMut(&Arc) -> Result<(), AddressSpaceError>, + { + for reg in self.regions.iter() { + cb(reg)?; + } + + Ok(()) + } + + /// Get address space layout associated with the address space. + pub fn layout(&self) -> AddressSpaceLayout { + self.layout.clone() + } + + /// Get maximum of guest physical address in the address space. + pub fn last_addr(&self) -> GuestAddress { + let mut last_addr = GuestAddress(self.layout.mem_start); + for reg in self.regions.iter() { + if reg.ty != AddressSpaceRegionType::DAXMemory && reg.last_addr() > last_addr { + last_addr = reg.last_addr(); + } + } + last_addr + } + + /// Check whether the guest physical address `guest_addr` belongs to a DAX memory region. + /// + /// # Arguments + /// * `guest_addr` - the guest physical address to inquire + pub fn is_dax_region(&self, guest_addr: GuestAddress) -> bool { + for reg in self.regions.iter() { + // Safe because we have validate the region when creating the address space object. + if reg.region_type() == AddressSpaceRegionType::DAXMemory + && reg.start_addr() <= guest_addr + && reg.start_addr().0 + reg.len() > guest_addr.0 + { + return true; + } + } + false + } + + /// Get protection flags of memory region that guest physical address `guest_addr` belongs to. + /// + /// # Arguments + /// * `guest_addr` - the guest physical address to inquire + pub fn prot_flags(&self, guest_addr: GuestAddress) -> Result { + for reg in self.regions.iter() { + if reg.start_addr() <= guest_addr && reg.start_addr().0 + reg.len() > guest_addr.0 { + return Ok(reg.prot_flags()); + } + } + + Err(AddressSpaceError::InvalidRegionType) + } + + /// Get optional NUMA node id associated with guest physical address `gpa`. + /// + /// # Arguments + /// * `gpa` - guest physical address to query. + pub fn numa_node_id(&self, gpa: u64) -> Option { + for reg in self.regions.iter() { + if gpa >= reg.base.0 && gpa < (reg.base.0 + reg.size) { + return reg.host_numa_node_id; + } + } + None + } +} + +/// An address space implementation with region hotplug capability. +/// +/// The `AddressSpace` is a wrapper over [AddressSpaceBase] to support hotplug of +/// address space regions. +#[derive(Clone)] +pub struct AddressSpace { + state: Arc>, +} + +impl AddressSpace { + /// Convert a [GuestMemoryMmap] object into `GuestMemoryAtomic`. + pub fn convert_into_vm_as( + gm: GuestMemoryMmap, + ) -> vm_memory::atomic::GuestMemoryAtomic { + vm_memory::atomic::GuestMemoryAtomic::from(Arc::new(gm)) + } + + /// Create an instance of `AddressSpace` from an `AddressSpaceRegion` array. + /// + /// To achieve better performance by using binary search algorithm, the `regions` vector + /// will gotten sorted by guest physical address. + /// + /// Note, panicking if some regions intersects with each other. 
+ /// + /// # Arguments + /// * `regions` - prepared regions to managed by the address space instance. + /// * `layout` - prepared address space layout configuration. + pub fn from_regions(regions: Vec>, layout: AddressSpaceLayout) -> Self { + let base = AddressSpaceBase::from_regions(regions, layout); + + AddressSpace { + state: Arc::new(ArcSwap::new(Arc::new(base))), + } + } + + /// Insert a new address space region into the address space. + /// + /// # Arguments + /// * `region` - the new region to be inserted. + pub fn insert_region( + &mut self, + region: Arc, + ) -> Result<(), AddressSpaceError> { + let curr = self.state.load().regions.clone(); + let layout = self.state.load().layout.clone(); + let mut base = AddressSpaceBase::from_regions(curr, layout); + base.insert_region(region)?; + let _old = self.state.swap(Arc::new(base)); + + Ok(()) + } + + /// Enumerate all regions in the address space. + /// + /// # Arguments + /// * `cb` - the callback function to apply to each region. + pub fn walk_regions(&self, cb: F) -> Result<(), AddressSpaceError> + where + F: FnMut(&Arc) -> Result<(), AddressSpaceError>, + { + self.state.load().walk_regions(cb) + } + + /// Get address space layout associated with the address space. + pub fn layout(&self) -> AddressSpaceLayout { + self.state.load().layout() + } + + /// Get maximum of guest physical address in the address space. + pub fn last_addr(&self) -> GuestAddress { + self.state.load().last_addr() + } + + /// Check whether the guest physical address `guest_addr` belongs to a DAX memory region. + /// + /// # Arguments + /// * `guest_addr` - the guest physical address to inquire + pub fn is_dax_region(&self, guest_addr: GuestAddress) -> bool { + self.state.load().is_dax_region(guest_addr) + } + + /// Get protection flags of memory region that guest physical address `guest_addr` belongs to. + /// + /// # Arguments + /// * `guest_addr` - the guest physical address to inquire + pub fn prot_flags(&self, guest_addr: GuestAddress) -> Result { + self.state.load().prot_flags(guest_addr) + } + + /// Get optional NUMA node id associated with guest physical address `gpa`. + /// + /// # Arguments + /// * `gpa` - guest physical address to query. 
+ pub fn numa_node_id(&self, gpa: u64) -> Option { + self.state.load().numa_node_id(gpa) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use vm_memory::GuestUsize; + use vmm_sys_util::tempfile::TempFile; + + // define macros for unit test + const GUEST_PHYS_END: u64 = (1 << 46) - 1; + const GUEST_MEM_START: u64 = 0; + const GUEST_MEM_END: u64 = GUEST_PHYS_END >> 1; + const GUEST_DEVICE_START: u64 = GUEST_MEM_END + 1; + + #[test] + fn test_address_space_base_from_regions() { + let mut file = TempFile::new().unwrap().into_file(); + let sample_buf = &[1, 2, 3, 4, 5]; + assert!(file.write_all(sample_buf).is_ok()); + file.set_len(0x10000).unwrap(); + + let reg = Arc::new( + AddressSpaceRegion::create_device_region(GuestAddress(GUEST_DEVICE_START), 0x1000) + .unwrap(), + ); + let regions = vec![reg]; + let layout = AddressSpaceLayout::new(GUEST_PHYS_END, GUEST_MEM_START, GUEST_MEM_END); + let address_space = AddressSpaceBase::from_regions(regions, layout.clone()); + assert_eq!(address_space.layout(), layout); + } + + #[test] + #[should_panic(expected = "Invalid region")] + fn test_address_space_base_from_regions_when_region_invalid() { + let reg = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x100), + 0x1000, + None, + None, + 0, + 0, + false, + )); + let regions = vec![reg]; + let layout = AddressSpaceLayout::new(0x2000, 0x200, 0x1800); + let _address_space = AddressSpaceBase::from_regions(regions, layout); + } + + #[test] + #[should_panic(expected = "address space regions intersect with each other")] + fn test_address_space_base_from_regions_when_region_intersected() { + let reg1 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x100), + 0x200, + None, + None, + 0, + 0, + false, + )); + let reg2 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x200), + 0x200, + None, + None, + 0, + 0, + false, + )); + let regions = vec![reg1, reg2]; + let layout = AddressSpaceLayout::new(0x2000, 0x0, 0x1800); + let _address_space = AddressSpaceBase::from_regions(regions, layout); + } + + #[test] + fn test_address_space_base_insert_region() { + let reg1 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x100), + 0x200, + None, + None, + 0, + 0, + false, + )); + let reg2 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x300), + 0x200, + None, + None, + 0, + 0, + false, + )); + let regions = vec![reg1]; + let layout = AddressSpaceLayout::new(0x2000, 0x100, 0x1800); + let mut address_space = AddressSpaceBase::from_regions(regions, layout); + + // Normal case. + address_space.insert_region(reg2).unwrap(); + assert!(!address_space.regions[1].intersect_with(&address_space.regions[0])); + + // Error invalid address range case when region invaled. + let invalid_reg = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x0), + 0x100, + None, + None, + 0, + 0, + false, + )); + assert_eq!( + format!( + "{:?}", + address_space.insert_region(invalid_reg).err().unwrap() + ), + format!("InvalidAddressRange({:?}, {:?})", 0x0, 0x100) + ); + + // Error Error invalid address range case when region to be inserted will intersect + // exsisting regions. 
+ let intersected_reg = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x400), + 0x200, + None, + None, + 0, + 0, + false, + )); + assert_eq!( + format!( + "{:?}", + address_space.insert_region(intersected_reg).err().unwrap() + ), + format!("InvalidAddressRange({:?}, {:?})", 0x400, 0x200) + ); + } + + #[test] + fn test_address_space_base_walk_regions() { + let reg1 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x100), + 0x200, + None, + None, + 0, + 0, + false, + )); + let reg2 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x300), + 0x200, + None, + None, + 0, + 0, + false, + )); + let regions = vec![reg1, reg2]; + let layout = AddressSpaceLayout::new(0x2000, 0x0, 0x1800); + let address_space = AddressSpaceBase::from_regions(regions, layout); + + // The argument of walk_regions is a function which takes a &Arc + // and returns result. This function will be applied to all regions. + fn do_not_have_hotplug(region: &Arc) -> Result<(), AddressSpaceError> { + if region.is_hotplug() { + Err(AddressSpaceError::InvalidRegionType) // The Error type is dictated to AddressSpaceError. + } else { + Ok(()) + } + } + assert!(matches!( + address_space.walk_regions(do_not_have_hotplug).unwrap(), + () + )); + } + + #[test] + fn test_address_space_base_last_addr() { + let reg1 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x100), + 0x200, + None, + None, + 0, + 0, + false, + )); + let reg2 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x300), + 0x200, + None, + None, + 0, + 0, + false, + )); + let regions = vec![reg1, reg2]; + let layout = AddressSpaceLayout::new(0x2000, 0x0, 0x1800); + let address_space = AddressSpaceBase::from_regions(regions, layout); + + assert_eq!(address_space.last_addr(), GuestAddress(0x500 - 1)); + } + + #[test] + fn test_address_space_base_is_dax_region() { + let page_size = 4096; + let address_space_region = vec![ + Arc::new(AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(page_size), + page_size as GuestUsize, + )), + Arc::new(AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(page_size * 2), + page_size as GuestUsize, + )), + Arc::new(AddressSpaceRegion::new( + AddressSpaceRegionType::DAXMemory, + GuestAddress(GUEST_DEVICE_START), + page_size as GuestUsize, + )), + ]; + let layout = AddressSpaceLayout::new(GUEST_PHYS_END, GUEST_MEM_START, GUEST_MEM_END); + let address_space = AddressSpaceBase::from_regions(address_space_region, layout); + + assert!(!address_space.is_dax_region(GuestAddress(page_size))); + assert!(!address_space.is_dax_region(GuestAddress(page_size * 2))); + assert!(address_space.is_dax_region(GuestAddress(GUEST_DEVICE_START))); + assert!(address_space.is_dax_region(GuestAddress(GUEST_DEVICE_START + 1))); + assert!(!address_space.is_dax_region(GuestAddress(GUEST_DEVICE_START + page_size))); + assert!(address_space.is_dax_region(GuestAddress(GUEST_DEVICE_START + page_size - 1))); + } + + #[test] + fn test_address_space_base_prot_flags() { + let reg1 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x100), + 0x200, + Some(0), + None, + 0, + 0, + false, + )); + let reg2 = Arc::new(AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x300), + 0x300, + )); + let regions = vec![reg1, reg2]; + let layout 
= AddressSpaceLayout::new(0x2000, 0x0, 0x1800); + let address_space = AddressSpaceBase::from_regions(regions, layout); + + // Normal case, reg1. + assert_eq!(address_space.prot_flags(GuestAddress(0x200)).unwrap(), 0); + // Normal case, reg2. + assert_eq!( + address_space.prot_flags(GuestAddress(0x500)).unwrap(), + libc::PROT_READ | libc::PROT_WRITE + ); + // Inquire gpa where no region is set. + assert!(matches!( + address_space.prot_flags(GuestAddress(0x600)), + Err(AddressSpaceError::InvalidRegionType) + )); + } + + #[test] + fn test_address_space_base_numa_node_id() { + let reg1 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x100), + 0x200, + Some(0), + None, + 0, + 0, + false, + )); + let reg2 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x300), + 0x300, + None, + None, + 0, + 0, + false, + )); + let regions = vec![reg1, reg2]; + let layout = AddressSpaceLayout::new(0x2000, 0x0, 0x1800); + let address_space = AddressSpaceBase::from_regions(regions, layout); + + // Normal case. + assert_eq!(address_space.numa_node_id(0x200).unwrap(), 0); + // Inquire region with None as its numa node id. + assert_eq!(address_space.numa_node_id(0x400), None); + // Inquire gpa where no region is set. + assert_eq!(address_space.numa_node_id(0x600), None); + } + + #[test] + fn test_address_space_convert_into_vm_as() { + // ! Further and detailed test is needed here. + let gmm = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x0), 0x400)]).unwrap(); + let _vm = AddressSpace::convert_into_vm_as(gmm); + } + + #[test] + fn test_address_space_insert_region() { + let reg1 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x100), + 0x200, + None, + None, + 0, + 0, + false, + )); + let reg2 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x300), + 0x200, + None, + None, + 0, + 0, + false, + )); + let regions = vec![reg1]; + let layout = AddressSpaceLayout::new(0x2000, 0x100, 0x1800); + let mut address_space = AddressSpace::from_regions(regions, layout); + + // Normal case. + assert!(matches!(address_space.insert_region(reg2).unwrap(), ())); + + // Error invalid address range case when region invaled. + let invalid_reg = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x0), + 0x100, + None, + None, + 0, + 0, + false, + )); + assert_eq!( + format!( + "{:?}", + address_space.insert_region(invalid_reg).err().unwrap() + ), + format!("InvalidAddressRange({:?}, {:?})", 0x0, 0x100) + ); + + // Error Error invalid address range case when region to be inserted will intersect + // exsisting regions. 
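+        // The candidate region covers [0x400, 0x600) and overlaps reg2 at [0x300, 0x500),
+        // so insert_region() is expected to fail with InvalidAddressRange.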
+ let intersected_reg = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x400), + 0x200, + None, + None, + 0, + 0, + false, + )); + assert_eq!( + format!( + "{:?}", + address_space.insert_region(intersected_reg).err().unwrap() + ), + format!("InvalidAddressRange({:?}, {:?})", 0x400, 0x200) + ); + } + + #[test] + fn test_address_space_walk_regions() { + let reg1 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x100), + 0x200, + None, + None, + 0, + 0, + false, + )); + let reg2 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x300), + 0x200, + None, + None, + 0, + 0, + false, + )); + let regions = vec![reg1, reg2]; + let layout = AddressSpaceLayout::new(0x2000, 0x0, 0x1800); + let address_space = AddressSpace::from_regions(regions, layout); + + fn access_all_hotplug_flag( + region: &Arc, + ) -> Result<(), AddressSpaceError> { + region.is_hotplug(); + Ok(()) + } + + assert!(matches!( + address_space.walk_regions(access_all_hotplug_flag).unwrap(), + () + )); + } + + #[test] + fn test_address_space_layout() { + let reg = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x100), + 0x1000, + None, + None, + 0, + 0, + false, + )); + let regions = vec![reg]; + let layout = AddressSpaceLayout::new(0x2000, 0x0, 0x1800); + let address_space = AddressSpace::from_regions(regions, layout.clone()); + + assert_eq!(layout, address_space.layout()); + } + + #[test] + fn test_address_space_last_addr() { + let reg1 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x100), + 0x200, + None, + None, + 0, + 0, + false, + )); + let reg2 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x300), + 0x200, + None, + None, + 0, + 0, + false, + )); + let regions = vec![reg1, reg2]; + let layout = AddressSpaceLayout::new(0x2000, 0x0, 0x1800); + let address_space = AddressSpace::from_regions(regions, layout); + + assert_eq!(address_space.last_addr(), GuestAddress(0x500 - 1)); + } + + #[test] + fn test_address_space_is_dax_region() { + let page_size = 4096; + let address_space_region = vec![ + Arc::new(AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(page_size), + page_size as GuestUsize, + )), + Arc::new(AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(page_size * 2), + page_size as GuestUsize, + )), + Arc::new(AddressSpaceRegion::new( + AddressSpaceRegionType::DAXMemory, + GuestAddress(GUEST_DEVICE_START), + page_size as GuestUsize, + )), + ]; + let layout = AddressSpaceLayout::new(GUEST_PHYS_END, GUEST_MEM_START, GUEST_MEM_END); + let address_space = AddressSpace::from_regions(address_space_region, layout); + + assert!(!address_space.is_dax_region(GuestAddress(page_size))); + assert!(!address_space.is_dax_region(GuestAddress(page_size * 2))); + assert!(address_space.is_dax_region(GuestAddress(GUEST_DEVICE_START))); + assert!(address_space.is_dax_region(GuestAddress(GUEST_DEVICE_START + 1))); + assert!(!address_space.is_dax_region(GuestAddress(GUEST_DEVICE_START + page_size))); + assert!(address_space.is_dax_region(GuestAddress(GUEST_DEVICE_START + page_size - 1))); + } + + #[test] + fn test_address_space_prot_flags() { + let reg1 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x100), + 0x200, + Some(0), + None, + 0, + 0, + false, + )); + let reg2 = 
Arc::new(AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x300), + 0x300, + )); + let regions = vec![reg1, reg2]; + let layout = AddressSpaceLayout::new(0x2000, 0x0, 0x1800); + let address_space = AddressSpace::from_regions(regions, layout); + + // Normal case, reg1. + assert_eq!(address_space.prot_flags(GuestAddress(0x200)).unwrap(), 0); + // Normal case, reg2. + assert_eq!( + address_space.prot_flags(GuestAddress(0x500)).unwrap(), + libc::PROT_READ | libc::PROT_WRITE + ); + // Inquire gpa where no region is set. + assert!(matches!( + address_space.prot_flags(GuestAddress(0x600)), + Err(AddressSpaceError::InvalidRegionType) + )); + } + + #[test] + fn test_address_space_numa_node_id() { + let reg1 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x100), + 0x200, + Some(0), + None, + 0, + 0, + false, + )); + let reg2 = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x300), + 0x300, + None, + None, + 0, + 0, + false, + )); + let regions = vec![reg1, reg2]; + let layout = AddressSpaceLayout::new(0x2000, 0x0, 0x1800); + let address_space = AddressSpace::from_regions(regions, layout); + + // Normal case. + assert_eq!(address_space.numa_node_id(0x200).unwrap(), 0); + // Inquire region with None as its numa node id. + assert_eq!(address_space.numa_node_id(0x400), None); + // Inquire gpa where no region is set. + assert_eq!(address_space.numa_node_id(0x600), None); + } +} diff --git a/src/dragonball/src/dbs_address_space/src/layout.rs b/src/dragonball/src/dbs_address_space/src/layout.rs new file mode 100644 index 000000000000..cd6c6bfb0a79 --- /dev/null +++ b/src/dragonball/src/dbs_address_space/src/layout.rs @@ -0,0 +1,154 @@ +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use lazy_static::lazy_static; + +use crate::{AddressSpaceRegion, AddressSpaceRegionType}; + +// Max retry times for reading /proc +const PROC_READ_RETRY: u64 = 5; + +lazy_static! { + /// Upper bound of host memory. + pub static ref USABLE_END: u64 = { + for _ in 0..PROC_READ_RETRY { + if let Ok(buf) = std::fs::read("/proc/meminfo") { + let content = String::from_utf8_lossy(&buf); + for line in content.lines() { + if line.starts_with("MemTotal:") { + if let Some(end) = line.find(" kB") { + if let Ok(size) = line[9..end].trim().parse::() { + return (size << 10) - 1; + } + } + } + } + } + } + panic!("Exceed max retry times. Cannot get total mem size from /proc/meminfo"); + }; +} + +/// Address space layout configuration. +/// +/// The layout configuration must guarantee that `mem_start` <= `mem_end` <= `phys_end`. +/// Non-memory region should be arranged into the range [mem_end, phys_end). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AddressSpaceLayout { + /// end of guest physical address + pub phys_end: u64, + /// start of guest memory address + pub mem_start: u64, + /// end of guest memory address + pub mem_end: u64, + /// end of usable memory address + pub usable_end: u64, +} + +impl AddressSpaceLayout { + /// Create a new instance of `AddressSpaceLayout`. + pub fn new(phys_end: u64, mem_start: u64, mem_end: u64) -> Self { + AddressSpaceLayout { + phys_end, + mem_start, + mem_end, + usable_end: *USABLE_END, + } + } + + /// Check whether an region is valid with the constraints of the layout. 
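+    ///
+    /// A `DefaultMemory` region must lie within `[mem_start, mem_end)`, while `DeviceMemory`
+    /// and `DAXMemory` regions must lie within `[mem_end, phys_end)`.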
+ pub fn is_region_valid(&self, region: &AddressSpaceRegion) -> bool { + let region_end = match region.base.0.checked_add(region.size) { + None => return false, + Some(v) => v, + }; + + match region.ty { + AddressSpaceRegionType::DefaultMemory => { + if region.base.0 < self.mem_start || region_end > self.mem_end { + return false; + } + } + AddressSpaceRegionType::DeviceMemory | AddressSpaceRegionType::DAXMemory => { + if region.base.0 < self.mem_end || region_end > self.phys_end { + return false; + } + } + } + + true + } +} + +#[cfg(test)] +mod tests { + use super::*; + use vm_memory::GuestAddress; + + #[test] + fn test_is_region_valid() { + let layout = AddressSpaceLayout::new(0x1_0000_0000, 0x1000_0000, 0x2000_0000); + + let region = AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x0), + 0x1_0000, + ); + assert!(!layout.is_region_valid(®ion)); + let region = AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x2000_0000), + 0x1_0000, + ); + assert!(!layout.is_region_valid(®ion)); + let region = AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x1_0000), + 0x2000_0000, + ); + assert!(!layout.is_region_valid(®ion)); + let region = AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(u64::MAX), + 0x1_0000_0000, + ); + assert!(!layout.is_region_valid(®ion)); + let region = AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x1000_0000), + 0x1_0000, + ); + assert!(layout.is_region_valid(®ion)); + + let region = AddressSpaceRegion::new( + AddressSpaceRegionType::DeviceMemory, + GuestAddress(0x1000_0000), + 0x1_0000, + ); + assert!(!layout.is_region_valid(®ion)); + let region = AddressSpaceRegion::new( + AddressSpaceRegionType::DeviceMemory, + GuestAddress(0x1_0000_0000), + 0x1_0000, + ); + assert!(!layout.is_region_valid(®ion)); + let region = AddressSpaceRegion::new( + AddressSpaceRegionType::DeviceMemory, + GuestAddress(0x1_0000), + 0x1_0000_0000, + ); + assert!(!layout.is_region_valid(®ion)); + let region = AddressSpaceRegion::new( + AddressSpaceRegionType::DeviceMemory, + GuestAddress(u64::MAX), + 0x1_0000_0000, + ); + assert!(!layout.is_region_valid(®ion)); + let region = AddressSpaceRegion::new( + AddressSpaceRegionType::DeviceMemory, + GuestAddress(0x8000_0000), + 0x1_0000, + ); + assert!(layout.is_region_valid(®ion)); + } +} diff --git a/src/dragonball/src/dbs_address_space/src/lib.rs b/src/dragonball/src/dbs_address_space/src/lib.rs new file mode 100644 index 000000000000..7e38cbbddce4 --- /dev/null +++ b/src/dragonball/src/dbs_address_space/src/lib.rs @@ -0,0 +1,87 @@ +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![deny(missing_docs)] + +//! Traits and Structs to manage guest physical address space for virtual machines. +//! +//! The [vm-memory](https://crates.io/crates/vm-memory) implements mechanisms to manage and access +//! guest memory resident in guest physical address space. In addition to guest memory, there may +//! be other type of devices resident in the same guest physical address space. +//! +//! The `dbs-address-space` crate provides traits and structs to manage the guest physical address +//! space for virtual machines, and mechanisms to coordinate all the devices resident in the +//! guest physical address space. 
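+//!
+//! # Example
+//!
+//! A minimal sketch of validating a region against a layout. It assumes the crate is
+//! imported as `dbs_address_space` and uses the types re-exported below; it is marked
+//! `ignore` so it is not compiled as a doctest.
+//!
+//! ```ignore
+//! use dbs_address_space::{AddressSpaceLayout, AddressSpaceRegion, AddressSpaceRegionType};
+//! use vm_memory::GuestAddress;
+//!
+//! // Guest RAM occupies [0x1000_0000, 0x2000_0000); device/DAX regions live above it.
+//! let layout = AddressSpaceLayout::new(0x1_0000_0000, 0x1000_0000, 0x2000_0000);
+//! let region = AddressSpaceRegion::new(
+//!     AddressSpaceRegionType::DefaultMemory,
+//!     GuestAddress(0x1000_0000),
+//!     0x1_0000,
+//! );
+//! assert!(layout.is_region_valid(&region));
+//! ```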
+ +use vm_memory::GuestUsize; + +mod address_space; +pub use self::address_space::{AddressSpace, AddressSpaceBase}; + +mod layout; +pub use layout::{AddressSpaceLayout, USABLE_END}; + +mod memory; +pub use memory::{GuestMemoryHybrid, GuestMemoryManager, GuestRegionHybrid, GuestRegionRaw}; + +mod numa; +pub use self::numa::{NumaIdTable, NumaNode, NumaNodeInfo, MPOL_MF_MOVE, MPOL_PREFERRED}; + +mod region; +pub use region::{AddressSpaceRegion, AddressSpaceRegionType}; + +/// Errors associated with virtual machine address space management. +#[derive(Debug, thiserror::Error)] +pub enum AddressSpaceError { + /// Invalid address space region type. + #[error("invalid address space region type")] + InvalidRegionType, + + /// Invalid address range. + #[error("invalid address space region (0x{0:x}, 0x{1:x})")] + InvalidAddressRange(u64, GuestUsize), + + /// Invalid guest memory source type. + #[error("invalid memory source type {0}")] + InvalidMemorySourceType(String), + + /// Failed to create memfd to map anonymous memory. + #[error("can not create memfd to map anonymous memory")] + CreateMemFd(#[source] nix::Error), + + /// Failed to open memory file. + #[error("can not open memory file")] + OpenFile(#[source] std::io::Error), + + /// Failed to create directory. + #[error("can not create directory")] + CreateDir(#[source] std::io::Error), + + /// Failed to set size for memory file. + #[error("can not set size for memory file")] + SetFileSize(#[source] std::io::Error), + + /// Failed to unlink memory file. + #[error("can not unlink memory file")] + UnlinkFile(#[source] nix::Error), +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_error_code() { + let e = AddressSpaceError::InvalidRegionType; + + assert_eq!(format!("{e}"), "invalid address space region type"); + assert_eq!(format!("{e:?}"), "InvalidRegionType"); + assert_eq!( + format!( + "{}", + AddressSpaceError::InvalidMemorySourceType("test".to_string()) + ), + "invalid memory source type test" + ); + } +} diff --git a/src/dragonball/src/dbs_address_space/src/memory/hybrid.rs b/src/dragonball/src/dbs_address_space/src/memory/hybrid.rs new file mode 100644 index 000000000000..87a09749e828 --- /dev/null +++ b/src/dragonball/src/dbs_address_space/src/memory/hybrid.rs @@ -0,0 +1,1105 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::io::{Read, Write}; +use std::sync::atomic::Ordering; +use std::sync::Arc; + +use vm_memory::bitmap::{Bitmap, BS}; +use vm_memory::guest_memory::GuestMemoryIterator; +use vm_memory::mmap::{Error, NewBitmap}; +use vm_memory::{ + guest_memory, AtomicAccess, Bytes, FileOffset, GuestAddress, GuestMemory, GuestMemoryRegion, + GuestRegionMmap, GuestUsize, MemoryRegionAddress, VolatileSlice, +}; + +use crate::GuestRegionRaw; + +/// An adapter for different concrete implementations of `GuestMemoryRegion`. +#[derive(Debug)] +pub enum GuestRegionHybrid { + /// Region of type `GuestRegionMmap`. + Mmap(GuestRegionMmap), + /// Region of type `GuestRegionRaw`. + Raw(GuestRegionRaw), +} + +impl GuestRegionHybrid { + /// Create a `GuestRegionHybrid` object from `GuestRegionMmap` object. + pub fn from_mmap_region(region: GuestRegionMmap) -> Self { + GuestRegionHybrid::Mmap(region) + } + + /// Create a `GuestRegionHybrid` object from `GuestRegionRaw` object. 
+ pub fn from_raw_region(region: GuestRegionRaw) -> Self { + GuestRegionHybrid::Raw(region) + } +} + +impl Bytes for GuestRegionHybrid { + type E = guest_memory::Error; + + fn write(&self, buf: &[u8], addr: MemoryRegionAddress) -> guest_memory::Result { + match self { + GuestRegionHybrid::Mmap(region) => region.write(buf, addr), + GuestRegionHybrid::Raw(region) => region.write(buf, addr), + } + } + + fn read(&self, buf: &mut [u8], addr: MemoryRegionAddress) -> guest_memory::Result { + match self { + GuestRegionHybrid::Mmap(region) => region.read(buf, addr), + GuestRegionHybrid::Raw(region) => region.read(buf, addr), + } + } + + fn write_slice(&self, buf: &[u8], addr: MemoryRegionAddress) -> guest_memory::Result<()> { + match self { + GuestRegionHybrid::Mmap(region) => region.write_slice(buf, addr), + GuestRegionHybrid::Raw(region) => region.write_slice(buf, addr), + } + } + + fn read_slice(&self, buf: &mut [u8], addr: MemoryRegionAddress) -> guest_memory::Result<()> { + match self { + GuestRegionHybrid::Mmap(region) => region.read_slice(buf, addr), + GuestRegionHybrid::Raw(region) => region.read_slice(buf, addr), + } + } + + fn read_from( + &self, + addr: MemoryRegionAddress, + src: &mut F, + count: usize, + ) -> guest_memory::Result + where + F: Read, + { + match self { + GuestRegionHybrid::Mmap(region) => region.read_from(addr, src, count), + GuestRegionHybrid::Raw(region) => region.read_from(addr, src, count), + } + } + + fn read_exact_from( + &self, + addr: MemoryRegionAddress, + src: &mut F, + count: usize, + ) -> guest_memory::Result<()> + where + F: Read, + { + match self { + GuestRegionHybrid::Mmap(region) => region.read_exact_from(addr, src, count), + GuestRegionHybrid::Raw(region) => region.read_exact_from(addr, src, count), + } + } + + fn write_to( + &self, + addr: MemoryRegionAddress, + dst: &mut F, + count: usize, + ) -> guest_memory::Result + where + F: Write, + { + match self { + GuestRegionHybrid::Mmap(region) => region.write_to(addr, dst, count), + GuestRegionHybrid::Raw(region) => region.write_to(addr, dst, count), + } + } + + fn write_all_to( + &self, + addr: MemoryRegionAddress, + dst: &mut F, + count: usize, + ) -> guest_memory::Result<()> + where + F: Write, + { + match self { + GuestRegionHybrid::Mmap(region) => region.write_all_to(addr, dst, count), + GuestRegionHybrid::Raw(region) => region.write_all_to(addr, dst, count), + } + } + + fn store( + &self, + val: T, + addr: MemoryRegionAddress, + order: Ordering, + ) -> guest_memory::Result<()> { + match self { + GuestRegionHybrid::Mmap(region) => region.store(val, addr, order), + GuestRegionHybrid::Raw(region) => region.store(val, addr, order), + } + } + + fn load( + &self, + addr: MemoryRegionAddress, + order: Ordering, + ) -> guest_memory::Result { + match self { + GuestRegionHybrid::Mmap(region) => region.load(addr, order), + GuestRegionHybrid::Raw(region) => region.load(addr, order), + } + } +} + +impl GuestMemoryRegion for GuestRegionHybrid { + type B = B; + + fn len(&self) -> GuestUsize { + match self { + GuestRegionHybrid::Mmap(region) => region.len(), + GuestRegionHybrid::Raw(region) => region.len(), + } + } + + fn start_addr(&self) -> GuestAddress { + match self { + GuestRegionHybrid::Mmap(region) => region.start_addr(), + GuestRegionHybrid::Raw(region) => region.start_addr(), + } + } + + fn bitmap(&self) -> &Self::B { + match self { + GuestRegionHybrid::Mmap(region) => region.bitmap(), + GuestRegionHybrid::Raw(region) => region.bitmap(), + } + } + + fn get_host_address(&self, addr: MemoryRegionAddress) -> 
guest_memory::Result<*mut u8> { + match self { + GuestRegionHybrid::Mmap(region) => region.get_host_address(addr), + GuestRegionHybrid::Raw(region) => region.get_host_address(addr), + } + } + + fn file_offset(&self) -> Option<&FileOffset> { + match self { + GuestRegionHybrid::Mmap(region) => region.file_offset(), + GuestRegionHybrid::Raw(region) => region.file_offset(), + } + } + + unsafe fn as_slice(&self) -> Option<&[u8]> { + match self { + GuestRegionHybrid::Mmap(region) => region.as_slice(), + GuestRegionHybrid::Raw(region) => region.as_slice(), + } + } + + unsafe fn as_mut_slice(&self) -> Option<&mut [u8]> { + match self { + GuestRegionHybrid::Mmap(region) => region.as_mut_slice(), + GuestRegionHybrid::Raw(region) => region.as_mut_slice(), + } + } + + fn get_slice( + &self, + offset: MemoryRegionAddress, + count: usize, + ) -> guest_memory::Result>> { + match self { + GuestRegionHybrid::Mmap(region) => region.get_slice(offset, count), + GuestRegionHybrid::Raw(region) => region.get_slice(offset, count), + } + } + + #[cfg(target_os = "linux")] + fn is_hugetlbfs(&self) -> Option { + match self { + GuestRegionHybrid::Mmap(region) => region.is_hugetlbfs(), + GuestRegionHybrid::Raw(region) => region.is_hugetlbfs(), + } + } +} + +/// [`GuestMemory`](trait.GuestMemory.html) implementation that manage hybrid types of guest memory +/// regions. +/// +/// Represents the entire physical memory of the guest by tracking all its memory regions. +/// Each region is an instance of `GuestRegionHybrid`. +#[derive(Clone, Debug, Default)] +pub struct GuestMemoryHybrid { + pub(crate) regions: Vec>>, +} + +impl GuestMemoryHybrid { + /// Creates an empty `GuestMemoryHybrid` instance. + pub fn new() -> Self { + Self::default() + } +} + +impl GuestMemoryHybrid { + /// Creates a new `GuestMemoryHybrid` from a vector of regions. + /// + /// # Arguments + /// + /// * `regions` - The vector of regions. + /// The regions shouldn't overlap and they should be sorted + /// by the starting address. + pub fn from_regions(mut regions: Vec>) -> Result { + Self::from_arc_regions(regions.drain(..).map(Arc::new).collect()) + } + + /// Creates a new `GuestMemoryHybrid` from a vector of Arc regions. + /// + /// Similar to the constructor `from_regions()` as it returns a + /// `GuestMemoryHybrid`. The need for this constructor is to provide a way for + /// consumer of this API to create a new `GuestMemoryHybrid` based on existing + /// regions coming from an existing `GuestMemoryHybrid` instance. + /// + /// # Arguments + /// + /// * `regions` - The vector of `Arc` regions. + /// The regions shouldn't overlap and they should be sorted + /// by the starting address. + pub fn from_arc_regions(regions: Vec>>) -> Result { + if regions.is_empty() { + return Err(Error::NoMemoryRegion); + } + + for window in regions.windows(2) { + let prev = &window[0]; + let next = &window[1]; + + if prev.start_addr() > next.start_addr() { + return Err(Error::UnsortedMemoryRegions); + } + + if prev.last_addr() >= next.start_addr() { + return Err(Error::MemoryRegionOverlap); + } + } + + Ok(Self { regions }) + } + + /// Insert a region into the `GuestMemoryHybrid` object and return a new `GuestMemoryHybrid`. + /// + /// # Arguments + /// * `region`: the memory region to insert into the guest memory object. 
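+    ///
+    /// The new region is appended, the list is re-sorted by start address, and the result is
+    /// re-validated via `from_arc_regions()`, so overlapping regions are rejected.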
+ pub fn insert_region( + &self, + region: Arc>, + ) -> Result, Error> { + let mut regions = self.regions.clone(); + regions.push(region); + regions.sort_by_key(|x| x.start_addr()); + + Self::from_arc_regions(regions) + } + + /// Remove a region into the `GuestMemoryHybrid` object and return a new `GuestMemoryHybrid` + /// on success, together with the removed region. + /// + /// # Arguments + /// * `base`: base address of the region to be removed + /// * `size`: size of the region to be removed + pub fn remove_region( + &self, + base: GuestAddress, + size: GuestUsize, + ) -> Result<(GuestMemoryHybrid, Arc>), Error> { + if let Ok(region_index) = self.regions.binary_search_by_key(&base, |x| x.start_addr()) { + if self.regions.get(region_index).unwrap().len() as GuestUsize == size { + let mut regions = self.regions.clone(); + let region = regions.remove(region_index); + return Ok((Self { regions }, region)); + } + } + + Err(Error::InvalidGuestRegion) + } +} + +/// An iterator over the elements of `GuestMemoryHybrid`. +/// +/// This struct is created by `GuestMemory::iter()`. See its documentation for more. +pub struct Iter<'a, B>(std::slice::Iter<'a, Arc>>); + +impl<'a, B> Iterator for Iter<'a, B> { + type Item = &'a GuestRegionHybrid; + + fn next(&mut self) -> Option { + self.0.next().map(AsRef::as_ref) + } +} + +impl<'a, B: 'a> GuestMemoryIterator<'a, GuestRegionHybrid> for GuestMemoryHybrid { + type Iter = Iter<'a, B>; +} + +impl GuestMemory for GuestMemoryHybrid { + type R = GuestRegionHybrid; + + type I = Self; + + fn num_regions(&self) -> usize { + self.regions.len() + } + + fn find_region(&self, addr: GuestAddress) -> Option<&GuestRegionHybrid> { + let index = match self.regions.binary_search_by_key(&addr, |x| x.start_addr()) { + Ok(x) => Some(x), + // Within the closest region with starting address < addr + Err(x) if (x > 0 && addr <= self.regions[x - 1].last_addr()) => Some(x - 1), + _ => None, + }; + index.map(|x| self.regions[x].as_ref()) + } + + fn iter(&self) -> Iter { + Iter(self.regions.iter()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Seek; + use vm_memory::{GuestMemoryError, MmapRegion}; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_region_new() { + let start_addr = GuestAddress(0x0); + + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x400).unwrap(), start_addr).unwrap(); + let guest_region = GuestRegionHybrid::from_mmap_region(mmap_reg); + + assert_eq!(guest_region.start_addr(), start_addr); + assert_eq!(guest_region.len(), 0x400); + + let mut buf = [0u8; 1024]; + let raw_region = + unsafe { GuestRegionRaw::<()>::new(start_addr, &mut buf as *mut _, 0x800) }; + let guest_region = GuestRegionHybrid::from_raw_region(raw_region); + + assert_eq!(guest_region.start_addr(), start_addr); + assert_eq!(guest_region.len(), 0x800); + } + + #[test] + fn test_write_and_read_on_mmap_region() { + let start_addr = GuestAddress(0x0); + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x800).unwrap(), start_addr).unwrap(); + let guest_region = GuestRegionHybrid::from_mmap_region(mmap_reg); + let buf_to_write = [0xF0u8; 0x400]; + let write_addr = MemoryRegionAddress(0x400); + + // Normal case. 
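+        // Write 0x400 bytes of 0xF0 at offset 0x400 of the 0x800-byte region, then read them
+        // back and verify the round trip.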
+ let number_of_bytes_write = guest_region.write(&buf_to_write, write_addr).unwrap(); + assert_eq!(number_of_bytes_write, 0x400); + let mut buf_read = [0u8; 0x400]; + let number_of_bytes_read = guest_region.read(&mut buf_read, write_addr).unwrap(); + assert_eq!(number_of_bytes_read, 0x400); + assert_eq!(buf_read, [0xF0u8; 0x400]); + + // Error invalid backend address case in write(). + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_region + .write(&buf_to_write, invalid_addr) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error invalid backend address case in read(). + assert!(matches!( + guest_region + .read(&mut buf_read, invalid_addr) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + } + + #[test] + fn test_write_and_read_on_raw_region() { + let start_addr = GuestAddress(0x0); + let mut buf_of_raw_region = [0u8; 0x800]; + let raw_region = unsafe { + GuestRegionRaw::<()>::new(start_addr, &mut buf_of_raw_region as *mut _, 0x800) + }; + let guest_region = GuestRegionHybrid::from_raw_region(raw_region); + let buf_to_write = [0xF0u8; 0x400]; + let write_addr = MemoryRegionAddress(0x400); + + // Normal case. + let number_of_bytes_write = guest_region.write(&buf_to_write, write_addr).unwrap(); + assert_eq!(number_of_bytes_write, 0x400); + let mut buf_read = [0u8; 0x400]; + let number_of_bytes_read = guest_region.read(&mut buf_read, write_addr).unwrap(); + assert_eq!(number_of_bytes_read, 0x400); + assert_eq!(buf_read, [0xF0u8; 0x400]); + + // Error invalid backend address case in write(). + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_region + .write(&buf_to_write, invalid_addr) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error invalid backend address case in read(). + assert!(matches!( + guest_region + .read(&mut buf_read, invalid_addr) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + } + + #[test] + fn test_write_slice_and_read_slice_on_mmap_region() { + let start_addr = GuestAddress(0x0); + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x800).unwrap(), start_addr).unwrap(); + let guest_region = GuestRegionHybrid::from_mmap_region(mmap_reg); + let buf_to_write = [0xF0u8; 0x400]; + let write_addr = MemoryRegionAddress(0x400); + + // Normal case. + guest_region.write_slice(&buf_to_write, write_addr).unwrap(); + let mut buf_read = [0x0u8; 0x400]; + guest_region.read_slice(&mut buf_read, write_addr).unwrap(); + assert_eq!(buf_read, [0xF0u8; 0x400]); + + // Error invalid backend address case in write_slice(). + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_region + .write_slice(&buf_to_write, invalid_addr) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error partial buffer case in write_slice(). + let insufficient_addr = MemoryRegionAddress(0x600); + assert_eq!( + format!( + "{:?}", + guest_region + .write_slice(&buf_to_write, insufficient_addr) + .err() + .unwrap() + ), + format!( + "PartialBuffer {{ expected: {:?}, completed: {:?} }}", + buf_to_write.len(), + guest_region.len() as usize - 0x600_usize + ) + ); + + // Error invalid backend address case in write_slice(). + let invalid_addr = MemoryRegionAddress(0x900); + let mut buf_read = [0x0u8; 0x400]; + assert!(matches!( + guest_region + .read_slice(&mut buf_read, invalid_addr) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error partial buffer case in write_slice(). 
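+        // Note: this block calls read_slice(); only 0x200 of the requested 0x400 bytes are
+        // available from offset 0x600 onwards, so a PartialBuffer error is expected.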
+ let insufficient_addr = MemoryRegionAddress(0x600); + let mut buf_read = [0x0u8; 0x400]; + assert_eq!( + format!( + "{:?}", + guest_region + .read_slice(&mut buf_read, insufficient_addr) + .err() + .unwrap() + ), + format!( + "PartialBuffer {{ expected: {:?}, completed: {:?} }}", + buf_to_write.len(), + guest_region.len() as usize - 0x600_usize + ) + ); + assert_eq!( + { + let mut buf = [0x0u8; 0x400]; + for cell in buf.iter_mut().take(0x200) { + *cell = 0xF0; + } + buf + }, + buf_read + ); + } + + #[test] + fn test_write_and_read_slice_on_raw_region() { + let start_addr = GuestAddress(0x0); + let mut buf_of_raw_region = [0u8; 0x800]; + let raw_region = unsafe { + GuestRegionRaw::<()>::new(start_addr, &mut buf_of_raw_region as *mut _, 0x800) + }; + let guest_region = GuestRegionHybrid::from_raw_region(raw_region); + let buf_to_write = [0xF0u8; 0x400]; + let write_addr = MemoryRegionAddress(0x400); + + // Normal case. + guest_region.write_slice(&buf_to_write, write_addr).unwrap(); + let mut buf_read = [0x0u8; 0x400]; + guest_region.read_slice(&mut buf_read, write_addr).unwrap(); + assert_eq!(buf_read, [0xF0u8; 0x400]); + + // Error invalid backend address case in write_slice(). + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_region + .write_slice(&buf_to_write, invalid_addr) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error partial buffer case in write_slice(). + let insufficient_addr = MemoryRegionAddress(0x600); + assert_eq!( + format!( + "{:?}", + guest_region + .write_slice(&buf_to_write, insufficient_addr) + .err() + .unwrap() + ), + format!( + "PartialBuffer {{ expected: {:?}, completed: {:?} }}", + buf_to_write.len(), + guest_region.len() as usize - 0x600_usize + ) + ); + + // Error invalid backend address case in write_slice(). + let invalid_addr = MemoryRegionAddress(0x900); + let mut buf_read = [0x0u8; 0x400]; + assert!(matches!( + guest_region + .read_slice(&mut buf_read, invalid_addr) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error partial buffer case in write_slice(). + let insufficient_addr = MemoryRegionAddress(0x600); + let mut buf_read = [0x0u8; 0x400]; + assert_eq!( + format!( + "{:?}", + guest_region + .read_slice(&mut buf_read, insufficient_addr) + .err() + .unwrap() + ), + format!( + "PartialBuffer {{ expected: {:?}, completed: {:?} }}", + buf_to_write.len(), + guest_region.len() as usize - 0x600_usize + ) + ); + assert_eq!( + { + let mut buf = [0x0u8; 0x400]; + for cell in buf.iter_mut().take(0x200) { + *cell = 0xF0; + } + buf + }, + buf_read + ); + } + + #[test] + fn test_read_from_and_write_to_on_mmap_region() { + let start_addr = GuestAddress(0x0); + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x800).unwrap(), start_addr).unwrap(); + let guest_region = GuestRegionHybrid::from_mmap_region(mmap_reg); + let write_addr = MemoryRegionAddress(0x400); + let original_content = b"hello world"; + let size_of_file = original_content.len(); + + // Normal case. + let mut file_to_write_mmap_region = TempFile::new().unwrap().into_file(); + file_to_write_mmap_region + .set_len(size_of_file as u64) + .unwrap(); + file_to_write_mmap_region + .write_all(original_content) + .unwrap(); + // Rewind file pointer after write operation. 
+ file_to_write_mmap_region.rewind().unwrap(); + guest_region + .read_from(write_addr, &mut file_to_write_mmap_region, size_of_file) + .unwrap(); + let mut file_read_from_mmap_region = TempFile::new().unwrap().into_file(); + file_read_from_mmap_region + .set_len(size_of_file as u64) + .unwrap(); + guest_region + .write_all_to(write_addr, &mut file_read_from_mmap_region, size_of_file) + .unwrap(); + // Rewind file pointer after write operation. + file_read_from_mmap_region.rewind().unwrap(); + let mut content = String::new(); + file_read_from_mmap_region + .read_to_string(&mut content) + .unwrap(); + assert_eq!(content.as_bytes(), original_content); + assert_eq!( + file_read_from_mmap_region.metadata().unwrap().len(), + size_of_file as u64 + ); + + // Error invalid backend address case in read_from() on mmap region. + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_region + .read_from(invalid_addr, &mut file_to_write_mmap_region, size_of_file) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error invalid backend address case in write_to() on mmap region. + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_region + .write_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + } + + #[test] + fn test_read_from_and_write_to_on_raw_region() { + let start_addr = GuestAddress(0x0); + let mut buf_of_raw_region = [0u8; 0x800]; + let raw_region = unsafe { + GuestRegionRaw::<()>::new(start_addr, &mut buf_of_raw_region as *mut _, 0x800) + }; + let guest_region = GuestRegionHybrid::from_raw_region(raw_region); + let write_addr = MemoryRegionAddress(0x400); + let original_content = b"hello world"; + let size_of_file = original_content.len(); + + // Normal case. + let mut file_to_write_mmap_region = TempFile::new().unwrap().into_file(); + file_to_write_mmap_region + .set_len(size_of_file as u64) + .unwrap(); + file_to_write_mmap_region + .write_all(original_content) + .unwrap(); + // Rewind file pointer after write operation. + file_to_write_mmap_region.rewind().unwrap(); + guest_region + .read_from(write_addr, &mut file_to_write_mmap_region, size_of_file) + .unwrap(); + let mut file_read_from_mmap_region = TempFile::new().unwrap().into_file(); + file_read_from_mmap_region + .set_len(size_of_file as u64) + .unwrap(); + guest_region + .write_all_to(write_addr, &mut file_read_from_mmap_region, size_of_file) + .unwrap(); + // Rewind file pointer after write operation. + file_read_from_mmap_region.rewind().unwrap(); + let mut content = String::new(); + file_read_from_mmap_region + .read_to_string(&mut content) + .unwrap(); + assert_eq!(content.as_bytes(), original_content); + assert_eq!( + file_read_from_mmap_region.metadata().unwrap().len(), + size_of_file as u64 + ); + + // Error invalid backend address case in read_from() on raw region. + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_region + .read_from(invalid_addr, &mut file_to_write_mmap_region, size_of_file) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error invalid backend address case in write_to() on raw region. 
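+        // 0x900 lies beyond the 0x800-byte region, so address translation fails before any
+        // file I/O is attempted.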
+ let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_region + .write_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + } + + #[test] + fn test_write_all_to_and_read_exact_from() { + let start_addr = GuestAddress(0x0); + let write_addr = MemoryRegionAddress(0x400); + let original_content = b"hello world"; + let size_of_file = original_content.len(); + // Preset a GuestRegionHybrid from a mmap region + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x800).unwrap(), start_addr).unwrap(); + let guest_mmap_region = GuestRegionHybrid::from_mmap_region(mmap_reg); + // Preset a GuestRegionHybrid from a raw region + let mut buf_of_raw_region = [0u8; 0x800]; + let raw_region = unsafe { + GuestRegionRaw::<()>::new(start_addr, &mut buf_of_raw_region as *mut _, 0x800) + }; + let guest_raw_region = GuestRegionHybrid::from_raw_region(raw_region); + + // Normal case on mmap region. + let mut file_to_write_mmap_region = TempFile::new().unwrap().into_file(); + file_to_write_mmap_region + .set_len(size_of_file as u64) + .unwrap(); + file_to_write_mmap_region + .write_all(original_content) + .unwrap(); + file_to_write_mmap_region.rewind().unwrap(); + guest_mmap_region + .read_exact_from(write_addr, &mut file_to_write_mmap_region, size_of_file) + .unwrap(); + let mut file_read_from_mmap_region = TempFile::new().unwrap().into_file(); + file_read_from_mmap_region + .set_len(size_of_file as u64) + .unwrap(); + guest_mmap_region + .write_all_to(write_addr, &mut file_read_from_mmap_region, size_of_file) + .unwrap(); + file_read_from_mmap_region.rewind().unwrap(); + let mut content = String::new(); + file_read_from_mmap_region + .read_to_string(&mut content) + .unwrap(); + assert_eq!(content.as_bytes(), original_content); + assert_eq!( + file_read_from_mmap_region.metadata().unwrap().len(), + size_of_file as u64 + ); + + // Normal case on raw region. + let mut file_to_write_raw_region = TempFile::new().unwrap().into_file(); + file_to_write_raw_region + .set_len(size_of_file as u64) + .unwrap(); + file_to_write_raw_region + .write_all(original_content) + .unwrap(); + file_to_write_raw_region.rewind().unwrap(); + guest_raw_region + .read_exact_from(write_addr, &mut file_to_write_raw_region, size_of_file) + .unwrap(); + let mut file_read_from_raw_region = TempFile::new().unwrap().into_file(); + file_read_from_raw_region + .set_len(size_of_file as u64) + .unwrap(); + guest_raw_region + .write_all_to(write_addr, &mut file_read_from_raw_region, size_of_file) + .unwrap(); + file_read_from_raw_region.rewind().unwrap(); + let mut content = String::new(); + file_read_from_raw_region + .read_to_string(&mut content) + .unwrap(); + assert_eq!(content.as_bytes(), original_content); + assert_eq!( + file_read_from_raw_region.metadata().unwrap().len(), + size_of_file as u64 + ); + + // Error invalid backend address case in read_exact_from() on mmap region. + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_mmap_region + .read_exact_from(invalid_addr, &mut file_to_write_mmap_region, size_of_file) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error invalid backend address case in write_all_to() on mmap region. 
+ let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_mmap_region + .write_all_to(invalid_addr, &mut file_read_from_mmap_region, size_of_file) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error invalid backend address case in read_exact_from() on raw region. + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_raw_region + .read_exact_from(invalid_addr, &mut file_to_write_raw_region, size_of_file) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error invalid backend address case in write_all_to() on raw region. + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_raw_region + .write_all_to(invalid_addr, &mut file_read_from_raw_region, size_of_file) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + } + + #[test] + fn test_store_and_load() { + let test_val = 0xFF; + let start_addr = GuestAddress(0x0); + let write_addr = MemoryRegionAddress(0x400); + // Preset a GuestRegionHybrid from a mmap region + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x800).unwrap(), start_addr).unwrap(); + let guest_mmap_region = GuestRegionHybrid::from_mmap_region(mmap_reg); + // Preset a GuestRegionHybrid from a raw region + let mut buf_of_raw_region = [0u8; 0x800]; + let raw_region = unsafe { + GuestRegionRaw::<()>::new(start_addr, &mut buf_of_raw_region as *mut _, 0x800) + }; + let guest_raw_region = GuestRegionHybrid::from_raw_region(raw_region); + + // Normal case. + guest_mmap_region + .store(test_val, write_addr, Ordering::Relaxed) + .unwrap(); + let val_read_from_mmap_region: u64 = guest_mmap_region + .load(write_addr, Ordering::Relaxed) + .unwrap(); + assert_eq!(val_read_from_mmap_region, test_val); + guest_raw_region + .store(test_val, write_addr, Ordering::Relaxed) + .unwrap(); + let val_read_from_raw_region: u64 = guest_raw_region + .load(write_addr, Ordering::Relaxed) + .unwrap(); + assert_eq!(val_read_from_raw_region, test_val); + + // Error invalid backend address case in store() on mmap region. + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_mmap_region + .store(test_val, invalid_addr, Ordering::Relaxed) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error invalid backend address case in store() on raw region. + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_raw_region + .store(test_val, invalid_addr, Ordering::Relaxed) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error invalid backend address case in laod() on mmap region. + assert!(matches!( + guest_mmap_region + .load::(invalid_addr, Ordering::Relaxed) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + + // Error invalid backend address case in laod() on raw region. + assert!(matches!( + guest_raw_region + .load::(invalid_addr, Ordering::Relaxed) + .err() + .unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + } + + #[test] + fn test_bitmap() { + // TODO: #185 Need futher and detailed test on bitmap object. 
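+        // With the no-op `()` bitmap type used here, both region variants return the same
+        // trivial bitmap, so this only checks that bitmap() is wired up for Mmap and Raw.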
+ let start_addr = GuestAddress(0x0); + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x800).unwrap(), start_addr).unwrap(); + let guest_mmap_region = GuestRegionHybrid::from_mmap_region(mmap_reg); + let mut buf_of_raw_region = [0u8; 0x800]; + let raw_region = unsafe { + GuestRegionRaw::<()>::new(start_addr, &mut buf_of_raw_region as *mut _, 0x800) + }; + let guest_raw_region = GuestRegionHybrid::from_raw_region(raw_region); + + assert_eq!(guest_mmap_region.bitmap(), guest_raw_region.bitmap()); + } + + #[test] + fn test_get_host_address_on_mmap_region() { + let start_addr = GuestAddress(0x0); + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x800).unwrap(), start_addr).unwrap(); + let guest_region = GuestRegionHybrid::from_mmap_region(mmap_reg); + + // Normal case. + let addr_1 = guest_region + .get_host_address(MemoryRegionAddress(0x0)) + .unwrap(); + let addr_2 = guest_region + .get_host_address(MemoryRegionAddress(0x400)) + .unwrap(); + assert_eq!(addr_1 as u64 + 0x400, addr_2 as u64); + + // Error invalid backend address case. + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_region.get_host_address(invalid_addr).err().unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + } + + #[test] + fn test_get_host_address_on_raw_region() { + let start_addr = GuestAddress(0x0); + let mut buf_of_raw_region = [0u8; 0x800]; + let raw_region = unsafe { + GuestRegionRaw::<()>::new(start_addr, &mut buf_of_raw_region as *mut _, 0x800) + }; + let guest_region = GuestRegionHybrid::from_raw_region(raw_region); + + // Normal case. + let addr_1 = guest_region + .get_host_address(MemoryRegionAddress(0x0)) + .unwrap(); + let addr_2 = guest_region + .get_host_address(MemoryRegionAddress(0x400)) + .unwrap(); + assert_eq!(addr_1 as u64 + 0x400, addr_2 as u64); + + // Error invalid backend address case. + let invalid_addr = MemoryRegionAddress(0x900); + assert!(matches!( + guest_region.get_host_address(invalid_addr).err().unwrap(), + GuestMemoryError::InvalidBackendAddress + )); + } + + // TODO: #186 The following function are not yet implemented: + // - 'fn file_offset()' + // - 'unsafe fn as_slice()' + // - 'unsafe fn as_mut_slice()' + // Tests of these functions will be needed when they are implemented. + + #[test] + fn test_guest_memory_mmap_get_slice() { + //Preset a GuestRegionHybrid from a mmap region + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x400).unwrap(), GuestAddress(0)).unwrap(); + let guest_mmap_region = GuestRegionHybrid::from_mmap_region(mmap_reg); + + // Normal case. + let slice_addr = MemoryRegionAddress(0x100); + let slice_size = 0x200; + let slice = guest_mmap_region.get_slice(slice_addr, slice_size).unwrap(); + assert_eq!(slice.len(), slice_size); + + // Empty slice. + let slice_addr = MemoryRegionAddress(0x200); + let slice_size = 0x0; + let slice = guest_mmap_region.get_slice(slice_addr, slice_size).unwrap(); + assert!(slice.is_empty()); + + // Error case when slice_size is beyond the boundary. + let slice_addr = MemoryRegionAddress(0x300); + let slice_size = 0x200; + assert!(guest_mmap_region.get_slice(slice_addr, slice_size).is_err()); + } + + #[test] + fn test_from_regions_on_guest_memory_hybrid() { + // Normal case. 
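+        // Two non-overlapping regions at 0x100 and 0x200, already sorted by start address,
+        // should be accepted and kept in order.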
+ let mut regions = Vec::>::new(); + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x100).unwrap(), GuestAddress(0x100)) + .unwrap(); + regions.push(GuestRegionHybrid::Mmap(mmap_reg)); + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x100).unwrap(), GuestAddress(0x200)) + .unwrap(); + regions.push(GuestRegionHybrid::Mmap(mmap_reg)); + let guest_region = GuestMemoryHybrid::<()>::from_regions(regions).unwrap(); + assert_eq!(guest_region.regions[0].start_addr(), GuestAddress(0x100)); + assert_eq!(guest_region.regions[1].start_addr(), GuestAddress(0x200)); + + // Error unsorted region case. + let mut regions = Vec::>::new(); + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x400).unwrap(), GuestAddress(0x200)) + .unwrap(); + regions.push(GuestRegionHybrid::Mmap(mmap_reg)); + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x400).unwrap(), GuestAddress(0x100)) + .unwrap(); + regions.push(GuestRegionHybrid::Mmap(mmap_reg)); + let guest_region = GuestMemoryHybrid::<()>::from_regions(regions); + assert!(matches!( + guest_region.err().unwrap(), + Error::UnsortedMemoryRegions + )); + + // Error no memory region case. + let regions = Vec::>::new(); + let guest_region = GuestMemoryHybrid::<()>::from_regions(regions); + assert!(matches!(guest_region.err().unwrap(), Error::NoMemoryRegion)); + } + + #[test] + fn test_iterator_on_guest_region_hybrid() { + let mut regions = Vec::>::new(); + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x100).unwrap(), GuestAddress(0x100)) + .unwrap(); + regions.push(GuestRegionHybrid::Mmap(mmap_reg)); + let mmap_reg = + GuestRegionMmap::new(MmapRegion::<()>::new(0x100).unwrap(), GuestAddress(0x200)) + .unwrap(); + regions.push(GuestRegionHybrid::Mmap(mmap_reg)); + let guest_region = GuestMemoryHybrid::<()>::from_regions(regions).unwrap(); + let mut region = guest_region.iter(); + + assert_eq!(region.next().unwrap().start_addr(), GuestAddress(0x100)); + assert_eq!(region.next().unwrap().start_addr(), GuestAddress(0x200)); + } +} diff --git a/src/dragonball/src/dbs_address_space/src/memory/mod.rs b/src/dragonball/src/dbs_address_space/src/memory/mod.rs new file mode 100644 index 000000000000..371acda9def8 --- /dev/null +++ b/src/dragonball/src/dbs_address_space/src/memory/mod.rs @@ -0,0 +1,193 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Structs to manage guest memory for virtual machines. +//! +//! The `vm-memory` crate only provides traits and structs to access normal guest memory, +//! it doesn't support special guest memory like virtio-fs/virtio-pmem DAX window etc. +//! So this crate provides `GuestMemoryManager` over `vm-memory` to provide uniform abstraction +//! for all guest memory. +//! +//! It also provides interfaces to coordinate guest memory hotplug events. + +use std::str::FromStr; +use std::sync::Arc; +use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryLoadGuard, GuestMemoryMmap}; + +mod raw_region; +pub use raw_region::GuestRegionRaw; + +mod hybrid; +pub use hybrid::{GuestMemoryHybrid, GuestRegionHybrid}; + +/// Type of source to allocate memory for virtual machines. +#[derive(Debug, Eq, PartialEq)] +pub enum MemorySourceType { + /// File on HugeTlbFs. + FileOnHugeTlbFs, + /// mmap() without flag `MAP_HUGETLB`. + MmapAnonymous, + /// mmap() with flag `MAP_HUGETLB`. + MmapAnonymousHugeTlbFs, + /// memfd() without flag `MFD_HUGETLB`. + MemFdShared, + /// memfd() with flag `MFD_HUGETLB`. 
+ MemFdOnHugeTlbFs, +} + +impl MemorySourceType { + /// Check whether the memory source is huge page. + pub fn is_hugepage(&self) -> bool { + *self == Self::FileOnHugeTlbFs + || *self == Self::MmapAnonymousHugeTlbFs + || *self == Self::MemFdOnHugeTlbFs + } + + /// Check whether the memory source is anonymous memory. + pub fn is_mmap_anonymous(&self) -> bool { + *self == Self::MmapAnonymous || *self == Self::MmapAnonymousHugeTlbFs + } +} + +impl FromStr for MemorySourceType { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "hugetlbfs" => Ok(MemorySourceType::FileOnHugeTlbFs), + "memfd" => Ok(MemorySourceType::MemFdShared), + "shmem" => Ok(MemorySourceType::MemFdShared), + "hugememfd" => Ok(MemorySourceType::MemFdOnHugeTlbFs), + "hugeshmem" => Ok(MemorySourceType::MemFdOnHugeTlbFs), + "anon" => Ok(MemorySourceType::MmapAnonymous), + "mmap" => Ok(MemorySourceType::MmapAnonymous), + "hugeanon" => Ok(MemorySourceType::MmapAnonymousHugeTlbFs), + "hugemmap" => Ok(MemorySourceType::MmapAnonymousHugeTlbFs), + _ => Err(format!("unknown memory source type {s}")), + } + } +} + +#[derive(Debug, Default)] +struct GuestMemoryHotplugManager {} + +/// The `GuestMemoryManager` manages all guest memory for virtual machines. +/// +/// The `GuestMemoryManager` fulfills several different responsibilities. +/// - First, it manages different types of guest memory, such as normal guest memory, virtio-fs +/// DAX window and virtio-pmem DAX window etc. Different clients may want to access different +/// types of memory. So the manager maintains two GuestMemory objects, one contains all guest +/// memory, the other contains only normal guest memory. +/// - Second, it coordinates memory/DAX window hotplug events, so clients may register hooks +/// to receive hotplug notifications. +#[allow(unused)] +#[derive(Debug, Clone)] +pub struct GuestMemoryManager { + default: GuestMemoryAtomic, + /// GuestMemory object hosts all guest memory. + hybrid: GuestMemoryAtomic, + /// GuestMemory object for vIOMMU. + iommu: GuestMemoryAtomic, + /// GuestMemory object hosts normal guest memory. + normal: GuestMemoryAtomic, + hotplug: Arc, +} + +impl GuestMemoryManager { + /// Create a new instance of `GuestMemoryManager`. + pub fn new() -> Self { + Self::default() + } + + /// Get a reference to the normal `GuestMemory` object. + pub fn get_normal_guest_memory(&self) -> &GuestMemoryAtomic { + &self.normal + } + + /// Try to downcast the `GuestAddressSpace` object to a `GuestMemoryManager` object. + pub fn to_manager(_m: &AS) -> Option<&Self> { + None + } +} + +impl Default for GuestMemoryManager { + fn default() -> Self { + let hybrid = GuestMemoryAtomic::new(GuestMemoryHybrid::new()); + let iommu = GuestMemoryAtomic::new(GuestMemoryHybrid::new()); + let normal = GuestMemoryAtomic::new(GuestMemoryMmap::new()); + // By default, it provides to the `GuestMemoryHybrid` object containing all guest memory. 
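+        // In other words, GuestAddressSpace::memory() on the manager resolves to the hybrid
+        // view.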
+ let default = hybrid.clone(); + + GuestMemoryManager { + default, + hybrid, + iommu, + normal, + hotplug: Arc::new(GuestMemoryHotplugManager::default()), + } + } +} + +impl GuestAddressSpace for GuestMemoryManager { + type M = GuestMemoryHybrid; + type T = GuestMemoryLoadGuard; + + fn memory(&self) -> Self::T { + self.default.memory() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_memory_source_type() { + assert_eq!( + MemorySourceType::from_str("hugetlbfs").unwrap(), + MemorySourceType::FileOnHugeTlbFs + ); + assert_eq!( + MemorySourceType::from_str("memfd").unwrap(), + MemorySourceType::MemFdShared + ); + assert_eq!( + MemorySourceType::from_str("shmem").unwrap(), + MemorySourceType::MemFdShared + ); + assert_eq!( + MemorySourceType::from_str("hugememfd").unwrap(), + MemorySourceType::MemFdOnHugeTlbFs + ); + assert_eq!( + MemorySourceType::from_str("hugeshmem").unwrap(), + MemorySourceType::MemFdOnHugeTlbFs + ); + assert_eq!( + MemorySourceType::from_str("anon").unwrap(), + MemorySourceType::MmapAnonymous + ); + assert_eq!( + MemorySourceType::from_str("mmap").unwrap(), + MemorySourceType::MmapAnonymous + ); + assert_eq!( + MemorySourceType::from_str("hugeanon").unwrap(), + MemorySourceType::MmapAnonymousHugeTlbFs + ); + assert_eq!( + MemorySourceType::from_str("hugemmap").unwrap(), + MemorySourceType::MmapAnonymousHugeTlbFs + ); + assert!(MemorySourceType::from_str("test").is_err()); + } + + #[ignore] + #[test] + fn test_to_manager() { + let manager = GuestMemoryManager::new(); + let mgr = GuestMemoryManager::to_manager(&manager).unwrap(); + + assert_eq!(&manager as *const _, mgr as *const _); + } +} diff --git a/src/dragonball/src/dbs_address_space/src/memory/raw_region.rs b/src/dragonball/src/dbs_address_space/src/memory/raw_region.rs new file mode 100644 index 000000000000..5af21ca3e612 --- /dev/null +++ b/src/dragonball/src/dbs_address_space/src/memory/raw_region.rs @@ -0,0 +1,990 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::io::{Read, Write}; +use std::sync::atomic::Ordering; + +use vm_memory::bitmap::{Bitmap, BS}; +use vm_memory::mmap::NewBitmap; +use vm_memory::volatile_memory::compute_offset; +use vm_memory::{ + guest_memory, volatile_memory, Address, AtomicAccess, Bytes, FileOffset, GuestAddress, + GuestMemoryRegion, GuestUsize, MemoryRegionAddress, VolatileSlice, +}; + +/// Guest memory region for virtio-fs DAX window. +#[derive(Debug)] +pub struct GuestRegionRaw { + guest_base: GuestAddress, + addr: *mut u8, + size: usize, + bitmap: B, +} + +impl GuestRegionRaw { + /// Create a `GuestRegionRaw` object from raw pointer. + /// + /// # Safety + /// Caller needs to ensure `addr` and `size` are valid with static lifetime. 
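+    /// The region does not take ownership of the memory behind `addr`; it merely wraps an
+    /// externally managed mapping such as a virtio-fs DAX window.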
+ pub unsafe fn new(guest_base: GuestAddress, addr: *mut u8, size: usize) -> Self { + let bitmap = B::with_len(size); + + GuestRegionRaw { + guest_base, + addr, + size, + bitmap, + } + } +} + +impl Bytes for GuestRegionRaw { + type E = guest_memory::Error; + + fn write(&self, buf: &[u8], addr: MemoryRegionAddress) -> guest_memory::Result { + let maddr = addr.raw_value() as usize; + self.as_volatile_slice() + .unwrap() + .write(buf, maddr) + .map_err(Into::into) + } + + fn read(&self, buf: &mut [u8], addr: MemoryRegionAddress) -> guest_memory::Result { + let maddr = addr.raw_value() as usize; + self.as_volatile_slice() + .unwrap() + .read(buf, maddr) + .map_err(Into::into) + } + + fn write_slice(&self, buf: &[u8], addr: MemoryRegionAddress) -> guest_memory::Result<()> { + let maddr = addr.raw_value() as usize; + self.as_volatile_slice() + .unwrap() + .write_slice(buf, maddr) + .map_err(Into::into) + } + + fn read_slice(&self, buf: &mut [u8], addr: MemoryRegionAddress) -> guest_memory::Result<()> { + let maddr = addr.raw_value() as usize; + self.as_volatile_slice() + .unwrap() + .read_slice(buf, maddr) + .map_err(Into::into) + } + + fn read_from( + &self, + addr: MemoryRegionAddress, + src: &mut F, + count: usize, + ) -> guest_memory::Result + where + F: Read, + { + let maddr = addr.raw_value() as usize; + self.as_volatile_slice() + .unwrap() + .read_from::(maddr, src, count) + .map_err(Into::into) + } + + fn read_exact_from( + &self, + addr: MemoryRegionAddress, + src: &mut F, + count: usize, + ) -> guest_memory::Result<()> + where + F: Read, + { + let maddr = addr.raw_value() as usize; + self.as_volatile_slice() + .unwrap() + .read_exact_from::(maddr, src, count) + .map_err(Into::into) + } + + fn write_to( + &self, + addr: MemoryRegionAddress, + dst: &mut F, + count: usize, + ) -> guest_memory::Result + where + F: Write, + { + let maddr = addr.raw_value() as usize; + self.as_volatile_slice() + .unwrap() + .write_to::(maddr, dst, count) + .map_err(Into::into) + } + + fn write_all_to( + &self, + addr: MemoryRegionAddress, + dst: &mut F, + count: usize, + ) -> guest_memory::Result<()> + where + F: Write, + { + let maddr = addr.raw_value() as usize; + self.as_volatile_slice() + .unwrap() + .write_all_to::(maddr, dst, count) + .map_err(Into::into) + } + + fn store( + &self, + val: T, + addr: MemoryRegionAddress, + order: Ordering, + ) -> guest_memory::Result<()> { + self.as_volatile_slice().and_then(|s| { + s.store(val, addr.raw_value() as usize, order) + .map_err(Into::into) + }) + } + + fn load( + &self, + addr: MemoryRegionAddress, + order: Ordering, + ) -> guest_memory::Result { + self.as_volatile_slice() + .and_then(|s| s.load(addr.raw_value() as usize, order).map_err(Into::into)) + } +} + +impl GuestMemoryRegion for GuestRegionRaw { + type B = B; + + fn len(&self) -> GuestUsize { + self.size as GuestUsize + } + + fn start_addr(&self) -> GuestAddress { + self.guest_base + } + + fn bitmap(&self) -> &Self::B { + &self.bitmap + } + + fn get_host_address(&self, addr: MemoryRegionAddress) -> guest_memory::Result<*mut u8> { + // Not sure why wrapping_offset is not unsafe. Anyway this + // is safe because we've just range-checked addr using check_address. 
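+        // (wrapping_offset never dereferences the pointer, which is why it is not unsafe;
+        // the unsafety is deferred to whoever dereferences the returned host address.)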
+ self.check_address(addr) + .ok_or(guest_memory::Error::InvalidBackendAddress) + .map(|addr| self.addr.wrapping_offset(addr.raw_value() as isize)) + } + + fn file_offset(&self) -> Option<&FileOffset> { + None + } + + unsafe fn as_slice(&self) -> Option<&[u8]> { + // This is safe because we mapped the area at addr ourselves, so this slice will not + // overflow. However, it is possible to alias. + Some(std::slice::from_raw_parts(self.addr, self.size)) + } + + unsafe fn as_mut_slice(&self) -> Option<&mut [u8]> { + // This is safe because we mapped the area at addr ourselves, so this slice will not + // overflow. However, it is possible to alias. + Some(std::slice::from_raw_parts_mut(self.addr, self.size)) + } + + fn get_slice( + &self, + offset: MemoryRegionAddress, + count: usize, + ) -> guest_memory::Result>> { + let offset = offset.raw_value() as usize; + let end = compute_offset(offset, count)?; + if end > self.size { + return Err(volatile_memory::Error::OutOfBounds { addr: end }.into()); + } + + // Safe because we checked that offset + count was within our range and we only ever hand + // out volatile accessors. + Ok(unsafe { + VolatileSlice::with_bitmap( + (self.addr as usize + offset) as *mut _, + count, + self.bitmap.slice_at(offset), + ) + }) + } + + #[cfg(target_os = "linux")] + fn is_hugetlbfs(&self) -> Option { + None + } +} + +#[cfg(test)] +mod tests { + extern crate vmm_sys_util; + + use super::*; + use crate::{GuestMemoryHybrid, GuestRegionHybrid}; + use std::sync::Arc; + use vm_memory::{GuestAddressSpace, GuestMemory, VolatileMemory}; + + /* + use crate::bitmap::tests::test_guest_memory_and_region; + use crate::bitmap::AtomicBitmap; + use crate::GuestAddressSpace; + + use std::fs::File; + use std::mem; + use std::path::Path; + use vmm_sys_util::tempfile::TempFile; + + type GuestMemoryMmap = super::GuestMemoryMmap<()>; + type GuestRegionMmap = super::GuestRegionMmap<()>; + type MmapRegion = super::MmapRegion<()>; + */ + + #[test] + fn test_region_raw_new() { + let mut buf = [0u8; 1024]; + let m = + unsafe { GuestRegionRaw::<()>::new(GuestAddress(0x10_0000), &mut buf as *mut _, 1024) }; + + assert_eq!(m.start_addr(), GuestAddress(0x10_0000)); + assert_eq!(m.len(), 1024); + } + + /* + fn check_guest_memory_mmap( + maybe_guest_mem: Result, + expected_regions_summary: &[(GuestAddress, usize)], + ) { + assert!(maybe_guest_mem.is_ok()); + + let guest_mem = maybe_guest_mem.unwrap(); + assert_eq!(guest_mem.num_regions(), expected_regions_summary.len()); + let maybe_last_mem_reg = expected_regions_summary.last(); + if let Some((region_addr, region_size)) = maybe_last_mem_reg { + let mut last_addr = region_addr.unchecked_add(*region_size as u64); + if last_addr.raw_value() != 0 { + last_addr = last_addr.unchecked_sub(1); + } + assert_eq!(guest_mem.last_addr(), last_addr); + } + for ((region_addr, region_size), mmap) in expected_regions_summary + .iter() + .zip(guest_mem.regions.iter()) + { + assert_eq!(region_addr, &mmap.guest_base); + assert_eq!(region_size, &mmap.mapping.size()); + + assert!(guest_mem.find_region(*region_addr).is_some()); + } + } + + fn new_guest_memory_mmap( + regions_summary: &[(GuestAddress, usize)], + ) -> Result { + GuestMemoryMmap::from_ranges(regions_summary) + } + + fn new_guest_memory_mmap_from_regions( + regions_summary: &[(GuestAddress, usize)], + ) -> Result { + GuestMemoryMmap::from_regions( + regions_summary + .iter() + .map(|(region_addr, region_size)| { + GuestRegionMmap::new(MmapRegion::new(*region_size).unwrap(), *region_addr) + .unwrap() + }) + 
.collect(), + ) + } + + fn new_guest_memory_mmap_from_arc_regions( + regions_summary: &[(GuestAddress, usize)], + ) -> Result { + GuestMemoryMmap::from_arc_regions( + regions_summary + .iter() + .map(|(region_addr, region_size)| { + Arc::new( + GuestRegionMmap::new(MmapRegion::new(*region_size).unwrap(), *region_addr) + .unwrap(), + ) + }) + .collect(), + ) + } + + fn new_guest_memory_mmap_with_files( + regions_summary: &[(GuestAddress, usize)], + ) -> Result { + let regions: Vec<(GuestAddress, usize, Option)> = regions_summary + .iter() + .map(|(region_addr, region_size)| { + let f = TempFile::new().unwrap().into_file(); + f.set_len(*region_size as u64).unwrap(); + + (*region_addr, *region_size, Some(FileOffset::new(f, 0))) + }) + .collect(); + + GuestMemoryMmap::from_ranges_with_files(®ions) + } + */ + + #[test] + fn slice_addr() { + let mut buf = [0u8; 1024]; + let m = + unsafe { GuestRegionRaw::<()>::new(GuestAddress(0x10_0000), &mut buf as *mut _, 1024) }; + + let s = m.get_slice(MemoryRegionAddress(2), 3).unwrap(); + assert_eq!(s.as_ptr(), &mut buf[2] as *mut _); + } + + /* + #[test] + fn test_address_in_range() { + let f1 = TempFile::new().unwrap().into_file(); + f1.set_len(0x400).unwrap(); + let f2 = TempFile::new().unwrap().into_file(); + f2.set_len(0x400).unwrap(); + + let start_addr1 = GuestAddress(0x0); + let start_addr2 = GuestAddress(0x800); + let guest_mem = + GuestMemoryMmap::from_ranges(&[(start_addr1, 0x400), (start_addr2, 0x400)]).unwrap(); + let guest_mem_backed_by_file = GuestMemoryMmap::from_ranges_with_files(&[ + (start_addr1, 0x400, Some(FileOffset::new(f1, 0))), + (start_addr2, 0x400, Some(FileOffset::new(f2, 0))), + ]) + .unwrap(); + + let guest_mem_list = vec![guest_mem, guest_mem_backed_by_file]; + for guest_mem in guest_mem_list.iter() { + assert!(guest_mem.address_in_range(GuestAddress(0x200))); + assert!(!guest_mem.address_in_range(GuestAddress(0x600))); + assert!(guest_mem.address_in_range(GuestAddress(0xa00))); + assert!(!guest_mem.address_in_range(GuestAddress(0xc00))); + } + } + + #[test] + fn test_check_address() { + let f1 = TempFile::new().unwrap().into_file(); + f1.set_len(0x400).unwrap(); + let f2 = TempFile::new().unwrap().into_file(); + f2.set_len(0x400).unwrap(); + + let start_addr1 = GuestAddress(0x0); + let start_addr2 = GuestAddress(0x800); + let guest_mem = + GuestMemoryMmap::from_ranges(&[(start_addr1, 0x400), (start_addr2, 0x400)]).unwrap(); + let guest_mem_backed_by_file = GuestMemoryMmap::from_ranges_with_files(&[ + (start_addr1, 0x400, Some(FileOffset::new(f1, 0))), + (start_addr2, 0x400, Some(FileOffset::new(f2, 0))), + ]) + .unwrap(); + + let guest_mem_list = vec![guest_mem, guest_mem_backed_by_file]; + for guest_mem in guest_mem_list.iter() { + assert_eq!( + guest_mem.check_address(GuestAddress(0x200)), + Some(GuestAddress(0x200)) + ); + assert_eq!(guest_mem.check_address(GuestAddress(0x600)), None); + assert_eq!( + guest_mem.check_address(GuestAddress(0xa00)), + Some(GuestAddress(0xa00)) + ); + assert_eq!(guest_mem.check_address(GuestAddress(0xc00)), None); + } + } + + #[test] + fn test_to_region_addr() { + let f1 = TempFile::new().unwrap().into_file(); + f1.set_len(0x400).unwrap(); + let f2 = TempFile::new().unwrap().into_file(); + f2.set_len(0x400).unwrap(); + + let start_addr1 = GuestAddress(0x0); + let start_addr2 = GuestAddress(0x800); + let guest_mem = + GuestMemoryMmap::from_ranges(&[(start_addr1, 0x400), (start_addr2, 0x400)]).unwrap(); + let guest_mem_backed_by_file = GuestMemoryMmap::from_ranges_with_files(&[ + (start_addr1, 
0x400, Some(FileOffset::new(f1, 0))), + (start_addr2, 0x400, Some(FileOffset::new(f2, 0))), + ]) + .unwrap(); + + let guest_mem_list = vec![guest_mem, guest_mem_backed_by_file]; + for guest_mem in guest_mem_list.iter() { + assert!(guest_mem.to_region_addr(GuestAddress(0x600)).is_none()); + let (r0, addr0) = guest_mem.to_region_addr(GuestAddress(0x800)).unwrap(); + let (r1, addr1) = guest_mem.to_region_addr(GuestAddress(0xa00)).unwrap(); + assert!(r0.as_ptr() == r1.as_ptr()); + assert_eq!(addr0, MemoryRegionAddress(0)); + assert_eq!(addr1, MemoryRegionAddress(0x200)); + } + } + + #[test] + fn test_get_host_address() { + let f1 = TempFile::new().unwrap().into_file(); + f1.set_len(0x400).unwrap(); + let f2 = TempFile::new().unwrap().into_file(); + f2.set_len(0x400).unwrap(); + + let start_addr1 = GuestAddress(0x0); + let start_addr2 = GuestAddress(0x800); + let guest_mem = + GuestMemoryMmap::from_ranges(&[(start_addr1, 0x400), (start_addr2, 0x400)]).unwrap(); + let guest_mem_backed_by_file = GuestMemoryMmap::from_ranges_with_files(&[ + (start_addr1, 0x400, Some(FileOffset::new(f1, 0))), + (start_addr2, 0x400, Some(FileOffset::new(f2, 0))), + ]) + .unwrap(); + + let guest_mem_list = vec![guest_mem, guest_mem_backed_by_file]; + for guest_mem in guest_mem_list.iter() { + assert!(guest_mem.get_host_address(GuestAddress(0x600)).is_err()); + let ptr0 = guest_mem.get_host_address(GuestAddress(0x800)).unwrap(); + let ptr1 = guest_mem.get_host_address(GuestAddress(0xa00)).unwrap(); + assert_eq!( + ptr0, + guest_mem.find_region(GuestAddress(0x800)).unwrap().as_ptr() + ); + assert_eq!(unsafe { ptr0.offset(0x200) }, ptr1); + } + } + + #[test] + fn test_deref() { + let f = TempFile::new().unwrap().into_file(); + f.set_len(0x400).unwrap(); + + let start_addr = GuestAddress(0x0); + let guest_mem = GuestMemoryMmap::from_ranges(&[(start_addr, 0x400)]).unwrap(); + let guest_mem_backed_by_file = GuestMemoryMmap::from_ranges_with_files(&[( + start_addr, + 0x400, + Some(FileOffset::new(f, 0)), + )]) + .unwrap(); + + let guest_mem_list = vec![guest_mem, guest_mem_backed_by_file]; + for guest_mem in guest_mem_list.iter() { + let sample_buf = &[1, 2, 3, 4, 5]; + + assert_eq!(guest_mem.write(sample_buf, start_addr).unwrap(), 5); + let slice = guest_mem + .find_region(GuestAddress(0)) + .unwrap() + .as_volatile_slice() + .unwrap(); + + let buf = &mut [0, 0, 0, 0, 0]; + assert_eq!(slice.read(buf, 0).unwrap(), 5); + assert_eq!(buf, sample_buf); + } + } + + #[test] + fn test_read_u64() { + let f1 = TempFile::new().unwrap().into_file(); + f1.set_len(0x1000).unwrap(); + let f2 = TempFile::new().unwrap().into_file(); + f2.set_len(0x1000).unwrap(); + + let start_addr1 = GuestAddress(0x0); + let start_addr2 = GuestAddress(0x1000); + let bad_addr = GuestAddress(0x2001); + let bad_addr2 = GuestAddress(0x1ffc); + let max_addr = GuestAddress(0x2000); + + let gm = + GuestMemoryMmap::from_ranges(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]).unwrap(); + let gm_backed_by_file = GuestMemoryMmap::from_ranges_with_files(&[ + (start_addr1, 0x1000, Some(FileOffset::new(f1, 0))), + (start_addr2, 0x1000, Some(FileOffset::new(f2, 0))), + ]) + .unwrap(); + + let gm_list = vec![gm, gm_backed_by_file]; + for gm in gm_list.iter() { + let val1: u64 = 0xaa55_aa55_aa55_aa55; + let val2: u64 = 0x55aa_55aa_55aa_55aa; + assert_eq!( + format!("{:?}", gm.write_obj(val1, bad_addr).err().unwrap()), + format!("InvalidGuestAddress({:?})", bad_addr,) + ); + assert_eq!( + format!("{:?}", gm.write_obj(val1, bad_addr2).err().unwrap()), + format!( + 
"PartialBuffer {{ expected: {:?}, completed: {:?} }}", + mem::size_of::(), + max_addr.checked_offset_from(bad_addr2).unwrap() + ) + ); + + gm.write_obj(val1, GuestAddress(0x500)).unwrap(); + gm.write_obj(val2, GuestAddress(0x1000 + 32)).unwrap(); + let num1: u64 = gm.read_obj(GuestAddress(0x500)).unwrap(); + let num2: u64 = gm.read_obj(GuestAddress(0x1000 + 32)).unwrap(); + assert_eq!(val1, num1); + assert_eq!(val2, num2); + } + } + + #[test] + fn write_and_read() { + let f = TempFile::new().unwrap().into_file(); + f.set_len(0x400).unwrap(); + + let mut start_addr = GuestAddress(0x1000); + let gm = GuestMemoryMmap::from_ranges(&[(start_addr, 0x400)]).unwrap(); + let gm_backed_by_file = GuestMemoryMmap::from_ranges_with_files(&[( + start_addr, + 0x400, + Some(FileOffset::new(f, 0)), + )]) + .unwrap(); + + let gm_list = vec![gm, gm_backed_by_file]; + for gm in gm_list.iter() { + let sample_buf = &[1, 2, 3, 4, 5]; + + assert_eq!(gm.write(sample_buf, start_addr).unwrap(), 5); + + let buf = &mut [0u8; 5]; + assert_eq!(gm.read(buf, start_addr).unwrap(), 5); + assert_eq!(buf, sample_buf); + + start_addr = GuestAddress(0x13ff); + assert_eq!(gm.write(sample_buf, start_addr).unwrap(), 1); + assert_eq!(gm.read(buf, start_addr).unwrap(), 1); + assert_eq!(buf[0], sample_buf[0]); + start_addr = GuestAddress(0x1000); + } + } + + #[test] + fn read_to_and_write_from_mem() { + let f = TempFile::new().unwrap().into_file(); + f.set_len(0x400).unwrap(); + + let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0x1000), 0x400)]).unwrap(); + let gm_backed_by_file = GuestMemoryMmap::from_ranges_with_files(&[( + GuestAddress(0x1000), + 0x400, + Some(FileOffset::new(f, 0)), + )]) + .unwrap(); + + let gm_list = vec![gm, gm_backed_by_file]; + for gm in gm_list.iter() { + let addr = GuestAddress(0x1010); + let mut file = if cfg!(unix) { + File::open(Path::new("/dev/zero")).unwrap() + } else { + File::open(Path::new("c:\\Windows\\system32\\ntoskrnl.exe")).unwrap() + }; + gm.write_obj(!0u32, addr).unwrap(); + gm.read_exact_from(addr, &mut file, mem::size_of::()) + .unwrap(); + let value: u32 = gm.read_obj(addr).unwrap(); + if cfg!(unix) { + assert_eq!(value, 0); + } else { + assert_eq!(value, 0x0090_5a4d); + } + + let mut sink = Vec::new(); + gm.write_all_to(addr, &mut sink, mem::size_of::()) + .unwrap(); + if cfg!(unix) { + assert_eq!(sink, vec![0; mem::size_of::()]); + } else { + assert_eq!(sink, vec![0x4d, 0x5a, 0x90, 0x00]); + }; + } + } + + #[test] + fn create_vec_with_regions() { + let region_size = 0x400; + let regions = vec![ + (GuestAddress(0x0), region_size), + (GuestAddress(0x1000), region_size), + ]; + let mut iterated_regions = Vec::new(); + let gm = GuestMemoryMmap::from_ranges(®ions).unwrap(); + + for region in gm.iter() { + assert_eq!(region.len(), region_size as GuestUsize); + } + + for region in gm.iter() { + iterated_regions.push((region.start_addr(), region.len() as usize)); + } + assert_eq!(regions, iterated_regions); + + assert!(regions + .iter() + .map(|x| (x.0, x.1)) + .eq(iterated_regions.iter().copied())); + + assert_eq!(gm.regions[0].guest_base, regions[0].0); + assert_eq!(gm.regions[1].guest_base, regions[1].0); + } + + #[test] + fn test_memory() { + let region_size = 0x400; + let regions = vec![ + (GuestAddress(0x0), region_size), + (GuestAddress(0x1000), region_size), + ]; + let mut iterated_regions = Vec::new(); + let gm = Arc::new(GuestMemoryMmap::from_ranges(®ions).unwrap()); + let mem = gm.memory(); + + for region in mem.iter() { + assert_eq!(region.len(), region_size as GuestUsize); + 
} + + for region in mem.iter() { + iterated_regions.push((region.start_addr(), region.len() as usize)); + } + assert_eq!(regions, iterated_regions); + + assert!(regions + .iter() + .map(|x| (x.0, x.1)) + .eq(iterated_regions.iter().copied())); + + assert_eq!(gm.regions[0].guest_base, regions[0].0); + assert_eq!(gm.regions[1].guest_base, regions[1].0); + } + + #[test] + fn test_access_cross_boundary() { + let f1 = TempFile::new().unwrap().into_file(); + f1.set_len(0x1000).unwrap(); + let f2 = TempFile::new().unwrap().into_file(); + f2.set_len(0x1000).unwrap(); + + let start_addr1 = GuestAddress(0x0); + let start_addr2 = GuestAddress(0x1000); + let gm = + GuestMemoryMmap::from_ranges(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]).unwrap(); + let gm_backed_by_file = GuestMemoryMmap::from_ranges_with_files(&[ + (start_addr1, 0x1000, Some(FileOffset::new(f1, 0))), + (start_addr2, 0x1000, Some(FileOffset::new(f2, 0))), + ]) + .unwrap(); + + let gm_list = vec![gm, gm_backed_by_file]; + for gm in gm_list.iter() { + let sample_buf = &[1, 2, 3, 4, 5]; + assert_eq!(gm.write(sample_buf, GuestAddress(0xffc)).unwrap(), 5); + let buf = &mut [0u8; 5]; + assert_eq!(gm.read(buf, GuestAddress(0xffc)).unwrap(), 5); + assert_eq!(buf, sample_buf); + } + } + + #[test] + fn test_retrieve_fd_backing_memory_region() { + let f = TempFile::new().unwrap().into_file(); + f.set_len(0x400).unwrap(); + + let start_addr = GuestAddress(0x0); + let gm = GuestMemoryMmap::from_ranges(&[(start_addr, 0x400)]).unwrap(); + assert!(gm.find_region(start_addr).is_some()); + let region = gm.find_region(start_addr).unwrap(); + assert!(region.file_offset().is_none()); + + let gm = GuestMemoryMmap::from_ranges_with_files(&[( + start_addr, + 0x400, + Some(FileOffset::new(f, 0)), + )]) + .unwrap(); + assert!(gm.find_region(start_addr).is_some()); + let region = gm.find_region(start_addr).unwrap(); + assert!(region.file_offset().is_some()); + } + + // Windows needs a dedicated test where it will retrieve the allocation + // granularity to determine a proper offset (other than 0) that can be + // used for the backing file. Refer to Microsoft docs here: + // https://docs.microsoft.com/en-us/windows/desktop/api/memoryapi/nf-memoryapi-mapviewoffile + #[test] + #[cfg(unix)] + fn test_retrieve_offset_from_fd_backing_memory_region() { + let f = TempFile::new().unwrap().into_file(); + f.set_len(0x1400).unwrap(); + // Needs to be aligned on 4k, otherwise mmap will fail. 
+ let offset = 0x1000; + + let start_addr = GuestAddress(0x0); + let gm = GuestMemoryMmap::from_ranges(&[(start_addr, 0x400)]).unwrap(); + assert!(gm.find_region(start_addr).is_some()); + let region = gm.find_region(start_addr).unwrap(); + assert!(region.file_offset().is_none()); + + let gm = GuestMemoryMmap::from_ranges_with_files(&[( + start_addr, + 0x400, + Some(FileOffset::new(f, offset)), + )]) + .unwrap(); + assert!(gm.find_region(start_addr).is_some()); + let region = gm.find_region(start_addr).unwrap(); + assert!(region.file_offset().is_some()); + assert_eq!(region.file_offset().unwrap().start(), offset); + } + */ + + #[test] + fn test_mmap_insert_region() { + let start_addr1 = GuestAddress(0); + let start_addr2 = GuestAddress(0x10_0000); + + let guest_mem = GuestMemoryHybrid::<()>::new(); + let mut raw_buf = [0u8; 0x1000]; + let raw_ptr = &mut raw_buf as *mut u8; + let reg = unsafe { GuestRegionRaw::<()>::new(start_addr1, raw_ptr, 0x1000) }; + let guest_mem = guest_mem + .insert_region(Arc::new(GuestRegionHybrid::from_raw_region(reg))) + .unwrap(); + let reg = unsafe { GuestRegionRaw::<()>::new(start_addr2, raw_ptr, 0x1000) }; + let gm = &guest_mem + .insert_region(Arc::new(GuestRegionHybrid::from_raw_region(reg))) + .unwrap(); + let mem_orig = gm.memory(); + assert_eq!(mem_orig.num_regions(), 2); + + let reg = unsafe { GuestRegionRaw::new(GuestAddress(0x8000), raw_ptr, 0x1000) }; + let mmap = Arc::new(GuestRegionHybrid::from_raw_region(reg)); + let gm = gm.insert_region(mmap).unwrap(); + let reg = unsafe { GuestRegionRaw::new(GuestAddress(0x4000), raw_ptr, 0x1000) }; + let mmap = Arc::new(GuestRegionHybrid::from_raw_region(reg)); + let gm = gm.insert_region(mmap).unwrap(); + let reg = unsafe { GuestRegionRaw::new(GuestAddress(0xc000), raw_ptr, 0x1000) }; + let mmap = Arc::new(GuestRegionHybrid::from_raw_region(reg)); + let gm = gm.insert_region(mmap).unwrap(); + let reg = unsafe { GuestRegionRaw::new(GuestAddress(0xc000), raw_ptr, 0x1000) }; + let mmap = Arc::new(GuestRegionHybrid::from_raw_region(reg)); + gm.insert_region(mmap).unwrap_err(); + + assert_eq!(mem_orig.num_regions(), 2); + assert_eq!(gm.num_regions(), 5); + + assert_eq!(gm.regions[0].start_addr(), GuestAddress(0x0000)); + assert_eq!(gm.regions[1].start_addr(), GuestAddress(0x4000)); + assert_eq!(gm.regions[2].start_addr(), GuestAddress(0x8000)); + assert_eq!(gm.regions[3].start_addr(), GuestAddress(0xc000)); + assert_eq!(gm.regions[4].start_addr(), GuestAddress(0x10_0000)); + } + + #[test] + fn test_mmap_remove_region() { + let start_addr1 = GuestAddress(0); + let start_addr2 = GuestAddress(0x10_0000); + + let guest_mem = GuestMemoryHybrid::<()>::new(); + let mut raw_buf = [0u8; 0x1000]; + let reg = unsafe { GuestRegionRaw::<()>::new(start_addr1, &mut raw_buf as *mut _, 0x1000) }; + let guest_mem = guest_mem + .insert_region(Arc::new(GuestRegionHybrid::from_raw_region(reg))) + .unwrap(); + let reg = unsafe { GuestRegionRaw::<()>::new(start_addr2, &mut raw_buf as *mut _, 0x1000) }; + let gm = &guest_mem + .insert_region(Arc::new(GuestRegionHybrid::from_raw_region(reg))) + .unwrap(); + let mem_orig = gm.memory(); + assert_eq!(mem_orig.num_regions(), 2); + + gm.remove_region(GuestAddress(0), 128).unwrap_err(); + gm.remove_region(GuestAddress(0x4000), 128).unwrap_err(); + let (gm, region) = gm.remove_region(GuestAddress(0x10_0000), 0x1000).unwrap(); + + assert_eq!(mem_orig.num_regions(), 2); + assert_eq!(gm.num_regions(), 1); + + assert_eq!(gm.regions[0].start_addr(), GuestAddress(0x0000)); + 
assert_eq!(region.start_addr(), GuestAddress(0x10_0000)); + } + + #[test] + fn test_guest_memory_mmap_get_slice() { + let start_addr1 = GuestAddress(0); + let mut raw_buf = [0u8; 0x400]; + let region = + unsafe { GuestRegionRaw::<()>::new(start_addr1, &mut raw_buf as *mut _, 0x400) }; + + // Normal case. + let slice_addr = MemoryRegionAddress(0x100); + let slice_size = 0x200; + let slice = region.get_slice(slice_addr, slice_size).unwrap(); + assert_eq!(slice.len(), slice_size); + + // Empty slice. + let slice_addr = MemoryRegionAddress(0x200); + let slice_size = 0x0; + let slice = region.get_slice(slice_addr, slice_size).unwrap(); + assert!(slice.is_empty()); + + // Error case when slice_size is beyond the boundary. + let slice_addr = MemoryRegionAddress(0x300); + let slice_size = 0x200; + assert!(region.get_slice(slice_addr, slice_size).is_err()); + } + + #[test] + fn test_guest_memory_mmap_as_volatile_slice() { + let start_addr1 = GuestAddress(0); + let mut raw_buf = [0u8; 0x400]; + let region = + unsafe { GuestRegionRaw::<()>::new(start_addr1, &mut raw_buf as *mut _, 0x400) }; + let region_size = 0x400; + + // Test slice length. + let slice = region.as_volatile_slice().unwrap(); + assert_eq!(slice.len(), region_size); + + // Test slice data. + let v = 0x1234_5678u32; + let r = slice.get_ref::(0x200).unwrap(); + r.store(v); + assert_eq!(r.load(), v); + } + + #[test] + fn test_guest_memory_get_slice() { + let start_addr1 = GuestAddress(0); + let start_addr2 = GuestAddress(0x800); + + let guest_mem = GuestMemoryHybrid::<()>::new(); + let mut raw_buf = [0u8; 0x400]; + let reg = unsafe { GuestRegionRaw::<()>::new(start_addr1, &mut raw_buf as *mut _, 0x400) }; + let guest_mem = guest_mem + .insert_region(Arc::new(GuestRegionHybrid::from_raw_region(reg))) + .unwrap(); + let reg = unsafe { GuestRegionRaw::<()>::new(start_addr2, &mut raw_buf as *mut _, 0x400) }; + let guest_mem = guest_mem + .insert_region(Arc::new(GuestRegionHybrid::from_raw_region(reg))) + .unwrap(); + + // Normal cases. + let slice_size = 0x200; + let slice = guest_mem + .get_slice(GuestAddress(0x100), slice_size) + .unwrap(); + assert_eq!(slice.len(), slice_size); + + let slice_size = 0x400; + let slice = guest_mem + .get_slice(GuestAddress(0x800), slice_size) + .unwrap(); + assert_eq!(slice.len(), slice_size); + + // Empty slice. + assert!(guest_mem + .get_slice(GuestAddress(0x900), 0) + .unwrap() + .is_empty()); + + // Error cases, wrong size or base address. 
+ assert!(guest_mem.get_slice(GuestAddress(0), 0x500).is_err()); + assert!(guest_mem.get_slice(GuestAddress(0x600), 0x100).is_err()); + assert!(guest_mem.get_slice(GuestAddress(0xc00), 0x100).is_err()); + } + + #[test] + fn test_checked_offset() { + let start_addr1 = GuestAddress(0); + let start_addr2 = GuestAddress(0x800); + let start_addr3 = GuestAddress(0xc00); + + let guest_mem = GuestMemoryHybrid::<()>::new(); + let mut raw_buf = [0u8; 0x400]; + let reg = unsafe { GuestRegionRaw::<()>::new(start_addr1, &mut raw_buf as *mut _, 0x400) }; + let guest_mem = guest_mem + .insert_region(Arc::new(GuestRegionHybrid::from_raw_region(reg))) + .unwrap(); + let reg = unsafe { GuestRegionRaw::<()>::new(start_addr2, &mut raw_buf as *mut _, 0x400) }; + let guest_mem = guest_mem + .insert_region(Arc::new(GuestRegionHybrid::from_raw_region(reg))) + .unwrap(); + let reg = unsafe { GuestRegionRaw::<()>::new(start_addr3, &mut raw_buf as *mut _, 0x400) }; + let guest_mem = guest_mem + .insert_region(Arc::new(GuestRegionHybrid::from_raw_region(reg))) + .unwrap(); + + assert_eq!( + guest_mem.checked_offset(start_addr1, 0x200), + Some(GuestAddress(0x200)) + ); + assert_eq!( + guest_mem.checked_offset(start_addr1, 0xa00), + Some(GuestAddress(0xa00)) + ); + assert_eq!( + guest_mem.checked_offset(start_addr2, 0x7ff), + Some(GuestAddress(0xfff)) + ); + assert_eq!(guest_mem.checked_offset(start_addr2, 0xc00), None); + assert_eq!(guest_mem.checked_offset(start_addr1, std::usize::MAX), None); + + assert_eq!(guest_mem.checked_offset(start_addr1, 0x400), None); + assert_eq!( + guest_mem.checked_offset(start_addr1, 0x400 - 1), + Some(GuestAddress(0x400 - 1)) + ); + } + + #[test] + fn test_check_range() { + let start_addr1 = GuestAddress(0); + let start_addr2 = GuestAddress(0x800); + let start_addr3 = GuestAddress(0xc00); + + let guest_mem = GuestMemoryHybrid::<()>::new(); + let mut raw_buf = [0u8; 0x400]; + let reg = unsafe { GuestRegionRaw::<()>::new(start_addr1, &mut raw_buf as *mut _, 0x400) }; + let guest_mem = guest_mem + .insert_region(Arc::new(GuestRegionHybrid::from_raw_region(reg))) + .unwrap(); + let reg = unsafe { GuestRegionRaw::<()>::new(start_addr2, &mut raw_buf as *mut _, 0x400) }; + let guest_mem = guest_mem + .insert_region(Arc::new(GuestRegionHybrid::from_raw_region(reg))) + .unwrap(); + let reg = unsafe { GuestRegionRaw::<()>::new(start_addr3, &mut raw_buf as *mut _, 0x400) }; + let guest_mem = guest_mem + .insert_region(Arc::new(GuestRegionHybrid::from_raw_region(reg))) + .unwrap(); + + assert!(guest_mem.check_range(start_addr1, 0x0)); + assert!(guest_mem.check_range(start_addr1, 0x200)); + assert!(guest_mem.check_range(start_addr1, 0x400)); + assert!(!guest_mem.check_range(start_addr1, 0xa00)); + assert!(guest_mem.check_range(start_addr2, 0x7ff)); + assert!(guest_mem.check_range(start_addr2, 0x800)); + assert!(!guest_mem.check_range(start_addr2, 0x801)); + assert!(!guest_mem.check_range(start_addr2, 0xc00)); + assert!(!guest_mem.check_range(start_addr1, usize::MAX)); + } +} diff --git a/src/dragonball/src/dbs_address_space/src/numa.rs b/src/dragonball/src/dbs_address_space/src/numa.rs new file mode 100644 index 000000000000..71f2d748a9ae --- /dev/null +++ b/src/dragonball/src/dbs_address_space/src/numa.rs @@ -0,0 +1,85 @@ +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Types for NUMA information. + +use vm_memory::{GuestAddress, GuestUsize}; + +/// Strategy of mbind() and don't lead to OOM. 
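+///
+/// `MPOL_PREFERRED` asks the kernel to allocate from the preferred node first and to fall
+/// back to other nodes (rather than failing or triggering the OOM killer) when the preferred
+/// node has no free memory.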
+pub const MPOL_PREFERRED: u32 = 1; + +/// Strategy of mbind() +pub const MPOL_MF_MOVE: u32 = 2; + +/// Type for recording numa ids of different devices +pub struct NumaIdTable { + /// vectors of numa id for each memory region + pub memory: Vec, + /// vectors of numa id for each cpu + pub cpu: Vec, +} + +/// Record numa node memory information. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub struct NumaNodeInfo { + /// Base address of the region in guest physical address space. + pub base: GuestAddress, + /// Size of the address region. + pub size: GuestUsize, +} + +/// Record all region's info of a numa node. +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct NumaNode { + region_infos: Vec, + vcpu_ids: Vec, +} + +impl NumaNode { + /// get reference of region_infos in numa node. + pub fn region_infos(&self) -> &Vec { + &self.region_infos + } + + /// get vcpu ids belonging to a numa node. + pub fn vcpu_ids(&self) -> &Vec { + &self.vcpu_ids + } + + /// add a new numa region info into this numa node. + pub fn add_info(&mut self, info: &NumaNodeInfo) { + self.region_infos.push(*info); + } + + /// add a group of vcpu ids belong to this numa node + pub fn add_vcpu_ids(&mut self, vcpu_ids: &[u32]) { + self.vcpu_ids.extend(vcpu_ids) + } + + /// create a new numa node struct + pub fn new() -> NumaNode { + NumaNode { + region_infos: Vec::new(), + vcpu_ids: Vec::new(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_numa_node() { + let mut numa_node = NumaNode::new(); + let info = NumaNodeInfo { + base: GuestAddress(0), + size: 1024, + }; + numa_node.add_info(&info); + assert_eq!(*numa_node.region_infos(), vec![info]); + let vcpu_ids = vec![0, 1, 2, 3]; + numa_node.add_vcpu_ids(&vcpu_ids); + assert_eq!(*numa_node.vcpu_ids(), vcpu_ids); + } +} diff --git a/src/dragonball/src/dbs_address_space/src/region.rs b/src/dragonball/src/dbs_address_space/src/region.rs new file mode 100644 index 000000000000..a0a832404c5c --- /dev/null +++ b/src/dragonball/src/dbs_address_space/src/region.rs @@ -0,0 +1,564 @@ +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::ffi::CString; +use std::fs::{File, OpenOptions}; +use std::os::unix::io::FromRawFd; +use std::path::Path; +use std::str::FromStr; + +use nix::sys::memfd; +use vm_memory::{Address, FileOffset, GuestAddress, GuestUsize}; + +use crate::memory::MemorySourceType; +use crate::memory::MemorySourceType::MemFdShared; +use crate::AddressSpaceError; + +/// Type of address space regions. +/// +/// On physical machines, physical memory may have different properties, such as +/// volatile vs non-volatile, read-only vs read-write, non-executable vs executable etc. +/// On virtual machines, the concept of memory property may be extended to support better +/// cooperation between the hypervisor and the guest kernel. Here address space region type means +/// what the region will be used for by the guest OS, and different permissions and policies may +/// be applied to different address space regions. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum AddressSpaceRegionType { + /// Normal memory accessible by CPUs and IO devices. + DefaultMemory, + /// MMIO address region for Devices. + DeviceMemory, + /// DAX address region for virtio-fs/virtio-pmem. + DAXMemory, +} + +/// Struct to maintain configuration information about a guest address region. +#[derive(Debug, Clone)] +pub struct AddressSpaceRegion { + /// Type of address space regions. 
+ pub ty: AddressSpaceRegionType, + /// Base address of the region in virtual machine's physical address space. + pub base: GuestAddress, + /// Size of the address space region. + pub size: GuestUsize, + /// Host NUMA node ids assigned to this region. + pub host_numa_node_id: Option, + + /// File/offset tuple to back the memory allocation. + file_offset: Option, + /// Mmap permission flags. + perm_flags: i32, + /// Mmap protection flags. + prot_flags: i32, + /// Hugepage madvise hint. + /// + /// It needs 'advise' or 'always' policy in host shmem config. + is_hugepage: bool, + /// Hotplug hint. + is_hotplug: bool, + /// Anonymous memory hint. + /// + /// It should be true for regions with the MADV_DONTFORK flag enabled. + is_anon: bool, +} + +#[allow(clippy::too_many_arguments)] +impl AddressSpaceRegion { + /// Create an address space region with default configuration. + pub fn new(ty: AddressSpaceRegionType, base: GuestAddress, size: GuestUsize) -> Self { + AddressSpaceRegion { + ty, + base, + size, + host_numa_node_id: None, + file_offset: None, + perm_flags: libc::MAP_SHARED, + prot_flags: libc::PROT_READ | libc::PROT_WRITE, + is_hugepage: false, + is_hotplug: false, + is_anon: false, + } + } + + /// Create an address space region with all configurable information. + /// + /// # Arguments + /// * `ty` - Type of the address region + /// * `base` - Base address in VM to map content + /// * `size` - Length of content to map + /// * `numa_node_id` - Optional NUMA node id to allocate memory from + /// * `file_offset` - Optional file descriptor and offset to map content from + /// * `perm_flags` - mmap permission flags + /// * `prot_flags` - mmap protection flags + /// * `is_hotplug` - Whether it's a region for hotplug. + pub fn build( + ty: AddressSpaceRegionType, + base: GuestAddress, + size: GuestUsize, + host_numa_node_id: Option, + file_offset: Option, + perm_flags: i32, + prot_flags: i32, + is_hotplug: bool, + ) -> Self { + let mut region = Self::new(ty, base, size); + + region.set_host_numa_node_id(host_numa_node_id); + region.set_file_offset(file_offset); + region.set_perm_flags(perm_flags); + region.set_prot_flags(prot_flags); + if is_hotplug { + region.set_hotplug(); + } + + region + } + + /// Create an address space region to map memory into the virtual machine. + /// + /// # Arguments + /// * `base` - Base address in VM to map content + /// * `size` - Length of content to map + /// * `numa_node_id` - Optional NUMA node id to allocate memory from + /// * `mem_type` - Memory mapping from, 'shmem' or 'hugetlbfs' + /// * `mem_file_path` - Memory file path + /// * `mem_prealloc` - Whether to enable pre-allocation of guest memory + /// * `is_hotplug` - Whether it's a region for hotplug. + pub fn create_default_memory_region( + base: GuestAddress, + size: GuestUsize, + numa_node_id: Option, + mem_type: &str, + mem_file_path: &str, + mem_prealloc: bool, + is_hotplug: bool, + ) -> Result { + Self::create_memory_region( + base, + size, + numa_node_id, + mem_type, + mem_file_path, + mem_prealloc, + libc::PROT_READ | libc::PROT_WRITE, + is_hotplug, + ) + } + + /// Create an address space region to map memory from memfd/hugetlbfs into the virtual machine. 
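+    ///
+    /// A minimal sketch of how this might be called (not a doctest; it mirrors
+    /// `create_default_memory_region()` and the unit tests below):
+    ///
+    /// ```ignore
+    /// // Back a 1 MiB region at GPA 0x10_0000 with an anonymous memfd ("shmem").
+    /// let region = AddressSpaceRegion::create_memory_region(
+    ///     GuestAddress(0x10_0000),
+    ///     0x10_0000,
+    ///     None,
+    ///     "shmem",
+    ///     "",
+    ///     false,
+    ///     libc::PROT_READ | libc::PROT_WRITE,
+    ///     false,
+    /// )?;
+    /// assert_eq!(region.region_type(), AddressSpaceRegionType::DefaultMemory);
+    /// assert!(region.file_offset().is_some());
+    /// ```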
+ /// + /// # Arguments + /// * `base` - Base address in VM to map content + /// * `size` - Length of content to map + /// * `numa_node_id` - Optional NUMA node id to allocate memory from + /// * `mem_type` - Memory mapping from, 'shmem' or 'hugetlbfs' + /// * `mem_file_path` - Memory file path + /// * `mem_prealloc` - Whether to enable pre-allocation of guest memory + /// * `is_hotplug` - Whether it's a region for hotplug. + /// * `prot_flags` - mmap protection flags + pub fn create_memory_region( + base: GuestAddress, + size: GuestUsize, + numa_node_id: Option, + mem_type: &str, + mem_file_path: &str, + mem_prealloc: bool, + prot_flags: i32, + is_hotplug: bool, + ) -> Result { + let perm_flags = if mem_prealloc { + libc::MAP_SHARED | libc::MAP_POPULATE + } else { + libc::MAP_SHARED + }; + let source_type = MemorySourceType::from_str(mem_type) + .map_err(|_e| AddressSpaceError::InvalidMemorySourceType(mem_type.to_string()))?; + let mut reg = match source_type { + MemorySourceType::MemFdShared | MemorySourceType::MemFdOnHugeTlbFs => { + let fn_str = if source_type == MemFdShared { + CString::new("shmem").expect("CString::new('shmem') failed") + } else { + CString::new("hugeshmem").expect("CString::new('hugeshmem') failed") + }; + let filename = fn_str.as_c_str(); + let fd = memfd::memfd_create(filename, memfd::MemFdCreateFlag::empty()) + .map_err(AddressSpaceError::CreateMemFd)?; + // Safe because we have just created the fd. + let file: File = unsafe { File::from_raw_fd(fd) }; + file.set_len(size).map_err(AddressSpaceError::SetFileSize)?; + Self::build( + AddressSpaceRegionType::DefaultMemory, + base, + size, + numa_node_id, + Some(FileOffset::new(file, 0)), + perm_flags, + prot_flags, + is_hotplug, + ) + } + MemorySourceType::MmapAnonymous | MemorySourceType::MmapAnonymousHugeTlbFs => { + let mut perm_flags = libc::MAP_PRIVATE | libc::MAP_ANONYMOUS; + if mem_prealloc { + perm_flags |= libc::MAP_POPULATE + } + Self::build( + AddressSpaceRegionType::DefaultMemory, + base, + size, + numa_node_id, + None, + perm_flags, + prot_flags, + is_hotplug, + ) + } + MemorySourceType::FileOnHugeTlbFs => { + let path = Path::new(mem_file_path); + if let Some(parent_dir) = path.parent() { + // Ensure that the parent directory is existed for the mem file path. + std::fs::create_dir_all(parent_dir).map_err(AddressSpaceError::CreateDir)?; + } + let file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(mem_file_path) + .map_err(AddressSpaceError::OpenFile)?; + nix::unistd::unlink(mem_file_path).map_err(AddressSpaceError::UnlinkFile)?; + file.set_len(size).map_err(AddressSpaceError::SetFileSize)?; + let file_offset = FileOffset::new(file, 0); + Self::build( + AddressSpaceRegionType::DefaultMemory, + base, + size, + numa_node_id, + Some(file_offset), + perm_flags, + prot_flags, + is_hotplug, + ) + } + }; + + if source_type.is_hugepage() { + reg.set_hugepage(); + } + if source_type.is_mmap_anonymous() { + reg.set_anonpage(); + } + + Ok(reg) + } + + /// Create an address region for device MMIO. + /// + /// # Arguments + /// * `base` - Base address in VM to map content + /// * `size` - Length of content to map + pub fn create_device_region( + base: GuestAddress, + size: GuestUsize, + ) -> Result { + Ok(Self::build( + AddressSpaceRegionType::DeviceMemory, + base, + size, + None, + None, + 0, + 0, + false, + )) + } + + /// Get type of the address space region. + pub fn region_type(&self) -> AddressSpaceRegionType { + self.ty + } + + /// Get size of region. 
+ pub fn len(&self) -> GuestUsize { + self.size + } + + /// Get the inclusive start physical address of the region. + pub fn start_addr(&self) -> GuestAddress { + self.base + } + + /// Get the inclusive end physical address of the region. + pub fn last_addr(&self) -> GuestAddress { + debug_assert!(self.size > 0 && self.base.checked_add(self.size).is_some()); + GuestAddress(self.base.raw_value() + self.size - 1) + } + + /// Get mmap permission flags of the address space region. + pub fn perm_flags(&self) -> i32 { + self.perm_flags + } + + /// Set mmap permission flags for the address space region. + pub fn set_perm_flags(&mut self, perm_flags: i32) { + self.perm_flags = perm_flags; + } + + /// Get mmap protection flags of the address space region. + pub fn prot_flags(&self) -> i32 { + self.prot_flags + } + + /// Set mmap protection flags for the address space region. + pub fn set_prot_flags(&mut self, prot_flags: i32) { + self.prot_flags = prot_flags; + } + + /// Get host_numa_node_id flags + pub fn host_numa_node_id(&self) -> Option { + self.host_numa_node_id + } + + /// Set associated NUMA node ID to allocate memory from for this region. + pub fn set_host_numa_node_id(&mut self, host_numa_node_id: Option) { + self.host_numa_node_id = host_numa_node_id; + } + + /// Check whether the address space region is backed by a memory file. + pub fn has_file(&self) -> bool { + self.file_offset.is_some() + } + + /// Get optional file associated with the region. + pub fn file_offset(&self) -> Option<&FileOffset> { + self.file_offset.as_ref() + } + + /// Set associated file/offset pair for the region. + pub fn set_file_offset(&mut self, file_offset: Option) { + self.file_offset = file_offset; + } + + /// Set the hotplug hint. + pub fn set_hotplug(&mut self) { + self.is_hotplug = true + } + + /// Get the hotplug hint. + pub fn is_hotplug(&self) -> bool { + self.is_hotplug + } + + /// Set hugepage hint for `madvise()`, only takes effect when the memory type is `shmem`. + pub fn set_hugepage(&mut self) { + self.is_hugepage = true + } + + /// Get the hugepage hint. + pub fn is_hugepage(&self) -> bool { + self.is_hugepage + } + + /// Set the anonymous memory hint. + pub fn set_anonpage(&mut self) { + self.is_anon = true + } + + /// Get the anonymous memory hint. + pub fn is_anonpage(&self) -> bool { + self.is_anon + } + + /// Check whether the address space region is valid. + pub fn is_valid(&self) -> bool { + self.size > 0 && self.base.checked_add(self.size).is_some() + } + + /// Check whether the address space region intersects with another one. 
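+    ///
+    /// Illustrative sketch (not a doctest; see `test_address_space_region_intersect` below):
+    ///
+    /// ```ignore
+    /// let a = AddressSpaceRegion::new(
+    ///     AddressSpaceRegionType::DefaultMemory, GuestAddress(0x1000), 0x1000);
+    /// let b = AddressSpaceRegion::new(
+    ///     AddressSpaceRegionType::DefaultMemory, GuestAddress(0x1800), 0x1000);
+    /// let c = AddressSpaceRegion::new(
+    ///     AddressSpaceRegionType::DefaultMemory, GuestAddress(0x2000), 0x1000);
+    /// assert!(a.intersect_with(&b));  // [0x1000, 0x2000) overlaps [0x1800, 0x2800)
+    /// assert!(!a.intersect_with(&c)); // adjacent regions do not intersect
+    /// ```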
+ pub fn intersect_with(&self, other: &AddressSpaceRegion) -> bool { + // Treat invalid address region as intersecting always + let end1 = match self.base.checked_add(self.size) { + Some(addr) => addr, + None => return true, + }; + let end2 = match other.base.checked_add(other.size) { + Some(addr) => addr, + None => return true, + }; + + !(end1 <= other.base || self.base >= end2) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_address_space_region_valid() { + let reg1 = AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0xFFFFFFFFFFFFF000), + 0x2000, + ); + assert!(!reg1.is_valid()); + let reg1 = AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0xFFFFFFFFFFFFF000), + 0x1000, + ); + assert!(!reg1.is_valid()); + let reg1 = AddressSpaceRegion::new( + AddressSpaceRegionType::DeviceMemory, + GuestAddress(0xFFFFFFFFFFFFE000), + 0x1000, + ); + assert!(reg1.is_valid()); + assert_eq!(reg1.start_addr(), GuestAddress(0xFFFFFFFFFFFFE000)); + assert_eq!(reg1.len(), 0x1000); + assert!(!reg1.has_file()); + assert!(reg1.file_offset().is_none()); + assert_eq!(reg1.perm_flags(), libc::MAP_SHARED); + assert_eq!(reg1.prot_flags(), libc::PROT_READ | libc::PROT_WRITE); + assert_eq!(reg1.region_type(), AddressSpaceRegionType::DeviceMemory); + + let tmp_file = TempFile::new().unwrap(); + let mut f = tmp_file.into_file(); + let sample_buf = &[1, 2, 3, 4, 5]; + assert!(f.write_all(sample_buf).is_ok()); + let reg2 = AddressSpaceRegion::build( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x1000), + 0x1000, + None, + Some(FileOffset::new(f, 0x0)), + 0x5a, + 0x5a, + false, + ); + assert_eq!(reg2.region_type(), AddressSpaceRegionType::DefaultMemory); + assert!(reg2.is_valid()); + assert_eq!(reg2.start_addr(), GuestAddress(0x1000)); + assert_eq!(reg2.len(), 0x1000); + assert!(reg2.has_file()); + assert!(reg2.file_offset().is_some()); + assert_eq!(reg2.perm_flags(), 0x5a); + assert_eq!(reg2.prot_flags(), 0x5a); + } + + #[test] + fn test_address_space_region_intersect() { + let reg1 = AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x1000), + 0x1000, + ); + let reg2 = AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x2000), + 0x1000, + ); + let reg3 = AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x1000), + 0x1001, + ); + let reg4 = AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0x1100), + 0x100, + ); + let reg5 = AddressSpaceRegion::new( + AddressSpaceRegionType::DefaultMemory, + GuestAddress(0xFFFFFFFFFFFFF000), + 0x2000, + ); + + assert!(!reg1.intersect_with(®2)); + assert!(!reg2.intersect_with(®1)); + + // intersect with self + assert!(reg1.intersect_with(®1)); + + // intersect with others + assert!(reg3.intersect_with(®2)); + assert!(reg2.intersect_with(®3)); + assert!(reg1.intersect_with(®4)); + assert!(reg4.intersect_with(®1)); + assert!(reg1.intersect_with(®5)); + assert!(reg5.intersect_with(®1)); + } + + #[test] + fn test_create_device_region() { + let reg = AddressSpaceRegion::create_device_region(GuestAddress(0x10000), 0x1000).unwrap(); + assert_eq!(reg.region_type(), AddressSpaceRegionType::DeviceMemory); + assert_eq!(reg.start_addr(), GuestAddress(0x10000)); + assert_eq!(reg.len(), 0x1000); + } + + #[test] + fn test_create_default_memory_region() { + AddressSpaceRegion::create_default_memory_region( + 
GuestAddress(0x100000), + 0x100000, + None, + "invalid", + "invalid", + false, + false, + ) + .unwrap_err(); + + let reg = AddressSpaceRegion::create_default_memory_region( + GuestAddress(0x100000), + 0x100000, + None, + "shmem", + "", + false, + false, + ) + .unwrap(); + assert_eq!(reg.region_type(), AddressSpaceRegionType::DefaultMemory); + assert_eq!(reg.start_addr(), GuestAddress(0x100000)); + assert_eq!(reg.last_addr(), GuestAddress(0x1fffff)); + assert_eq!(reg.len(), 0x100000); + assert!(reg.file_offset().is_some()); + + let reg = AddressSpaceRegion::create_default_memory_region( + GuestAddress(0x100000), + 0x100000, + None, + "hugeshmem", + "", + true, + false, + ) + .unwrap(); + assert_eq!(reg.region_type(), AddressSpaceRegionType::DefaultMemory); + assert_eq!(reg.start_addr(), GuestAddress(0x100000)); + assert_eq!(reg.last_addr(), GuestAddress(0x1fffff)); + assert_eq!(reg.len(), 0x100000); + assert!(reg.file_offset().is_some()); + + let reg = AddressSpaceRegion::create_default_memory_region( + GuestAddress(0x100000), + 0x100000, + None, + "mmap", + "", + true, + false, + ) + .unwrap(); + assert_eq!(reg.region_type(), AddressSpaceRegionType::DefaultMemory); + assert_eq!(reg.start_addr(), GuestAddress(0x100000)); + assert_eq!(reg.last_addr(), GuestAddress(0x1fffff)); + assert_eq!(reg.len(), 0x100000); + assert!(reg.file_offset().is_none()); + + // TODO: test hugetlbfs + } +} diff --git a/src/dragonball/src/dbs_allocator/Cargo.toml b/src/dragonball/src/dbs_allocator/Cargo.toml new file mode 100644 index 000000000000..c3c0f3c10954 --- /dev/null +++ b/src/dragonball/src/dbs_allocator/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "dbs-allocator" +version = "0.1.1" +authors = ["Liu Jiang "] +description = "a resource allocator for virtual machine manager" +license = "Apache-2.0" +edition = "2018" +homepage = "https://github.com/openanolis/dragonball-sandbox" +repository = "https://github.com/openanolis/dragonball-sandbox" +keywords = ["dragonball"] +readme = "README.md" + +[dependencies] +thiserror = "1.0" diff --git a/src/dragonball/src/dbs_allocator/LICENSE b/src/dragonball/src/dbs_allocator/LICENSE new file mode 120000 index 000000000000..30cff7403da0 --- /dev/null +++ b/src/dragonball/src/dbs_allocator/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/dragonball/src/dbs_allocator/README.md b/src/dragonball/src/dbs_allocator/README.md new file mode 100644 index 000000000000..2e4b07a8c312 --- /dev/null +++ b/src/dragonball/src/dbs_allocator/README.md @@ -0,0 +1,106 @@ +# dbs-allocator + +## Design + +The resource manager in the `Dragonball Sandbox` needs to manage and allocate different kinds of resource for the +sandbox (virtual machine), such as memory-mapped I/O address space, port I/O address space, legacy IRQ numbers, +MSI/MSI-X vectors, device instance id, etc. The `dbs-allocator` crate is designed to help the resource manager +to track and allocate these types of resources. + +Main components are: +- *Constraints*: struct to declare constraints for resource allocation. +```rust +#[derive(Copy, Clone, Debug)] +pub struct Constraint { + /// Size of resource to allocate. + pub size: u64, + /// Lower boundary for resource allocation. + pub min: u64, + /// Upper boundary for resource allocation. + pub max: u64, + /// Alignment for allocated resource. + pub align: u64, + /// Policy for resource allocation. + pub policy: AllocPolicy, +} +``` +- `IntervalTree`: An interval tree implementation specialized for VMM resource management. 
+```rust
+pub struct IntervalTree<T> {
+    pub(crate) root: Option<Node<T>>,
+}
+
+pub fn allocate(&mut self, constraint: &Constraint) -> Option<Range>
+pub fn free(&mut self, key: &Range) -> Option<T>
+pub fn insert(&mut self, key: Range, data: Option<T>) -> Self
+pub fn update(&mut self, key: &Range, data: T) -> Option<T>
+pub fn delete(&mut self, key: &Range) -> Option<T>
+pub fn get(&self, key: &Range) -> Option<NodeState<&T>>
+```
+
+## Usage
+The concept of an interval tree may seem complicated, but using dbs-allocator to do resource allocation and release is simple and straightforward.
+You can follow these steps to allocate your VMM resources.
+```rust
+// 1. To start with, create an interval tree for a specific kind of resource and insert the
+//    maximum address/ID range as the root node. The range could be an address range, an ID
+//    range, etc.
+let mut resources_pool = IntervalTree::new();
+resources_pool.insert(Range::new(MIN_RANGE, MAX_RANGE), None);
+
+// 2. Next, create a constraint with the size of your resource; you may also set the maximum,
+//    minimum and alignment for the constraint. Then use the constraint to allocate a resource
+//    from the range inserted above. The interval tree returns a suitable range.
+let mut constraint = Constraint::new(SIZE);
+let mut resources_range = resources_pool.allocate(&constraint);
+
+// 3. Finally, use the allocated range to let other crates like vm-pci / vm-device create and
+//    maintain the device.
+let mut device = Device::create(resources_range, ..)
+```
+
+## Example
+The following examples show how to allocate an unused PCI device ID from a PCI device ID pool, and how to allocate a guest memory address range, using dbs-allocator.
+```rust
+use dbs_allocator::{Constraint, IntervalTree, Range};
+
+// Init a dbs-allocator IntervalTree
+let mut pci_device_pool = IntervalTree::new();
+
+// Init the PCI device ID pool with the range 0 to 255
+pci_device_pool.insert(Range::new(0x0u8, 0xffu8), None);
+
+// Construct a constraint with size 1 and alignment 1 to ask for an ID.
+let constraint = Constraint::new(1u64).align(1u64);
+
+// Get an ID from the pci_device_pool
+let id = pci_device_pool.allocate(&constraint).map(|e| e.min as u8).unwrap();
+
+// Pass the ID allocated by dbs-allocator to vm-pci specific functions to create PCI devices
+let mut pci_device = PciDevice::new(id, ..);
+```
+
+```rust
+use dbs_allocator::{Constraint, IntervalTree, Range};
+
+// Init a dbs-allocator IntervalTree
+let mut mem_pool = IntervalTree::new();
+
+// Init the memory address pool from GUEST_MEM_START to GUEST_MEM_END
+mem_pool.insert(Range::new(GUEST_MEM_START, GUEST_MEM_END), None);
+
+// Construct a constraint with the size, minimum address and maximum address of the memory
+// region to ask for a memory allocation range.
+let constraint = Constraint::new(region.len())
+    .min(region.start_addr().raw_value())
+    .max(region.last_addr().raw_value());
+
+// Get the memory allocation range from the mem_pool
+let mem_range = mem_pool.allocate(&constraint).unwrap();
+
+// Update the mem_range in the IntervalTree with the memory region info
+mem_pool.update(&mem_range, region);
+
+// After allocation, the memory range can be used for mapping and other memory-related work.
+...
+```
+
+## License
+
+This project is licensed under [Apache License](http://www.apache.org/licenses/LICENSE-2.0), Version 2.0.
\ No newline at end of file diff --git a/src/dragonball/src/dbs_allocator/src/interval_tree.rs b/src/dragonball/src/dbs_allocator/src/interval_tree.rs new file mode 100644 index 000000000000..c2a13c5c8d70 --- /dev/null +++ b/src/dragonball/src/dbs_allocator/src/interval_tree.rs @@ -0,0 +1,1297 @@ +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! An interval tree implementation specialized for VMM resource management. +//! +//! It's not designed as a generic interval tree, but specialized for VMM resource management. +//! In addition to the normal get()/insert()/delete()/update() tree operations, it also implements +//! allocate()/free() for resource allocation. +//! +//! # Examples +//! ```rust +//! extern crate dbs_allocator; +//! use dbs_allocator::{Constraint, IntervalTree, NodeState, Range}; +//! +//! // Create an interval tree and add available resources. +//! let mut tree = IntervalTree::::new(); +//! tree.insert(Range::new(0x100u32, 0x100u32), None); +//! tree.insert(Range::new(0x200u16, 0x2ffu16), None); +//! +//! // Allocate a range with constraints. +//! let mut constraint = Constraint::new(8u64); +//! constraint.min = 0x211; +//! constraint.max = 0x21f; +//! constraint.align = 0x8; +//! +//! let key = tree.allocate(&constraint); +//! assert_eq!(key, Some(Range::new(0x218u64, 0x21fu64))); +//! let val = tree.get(&Range::new(0x218u64, 0x21fu64)); +//! assert_eq!(val, Some(NodeState::Allocated)); +//! +//! // Associate data with the allocated range and mark the range as occupied. +//! // Note: caller needs to protect from concurrent access between allocate() and the first call +//! // to update() to mark range as occupied. +//! let old = tree.update(&Range::new(0x218u32, 0x21fu32), 2); +//! assert_eq!(old, None); +//! let old = tree.update(&Range::new(0x218u32, 0x21fu32), 3); +//! assert_eq!(old, Some(2)); +//! let val = tree.get(&Range::new(0x218u32, 0x21fu32)); +//! assert_eq!(val, Some(NodeState::Valued(&3))); +//! +//! // Free allocated resource. +//! let old = tree.free(key.as_ref().unwrap()); +//! assert_eq!(old, Some(3)); +//! ``` + +use std::cmp::{max, min, Ordering}; + +use crate::{AllocPolicy, Constraint}; + +/// Represent a closed range `[min, max]`. +#[allow(missing_docs)] +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct Range { + pub min: u64, + pub max: u64, +} + +impl std::fmt::Debug for Range { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "[ {:016x}, {:016x} ]", self.min, self.max) + } +} + +impl Range { + /// Create a instance of [`Range`] with given `min` and `max`. + /// + /// ## Panic + /// - if min is bigger than max + /// - if min == 0 && max == u64:MAX + pub fn new(min: T, max: T) -> Self + where + u64: From, + { + let umin = u64::from(min); + let umax = u64::from(max); + if umin > umax || (umin == 0 && umax == u64::MAX) { + panic!("interval_tree: Range({}, {}) is invalid", umin, umax); + } + Range { + min: umin, + max: umax, + } + } + + /// Create a instance of [`Range`] with given base and size. 
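+    ///
+    /// Note that the result is the closed interval `[base, base + size]`, so
+    /// `Range::with_size(0x100u64, 0xffu64)` covers `0x100..=0x1ff` and `len()` returns `0x100`.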
+ /// + /// ## Panic + /// - if base + size wraps around + /// - if base == 0 && size == u64::MAX + pub fn with_size(base: T, size: T) -> Self + where + u64: From, + { + let umin = u64::from(base); + let umax = u64::from(size).checked_add(umin).unwrap(); + if umin > umax || (umin == 0 && umax == std::u64::MAX) { + panic!("interval_tree: Range({}, {}) is invalid", umin, umax); + } + Range { + min: umin, + max: umax, + } + } + + /// Create a instance of [`Range`] containing only the point `value`. + pub fn new_point(value: T) -> Self + where + u64: From, + { + let val = u64::from(value); + Range { min: val, max: val } + } + + /// Get size of the range. + pub fn len(&self) -> u64 { + self.max - self.min + 1 + } + + /// Check whether the range is empty. + pub fn is_empty(&self) -> bool { + false + } + + /// Check whether two Range objects intersect with each other. + pub fn intersect(&self, other: &Range) -> bool { + max(self.min, other.min) <= min(self.max, other.max) + } + + /// Check whether another [Range] object is fully covered by this range. + pub fn contain(&self, other: &Range) -> bool { + self.min <= other.min && self.max >= other.max + } + + /// Create a new instance of [Range] with `min` aligned to `align`. + /// + /// # Examples + /// ```rust + /// extern crate dbs_allocator; + /// use dbs_allocator::Range; + /// + /// let a = Range::new(2u32, 6u32); + /// assert_eq!(a.align_to(0), Some(Range::new(2u32, 6u32))); + /// assert_eq!(a.align_to(1), Some(Range::new(2u16, 6u16))); + /// assert_eq!(a.align_to(2), Some(Range::new(2u64, 6u64))); + /// assert_eq!(a.align_to(4), Some(Range::new(4u8, 6u8))); + /// assert_eq!(a.align_to(8), None); + /// assert_eq!(a.align_to(3), None); + /// let b = Range::new(2u8, 2u8); + /// assert_eq!(b.align_to(2), Some(Range::new(2u8, 2u8))); + /// ``` + pub fn align_to(&self, align: u64) -> Option { + match align { + 0 | 1 => Some(*self), + _ => { + if align & (align - 1) != 0 { + return None; + } + if let Some(min) = self.min.checked_add(align - 1).map(|v| v & !(align - 1)) { + if min <= self.max { + return Some(Range::new(min, self.max)); + } + } + None + } + } + } +} + +impl PartialOrd for Range { + fn partial_cmp(&self, other: &Self) -> Option { + match self.min.cmp(&other.min) { + Ordering::Equal => Some(self.max.cmp(&other.max)), + res => Some(res), + } + } +} + +impl Ord for Range { + fn cmp(&self, other: &Self) -> Ordering { + match self.min.cmp(&other.min) { + Ordering::Equal => self.max.cmp(&other.max), + res => res, + } + } +} + +/// State of interval tree node. +/// +/// Valid state transitions: +/// - None -> Free: [IntervalTree::insert()] +/// - None -> Valued: [IntervalTree::insert()] +/// - Free -> Allocated: [IntervalTree::allocate()] +/// - Allocated -> Valued(T): [IntervalTree::update()] +/// - Valued -> Valued(T): [IntervalTree::update()] +/// - Allocated -> Free: [IntervalTree::free()] +/// - Valued(T) -> Free: [IntervalTree::free()] +/// - * -> None: [IntervalTree::delete()] +#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Ord)] +pub enum NodeState { + /// Node is free + Free, + /// Node is allocated but without associated data + Allocated, + /// Node is allocated with associated data. 
+ Valued(T), +} + +impl NodeState { + fn take(&mut self) -> Self { + std::mem::replace(self, NodeState::::Free) + } + + fn replace(&mut self, value: NodeState) -> Self { + std::mem::replace(self, value) + } + + fn as_ref(&self) -> NodeState<&T> { + match self { + NodeState::::Valued(ref x) => NodeState::<&T>::Valued(x), + NodeState::::Allocated => NodeState::<&T>::Allocated, + NodeState::::Free => NodeState::<&T>::Free, + } + } + + fn as_mut(&mut self) -> NodeState<&mut T> { + match self { + NodeState::::Valued(ref mut x) => NodeState::<&mut T>::Valued(x), + NodeState::::Allocated => NodeState::<&mut T>::Allocated, + NodeState::::Free => NodeState::<&mut T>::Free, + } + } + + fn is_free(&self) -> bool { + matches!(self, NodeState::::Free) + } +} + +impl From> for Option { + fn from(n: NodeState) -> Option { + match n { + NodeState::::Free | NodeState::::Allocated => None, + NodeState::::Valued(data) => Some(data), + } + } +} + +/// Internal tree node to implement interval tree. +#[derive(Debug, PartialEq, Eq)] +struct InnerNode { + /// Interval handled by this node. + key: Range, + /// Optional contained data, None if the node is free. + data: NodeState, + /// Optional left child of current node. + left: Option>, + /// Optional right child of current node. + right: Option>, + /// Cached height of the node. + height: u32, + /// Cached maximum valued covered by this node. + max_key: u64, +} + +impl InnerNode { + fn new(key: Range, data: NodeState) -> Self { + InnerNode { + key, + data, + left: None, + right: None, + height: 1, + max_key: key.max, + } + } +} + +/// Newtype for interval tree nodes. +#[derive(Debug, PartialEq, Eq)] +struct Node(Box>); + +impl Node { + fn new(key: Range, data: Option) -> Self { + let value = if let Some(t) = data { + NodeState::Valued(t) + } else { + NodeState::Free + }; + Node(Box::new(InnerNode::new(key, value))) + } + + /// Returns a readonly reference to the node associated with the `key` or None if not found. + fn search(&self, key: &Range) -> Option<&Self> { + match self.0.key.cmp(key) { + Ordering::Equal => Some(self), + Ordering::Less => self.0.right.as_ref().and_then(|node| node.search(key)), + Ordering::Greater => self.0.left.as_ref().and_then(|node| node.search(key)), + } + } + + /// Returns a shared reference to the node covers full range of the `key`. + fn search_superset(&self, key: &Range) -> Option<&Self> { + if self.0.key.contain(key) { + Some(self) + } else if key.max < self.0.key.min && self.0.left.is_some() { + // Safe to unwrap() because we have just checked it. + self.0.left.as_ref().unwrap().search_superset(key) + } else if key.min > self.0.key.max && self.0.right.is_some() { + // Safe to unwrap() because we have just checked it. + self.0.right.as_ref().unwrap().search_superset(key) + } else { + None + } + } + + /// Returns a mutable reference to the node covers full range of the `key`. + fn search_superset_mut(&mut self, key: &Range) -> Option<&mut Self> { + if self.0.key.contain(key) { + Some(self) + } else if key.max < self.0.key.min && self.0.left.is_some() { + // Safe to unwrap() because we have just checked it. + self.0.left.as_mut().unwrap().search_superset_mut(key) + } else if key.min > self.0.key.max && self.0.right.is_some() { + // Safe to unwrap() because we have just checked it. + self.0.right.as_mut().unwrap().search_superset_mut(key) + } else { + None + } + } + + /// Insert a new (key, data) pair into the subtree. + /// + /// Note: it will panic if the new key intersects with existing nodes. 
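+    ///
+    /// The subtree is rebalanced on the way back up: every touched node goes through
+    /// `updated_node()`, which refreshes the cached height/max_key and applies AVL-style
+    /// rotations whenever the height difference between the two children exceeds one.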
+ fn insert(mut self, key: Range, data: Option) -> Self { + match self.0.key.cmp(&key) { + Ordering::Equal => { + panic!("interval_tree: key {:?} exists", key); + } + Ordering::Less => { + if self.0.key.intersect(&key) { + panic!( + "interval_tree: key {:?} intersects with existing {:?}", + key, self.0.key + ); + } + match self.0.right { + None => self.0.right = Some(Node::new(key, data)), + Some(_) => self.0.right = self.0.right.take().map(|n| n.insert(key, data)), + } + } + Ordering::Greater => { + if self.0.key.intersect(&key) { + panic!( + "interval_tree: key {:?} intersects with existing {:?}", + key, self.0.key + ); + } + match self.0.left { + None => self.0.left = Some(Node::new(key, data)), + Some(_) => self.0.left = self.0.left.take().map(|n| n.insert(key, data)), + } + } + } + self.updated_node() + } + + /// Update an existing entry and return the old value. + fn update(&mut self, key: &Range, data: NodeState) -> Option { + match self.0.key.cmp(key) { + Ordering::Equal => { + match (self.0.data.as_ref(), data.as_ref()) { + (NodeState::<&T>::Free, NodeState::<&T>::Free) + | (NodeState::<&T>::Free, NodeState::<&T>::Valued(_)) + | (NodeState::<&T>::Allocated, NodeState::<&T>::Free) + | (NodeState::<&T>::Allocated, NodeState::<&T>::Allocated) + | (NodeState::<&T>::Valued(_), NodeState::<&T>::Free) + | (NodeState::<&T>::Valued(_), NodeState::<&T>::Allocated) => { + panic!("try to update unallocated interval tree node"); + } + _ => {} + } + self.0.data.replace(data).into() + } + Ordering::Less => match self.0.right.as_mut() { + None => None, + Some(node) => node.update(key, data), + }, + Ordering::Greater => match self.0.left.as_mut() { + None => None, + Some(node) => node.update(key, data), + }, + } + } + + /// Delete `key` from the subtree. + /// + /// Note: it doesn't return whether the key exists in the subtree, so caller need to ensure the + /// logic. + fn delete(mut self, key: &Range) -> (Option, Option) { + match self.0.key.cmp(key) { + Ordering::Equal => { + let data = self.0.data.take(); + return (data.into(), self.delete_root()); + } + Ordering::Less => { + if let Some(node) = self.0.right.take() { + let (data, right) = node.delete(key); + self.0.right = right; + return (data, Some(self.updated_node())); + } + } + Ordering::Greater => { + if let Some(node) = self.0.left.take() { + let (data, left) = node.delete(key); + self.0.left = left; + return (data, Some(self.updated_node())); + } + } + } + (None, Some(self)) + } + + /// Rotate the node if necessary to keep balance. + fn rotate(self) -> Self { + let l = height(&self.0.left); + let r = height(&self.0.right); + match (l as i32) - (r as i32) { + 1 | 0 | -1 => self, + 2 => self.rotate_left_successor(), + -2 => self.rotate_right_successor(), + _ => unreachable!(), + } + } + + /// Perform a single left rotation on this node. + fn rotate_left(mut self) -> Self { + let mut new_root = self.0.right.take().expect("Node is broken"); + self.0.right = new_root.0.left.take(); + self.update_cached_info(); + new_root.0.left = Some(self); + new_root.update_cached_info(); + new_root + } + + /// Perform a single right rotation on this node. + fn rotate_right(mut self) -> Self { + let mut new_root = self.0.left.take().expect("Node is broken"); + self.0.left = new_root.0.right.take(); + self.update_cached_info(); + new_root.0.right = Some(self); + new_root.update_cached_info(); + new_root + } + + /// Performs a rotation when the left successor is too high. 
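+    ///
+    /// Standard AVL rebalancing: in the left-left case a single right rotation of
+    /// `self` is enough; in the left-right case the left child is rotated left
+    /// first and then `self` is rotated right. A rough sketch of the left-right
+    /// case (`z` is `self`, subtrees omitted):
+    ///
+    /// ```text
+    ///      z              z            y
+    ///     /              /           /   \
+    ///    x      ->      y     ->    x     z
+    ///     \            /
+    ///      y          x
+    /// ```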
+ fn rotate_left_successor(mut self) -> Self { + let left = self.0.left.take().expect("Node is broken"); + if height(&left.0.left) < height(&left.0.right) { + let rotated = left.rotate_left(); + self.0.left = Some(rotated); + self.update_cached_info(); + } else { + self.0.left = Some(left); + } + self.rotate_right() + } + + /// Performs a rotation when the right successor is too high. + fn rotate_right_successor(mut self) -> Self { + let right = self.0.right.take().expect("Node is broken"); + if height(&right.0.left) > height(&right.0.right) { + let rotated = right.rotate_right(); + self.0.right = Some(rotated); + self.update_cached_info(); + } else { + self.0.right = Some(right); + } + self.rotate_left() + } + + fn delete_root(mut self) -> Option { + match (self.0.left.take(), self.0.right.take()) { + (None, None) => None, + (Some(l), None) => Some(l), + (None, Some(r)) => Some(r), + (Some(l), Some(r)) => Some(Self::combine_subtrees(l, r)), + } + } + + /// Find the minimal key below the tree and returns a new optional tree where the minimal + /// value has been removed and the (optional) minimal node as tuple (min_node, remaining) + fn get_new_root(mut self) -> (Self, Option) { + match self.0.left.take() { + None => { + let remaining = self.0.right.take(); + (self, remaining) + } + Some(left) => { + let (min_node, left) = left.get_new_root(); + self.0.left = left; + (min_node, Some(self.updated_node())) + } + } + } + + fn combine_subtrees(l: Self, r: Self) -> Self { + let (mut new_root, remaining) = r.get_new_root(); + new_root.0.left = Some(l); + new_root.0.right = remaining; + new_root.updated_node() + } + + fn find_candidate(&self, constraint: &Constraint) -> Option<&Self> { + match constraint.policy { + AllocPolicy::FirstMatch => self.first_match(constraint), + AllocPolicy::Default => self.first_match(constraint), + } + } + + fn first_match(&self, constraint: &Constraint) -> Option<&Self> { + let mut candidate = if self.0.left.is_some() { + self.0.left.as_ref().unwrap().first_match(constraint) + } else { + None + }; + + if candidate.is_none() && self.check_constraint(constraint) { + candidate = Some(self); + } + if candidate.is_none() && self.0.right.is_some() { + candidate = self.0.right.as_ref().unwrap().first_match(constraint); + } + candidate + } + + fn check_constraint(&self, constraint: &Constraint) -> bool { + if self.0.data.is_free() { + let min = std::cmp::max(self.0.key.min, constraint.min); + let max = std::cmp::min(self.0.key.max, constraint.max); + if min <= max { + let key = Range::new(min, max); + if constraint.align == 0 || constraint.align == 1 { + return key.len() >= constraint.size; + } + return match key.align_to(constraint.align) { + None => false, + Some(aligned_key) => aligned_key.len() >= constraint.size, + }; + } + } + false + } + + /// Update cached information of the node. + /// Please make sure that the cached values of both children are up to date. + fn update_cached_info(&mut self) { + self.0.height = max(height(&self.0.left), height(&self.0.right)) + 1; + self.0.max_key = max( + max_key(&self.0.left), + max(max_key(&self.0.right), self.0.key.max), + ); + } + + /// Update the sub-tree to keep balance. + fn updated_node(mut self) -> Self { + self.update_cached_info(); + self.rotate() + } +} + +/// Compute height of the optional sub-tree. +fn height(node: &Option>) -> u32 { + node.as_ref().map_or(0, |n| n.0.height) +} + +/// Compute maximum key value covered by the optional sub-tree. 
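+/// An empty subtree contributes 0, which is neutral for the `max()` fold in
+/// `update_cached_info()` since all keys are unsigned.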
+fn max_key(node: &Option>) -> u64 { + node.as_ref().map_or(0, |n| n.0.max_key) +} + +/// An interval tree implementation specialized for VMM resource management. +#[derive(Debug, Default, PartialEq, Eq)] +pub struct IntervalTree { + root: Option>, +} + +impl IntervalTree { + /// Construct a default empty [IntervalTree] object. + /// + /// # Examples + /// ```rust + /// extern crate dbs_allocator; + /// + /// let tree = dbs_allocator::IntervalTree::::new(); + /// ``` + pub fn new() -> Self { + IntervalTree { root: None } + } + + /// Check whether the interval tree is empty. + pub fn is_empty(&self) -> bool { + self.root.is_none() + } + + /// Get the data item associated with the key, or return None if no match found. + /// + /// # Examples + /// ```rust + /// extern crate dbs_allocator; + /// use dbs_allocator::{IntervalTree, NodeState, Range}; + /// + /// let mut tree = dbs_allocator::IntervalTree::::new(); + /// assert!(tree.is_empty()); + /// assert_eq!(tree.get(&Range::new(0x101u64, 0x101u64)), None); + /// tree.insert(Range::new(0x100u64, 0x100u64), Some(1)); + /// tree.insert(Range::new(0x200u64, 0x2ffu64), None); + /// assert!(!tree.is_empty()); + /// assert_eq!( + /// tree.get(&Range::new(0x100u64, 0x100u64)), + /// Some(NodeState::Valued(&1)) + /// ); + /// assert_eq!( + /// tree.get(&Range::new(0x200u64, 0x2ffu64)), + /// Some(NodeState::Free) + /// ); + /// assert_eq!(tree.get(&Range::new(0x101u64, 0x101u64)), None); + /// assert_eq!(tree.get(&Range::new(0x100u64, 0x101u64)), None); + /// ``` + pub fn get(&self, key: &Range) -> Option> { + match self.root { + None => None, + Some(ref node) => node.search(key).map(|n| n.0.data.as_ref()), + } + } + + /// Get a shared reference to the node fully covering the entire key range. + /// + /// # Examples + /// ```rust + /// extern crate dbs_allocator; + /// use dbs_allocator::{IntervalTree, NodeState, Range}; + /// + /// let mut tree = IntervalTree::::new(); + /// tree.insert(Range::new(0x100u32, 0x100u32), Some(1)); + /// tree.insert(Range::new(0x200u32, 0x2ffu32), None); + /// assert_eq!( + /// tree.get_superset(&Range::new(0x100u32, 0x100u32)), + /// Some((&Range::new(0x100u32, 0x100u32), NodeState::Valued(&1))) + /// ); + /// assert_eq!( + /// tree.get_superset(&Range::new(0x210u32, 0x210u32)), + /// Some((&Range::new(0x200u32, 0x2ffu32), NodeState::Free)) + /// ); + /// assert_eq!( + /// tree.get_superset(&Range::new(0x2ffu32, 0x2ffu32)), + /// Some((&Range::new(0x200u32, 0x2ffu32), NodeState::Free)) + /// ); + /// ``` + pub fn get_superset(&self, key: &Range) -> Option<(&Range, NodeState<&T>)> { + match self.root { + None => None, + Some(ref node) => node + .search_superset(key) + .map(|n| (&n.0.key, n.0.data.as_ref())), + } + } + + /// Get a mutable reference to the node fully covering the entire key range. 
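+    /// Returns `None` when no single node fully contains `key`, even if the range
+    /// is covered by several adjacent nodes.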
+ /// + /// # Examples + /// ```rust + /// extern crate dbs_allocator; + /// use dbs_allocator::{IntervalTree, NodeState, Range}; + /// + /// let mut tree = IntervalTree::::new(); + /// tree.insert(Range::new(0x100u32, 0x100u32), Some(1)); + /// tree.insert(Range::new(0x200u32, 0x2ffu32), None); + /// assert_eq!( + /// tree.get_superset_mut(&Range::new(0x100u32, 0x100u32)), + /// Some((&Range::new(0x100u32, 0x100u32), NodeState::Valued(&mut 1))) + /// ); + /// assert_eq!( + /// tree.get_superset_mut(&Range::new(0x210u32, 0x210u32)), + /// Some((&Range::new(0x200u32, 0x2ffu32), NodeState::Free)) + /// ); + /// assert_eq!( + /// tree.get_superset_mut(&Range::new(0x2ffu32, 0x2ffu32)), + /// Some((&Range::new(0x200u32, 0x2ffu32), NodeState::Free)) + /// ); + /// ``` + pub fn get_superset_mut(&mut self, key: &Range) -> Option<(&Range, NodeState<&mut T>)> { + match self.root { + None => None, + Some(ref mut node) => node + .search_superset_mut(key) + .map(|n| (&n.0.key, n.0.data.as_mut())), + } + } + + /// Get a shared reference to the value associated with the id. + /// + /// # Examples + /// ```rust + /// extern crate dbs_allocator; + /// use dbs_allocator::{IntervalTree, NodeState, Range}; + /// + /// let mut tree = IntervalTree::::new(); + /// tree.insert(Range::new(0x100u16, 0x100u16), Some(1)); + /// tree.insert(Range::new(0x200u16, 0x2ffu16), None); + /// assert_eq!(tree.get_by_id(0x100u16), Some(&1)); + /// assert_eq!(tree.get_by_id(0x210u32), None); + /// assert_eq!(tree.get_by_id(0x2ffu64), None); + /// ``` + pub fn get_by_id(&self, id: U) -> Option<&T> + where + u64: From, + { + match self.root { + None => None, + Some(ref node) => { + let key = Range::new_point(id); + match node.search_superset(&key) { + Some(node) => node.0.data.as_ref().into(), + None => None, + } + } + } + } + + /// Get a mutable reference to the value associated with the id. + /// + /// # Examples + /// ```rust + /// extern crate dbs_allocator; + /// use dbs_allocator::{IntervalTree, NodeState, Range}; + /// + /// let mut tree = IntervalTree::::new(); + /// tree.insert(Range::new(0x100u16, 0x100u16), Some(1)); + /// tree.insert(Range::new(0x200u16, 0x2ffu16), None); + /// assert_eq!(tree.get_by_id_mut(0x100u16), Some(&mut 1)); + /// assert_eq!(tree.get_by_id_mut(0x210u32), None); + /// assert_eq!(tree.get_by_id_mut(0x2ffu64), None); + /// ``` + pub fn get_by_id_mut(&mut self, id: U) -> Option<&mut T> + where + u64: From, + { + match self.root { + None => None, + Some(ref mut node) => { + let key = Range::new_point(id); + match node.search_superset_mut(&key) { + Some(node) => node.0.data.as_mut().into(), + None => None, + } + } + } + } + + /// Insert the (key, data) pair into the interval tree, panic if intersects with existing nodes. + /// + /// # Examples + /// ```rust + /// extern crate dbs_allocator; + /// use dbs_allocator::{IntervalTree, NodeState, Range}; + /// + /// let mut tree = IntervalTree::::new(); + /// tree.insert(Range::new(0x100u32, 0x100u32), Some(1)); + /// tree.insert(Range::new(0x200u32, 0x2ffu32), None); + /// assert_eq!( + /// tree.get(&Range::new(0x100u64, 0x100u64)), + /// Some(NodeState::Valued(&1)) + /// ); + /// assert_eq!( + /// tree.get(&Range::new(0x200u64, 0x2ffu64)), + /// Some(NodeState::Free) + /// ); + /// ``` + pub fn insert(&mut self, key: Range, data: Option) { + match self.root.take() { + None => self.root = Some(Node::new(key, data)), + Some(node) => self.root = Some(node.insert(key, data)), + } + } + + /// Update an existing entry and return the old value. 
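+    ///
+    /// The `key` must refer to a node that is currently `Allocated` or already
+    /// `Valued`; updating a node that is still `Free` panics, and a `key` that is
+    /// not present in the tree simply returns `None`.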
+ /// + /// # Examples + /// ```rust + /// extern crate dbs_allocator; + /// use dbs_allocator::{Constraint, IntervalTree, Range}; + /// + /// let mut tree = IntervalTree::::new(); + /// tree.insert(Range::new(0x100u64, 0x100u64), None); + /// tree.insert(Range::new(0x200u64, 0x2ffu64), None); + /// + /// let constraint = Constraint::new(2u32); + /// let key = tree.allocate(&constraint); + /// assert_eq!(key, Some(Range::new(0x200u64, 0x201u64))); + /// let old = tree.update(&Range::new(0x200u64, 0x201u64), 2); + /// assert_eq!(old, None); + /// let old = tree.update(&Range::new(0x200u64, 0x201u64), 3); + /// assert_eq!(old, Some(2)); + /// ``` + pub fn update(&mut self, key: &Range, data: T) -> Option { + match self.root.as_mut() { + None => None, + Some(node) => node.update(key, NodeState::::Valued(data)), + } + } + + /// Remove the `key` from the tree and return the associated data. + /// + /// # Examples + /// ```rust + /// extern crate dbs_allocator; + /// use dbs_allocator::{IntervalTree, Range}; + /// + /// let mut tree = IntervalTree::::new(); + /// tree.insert(Range::new(0x100u64, 0x100u64), Some(1)); + /// tree.insert(Range::new(0x200u64, 0x2ffu64), None); + /// let old = tree.delete(&Range::new(0x100u64, 0x100u64)); + /// assert_eq!(old, Some(1)); + /// let old = tree.delete(&Range::new(0x200u64, 0x2ffu64)); + /// assert_eq!(old, None); + /// ``` + pub fn delete(&mut self, key: &Range) -> Option { + match self.root.take() { + Some(node) => { + let (data, root) = node.delete(key); + self.root = root; + data + } + None => None, + } + } + + /// Allocate a resource range according the allocation constraints. + /// + /// # Examples + /// ```rust + /// extern crate dbs_allocator; + /// use dbs_allocator::{Constraint, IntervalTree, Range}; + /// + /// let mut tree = IntervalTree::::new(); + /// tree.insert(Range::new(0x100u64, 0x100u64), None); + /// tree.insert(Range::new(0x200u64, 0x2ffu64), None); + /// + /// let constraint = Constraint::new(2u8); + /// let key = tree.allocate(&constraint); + /// assert_eq!(key, Some(Range::new(0x200u64, 0x201u64))); + /// tree.update(&Range::new(0x200u64, 0x201u64), 2); + /// ``` + pub fn allocate(&mut self, constraint: &Constraint) -> Option { + if constraint.size == 0 { + return None; + } + let candidate = match self.root.as_mut() { + None => None, + Some(node) => node.find_candidate(constraint), + }; + + match candidate { + None => None, + Some(node) => { + let node_key = node.0.key; + let range = Range::new( + max(node_key.min, constraint.min), + min(node_key.max, constraint.max), + ); + // Safe to unwrap because candidate satisfy the constraints. + let aligned_key = range.align_to(constraint.align).unwrap(); + let result = Range::new(aligned_key.min, aligned_key.min + constraint.size - 1); + + // Allocate a resource from the node, no need to split the candidate node. + if node_key.min == aligned_key.min && node_key.len() == constraint.size { + self.root + .as_mut() + .unwrap() + .update(&node_key, NodeState::::Allocated); + return Some(node_key); + } + + // Split the candidate node. + // TODO: following algorithm is not optimal in preference of simplicity. 
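+                // The candidate free node is removed and re-inserted as up to three
+                // ranges: the gap below the aligned result (if any), the result
+                // itself, and the remainder above it (if any); the result range is
+                // then marked `Allocated`.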
+ self.delete(&node_key); + if aligned_key.min > node_key.min { + self.insert(Range::new(node_key.min, aligned_key.min - 1), None); + } + self.insert(result, None); + if result.max < node_key.max { + self.insert(Range::new(result.max + 1, node_key.max), None); + } + + self.root + .as_mut() + .unwrap() + .update(&result, NodeState::::Allocated); + Some(result) + } + } + } + + /// Free an allocated range and return the associated data. + pub fn free(&mut self, key: &Range) -> Option { + let result = self.delete(key); + let mut range = *key; + + // Try to merge with adjacent free nodes. + if range.min > 0 { + if let Some((r, v)) = self.get_superset(&Range::new(range.min - 1, range.min - 1)) { + if v.is_free() { + range.min = r.min; + } + } + } + if range.max < std::u64::MAX { + if let Some((r, v)) = self.get_superset(&Range::new(range.max + 1, range.max + 1)) { + if v.is_free() { + range.max = r.max; + } + } + } + + if range.min < key.min { + self.delete(&Range::new(range.min, key.min - 1)); + } + if range.max > key.max { + self.delete(&Range::new(key.max + 1, range.max)); + } + self.insert(range, None); + + result + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[should_panic] + fn test_new_range() { + let _ = Range::new(2u8, 1u8); + } + + #[test] + #[should_panic] + fn test_new_range_overflow() { + let _ = Range::new(0u64, std::u64::MAX); + } + + #[test] + fn test_range_intersect() { + let range_a = Range::new(1u8, 4u8); + let range_b = Range::new(4u16, 6u16); + let range_c = Range::new(2u32, 3u32); + let range_d = Range::new(4u64, 4u64); + let range_e = Range::new(5u32, 6u32); + + assert!(range_a.intersect(&range_b)); + assert!(range_b.intersect(&range_a)); + assert!(range_a.intersect(&range_c)); + assert!(range_c.intersect(&range_a)); + assert!(range_a.intersect(&range_d)); + assert!(range_d.intersect(&range_a)); + assert!(!range_a.intersect(&range_e)); + assert!(!range_e.intersect(&range_a)); + + assert_eq!(range_a.len(), 4); + assert_eq!(range_d.len(), 1); + } + + #[test] + fn test_range_contain() { + let range_a = Range::new(2u8, 6u8); + assert!(range_a.contain(&Range::new(2u8, 3u8))); + assert!(range_a.contain(&Range::new(3u8, 4u8))); + assert!(range_a.contain(&Range::new(5u8, 5u8))); + assert!(range_a.contain(&Range::new(5u8, 6u8))); + assert!(range_a.contain(&Range::new(6u8, 6u8))); + assert!(!range_a.contain(&Range::new(1u8, 1u8))); + assert!(!range_a.contain(&Range::new(1u8, 2u8))); + assert!(!range_a.contain(&Range::new(1u8, 3u8))); + assert!(!range_a.contain(&Range::new(1u8, 7u8))); + assert!(!range_a.contain(&Range::new(7u8, 8u8))); + assert!(!range_a.contain(&Range::new(6u8, 7u8))); + assert!(!range_a.contain(&Range::new(7u8, 8u8))); + } + + #[test] + fn test_range_align_to() { + let range_a = Range::new(2u32, 6); + assert_eq!(range_a.align_to(0), Some(Range::new(2u64, 6u64))); + assert_eq!(range_a.align_to(1), Some(Range::new(2u8, 6u8))); + assert_eq!(range_a.align_to(2), Some(Range::new(2u16, 6u16))); + assert_eq!(range_a.align_to(4), Some(Range::new(4u32, 6u32))); + assert_eq!(range_a.align_to(8), None); + assert_eq!(range_a.align_to(3), None); + + let range_b = Range::new(0xFFFF_FFFF_FFFF_FFFDu64, 0xFFFF_FFFF_FFFF_FFFFu64); + assert_eq!( + range_b.align_to(2), + Some(Range::new(0xFFFF_FFFF_FFFF_FFFEu64, 0xFFFF_FFFF_FFFF_FFFF)) + ); + assert_eq!(range_b.align_to(4), None); + } + + #[test] + fn test_range_ord() { + let range_a = Range::new(1u32, 4u32); + let range_b = Range::new(1u32, 4u32); + let range_c = Range::new(1u32, 3u32); + let range_d = 
Range::new(1u32, 5u32); + let range_e = Range::new(2u32, 2u32); + + assert_eq!(range_a, range_b); + assert_eq!(range_b, range_a); + assert!(range_a > range_c); + assert!(range_c < range_a); + assert!(range_a < range_d); + assert!(range_d > range_a); + assert!(range_a < range_e); + assert!(range_e > range_a); + } + + #[should_panic] + #[test] + fn test_tree_insert_equal() { + let mut tree = IntervalTree::::new(); + tree.insert(Range::new(0x100u16, 0x200), Some(1)); + tree.insert(Range::new(0x100u32, 0x200), None); + } + + #[should_panic] + #[test] + fn test_tree_insert_intersect_on_right() { + let mut tree = IntervalTree::::new(); + tree.insert(Range::new(0x100, 0x200u32), Some(1)); + tree.insert(Range::new(0x200, 0x2ffu64), None); + } + + #[should_panic] + #[test] + fn test_tree_insert_intersect_on_left() { + let mut tree = IntervalTree::::new(); + tree.insert(Range::new(0x100, 0x200u32), Some(1)); + tree.insert(Range::new(0x000, 0x100u64), None); + } + + #[test] + fn test_tree_get_superset() { + let mut tree = IntervalTree::::new(); + tree.insert(Range::new(0x100u32, 0x100u32), Some(1)); + tree.insert(Range::new(0x001u16, 0x008u16), None); + tree.insert(Range::new(0x009u16, 0x00fu16), None); + tree.insert(Range::new(0x200u16, 0x2ffu16), None); + let mut constraint = Constraint::new(8u64); + constraint.min = 0x211; + constraint.max = 0x21f; + constraint.align = 0x8; + tree.allocate(&constraint); + + // Valued case. + assert_eq!( + tree.get_superset(&Range::new(0x100u32, 0x100)), + Some((&Range::new(0x100, 0x100u32), NodeState::Valued(&1))) + ); + + // Free case. + assert_eq!( + tree.get_superset(&Range::new(0x200u16, 0x200)), + Some((&Range::new(0x200, 0x217u64), NodeState::Free)) + ); + assert_eq!( + tree.get_superset(&Range::new(0x2ffu32, 0x2ff)), + Some((&Range::new(0x220, 0x2ffu32), NodeState::Free)) + ); + + // Allocated case. + assert_eq!( + tree.get_superset(&Range::new(0x218u16, 0x21f)), + Some((&Range::new(0x218, 0x21fu16), NodeState::Allocated)) + ); + + // None case. + assert_eq!(tree.get_superset(&Range::new(0x2ffu32, 0x300)), None); + assert_eq!(tree.get_superset(&Range::new(0x300u32, 0x300)), None); + assert_eq!(tree.get_superset(&Range::new(0x1ffu32, 0x300)), None); + } + + #[test] + fn test_tree_get_superset_mut() { + let mut tree = IntervalTree::::new(); + tree.insert(Range::new(0x100u32, 0x100u32), Some(1)); + tree.insert(Range::new(0x200u16, 0x2ffu16), None); + let mut constraint = Constraint::new(8u64); + constraint.min = 0x211; + constraint.max = 0x21f; + constraint.align = 0x8; + tree.allocate(&constraint); + + // Valued case. + assert_eq!( + tree.get_superset_mut(&Range::new(0x100u32, 0x100u32)), + Some((&Range::new(0x100u32, 0x100u32), NodeState::Valued(&mut 1))) + ); + + // Allocated case. + assert_eq!( + tree.get_superset_mut(&Range::new(0x218u64, 0x21fu64)), + Some((&Range::new(0x218u64, 0x21fu64), NodeState::Allocated)) + ); + + // Free case. + assert_eq!( + tree.get_superset_mut(&Range::new(0x2ffu32, 0x2ffu32)), + Some((&Range::new(0x220u32, 0x2ffu32), NodeState::Free)) + ); + + // None case. 
+ assert_eq!(tree.get_superset(&Range::new(0x2ffu32, 0x300)), None); + assert_eq!(tree.get_superset(&Range::new(0x300u32, 0x300)), None); + assert_eq!(tree.get_superset(&Range::new(0x1ffu32, 0x300)), None); + } + + #[test] + fn test_tree_update() { + let mut tree = IntervalTree::::new(); + tree.insert(Range::new(0x100u32, 0x100u32), None); + tree.insert(Range::new(0x200u32, 0x2ffu32), None); + + let constraint = Constraint::new(2u32); + let key = tree.allocate(&constraint); + assert_eq!(key, Some(Range::new(0x200u32, 0x201u32))); + let old = tree.update(&Range::new(0x200u32, 0x201u32), 2); + assert_eq!(old, None); + let old = tree.update(&Range::new(0x200u32, 0x201u32), 3); + assert_eq!(old, Some(2)); + let old = tree.update(&Range::new(0x200u32, 0x200u32), 4); + assert_eq!(old, None); + let old = tree.update(&Range::new(0x200u32, 0x203u32), 5); + assert_eq!(old, None); + + tree.delete(&Range::new(0x200u32, 0x201u32)); + let old = tree.update(&Range::new(0x200u32, 0x201u32), 2); + assert_eq!(old, None); + } + + #[test] + fn test_tree_delete() { + let mut tree = IntervalTree::::new(); + assert_eq!(tree.get(&Range::new(0x101u32, 0x101u32)), None); + assert!(tree.is_empty()); + tree.insert(Range::new(0x100u32, 0x100u32), Some(1)); + tree.insert(Range::new(0x001u16, 0x00fu16), None); + tree.insert(Range::new(0x200u32, 0x2ffu32), None); + assert!(!tree.is_empty()); + assert_eq!( + tree.get(&Range::new(0x100u32, 0x100u32)), + Some(NodeState::Valued(&1)) + ); + assert_eq!( + tree.get(&Range::new(0x200u32, 0x2ffu32)), + Some(NodeState::Free) + ); + assert_eq!(tree.get(&Range::new(0x101u32, 0x101u32)), None); + + let old = tree.delete(&Range::new(0x001u16, 0x00fu16)); + assert_eq!(old, None); + let old = tree.delete(&Range::new(0x100u32, 0x100u32)); + assert_eq!(old, Some(1)); + let old = tree.delete(&Range::new(0x200u32, 0x2ffu32)); + assert_eq!(old, None); + + assert!(tree.is_empty()); + assert_eq!(tree.get(&Range::new(0x100u32, 0x100u32)), None); + assert_eq!(tree.get(&Range::new(0x200u32, 0x2ffu32)), None); + } + + #[test] + fn test_allocate_free() { + let mut tree = IntervalTree::::new(); + let mut constraint = Constraint::new(1u8); + + assert_eq!(tree.allocate(&constraint), None); + tree.insert(Range::new(0x100u16, 0x100u16), None); + tree.insert(Range::new(0x200u16, 0x2ffu16), None); + + let key = tree.allocate(&constraint); + assert_eq!(key, Some(Range::new(0x100u16, 0x100u16))); + let old = tree.update(&Range::new(0x100u16, 0x100u16), 2); + assert_eq!(old, None); + let val = tree.get(&Range::new(0x100u16, 0x100u16)); + assert_eq!(val, Some(NodeState::Valued(&2))); + + constraint.min = 0x100; + constraint.max = 0x100; + assert_eq!(tree.allocate(&constraint), None); + + constraint.min = 0x201; + constraint.max = 0x300; + constraint.align = 0x8; + constraint.size = 0x10; + assert_eq!( + tree.allocate(&constraint), + Some(Range::new(0x208u16, 0x217u16)) + ); + + // Free the node when it's still in 'Allocated' state. + let old = tree.free(&Range::new(0x208u16, 0x217u16)); + assert_eq!(old, None); + + // Reallocate the freed resource. + assert_eq!( + tree.allocate(&constraint), + Some(Range::new(0x208u16, 0x217u16)) + ); + + constraint.size = 0x100; + assert_eq!(tree.allocate(&constraint), None); + + // Verify that allocating a bigger range with smaller allocated range fails. + constraint.min = 0x200; + constraint.max = 0x2ff; + constraint.align = 0x8; + constraint.size = 0x100; + assert_eq!(tree.allocate(&constraint), None); + + // Free the node when it's in 'Valued' state. 
+ tree.update(&Range::new(0x208u16, 0x217u16), 0x10); + assert_eq!(tree.allocate(&constraint), None); + let old = tree.free(&Range::new(0x208u16, 0x217u16)); + assert_eq!(old, Some(0x10)); + + // Reallocate the freed resource, verify that adjacent free nodes have been merged. + assert_eq!( + tree.allocate(&constraint), + Some(Range::new(0x200u32, 0x2ffu32)) + ); + } + + #[test] + fn test_with_size() { + let range_a = Range::with_size(1u8, 3u8); + let range_b = Range::with_size(4u16, 2u16); + let range_c = Range::with_size(2u32, 1u32); + let range_d = Range::with_size(4u64, 0u64); + let range_e = Range::with_size(5u32, 1u32); + + assert_eq!(range_a, Range::new(1u8, 4u8)); + assert_eq!(range_b, Range::new(4u16, 6u16)); + assert_eq!(range_c, Range::new(2u32, 3u32)); + assert_eq!(range_d, Range::new(4u64, 4u64)); + assert_eq!(range_e, Range::new(5u32, 6u32)); + } + + #[test] + fn test_new_point() { + let range_a = Range::new_point(1u8); + let range_b = Range::new_point(2u16); + let range_c = Range::new_point(3u32); + let range_d = Range::new_point(4u64); + let range_e = Range::new_point(5u32); + + assert_eq!(range_a, Range::with_size(1u8, 0u8)); + assert_eq!(range_b, Range::with_size(2u16, 0u16)); + assert_eq!(range_c, Range::with_size(3u32, 0u32)); + assert_eq!(range_d, Range::with_size(4u64, 0u64)); + assert_eq!(range_e, Range::with_size(5u32, 0u32)); + } + + #[test] + fn test_get_by_id() { + let mut tree = IntervalTree::::new(); + tree.insert(Range::new(0x100u16, 0x100u16), Some(1)); + tree.insert(Range::new(0x001u32, 0x005u32), Some(2)); + tree.insert(Range::new(0x200u16, 0x2ffu16), None); + + assert_eq!(tree.get_by_id(0x100u16), Some(&1)); + assert_eq!(tree.get_by_id(0x002u32), Some(&2)); + assert_eq!(tree.get_by_id(0x210u32), None); + assert_eq!(tree.get_by_id(0x2ffu64), None); + } + + #[test] + fn test_get_by_id_mut() { + let mut tree = IntervalTree::::new(); + tree.insert(Range::new(0x100u16, 0x100u16), Some(1)); + tree.insert(Range::new(0x001u32, 0x005u32), Some(2)); + tree.insert(Range::new(0x200u16, 0x2ffu16), None); + + assert_eq!(tree.get_by_id_mut(0x100u16), Some(&mut 1)); + assert_eq!(tree.get_by_id_mut(0x002u32), Some(&mut 2)); + assert_eq!(tree.get_by_id_mut(0x210u32), None); + assert_eq!(tree.get_by_id_mut(0x2ffu64), None); + } +} diff --git a/src/dragonball/src/dbs_allocator/src/lib.rs b/src/dragonball/src/dbs_allocator/src/lib.rs new file mode 100644 index 000000000000..c489290d9791 --- /dev/null +++ b/src/dragonball/src/dbs_allocator/src/lib.rs @@ -0,0 +1,164 @@ +// Copyright (C) 2019, 2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Data structures and algorithms to support resource allocation and management. +//! +//! The `dbs-allocator` crate provides data structures and algorithms to manage and allocate +//! integer identifiable resources. The resource manager in virtual machine monitor (VMM) may +//! manage and allocate resources for virtual machines by using: +//! - [Constraint]: Struct to declare constraints for resource allocation. +//! - [IntervalTree]: An interval tree implementation specialized for VMM resource management. + +#![deny(missing_docs)] + +pub mod interval_tree; +pub use interval_tree::{IntervalTree, NodeState, Range}; + +/// Error codes for resource allocation operations. +#[derive(thiserror::Error, Debug, Eq, PartialEq)] +pub enum Error { + /// Invalid boundary for resource allocation. 
+ #[error("invalid boundary constraint: min ({0}), max ({1})")] + InvalidBoundary(u64, u64), +} + +/// Specialized version of [`std::result::Result`] for resource allocation operations. +pub type Result = std::result::Result; + +/// Resource allocation policies. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum AllocPolicy { + /// Default resource allocation policy. + Default, + /// Return the first available resource matching the allocation constraints. + FirstMatch, +} + +/// Struct to declare resource allocation constraints. +#[derive(Copy, Clone, Debug)] +pub struct Constraint { + /// Size of resource to allocate. + pub size: u64, + /// Lower boundary for resource allocation. + pub min: u64, + /// Upper boundary for resource allocation. + pub max: u64, + /// Alignment for allocated resource. + pub align: u64, + /// Policy for resource allocation. + pub policy: AllocPolicy, +} + +impl Constraint { + /// Create a new instance of [`Constraint`] with default settings. + pub fn new(size: T) -> Self + where + u64: From, + { + Constraint { + size: u64::from(size), + min: 0, + max: u64::MAX, + align: 1, + policy: AllocPolicy::Default, + } + } + + /// Set the lower boundary constraint for resource allocation. + pub fn min(mut self, min: T) -> Self + where + u64: From, + { + self.min = u64::from(min); + self + } + + /// Set the upper boundary constraint for resource allocation. + pub fn max(mut self, max: T) -> Self + where + u64: From, + { + self.max = u64::from(max); + self + } + + /// Set the alignment constraint for allocated resource. + pub fn align(mut self, align: T) -> Self + where + u64: From, + { + self.align = u64::from(align); + self + } + + /// Set the resource allocation policy. + pub fn policy(mut self, policy: AllocPolicy) -> Self { + self.policy = policy; + self + } + + /// Validate the resource allocation constraints. + pub fn validate(&self) -> Result<()> { + if self.max < self.min { + return Err(Error::InvalidBoundary(self.min, self.max)); + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_set_min() { + let constraint = Constraint::new(2_u64).min(1_u64); + assert_eq!(constraint.min, 1_u64); + } + + #[test] + fn test_set_max() { + let constraint = Constraint::new(2_u64).max(100_u64); + assert_eq!(constraint.max, 100_u64); + } + + #[test] + fn test_set_align() { + let constraint = Constraint::new(2_u64).align(8_u64); + assert_eq!(constraint.align, 8_u64); + } + + #[test] + fn test_set_policy() { + let mut constraint = Constraint::new(2_u64).policy(AllocPolicy::FirstMatch); + assert_eq!(constraint.policy, AllocPolicy::FirstMatch); + constraint = constraint.policy(AllocPolicy::Default); + assert_eq!(constraint.policy, AllocPolicy::Default); + } + + #[test] + fn test_consistently_change_constraint() { + let constraint = Constraint::new(2_u64) + .min(1_u64) + .max(100_u64) + .align(8_u64) + .policy(AllocPolicy::FirstMatch); + assert_eq!(constraint.min, 1_u64); + assert_eq!(constraint.max, 100_u64); + assert_eq!(constraint.align, 8_u64); + assert_eq!(constraint.policy, AllocPolicy::FirstMatch); + } + + #[test] + fn test_set_invalid_boundary() { + // Normal case. + let constraint = Constraint::new(2_u64).max(1000_u64).min(999_u64); + assert!(constraint.validate().is_ok()); + + // Error case. 
+ let constraint = Constraint::new(2_u64).max(999_u64).min(1000_u64); + assert_eq!( + constraint.validate(), + Err(Error::InvalidBoundary(1000u64, 999u64)) + ); + } +} diff --git a/src/dragonball/src/dbs_arch/Cargo.toml b/src/dragonball/src/dbs_arch/Cargo.toml new file mode 100644 index 000000000000..79b2957fc6b2 --- /dev/null +++ b/src/dragonball/src/dbs_arch/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "dbs-arch" +version = "0.2.3" +authors = ["Alibaba Dragonball Team"] +license = "Apache-2.0 AND BSD-3-Clause" +edition = "2018" +description = "A collection of CPU architecture specific constants and utilities." +homepage = "https://github.com/openanolis/dragonball-sandbox" +repository = "https://github.com/openanolis/dragonball-sandbox" +keywords = ["dragonball", "secure-sandbox", "arch", "ARM64", "x86"] +readme = "README.md" + +[dependencies] +memoffset = "0.6" +kvm-bindings = { version = "0.6.0", features = ["fam-wrappers"] } +kvm-ioctls = "0.12.0" +thiserror = "1" +vm-memory = { version = "0.10" } +vmm-sys-util = "0.11.0" +libc = ">=0.2.39" + +[dev-dependencies] +vm-memory = { version = "0.10", features = ["backend-mmap"] } + +[package.metadata.docs.rs] +all-features = true diff --git a/src/dragonball/src/dbs_arch/LICENSE b/src/dragonball/src/dbs_arch/LICENSE new file mode 120000 index 000000000000..30cff7403da0 --- /dev/null +++ b/src/dragonball/src/dbs_arch/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/dragonball/src/dbs_arch/README.md b/src/dragonball/src/dbs_arch/README.md new file mode 100644 index 000000000000..60ff3a9aabe0 --- /dev/null +++ b/src/dragonball/src/dbs_arch/README.md @@ -0,0 +1,29 @@ +# dbs-arch + +## Design + +The `dbs-arch` crate is a collection of CPU architecture specific constants and utilities to hide CPU architecture details away from the Dragonball Sandbox or other VMMs. +Also, we have provided x86_64 CPUID support in this crate, for more details you could look at [this document](docs/x86_64_cpuid.md) + +## Supported Architectures + +- AMD64 (x86_64) +- ARM64 (aarch64) + +## Submodule List + +This repository contains the following submodules: +| Name | Arch| Description | +| --- | --- | --- | +| [x86_64::cpuid](src/x86_64/cpuid/) | x86_64 |Facilities to process CPUID information. | +| [x86_64::msr](src/x86_64/msr.rs) | x86_64 | Constants and functions for Model Specific Registers | +| [aarch64::gic](src/aarch64/gic) | aarch64 | Structures to manage GICv2/GICv3/ITS devices for ARM64 | +| [aarch64::regs](src/aarch64/regs.rs) | aarch64 | Constants and functions to configure and manage CPU registers | + +## Acknowledgement + +Part of the code is derived from the [Firecracker](https://github.com/firecracker-microvm/firecracker) project. + +## License + +This project is licensed under [Apache License](http://www.apache.org/licenses/LICENSE-2.0), Version 2.0. diff --git a/src/dragonball/src/dbs_arch/THIRD-PARTY b/src/dragonball/src/dbs_arch/THIRD-PARTY new file mode 120000 index 000000000000..301d0a498953 --- /dev/null +++ b/src/dragonball/src/dbs_arch/THIRD-PARTY @@ -0,0 +1 @@ +../../THIRD-PARTY \ No newline at end of file diff --git a/src/dragonball/src/dbs_arch/docs/x86_64_cpuid.md b/src/dragonball/src/dbs_arch/docs/x86_64_cpuid.md new file mode 100644 index 000000000000..57272ac73e56 --- /dev/null +++ b/src/dragonball/src/dbs_arch/docs/x86_64_cpuid.md @@ -0,0 +1,68 @@ +# CPUID + +## Design + +CPUID is designed as the CPUID filter for Intel and AMD CPU Identification. 
Through CPUID configuration, we could set CPU topology, Cache topology, PMU status and other features for the VMs. + +CPUID is developed based on the Firecracker CPUID code while we add other extensions such as CPU Topology and VPMU features. + +## Usage +To use CPUID, you should first use KVM_GET_CPUID2 ioctl to get the original CPUID then use process_cpuid() provided by the db-arch to filter CPUID with the information you want and suitable for VM conditions. + +Currently, we support following specifications that db-arch could use to filter CPUID: +```rust +pub struct VmSpec { + /// The vendor id of the CPU + cpu_vendor_id: [u8; 12], + /// The id of the current logical cpu in the range [0..cpu_count]. + cpu_id: u8, + /// The total number of logical cpus (includes cpus that could be hotplugged). + cpu_count: u8, + /// The desired brand string for the guest. + brand_string: BrandString, + /// threads per core for cpu topology information + threads_per_core: u8, + /// cores per die for cpu topology information + cores_per_die: u8, + /// dies per socket for cpu topology information + dies_per_socket: u8, + /// if vpmu feature is Disabled, it means vpmu feature is off (by default) + /// if vpmu feature is LimitedlyEnabled, it means minimal vpmu counters are supported (cycles and instructions) + /// if vpmu feature is FullyEnabled, it means all vpmu counters are supported + vpmu_feature: VpmuFeatureLevel, +} +``` + +## Example +We will show examples for filtering CPUID. +First, you need to use KVM_GET_CPUID2 ioctl to get the original CPUID, this part is not included in the db-cpuid. + +```rust +// an example for getting the cpuid in the vmm. +let mut cpuid = CpuId::new(num_entries).map_err(|_| errno::Error::new(libc::ENOMEM))?; +let ret = unsafe {ioctl_with_mut_ptr(self, KVM_GET_CPUID2(), cpuid.as_mut_fam_struct_ptr())}; +if ret != 0 { + return Err(errno::Error::last()); +} +``` + +Then we could create the `VmSpec` to describe the VM specification we want and use process_cpuid() to filter CPUID. + +```rust +let cpuid_vm_spec = VmSpec::new( + self.id, + vcpu_config.max_all_vcpu_count as u8, + vcpu_config.threads_per_core, + vcpu_config.cores_per_die, + vcpu_config.dies_per_socket, + vcpu_config.vpmu_feature, + ) + .map_err(VcpuError::CpuId)?; + process_cpuid(&mut self.cpuid, &cpuid_vm_spec).map_err(|e| { + METRICS.vcpu.process_cpuid.inc(); + error!("Failure in configuring CPUID for vcpu {}: {:?}", self.id, e); + VcpuError::CpuId(e) + })?; +``` + +After the CPUID is filtered, we could use it to set the guest's CPUID. diff --git a/src/dragonball/src/dbs_arch/src/aarch64/gic/gicv2.rs b/src/dragonball/src/dbs_arch/src/aarch64/gic/gicv2.rs new file mode 100644 index 000000000000..5984570a9b4c --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/aarch64/gic/gicv2.rs @@ -0,0 +1,110 @@ +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use kvm_ioctls::DeviceFd; + +use super::{GICDevice, Result}; + +/// Represent a GIC v2 device +pub struct GICv2 { + /// The file descriptor for the KVM device + fd: DeviceFd, + + /// GIC device properties, to be used for setting up the fdt entry + properties: [u64; 4], + + /// Number of CPUs handled by the device + vcpu_count: u64, +} + +impl GICv2 { + // Unfortunately bindgen omits defines that are based on other defines. + // See arch/arm64/include/uapi/asm/kvm.h file from the linux kernel. 
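+    //
+    // Rough layout used below (addresses grow downwards from the
+    // GIC_REG_END_ADDRESS boundary):
+    //   [END - 0x1000, END)           distributor, 4 KiB
+    //   [END - 0x3000, END - 0x1000)  CPU interface, 8 KiB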
+ const KVM_VGIC_V2_DIST_SIZE: u64 = 0x1000; + const KVM_VGIC_V2_CPU_SIZE: u64 = 0x2000; + + // Device trees specific constants + const ARCH_GIC_V2_MAINT_IRQ: u32 = 8; + + /// Get the address of the GICv2 distributor. + const fn get_dist_addr() -> u64 { + crate::aarch64::gic::GIC_REG_END_ADDRESS - GICv2::KVM_VGIC_V2_DIST_SIZE + } + + /// Get the size of the GIC_v2 distributor. + const fn get_dist_size() -> u64 { + GICv2::KVM_VGIC_V2_DIST_SIZE + } + + /// Get the address of the GIC_v2 CPU. + const fn get_cpu_addr() -> u64 { + GICv2::get_dist_addr() - GICv2::KVM_VGIC_V2_CPU_SIZE + } + + /// Get the size of the GIC_v2 CPU. + const fn get_cpu_size() -> u64 { + GICv2::KVM_VGIC_V2_CPU_SIZE + } +} + +impl GICDevice for GICv2 { + fn device_fd(&self) -> &DeviceFd { + &self.fd + } + + fn device_properties(&self) -> &[u64] { + &self.properties + } + + fn vcpu_count(&self) -> u64 { + self.vcpu_count + } + + fn fdt_compatibility(&self) -> &str { + "arm,gic-400" + } + + fn fdt_maint_irq(&self) -> u32 { + GICv2::ARCH_GIC_V2_MAINT_IRQ + } + + fn version() -> u32 { + kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2 + } + + fn create_device(fd: DeviceFd, vcpu_count: u64) -> Box { + Box::new(GICv2 { + fd, + properties: [ + GICv2::get_dist_addr(), + GICv2::get_dist_size(), + GICv2::get_cpu_addr(), + GICv2::get_cpu_size(), + ], + vcpu_count, + }) + } + + fn init_device_attributes(gic_device: &dyn GICDevice) -> Result<()> { + /* Setting up the distributor attribute. + We are placing the GIC below 1GB so we need to substract the size of the distributor. */ + Self::set_device_attribute( + gic_device.device_fd(), + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_V2_ADDR_TYPE_DIST), + &GICv2::get_dist_addr() as *const u64 as u64, + 0, + )?; + + /* Setting up the CPU attribute. */ + Self::set_device_attribute( + gic_device.device_fd(), + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_V2_ADDR_TYPE_CPU), + &GICv2::get_cpu_addr() as *const u64 as u64, + 0, + )?; + + Ok(()) + } +} diff --git a/src/dragonball/src/dbs_arch/src/aarch64/gic/gicv3.rs b/src/dragonball/src/dbs_arch/src/aarch64/gic/gicv3.rs new file mode 100644 index 000000000000..87a9081d5f60 --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/aarch64/gic/gicv3.rs @@ -0,0 +1,136 @@ +// Copyright 2022 Alibaba Cloud. All Rights Reserved. +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::boxed::Box; +use std::collections::HashMap; + +use kvm_ioctls::{DeviceFd, VmFd}; + +use super::its::ItsType::{PciMsiIts, PlatformMsiIts}; +use super::its::{ItsType, ITS}; +use super::{GICDevice, Result}; + +/// GICv3 instance +pub struct GICv3 { + /// The file descriptor for the KVM device + fd: DeviceFd, + + /// GIC device properties, to be used for setting up the fdt entry + properties: [u64; 4], + + /// Number of CPUs handled by the device + vcpu_count: u64, + + /// ITS instance of this gic control + its: HashMap, +} + +impl GICv3 { + // Unfortunately bindgen omits defines that are based on other defines. + // See arch/arm64/include/uapi/asm/kvm.h file from the linux kernel. + const SZ_64K: u64 = 0x0001_0000; + const KVM_VGIC_V3_DIST_SIZE: u64 = GICv3::SZ_64K; + const KVM_VGIC_V3_REDIST_SIZE: u64 = (2 * GICv3::SZ_64K); + + // Device trees specific constants + const ARCH_GIC_V3_MAINT_IRQ: u32 = 9; + + /// Get the address of the GIC distributor. 
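+    ///
+    /// The distributor occupies the top `KVM_VGIC_V3_DIST_SIZE` (64 KiB) of the
+    /// GIC register window ending at `GIC_REG_END_ADDRESS`; the per-vCPU
+    /// redistributors (2 * 64 KiB each) are stacked directly below it.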
+ fn get_dist_addr() -> u64 { + crate::aarch64::gic::GIC_REG_END_ADDRESS - GICv3::KVM_VGIC_V3_DIST_SIZE + } + + /// Get the size of the GIC distributor. + fn get_dist_size() -> u64 { + GICv3::KVM_VGIC_V3_DIST_SIZE + } + + /// Get the address of the GIC redistributors. + pub fn get_redists_addr(vcpu_count: u64) -> u64 { + GICv3::get_dist_addr() - GICv3::get_redists_size(vcpu_count) + } + + /// Get the size of the GIC redistributors. + fn get_redists_size(vcpu_count: u64) -> u64 { + vcpu_count * GICv3::KVM_VGIC_V3_REDIST_SIZE + } +} + +impl GICDevice for GICv3 { + fn device_fd(&self) -> &DeviceFd { + &self.fd + } + + fn device_properties(&self) -> &[u64] { + &self.properties + } + + fn vcpu_count(&self) -> u64 { + self.vcpu_count + } + + fn fdt_compatibility(&self) -> &str { + "arm,gic-v3" + } + + fn fdt_maint_irq(&self) -> u32 { + GICv3::ARCH_GIC_V3_MAINT_IRQ + } + + fn get_its_reg_range(&self, its_type: &ItsType) -> Option<[u64; 2]> { + self.its.get(its_type).map(|its| its.get_reg_range()) + } + + fn attach_its(&mut self, vm: &VmFd) -> Result<()> { + let its = ITS::new(vm, self, PlatformMsiIts)?; + self.its.insert(PlatformMsiIts, its); + let its = ITS::new(vm, self, PciMsiIts)?; + self.its.insert(PciMsiIts, its); + Ok(()) + } + + fn version() -> u32 { + kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3 + } + + fn create_device(fd: DeviceFd, vcpu_count: u64) -> Box { + Box::new(GICv3 { + fd, + properties: [ + GICv3::get_dist_addr(), + GICv3::get_dist_size(), + GICv3::get_redists_addr(vcpu_count), + GICv3::get_redists_size(vcpu_count), + ], + vcpu_count, + its: HashMap::new(), + }) + } + + fn init_device_attributes(gic_device: &dyn GICDevice) -> Result<()> { + /* Setting up the distributor attribute. + We are placing the GIC below 1GB so we need to substract the size of the distributor. + */ + Self::set_device_attribute( + gic_device.device_fd(), + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_DIST.into(), + &GICv3::get_dist_addr() as *const u64 as u64, + 0, + )?; + + /* Setting up the redistributors' attribute. + We are calculating here the start of the redistributors address. We have one per CPU. + */ + Self::set_device_attribute( + gic_device.device_fd(), + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_REDIST.into(), + &GICv3::get_redists_addr(gic_device.vcpu_count()) as *const u64 as u64, + 0, + )?; + + Ok(()) + } +} diff --git a/src/dragonball/src/dbs_arch/src/aarch64/gic/its.rs b/src/dragonball/src/dbs_arch/src/aarch64/gic/its.rs new file mode 100644 index 000000000000..0f2384b68553 --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/aarch64/gic/its.rs @@ -0,0 +1,81 @@ +// Copyright 2022 Alibaba Cloud. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use kvm_ioctls::{DeviceFd, VmFd}; + +use super::gicv3::GICv3; +use super::{Error, GICDevice, Result}; + +// ITS register range +const REG_RANGE_LEN: u64 = 0x20000; + +/// ITS type +#[derive(Hash, PartialEq, Eq)] +pub enum ItsType { + /// platform msi its + PlatformMsiIts, + /// pci msi its + PciMsiIts, +} + +/// Only GIC-V3 can use ITS +pub struct ITS { + /// The file descriptor for the KVM device + fd: DeviceFd, + reg_range: [u64; 2], +} + +impl ITS { + /// Create an ITS device + pub fn new(vm: &VmFd, gic_ctl: &GICv3, its_type: ItsType) -> Result { + let fd = ITS::create_device_fd(vm)?; + // Define the mmio space of platform msi its after the mmio space of pci msi its + let offset = match its_type { + ItsType::PlatformMsiIts => REG_RANGE_LEN, + ItsType::PciMsiIts => REG_RANGE_LEN * 2, + }; + let vcpu_count = gic_ctl.vcpu_count(); + // No document has been found to accurately describe the storage location and + // length of the ITS register. Currently, we store the ITS register in front of + // the redistributor register. And temporarily refer to the "arm, gic-v3-its" + // kernel document to set the ITS register length to 0x20000.In addition, + // reg_range is a two-tuple, representing the register base address and the + // length of the register address space. + let reg_range: [u64; 2] = [GICv3::get_redists_addr(vcpu_count) - offset, REG_RANGE_LEN]; + let its = ITS { fd, reg_range }; + let reg_base_addr = its.get_reg_range_base_addr(); + its.set_attribute(reg_base_addr)?; + Ok(its) + } + + fn create_device_fd(vm: &VmFd) -> Result { + let mut its_device = kvm_bindings::kvm_create_device { + type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_ITS, + fd: 0, + flags: 0, + }; + vm.create_device(&mut its_device).map_err(Error::CreateITS) + } + + fn set_attribute(&self, reg_base_addr: u64) -> Result<()> { + let attribute = kvm_bindings::kvm_device_attr { + group: kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + attr: u64::from(kvm_bindings::KVM_VGIC_ITS_ADDR_TYPE), + addr: ®_base_addr as *const u64 as u64, + flags: 0, + }; + self.fd + .set_device_attr(&attribute) + .map_err(Error::SetITSAttribute)?; + Ok(()) + } + + fn get_reg_range_base_addr(&self) -> u64 { + self.reg_range[0] + } + + /// Get its reg range + pub fn get_reg_range(&self) -> [u64; 2] { + self.reg_range + } +} diff --git a/src/dragonball/src/dbs_arch/src/aarch64/gic/mod.rs b/src/dragonball/src/dbs_arch/src/aarch64/gic/mod.rs new file mode 100644 index 000000000000..80099aaf7759 --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/aarch64/gic/mod.rs @@ -0,0 +1,218 @@ +// Copyright 2022 Alibaba Cloud. All Rights Reserved. +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +/// Export gicv2 interface +pub mod gicv2; +/// Export gicv3 interface +pub mod gicv3; +/// Export ITS interface +pub mod its; + +use std::{boxed::Box, result}; + +use kvm_ioctls::{DeviceFd, VmFd}; + +use gicv2::GICv2; +use gicv3::GICv3; + +// As per virt/kvm/arm/vgic/vgic-kvm-device.c we need +// the number of interrupts our GIC will support to be: +// * bigger than 32 +// * less than 1023 and +// * a multiple of 32. +// We are setting up our interrupt controller to support a maximum of 128 interrupts. + +/// First usable interrupt on aarch64. +pub const IRQ_BASE: u32 = 32; + +/// Last usable interrupt on aarch64. +pub const IRQ_MAX: u32 = 159; + +/// Define the gic register end address. 
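+///
+/// All GIC regions (distributor, CPU interface or redistributors and, for GICv3,
+/// the ITS frames) are carved out of the space directly below this boundary,
+/// growing downwards.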
+pub const GIC_REG_END_ADDRESS: u64 = 1 << 30; // 1GB + +/// Errors thrown while setting up the GIC. +#[derive(Debug)] +pub enum Error { + /// Error while calling KVM ioctl for setting up the global interrupt controller. + CreateGIC(kvm_ioctls::Error), + /// Error while setting device attributes for the GIC. + SetDeviceAttribute(kvm_ioctls::Error), + /// The number of vCPUs in the GicState doesn't match the number of vCPUs on the system + InconsistentVcpuCount, + /// The VgicSysRegsState is invalid + InvalidVgicSysRegState, + /// ERROR while create ITS fail + CreateITS(kvm_ioctls::Error), + /// ERROR while set ITS attr fail + SetITSAttribute(kvm_ioctls::Error), +} +type Result = result::Result; + +/// Function that flushes `RDIST` pending tables into guest RAM. +/// +/// The tables get flushed to guest RAM whenever the VM gets stopped. +pub fn save_pending_tables(fd: &DeviceFd) -> Result<()> { + let init_gic_attr = kvm_bindings::kvm_device_attr { + group: kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, + attr: u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES), + addr: 0, + flags: 0, + }; + fd.set_device_attr(&init_gic_attr) + .map_err(Error::SetDeviceAttribute) +} + +/// Trait for GIC devices. +pub trait GICDevice: Send { + /// Returns the file descriptor of the GIC device + fn device_fd(&self) -> &DeviceFd; + + /// Returns an array with GIC device properties + fn device_properties(&self) -> &[u64]; + + /// Returns the number of vCPUs this GIC handles + fn vcpu_count(&self) -> u64; + + /// Returns the fdt compatibility property of the device + fn fdt_compatibility(&self) -> &str; + + /// Returns the maint_irq fdt property of the device + fn fdt_maint_irq(&self) -> u32; + + /// Get ITS reg range + fn get_its_reg_range(&self, _its_type: &its::ItsType) -> Option<[u64; 2]> { + None + } + + /// Only gic-v3 has its + fn attach_its(&mut self, _vm: &VmFd) -> Result<()> { + Ok(()) + } + + /// Returns the GIC version of the device + fn version() -> u32 + where + Self: Sized; + + /// Create the GIC device object + fn create_device(fd: DeviceFd, vcpu_count: u64) -> Box + where + Self: Sized; + + /// Setup the device-specific attributes + fn init_device_attributes(gic_device: &dyn GICDevice) -> Result<()> + where + Self: Sized; + + /// Initialize a GIC device + fn init_device(vm: &VmFd) -> Result + where + Self: Sized, + { + let mut gic_device = kvm_bindings::kvm_create_device { + type_: Self::version(), + fd: 0, + flags: 0, + }; + + vm.create_device(&mut gic_device).map_err(Error::CreateGIC) + } + + /// Set a GIC device attribute + fn set_device_attribute( + fd: &DeviceFd, + group: u32, + attr: u64, + addr: u64, + flags: u32, + ) -> Result<()> + where + Self: Sized, + { + let attr = kvm_bindings::kvm_device_attr { + group, + attr, + addr, + flags, + }; + fd.set_device_attr(&attr) + .map_err(Error::SetDeviceAttribute)?; + + Ok(()) + } + + /// Finalize the setup of a GIC device + fn finalize_device(gic_device: &dyn GICDevice) -> Result<()> + where + Self: Sized, + { + /* We need to tell the kernel how many irqs to support with this vgic. + * See the `layout` module for details. + */ + let nr_irqs: u32 = IRQ_MAX - IRQ_BASE + 1; + let nr_irqs_ptr = &nr_irqs as *const u32; + Self::set_device_attribute( + gic_device.device_fd(), + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + 0, + nr_irqs_ptr as u64, + 0, + )?; + + /* Finalize the GIC. + * See https://code.woboq.org/linux/linux/virt/kvm/arm/vgic/vgic-kvm-device.c.html#211. 
+ */ + Self::set_device_attribute( + gic_device.device_fd(), + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, + u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_CTRL_INIT), + 0, + 0, + )?; + + Ok(()) + } + + #[allow(clippy::new_ret_no_self)] + /// Method to initialize the GIC device + fn new(vm: &VmFd, vcpu_count: u64) -> Result> + where + Self: Sized, + { + let vgic_fd = Self::init_device(vm)?; + + let mut device = Self::create_device(vgic_fd, vcpu_count); + + device.attach_its(vm)?; + + Self::init_device_attributes(device.as_ref())?; + + Self::finalize_device(device.as_ref())?; + + Ok(device) + } +} + +/// Create a GIC device. +/// +/// It will try to create by default a GICv3 device. If that fails it will try +/// to fall-back to a GICv2 device. +pub fn create_gic(vm: &VmFd, vcpu_count: u64) -> Result> { + GICv3::new(vm, vcpu_count).or_else(|_| GICv2::new(vm, vcpu_count)) +} + +#[cfg(test)] +mod tests { + + use super::*; + use kvm_ioctls::Kvm; + + #[test] + fn test_create_gic() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + assert!(create_gic(&vm, 1).is_ok()); + } +} diff --git a/src/dragonball/src/dbs_arch/src/aarch64/mod.rs b/src/dragonball/src/dbs_arch/src/aarch64/mod.rs new file mode 100644 index 000000000000..89892e45ddf3 --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/aarch64/mod.rs @@ -0,0 +1,139 @@ +// Copyright 2021 Alibaba Cloud. All Rights Reserved. +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! CPU architecture specific constants, structures and utilities for the `aarch64` architecture. + +/// Module for the global interrupt controller configuration. +pub mod gic; +/// Module for PMU virtualization. +pub mod pmu; +/// Logic for configuring aarch64 registers. +pub mod regs; + +use std::{fmt, result}; + +const MMIO_DEVICE_LEGACY_IRQ_NUMBER: usize = 1; + +/// Error for ARM64 architecture information +#[derive(Debug)] +pub enum Error { + /// MMIO device information error + MMIODeviceInfoError, + /// Invalid arguments + InvalidArguments, +} + +type Result = result::Result; + +/// Types of devices that can get attached to this platform. +#[derive(Clone, Debug, PartialEq, Eq, Hash, Copy)] +pub enum DeviceType { + /// Device Type: Virtio. + Virtio(u32), + /// Device Type: Serial. + #[cfg(target_arch = "aarch64")] + Serial, + /// Device Type: RTC. + #[cfg(target_arch = "aarch64")] + RTC, +} + +impl fmt::Display for DeviceType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{self:?}") + } +} + +/// Trait for devices to be added to the Flattened Device Tree. +pub trait DeviceInfoForFDT { + /// Returns the address where this device will be loaded. + fn addr(&self) -> u64; + /// Returns the amount of memory that needs to be reserved for this device. + fn length(&self) -> u64; + /// Returns the associated interrupt for this device. + fn irq(&self) -> Result; + /// Get device id + fn get_device_id(&self) -> Option; +} + +/// MMIO device info used for FDT generating. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct MMIODeviceInfo { + /// MMIO address base + pub base: u64, + /// MMIO address size + pub size: u64, + /// Device irq + pub irqs: Vec, + /// Only virtio devices that support platform msi have device id + pub device_id: Option, +} + +impl MMIODeviceInfo { + /// Create mmio device info. 
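+    ///
+    /// * `base` - guest physical address at which the MMIO region starts
+    /// * `size` - length of the MMIO region in bytes
+    /// * `irqs` - legacy IRQs; `irq()` expects exactly one SPI in the
+    ///   [`gic::IRQ_BASE`, `gic::IRQ_MAX`] range
+    /// * `device_id` - MSI device id, only present for virtio devices that
+    ///   support platform MSI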
+ pub fn new(base: u64, size: u64, irqs: Vec, device_id: Option) -> Self { + MMIODeviceInfo { + base, + size, + irqs, + device_id, + } + } +} + +impl DeviceInfoForFDT for MMIODeviceInfo { + fn addr(&self) -> u64 { + self.base + } + + fn length(&self) -> u64 { + self.size + } + + fn irq(&self) -> Result { + // Currently mmio devices have only one legacy irq. + if self.irqs.len() != MMIO_DEVICE_LEGACY_IRQ_NUMBER { + return Err(Error::MMIODeviceInfoError); + } + let irq = self.irqs[0]; + if !(gic::IRQ_BASE..=gic::IRQ_MAX).contains(&irq) { + return Err(Error::MMIODeviceInfoError); + } + + Ok(irq) + } + + fn get_device_id(&self) -> Option { + self.device_id + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mmo_device_info() { + let info = MMIODeviceInfo::new(0x1000, 0x2000, vec![gic::IRQ_BASE], Some(5)); + assert_eq!(info.addr(), 0x1000); + assert_eq!(info.length(), 0x2000); + assert_eq!(info.irq().unwrap(), gic::IRQ_BASE); + assert_eq!(info.get_device_id(), Some(5)); + + let info = MMIODeviceInfo::new(0x1000, 0x2000, vec![gic::IRQ_BASE], None); + assert_eq!(info.get_device_id(), None); + } + + #[test] + fn test_mmo_device_info_get_irq() { + let info = MMIODeviceInfo::new(0x1000, 0x1000, vec![], None); + assert!(info.irq().is_err()); + let info = MMIODeviceInfo::new(0x1000, 0x1000, vec![1, 2], None); + assert!(info.irq().is_err()); + let info = MMIODeviceInfo::new(0x1000, 0x1000, vec![gic::IRQ_BASE - 1], None); + assert!(info.irq().is_err()); + let info = MMIODeviceInfo::new(0x1000, 0x1000, vec![gic::IRQ_MAX + 1], None); + assert!(info.irq().is_err()); + } +} diff --git a/src/dragonball/src/dbs_arch/src/aarch64/pmu.rs b/src/dragonball/src/dbs_arch/src/aarch64/pmu.rs new file mode 100644 index 000000000000..8d939a57646f --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/aarch64/pmu.rs @@ -0,0 +1,172 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Constants and utilities for aarch64 PMU virtualization. + +use kvm_bindings::{ + kvm_device_attr, KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT, KVM_ARM_VCPU_PMU_V3_IRQ, +}; +use kvm_ioctls::{Error as KvmError, VcpuFd, VmFd}; +use thiserror::Error; + +/// PPI base number on aarch64. +pub const PPI_BASE: u32 = 16; +/// Pmu ppi number +pub const VIRTUAL_PMU_IRQ: u32 = 7; + +/// Errors thrown while setting up the PMU. +#[derive(Error, Debug)] +pub enum PmuError { + /// Error while check kvm pmu capability + #[error("Check kvm pmu capability failed: {0}")] + CheckKvmPmuCap(#[source] KvmError), + /// Error while check pmu irq. + #[error("Check pmu irq error: {0}")] + HasPmuIrq(#[source] KvmError), + /// Error while check pmu init. + #[error("Check pmu init error: {0}")] + HasPmuInit(#[source] KvmError), + /// Error while set pmu irq. + #[error("Set pmu irq error: {0}")] + SetPmuIrq(#[source] KvmError), + /// Error while set pmu init. + #[error("Set pmu init error: {0}")] + SetPmuInit(#[source] KvmError), +} + +type Result = std::result::Result; + +/// Tests whether a cpu supports KVM_ARM_VCPU_PMU_V3_IRQ attribute. 
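+/// This only probes the attribute via `VcpuFd::has_device_attr` and does not
+/// modify any vCPU state.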
+/// +/// # Arguments +/// * `vcpu` - The VCPU file descriptor +fn has_pmu_irq(vcpu: &VcpuFd) -> Result<()> { + let irq = (VIRTUAL_PMU_IRQ + PPI_BASE) as u64; + let attribute = kvm_device_attr { + group: KVM_ARM_VCPU_PMU_V3_CTRL, + attr: u64::from(KVM_ARM_VCPU_PMU_V3_IRQ), + addr: &irq as *const u64 as u64, + flags: 0, + }; + vcpu.has_device_attr(&attribute) + .map_err(PmuError::HasPmuIrq) +} + +/// Tests whether a cpu supports KVM_ARM_VCPU_PMU_V3_INIT attribute. +/// +/// # Arguments +/// * `vcpu` - The VCPU file descriptor +fn has_pmu_init(vcpu: &VcpuFd) -> Result<()> { + let attribute = kvm_device_attr { + group: KVM_ARM_VCPU_PMU_V3_CTRL, + attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT), + addr: 0, + flags: 0, + }; + vcpu.has_device_attr(&attribute) + .map_err(PmuError::HasPmuInit) +} + +/// Set KVM_ARM_VCPU_PMU_V3_IRQ for a specific vcpu. +/// +/// # Arguments +/// * `vcpu` - The VCPU file descriptor +fn set_pmu_irq(vcpu: &VcpuFd) -> Result<()> { + let irq = (VIRTUAL_PMU_IRQ + PPI_BASE) as u64; + let attribute = kvm_device_attr { + group: KVM_ARM_VCPU_PMU_V3_CTRL, + attr: u64::from(KVM_ARM_VCPU_PMU_V3_IRQ), + addr: &irq as *const u64 as u64, + flags: 0, + }; + vcpu.set_device_attr(&attribute) + .map_err(PmuError::SetPmuIrq) +} + +/// Set KVM_ARM_VCPU_PMU_V3_INIT for a specific vcpu. +/// +/// # Arguments +/// * `vcpu` - The VCPU file descriptor +fn set_pmu_init(vcpu: &VcpuFd) -> Result<()> { + let attribute = kvm_device_attr { + group: KVM_ARM_VCPU_PMU_V3_CTRL, + attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT), + addr: 0, + flags: 0, + }; + vcpu.set_device_attr(&attribute) + .map_err(PmuError::SetPmuInit) +} + +/// Check kvm pmu capability +/// +/// # Arguments +/// * `vm` - The VM file descriptor +fn check_kvm_pmu_cap(_vm: &VmFd) -> Result<()> { + // TODO: check KVM_CAP_ARM_PMU_V3 capability before setting PMU + // Cap for KVM_CAP_ARM_PMU_V3 isn't supported in kvm-ioctls upstream, so + // leave a todo here for supporting this check in the future. 
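+ // As an illustration only: once kvm-ioctls exposes the capability, the
+ // check could look roughly like the following (the `Cap::ArmPmuV3` name is
+ // assumed here, it is not an existing kvm-ioctls variant):
+ //
+ //     if !vm.check_extension(Cap::ArmPmuV3) {
+ //         return Err(PmuError::CheckKvmPmuCap(...));
+ //     }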
+ // Interface: vm.check_extension(kvm_ioctls::Cap)
+
+ Ok(())
+}
+
+/// Check pmu feature
+///
+/// # Arguments
+/// * `vcpu` - The VCPU file descriptor
+fn check_pmu_feature(vcpu: &VcpuFd) -> Result<()> {
+ has_pmu_irq(vcpu)?;
+ has_pmu_init(vcpu)
+}
+
+/// Set pmu feature
+///
+/// # Arguments
+/// * `vcpu` - The VCPU file descriptor
+fn set_pmu_feature(vcpu: &VcpuFd) -> Result<()> {
+ set_pmu_irq(vcpu)?;
+ set_pmu_init(vcpu)
+}
+
+/// Initialize the PMU for a vcpu
+///
+/// # Arguments
+/// * `vm` - The VM file descriptor
+/// * `vcpu` - The VCPU file descriptor
+pub fn initialize_pmu(vm: &VmFd, vcpu: &VcpuFd) -> Result<()> {
+ check_kvm_pmu_cap(vm)?;
+ check_pmu_feature(vcpu)?;
+ set_pmu_feature(vcpu)
+}
+
+#[cfg(test)]
+mod tests {
+ use kvm_bindings::{kvm_vcpu_init, KVM_ARM_VCPU_PMU_V3, KVM_ARM_VCPU_PSCI_0_2};
+ use kvm_ioctls::Kvm;
+
+ use super::*;
+ use crate::gic::create_gic;
+
+ #[test]
+ fn test_create_pmu() {
+ let kvm = Kvm::new().unwrap();
+ let vm = kvm.create_vm().unwrap();
+ let vcpu = vm.create_vcpu(0).unwrap();
+
+ assert!(create_gic(&vm, 1).is_ok());
+ assert!(initialize_pmu(&vm, &vcpu).is_err());
+
+ if check_kvm_pmu_cap(&vm).is_err() {
+ return;
+ }
+
+ let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
+ vm.get_preferred_target(&mut kvi)
+ .expect("Cannot get preferred target");
+ kvi.features[0] = 1 << KVM_ARM_VCPU_PSCI_0_2 | 1 << KVM_ARM_VCPU_PMU_V3;
+
+ assert!(vcpu.vcpu_init(&kvi).is_ok());
+ assert!(initialize_pmu(&vm, &vcpu).is_ok());
+ }
+}
diff --git a/src/dragonball/src/dbs_arch/src/aarch64/regs.rs b/src/dragonball/src/dbs_arch/src/aarch64/regs.rs
new file mode 100644
index 000000000000..ff57edd1a91e
--- /dev/null
+++ b/src/dragonball/src/dbs_arch/src/aarch64/regs.rs
@@ -0,0 +1,200 @@
+// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the THIRD-PARTY file.
+
+//! Constants and utilities for aarch64 CPU generic, system and model specific registers.
+
+use std::{mem, result};
+
+use kvm_bindings::*;
+use kvm_ioctls::VcpuFd;
+use memoffset::offset_of;
+use vmm_sys_util;
+
+/// Errors thrown while setting aarch64 registers.
+#[derive(Debug)]
+pub enum Error {
+ /// Failed to get core register (PC, PSTATE or general purpose ones).
+ GetCoreRegister(kvm_ioctls::Error),
+ /// Failed to set core register (PC, PSTATE or general purpose ones).
+ SetCoreRegister(kvm_ioctls::Error),
+ /// Failed to get a system register.
+ GetSysRegister(kvm_ioctls::Error),
+ /// Failed to get the register list.
+ GetRegList(kvm_ioctls::Error),
+ /// Failed to set a register.
+ SetRegister(kvm_ioctls::Error),
+ /// Failed to init fam reglist.
+ FamRegister(vmm_sys_util::fam::Error),
+}
+type Result<T> = result::Result<T, Error>;
+
+#[allow(non_upper_case_globals)]
+// PSR (Processor State Register) bits.
+// Taken from arch/arm64/include/uapi/asm/ptrace.h.
+const PSR_MODE_EL1h: u64 = 0x0000_0005;
+const PSR_F_BIT: u64 = 0x0000_0040;
+const PSR_I_BIT: u64 = 0x0000_0080;
+const PSR_A_BIT: u64 = 0x0000_0100;
+const PSR_D_BIT: u64 = 0x0000_0200;
+// Taken from arch/arm64/kvm/inject_fault.c.
+const PSTATE_FAULT_BITS_64: u64 = PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;
+
+// Following are macros that help with getting the ID of an aarch64 core register.
+// The core registers are represented by the user_pt_regs structure.
Look for it in +// arch/arm64/include/uapi/asm/ptrace.h. + +macro_rules! arm64_core_reg { + ($reg: tt) => { + // As per `kvm_arm_copy_reg_indices`, the id of a core register can be obtained like this: + // `const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | i`, where i is obtained with: + // `for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {` + // We are using here `user_pt_regs` since this structure contains the core register and it is at + // the start of `kvm_regs`. + // struct kvm_regs { + // struct user_pt_regs regs; /* sp = sp_el0 */ + // + // __u64 sp_el1; + // __u64 elr_el1; + // + // __u64 spsr[KVM_NR_SPSR]; + // + // struct user_fpsimd_state fp_regs; + //}; + // struct user_pt_regs { + // __u64 regs[31]; + // __u64 sp; + // __u64 pc; + // __u64 pstate; + //}; + // In our implementation we need: pc, pstate and user_pt_regs->regs[0]. + KVM_REG_ARM64 as u64 + | KVM_REG_SIZE_U64 as u64 + | u64::from(KVM_REG_ARM_CORE) + | ((offset_of!(user_pt_regs, $reg) / mem::size_of::()) as u64) + }; +} + +// This macro computes the ID of a specific ARM64 system register similar to how +// the kernel C macro does. +// https://elixir.bootlin.com/linux/v4.20.17/source/arch/arm64/include/uapi/asm/kvm.h#L203 +macro_rules! arm64_sys_reg { + ($name: tt, $op0: tt, $op1: tt, $crn: tt, $crm: tt, $op2: tt) => { + const $name: u64 = KVM_REG_ARM64 as u64 + | KVM_REG_SIZE_U64 as u64 + | KVM_REG_ARM64_SYSREG as u64 + | ((($op0 as u64) << KVM_REG_ARM64_SYSREG_OP0_SHIFT) + & KVM_REG_ARM64_SYSREG_OP0_MASK as u64) + | ((($op1 as u64) << KVM_REG_ARM64_SYSREG_OP1_SHIFT) + & KVM_REG_ARM64_SYSREG_OP1_MASK as u64) + | ((($crn as u64) << KVM_REG_ARM64_SYSREG_CRN_SHIFT) + & KVM_REG_ARM64_SYSREG_CRN_MASK as u64) + | ((($crm as u64) << KVM_REG_ARM64_SYSREG_CRM_SHIFT) + & KVM_REG_ARM64_SYSREG_CRM_MASK as u64) + | ((($op2 as u64) << KVM_REG_ARM64_SYSREG_OP2_SHIFT) + & KVM_REG_ARM64_SYSREG_OP2_MASK as u64); + }; +} + +// Constant imported from the Linux kernel: +// https://elixir.bootlin.com/linux/v4.20.17/source/arch/arm64/include/asm/sysreg.h#L135 +arm64_sys_reg!(MPIDR_EL1, 3, 0, 0, 0, 5); + +/// Configure core registers for a given CPU. +/// +/// # Arguments +/// +/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd. +/// * `cpu_id` - Index of current vcpu. +/// * `boot_ip` - Starting instruction pointer. +/// * `mem` - Reserved DRAM for current VM. +pub fn setup_regs(vcpu: &VcpuFd, cpu_id: u8, boot_ip: u64, fdt_address: u64) -> Result<()> { + // Get the register index of the PSTATE (Processor State) register. + vcpu.set_one_reg(arm64_core_reg!(pstate), PSTATE_FAULT_BITS_64 as u128) + .map_err(Error::SetCoreRegister)?; + + // Other vCPUs are powered off initially awaiting PSCI wakeup. + if cpu_id == 0 { + // Setting the PC (Processor Counter) to the current program address (kernel address). + vcpu.set_one_reg(arm64_core_reg!(pc), boot_ip as u128) + .map_err(Error::SetCoreRegister)?; + + // Last mandatory thing to set -> the address pointing to the FDT (also called DTB). + // "The device tree blob (dtb) must be placed on an 8-byte boundary and must + // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt. + // We are choosing to place it the end of DRAM. See `get_fdt_addr`. + vcpu.set_one_reg(arm64_core_reg!(regs), fdt_address as u128) + .map_err(Error::SetCoreRegister)?; + } + Ok(()) +} + +/// Specifies whether a particular register is a system register or not. 
+/// The kernel splits the registers on aarch64 in core registers and system registers. +/// So, below we get the system registers by checking that they are not core registers. +/// +/// # Arguments +/// +/// * `regid` - The index of the register we are checking. +pub fn is_system_register(regid: u64) -> bool { + if (regid & KVM_REG_ARM_COPROC_MASK as u64) == KVM_REG_ARM_CORE as u64 { + return false; + } + + let size = regid & KVM_REG_SIZE_MASK; + if size != KVM_REG_SIZE_U32 && size != KVM_REG_SIZE_U64 { + panic!("Unexpected register size for system register {}", size); + } + true +} + +/// Read the MPIDR - Multiprocessor Affinity Register. +/// +/// # Arguments +/// +/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd. +pub fn read_mpidr(vcpu: &VcpuFd) -> Result { + vcpu.get_one_reg(MPIDR_EL1) + .map(|value| value as u64) + .map_err(Error::GetSysRegister) +} + +#[cfg(test)] +mod tests { + use super::*; + use kvm_ioctls::Kvm; + + #[test] + fn test_setup_regs() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + match setup_regs(&vcpu, 0, 0x0, crate::gic::GIC_REG_END_ADDRESS).unwrap_err() { + Error::SetCoreRegister(ref e) => assert_eq!(e.errno(), libc::ENOEXEC), + _ => panic!("Expected to receive Error::SetCoreRegister"), + } + let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); + vm.get_preferred_target(&mut kvi).unwrap(); + vcpu.vcpu_init(&kvi).unwrap(); + + assert!(setup_regs(&vcpu, 0, 0x0, crate::gic::GIC_REG_END_ADDRESS).is_ok()); + } + + #[test] + fn test_read_mpidr() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); + vm.get_preferred_target(&mut kvi).unwrap(); + + // Must fail when vcpu is not initialized yet. + assert!(read_mpidr(&vcpu).is_err()); + + vcpu.vcpu_init(&kvi).unwrap(); + assert_eq!(read_mpidr(&vcpu).unwrap(), 0x80000000); + } +} diff --git a/src/dragonball/src/dbs_arch/src/lib.rs b/src/dragonball/src/dbs_arch/src/lib.rs new file mode 100644 index 000000000000..749ae181ffca --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/lib.rs @@ -0,0 +1,67 @@ +// Copyright 2021-2022 Alibaba Cloud. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![deny(missing_docs)] + +//! CPU architecture specific constants, structures and utilities. +//! +//! This crate provides CPU architecture specific constants, structures and utilities to abstract +//! away CPU architecture specific details from the Dragonball Secure Sandbox or other VMMs. +//! +//! # Supported CPU Architectures +//! - **x86_64**: x86_64 (also known as x64, x86-64, AMD64, and Intel 64) is a 64-bit +//! version of the x86 instruction set. +//! - **ARM64**: AArch64 or ARM64 is the 64-bit extension of the ARM architecture. + +#[cfg(target_arch = "x86_64")] +mod x86_64; +#[cfg(target_arch = "x86_64")] +pub use x86_64::*; + +#[cfg(target_arch = "aarch64")] +mod aarch64; +#[cfg(target_arch = "aarch64")] +pub use aarch64::*; + +/// Enum indicating vpmu feature level +#[derive(Debug, Eq, PartialEq, Copy, Clone)] +pub enum VpmuFeatureLevel { + /// Disabled means vpmu feature is off (by default) + Disabled, + /// LimitedlyEnabled means minimal vpmu counters are supported( only cycles and instructions ) + /// For aarch64, LimitedlyEnabled isn't supported currently. The ability will be implemented in the future. 
+ LimitedlyEnabled, + /// FullyEnabled means all vpmu counters are supported + FullyEnabled, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_debug_trait() { + let level = VpmuFeatureLevel::Disabled; + assert_eq!(format!("{level:#?}"), "Disabled"); + + let level = VpmuFeatureLevel::LimitedlyEnabled; + assert_eq!(format!("{level:#?}"), "LimitedlyEnabled"); + + let level = VpmuFeatureLevel::FullyEnabled; + assert_eq!(format!("{level:#?}"), "FullyEnabled"); + } + + #[test] + fn test_eq_trait() { + let level = VpmuFeatureLevel::Disabled; + assert!(level == VpmuFeatureLevel::Disabled); + assert!(level != VpmuFeatureLevel::LimitedlyEnabled); + } + + #[test] + fn test_copy_trait() { + let level1 = VpmuFeatureLevel::Disabled; + let level2 = level1; + assert_eq!(level1, level2); + } +} diff --git a/src/dragonball/src/dbs_arch/src/x86_64/cpuid/bit_helper.rs b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/bit_helper.rs new file mode 100644 index 000000000000..108578c62264 --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/bit_helper.rs @@ -0,0 +1,599 @@ +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Helper to manipulate CPUID register content. + +#![macro_use] + +/// Structure representing a range of bits in a number. +/// +/// # Example +/// +/// ``` +/// #[macro_use] +/// use dbs_arch::cpuid::bit_helper::*; +/// +/// let range = BitRange { +/// msb_index: 7, +/// lsb_index: 3, +/// }; +/// ``` +/// The BitRange specified above will represent the following part of the number 72: +/// +-------------------------------------+---+---+---+---+---+---+---+---+---+---+ +/// | Base 2 Representation of the number | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | +/// +-------------------------------------+---+---+---+---+---+---+---+---+---+---+ +/// | bits indexes | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | +/// +-------------------------------------+---+---+---+---+---+---+---+---+---+---+ +/// | BitRange | | | * | * | * | * | * | | | | +/// +-------------------------------------+---+---+---+---+---+---+---+---+---+---+ +pub struct BitRange { + /// most significant bit index + pub msb_index: u32, + /// least significant bit index + pub lsb_index: u32, +} + +/// Trait containing helper methods for [`BitRange`](struct.BitRange.html) +/// +/// The methods are needed for: +/// - checking if the `BitRange` is valid for a type `T` +/// - creating masks for a type `T` +pub trait BitRangeExt { + /// Returns a value of type `T` that has all the bits in the specified bit range set to 1. + /// + /// # Example + /// + /// ``` + /// #[macro_use] + /// use dbs_arch::cpuid::bit_helper::*; + /// + /// let range = BitRange { + /// msb_index: 7, + /// lsb_index: 3, + /// }; + /// println!("binary value: {:b}", range.get_mask()); + /// ``` + /// The code above will print: + /// ```bash + /// binary value: 11111000 + /// ``` + fn get_mask(&self) -> T; + + /// Checks if the current BitRange is valid for type `T`. + fn is_valid(&self) -> bool; + + /// Asserts if `self.is_valid()` returns true. + fn check(&self) { + assert!(self.is_valid(), "Invalid BitRange"); + } +} + +const MAX_U32_BIT_INDEX: u32 = 31; + +impl BitRangeExt for BitRange { + fn get_mask(&self) -> u32 { + self.check(); + + ((((1_u64) << (self.msb_index - self.lsb_index + 1)) - 1) << self.lsb_index) as u32 + } + + fn is_valid(&self) -> bool { + self.msb_index >= self.lsb_index && self.msb_index <= MAX_U32_BIT_INDEX + } +} + +macro_rules! 
bit_range { + ($msb_index:expr, $lsb_index:expr) => { + BitRange { + msb_index: $msb_index, + lsb_index: $lsb_index, + } + }; +} + +/// Trait containing helper methods for bit operations. +pub trait BitHelper { + /// Reads the value of the bit at position `pos` + fn read_bit(&self, pos: u32) -> bool; + + /// Changes the value of the bit at position `pos` to `val` + fn write_bit(&mut self, pos: u32, val: bool) -> &mut Self; + + /// Reads the value stored within the specified range of bits + /// + /// # Example + /// + /// ``` + /// #[macro_use] + /// use dbs_arch::cpuid::bit_helper::*; + /// + /// let val: u32 = 0b000010001000; + /// let range = BitRange { + /// msb_index: 7, + /// lsb_index: 3, + /// }; + /// println!("binary value: {:b}", val.read_bits_in_range(&range)); + /// ``` + /// The code above will print: + /// ```bash + /// binary value: 10001 + /// ``` + fn read_bits_in_range(&self, bit_range: &BitRange) -> Self; + + /// Stores a value within the specified range of bits + /// + /// # Example + /// + /// ``` + /// #[macro_use] + /// use dbs_arch::cpuid::bit_helper::*; + /// + /// let mut val: u32 = 0; + /// let range = BitRange { + /// msb_index: 7, + /// lsb_index: 3, + /// }; + /// val.write_bits_in_range(&range, 0b10001 as u32); + /// println!("binary value: {:b}", val); + /// ``` + /// The code above will print: + /// ```bash + /// binary value: 10001000 + /// ``` + fn write_bits_in_range(&mut self, bit_range: &BitRange, val: Self) -> &mut Self; +} + +impl BitHelper for u32 { + fn read_bit(&self, pos: u32) -> bool { + assert!(pos <= MAX_U32_BIT_INDEX, "Invalid pos"); + + (*self & (1 << pos)) > 0 + } + + fn write_bit(&mut self, pos: u32, val: bool) -> &mut Self { + assert!(pos <= MAX_U32_BIT_INDEX, "Invalid pos"); + + *self &= !(1 << pos); + *self |= (val as u32) << pos; + self + } + + fn read_bits_in_range(&self, range: &BitRange) -> Self { + range.check(); + + (self & range.get_mask()) >> range.lsb_index + } + + fn write_bits_in_range(&mut self, range: &BitRange, val: Self) -> &mut Self { + range.check(); + let mask = range.get_mask(); + let max_val = mask >> range.lsb_index; + assert!(val <= max_val, "Invalid val"); + + *self &= !mask; + *self |= val << range.lsb_index; + self + } +} + +#[cfg(test)] +mod tests { + use crate::cpuid::bit_helper::*; + + #[test] + #[should_panic] + fn test_invalid_msb_index() { + let range = BitRange { + msb_index: 32, + lsb_index: 2, + }; + range.check(); + } + + #[test] + #[should_panic] + fn test_invalid_range() { + let range = BitRange { + msb_index: 10, + lsb_index: 15, + }; + range.check(); + } + + #[test] + #[should_panic] + fn test_invalid_write_bit() { + // Set bit to 1 + let mut val: u32 = 0; + val.write_bit(32, true); + } + + #[test] + fn test_simple_write_bit() { + // Set bit to 1 + let mut val: u32 = 0; + val.write_bit(5, true); + assert!(val == 1 << 5); + + // Set bit to 0 + val = 1 << 5; + val.write_bit(5, false); + assert!(val == 0); + } + + #[test] + #[should_panic] + fn test_invalid_read_bit() { + // Set bit to 1 + let val: u32 = 0; + val.read_bit(32); + } + + #[test] + fn test_simple_read_bit() { + // Set bit to 1 + let val: u32 = 0b10_0000; + assert!(val.read_bit(5)); + assert!(!val.read_bit(4)); + } + + #[test] + fn test_chained_write_bit() { + let mut val: u32 = 1 << 12; + + val.write_bit(5, true) + .write_bit(10, true) + .write_bit(15, true) + .write_bit(12, false); + assert!(val == 1 << 5 | 1 << 10 | 1 << 15); + } + + #[test] + fn test_get_u32_mask_for_range() { + // Test a couple of successive ranges + assert!( + 
BitRange { + msb_index: 3, + lsb_index: 2 + } + .get_mask() + == 0b1100 + ); + assert!( + BitRange { + msb_index: 4, + lsb_index: 2 + } + .get_mask() + == 0b11100 + ); + assert!( + BitRange { + msb_index: 5, + lsb_index: 2 + } + .get_mask() + == 0b11_1100 + ); + assert!( + BitRange { + msb_index: 6, + lsb_index: 2 + } + .get_mask() + == 0b111_1100 + ); + assert!( + BitRange { + msb_index: 7, + lsb_index: 2 + } + .get_mask() + == 0b1111_1100 + ); + } + + #[test] + #[should_panic] + fn test_invalid_read_bits() { + let val: u32 = 30; + val.read_bits_in_range(&BitRange { + msb_index: 32, + lsb_index: 2, + }); + } + + #[test] + fn test_read_bits() { + let val: u32 = 0b1000_0000_0000_0000_0011_0101_0001_0000; + + // Test a couple of successive ranges + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 3, + lsb_index: 2 + }) == 0b00 + ); + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 4, + lsb_index: 2 + }) == 0b100 + ); + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 5, + lsb_index: 2 + }) == 0b0100 + ); + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 6, + lsb_index: 2 + }) == 0b00100 + ); + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 7, + lsb_index: 2 + }) == 0b00_0100 + ); + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 8, + lsb_index: 2 + }) == 0b100_0100 + ); + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 9, + lsb_index: 2 + }) == 0b0100_0100 + ); + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 10, + lsb_index: 2 + }) == 0b1_0100_0100 + ); + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 11, + lsb_index: 2 + }) == 0b01_0100_0100 + ); + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 12, + lsb_index: 2 + }) == 0b101_0100_0100 + ); + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 13, + lsb_index: 2 + }) == 0b1101_0100_0100 + ); + + // Test max left and max right + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 31, + lsb_index: 15 + }) == 0b1_0000_0000_0000_0000 + ); + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 14, + lsb_index: 0 + }) == 0b011_0101_0001_0000 + ); + assert!( + val.read_bits_in_range(&BitRange { + msb_index: 31, + lsb_index: 0 + }) == 0b1000_0000_0000_0000_0011_0101_0001_0000 + ); + } + + #[test] + #[should_panic] + fn test_invalid_write_bits() { + let mut val: u32 = 0; + + val.write_bits_in_range( + &BitRange { + msb_index: 32, + lsb_index: 2, + }, + 0b100, + ); + } + + #[test] + #[should_panic] + fn test_overflow_write_bits() { + let mut val: u32 = 0; + + val.write_bits_in_range( + &BitRange { + msb_index: 3, + lsb_index: 2, + }, + 0b100, + ); + } + + #[test] + fn test_simple_write_bits() { + let mut val: u32 = 0; + + // Test a couple of successive ranges + assert!( + val.write_bits_in_range( + &BitRange { + msb_index: 3, + lsb_index: 2 + }, + 0b00 + ) == &0b0000 + ); + assert!( + val.write_bits_in_range( + &BitRange { + msb_index: 4, + lsb_index: 2 + }, + 0b100 + ) == &0b10000 + ); + assert!( + val.write_bits_in_range( + &BitRange { + msb_index: 5, + lsb_index: 2 + }, + 0b0100 + ) == &0b01_0000 + ); + assert!( + val.write_bits_in_range( + &BitRange { + msb_index: 6, + lsb_index: 2 + }, + 0b0_0100 + ) == &0b001_0000 + ); + assert!( + val.write_bits_in_range( + &BitRange { + msb_index: 7, + lsb_index: 2 + }, + 0b00_0100 + ) == &0b0001_0000 + ); + assert!( + val.write_bits_in_range( + &BitRange { + msb_index: 8, + lsb_index: 2 + }, + 0b100_0100 + ) == &0b1_0001_0000 + ); + assert!( + val.write_bits_in_range( + 
&BitRange { + msb_index: 9, + lsb_index: 2 + }, + 0b0100_0100 + ) == &0b01_0001_0000 + ); + assert!( + val.write_bits_in_range( + &BitRange { + msb_index: 10, + lsb_index: 2 + }, + 0b1_0100_0100 + ) == &0b101_0001_0000 + ); + assert!( + val.write_bits_in_range( + &BitRange { + msb_index: 11, + lsb_index: 2 + }, + 0b01_0100_0100 + ) == &0b0101_0001_0000 + ); + assert!( + val.write_bits_in_range( + &BitRange { + msb_index: 12, + lsb_index: 2 + }, + 0b101_0100_0100 + ) == &0b1_0101_0001_0000 + ); + assert!( + val.write_bits_in_range( + &BitRange { + msb_index: 13, + lsb_index: 2 + }, + 0b1101_0100_0100 + ) == &0b11_0101_0001_0000 + ); + + // Test max left and max right + val = 0; + assert!( + val.write_bits_in_range( + &BitRange { + msb_index: 31, + lsb_index: 15 + }, + 0b1_0000_0000_0000_0000 + ) == &0b1000_0000_0000_0000_0000_0000_0000_0000 + ); + assert!( + val.write_bits_in_range( + &BitRange { + msb_index: 14, + lsb_index: 0 + }, + 0b011_0101_0001_0000 + ) == &0b1000_0000_0000_0000_0011_0101_0001_0000 + ); + assert!( + val.write_bits_in_range( + &BitRange { + msb_index: 31, + lsb_index: 0 + }, + 0b1000_0000_0000_0000_0011_0101_0001_0000 + ) == &0b1000_0000_0000_0000_0011_0101_0001_0000 + ); + } + + #[test] + fn test_chained_write_bits() { + let mut val: u32 = 0; + + // Test a couple of ranges + val.write_bits_in_range( + &BitRange { + msb_index: 4, + lsb_index: 2, + }, + 0b100, + ) + .write_bits_in_range( + &BitRange { + msb_index: 12, + lsb_index: 10, + }, + 0b110, + ) + .write_bits_in_range( + &BitRange { + msb_index: 24, + lsb_index: 20, + }, + 0b10101, + ) + .write_bits_in_range( + &BitRange { + msb_index: 31, + lsb_index: 28, + }, + 0b1011, + ); + + assert!(val == 0b1011_0001_0101_0000_0001_1000_0001_0000); + } +} diff --git a/src/dragonball/src/dbs_arch/src/x86_64/cpuid/brand_string.rs b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/brand_string.rs new file mode 100644 index 000000000000..e9bc1df1609c --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/brand_string.rs @@ -0,0 +1,462 @@ +// Copyright 2021 Alibaba Cloud. All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::arch::x86_64::__cpuid as host_cpuid; +use std::slice; + +use crate::cpuid::common::{VENDOR_ID_AMD, VENDOR_ID_INTEL}; + +#[derive(Debug, Eq, PartialEq, Clone)] +pub enum Error { + NotSupported, + Overflow(String), +} + +/// Register designations used to get/set specific register values within the brand string buffer. +pub enum Reg { + Eax = 0, + Ebx = 1, + Ecx = 2, + Edx = 3, +} + +const BRAND_STRING_INTEL: &[u8] = b"Intel(R) Xeon(R) Processor"; +const BRAND_STRING_AMD: &[u8] = b"AMD EPYC"; + +/// A CPUID brand string wrapper, providing some efficient manipulation primitives. +/// +/// This is achieved by bypassing the `O(n)` indexing, heap allocation, and the unicode checks +/// done by `std::string::String`. +#[derive(Clone, Debug, Default)] +pub struct BrandString { + /// Flattened buffer, holding an array of 32-bit register values. + /// + /// It has the following layout: + /// reg_buf[0] = leaf_0x80000002.Eax + /// reg_buf[1] = leaf_0x80000002.Ebx + /// reg_buf[2] = leaf_0x80000002.Ecx + /// reg_buf[3] = leaf_0x80000002.Edx + /// reg_buf[4] = leaf_0x80000003.Eax + /// ... + /// reg_buf[10] = leaf_0x80000004.Ecx + /// reg_buf[11] = leaf_0x80000004.Edx + /// When seen as a byte-array, this buffer holds the ASCII-encoded CPU brand string. 
+ reg_buf: [u32; BrandString::REG_BUF_SIZE], + + /// Actual string length, in bytes. + /// + /// E.g. For "Intel CPU", this would be `strlen("Intel CPU") == 9`. + len: usize, +} + +impl BrandString { + /// Register buffer size (in number of registers). + /// + /// There are 3 leaves (0x800000002 through 0x80000004), each with 4 regs (Eax, Ebx, Ecx, Edx). + const REG_BUF_SIZE: usize = 3 * 4; + + /// Max Brand string length, in bytes (also in chars, since it is ASCII-encoded). + /// + /// The string is NULL-terminated, so the max string length is actually one byte + /// less than the buffer size in bytes + const MAX_LEN: usize = Self::REG_BUF_SIZE * 4 - 1; + + /// Creates an empty brand string (0-initialized) + fn new() -> Self { + Default::default() + } + + /// Generates the emulated brand string. + /// + /// For Intel CPUs, the brand string we expose will be: + /// "Intel(R) Xeon(R) Processor @ {host freq}" + /// where {host freq} is the CPU frequency, as present in the + /// host brand string (e.g. 4.01GHz). + /// + /// For AMD CPUs, the brand string we expose will be AMD EPYC. + /// + /// For other CPUs, we'll just expose an empty string. + /// + /// This is safe because we know BRAND_STRING_INTEL and BRAND_STRING_AMD to hold valid data + /// (allowed length and holding only valid ASCII chars). + pub fn from_vendor_id(vendor_id: &[u8; 12]) -> Result { + let brand = match vendor_id { + VENDOR_ID_INTEL => { + let mut this = BrandString::from_bytes_unchecked(BRAND_STRING_INTEL); + if let Ok(host_bstr) = BrandString::from_host_cpuid() { + if let Some(freq) = host_bstr.find_freq() { + this.push_bytes(b" @ ")?; + this.push_bytes(freq)?; + } + } + this + } + VENDOR_ID_AMD => BrandString::from_bytes_unchecked(BRAND_STRING_AMD), + _ => BrandString::from_bytes_unchecked(b""), + }; + + Ok(brand) + } + + /// Creates a brand string, initialized from the CPUID leaves 0x80000002 through 0x80000004 + /// of the host CPU. + fn from_host_cpuid() -> Result { + let mut this = Self::new(); + let mut cpuid_regs = unsafe { host_cpuid(0x8000_0000) }; + + if cpuid_regs.eax < 0x8000_0004 { + // Brand string not supported by the host CPU + return Err(Error::NotSupported); + } + + for leaf in 0x8000_0002..=0x8000_0004 { + cpuid_regs = unsafe { host_cpuid(leaf) }; + this.set_reg_for_leaf(leaf, Reg::Eax, cpuid_regs.eax); + this.set_reg_for_leaf(leaf, Reg::Ebx, cpuid_regs.ebx); + this.set_reg_for_leaf(leaf, Reg::Ecx, cpuid_regs.ecx); + this.set_reg_for_leaf(leaf, Reg::Edx, cpuid_regs.edx); + } + + let mut len = Self::MAX_LEN; + { + let this_bytes = this.as_bytes(); + while this_bytes[len - 1] == 0 && len > 0 { + len -= 1; + } + } + this.len = len; + + Ok(this) + } + + /// Creates a (custom) brand string, initialized from `src`. + /// + /// No checks are performed on the length of `src` or its contents (`src` should be an + /// ASCII-encoded string). + #[inline] + fn from_bytes_unchecked(src: &[u8]) -> Self { + let mut this = Self::new(); + this.len = src.len(); + this.as_bytes_mut()[..src.len()].copy_from_slice(src); + this + } + + /// Returns the given register value for the given CPUID leaf. + /// + /// `leaf` must be between 0x80000002 and 0x80000004. + #[inline] + pub fn get_reg_for_leaf(&self, leaf: u32, reg: Reg) -> u32 { + if (0x80000002u32..=0x80000004).contains(&leaf) { + // It's ok not to validate parameters here, leaf and reg should + // both be compile-time constants. If there's something wrong with them, + // that's a programming error and we should panic anyway. 
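+ // Each leaf owns four consecutive u32 slots (Eax, Ebx, Ecx, Edx), so e.g.
+ // leaf 0x8000_0003 / `Reg::Ecx` maps to index 1 * 4 + 2 == 6 in the layout
+ // described on `reg_buf`.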
+ self.reg_buf[(leaf - 0x8000_0002) as usize * 4 + reg as usize] + } else { + 0 + } + } + + /// Sets the value for the given leaf/register pair. + /// + /// `leaf` must be between 0x80000002 and 0x80000004. + #[inline] + fn set_reg_for_leaf(&mut self, leaf: u32, reg: Reg, val: u32) { + // It's ok not to validate parameters here, leaf and reg should + // both be compile-time constants. If there's something wrong with them, + // that's a programming error and we should panic anyway. + self.reg_buf[(leaf - 0x8000_0002) as usize * 4 + reg as usize] = val; + } + + /// Gets an immutable `u8` slice view into the brand string buffer. + #[inline] + fn as_bytes(&self) -> &[u8] { + // This is actually safe, because self.reg_buf has a fixed, known size, + // and also there's no risk of misalignment, since we're downgrading + // alignment constraints from dword to byte. + unsafe { slice::from_raw_parts(self.reg_buf.as_ptr() as *const u8, Self::REG_BUF_SIZE * 4) } + } + + /// Gets a mutable `u8` slice view into the brand string buffer. + #[inline] + fn as_bytes_mut(&mut self) -> &mut [u8] { + unsafe { + slice::from_raw_parts_mut(self.reg_buf.as_mut_ptr() as *mut u8, Self::REG_BUF_SIZE * 4) + } + } + + /// Asserts whether or not there is enough room to append `src` to the brand string. + fn check_push(&mut self, src: &[u8]) -> bool { + src.len() <= Self::MAX_LEN - self.len + } + + /// Appends `src` to the brand string if there is enough room to append it. + fn push_bytes(&mut self, src: &[u8]) -> Result<(), Error> { + if !self.check_push(src) { + // No room to push all of src. + return Err(Error::Overflow( + "Appending to the brand string failed.".to_string(), + )); + } + let start = self.len; + let count = src.len(); + self.len += count; + self.as_bytes_mut()[start..(start + count)].copy_from_slice(src); + Ok(()) + } + + /// Searches the brand string for the CPU frequency data it may contain (e.g. 4.01GHz), + /// and, if found, returns it as an `u8` slice. + /// + /// Basically, we're implementing a search for this regex: "([0-9]+\.[0-9]+[MGT]Hz)". + fn find_freq(&self) -> Option<&[u8]> { + // The algorithm for matching the regular expression above is based + // on a Moore machine, and 'stage' represents the current state of + // the machine. + enum Stages { + /// Initial state, looking for a digit. + Initial, + /// Found integer part of the frequency. + FoundFreqIntPart, + /// Found the decimal point. + FoundFreqDecimalPoint, + /// Found the decimal part. + FoundFreqDecimalPart, + /// Found the unit size. + FoundFreqUnitSize, + /// Found the H in 'Hz'. + FoundH, + } + + let mut freq_start = 0; + let mut decimal_start = 0; + + let mut stage = Stages::Initial; + + for (i, &ch) in self.as_bytes().iter().enumerate() { + match stage { + Stages::Initial => { + // Looking for one or more digits. + if ch.is_ascii_digit() { + freq_start = i; + stage = Stages::FoundFreqIntPart; + } + } + Stages::FoundFreqIntPart => { + // Looking for a decimal point. + if !ch.is_ascii_digit() { + if ch == b'.' { + stage = Stages::FoundFreqDecimalPoint; + } else { + stage = Stages::Initial; + } + } + } + Stages::FoundFreqDecimalPoint => { + // Looking for the decimal part. + if ch.is_ascii_digit() { + stage = Stages::FoundFreqDecimalPart; + decimal_start = i; + } else { + stage = Stages::Initial; + } + } + Stages::FoundFreqDecimalPart => { + // Looking for the unit of measure. + if !ch.is_ascii_digit() { + if ch == b'.' 
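+ // A second '.' after decimal digits restarts the match with
+ // those digits as the new integer part, e.g. "50.5.20GHz"
+ // yields "5.20GHz" (see `test_find_freq_fails` below).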
{ + stage = Stages::FoundFreqDecimalPoint; + freq_start = decimal_start; + } else if ch == b'M' || ch == b'G' || ch == b'T' { + stage = Stages::FoundFreqUnitSize; + } else { + stage = Stages::Initial; + } + } + } + Stages::FoundFreqUnitSize => { + // Looking for the 'H' in 'Hz'. + if ch == b'H' { + stage = Stages::FoundH; + } else if ch.is_ascii_digit() { + stage = Stages::FoundFreqIntPart; + freq_start = i; + } else { + stage = Stages::Initial; + } + } + Stages::FoundH => { + // Looking for the 'z' in 'Hz'. + // If found, we stop the search and return the slice. + if ch == b'z' { + let freq_end = i + 1; + return Some(&self.as_bytes()[freq_start..freq_end]); + } else if ch.is_ascii_digit() { + stage = Stages::FoundFreqIntPart; + freq_start = i; + } else { + stage = Stages::Initial; + } + } + }; + } + None + } +} + +#[cfg(test)] +mod tests { + use std::iter::repeat; + + use super::*; + + #[test] + fn test_brand_string() { + #[inline] + fn pack_u32(src: &[u8]) -> u32 { + assert!(src.len() >= 4); + u32::from(src[0]) + | (u32::from(src[1]) << 8) + | (u32::from(src[2]) << 16) + | (u32::from(src[3]) << 24) + } + + const TEST_STR: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + let mut bstr = BrandString::from_bytes_unchecked(TEST_STR); + + // Test the immutable bitwise casts + // + { + for i in 0_usize..=1_usize { + let eax_offs = (4 * 4) * i; + let ebx_offs = (4 * 4) * i + 4; + let ecx_offs = (4 * 4) * i + 8; + let edx_offs = (4 * 4) * i + 12; + assert_eq!( + bstr.get_reg_for_leaf(0x8000_0002 + i as u32, Reg::Eax), + pack_u32(&TEST_STR[eax_offs..(eax_offs + 4)]) + ); + assert_eq!( + bstr.get_reg_for_leaf(0x8000_0002 + i as u32, Reg::Ebx), + pack_u32(&TEST_STR[ebx_offs..(ebx_offs + 4)]) + ); + assert_eq!( + bstr.get_reg_for_leaf(0x8000_0002 + i as u32, Reg::Ecx), + pack_u32(&TEST_STR[ecx_offs..(ecx_offs + 4)]) + ); + assert_eq!( + bstr.get_reg_for_leaf(0x8000_0002 + i as u32, Reg::Edx), + pack_u32(&TEST_STR[edx_offs..(edx_offs + 4)]) + ); + } + } + + assert_eq!(bstr.get_reg_for_leaf(0x8000_0005, Reg::Eax), 0); + + // Test find_freq() failure path + // + assert!(bstr.find_freq().is_none()); + + // Test mutable bitwise casting and finding the frequency substring + // + bstr.set_reg_for_leaf(0x8000_0003, Reg::Ebx, pack_u32(b"5.20")); + bstr.set_reg_for_leaf(0x8000_0003, Reg::Ecx, pack_u32(b"GHz ")); + assert_eq!(bstr.find_freq().unwrap(), b"5.20GHz"); + + let _overflow: [u8; 50] = [b'a'; 50]; + + // Test BrandString::check_push() + // + bstr = BrandString::new(); + assert!(bstr.check_push(b"Hello")); + bstr.push_bytes(b"Hello").unwrap(); + assert!(bstr.check_push(b", world!")); + bstr.push_bytes(b", world!").unwrap(); + + assert!(!bstr.check_push(&_overflow)); + + // Test BrandString::push_bytes() + // + let actual_len = bstr.as_bytes().len(); + let mut old_bytes: Vec = repeat(0).take(actual_len).collect(); + old_bytes.copy_from_slice(bstr.as_bytes()); + assert_eq!( + bstr.push_bytes(&_overflow), + Err(Error::Overflow( + "Appending to the brand string failed.".to_string() + )) + ); + assert!(bstr.as_bytes().to_vec() == old_bytes); + + // Test BrandString::from_host_cpuid() and get_reg_for_leaf() + // + match BrandString::from_host_cpuid() { + Ok(bstr) => { + for leaf in 0x8000_0002..=0x8000_0004_u32 { + let host_regs = unsafe { host_cpuid(leaf) }; + assert_eq!(bstr.get_reg_for_leaf(leaf, Reg::Eax), host_regs.eax); + assert_eq!(bstr.get_reg_for_leaf(leaf, Reg::Ebx), host_regs.ebx); + assert_eq!(bstr.get_reg_for_leaf(leaf, Reg::Ecx), host_regs.ecx); + assert_eq!(bstr.get_reg_for_leaf(leaf, 
Reg::Edx), host_regs.edx); + } + } + Err(Error::NotSupported) => { + // from_host_cpuid() should only fail if the host CPU doesn't support + // CPUID leaves up to 0x80000004, so let's make sure that's what happened. + let host_regs = unsafe { host_cpuid(0x8000_0000) }; + assert!(host_regs.eax < 0x8000_0004); + } + _ => panic!("This function should not return another type of error"), + } + + // Test BrandString::from_vendor_id() + let bstr = BrandString::from_vendor_id(VENDOR_ID_INTEL).unwrap(); + assert!(bstr.as_bytes().starts_with(BRAND_STRING_INTEL)); + let bstr = BrandString::from_vendor_id(VENDOR_ID_AMD).unwrap(); + assert!(bstr.as_bytes().starts_with(BRAND_STRING_AMD)); + let bstr = BrandString::from_vendor_id(b"............").unwrap(); + assert!(bstr.as_bytes() == vec![b'\0'; 48].as_slice()); + } + + #[test] + fn test_find_freq_fails() { + let bstr_thz = BrandString::from_bytes_unchecked(b"5.20THz"); + assert_eq!(bstr_thz.find_freq().unwrap(), b"5.20THz"); + + let bstr_unused_end = BrandString::from_bytes_unchecked(b"AAA5.20MHzXz"); + assert_eq!(bstr_unused_end.find_freq().unwrap(), b"5.20MHz"); + + let bstr_faulty_unit = BrandString::from_bytes_unchecked(b"5.20BHz "); + assert!(bstr_faulty_unit.find_freq().is_none()); + + let short_bstr = BrandString::from_bytes_unchecked(b"z"); + assert!(short_bstr.find_freq().is_none()); + + let skip_from_unit = BrandString::from_bytes_unchecked(b"Mz"); + assert!(skip_from_unit.find_freq().is_none()); + + let short_bstr = BrandString::from_bytes_unchecked(b"Hz"); + assert!(short_bstr.find_freq().is_none()); + + let short_bstr = BrandString::from_bytes_unchecked(b"GHz"); + assert!(short_bstr.find_freq().is_none()); + + let multiple_points_bstr = BrandString::from_bytes_unchecked(b"50.5.20GHz"); + assert_eq!(multiple_points_bstr.find_freq().unwrap(), b"5.20GHz"); + + let no_decimal_bstr = BrandString::from_bytes_unchecked(b"5GHz"); + assert!(no_decimal_bstr.find_freq().is_none()); + + let interrupted_bstr = BrandString::from_bytes_unchecked(b"500.00M5.20GHz"); + assert_eq!(interrupted_bstr.find_freq().unwrap(), b"5.20GHz"); + + let split_bstr = BrandString::from_bytes_unchecked(b"5.30AMHz"); + assert!(split_bstr.find_freq().is_none()); + + let long_bstr = BrandString::from_bytes_unchecked(b"1.12bc5.30MaHz2.4.25THz"); + assert_eq!(long_bstr.find_freq().unwrap(), b"4.25THz"); + + let found_h_bstr = BrandString::from_bytes_unchecked(b"1.A5.2MH3.20GHx4.30GHz"); + assert_eq!(found_h_bstr.find_freq().unwrap(), b"4.30GHz"); + } +} diff --git a/src/dragonball/src/dbs_arch/src/x86_64/cpuid/common.rs b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/common.rs new file mode 100644 index 000000000000..292994a7b273 --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/common.rs @@ -0,0 +1,105 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::arch::x86_64::{CpuidResult, __cpuid_count, __get_cpuid_max}; + +use super::cpu_leaf::*; + +pub(crate) const VENDOR_ID_INTEL: &[u8; 12] = b"GenuineIntel"; +pub(crate) const VENDOR_ID_AMD: &[u8; 12] = b"AuthenticAMD"; +pub(crate) const VENDOR_ID_HYGON: &[u8; 12] = b"HygonGenuine"; + +#[derive(Clone, Debug)] +pub enum Error { + InvalidParameters(String), + NotSupported, +} + +/// Get CPUID value for (`function`, `count`). 
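+///
+/// # Example
+///
+/// A minimal sketch (not from the original patch); leaf 0 is the vendor leaf,
+/// which is how `get_vendor_id` below uses it.
+///
+/// ```ignore
+/// let leaf0 = get_cpuid(0, 0).unwrap();
+/// // leaf0.ebx / edx / ecx hold the 12-byte vendor string, e.g. "GenuineIntel".
+/// ```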
+pub fn get_cpuid(function: u32, count: u32) -> Result { + #[cfg(target_env = "sgx")] + { + return Err(Error::NotSupported); + } + + // TODO: replace with validation based on `has_cpuid()` when it becomes stable: + // https://doc.rust-lang.org/core/arch/x86/fn.has_cpuid.html + // this is safe because the host supports the `cpuid` instruction + let max_function = unsafe { __get_cpuid_max(function & leaf_0x80000000::LEAF_NUM).0 }; + if function > max_function { + return Err(Error::InvalidParameters(format!( + "Function not supported: 0x{function:x}", + ))); + } + + // this is safe because the host supports the `cpuid` instruction + let entry = unsafe { __cpuid_count(function, count) }; + if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 { + return Err(Error::InvalidParameters(format!("Invalid count: {count}"))); + } + + Ok(entry) +} + +/// Extracts the CPU vendor id from leaf 0x0. +pub fn get_vendor_id() -> Result<[u8; 12], Error> { + let vendor_entry = get_cpuid(0, 0)?; + let bytes: [u8; 12] = + unsafe { std::mem::transmute([vendor_entry.ebx, vendor_entry.edx, vendor_entry.ecx]) }; + + Ok(bytes) +} + +#[cfg(test)] +pub mod tests { + use super::*; + + pub fn get_topoext_fn() -> u32 { + let vendor_id = get_vendor_id(); + assert!(vendor_id.is_ok()); + let function = match &vendor_id.ok().unwrap() { + VENDOR_ID_INTEL => leaf_0x4::LEAF_NUM, + VENDOR_ID_AMD => leaf_0x8000001d::LEAF_NUM, + _ => 0, + }; + assert!(function != 0); + + function + } + + #[test] + fn test_get_cpu_id() { + // get_cpu_id should work correctly here + let topoext_fn = get_topoext_fn(); + + // check that get_cpuid works for valid parameters + match get_cpuid(topoext_fn, 0) { + Ok(topoext_entry) => { + assert!(topoext_entry.eax != 0); + } + _ => panic!("Wrong behavior"), + } + + // check that get_cpuid returns correct error for invalid `function` + match get_cpuid(0x9000_0000, 0) { + Err(Error::InvalidParameters(s)) => { + assert!(s == "Function not supported: 0x90000000"); + } + _ => panic!("Wrong behavior"), + } + + // check that get_cpuid returns correct error for invalid `count` + match get_cpuid(topoext_fn, 100) { + Err(Error::InvalidParameters(s)) => { + assert!(s == "Invalid count: 100"); + } + _ => panic!("Wrong behavior"), + } + } + + #[test] + fn test_get_vendor_id() { + let vendor_id = get_vendor_id().unwrap(); + assert!(matches!(&vendor_id, VENDOR_ID_INTEL | VENDOR_ID_AMD)); + } +} diff --git a/src/dragonball/src/dbs_arch/src/x86_64/cpuid/cpu_leaf.rs b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/cpu_leaf.rs new file mode 100644 index 000000000000..0c121cdd26f4 --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/cpu_leaf.rs @@ -0,0 +1,439 @@ +// Copyright 2021 Alibaba Cloud. All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![allow(missing_docs)] + +//! CPUID leaf registers constant values. 
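+//!
+//! These constants are meant to be combined with the `bit_helper` primitives;
+//! an illustrative sketch (not part of the original patch), where `entry` is a
+//! `kvm_cpuid_entry2`:
+//!
+//! ```ignore
+//! use crate::cpuid::bit_helper::BitHelper;
+//!
+//! // Advertise that the vCPU runs under a hypervisor.
+//! entry.ecx.write_bit(leaf_0x1::ecx::HYPERVISOR_BITINDEX, true);
+//! ```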
+ +#![allow(unused)] +pub mod leaf_0x0 { + pub const LEAF_NUM: u32 = 0x0; +} + +pub mod leaf_0x1 { + pub const LEAF_NUM: u32 = 0x1; + + pub mod eax { + use crate::cpuid::bit_helper::BitRange; + + pub const EXTENDED_FAMILY_ID_BITRANGE: BitRange = bit_range!(27, 20); + pub const EXTENDED_PROCESSOR_MODEL_BITRANGE: BitRange = bit_range!(19, 16); + pub const PROCESSOR_TYPE_BITRANGE: BitRange = bit_range!(13, 12); + pub const PROCESSOR_FAMILY_BITRANGE: BitRange = bit_range!(11, 8); + pub const PROCESSOR_MODEL_BITRANGE: BitRange = bit_range!(7, 4); + pub const STEPPING_BITRANGE: BitRange = bit_range!(3, 0); + } + + pub mod ebx { + use crate::cpuid::bit_helper::BitRange; + + // The bit-range containing the (fixed) default APIC ID. + pub const APICID_BITRANGE: BitRange = bit_range!(31, 24); + // The bit-range containing the logical processor count. + pub const CPU_COUNT_BITRANGE: BitRange = bit_range!(23, 16); + // The bit-range containing the number of bytes flushed when executing CLFLUSH. + pub const CLFLUSH_SIZE_BITRANGE: BitRange = bit_range!(15, 8); + } + + pub mod ecx { + // DTES64 = 64-bit debug store + pub const DTES64_BITINDEX: u32 = 2; + // MONITOR = Monitor/MWAIT + pub const MONITOR_BITINDEX: u32 = 3; + // CPL Qualified Debug Store + pub const DS_CPL_SHIFT: u32 = 4; + // 5 = VMX (Virtual Machine Extensions) + // 6 = SMX (Safer Mode Extensions) + // 7 = EIST (Enhanced Intel SpeedStep® technology) + // TM2 = Thermal Monitor 2 + pub const TM2_BITINDEX: u32 = 8; + // CNXT_ID = L1 Context ID (L1 data cache can be set to adaptive/shared mode) + pub const CNXT_ID_BITINDEX: u32 = 10; + // SDBG (cpu supports IA32_DEBUG_INTERFACE MSR for silicon debug) + pub const SDBG_BITINDEX: u32 = 11; + pub const FMA_BITINDEX: u32 = 12; + // XTPR_UPDATE = xTPR Update Control + pub const XTPR_UPDATE_BITINDEX: u32 = 14; + // PDCM = Perfmon and Debug Capability + pub const PDCM_BITINDEX: u32 = 15; + // 18 = DCA Direct Cache Access (prefetch data from a memory mapped device) + pub const MOVBE_BITINDEX: u32 = 22; + pub const TSC_DEADLINE_TIMER_BITINDEX: u32 = 24; + pub const OSXSAVE_BITINDEX: u32 = 27; + // Cpu is running on a hypervisor. + pub const HYPERVISOR_BITINDEX: u32 = 31; + } + + pub mod edx { + pub const PSN_BITINDEX: u32 = 18; // Processor Serial Number + pub const DS_BITINDEX: u32 = 21; // Debug Store. + pub const ACPI_BITINDEX: u32 = 22; // Thermal Monitor and Software Controlled Clock Facilities. + pub const SS_BITINDEX: u32 = 27; // Self Snoop + pub const HTT_BITINDEX: u32 = 28; // Max APIC IDs reserved field is valid + pub const TM_BITINDEX: u32 = 29; // Thermal Monitor. + pub const PBE_BITINDEX: u32 = 31; // Pending Break Enable. + } +} + +pub mod leaf_cache_parameters { + pub mod eax { + use crate::cpuid::bit_helper::BitRange; + + pub const CACHE_LEVEL_BITRANGE: BitRange = bit_range!(7, 5); + pub const MAX_CPUS_PER_CORE_BITRANGE: BitRange = bit_range!(25, 14); + } +} + +// Deterministic Cache Parameters Leaf +pub mod leaf_0x4 { + pub const LEAF_NUM: u32 = 0x4; + + pub mod eax { + use crate::cpuid::bit_helper::BitRange; + + // inherit eax from leaf_cache_parameters + pub use crate::cpuid::cpu_leaf::leaf_cache_parameters::eax::*; + + pub const MAX_CORES_PER_PACKAGE_BITRANGE: BitRange = bit_range!(31, 26); + } +} + +// Thermal and Power Management Leaf +#[allow(dead_code)] +pub mod leaf_0x6 { + pub const LEAF_NUM: u32 = 0x6; + + pub mod eax { + pub const TURBO_BOOST_BITINDEX: u32 = 1; + } + + pub mod ecx { + // "Energy Performance Bias" bit. 
+ pub const EPB_BITINDEX: u32 = 3; + } +} + +// Structured Extended Feature Flags Enumeration Leaf +pub mod leaf_0x7 { + pub const LEAF_NUM: u32 = 0x7; + + pub mod index0 { + pub mod ebx { + // 1 = TSC_ADJUST + pub const SGX_BITINDEX: u32 = 2; + pub const BMI1_BITINDEX: u32 = 3; + pub const HLE_BITINDEX: u32 = 4; + pub const AVX2_BITINDEX: u32 = 5; + // FPU Data Pointer updated only on x87 exceptions if 1. + pub const FPDP_BITINDEX: u32 = 6; + // 7 = SMEP (Supervisor-Mode Execution Prevention if 1) + pub const BMI2_BITINDEX: u32 = 8; + // 9 = Enhanced REP MOVSB/STOSB if 1 + // 10 = INVPCID + pub const INVPCID_BITINDEX: u32 = 10; + pub const RTM_BITINDEX: u32 = 11; + // Intel® Resource Director Technology (Intel® RDT) Monitoring + pub const RDT_M_BITINDEX: u32 = 12; + // 13 = Deprecates FPU CS and FPU DS values if 1 + // Memory Protection Extensions + pub const MPX_BITINDEX: u32 = 14; + // RDT = Intel® Resource Director Technology + pub const RDT_A_BITINDEX: u32 = 15; + // AVX-512 Foundation instructions + pub const AVX512F_BITINDEX: u32 = 16; + // AVX-512 Doubleword and Quadword Instructions + pub const AVX512DQ_BITINDEX: u32 = 17; + pub const RDSEED_BITINDEX: u32 = 18; + pub const ADX_BITINDEX: u32 = 19; + // 20 = SMAP (Supervisor-Mode Access Prevention) + // AVX512IFMA = AVX-512 Integer Fused Multiply-Add Instructions + pub const AVX512IFMA_BITINDEX: u32 = 21; + // 21 = PCOMMIT intruction + // 22 reserved + // CLFLUSHOPT (flushing multiple cache lines in parallel within a single logical processor) + pub const CLFLUSHOPT_BITINDEX: u32 = 23; + // CLWB = Cache Line Write Back + pub const CLWB_BITINDEX: u32 = 24; + // PT = Intel Processor Trace + pub const PT_BITINDEX: u32 = 25; + // AVX512PF = AVX512 Prefetch Instructions + pub const AVX512PF_BITINDEX: u32 = 26; + // AVX512ER = AVX-512 Exponential and Reciprocal Instructions + pub const AVX512ER_BITINDEX: u32 = 27; + // AVX512CD = AVX-512 Conflict Detection Instructions + pub const AVX512CD_BITINDEX: u32 = 28; + // Intel Secure Hash Algorithm Extensions + pub const SHA_BITINDEX: u32 = 29; + // AVX-512 Byte and Word Instructions + pub const AVX512BW_BITINDEX: u32 = 30; + // AVX-512 Vector Length Extensions + pub const AVX512VL_BITINDEX: u32 = 31; + } + + pub mod ecx { + // 0 = PREFETCHWT1 (move data closer to the processor in anticipation of future use) + // AVX512_VBMI = AVX-512 Vector Byte Manipulation Instructions + pub const AVX512_VBMI_BITINDEX: u32 = 1; + // 2 = UMIP (User Mode Instruction Prevention) + // PKU = Protection Keys for user-mode pages + pub const PKU_BITINDEX: u32 = 3; + // OSPKE = If 1, OS has set CR4.PKE to enable protection keys + pub const OSPKE_BITINDEX: u32 = 4; + // 5 = WAITPKG + // 7-6 reserved + // 8 = GFNI + // 13-09 reserved + // AVX512_VPOPCNTDQ = Vector population count instruction (Intel® Xeon Phi™ only.) + pub const AVX512_VPOPCNTDQ_BITINDEX: u32 = 14; + // 21 - 17 = The value of MAWAU used by the BNDLDX and BNDSTX instructions in 64-bit mode. 
+ // Read Processor ID + pub const RDPID_BITINDEX: u32 = 22; + // 23 - 29 reserved + // SGX_LC = SGX Launch Configuration + pub const SGX_LC_BITINDEX: u32 = 30; + // 31 reserved + } + + pub mod edx { + // AVX-512 4-register Neural Network Instructions + pub const AVX512_4VNNIW_BITINDEX: u32 = 2; + // AVX-512 4-register Multiply Accumulation Single precision + pub const AVX512_4FMAPS_BITINDEX: u32 = 3; + pub const ARCH_CAPABILITIES_BITINDEX: u32 = 29; + } + } +} + +// Architecture Performance Monitor Features +pub mod leaf_0xa { + pub const LEAF_NUM: u32 = 0xa; + + pub mod eax { + use crate::cpuid::bit_helper::BitRange; + pub const PMC_VERSION_ID: BitRange = bit_range!(7, 0); + pub const BIT_LEN_PMEVENT: BitRange = bit_range!(31, 24); + } + + pub mod ebx { + pub const CORE_CYCLES_BITINDEX: u32 = 0; + pub const INST_RETIRED_BITINDEX: u32 = 1; + pub const REF_CYCLES_BITINDEX: u32 = 2; + pub const LLC_REF_BITINDEX: u32 = 3; + pub const LLC_MISSES_BITINDEX: u32 = 4; + pub const BR_INST_RETIRED_BITINDEX: u32 = 5; + pub const BR_MIS_RETIRED_BITINDEX: u32 = 6; + } +} + +// Extended Topology Leaf +pub mod leaf_0xb { + pub const LEAF_NUM: u32 = 0xb; + + pub const LEVEL_TYPE_THREAD: u32 = 1; + pub const LEVEL_TYPE_CORE: u32 = 2; + + pub mod eax { + use crate::cpuid::bit_helper::BitRange; + + // The bit-range containing the number of bits to shift right the APIC ID in order to get + // the next level APIC ID + pub const APICID_BITRANGE: BitRange = bit_range!(4, 0); + } + + pub mod ebx { + use crate::cpuid::bit_helper::BitRange; + + // The bit-range containing the number of factory-configured logical processors + // at the current cache level + pub const NUM_LOGICAL_PROCESSORS_BITRANGE: BitRange = bit_range!(15, 0); + } + + pub mod ecx { + use crate::cpuid::bit_helper::BitRange; + + pub const LEVEL_TYPE_BITRANGE: BitRange = bit_range!(15, 8); + pub const LEVEL_NUMBER_BITRANGE: BitRange = bit_range!(7, 0); + } +} + +// Processor Extended State Enumeration Sub-leaves +pub mod leaf_0xd { + pub const LEAF_NUM: u32 = 0xd; + + pub mod index0 { + pub mod eax { + use crate::cpuid::bit_helper::BitRange; + + pub const MPX_STATE_BITRANGE: BitRange = bit_range!(4, 3); + pub const AVX512_STATE_BITRANGE: BitRange = bit_range!(7, 5); + } + } + + pub mod index1 { + pub mod eax { + pub const XSAVEC_SHIFT: u32 = 1; + pub const XGETBV_SHIFT: u32 = 2; + pub const XSAVES_SHIFT: u32 = 3; + } + } +} + +// V2 Extended Topology Enumeration Leaf +pub mod leaf_0x1f { + pub const LEAF_NUM: u32 = 0x1f; + + pub const LEVEL_TYPE_THREAD: u32 = 1; + pub const LEVEL_TYPE_CORE: u32 = 2; + pub const LEVEL_TYPE_DIE: u32 = 5; + + pub mod eax { + use crate::cpuid::bit_helper::BitRange; + + // The bit-range containing the number of bits to shift right the APIC ID in order to get + // the next level APIC ID + pub const APICID_BITRANGE: BitRange = bit_range!(4, 0); + } + + pub mod ebx { + use crate::cpuid::bit_helper::BitRange; + + // The bit-range containing the number of factory-configured logical processors + // at the current cache level + pub const NUM_LOGICAL_PROCESSORS_BITRANGE: BitRange = bit_range!(15, 0); + } + + pub mod ecx { + use crate::cpuid::bit_helper::BitRange; + + pub const LEVEL_TYPE_BITRANGE: BitRange = bit_range!(15, 8); + pub const LEVEL_NUMBER_BITRANGE: BitRange = bit_range!(7, 0); + } +} + +/// KVM CPUID bits +/// A guest running on a kvm host, can check some of its features using cpuid. 
This is not always guaranteed to work, +/// since userspace can mask-out some, or even all KVM-related cpuid features before launching a guest. +/// More information: https://docs.kernel.org/virt/kvm/x86/cpuid.html +pub mod leaf_0x4000_0001 { + pub const LEAF_NUM: u32 = 0x4000_0001; + pub mod eax { + /// kvmclock available at msrs 0x11 and 0x12 + pub const KVM_FEATURE_CLOCKSOURCE_BITINDEX: u32 = 0; + /// not necessary to perform delays on PIO operations + pub const KVM_FEATURE_NOP_IO_DELAY_BITINDEX: u32 = 1; + /// deprecated + pub const KVM_FEATURE_MMU_OP_BITINDEX: u32 = 2; + /// kvmclock available at msrs 0x4b564d00 and 0x4b564d01 + pub const KVM_FEATURE_CLOCKSOURCE2_BITINDEX: u32 = 3; + /// async pf can be enabled by writing to msr 0x4b564d02 + pub const KVM_FEATURE_ASYNC_PF_BITINDEX: u32 = 4; + /// steal time can be enabled by writing to msr 0x4b564d03 + pub const KVM_FEATURE_STEAL_TIME_BITINDEX: u32 = 5; + /// paravirtualized end of interrupt handler can be enabled by writing to msr 0x4b564d04 + pub const KVM_FEATURE_PV_EOI_BITINDEX: u32 = 6; + /// guest checks this feature bit before enabling paravirtualized spinlock support + pub const KVM_FEATURE_PV_UNHALT_BITINDEX: u32 = 7; + /// guest checks this feature bit before enabling paravirtualized tlb flush + pub const KVM_FEATURE_PV_TLB_FLUSH_BITINDEX: u32 = 9; + /// paravirtualized async PF VM EXIT can be enabled by setting bit 2 when writing to msr 0x4b564d02 + pub const KVM_FEATURE_ASYNC_PF_VMEXIT_BITINDEX: u32 = 10; + /// guest checks this feature bit before enabling paravirtualized send IPIs + pub const KVM_FEATURE_PV_SEND_IPI_BITINDEX: u32 = 11; + /// host-side polling on HLT can be disabled by writing to msr 0x4b564d05. + pub const KVM_FEATURE_POLL_CONTROL_BITINDEX: u32 = 12; + /// guest checks this feature bit before using paravirtualized sched yield. + pub const KVM_FEATURE_PV_SCHED_YIELD_BITINDEX: u32 = 13; + /// guest checks this feature bit before using the second async pf control msr 0x4b564d06 and async pf acknowledgment msr 0x4b564d07. + pub const KVM_FEATURE_ASYNC_PF_INT_BITINDEX: u32 = 14; + /// guest checks this feature bit before using extended destination ID bits in MSI address bits 11-5. + pub const KVM_FEATURE_MSI_EXT_DEST_ID_BITINDEX: u32 = 15; + /// guest checks this feature bit before using the map gpa range hypercall to notify the page state change + pub const KVM_FEATURE_HC_MAP_GPA_RANGE_BITINDEX: u32 = 16; + /// guest checks this feature bit before using MSR_KVM_MIGRATION_CONTROL + pub const KVM_FEATURE_MIGRATION_CONTROL_BITINDEX: u32 = 17; + /// host will warn if no guest-side per-cpu warps are expected in kvmclock + pub const KVM_FEATURE_CLOCKSOURCE_STABLE_BITINDEX: u32 = 24; + } +} + +pub mod leaf_0x80000000 { + pub const LEAF_NUM: u32 = 0x8000_0000; + + pub mod eax { + use crate::cpuid::bit_helper::BitRange; + + pub const LARGEST_EXTENDED_FN_BITRANGE: BitRange = bit_range!(31, 0); + } +} + +pub mod leaf_0x80000001 { + pub const LEAF_NUM: u32 = 0x8000_0001; + + pub mod ecx { + pub const TOPOEXT_INDEX: u32 = 22; + pub const PREFETCH_BITINDEX: u32 = 8; // 3DNow! PREFETCH/PREFETCHW instructions + pub const LZCNT_BITINDEX: u32 = 5; // advanced bit manipulation + } + + pub mod edx { + pub const PDPE1GB_BITINDEX: u32 = 26; // 1-GByte pages are available if 1. 
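+ // Illustrative note (not part of the original patch): these constants are
+ // read back with `BitHelper`, e.g. `entry.edx.read_bit(PDPE1GB_BITINDEX)`
+ // is true when 1-GByte pages are advertised.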
+ } +} + +pub mod leaf_0x80000008 { + pub const LEAF_NUM: u32 = 0x8000_0008; + + pub mod ecx { + use crate::cpuid::bit_helper::BitRange; + + // The number of bits in the initial ApicId value that indicate thread ID within a package + // Possible values: + // 0-3 -> Reserved + // 4 -> 1 Die, up to 16 threads + // 5 -> 2 Die, up to 32 threads + // 6 -> 3,4 Die, up to 64 threads + pub const THREAD_ID_SIZE_BITRANGE: BitRange = bit_range!(15, 12); + // The number of threads in the package - 1 + pub const NUM_THREADS_BITRANGE: BitRange = bit_range!(7, 0); + } +} + +// Extended Cache Topology Leaf +pub mod leaf_0x8000001d { + pub const LEAF_NUM: u32 = 0x8000_001d; + + // inherit eax from leaf_cache_parameters + pub use crate::cpuid::cpu_leaf::leaf_cache_parameters::eax; +} + +// Extended APIC ID Leaf +pub mod leaf_0x8000001e { + pub const LEAF_NUM: u32 = 0x8000_001e; + + pub mod eax { + use crate::cpuid::bit_helper::BitRange; + + pub const EXTENDED_APIC_ID_BITRANGE: BitRange = bit_range!(31, 0); + } + + pub mod ebx { + use crate::cpuid::bit_helper::BitRange; + + // The number of threads per core - 1 + pub const THREADS_PER_CORE_BITRANGE: BitRange = bit_range!(15, 8); + pub const CORE_ID_BITRANGE: BitRange = bit_range!(7, 0); + } + + pub mod ecx { + use crate::cpuid::bit_helper::BitRange; + + // The number of nodes per processor. Possible values: + // 0 -> 1 node per processor + // 1 -> 2 nodes per processor + // 2 -> Reserved + // 3 -> 4 nodes per processor + pub const NODES_PER_PROCESSOR_BITRANGE: BitRange = bit_range!(10, 8); + pub const NODE_ID_BITRANGE: BitRange = bit_range!(7, 0); + } +} diff --git a/src/dragonball/src/dbs_arch/src/x86_64/cpuid/mod.rs b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/mod.rs new file mode 100644 index 000000000000..65855a86e0bd --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/mod.rs @@ -0,0 +1,76 @@ +// Copyright 2021 Alibaba Cloud. All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Utilities for configuring the CPUID (CPU identification) for the guest microVM. + +pub mod bit_helper; +pub mod cpu_leaf; + +mod brand_string; +mod common; +mod transformer; + +pub use transformer::{Error, VmSpec}; + +pub use crate::VpmuFeatureLevel; + +type CpuId = kvm_bindings::CpuId; +type CpuIdEntry = kvm_bindings::kvm_cpuid_entry2; + +/// Setup CPUID entries for the given vCPU. +/// +/// # Arguments +/// +/// * `kvm_cpuid` - KVM related structure holding the relevant CPUID info. +/// * `vm_spec` - The specifications of the VM. +/// +/// # Example +/// ```ignore +/// use dbs_arch::cpuid::{process_cpuid, VmSpec, VpmuFeatureLevel}; +/// use kvm_bindings::{CpuId, KVM_MAX_CPUID_ENTRIES}; +/// use kvm_ioctls::Kvm; +/// +/// let kvm = Kvm::new().unwrap(); +/// let mut kvm_cpuid: CpuId = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); +/// +/// let vm_spec = VmSpec::new(0, 1, 1, 1, 1, VpmuFeatureLevel::Disabled).unwrap(); +/// +/// process_cpuid(&mut kvm_cpuid, &vm_spec).unwrap(); +/// +/// // Get expected `kvm_cpuid` entries. 
+/// let entries = kvm_cpuid.as_mut_slice(); +/// ``` +pub fn process_cpuid(kvm_cpuid: &mut CpuId, vm_spec: &VmSpec) -> Result<(), Error> { + use transformer::CpuidTransformer; + + match vm_spec.cpu_vendor_id() { + self::common::VENDOR_ID_INTEL => { + self::transformer::intel::IntelCpuidTransformer::new().process_cpuid(kvm_cpuid, vm_spec) + } + self::common::VENDOR_ID_AMD => { + self::transformer::amd::AmdCpuidTransformer::new().process_cpuid(kvm_cpuid, vm_spec) + } + self::common::VENDOR_ID_HYGON => { + self::transformer::amd::AmdCpuidTransformer::new().process_cpuid(kvm_cpuid, vm_spec) + } + _ => Err(Error::CpuNotSupported), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_invalid_cpuid() { + let mut cpuid = CpuId::new(0).unwrap(); + let vm_spec = VmSpec::new(0, 2, 1, 1, 1, VpmuFeatureLevel::Disabled).unwrap(); + + process_cpuid(&mut cpuid, &vm_spec).unwrap(); + } +} diff --git a/src/dragonball/src/dbs_arch/src/x86_64/cpuid/transformer/amd.rs b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/transformer/amd.rs new file mode 100644 index 000000000000..8ed7b73f3dee --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/transformer/amd.rs @@ -0,0 +1,412 @@ +// Copyright 2021 Alibaba Cloud. All Rights Reserved. +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use kvm_bindings::KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + +use super::super::bit_helper::BitHelper; +use super::super::cpu_leaf; +use super::*; + +// Largest extended function. It has to be larger than 0x8000001d (Extended Cache Topology). +const LARGEST_EXTENDED_FN: u32 = 0x8000_001f; +// This value allows at most 256 logical threads within a package. But we currently only support +// less than or equal to 254vcpus. +// See also the documentation for leaf_0x80000008::ecx::THREAD_ID_SIZE_BITRANGE +const THREAD_ID_MAX_SIZE: u32 = 8; +// This value means there is 1 node per processor. +// See also the documentation for leaf_0x8000001e::ecx::NODES_PER_PROCESSOR_BITRANGE. +const NODES_PER_PROCESSOR: u32 = 0; + +fn update_structured_extended_entry( + entry: &mut CpuIdEntry, + _vm_spec: &VmSpec, +) -> Result<(), Error> { + use cpu_leaf::leaf_0x7::index0::*; + + // according to the EPYC PPR, only the leaf 0x7 with index 0 contains the + // structured extended feature identifiers + if entry.index == 0 { + // KVM sets this bit no matter what but this feature is not supported by hardware + entry.edx.write_bit(edx::ARCH_CAPABILITIES_BITINDEX, false); + } + + Ok(()) +} + +fn update_largest_extended_fn_entry( + entry: &mut CpuIdEntry, + _vm_spec: &VmSpec, +) -> Result<(), Error> { + use cpu_leaf::leaf_0x80000000::*; + + // KVM sets the largest extended function to 0x80000000. Change it to 0x8000001f + // Since we also use the leaf 0x8000001d (Extended Cache Topology). + entry + .eax + .write_bits_in_range(&eax::LARGEST_EXTENDED_FN_BITRANGE, LARGEST_EXTENDED_FN); + + Ok(()) +} + +fn update_extended_feature_info_entry( + entry: &mut CpuIdEntry, + _vm_spec: &VmSpec, +) -> Result<(), Error> { + use crate::cpuid::cpu_leaf::leaf_0x80000001::*; + + // set the Topology Extension bit since we use the Extended Cache Topology leaf + entry.ecx.write_bit(ecx::TOPOEXT_INDEX, true); + + Ok(()) +} + +fn update_amd_features_entry(entry: &mut CpuIdEntry, vm_spec: &VmSpec) -> Result<(), Error> { + use cpu_leaf::leaf_0x80000008::*; + + // We don't support more then 254 threads right now. 
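+    // As an illustration (assuming a 4-vCPU guest), the resulting ECX layout is:
+    //   ECX[15:12] = THREAD_ID_MAX_SIZE (8) -> room for 2^8 = 256 thread IDs per package
+    //   ECX[7:0]   = cpu_count - 1 (3)      -> 4 logical threads present in the package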
+ entry + .ecx + .write_bits_in_range(&ecx::THREAD_ID_SIZE_BITRANGE, THREAD_ID_MAX_SIZE) + .write_bits_in_range(&ecx::NUM_THREADS_BITRANGE, u32::from(vm_spec.cpu_count - 1)); + + Ok(()) +} + +fn update_extended_cache_topology_entry( + entry: &mut CpuIdEntry, + vm_spec: &VmSpec, +) -> Result<(), Error> { + entry.flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + + common::update_cache_parameters_entry(entry, vm_spec) +} + +fn update_extended_apic_id_entry(entry: &mut CpuIdEntry, vm_spec: &VmSpec) -> Result<(), Error> { + use crate::cpuid::cpu_leaf::leaf_0x8000001e::*; + + let mut core_id = u32::from(vm_spec.cpu_id); + // When hyper-threading is enabled each pair of 2 consecutive logical CPUs + // will have the same core id since they represent 2 threads in the same core. + // For Example: + // logical CPU 0 -> core id: 0 + // logical CPU 1 -> core id: 0 + // logical CPU 2 -> core id: 1 + // logical CPU 3 -> core id: 1 + if vm_spec.threads_per_core == 2 { + core_id /= 2; + } + + entry + .eax + // the Extended APIC ID is the id of the current logical CPU + .write_bits_in_range(&eax::EXTENDED_APIC_ID_BITRANGE, u32::from(vm_spec.cpu_id)); + + entry + .ebx + .write_bits_in_range(&ebx::CORE_ID_BITRANGE, core_id) + .write_bits_in_range( + &ebx::THREADS_PER_CORE_BITRANGE, + u32::from(vm_spec.threads_per_core - 1), + ); + + entry + .ecx + .write_bits_in_range(&ecx::NODES_PER_PROCESSOR_BITRANGE, NODES_PER_PROCESSOR) + // Put all the cpus in the same node. + .write_bits_in_range(&ecx::NODE_ID_BITRANGE, 0); + + Ok(()) +} + +#[derive(Default)] +pub struct AmdCpuidTransformer {} + +impl AmdCpuidTransformer { + pub fn new() -> Self { + Default::default() + } +} + +impl CpuidTransformer for AmdCpuidTransformer { + fn process_cpuid(&self, cpuid: &mut CpuId, vm_spec: &VmSpec) -> Result<(), Error> { + use cpu_leaf::*; + + common::use_host_cpuid_function(cpuid, leaf_0x0::LEAF_NUM, false)?; + common::use_host_cpuid_function(cpuid, leaf_0x8000001d::LEAF_NUM, false)?; + common::use_host_cpuid_function(cpuid, leaf_0x8000001d::LEAF_NUM, true)?; + self.process_entries(cpuid, vm_spec) + } + + fn entry_transformer_fn(&self, entry: &mut CpuIdEntry) -> Option { + use cpu_leaf::*; + + match entry.function { + leaf_0x1::LEAF_NUM => Some(common::update_feature_info_entry), + leaf_0x7::LEAF_NUM => Some(update_structured_extended_entry), + leaf_0xb::LEAF_NUM => Some(common::update_extended_topology_entry), + leaf_0x1f::LEAF_NUM => Some(common::update_extended_topology_v2_entry), + leaf_0x80000000::LEAF_NUM => Some(update_largest_extended_fn_entry), + leaf_0x80000001::LEAF_NUM => Some(update_extended_feature_info_entry), + leaf_0x80000008::LEAF_NUM => Some(update_amd_features_entry), + leaf_0x8000001d::LEAF_NUM => Some(update_extended_cache_topology_entry), + leaf_0x8000001e::LEAF_NUM => Some(update_extended_apic_id_entry), + 0x8000_0002..=0x8000_0004 => Some(common::update_brand_string_entry), + _ => None, + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_transformer_construct() { + use cpu_leaf::leaf_0x7::index0::*; + + let transformer = AmdCpuidTransformer::new(); + + let vm_spec = + VmSpec::new(0, 1, 1, 1, 1, VpmuFeatureLevel::Disabled).expect("Error creating vm_spec"); + let mut cpuid = CpuId::from_entries(&[CpuIdEntry { + function: cpu_leaf::leaf_0x7::LEAF_NUM, + index: 0, + flags: 0, + eax: 0, + ebx: 0, + ecx: 0, + edx: *(0_u32).write_bit(edx::ARCH_CAPABILITIES_BITINDEX, true), + padding: [0, 0, 0], + }]) + .unwrap(); + + transformer.process_cpuid(&mut cpuid, &vm_spec).unwrap(); + } + + #[test] + fn 
test_update_structured_extended_entry() { + use cpu_leaf::leaf_0x7::index0::*; + + // Check that if index == 0 the entry is processed + let vm_spec = + VmSpec::new(0, 1, 1, 1, 1, VpmuFeatureLevel::Disabled).expect("Error creating vm_spec"); + let entry = &mut CpuIdEntry { + function: cpu_leaf::leaf_0x7::LEAF_NUM, + index: 0, + flags: 0, + eax: 0, + ebx: 0, + ecx: 0, + edx: *(0_u32).write_bit(edx::ARCH_CAPABILITIES_BITINDEX, true), + padding: [0, 0, 0], + }; + assert!(update_structured_extended_entry(entry, &vm_spec).is_ok()); + assert!(!entry.edx.read_bit(edx::ARCH_CAPABILITIES_BITINDEX)); + + // Check that if index != 0 the entry is not processed + entry.index = 1; + entry.edx.write_bit(edx::ARCH_CAPABILITIES_BITINDEX, true); + assert!(update_structured_extended_entry(entry, &vm_spec).is_ok()); + assert!(entry.edx.read_bit(edx::ARCH_CAPABILITIES_BITINDEX)); + } + + #[test] + fn test_update_largest_extended_fn_entry() { + use crate::cpuid::cpu_leaf::leaf_0x80000000::*; + + let vm_spec = + VmSpec::new(0, 1, 1, 1, 1, VpmuFeatureLevel::Disabled).expect("Error creating vm_spec"); + let entry = &mut CpuIdEntry { + function: LEAF_NUM, + index: 0, + flags: 0, + eax: 0, + ebx: 0, + ecx: 0, + edx: 0, + padding: [0, 0, 0], + }; + + assert!(update_largest_extended_fn_entry(entry, &vm_spec).is_ok()); + + assert_eq!( + entry + .eax + .read_bits_in_range(&eax::LARGEST_EXTENDED_FN_BITRANGE), + LARGEST_EXTENDED_FN + ); + } + + #[test] + fn test_update_extended_feature_info_entry() { + use crate::cpuid::cpu_leaf::leaf_0x80000001::*; + + let vm_spec = + VmSpec::new(0, 1, 1, 1, 1, VpmuFeatureLevel::Disabled).expect("Error creating vm_spec"); + let entry = &mut CpuIdEntry { + function: LEAF_NUM, + index: 0, + flags: 0, + eax: 0, + ebx: 0, + ecx: 0, + edx: 0, + padding: [0, 0, 0], + }; + + assert!(update_extended_feature_info_entry(entry, &vm_spec).is_ok()); + + assert!(entry.ecx.read_bit(ecx::TOPOEXT_INDEX)); + } + + fn check_update_amd_features_entry( + cpu_count: u8, + threads_per_core: u8, + cores_per_die: u8, + dies_per_socket: u8, + ) { + use crate::cpuid::cpu_leaf::leaf_0x80000008::*; + + let vm_spec = VmSpec::new( + 0, + cpu_count, + threads_per_core, + cores_per_die, + dies_per_socket, + VpmuFeatureLevel::Disabled, + ) + .expect("Error creating vm_spec"); + let entry = &mut CpuIdEntry { + function: LEAF_NUM, + index: 0, + flags: 0, + eax: 0, + ebx: 0, + ecx: 0, + edx: 0, + padding: [0, 0, 0], + }; + + assert!(update_amd_features_entry(entry, &vm_spec).is_ok()); + + assert_eq!( + entry.ecx.read_bits_in_range(&ecx::NUM_THREADS_BITRANGE), + u32::from(cpu_count - 1) + ); + assert_eq!( + entry.ecx.read_bits_in_range(&ecx::THREAD_ID_SIZE_BITRANGE), + THREAD_ID_MAX_SIZE + ); + } + + fn check_update_extended_apic_id_entry( + cpu_id: u8, + cpu_count: u8, + expected_core_id: u32, + expected_threads_per_core: u32, + threads_per_core: u8, + cores_per_die: u8, + dies_per_socket: u8, + ) { + use crate::cpuid::cpu_leaf::leaf_0x8000001e::*; + + let vm_spec = VmSpec::new( + cpu_id, + cpu_count, + threads_per_core, + cores_per_die, + dies_per_socket, + VpmuFeatureLevel::Disabled, + ) + .expect("Error creating vm_spec"); + let entry = &mut CpuIdEntry { + function: LEAF_NUM, + index: 0, + flags: 0, + eax: 0, + ebx: 0, + ecx: 0, + edx: 0, + padding: [0, 0, 0], + }; + + assert!(update_extended_apic_id_entry(entry, &vm_spec).is_ok()); + + assert_eq!( + entry + .eax + .read_bits_in_range(&eax::EXTENDED_APIC_ID_BITRANGE), + u32::from(cpu_id) + ); + + assert_eq!( + entry.ebx.read_bits_in_range(&ebx::CORE_ID_BITRANGE), + 
expected_core_id + ); + assert_eq!( + entry + .ebx + .read_bits_in_range(&ebx::THREADS_PER_CORE_BITRANGE), + expected_threads_per_core + ); + + assert_eq!( + entry + .ecx + .read_bits_in_range(&ecx::NODES_PER_PROCESSOR_BITRANGE), + NODES_PER_PROCESSOR + ); + assert_eq!(entry.ecx.read_bits_in_range(&ecx::NODE_ID_BITRANGE), 0); + } + + #[test] + fn test_update_extended_cache_topology_entry() { + let vm_spec = + VmSpec::new(0, 1, 1, 1, 1, VpmuFeatureLevel::Disabled).expect("Error creating vm_spec"); + let entry = &mut CpuIdEntry { + function: cpu_leaf::leaf_0x8000001d::LEAF_NUM, + index: 0, + flags: 0, + eax: 0, + ebx: 0, + ecx: 0, + edx: 0, + padding: [0, 0, 0], + }; + + assert!(update_extended_cache_topology_entry(entry, &vm_spec).is_ok()); + + assert_eq!(entry.flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX, 1); + } + + #[test] + fn test_1vcpu_ht_off() { + check_update_amd_features_entry(1, 1, 1, 1); + + check_update_extended_apic_id_entry(0, 1, 0, 0, 1, 1, 1); + } + + #[test] + fn test_1vcpu_ht_on() { + check_update_amd_features_entry(1, 2, 1, 1); + + check_update_extended_apic_id_entry(0, 1, 0, 1, 2, 1, 1); + } + + #[test] + fn test_2vcpu_ht_off() { + check_update_amd_features_entry(2, 1, 2, 1); + + check_update_extended_apic_id_entry(0, 2, 0, 0, 1, 2, 1); + check_update_extended_apic_id_entry(1, 2, 1, 0, 1, 2, 1); + } + + #[test] + fn test_2vcpu_ht_on() { + check_update_amd_features_entry(2, 2, 2, 1); + + check_update_extended_apic_id_entry(0, 2, 0, 1, 2, 2, 1); + check_update_extended_apic_id_entry(1, 2, 0, 1, 2, 2, 1); + } +} diff --git a/src/dragonball/src/dbs_arch/src/x86_64/cpuid/transformer/common.rs b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/transformer/common.rs new file mode 100644 index 000000000000..681ef0232b85 --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/transformer/common.rs @@ -0,0 +1,628 @@ +// Copyright 2019 Alibaba Cloud. All Rights Reserved. +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use super::super::bit_helper::BitHelper; +use super::super::common::get_cpuid; +use super::super::cpu_leaf; +use super::*; + +// constants for setting the fields of kvm_cpuid2 structures +// CPUID bits in ebx, ecx, and edx. +const EBX_CLFLUSH_CACHELINE: u32 = 8; // Flush a cache line size. + +/// Prepare content for CPUID standard level 0000_0001h: get processor type/family/model/stepping +/// and feature flags +pub fn update_feature_info_entry(entry: &mut CpuIdEntry, vm_spec: &VmSpec) -> Result<(), Error> { + use cpu_leaf::leaf_0x1::*; + + // ECX bit 31 (HV): hypervisor present (and intercepting this bit, to advertise its presence) + // ECX bit 24 (TSCD): local APIC supports one-shot operation using TSC deadline value + entry + .ecx + .write_bit(ecx::TSC_DEADLINE_TIMER_BITINDEX, true) + .write_bit(ecx::HYPERVISOR_BITINDEX, true); + + // EBX bit 8-15: The CLFLUSH (8-byte) chunk count + // EBX bit 16-23: The logical processor count + // EBX bit 24-31: The (fixed) default APIC ID + entry + .ebx + .write_bits_in_range(&ebx::APICID_BITRANGE, u32::from(vm_spec.cpu_id)) + .write_bits_in_range(&ebx::CLFLUSH_SIZE_BITRANGE, EBX_CLFLUSH_CACHELINE) + .write_bits_in_range( + &ebx::CPU_COUNT_BITRANGE, + u32::from(vm_spec.threads_per_core * vm_spec.cores_per_die * vm_spec.dies_per_socket), + ); + + // EDX bit 28: Hyper-Threading Technology, PAUSE. 
A value of 1 for HTT indicates the value in + // CPUID.1.Ebx[23:16] (the Maximum number of addressable IDs for logical processors in this + // package) is valid for the package + entry + .edx + .write_bit(edx::HTT_BITINDEX, vm_spec.cpu_count > 1); + + Ok(()) +} + +/// Prepare content for CPUID standard level 0000_000Bh: get topology enumeration information. +pub fn update_extended_topology_entry( + entry: &mut CpuIdEntry, + vm_spec: &VmSpec, +) -> Result<(), Error> { + use cpu_leaf::leaf_0xb::*; + let thread_width = 8 - (vm_spec.threads_per_core - 1).leading_zeros(); + let core_width = (8 - (vm_spec.cores_per_die - 1).leading_zeros()) + thread_width; + + // EAX bit 0-4: number of bits to shift x2APIC ID right to get unique topology ID of + // next level type all logical processors with same next level ID share current level + // EBX bit 0-15: number of enabled logical processors at this level + // ECX bit 0-8: level number (same as input) + // ECX bit 8-15: level type (00h=invalid, 01h=SMT, 02h=core, 03h...FFh=reserved) + // EDX bits 0-31 contain x2APIC ID of current logical processor + entry.eax = 0_u32; + entry.ebx = 0_u32; + entry.ecx = 0_u32; + entry.edx = u32::from(vm_spec.cpu_id); + + match entry.index { + // Thread Level Topology; index = 0 + 0 => { + // To get the next level APIC ID, shift right with at most 1 because we have + // maximum 2 hyperthreads per core that can be represented by 1 bit. + entry + .eax + .write_bits_in_range(&eax::APICID_BITRANGE, thread_width); + // When cpu_count == 1 or HT is disabled, there is 1 logical core at this level + // Otherwise there are 2 + entry.ebx.write_bits_in_range( + &ebx::NUM_LOGICAL_PROCESSORS_BITRANGE, + vm_spec.threads_per_core as u32, + ); + entry + .ecx + .write_bits_in_range(&ecx::LEVEL_TYPE_BITRANGE, LEVEL_TYPE_THREAD); + } + + // Core Level Processor Topology; index = 1 + 1 => { + entry + .eax + .write_bits_in_range(&eax::APICID_BITRANGE, core_width); + entry.ebx.write_bits_in_range( + &ebx::NUM_LOGICAL_PROCESSORS_BITRANGE, + u32::from(vm_spec.threads_per_core * vm_spec.cores_per_die), + ); + entry + .ecx + .write_bits_in_range(&ecx::LEVEL_NUMBER_BITRANGE, entry.index); + entry + .ecx + .write_bits_in_range(&ecx::LEVEL_TYPE_BITRANGE, LEVEL_TYPE_CORE); + } + // Core Level Processor Topology; index >=2 + // No other levels available; This should already be set to correctly, + // and it is added here as a "re-enforcement" in case we run on + // different hardware + level => { + entry.ecx = level; + } + } + + Ok(()) +} + +/// Prepare content for Intel V2 Extended Topology Enumeration Leaf. +/// +/// Leaf_0x1f is a superset of leaf_0xb. It gives extra information like die_per_socket. +/// When CPUID executes with EAX set to 1FH, the processor returns information about extended +/// topology enumeration data. Software must detect the presence of CPUID leaf 1FH by verifying +/// - the highest leaf index supported by CPUID is >= 1FH +/// - CPUID.1FH:EBX[15:0] reports a non-zero value +/// If leaf_0x1f is not implemented in cpu used in host, guest OS should turn to leaf_0xb to +/// determine the cpu topology. 
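+///
+/// Rough worked example (assuming 2 threads/core, 4 cores/die and 2 dies/socket):
+/// thread_width = 1, core_width = 1 + 2 = 3 and die_width = 3 + 1 = 4, i.e. the
+/// x2APIC ID is shifted right by 1, 3 or 4 bits to reach the core, die or
+/// package level respectively.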
+pub fn update_extended_topology_v2_entry( + entry: &mut CpuIdEntry, + vm_spec: &VmSpec, +) -> Result<(), Error> { + use cpu_leaf::leaf_0x1f::*; + let thread_width = 8 - (vm_spec.threads_per_core - 1).leading_zeros(); + let core_width = (8 - (vm_spec.cores_per_die - 1).leading_zeros()) + thread_width; + let die_width = (8 - (vm_spec.dies_per_socket - 1).leading_zeros()) + core_width; + + // EAX bit 0-4: number of bits to shift x2APIC ID right to get unique topology ID of + // next level type all logical processors with same next level ID share current level + // EBX bit 0-15: number of enabled logical processors at this level + // ECX bit 0-8: level number (same as input) + // ECX bit 8-15: level type (00h=invalid, 01h=SMT, 02h=core, 05h=die, otherwise=reserved) + // EDX bits 0-31 contain x2APIC ID of current logical processor + entry.eax = 0_u32; + entry.ebx = 0_u32; + entry.ecx = 0_u32; + entry.edx = u32::from(vm_spec.cpu_id); + + match entry.index { + // Thread Level Topology; index = 0 + 0 => { + // To get the next level APIC ID, shift right with at most 1 because we have + // maximum 2 hyperthreads per core that can be represented by 1 bit. + entry + .eax + .write_bits_in_range(&eax::APICID_BITRANGE, thread_width); + // When cpu_count == 1 or HT is disabled, there is 1 logical core at this level + // Otherwise there are 2 + entry.ebx.write_bits_in_range( + &ebx::NUM_LOGICAL_PROCESSORS_BITRANGE, + vm_spec.threads_per_core as u32, + ); + entry + .ecx + .write_bits_in_range(&ecx::LEVEL_TYPE_BITRANGE, LEVEL_TYPE_THREAD); + } + // Core Level Processor Topology; index = 1 + 1 => { + entry + .eax + .write_bits_in_range(&eax::APICID_BITRANGE, core_width); + entry.ebx.write_bits_in_range( + &ebx::NUM_LOGICAL_PROCESSORS_BITRANGE, + u32::from(vm_spec.threads_per_core * vm_spec.cores_per_die), + ); + entry + .ecx + .write_bits_in_range(&ecx::LEVEL_NUMBER_BITRANGE, entry.index); + entry + .ecx + .write_bits_in_range(&ecx::LEVEL_TYPE_BITRANGE, LEVEL_TYPE_CORE); + } + // Die Level Processor Topology; index = 5 + 5 => { + entry + .eax + .write_bits_in_range(&eax::APICID_BITRANGE, die_width); + entry.ebx.write_bits_in_range( + &ebx::NUM_LOGICAL_PROCESSORS_BITRANGE, + u32::from( + vm_spec.threads_per_core * vm_spec.cores_per_die * vm_spec.dies_per_socket, + ), + ); + entry + .ecx + .write_bits_in_range(&ecx::LEVEL_NUMBER_BITRANGE, entry.index); + entry + .ecx + .write_bits_in_range(&ecx::LEVEL_TYPE_BITRANGE, LEVEL_TYPE_DIE); + } + level => { + entry.ecx = level; + } + } + + Ok(()) +} + +/// Prepare content for CPUID standard level 8000_0002/3/4h: get processor name string. +pub fn update_brand_string_entry(entry: &mut CpuIdEntry, vm_spec: &VmSpec) -> Result<(), Error> { + let brand_string = &vm_spec.brand_string; + entry.eax = brand_string.get_reg_for_leaf(entry.function, BsReg::Eax); + entry.ebx = brand_string.get_reg_for_leaf(entry.function, BsReg::Ebx); + entry.ecx = brand_string.get_reg_for_leaf(entry.function, BsReg::Ecx); + entry.edx = brand_string.get_reg_for_leaf(entry.function, BsReg::Edx); + + Ok(()) +} + +/// Prepare content for CPUID extended level 8000_001Dh: get cache configuration descriptors. 
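+///
+/// Sketch of the resulting sharing levels (assuming 4 vCPUs with 2 threads per
+/// core): the L1/L2 descriptors report the cache as shared by the 2 hyperthreads
+/// of a core, while the L3 descriptor reports it as shared by all 4 logical CPUs.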
+pub fn update_cache_parameters_entry( + entry: &mut CpuIdEntry, + vm_spec: &VmSpec, +) -> Result<(), Error> { + use cpu_leaf::leaf_cache_parameters::*; + + // EAX bit 14-25: cores per cache - 1 + + match entry.eax.read_bits_in_range(&eax::CACHE_LEVEL_BITRANGE) { + // L1 & L2 Cache + 1 | 2 => { + // The L1 & L2 cache is shared by at most 2 hyperthreads + entry.eax.write_bits_in_range( + &eax::MAX_CPUS_PER_CORE_BITRANGE, + (vm_spec.cpu_count > 1 && vm_spec.threads_per_core == 2) as u32, + ); + } + // L3 Cache + 3 => { + // The L3 cache is shared among all the logical threads + entry.eax.write_bits_in_range( + &eax::MAX_CPUS_PER_CORE_BITRANGE, + u32::from(vm_spec.cpu_count - 1), + ); + } + _ => (), + } + + Ok(()) +} + +/// Replaces the `cpuid` entries corresponding to `function` with the entries from the host's cpuid. +pub fn use_host_cpuid_function( + cpuid: &mut CpuId, + function: u32, + use_count: bool, +) -> Result<(), Error> { + // copy all the CpuId entries, except for the ones with the provided function + cpuid.retain(|entry| entry.function != function); + + // add all the host leaves with the provided function + let mut count: u32 = 0; + while let Ok(entry) = get_cpuid(function, count) { + if count > 0 && !use_count { + break; + } + + cpuid + .push(CpuIdEntry { + function, + index: count, + flags: 0, + eax: entry.eax, + ebx: entry.ebx, + ecx: entry.ecx, + edx: entry.edx, + padding: [0, 0, 0], + }) + .map_err(Error::FamError)?; + + count += 1; + } + + Ok(()) +} + +#[cfg(test)] +mod test { + use kvm_bindings::kvm_cpuid_entry2; + + use super::*; + use crate::cpuid::common::tests::get_topoext_fn; + use crate::cpuid::cpu_leaf::leaf_0x1f::LEVEL_TYPE_DIE; + use crate::cpuid::cpu_leaf::leaf_0xb::LEVEL_TYPE_CORE; + use crate::cpuid::cpu_leaf::leaf_0xb::LEVEL_TYPE_THREAD; + use crate::cpuid::transformer::VmSpec; + + fn check_update_feature_info_entry( + cpu_count: u8, + expected_htt: bool, + threads_per_core: u8, + cores_per_die: u8, + dies_per_socket: u8, + ) { + use crate::cpuid::cpu_leaf::leaf_0x1::*; + + let vm_spec = VmSpec::new( + 0, + cpu_count, + threads_per_core, + cores_per_die, + dies_per_socket, + VpmuFeatureLevel::Disabled, + ) + .expect("Error creating vm_spec"); + let entry = &mut kvm_cpuid_entry2 { + function: 0x0, + index: 0, + flags: 0, + eax: 0, + ebx: 0, + ecx: 0, + edx: 0, + padding: [0, 0, 0], + }; + + assert!(update_feature_info_entry(entry, &vm_spec).is_ok()); + + assert!(entry.edx.read_bit(edx::HTT_BITINDEX) == expected_htt); + assert!(entry.ecx.read_bit(ecx::TSC_DEADLINE_TIMER_BITINDEX)); + } + + fn check_update_cache_parameters_entry( + cpu_count: u8, + cache_level: u32, + expected_max_cpus_per_core: u32, + threads_per_core: u8, + cores_per_die: u8, + dies_per_socket: u8, + ) { + use crate::cpuid::cpu_leaf::leaf_cache_parameters::*; + + let vm_spec = VmSpec::new( + 0, + cpu_count, + threads_per_core, + cores_per_die, + dies_per_socket, + VpmuFeatureLevel::Disabled, + ) + .expect("Error creating vm_spec"); + let entry = &mut kvm_cpuid_entry2 { + function: 0x0, + index: 0, + flags: 0, + eax: *(0_u32).write_bits_in_range(&eax::CACHE_LEVEL_BITRANGE, cache_level), + ebx: 0, + ecx: 0, + edx: 0, + padding: [0, 0, 0], + }; + + assert!(update_cache_parameters_entry(entry, &vm_spec).is_ok()); + + assert!( + entry + .eax + .read_bits_in_range(&eax::MAX_CPUS_PER_CORE_BITRANGE) + == expected_max_cpus_per_core + ); + } + + #[allow(clippy::too_many_arguments)] + fn check_update_extended_topology_entry( + cpu_count: u8, + index: u32, + expected_apicid_shift_bit: u32, + 
expected_num_logical_processors: u32, + expected_level_type: u32, + threads_per_core: u8, + cores_per_die: u8, + dies_per_socket: u8, + ) { + use crate::cpuid::cpu_leaf::leaf_0xb::*; + + let vm_spec = VmSpec::new( + 0, + cpu_count, + threads_per_core, + cores_per_die, + dies_per_socket, + VpmuFeatureLevel::Disabled, + ) + .expect("Error creating vm_spec"); + let entry = &mut kvm_cpuid_entry2 { + function: 0x0, + index, + flags: 0, + eax: 0, + ebx: 0, + ecx: 0, + edx: 0, + padding: [0, 0, 0], + }; + + assert!(update_extended_topology_entry(entry, &vm_spec).is_ok()); + + assert!(entry.eax.read_bits_in_range(&eax::APICID_BITRANGE) == expected_apicid_shift_bit); + assert!( + entry + .ebx + .read_bits_in_range(&ebx::NUM_LOGICAL_PROCESSORS_BITRANGE) + == expected_num_logical_processors + ); + assert!(entry.ecx.read_bits_in_range(&ecx::LEVEL_TYPE_BITRANGE) == expected_level_type); + assert!(entry.ecx.read_bits_in_range(&ecx::LEVEL_NUMBER_BITRANGE) == index); + } + + #[allow(clippy::too_many_arguments)] + fn check_update_extended_topology_v2_entry( + cpu_count: u8, + index: u32, + expected_apicid_shift_bit: u32, + expected_num_logical_processors: u32, + expected_level_type: u32, + threads_per_core: u8, + cores_per_die: u8, + dies_per_socket: u8, + ) { + use crate::cpuid::cpu_leaf::leaf_0x1f::*; + + let vm_spec = VmSpec::new( + 0, + cpu_count, + threads_per_core, + cores_per_die, + dies_per_socket, + VpmuFeatureLevel::Disabled, + ) + .expect("Error creating vm_spec"); + let entry = &mut kvm_cpuid_entry2 { + function: 0x0, + index, + flags: 0, + eax: 0, + ebx: 0, + ecx: 0, + edx: 0, + padding: [0, 0, 0], + }; + + assert!(update_extended_topology_v2_entry(entry, &vm_spec).is_ok()); + + assert!(entry.eax.read_bits_in_range(&eax::APICID_BITRANGE) == expected_apicid_shift_bit); + assert!( + entry + .ebx + .read_bits_in_range(&ebx::NUM_LOGICAL_PROCESSORS_BITRANGE) + == expected_num_logical_processors + ); + assert!(entry.ecx.read_bits_in_range(&ecx::LEVEL_TYPE_BITRANGE) == expected_level_type); + assert!(entry.ecx.read_bits_in_range(&ecx::LEVEL_NUMBER_BITRANGE) == index); + } + + #[test] + fn test_1vcpu_ht_off() { + check_update_feature_info_entry(1, false, 1, 1, 1); + + // test update_deterministic_cache_entry + // test L1 + check_update_cache_parameters_entry(1, 1, 0, 1, 1, 1); + // test L2 + check_update_cache_parameters_entry(1, 2, 0, 1, 1, 1); + // test L3 + check_update_cache_parameters_entry(1, 3, 0, 1, 1, 1); + + // test update_extended_topology_entry + // index 0 + check_update_extended_topology_entry(1, 0, 0, 1, LEVEL_TYPE_THREAD, 1, 1, 1); + check_update_extended_topology_v2_entry(1, 0, 0, 1, LEVEL_TYPE_THREAD, 1, 1, 1); + // index 1 + check_update_extended_topology_entry(1, 1, 0, 1, LEVEL_TYPE_CORE, 1, 1, 1); + check_update_extended_topology_v2_entry(1, 1, 0, 1, LEVEL_TYPE_CORE, 1, 1, 1); + // index 5 + check_update_extended_topology_v2_entry(1, 5, 0, 1, LEVEL_TYPE_DIE, 1, 1, 1); + } + + #[test] + fn test_1vcpu_ht_on() { + check_update_feature_info_entry(1, false, 2, 1, 1); + + // test update_deterministic_cache_entry + // test L1 + check_update_cache_parameters_entry(1, 1, 0, 2, 1, 1); + // test L2 + check_update_cache_parameters_entry(1, 2, 0, 2, 1, 1); + // test L3 + check_update_cache_parameters_entry(1, 3, 0, 2, 1, 1); + + // test update_extended_topology_entry + // index 0 + check_update_extended_topology_entry(1, 0, 1, 2, LEVEL_TYPE_THREAD, 2, 1, 1); + check_update_extended_topology_v2_entry(1, 0, 1, 2, LEVEL_TYPE_THREAD, 2, 1, 1); + // index 1 + check_update_extended_topology_entry(1, 
1, 1, 2, LEVEL_TYPE_CORE, 2, 1, 1); + check_update_extended_topology_v2_entry(1, 1, 1, 2, LEVEL_TYPE_CORE, 2, 1, 1); + // index 5 + check_update_extended_topology_v2_entry(1, 5, 1, 2, LEVEL_TYPE_DIE, 2, 1, 1); + } + + #[test] + fn test_2vcpu_ht_off() { + check_update_feature_info_entry(2, true, 1, 2, 1); + + // test update_deterministic_cache_entry + // test L1 + check_update_cache_parameters_entry(2, 1, 0, 1, 2, 1); + // test L2 + check_update_cache_parameters_entry(2, 2, 0, 1, 2, 1); + // test L3 + check_update_cache_parameters_entry(2, 3, 1, 1, 2, 1); + + // test update_extended_topology_entry + // index 0 + check_update_extended_topology_entry(2, 0, 0, 1, LEVEL_TYPE_THREAD, 1, 2, 1); + check_update_extended_topology_v2_entry(2, 0, 0, 1, LEVEL_TYPE_THREAD, 1, 2, 1); + // index 1 + check_update_extended_topology_entry(2, 1, 1, 2, LEVEL_TYPE_CORE, 1, 2, 1); + check_update_extended_topology_v2_entry(2, 1, 1, 2, LEVEL_TYPE_CORE, 1, 2, 1); + // index 5 + check_update_extended_topology_v2_entry(2, 5, 1, 2, LEVEL_TYPE_DIE, 1, 2, 1); + } + + #[test] + fn test_2vcpu_ht_on() { + check_update_feature_info_entry(2, true, 2, 2, 1); + + // test update_deterministic_cache_entry + // test L1 + check_update_cache_parameters_entry(2, 1, 1, 2, 2, 1); + // test L2 + check_update_cache_parameters_entry(2, 2, 1, 2, 2, 1); + // test L3 + check_update_cache_parameters_entry(2, 3, 1, 2, 2, 1); + + // test update_extended_topology_entry + // index 0 + check_update_extended_topology_entry(2, 0, 1, 2, LEVEL_TYPE_THREAD, 2, 2, 1); + check_update_extended_topology_v2_entry(2, 0, 1, 2, LEVEL_TYPE_THREAD, 2, 2, 1); + // index 1 + check_update_extended_topology_entry(2, 1, 2, 4, LEVEL_TYPE_CORE, 2, 2, 1); + check_update_extended_topology_v2_entry(2, 1, 2, 4, LEVEL_TYPE_CORE, 2, 2, 1); + // index 5 + check_update_extended_topology_v2_entry(2, 5, 2, 4, LEVEL_TYPE_DIE, 2, 2, 1); + } + + #[test] + fn test_2dies_2vcpu_ht_off() { + // test update_extended_topology_entry + // index 0 + check_update_extended_topology_entry(2, 0, 0, 1, LEVEL_TYPE_THREAD, 1, 1, 2); + check_update_extended_topology_v2_entry(2, 0, 0, 1, LEVEL_TYPE_THREAD, 1, 1, 2); + // index 1 + check_update_extended_topology_entry(2, 1, 0, 1, LEVEL_TYPE_CORE, 1, 1, 2); + check_update_extended_topology_v2_entry(2, 1, 0, 1, LEVEL_TYPE_CORE, 1, 1, 2); + // index 5 + check_update_extended_topology_v2_entry(2, 5, 1, 2, LEVEL_TYPE_DIE, 1, 1, 2); + } + + #[test] + fn test_2dies_4vcpu_ht_on() { + // test update_extended_topology_entry + // index 0 + check_update_extended_topology_entry(4, 0, 1, 2, LEVEL_TYPE_THREAD, 2, 1, 2); + check_update_extended_topology_v2_entry(4, 0, 1, 2, LEVEL_TYPE_THREAD, 2, 1, 2); + // index 1 + check_update_extended_topology_entry(4, 1, 1, 2, LEVEL_TYPE_CORE, 2, 1, 2); + check_update_extended_topology_v2_entry(4, 1, 1, 2, LEVEL_TYPE_CORE, 2, 1, 2); + // index 5 + check_update_extended_topology_v2_entry(4, 5, 2, 4, LEVEL_TYPE_DIE, 2, 1, 2); + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn test_use_host_cpuid_function_with_count() { + // try to emulate the extended cache topology leaves + let topoext_fn = get_topoext_fn(); + + // check that it behaves correctly for TOPOEXT function + let mut cpuid = CpuId::new(1).unwrap(); + cpuid.as_mut_slice()[0].function = topoext_fn; + assert!(use_host_cpuid_function(&mut cpuid, topoext_fn, true).is_ok()); + let entries = cpuid.as_mut_slice(); + assert!(entries.len() > 1); + for (count, entry) in entries.iter_mut().enumerate() { + assert!(entry.function == topoext_fn); + 
assert!(entry.index == count as u32); + assert!(entry.eax != 0); + } + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn test_use_host_cpuid_function_without_count() { + use crate::cpuid::cpu_leaf::leaf_0x1::*; + // try to emulate the extended cache topology leaves + let feature_info_fn = LEAF_NUM; + + // check that it behaves correctly for TOPOEXT function + let mut cpuid = CpuId::new(1).unwrap(); + cpuid.as_mut_slice()[0].function = feature_info_fn; + assert!(use_host_cpuid_function(&mut cpuid, feature_info_fn, false).is_ok()); + let entries = cpuid.as_mut_slice(); + assert!(entries.len() == 1); + let entry = entries[0]; + + assert!(entry.function == feature_info_fn); + assert!(entry.index == 0); + assert!(entry.eax != 0); + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn test_use_host_cpuid_function_err() { + let topoext_fn = get_topoext_fn(); + // check that it returns Err when there are too many entriesentry.function == topoext_fn + let mut cpuid = CpuId::new(kvm_bindings::KVM_MAX_CPUID_ENTRIES).unwrap(); + match use_host_cpuid_function(&mut cpuid, topoext_fn, true) { + Err(Error::FamError(vmm_sys_util::fam::Error::SizeLimitExceeded)) => {} + _ => panic!("Wrong behavior"), + } + } +} diff --git a/src/dragonball/src/dbs_arch/src/x86_64/cpuid/transformer/intel.rs b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/transformer/intel.rs new file mode 100644 index 000000000000..a7395126a192 --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/transformer/intel.rs @@ -0,0 +1,280 @@ +// Copyright 2021 Alibaba Cloud. All Rights Reserved. +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use super::super::bit_helper::BitHelper; +use super::super::cpu_leaf; +use super::*; + +fn update_deterministic_cache_entry(entry: &mut CpuIdEntry, vm_spec: &VmSpec) -> Result<(), Error> { + use cpu_leaf::leaf_0x4::*; + + common::update_cache_parameters_entry(entry, vm_spec)?; + + // If leaf_0xB or leaf_0x1F is enabled, leaf0x4 won't be used to generate topology information. + // In most cases, we could have leaf_0xB in our host cpu. But we keep the leaf_0x4 eax[26,31] + // to prevent rare cases. + if vm_spec.cpu_count <= 64 { + entry.eax.write_bits_in_range( + &eax::MAX_CORES_PER_PACKAGE_BITRANGE, + u32::from(vm_spec.cpu_count - 1), + ); + } + + Ok(()) +} + +fn update_power_management_entry(entry: &mut CpuIdEntry, _vm_spec: &VmSpec) -> Result<(), Error> { + // disable pstate feature + entry.eax = 0; + entry.ebx = 0; + entry.ecx = 0; + entry.edx = 0; + + Ok(()) +} + +fn update_perf_mon_entry(entry: &mut CpuIdEntry, vm_spec: &VmSpec) -> Result<(), Error> { + use cpu_leaf::leaf_0xa::*; + + // Architectural Performance Monitor Leaf + match vm_spec.vpmu_feature { + VpmuFeatureLevel::Disabled => { + // Disable PMU + entry.eax = 0; + entry.ebx = 0; + entry.ecx = 0; + entry.edx = 0; + } + VpmuFeatureLevel::LimitedlyEnabled => { + // Allow minimal vpmu ability (only instuctions and cycles pmu). 
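+            // PMC_VERSION_ID = 2 advertises architectural PMU version 2 and
+            // BIT_LEN_PMEVENT = 7 reports seven enumerable event flags in EBX;
+            // the EBX writes below then leave only core cycles, reference
+            // cycles and retired instructions marked as available.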
+ entry.eax.write_bits_in_range(&eax::PMC_VERSION_ID, 2); + entry.eax.write_bits_in_range(&eax::BIT_LEN_PMEVENT, 7); + + // 0(false) means support for the targeted performance monitoring event + entry.ebx.write_bit(ebx::CORE_CYCLES_BITINDEX, false); + entry.ebx.write_bit(ebx::REF_CYCLES_BITINDEX, false); + entry.ebx.write_bit(ebx::INST_RETIRED_BITINDEX, false); + entry.ebx.write_bit(ebx::BR_INST_RETIRED_BITINDEX, true); + entry.ebx.write_bit(ebx::LLC_MISSES_BITINDEX, true); + entry.ebx.write_bit(ebx::LLC_REF_BITINDEX, true); + entry.ebx.write_bit(ebx::BR_MIS_RETIRED_BITINDEX, true); + } + VpmuFeatureLevel::FullyEnabled => { + // Allow all supported vpmu ability + entry.eax.write_bits_in_range(&eax::PMC_VERSION_ID, 2); + entry.eax.write_bits_in_range(&eax::BIT_LEN_PMEVENT, 7); + + // 0(false) means support for the targeted performance monitoring event + entry.ebx.write_bit(ebx::CORE_CYCLES_BITINDEX, false); + entry.ebx.write_bit(ebx::REF_CYCLES_BITINDEX, false); + entry.ebx.write_bit(ebx::INST_RETIRED_BITINDEX, false); + entry.ebx.write_bit(ebx::BR_INST_RETIRED_BITINDEX, false); + entry.ebx.write_bit(ebx::LLC_MISSES_BITINDEX, false); + entry.ebx.write_bit(ebx::LLC_REF_BITINDEX, false); + entry.ebx.write_bit(ebx::BR_MIS_RETIRED_BITINDEX, false); + } + }; + Ok(()) +} + +#[derive(Default)] +pub struct IntelCpuidTransformer {} + +impl IntelCpuidTransformer { + pub fn new() -> Self { + Default::default() + } +} + +impl CpuidTransformer for IntelCpuidTransformer { + fn process_cpuid(&self, cpuid: &mut CpuId, vm_spec: &VmSpec) -> Result<(), Error> { + common::use_host_cpuid_function(cpuid, cpu_leaf::leaf_0x0::LEAF_NUM, false)?; + self.process_entries(cpuid, vm_spec) + } + + fn entry_transformer_fn(&self, entry: &mut CpuIdEntry) -> Option { + use cpu_leaf::*; + + match entry.function { + leaf_0x1::LEAF_NUM => Some(common::update_feature_info_entry), + leaf_0x4::LEAF_NUM => Some(intel::update_deterministic_cache_entry), + leaf_0x6::LEAF_NUM => Some(intel::update_power_management_entry), + leaf_0xa::LEAF_NUM => Some(intel::update_perf_mon_entry), + leaf_0xb::LEAF_NUM => Some(common::update_extended_topology_entry), + leaf_0x1f::LEAF_NUM => Some(common::update_extended_topology_v2_entry), + 0x8000_0002..=0x8000_0004 => Some(common::update_brand_string_entry), + _ => None, + } + } +} + +#[cfg(test)] +mod test { + use kvm_bindings::kvm_cpuid_entry2; + + use super::*; + use crate::cpuid::transformer::VmSpec; + + #[test] + fn test_update_perf_mon_entry() { + use crate::cpuid::cpu_leaf::leaf_0xa::*; + // Test when vpmu is off (level Disabled) + let vm_spec = + VmSpec::new(0, 1, 1, 1, 1, VpmuFeatureLevel::Disabled).expect("Error creating vm_spec"); + let entry = &mut kvm_cpuid_entry2 { + function: LEAF_NUM, + index: 0, + flags: 0, + eax: 1, + ebx: 1, + ecx: 1, + edx: 1, + padding: [0, 0, 0], + }; + + assert!(update_perf_mon_entry(entry, &vm_spec).is_ok()); + + assert_eq!(entry.eax, 0); + assert_eq!(entry.ebx, 0); + assert_eq!(entry.ecx, 0); + assert_eq!(entry.edx, 0); + + // Test when only instructions and cycles pmu are enabled (level LimitedlyEnabled) + let vm_spec = VmSpec::new(0, 1, 1, 1, 1, VpmuFeatureLevel::LimitedlyEnabled) + .expect("Error creating vm_spec"); + let entry = &mut kvm_cpuid_entry2 { + function: 0x0, + index: 0, + flags: 0, + eax: 0, + ebx: 0, + ecx: 0, + edx: 0, + padding: [0, 0, 0], + }; + + assert!(update_perf_mon_entry(entry, &vm_spec).is_ok()); + assert_eq!(entry.eax.read_bits_in_range(&eax::PMC_VERSION_ID), 2); + assert_eq!(entry.eax.read_bits_in_range(&eax::BIT_LEN_PMEVENT), 7); 
+ + assert!(!entry.ebx.read_bit(ebx::CORE_CYCLES_BITINDEX)); + assert!(!entry.ebx.read_bit(ebx::INST_RETIRED_BITINDEX)); + assert!(!entry.ebx.read_bit(ebx::REF_CYCLES_BITINDEX)); + assert!(entry.ebx.read_bit(ebx::LLC_REF_BITINDEX)); + assert!(entry.ebx.read_bit(ebx::LLC_MISSES_BITINDEX)); + assert!(entry.ebx.read_bit(ebx::BR_INST_RETIRED_BITINDEX)); + assert!(entry.ebx.read_bit(ebx::BR_MIS_RETIRED_BITINDEX)); + + // Test when all vpmu features are enabled (level FullyEnabled) + let vm_spec = VmSpec::new(0, 1, 1, 1, 1, VpmuFeatureLevel::FullyEnabled) + .expect("Error creating vm_spec"); + let entry = &mut kvm_cpuid_entry2 { + function: 0x0, + index: 0, + flags: 0, + eax: 0, + ebx: 0, + ecx: 0, + edx: 0, + padding: [0, 0, 0], + }; + + assert!(update_perf_mon_entry(entry, &vm_spec).is_ok()); + + assert_eq!(entry.eax.read_bits_in_range(&eax::PMC_VERSION_ID), 2); + assert_eq!(entry.eax.read_bits_in_range(&eax::BIT_LEN_PMEVENT), 7); + + assert!(!entry.ebx.read_bit(ebx::CORE_CYCLES_BITINDEX)); + assert!(!entry.ebx.read_bit(ebx::INST_RETIRED_BITINDEX)); + assert!(!entry.ebx.read_bit(ebx::REF_CYCLES_BITINDEX)); + assert!(!entry.ebx.read_bit(ebx::LLC_REF_BITINDEX)); + assert!(!entry.ebx.read_bit(ebx::LLC_MISSES_BITINDEX)); + assert!(!entry.ebx.read_bit(ebx::BR_INST_RETIRED_BITINDEX)); + assert!(!entry.ebx.read_bit(ebx::BR_MIS_RETIRED_BITINDEX)); + } + + fn check_update_deterministic_cache_entry( + cpu_count: u8, + cache_level: u32, + expected_max_cores_per_package: u32, + threads_per_core: u8, + cores_per_die: u8, + dies_per_socket: u8, + ) { + use crate::cpuid::cpu_leaf::leaf_0x4::*; + + let vm_spec = VmSpec::new( + 0, + cpu_count, + threads_per_core, + cores_per_die, + dies_per_socket, + VpmuFeatureLevel::Disabled, + ) + .expect("Error creating vm_spec"); + let entry = &mut kvm_cpuid_entry2 { + function: 0x0, + index: 0, + flags: 0, + eax: *(0_u32).write_bits_in_range(&eax::CACHE_LEVEL_BITRANGE, cache_level), + ebx: 0, + ecx: 0, + edx: 0, + padding: [0, 0, 0], + }; + + assert!(update_deterministic_cache_entry(entry, &vm_spec).is_ok()); + + assert!( + entry + .eax + .read_bits_in_range(&eax::MAX_CORES_PER_PACKAGE_BITRANGE) + == expected_max_cores_per_package + ); + } + + #[test] + fn test_1vcpu_ht_off() { + // test update_deterministic_cache_entry + // test L1 + check_update_deterministic_cache_entry(1, 1, 0, 1, 1, 1); + // test L2 + check_update_deterministic_cache_entry(1, 2, 0, 1, 1, 1); + // test L3 + check_update_deterministic_cache_entry(1, 3, 0, 1, 1, 1); + } + + #[test] + fn test_1vcpu_ht_on() { + // test update_deterministic_cache_entry + // test L1 + check_update_deterministic_cache_entry(1, 1, 0, 2, 1, 1); + // test L2 + check_update_deterministic_cache_entry(1, 2, 0, 2, 1, 1); + // test L3 + check_update_deterministic_cache_entry(1, 3, 0, 2, 1, 1); + } + + #[test] + fn test_2vcpu_ht_off() { + // test update_deterministic_cache_entry + // test L1 + check_update_deterministic_cache_entry(2, 1, 1, 1, 2, 1); + // test L2 + check_update_deterministic_cache_entry(2, 2, 1, 1, 2, 1); + // test L3 + check_update_deterministic_cache_entry(2, 3, 1, 1, 2, 1); + } + + #[test] + fn test_2vcpu_ht_on() { + // test update_deterministic_cache_entry + // test L1 + check_update_deterministic_cache_entry(2, 1, 1, 2, 2, 1); + // test L2 + check_update_deterministic_cache_entry(2, 2, 1, 2, 2, 1); + // test L3 + check_update_deterministic_cache_entry(2, 3, 1, 2, 2, 1); + } +} diff --git a/src/dragonball/src/dbs_arch/src/x86_64/cpuid/transformer/mod.rs 
b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/transformer/mod.rs new file mode 100644 index 000000000000..58dac23eca5b --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/cpuid/transformer/mod.rs @@ -0,0 +1,172 @@ +// Copyright 2021 Alibaba Cloud. All Rights Reserved. +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use super::brand_string::{BrandString, Reg as BsReg}; +use super::common::get_vendor_id; +use super::{CpuId, CpuIdEntry}; +use crate::VpmuFeatureLevel; + +pub mod amd; +pub mod common; +pub mod intel; + +/// Structure containing the specifications of the VM +pub struct VmSpec { + /// The vendor id of the CPU + cpu_vendor_id: [u8; 12], + /// The id of the current logical cpu in the range [0..cpu_count]. + cpu_id: u8, + /// The total number of logical cpus (includes cpus that could be hotplugged). + cpu_count: u8, + /// The desired brand string for the guest. + brand_string: BrandString, + /// threads per core for cpu topology information + threads_per_core: u8, + /// cores per die for cpu topology information + cores_per_die: u8, + /// dies per socket for cpu topology information + dies_per_socket: u8, + /// if vpmu feature is Disabled, it means vpmu feature is off (by default) + /// if vpmu feature is LimitedlyEnabled, it means minimal vpmu counters are supported (cycles and instructions) + /// if vpmu feature is FullyEnabled, it means all vpmu counters are supported + vpmu_feature: VpmuFeatureLevel, +} + +impl VmSpec { + /// Creates a new instance of VmSpec with the specified parameters + /// The brand string is deduced from the vendor_id + pub fn new( + cpu_id: u8, + cpu_count: u8, + threads_per_core: u8, + cores_per_die: u8, + dies_per_socket: u8, + vpmu_feature: VpmuFeatureLevel, + ) -> Result { + let cpu_vendor_id = get_vendor_id().map_err(Error::InternalError)?; + let brand_string = + BrandString::from_vendor_id(&cpu_vendor_id).map_err(Error::BrandString)?; + + Ok(VmSpec { + cpu_vendor_id, + cpu_id, + cpu_count, + brand_string, + threads_per_core, + cores_per_die, + dies_per_socket, + vpmu_feature, + }) + } + + /// Returns an immutable reference to cpu_vendor_id + pub fn cpu_vendor_id(&self) -> &[u8; 12] { + &self.cpu_vendor_id + } +} + +/// Errors associated with processing the CPUID leaves. +#[derive(Debug, Clone)] +pub enum Error { + /// Failed to parse CPU brand string + BrandString(super::brand_string::Error), + /// The CPU architecture is not supported + CpuNotSupported, + /// A FamStructWrapper operation has failed + FamError(vmm_sys_util::fam::Error), + /// A call to an internal helper method failed + InternalError(super::common::Error), + /// The maximum number of addressable logical CPUs cannot be stored in an `u8`. + VcpuCountOverflow, +} + +pub type EntryTransformerFn = fn(entry: &mut CpuIdEntry, vm_spec: &VmSpec) -> Result<(), Error>; + +/// Generic trait that provides methods for transforming the cpuid +pub trait CpuidTransformer { + /// Process the cpuid array and make the desired transformations. + fn process_cpuid(&self, cpuid: &mut CpuId, vm_spec: &VmSpec) -> Result<(), Error> { + self.process_entries(cpuid, vm_spec) + } + + /// Iterate through all the cpuid entries and calls the associated transformer for each one. 
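+    ///
+    /// Entries whose function has no registered transformer (i.e.
+    /// `entry_transformer_fn` returns `None`) are left untouched.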
+ fn process_entries(&self, cpuid: &mut CpuId, vm_spec: &VmSpec) -> Result<(), Error> { + for entry in cpuid.as_mut_slice().iter_mut() { + let maybe_transformer_fn = self.entry_transformer_fn(entry); + + if let Some(transformer_fn) = maybe_transformer_fn { + transformer_fn(entry, vm_spec)?; + } + } + + Ok(()) + } + + /// Get the associated transformer for a cpuid entry + fn entry_transformer_fn(&self, _entry: &mut CpuIdEntry) -> Option { + None + } +} + +#[cfg(test)] +mod test { + use super::*; + use kvm_bindings::kvm_cpuid_entry2; + + const PROCESSED_FN: u32 = 1; + const EXPECTED_INDEX: u32 = 100; + + fn transform_entry(entry: &mut kvm_cpuid_entry2, _vm_spec: &VmSpec) -> Result<(), Error> { + entry.index = EXPECTED_INDEX; + + Ok(()) + } + + struct MockCpuidTransformer {} + + impl CpuidTransformer for MockCpuidTransformer { + fn entry_transformer_fn(&self, entry: &mut kvm_cpuid_entry2) -> Option { + match entry.function { + PROCESSED_FN => Some(transform_entry), + _ => None, + } + } + } + + #[test] + fn test_process_cpuid() { + let num_entries = 5; + + let mut cpuid = CpuId::new(num_entries).unwrap(); + let vm_spec = VmSpec::new(0, 1, 1, 1, 1, VpmuFeatureLevel::Disabled); + cpuid.as_mut_slice()[0].function = PROCESSED_FN; + assert!(MockCpuidTransformer {} + .process_cpuid(&mut cpuid, &vm_spec.unwrap()) + .is_ok()); + + assert!(cpuid.as_mut_slice().len() == num_entries); + for entry in cpuid.as_mut_slice().iter() { + match entry.function { + PROCESSED_FN => { + assert_eq!(entry.index, EXPECTED_INDEX); + } + _ => { + assert_ne!(entry.index, EXPECTED_INDEX); + } + } + } + } + + #[test] + fn test_invalid_cpu_architecture_cpuid() { + use crate::cpuid::process_cpuid; + let num_entries = 5; + + let mut cpuid = CpuId::new(num_entries).unwrap(); + let mut vm_spec = VmSpec::new(0, 1, 1, 1, 1, VpmuFeatureLevel::Disabled).unwrap(); + + vm_spec.cpu_vendor_id = [1; 12]; + assert!(process_cpuid(&mut cpuid, &vm_spec).is_err()); + } +} diff --git a/src/dragonball/src/dbs_arch/src/x86_64/gdt.rs b/src/dragonball/src/dbs_arch/src/x86_64/gdt.rs new file mode 100644 index 000000000000..dd8e9d095bd4 --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/gdt.rs @@ -0,0 +1,119 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +// For GDT details see arch/x86/include/asm/segment.h + +#![allow(missing_docs)] + +use kvm_bindings::kvm_segment; + +/// Constructor for a conventional segment GDT (or LDT) entry. Derived from the kernel's segment.h. 
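+///
+/// For example, `gdt_entry(0xA09B, 0x10_0000, 0xfffff)` (the 64-bit code segment
+/// used in the tests below) spreads the base over bits 63:56 and 39:16, the
+/// flags over bits 55:52 and 47:40, and the limit over bits 51:48 and 15:0 of
+/// the returned descriptor.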
+#[allow(unused_parens)] +pub fn gdt_entry(flags: u16, base: u32, limit: u32) -> u64 { + (((u64::from(base) & 0xff00_0000u64) << (56 - 24)) + | ((u64::from(flags) & 0x0000_f0ffu64) << 40) + | ((u64::from(limit) & 0x000f_0000u64) << (48 - 16)) + | ((u64::from(base) & 0x00ff_ffffu64) << 16) + | (u64::from(limit) & 0x0000_ffffu64)) +} + +#[allow(unused_parens)] +fn get_base(entry: u64) -> u64 { + ((((entry) & 0xFF00_0000_0000_0000) >> 32) + | (((entry) & 0x0000_00FF_0000_0000) >> 16) + | (((entry) & 0x0000_0000_FFFF_0000) >> 16)) +} + +fn get_limit(entry: u64) -> u32 { + ((((entry) & 0x000F_0000_0000_0000) >> 32) | ((entry) & 0x0000_0000_0000_FFFF)) as u32 +} + +fn get_g(entry: u64) -> u8 { + ((entry & 0x0080_0000_0000_0000) >> 55) as u8 +} + +fn get_db(entry: u64) -> u8 { + ((entry & 0x0040_0000_0000_0000) >> 54) as u8 +} + +fn get_l(entry: u64) -> u8 { + ((entry & 0x0020_0000_0000_0000) >> 53) as u8 +} + +fn get_avl(entry: u64) -> u8 { + ((entry & 0x0010_0000_0000_0000) >> 52) as u8 +} + +fn get_p(entry: u64) -> u8 { + ((entry & 0x0000_8000_0000_0000) >> 47) as u8 +} + +fn get_dpl(entry: u64) -> u8 { + ((entry & 0x0000_6000_0000_0000) >> 45) as u8 +} + +fn get_s(entry: u64) -> u8 { + ((entry & 0x0000_1000_0000_0000) >> 44) as u8 +} + +fn get_type(entry: u64) -> u8 { + ((entry & 0x0000_0F00_0000_0000) >> 40) as u8 +} + +/// Automatically build the kvm struct for SET_SREGS from the kernel bit fields. +/// +/// # Arguments +/// +/// * `entry` - The gdt entry. +/// * `table_index` - Index of the entry in the gdt table. +pub fn kvm_segment_from_gdt(entry: u64, table_index: u8) -> kvm_segment { + kvm_segment { + base: get_base(entry), + limit: get_limit(entry), + selector: u16::from(table_index * 8), + type_: get_type(entry), + present: get_p(entry), + dpl: get_dpl(entry), + db: get_db(entry), + s: get_s(entry), + l: get_l(entry), + g: get_g(entry), + avl: get_avl(entry), + padding: 0, + unusable: match get_p(entry) { + 0 => 1, + _ => 0, + }, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn field_parse() { + let gdt = gdt_entry(0xA09B, 0x10_0000, 0xfffff); + let seg = kvm_segment_from_gdt(gdt, 0); + // 0xA09B + // 'A' + assert_eq!(0x1, seg.g); + assert_eq!(0x0, seg.db); + assert_eq!(0x1, seg.l); + assert_eq!(0x0, seg.avl); + // '9' + assert_eq!(0x1, seg.present); + assert_eq!(0x0, seg.dpl); + assert_eq!(0x1, seg.s); + // 'B' + assert_eq!(0xB, seg.type_); + // base and limit + assert_eq!(0x10_0000, seg.base); + assert_eq!(0xfffff, seg.limit); + assert_eq!(0x0, seg.unusable); + } +} diff --git a/src/dragonball/src/dbs_arch/src/x86_64/interrupts.rs b/src/dragonball/src/dbs_arch/src/x86_64/interrupts.rs new file mode 100644 index 000000000000..8a7e3b6bdfcb --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/interrupts.rs @@ -0,0 +1,136 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use kvm_bindings::kvm_lapic_state; +use kvm_ioctls::VcpuFd; + +/// Errors thrown while configuring the LAPIC. +#[derive(Debug)] +pub enum Error { + /// Failure in retrieving the LAPIC configuration. + GetLapic(kvm_ioctls::Error), + /// Failure in modifying the LAPIC configuration. + SetLapic(kvm_ioctls::Error), +} +type Result = std::result::Result; + +// Defines poached from apicdef.h kernel header. 
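+// APIC_LVT0/APIC_LVT1 are the byte offsets of the LINT0/LINT1 local vector
+// table registers inside kvm_lapic_state.regs; the delivery mode occupies
+// bits 10:8 of each register, which is what set_apic_delivery_mode() rewrites
+// (EXTINT for LINT0, NMI for LINT1).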
+const APIC_LVT0: usize = 0x350; +const APIC_LVT1: usize = 0x360; +const APIC_MODE_NMI: u32 = 0x4; +const APIC_MODE_EXTINT: u32 = 0x7; + +fn get_klapic_reg(klapic: &kvm_lapic_state, reg_offset: usize) -> u32 { + let range = reg_offset..reg_offset + 4; + let reg = klapic.regs.get(range).expect("get_klapic_reg range"); + + let mut reg_bytes = [0u8; 4]; + for (byte, read) in reg_bytes.iter_mut().zip(reg.iter().cloned()) { + *byte = read as u8; + } + + u32::from_le_bytes(reg_bytes) +} + +fn set_klapic_reg(klapic: &mut kvm_lapic_state, reg_offset: usize, value: u32) { + let range = reg_offset..reg_offset + 4; + let reg = klapic.regs.get_mut(range).expect("set_klapic_reg range"); + + let value = u32::to_le_bytes(value); + for (byte, read) in reg.iter_mut().zip(value.iter().cloned()) { + *byte = read as i8; + } +} + +#[allow(unused_parens)] +fn set_apic_delivery_mode(reg: u32, mode: u32) -> u32 { + (((reg) & !0x700) | ((mode) << 8)) +} + +/// Configures LAPICs. LAPIC0 is set for external interrupts, LAPIC1 is set for NMI. +/// +/// # Arguments +/// * `vcpu` - The VCPU object to configure. +pub fn set_lint(vcpu: &VcpuFd) -> Result<()> { + let mut klapic = vcpu.get_lapic().map_err(Error::GetLapic)?; + + let lvt_lint0 = get_klapic_reg(&klapic, APIC_LVT0); + set_klapic_reg( + &mut klapic, + APIC_LVT0, + set_apic_delivery_mode(lvt_lint0, APIC_MODE_EXTINT), + ); + let lvt_lint1 = get_klapic_reg(&klapic, APIC_LVT1); + set_klapic_reg( + &mut klapic, + APIC_LVT1, + set_apic_delivery_mode(lvt_lint1, APIC_MODE_NMI), + ); + + vcpu.set_lapic(&klapic).map_err(Error::SetLapic) +} + +#[cfg(test)] +mod tests { + use super::*; + use kvm_ioctls::Kvm; + + const KVM_APIC_REG_SIZE: usize = 0x400; + + #[test] + fn test_set_and_get_klapic_reg() { + let reg_offset = 0x340; + let mut klapic = kvm_lapic_state::default(); + set_klapic_reg(&mut klapic, reg_offset, 3); + let value = get_klapic_reg(&klapic, reg_offset); + assert_eq!(value, 3); + } + + #[test] + #[should_panic] + fn test_set_and_get_klapic_out_of_bounds() { + let reg_offset = KVM_APIC_REG_SIZE + 10; + let mut klapic = kvm_lapic_state::default(); + set_klapic_reg(&mut klapic, reg_offset, 3); + } + + #[test] + fn test_setlint() { + let kvm = Kvm::new().unwrap(); + assert!(kvm.check_extension(kvm_ioctls::Cap::Irqchip)); + let vm = kvm.create_vm().unwrap(); + //the get_lapic ioctl will fail if there is no irqchip created beforehand. + assert!(vm.create_irq_chip().is_ok()); + let vcpu = vm.create_vcpu(0).unwrap(); + let klapic_before: kvm_lapic_state = vcpu.get_lapic().unwrap(); + + // Compute the value that is expected to represent LVT0 and LVT1. + let lint0 = get_klapic_reg(&klapic_before, APIC_LVT0); + let lint1 = get_klapic_reg(&klapic_before, APIC_LVT1); + let lint0_mode_expected = set_apic_delivery_mode(lint0, APIC_MODE_EXTINT); + let lint1_mode_expected = set_apic_delivery_mode(lint1, APIC_MODE_NMI); + + set_lint(&vcpu).unwrap(); + + // Compute the value that represents LVT0 and LVT1 after set_lint. 
+ let klapic_actual: kvm_lapic_state = vcpu.get_lapic().unwrap(); + let lint0_mode_actual = get_klapic_reg(&klapic_actual, APIC_LVT0); + let lint1_mode_actual = get_klapic_reg(&klapic_actual, APIC_LVT1); + assert_eq!(lint0_mode_expected, lint0_mode_actual); + assert_eq!(lint1_mode_expected, lint1_mode_actual); + } + + #[test] + fn test_setlint_fails() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + // 'get_lapic' ioctl triggered by the 'set_lint' function will fail if there is no + // irqchip created beforehand. + assert!(set_lint(&vcpu).is_err()); + } +} diff --git a/src/dragonball/src/dbs_arch/src/x86_64/mod.rs b/src/dragonball/src/dbs_arch/src/x86_64/mod.rs new file mode 100644 index 000000000000..6d39e5b5890a --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/mod.rs @@ -0,0 +1,15 @@ +// Copyright 2021 Alibaba Cloud. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! CPU architecture specific constants and utilities for the `x86_64` architecture. + +/// Definitions for x86 CPUID +pub mod cpuid; +/// Definitions for x86 Global Descriptor Table +pub mod gdt; +/// Definitions for x86 interrupts +pub mod interrupts; +/// Definitions for x86 Model Specific Registers(MSR). +pub mod msr; +/// Definitions for x86 Registers +pub mod regs; diff --git a/src/dragonball/src/dbs_arch/src/x86_64/msr.rs b/src/dragonball/src/dbs_arch/src/x86_64/msr.rs new file mode 100644 index 000000000000..fcdfb848b982 --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/msr.rs @@ -0,0 +1,778 @@ +/* automatically generated by rust-bindgen */ + +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![allow(missing_docs)] +#![allow(dead_code)] +#![allow(non_upper_case_globals)] + +/// Model Specific Registers (MSRs) related functionality. +use std::result; + +use kvm_bindings::MsrList; +use kvm_ioctls::Kvm; + +#[derive(Debug)] +/// MSR related errors. +pub enum Error { + /// Getting supported MSRs failed. + GetSupportedModelSpecificRegisters(kvm_ioctls::Error), + /// Setting up MSRs failed. + SetModelSpecificRegisters(kvm_ioctls::Error), + /// Failed to set all MSRs. + SetModelSpecificRegistersCount, + /// Msr error + Msr(vmm_sys_util::fam::Error), +} + +type Result = result::Result; + +/// MSR range +struct MsrRange { + /// Base MSR address + base: u32, + /// Number of MSRs + nmsrs: u32, +} + +impl MsrRange { + /// Returns whether `msr` is contained in this MSR range. + fn contains(&self, msr: u32) -> bool { + self.base <= msr && msr < self.base + self.nmsrs + } +} + +// Creates a MsrRange of one msr given as argument. +macro_rules! SINGLE_MSR { + ($msr:expr) => { + MsrRange { + base: $msr, + nmsrs: 1, + } + }; +} + +// Creates a MsrRange of with msr base and count given as arguments. +macro_rules! MSR_RANGE { + ($first:expr, $count:expr) => { + MsrRange { + base: $first, + nmsrs: $count, + } + }; +} + +// List of MSRs that can be serialized. List is sorted in ascending order of MSRs addresses. 
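+// For example, MSR_RANGE!(APIC_BASE_MSR, APIC_MSR_INDEXES) builds
+// MsrRange { base: 0x800, nmsrs: 0x400 }, so MsrRange::contains() accepts any
+// x2APIC MSR in 0x800..0xC00; SINGLE_MSR!(msr) is the same with nmsrs == 1.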
+static WHITELISTED_MSR_RANGES: &[MsrRange] = &[ + SINGLE_MSR!(MSR_IA32_P5_MC_ADDR), + SINGLE_MSR!(MSR_IA32_P5_MC_TYPE), + SINGLE_MSR!(MSR_IA32_TSC), + SINGLE_MSR!(MSR_IA32_PLATFORM_ID), + SINGLE_MSR!(MSR_IA32_APICBASE), + SINGLE_MSR!(MSR_IA32_EBL_CR_POWERON), + SINGLE_MSR!(MSR_EBC_FREQUENCY_ID), + SINGLE_MSR!(MSR_SMI_COUNT), + SINGLE_MSR!(MSR_IA32_FEATURE_CONTROL), + SINGLE_MSR!(MSR_IA32_TSC_ADJUST), + SINGLE_MSR!(MSR_IA32_SPEC_CTRL), + SINGLE_MSR!(MSR_IA32_PRED_CMD), + SINGLE_MSR!(MSR_IA32_UCODE_WRITE), + SINGLE_MSR!(MSR_IA32_UCODE_REV), + SINGLE_MSR!(MSR_IA32_SMBASE), + SINGLE_MSR!(MSR_FSB_FREQ), + SINGLE_MSR!(MSR_PLATFORM_INFO), + SINGLE_MSR!(MSR_PKG_CST_CONFIG_CONTROL), + SINGLE_MSR!(MSR_IA32_MPERF), + SINGLE_MSR!(MSR_IA32_APERF), + SINGLE_MSR!(MSR_MTRRcap), + SINGLE_MSR!(MSR_IA32_BBL_CR_CTL3), + SINGLE_MSR!(MSR_IA32_SYSENTER_CS), + SINGLE_MSR!(MSR_IA32_SYSENTER_ESP), + SINGLE_MSR!(MSR_IA32_SYSENTER_EIP), + SINGLE_MSR!(MSR_IA32_MCG_CAP), + SINGLE_MSR!(MSR_IA32_MCG_STATUS), + SINGLE_MSR!(MSR_IA32_MCG_CTL), + SINGLE_MSR!(MSR_IA32_PERF_STATUS), + SINGLE_MSR!(MSR_IA32_MISC_ENABLE), + SINGLE_MSR!(MSR_MISC_FEATURE_CONTROL), + SINGLE_MSR!(MSR_MISC_PWR_MGMT), + SINGLE_MSR!(MSR_TURBO_RATIO_LIMIT), + SINGLE_MSR!(MSR_TURBO_RATIO_LIMIT1), + SINGLE_MSR!(MSR_IA32_DEBUGCTLMSR), + SINGLE_MSR!(MSR_IA32_LASTBRANCHFROMIP), + SINGLE_MSR!(MSR_IA32_LASTBRANCHTOIP), + SINGLE_MSR!(MSR_IA32_LASTINTFROMIP), + SINGLE_MSR!(MSR_IA32_LASTINTTOIP), + SINGLE_MSR!(MSR_IA32_POWER_CTL), + MSR_RANGE!( + // IA32_MTRR_PHYSBASE0 + 0x200, 0x100 + ), + MSR_RANGE!( + // MSR_CORE_C3_RESIDENCY + // MSR_CORE_C6_RESIDENCY + // MSR_CORE_C7_RESIDENCY + MSR_CORE_C3_RESIDENCY, + 3 + ), + MSR_RANGE!(MSR_IA32_MC0_CTL, 0x80), + SINGLE_MSR!(MSR_RAPL_POWER_UNIT), + MSR_RANGE!( + // MSR_PKGC3_IRTL + // MSR_PKGC6_IRTL + // MSR_PKGC7_IRTL + MSR_PKGC3_IRTL, + 3 + ), + SINGLE_MSR!(MSR_PKG_POWER_LIMIT), + SINGLE_MSR!(MSR_PKG_ENERGY_STATUS), + SINGLE_MSR!(MSR_PKG_PERF_STATUS), + SINGLE_MSR!(MSR_PKG_POWER_INFO), + SINGLE_MSR!(MSR_DRAM_POWER_LIMIT), + SINGLE_MSR!(MSR_DRAM_ENERGY_STATUS), + SINGLE_MSR!(MSR_DRAM_PERF_STATUS), + SINGLE_MSR!(MSR_DRAM_POWER_INFO), + SINGLE_MSR!(MSR_CONFIG_TDP_NOMINAL), + SINGLE_MSR!(MSR_CONFIG_TDP_LEVEL_1), + SINGLE_MSR!(MSR_CONFIG_TDP_LEVEL_2), + SINGLE_MSR!(MSR_CONFIG_TDP_CONTROL), + SINGLE_MSR!(MSR_TURBO_ACTIVATION_RATIO), + SINGLE_MSR!(MSR_IA32_TSCDEADLINE), + MSR_RANGE!(APIC_BASE_MSR, APIC_MSR_INDEXES), + SINGLE_MSR!(MSR_IA32_BNDCFGS), + SINGLE_MSR!(MSR_KVM_WALL_CLOCK_NEW), + SINGLE_MSR!(MSR_KVM_SYSTEM_TIME_NEW), + SINGLE_MSR!(MSR_KVM_ASYNC_PF_EN), + SINGLE_MSR!(MSR_KVM_STEAL_TIME), + SINGLE_MSR!(MSR_KVM_PV_EOI_EN), + SINGLE_MSR!(MSR_EFER), + SINGLE_MSR!(MSR_STAR), + SINGLE_MSR!(MSR_LSTAR), + SINGLE_MSR!(MSR_CSTAR), + SINGLE_MSR!(MSR_SYSCALL_MASK), + SINGLE_MSR!(MSR_FS_BASE), + SINGLE_MSR!(MSR_GS_BASE), + SINGLE_MSR!(MSR_KERNEL_GS_BASE), + SINGLE_MSR!(MSR_TSC_AUX), +]; + +/// Specifies whether a particular MSR should be included in vcpu serialization. +/// +/// # Arguments +/// +/// * `index` - The index of the MSR that is checked whether it's needed for serialization. +pub fn msr_should_serialize(index: u32) -> bool { + // Blacklisted MSRs not exported by Linux: IA32_FEATURE_CONTROL and IA32_MCG_CTL + if index == MSR_IA32_FEATURE_CONTROL || index == MSR_IA32_MCG_CTL { + return false; + }; + WHITELISTED_MSR_RANGES + .iter() + .any(|range| range.contains(index)) +} + +/// Returns the list of supported, serializable MSRs. +/// +/// # Arguments +/// +/// * `kvm_fd` - Structure that holds the KVM's fd. 
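+///
+/// # Example
+/// ```ignore
+/// // Rough usage sketch: query KVM and keep only the serializable MSR indices.
+/// use kvm_ioctls::Kvm;
+///
+/// let kvm = Kvm::new().unwrap();
+/// let msr_list = supported_guest_msrs(&kvm).unwrap();
+/// assert!(msr_list.as_slice().iter().all(|&msr| msr_should_serialize(msr)));
+/// ```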
+pub fn supported_guest_msrs(kvm_fd: &Kvm) -> Result { + let mut msr_list = kvm_fd + .get_msr_index_list() + .map_err(Error::GetSupportedModelSpecificRegisters)?; + + msr_list.retain(|msr_index| msr_should_serialize(*msr_index)); + + Ok(msr_list) +} + +/// Base MSR for APIC +pub const APIC_BASE_MSR: u32 = 0x800; + +/// Number of APIC MSR indexes +pub const APIC_MSR_INDEXES: u32 = 0x400; + +/// Custom MSRs fall in the range 0x4b564d00-0x4b564dff +pub const MSR_KVM_WALL_CLOCK_NEW: u32 = 0x4b56_4d00; +pub const MSR_KVM_SYSTEM_TIME_NEW: u32 = 0x4b56_4d01; +pub const MSR_KVM_ASYNC_PF_EN: u32 = 0x4b56_4d02; +pub const MSR_KVM_STEAL_TIME: u32 = 0x4b56_4d03; +pub const MSR_KVM_PV_EOI_EN: u32 = 0x4b56_4d04; + +pub const MSR_EFER: u32 = 3221225600; +pub const MSR_STAR: u32 = 3221225601; +pub const MSR_LSTAR: u32 = 3221225602; +pub const MSR_CSTAR: u32 = 3221225603; +pub const MSR_SYSCALL_MASK: u32 = 3221225604; +pub const MSR_FS_BASE: u32 = 3221225728; +pub const MSR_GS_BASE: u32 = 3221225729; +pub const MSR_KERNEL_GS_BASE: u32 = 3221225730; +pub const MSR_TSC_AUX: u32 = 3221225731; +pub const _EFER_SCE: u32 = 0; +pub const _EFER_LME: u32 = 8; +pub const _EFER_LMA: u32 = 10; +pub const _EFER_NX: u32 = 11; +pub const _EFER_SVME: u32 = 12; +pub const _EFER_LMSLE: u32 = 13; +pub const _EFER_FFXSR: u32 = 14; +pub const EFER_SCE: u32 = 1; +pub const EFER_LME: u32 = 256; +pub const EFER_LMA: u32 = 1024; +pub const EFER_NX: u32 = 2048; +pub const EFER_SVME: u32 = 4096; +pub const EFER_LMSLE: u32 = 8192; +pub const EFER_FFXSR: u32 = 16384; +pub const MSR_IA32_SPEC_CTRL: u32 = 72; +pub const SPEC_CTRL_IBRS: u32 = 1; +pub const SPEC_CTRL_STIBP: u32 = 2; +pub const SPEC_CTRL_SSBD_SHIFT: u32 = 2; +pub const SPEC_CTRL_SSBD: u32 = 4; +pub const MSR_IA32_PRED_CMD: u32 = 73; +pub const PRED_CMD_IBPB: u32 = 1; +pub const MSR_IA32_PERFCTR0: u32 = 193; +pub const MSR_IA32_PERFCTR1: u32 = 194; +pub const MSR_FSB_FREQ: u32 = 205; +pub const MSR_PLATFORM_INFO: u32 = 206; +pub const MSR_NHM_SNB_PKG_CST_CFG_CTL: u32 = 226; +pub const NHM_C3_AUTO_DEMOTE: u32 = 33554432; +pub const NHM_C1_AUTO_DEMOTE: u32 = 67108864; +pub const ATM_LNC_C6_AUTO_DEMOTE: u32 = 33554432; +pub const SNB_C1_AUTO_UNDEMOTE: u32 = 134217728; +pub const SNB_C3_AUTO_UNDEMOTE: u32 = 268435456; +pub const MSR_MTRRcap: u32 = 254; +pub const MSR_IA32_ARCH_CAPABILITIES: u32 = 266; +pub const ARCH_CAP_RDCL_NO: u32 = 1; +pub const ARCH_CAP_IBRS_ALL: u32 = 2; +pub const ARCH_CAP_SKIP_VMENTRY_L1DFLUSH: u32 = 8; +pub const ARCH_CAP_SSB_NO: u32 = 16; +pub const MSR_IA32_FLUSH_CMD: u32 = 267; +pub const L1D_FLUSH: u32 = 1; +pub const MSR_PKG_CST_CONFIG_CONTROL: u32 = 226; +pub const MSR_IA32_BBL_CR_CTL: u32 = 281; +pub const MSR_IA32_BBL_CR_CTL3: u32 = 286; +pub const MSR_IA32_SYSENTER_CS: u32 = 372; +pub const MSR_IA32_SYSENTER_ESP: u32 = 373; +pub const MSR_IA32_SYSENTER_EIP: u32 = 374; +pub const MSR_IA32_MCG_CAP: u32 = 377; +pub const MSR_IA32_MCG_STATUS: u32 = 378; +pub const MSR_IA32_MCG_CTL: u32 = 379; +pub const MSR_IA32_MCG_EXT_CTL: u32 = 1232; +pub const MSR_OFFCORE_RSP_0: u32 = 422; +pub const MSR_OFFCORE_RSP_1: u32 = 423; +pub const MSR_TURBO_RATIO_LIMIT: u32 = 429; +pub const MSR_TURBO_RATIO_LIMIT1: u32 = 430; +pub const MSR_TURBO_RATIO_LIMIT2: u32 = 431; +pub const MSR_LBR_SELECT: u32 = 456; +pub const MSR_LBR_TOS: u32 = 457; +pub const MSR_LBR_NHM_FROM: u32 = 1664; +pub const MSR_LBR_NHM_TO: u32 = 1728; +pub const MSR_LBR_CORE_FROM: u32 = 64; +pub const MSR_LBR_CORE_TO: u32 = 96; +pub const MSR_LBR_INFO_0: u32 = 3520; +pub const LBR_INFO_CYCLES: u32 
= 65535; +pub const MSR_IA32_PEBS_ENABLE: u32 = 1009; +pub const MSR_IA32_DS_AREA: u32 = 1536; +pub const MSR_IA32_PERF_CAPABILITIES: u32 = 837; +pub const MSR_PEBS_LD_LAT_THRESHOLD: u32 = 1014; +pub const MSR_IA32_RTIT_CTL: u32 = 1392; +pub const MSR_IA32_RTIT_STATUS: u32 = 1393; +pub const MSR_IA32_RTIT_ADDR0_A: u32 = 1408; +pub const MSR_IA32_RTIT_ADDR0_B: u32 = 1409; +pub const MSR_IA32_RTIT_ADDR1_A: u32 = 1410; +pub const MSR_IA32_RTIT_ADDR1_B: u32 = 1411; +pub const MSR_IA32_RTIT_ADDR2_A: u32 = 1412; +pub const MSR_IA32_RTIT_ADDR2_B: u32 = 1413; +pub const MSR_IA32_RTIT_ADDR3_A: u32 = 1414; +pub const MSR_IA32_RTIT_ADDR3_B: u32 = 1415; +pub const MSR_IA32_RTIT_CR3_MATCH: u32 = 1394; +pub const MSR_IA32_RTIT_OUTPUT_BASE: u32 = 1376; +pub const MSR_IA32_RTIT_OUTPUT_MASK: u32 = 1377; +pub const MSR_MTRRfix64K_00000: u32 = 592; +pub const MSR_MTRRfix16K_80000: u32 = 600; +pub const MSR_MTRRfix16K_A0000: u32 = 601; +pub const MSR_MTRRfix4K_C0000: u32 = 616; +pub const MSR_MTRRfix4K_C8000: u32 = 617; +pub const MSR_MTRRfix4K_D0000: u32 = 618; +pub const MSR_MTRRfix4K_D8000: u32 = 619; +pub const MSR_MTRRfix4K_E0000: u32 = 620; +pub const MSR_MTRRfix4K_E8000: u32 = 621; +pub const MSR_MTRRfix4K_F0000: u32 = 622; +pub const MSR_MTRRfix4K_F8000: u32 = 623; +pub const MSR_MTRRdefType: u32 = 767; +pub const MSR_IA32_CR_PAT: u32 = 631; +pub const MSR_IA32_DEBUGCTLMSR: u32 = 473; +pub const MSR_IA32_LASTBRANCHFROMIP: u32 = 475; +pub const MSR_IA32_LASTBRANCHTOIP: u32 = 476; +pub const MSR_IA32_LASTINTFROMIP: u32 = 477; +pub const MSR_IA32_LASTINTTOIP: u32 = 478; +pub const DEBUGCTLMSR_LBR: u32 = 1; +pub const DEBUGCTLMSR_BTF_SHIFT: u32 = 1; +pub const DEBUGCTLMSR_BTF: u32 = 2; +pub const DEBUGCTLMSR_TR: u32 = 64; +pub const DEBUGCTLMSR_BTS: u32 = 128; +pub const DEBUGCTLMSR_BTINT: u32 = 256; +pub const DEBUGCTLMSR_BTS_OFF_OS: u32 = 512; +pub const DEBUGCTLMSR_BTS_OFF_USR: u32 = 1024; +pub const DEBUGCTLMSR_FREEZE_LBRS_ON_PMI: u32 = 2048; +pub const MSR_PEBS_FRONTEND: u32 = 1015; +pub const MSR_IA32_POWER_CTL: u32 = 508; +pub const MSR_IA32_MC0_CTL: u32 = 1024; +pub const MSR_IA32_MC0_STATUS: u32 = 1025; +pub const MSR_IA32_MC0_ADDR: u32 = 1026; +pub const MSR_IA32_MC0_MISC: u32 = 1027; +pub const MSR_PKG_C3_RESIDENCY: u32 = 1016; +pub const MSR_PKG_C6_RESIDENCY: u32 = 1017; +pub const MSR_PKG_C7_RESIDENCY: u32 = 1018; +pub const MSR_CORE_C3_RESIDENCY: u32 = 1020; +pub const MSR_CORE_C6_RESIDENCY: u32 = 1021; +pub const MSR_CORE_C7_RESIDENCY: u32 = 1022; +pub const MSR_KNL_CORE_C6_RESIDENCY: u32 = 1023; +pub const MSR_PKG_C2_RESIDENCY: u32 = 1549; +pub const MSR_PKG_C8_RESIDENCY: u32 = 1584; +pub const MSR_PKG_C9_RESIDENCY: u32 = 1585; +pub const MSR_PKG_C10_RESIDENCY: u32 = 1586; +pub const MSR_PKGC3_IRTL: u32 = 1546; +pub const MSR_PKGC6_IRTL: u32 = 1547; +pub const MSR_PKGC7_IRTL: u32 = 1548; +pub const MSR_PKGC8_IRTL: u32 = 1587; +pub const MSR_PKGC9_IRTL: u32 = 1588; +pub const MSR_PKGC10_IRTL: u32 = 1589; +pub const MSR_RAPL_POWER_UNIT: u32 = 1542; +pub const MSR_PKG_POWER_LIMIT: u32 = 1552; +pub const MSR_PKG_ENERGY_STATUS: u32 = 1553; +pub const MSR_PKG_PERF_STATUS: u32 = 1555; +pub const MSR_PKG_POWER_INFO: u32 = 1556; +pub const MSR_DRAM_POWER_LIMIT: u32 = 1560; +pub const MSR_DRAM_ENERGY_STATUS: u32 = 1561; +pub const MSR_DRAM_PERF_STATUS: u32 = 1563; +pub const MSR_DRAM_POWER_INFO: u32 = 1564; +pub const MSR_PP0_POWER_LIMIT: u32 = 1592; +pub const MSR_PP0_ENERGY_STATUS: u32 = 1593; +pub const MSR_PP0_POLICY: u32 = 1594; +pub const MSR_PP0_PERF_STATUS: u32 = 1595; +pub const 
MSR_PP1_POWER_LIMIT: u32 = 1600; +pub const MSR_PP1_ENERGY_STATUS: u32 = 1601; +pub const MSR_PP1_POLICY: u32 = 1602; +pub const MSR_CONFIG_TDP_NOMINAL: u32 = 1608; +pub const MSR_CONFIG_TDP_LEVEL_1: u32 = 1609; +pub const MSR_CONFIG_TDP_LEVEL_2: u32 = 1610; +pub const MSR_CONFIG_TDP_CONTROL: u32 = 1611; +pub const MSR_TURBO_ACTIVATION_RATIO: u32 = 1612; +pub const MSR_PLATFORM_ENERGY_STATUS: u32 = 1613; +pub const MSR_PKG_WEIGHTED_CORE_C0_RES: u32 = 1624; +pub const MSR_PKG_ANY_CORE_C0_RES: u32 = 1625; +pub const MSR_PKG_ANY_GFXE_C0_RES: u32 = 1626; +pub const MSR_PKG_BOTH_CORE_GFXE_C0_RES: u32 = 1627; +pub const MSR_CORE_C1_RES: u32 = 1632; +pub const MSR_CC6_DEMOTION_POLICY_CONFIG: u32 = 1640; +pub const MSR_MC6_DEMOTION_POLICY_CONFIG: u32 = 1641; +pub const MSR_CORE_PERF_LIMIT_REASONS: u32 = 1680; +pub const MSR_GFX_PERF_LIMIT_REASONS: u32 = 1712; +pub const MSR_RING_PERF_LIMIT_REASONS: u32 = 1713; +pub const MSR_PPERF: u32 = 1614; +pub const MSR_PERF_LIMIT_REASONS: u32 = 1615; +pub const MSR_PM_ENABLE: u32 = 1904; +pub const MSR_HWP_CAPABILITIES: u32 = 1905; +pub const MSR_HWP_REQUEST_PKG: u32 = 1906; +pub const MSR_HWP_INTERRUPT: u32 = 1907; +pub const MSR_HWP_REQUEST: u32 = 1908; +pub const MSR_HWP_STATUS: u32 = 1911; +pub const HWP_BASE_BIT: u32 = 128; +pub const HWP_NOTIFICATIONS_BIT: u32 = 256; +pub const HWP_ACTIVITY_WINDOW_BIT: u32 = 512; +pub const HWP_ENERGY_PERF_PREFERENCE_BIT: u32 = 1024; +pub const HWP_PACKAGE_LEVEL_REQUEST_BIT: u32 = 2048; +pub const MSR_AMD64_MC0_MASK: u32 = 3221291076; +pub const MSR_IA32_MC0_CTL2: u32 = 640; +pub const MSR_P6_PERFCTR0: u32 = 193; +pub const MSR_P6_PERFCTR1: u32 = 194; +pub const MSR_P6_EVNTSEL0: u32 = 390; +pub const MSR_P6_EVNTSEL1: u32 = 391; +pub const MSR_KNC_PERFCTR0: u32 = 32; +pub const MSR_KNC_PERFCTR1: u32 = 33; +pub const MSR_KNC_EVNTSEL0: u32 = 40; +pub const MSR_KNC_EVNTSEL1: u32 = 41; +pub const MSR_IA32_PMC0: u32 = 1217; +pub const MSR_AMD64_PATCH_LEVEL: u32 = 139; +pub const MSR_AMD64_TSC_RATIO: u32 = 3221225732; +pub const MSR_AMD64_NB_CFG: u32 = 3221291039; +pub const MSR_AMD64_PATCH_LOADER: u32 = 3221291040; +pub const MSR_AMD64_OSVW_ID_LENGTH: u32 = 3221291328; +pub const MSR_AMD64_OSVW_STATUS: u32 = 3221291329; +pub const MSR_AMD64_LS_CFG: u32 = 3221295136; +pub const MSR_AMD64_DC_CFG: u32 = 3221295138; +pub const MSR_AMD64_BU_CFG2: u32 = 3221295146; +pub const MSR_AMD64_IBSFETCHCTL: u32 = 3221295152; +pub const MSR_AMD64_IBSFETCHLINAD: u32 = 3221295153; +pub const MSR_AMD64_IBSFETCHPHYSAD: u32 = 3221295154; +pub const MSR_AMD64_IBSFETCH_REG_COUNT: u32 = 3; +pub const MSR_AMD64_IBSFETCH_REG_MASK: u32 = 7; +pub const MSR_AMD64_IBSOPCTL: u32 = 3221295155; +pub const MSR_AMD64_IBSOPRIP: u32 = 3221295156; +pub const MSR_AMD64_IBSOPDATA: u32 = 3221295157; +pub const MSR_AMD64_IBSOPDATA2: u32 = 3221295158; +pub const MSR_AMD64_IBSOPDATA3: u32 = 3221295159; +pub const MSR_AMD64_IBSDCLINAD: u32 = 3221295160; +pub const MSR_AMD64_IBSDCPHYSAD: u32 = 3221295161; +pub const MSR_AMD64_IBSOP_REG_COUNT: u32 = 7; +pub const MSR_AMD64_IBSOP_REG_MASK: u32 = 127; +pub const MSR_AMD64_IBSCTL: u32 = 3221295162; +pub const MSR_AMD64_IBSBRTARGET: u32 = 3221295163; +pub const MSR_AMD64_IBSOPDATA4: u32 = 3221295165; +pub const MSR_AMD64_IBS_REG_COUNT_MAX: u32 = 8; +pub const MSR_AMD64_VIRT_SPEC_CTRL: u32 = 3221291295; +pub const MSR_F17H_IRPERF: u32 = 3221225705; +pub const MSR_F16H_L2I_PERF_CTL: u32 = 3221291568; +pub const MSR_F16H_L2I_PERF_CTR: u32 = 3221291569; +pub const MSR_F16H_DR1_ADDR_MASK: u32 = 3221295129; +pub const 
MSR_F16H_DR2_ADDR_MASK: u32 = 3221295130; +pub const MSR_F16H_DR3_ADDR_MASK: u32 = 3221295131; +pub const MSR_F16H_DR0_ADDR_MASK: u32 = 3221295143; +pub const MSR_F15H_PERF_CTL: u32 = 3221291520; +pub const MSR_F15H_PERF_CTR: u32 = 3221291521; +pub const MSR_F15H_NB_PERF_CTL: u32 = 3221291584; +pub const MSR_F15H_NB_PERF_CTR: u32 = 3221291585; +pub const MSR_F15H_PTSC: u32 = 3221291648; +pub const MSR_F15H_IC_CFG: u32 = 3221295137; +pub const MSR_FAM10H_MMIO_CONF_BASE: u32 = 3221291096; +pub const FAM10H_MMIO_CONF_ENABLE: u32 = 1; +pub const FAM10H_MMIO_CONF_BUSRANGE_MASK: u32 = 15; +pub const FAM10H_MMIO_CONF_BUSRANGE_SHIFT: u32 = 2; +pub const FAM10H_MMIO_CONF_BASE_MASK: u32 = 268435455; +pub const FAM10H_MMIO_CONF_BASE_SHIFT: u32 = 20; +pub const MSR_FAM10H_NODE_ID: u32 = 3221295116; +pub const MSR_F10H_DECFG: u32 = 3221295145; +pub const MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT: u32 = 1; +pub const MSR_K8_TOP_MEM1: u32 = 3221291034; +pub const MSR_K8_TOP_MEM2: u32 = 3221291037; +pub const MSR_K8_SYSCFG: u32 = 3221291024; +pub const MSR_K8_INT_PENDING_MSG: u32 = 3221291093; +pub const K8_INTP_C1E_ACTIVE_MASK: u32 = 402653184; +pub const MSR_K8_TSEG_ADDR: u32 = 3221291282; +pub const MSR_K8_TSEG_MASK: u32 = 3221291283; +pub const K8_MTRRFIXRANGE_DRAM_ENABLE: u32 = 262144; +pub const K8_MTRRFIXRANGE_DRAM_MODIFY: u32 = 524288; +pub const K8_MTRR_RDMEM_WRMEM_MASK: u32 = 404232216; +pub const MSR_K7_EVNTSEL0: u32 = 3221291008; +pub const MSR_K7_PERFCTR0: u32 = 3221291012; +pub const MSR_K7_EVNTSEL1: u32 = 3221291009; +pub const MSR_K7_PERFCTR1: u32 = 3221291013; +pub const MSR_K7_EVNTSEL2: u32 = 3221291010; +pub const MSR_K7_PERFCTR2: u32 = 3221291014; +pub const MSR_K7_EVNTSEL3: u32 = 3221291011; +pub const MSR_K7_PERFCTR3: u32 = 3221291015; +pub const MSR_K7_CLK_CTL: u32 = 3221291035; +pub const MSR_K7_HWCR: u32 = 3221291029; +pub const MSR_K7_FID_VID_CTL: u32 = 3221291073; +pub const MSR_K7_FID_VID_STATUS: u32 = 3221291074; +pub const MSR_K6_WHCR: u32 = 3221225602; +pub const MSR_K6_UWCCR: u32 = 3221225605; +pub const MSR_K6_EPMR: u32 = 3221225606; +pub const MSR_K6_PSOR: u32 = 3221225607; +pub const MSR_K6_PFIR: u32 = 3221225608; +pub const MSR_IDT_FCR1: u32 = 263; +pub const MSR_IDT_FCR2: u32 = 264; +pub const MSR_IDT_FCR3: u32 = 265; +pub const MSR_IDT_FCR4: u32 = 266; +pub const MSR_IDT_MCR0: u32 = 272; +pub const MSR_IDT_MCR1: u32 = 273; +pub const MSR_IDT_MCR2: u32 = 274; +pub const MSR_IDT_MCR3: u32 = 275; +pub const MSR_IDT_MCR4: u32 = 276; +pub const MSR_IDT_MCR5: u32 = 277; +pub const MSR_IDT_MCR6: u32 = 278; +pub const MSR_IDT_MCR7: u32 = 279; +pub const MSR_IDT_MCR_CTRL: u32 = 288; +pub const MSR_VIA_FCR: u32 = 4359; +pub const MSR_VIA_LONGHAUL: u32 = 4362; +pub const MSR_VIA_RNG: u32 = 4363; +pub const MSR_VIA_BCR2: u32 = 4423; +pub const MSR_TMTA_LONGRUN_CTRL: u32 = 2156298256; +pub const MSR_TMTA_LONGRUN_FLAGS: u32 = 2156298257; +pub const MSR_TMTA_LRTI_READOUT: u32 = 2156298264; +pub const MSR_TMTA_LRTI_VOLT_MHZ: u32 = 2156298266; +pub const MSR_IA32_P5_MC_ADDR: u32 = 0; +pub const MSR_IA32_P5_MC_TYPE: u32 = 1; +pub const MSR_IA32_TSC: u32 = 16; +pub const MSR_IA32_PLATFORM_ID: u32 = 23; +pub const MSR_IA32_EBL_CR_POWERON: u32 = 42; +pub const MSR_EBC_FREQUENCY_ID: u32 = 44; +pub const MSR_SMI_COUNT: u32 = 52; +pub const MSR_IA32_FEATURE_CONTROL: u32 = 58; +pub const MSR_IA32_TSC_ADJUST: u32 = 59; +pub const MSR_IA32_BNDCFGS: u32 = 3472; +pub const MSR_IA32_BNDCFGS_RSVD: u32 = 4092; +pub const MSR_IA32_XSS: u32 = 3488; +pub const FEATURE_CONTROL_LOCKED: u32 = 1; +pub const 
FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX: u32 = 2; +pub const FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX: u32 = 4; +pub const FEATURE_CONTROL_LMCE: u32 = 1048576; +pub const MSR_IA32_APICBASE: u32 = 27; +pub const MSR_IA32_APICBASE_BSP: u32 = 256; +pub const MSR_IA32_APICBASE_ENABLE: u32 = 2048; +pub const MSR_IA32_APICBASE_BASE: u32 = 4294963200; +pub const MSR_IA32_TSCDEADLINE: u32 = 1760; +pub const MSR_IA32_UCODE_WRITE: u32 = 121; +pub const MSR_IA32_UCODE_REV: u32 = 139; +pub const MSR_IA32_SMM_MONITOR_CTL: u32 = 155; +pub const MSR_IA32_SMBASE: u32 = 158; +pub const MSR_IA32_PERF_STATUS: u32 = 408; +pub const MSR_IA32_PERF_CTL: u32 = 409; +pub const INTEL_PERF_CTL_MASK: u32 = 65535; +pub const MSR_AMD_PSTATE_DEF_BASE: u32 = 3221291108; +pub const MSR_AMD_PERF_STATUS: u32 = 3221291107; +pub const MSR_AMD_PERF_CTL: u32 = 3221291106; +pub const MSR_IA32_MPERF: u32 = 231; +pub const MSR_IA32_APERF: u32 = 232; +pub const MSR_IA32_THERM_CONTROL: u32 = 410; +pub const MSR_IA32_THERM_INTERRUPT: u32 = 411; +pub const THERM_INT_HIGH_ENABLE: u32 = 1; +pub const THERM_INT_LOW_ENABLE: u32 = 2; +pub const THERM_INT_PLN_ENABLE: u32 = 16777216; +pub const MSR_IA32_THERM_STATUS: u32 = 412; +pub const THERM_STATUS_PROCHOT: u32 = 1; +pub const THERM_STATUS_POWER_LIMIT: u32 = 1024; +pub const MSR_THERM2_CTL: u32 = 413; +pub const MSR_THERM2_CTL_TM_SELECT: u32 = 65536; +pub const MSR_IA32_MISC_ENABLE: u32 = 416; +pub const MSR_IA32_TEMPERATURE_TARGET: u32 = 418; +pub const MSR_MISC_FEATURE_CONTROL: u32 = 420; +pub const MSR_MISC_PWR_MGMT: u32 = 426; +pub const MSR_IA32_ENERGY_PERF_BIAS: u32 = 432; +pub const ENERGY_PERF_BIAS_PERFORMANCE: u32 = 0; +pub const ENERGY_PERF_BIAS_NORMAL: u32 = 6; +pub const ENERGY_PERF_BIAS_POWERSAVE: u32 = 15; +pub const MSR_IA32_PACKAGE_THERM_STATUS: u32 = 433; +pub const PACKAGE_THERM_STATUS_PROCHOT: u32 = 1; +pub const PACKAGE_THERM_STATUS_POWER_LIMIT: u32 = 1024; +pub const MSR_IA32_PACKAGE_THERM_INTERRUPT: u32 = 434; +pub const PACKAGE_THERM_INT_HIGH_ENABLE: u32 = 1; +pub const PACKAGE_THERM_INT_LOW_ENABLE: u32 = 2; +pub const PACKAGE_THERM_INT_PLN_ENABLE: u32 = 16777216; +pub const THERM_INT_THRESHOLD0_ENABLE: u32 = 32768; +pub const THERM_SHIFT_THRESHOLD0: u32 = 8; +pub const THERM_MASK_THRESHOLD0: u32 = 32512; +pub const THERM_INT_THRESHOLD1_ENABLE: u32 = 8388608; +pub const THERM_SHIFT_THRESHOLD1: u32 = 16; +pub const THERM_MASK_THRESHOLD1: u32 = 8323072; +pub const THERM_STATUS_THRESHOLD0: u32 = 64; +pub const THERM_LOG_THRESHOLD0: u32 = 128; +pub const THERM_STATUS_THRESHOLD1: u32 = 256; +pub const THERM_LOG_THRESHOLD1: u32 = 512; +pub const MSR_IA32_MISC_ENABLE_FAST_STRING_BIT: u32 = 0; +pub const MSR_IA32_MISC_ENABLE_FAST_STRING: u32 = 1; +pub const MSR_IA32_MISC_ENABLE_TCC_BIT: u32 = 1; +pub const MSR_IA32_MISC_ENABLE_TCC: u32 = 2; +pub const MSR_IA32_MISC_ENABLE_EMON_BIT: u32 = 7; +pub const MSR_IA32_MISC_ENABLE_EMON: u32 = 128; +pub const MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT: u32 = 11; +pub const MSR_IA32_MISC_ENABLE_BTS_UNAVAIL: u32 = 2048; +pub const MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT: u32 = 12; +pub const MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL: u32 = 4096; +pub const MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT: u32 = 16; +pub const MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP: u32 = 65536; +pub const MSR_IA32_MISC_ENABLE_MWAIT_BIT: u32 = 18; +pub const MSR_IA32_MISC_ENABLE_MWAIT: u32 = 262144; +pub const MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT: u32 = 22; +pub const MSR_IA32_MISC_ENABLE_LIMIT_CPUID: u32 = 4194304; +pub const MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT: u32 = 
23; +pub const MSR_IA32_MISC_ENABLE_XTPR_DISABLE: u32 = 8388608; +pub const MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT: u32 = 34; +pub const MSR_IA32_MISC_ENABLE_XD_DISABLE: u64 = 17179869184; +pub const MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT: u32 = 2; +pub const MSR_IA32_MISC_ENABLE_X87_COMPAT: u32 = 4; +pub const MSR_IA32_MISC_ENABLE_TM1_BIT: u32 = 3; +pub const MSR_IA32_MISC_ENABLE_TM1: u32 = 8; +pub const MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT: u32 = 4; +pub const MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE: u32 = 16; +pub const MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT: u32 = 6; +pub const MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE: u32 = 64; +pub const MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT: u32 = 8; +pub const MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK: u32 = 256; +pub const MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT: u32 = 9; +pub const MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE: u32 = 512; +pub const MSR_IA32_MISC_ENABLE_FERR_BIT: u32 = 10; +pub const MSR_IA32_MISC_ENABLE_FERR: u32 = 1024; +pub const MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT: u32 = 10; +pub const MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX: u32 = 1024; +pub const MSR_IA32_MISC_ENABLE_TM2_BIT: u32 = 13; +pub const MSR_IA32_MISC_ENABLE_TM2: u32 = 8192; +pub const MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT: u32 = 19; +pub const MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE: u32 = 524288; +pub const MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT: u32 = 20; +pub const MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK: u32 = 1048576; +pub const MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT: u32 = 24; +pub const MSR_IA32_MISC_ENABLE_L1D_CONTEXT: u32 = 16777216; +pub const MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT: u32 = 37; +pub const MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE: u64 = 137438953472; +pub const MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT: u32 = 38; +pub const MSR_IA32_MISC_ENABLE_TURBO_DISABLE: u64 = 274877906944; +pub const MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT: u32 = 39; +pub const MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE: u64 = 549755813888; +pub const MSR_IA32_TSC_DEADLINE: u32 = 1760; +pub const MSR_TSX_FORCE_ABORT: u32 = 271; +pub const MSR_TFA_RTM_FORCE_ABORT_BIT: u32 = 0; +pub const MSR_IA32_MCG_EAX: u32 = 384; +pub const MSR_IA32_MCG_EBX: u32 = 385; +pub const MSR_IA32_MCG_ECX: u32 = 386; +pub const MSR_IA32_MCG_EDX: u32 = 387; +pub const MSR_IA32_MCG_ESI: u32 = 388; +pub const MSR_IA32_MCG_EDI: u32 = 389; +pub const MSR_IA32_MCG_EBP: u32 = 390; +pub const MSR_IA32_MCG_ESP: u32 = 391; +pub const MSR_IA32_MCG_EFLAGS: u32 = 392; +pub const MSR_IA32_MCG_EIP: u32 = 393; +pub const MSR_IA32_MCG_RESERVED: u32 = 394; +pub const MSR_P4_BPU_PERFCTR0: u32 = 768; +pub const MSR_P4_BPU_PERFCTR1: u32 = 769; +pub const MSR_P4_BPU_PERFCTR2: u32 = 770; +pub const MSR_P4_BPU_PERFCTR3: u32 = 771; +pub const MSR_P4_MS_PERFCTR0: u32 = 772; +pub const MSR_P4_MS_PERFCTR1: u32 = 773; +pub const MSR_P4_MS_PERFCTR2: u32 = 774; +pub const MSR_P4_MS_PERFCTR3: u32 = 775; +pub const MSR_P4_FLAME_PERFCTR0: u32 = 776; +pub const MSR_P4_FLAME_PERFCTR1: u32 = 777; +pub const MSR_P4_FLAME_PERFCTR2: u32 = 778; +pub const MSR_P4_FLAME_PERFCTR3: u32 = 779; +pub const MSR_P4_IQ_PERFCTR0: u32 = 780; +pub const MSR_P4_IQ_PERFCTR1: u32 = 781; +pub const MSR_P4_IQ_PERFCTR2: u32 = 782; +pub const MSR_P4_IQ_PERFCTR3: u32 = 783; +pub const MSR_P4_IQ_PERFCTR4: u32 = 784; +pub const MSR_P4_IQ_PERFCTR5: u32 = 785; +pub const MSR_P4_BPU_CCCR0: u32 = 864; +pub const MSR_P4_BPU_CCCR1: u32 = 865; +pub const MSR_P4_BPU_CCCR2: u32 = 866; +pub const MSR_P4_BPU_CCCR3: u32 = 867; +pub const MSR_P4_MS_CCCR0: u32 = 868; +pub const 
MSR_P4_MS_CCCR1: u32 = 869; +pub const MSR_P4_MS_CCCR2: u32 = 870; +pub const MSR_P4_MS_CCCR3: u32 = 871; +pub const MSR_P4_FLAME_CCCR0: u32 = 872; +pub const MSR_P4_FLAME_CCCR1: u32 = 873; +pub const MSR_P4_FLAME_CCCR2: u32 = 874; +pub const MSR_P4_FLAME_CCCR3: u32 = 875; +pub const MSR_P4_IQ_CCCR0: u32 = 876; +pub const MSR_P4_IQ_CCCR1: u32 = 877; +pub const MSR_P4_IQ_CCCR2: u32 = 878; +pub const MSR_P4_IQ_CCCR3: u32 = 879; +pub const MSR_P4_IQ_CCCR4: u32 = 880; +pub const MSR_P4_IQ_CCCR5: u32 = 881; +pub const MSR_P4_ALF_ESCR0: u32 = 970; +pub const MSR_P4_ALF_ESCR1: u32 = 971; +pub const MSR_P4_BPU_ESCR0: u32 = 946; +pub const MSR_P4_BPU_ESCR1: u32 = 947; +pub const MSR_P4_BSU_ESCR0: u32 = 928; +pub const MSR_P4_BSU_ESCR1: u32 = 929; +pub const MSR_P4_CRU_ESCR0: u32 = 952; +pub const MSR_P4_CRU_ESCR1: u32 = 953; +pub const MSR_P4_CRU_ESCR2: u32 = 972; +pub const MSR_P4_CRU_ESCR3: u32 = 973; +pub const MSR_P4_CRU_ESCR4: u32 = 992; +pub const MSR_P4_CRU_ESCR5: u32 = 993; +pub const MSR_P4_DAC_ESCR0: u32 = 936; +pub const MSR_P4_DAC_ESCR1: u32 = 937; +pub const MSR_P4_FIRM_ESCR0: u32 = 932; +pub const MSR_P4_FIRM_ESCR1: u32 = 933; +pub const MSR_P4_FLAME_ESCR0: u32 = 934; +pub const MSR_P4_FLAME_ESCR1: u32 = 935; +pub const MSR_P4_FSB_ESCR0: u32 = 930; +pub const MSR_P4_FSB_ESCR1: u32 = 931; +pub const MSR_P4_IQ_ESCR0: u32 = 954; +pub const MSR_P4_IQ_ESCR1: u32 = 955; +pub const MSR_P4_IS_ESCR0: u32 = 948; +pub const MSR_P4_IS_ESCR1: u32 = 949; +pub const MSR_P4_ITLB_ESCR0: u32 = 950; +pub const MSR_P4_ITLB_ESCR1: u32 = 951; +pub const MSR_P4_IX_ESCR0: u32 = 968; +pub const MSR_P4_IX_ESCR1: u32 = 969; +pub const MSR_P4_MOB_ESCR0: u32 = 938; +pub const MSR_P4_MOB_ESCR1: u32 = 939; +pub const MSR_P4_MS_ESCR0: u32 = 960; +pub const MSR_P4_MS_ESCR1: u32 = 961; +pub const MSR_P4_PMH_ESCR0: u32 = 940; +pub const MSR_P4_PMH_ESCR1: u32 = 941; +pub const MSR_P4_RAT_ESCR0: u32 = 956; +pub const MSR_P4_RAT_ESCR1: u32 = 957; +pub const MSR_P4_SAAT_ESCR0: u32 = 942; +pub const MSR_P4_SAAT_ESCR1: u32 = 943; +pub const MSR_P4_SSU_ESCR0: u32 = 958; +pub const MSR_P4_SSU_ESCR1: u32 = 959; +pub const MSR_P4_TBPU_ESCR0: u32 = 962; +pub const MSR_P4_TBPU_ESCR1: u32 = 963; +pub const MSR_P4_TC_ESCR0: u32 = 964; +pub const MSR_P4_TC_ESCR1: u32 = 965; +pub const MSR_P4_U2L_ESCR0: u32 = 944; +pub const MSR_P4_U2L_ESCR1: u32 = 945; +pub const MSR_P4_PEBS_MATRIX_VERT: u32 = 1010; +pub const MSR_CORE_PERF_FIXED_CTR0: u32 = 777; +pub const MSR_CORE_PERF_FIXED_CTR1: u32 = 778; +pub const MSR_CORE_PERF_FIXED_CTR2: u32 = 779; +pub const MSR_CORE_PERF_FIXED_CTR_CTRL: u32 = 909; +pub const MSR_CORE_PERF_GLOBAL_STATUS: u32 = 910; +pub const MSR_CORE_PERF_GLOBAL_CTRL: u32 = 911; +pub const MSR_CORE_PERF_GLOBAL_OVF_CTRL: u32 = 912; +pub const MSR_GEODE_BUSCONT_CONF0: u32 = 6400; +pub const MSR_IA32_VMX_BASIC: u32 = 1152; +pub const MSR_IA32_VMX_PINBASED_CTLS: u32 = 1153; +pub const MSR_IA32_VMX_PROCBASED_CTLS: u32 = 1154; +pub const MSR_IA32_VMX_EXIT_CTLS: u32 = 1155; +pub const MSR_IA32_VMX_ENTRY_CTLS: u32 = 1156; +pub const MSR_IA32_VMX_MISC: u32 = 1157; +pub const MSR_IA32_VMX_CR0_FIXED0: u32 = 1158; +pub const MSR_IA32_VMX_CR0_FIXED1: u32 = 1159; +pub const MSR_IA32_VMX_CR4_FIXED0: u32 = 1160; +pub const MSR_IA32_VMX_CR4_FIXED1: u32 = 1161; +pub const MSR_IA32_VMX_VMCS_ENUM: u32 = 1162; +pub const MSR_IA32_VMX_PROCBASED_CTLS2: u32 = 1163; +pub const MSR_IA32_VMX_EPT_VPID_CAP: u32 = 1164; +pub const MSR_IA32_VMX_TRUE_PINBASED_CTLS: u32 = 1165; +pub const MSR_IA32_VMX_TRUE_PROCBASED_CTLS: u32 = 1166; +pub const 
MSR_IA32_VMX_TRUE_EXIT_CTLS: u32 = 1167; +pub const MSR_IA32_VMX_TRUE_ENTRY_CTLS: u32 = 1168; +pub const MSR_IA32_VMX_VMFUNC: u32 = 1169; +pub const VMX_BASIC_VMCS_SIZE_SHIFT: u32 = 32; +pub const VMX_BASIC_TRUE_CTLS: u64 = 36028797018963968; +pub const VMX_BASIC_64: u64 = 281474976710656; +pub const VMX_BASIC_MEM_TYPE_SHIFT: u32 = 50; +pub const VMX_BASIC_MEM_TYPE_MASK: u64 = 16888498602639360; +pub const VMX_BASIC_MEM_TYPE_WB: u32 = 6; +pub const VMX_BASIC_INOUT: u64 = 18014398509481984; +pub const MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS: u32 = 536870912; +pub const MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE: u32 = 31; +pub const MSR_VM_CR: u32 = 3221291284; +pub const MSR_VM_IGNNE: u32 = 3221291285; +pub const MSR_VM_HSAVE_PA: u32 = 3221291287; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_msr_whitelist() { + for range in WHITELISTED_MSR_RANGES.iter() { + for msr in range.base..(range.base + range.nmsrs) { + let should = !matches!(msr, MSR_IA32_FEATURE_CONTROL | MSR_IA32_MCG_CTL); + assert_eq!(msr_should_serialize(msr), should); + } + } + } + + #[test] + fn test_msr_contains() { + let msr_range_a = MSR_RANGE!(0xEA, 9); + let msr_a = 0x8888; + assert!(!msr_range_a.contains(msr_a)); + + let msr_range_b = MSR_RANGE!(0xCCCC, 5); + let msr_b = 0xCCCD; + assert!(msr_range_b.contains(msr_b)); + } + + fn test_supported_msrs() { + let kvm = Kvm::new().unwrap(); + assert!(supported_guest_msrs(&kvm).is_ok()); + } +} diff --git a/src/dragonball/src/dbs_arch/src/x86_64/regs.rs b/src/dragonball/src/dbs_arch/src/x86_64/regs.rs new file mode 100644 index 000000000000..ca04e887e0ab --- /dev/null +++ b/src/dragonball/src/dbs_arch/src/x86_64/regs.rs @@ -0,0 +1,402 @@ +// Copyright 2021-2022 Alibaba Cloud. All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Constants and utilities for x86 CPU generic, system and model specific registers. + +use std::mem; + +use kvm_bindings::{kvm_fpu, kvm_msr_entry, kvm_regs, kvm_sregs, Msrs}; +use kvm_ioctls::VcpuFd; +use vm_memory::{Address, Bytes, GuestAddress, GuestMemory}; + +use super::gdt::kvm_segment_from_gdt; +use super::msr; + +/// Non-Executable bit in EFER MSR. +pub const EFER_NX: u64 = 0x800; +/// Long-mode active bit in EFER MSR. +pub const EFER_LMA: u64 = 0x400; +/// Long-mode enable bit in EFER MSR. +pub const EFER_LME: u64 = 0x100; + +/// Protection mode enable bit in CR0. +pub const X86_CR0_PE: u64 = 0x1; +/// Paging enable bit in CR0. +pub const X86_CR0_PG: u64 = 0x8000_0000; +/// Physical Address Extension bit in CR4. +pub const X86_CR4_PAE: u64 = 0x20; + +/// Errors thrown while setting up x86_64 registers. +#[derive(Debug)] +pub enum Error { + /// Failed to get SREGs for this CPU. + GetStatusRegisters(kvm_ioctls::Error), + /// Failed to set base registers for this CPU. + SetBaseRegisters(kvm_ioctls::Error), + /// Failed to configure the FPU. + SetFPURegisters(kvm_ioctls::Error), + /// Setting up MSRs failed. + SetModelSpecificRegisters(kvm_ioctls::Error), + /// Failed to set all MSRs. + SetModelSpecificRegistersCount, + /// Failed to set SREGs for this CPU. + SetStatusRegisters(kvm_ioctls::Error), + /// Writing the GDT to RAM failed. + WriteGDT, + /// Writing the IDT to RAM failed. 
+    WriteIDT,
+}
+
+type Result<T> = std::result::Result<T, Error>;
+
+/// Configure Floating-Point Unit (FPU) registers for a given CPU.
+///
+/// # Arguments
+///
+/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
+pub fn setup_fpu(vcpu: &VcpuFd) -> Result<()> {
+    let fpu: kvm_fpu = kvm_fpu {
+        fcw: 0x37f,
+        mxcsr: 0x1f80,
+        ..Default::default()
+    };
+
+    vcpu.set_fpu(&fpu).map_err(Error::SetFPURegisters)
+}
+
+/// Configure Model Specific Registers (MSRs) for a given CPU.
+///
+/// # Arguments
+///
+/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
+pub fn setup_msrs(vcpu: &VcpuFd) -> Result<()> {
+    let entry_vec = create_msr_entries();
+    let kvm_msrs =
+        Msrs::from_entries(&entry_vec).map_err(|_| Error::SetModelSpecificRegistersCount)?;
+
+    vcpu.set_msrs(&kvm_msrs)
+        .map_err(Error::SetModelSpecificRegisters)
+        .and_then(|msrs_written| {
+            if msrs_written as u32 != kvm_msrs.as_fam_struct_ref().nmsrs {
+                Err(Error::SetModelSpecificRegistersCount)
+            } else {
+                Ok(msrs_written)
+            }
+        })?;
+    Ok(())
+}
+
+/// Configure base registers for a given CPU.
+///
+/// # Arguments
+///
+/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
+/// * `boot_ip` - Starting instruction pointer.
+/// * `rsp` - Value for RSP register
+/// * `rbp` - Value for RBP register
+/// * `rsi` - Value for RSI register
+pub fn setup_regs(vcpu: &VcpuFd, boot_ip: u64, rsp: u64, rbp: u64, rsi: u64) -> Result<()> {
+    let regs: kvm_regs = kvm_regs {
+        rflags: 0x0000_0000_0000_0002u64,
+        rip: boot_ip,
+        rsp,
+        rbp,
+        rsi,
+        ..Default::default()
+    };
+
+    vcpu.set_regs(&regs).map_err(Error::SetBaseRegisters)
+}
+
+/// Configures the segment registers for a given CPU.
+///
+/// # Arguments
+///
+/// * `mem` - The memory that will be passed to the guest.
+/// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
+/// * `pgtable_addr` - Address of the vcpu pgtable.
+/// * `gdt_table` - Content of the global descriptor table.
+/// * `gdt_addr` - Address of the global descriptor table.
+/// * `idt_addr` - Address of the interrupt descriptor table.
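+///
+/// This writes the GDT entries and a zeroed IDT word into guest memory, then switches the
+/// vCPU into 64-bit paged mode (`CR0.PE|PG`, `CR4.PAE`, `EFER.LME|LMA`) with `CR3` pointing
+/// at `pgtable_addr`.
+///
+/// # Example
+///
+/// A rough sketch mirroring the unit tests in this module (`guest_mem`, `vcpu` and
+/// `pgtable_addr` are assumed to be set up already, so the block is `ignore`d):
+///
+/// ```ignore
+/// let gdt_table: [u64; 4] = [
+///     gdt_entry(0, 0, 0),            // NULL
+///     gdt_entry(0xa09b, 0, 0xfffff), // CODE
+///     gdt_entry(0xc093, 0, 0xfffff), // DATA
+///     gdt_entry(0x808b, 0, 0xfffff), // TSS
+/// ];
+/// setup_sregs(&guest_mem, &vcpu, pgtable_addr, &gdt_table, 0x500, 0x520).unwrap();
+/// ```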
+pub fn setup_sregs<M: GuestMemory>(
+    mem: &M,
+    vcpu: &VcpuFd,
+    pgtable_addr: GuestAddress,
+    gdt_table: &[u64],
+    gdt_addr: u64,
+    idt_addr: u64,
+) -> Result<()> {
+    let mut sregs: kvm_sregs = vcpu.get_sregs().map_err(Error::GetStatusRegisters)?;
+    configure_segments_and_sregs(mem, &mut sregs, pgtable_addr, gdt_table, gdt_addr, idt_addr)?;
+    vcpu.set_sregs(&sregs).map_err(Error::SetStatusRegisters)
+}
+
+fn configure_segments_and_sregs<M: GuestMemory>(
+    mem: &M,
+    sregs: &mut kvm_sregs,
+    pgtable_addr: GuestAddress,
+    gdt_table: &[u64],
+    gdt_addr: u64,
+    idt_addr: u64,
+) -> Result<()> {
+    assert!(gdt_table.len() >= 4);
+    let code_seg = kvm_segment_from_gdt(gdt_table[1], 1);
+    let data_seg = kvm_segment_from_gdt(gdt_table[2], 2);
+    let tss_seg = kvm_segment_from_gdt(gdt_table[3], 3);
+
+    // Write segments
+    write_gdt_table(gdt_table, gdt_addr, mem)?;
+    sregs.gdt.base = gdt_addr;
+    sregs.gdt.limit = std::mem::size_of_val(gdt_table) as u16 - 1;
+
+    write_idt_value(0, idt_addr, mem)?;
+    sregs.idt.base = idt_addr;
+    sregs.idt.limit = mem::size_of::<u64>() as u16 - 1;
+
+    sregs.cs = code_seg;
+    sregs.ds = data_seg;
+    sregs.es = data_seg;
+    sregs.fs = data_seg;
+    sregs.gs = data_seg;
+    sregs.ss = data_seg;
+    sregs.tr = tss_seg;
+
+    /* 64-bit protected mode */
+    sregs.cr0 |= X86_CR0_PE;
+    sregs.cr3 = pgtable_addr.raw_value();
+    sregs.cr4 |= X86_CR4_PAE;
+    sregs.cr0 |= X86_CR0_PG;
+    sregs.efer |= EFER_LME | EFER_LMA;
+
+    Ok(())
+}
+
+fn write_gdt_table<M: GuestMemory>(gdt_table: &[u64], gdt_addr: u64, guest_mem: &M) -> Result<()> {
+    let boot_gdt_addr = GuestAddress(gdt_addr);
+    for (index, entry) in gdt_table.iter().enumerate() {
+        let addr = guest_mem
+            .checked_offset(boot_gdt_addr, index * mem::size_of::<u64>())
+            .ok_or(Error::WriteGDT)?;
+        guest_mem
+            .write_obj(*entry, addr)
+            .map_err(|_| Error::WriteGDT)?;
+    }
+    Ok(())
+}
+
+fn write_idt_value<M: GuestMemory>(idt_table: u64, idt_addr: u64, guest_mem: &M) -> Result<()> {
+    let boot_idt_addr = GuestAddress(idt_addr);
+    guest_mem
+        .write_obj(idt_table, boot_idt_addr)
+        .map_err(|_| Error::WriteIDT)
+}
+
+#[allow(clippy::vec_init_then_push)]
+fn create_msr_entries() -> Vec<kvm_msr_entry> {
+    let mut entries = Vec::<kvm_msr_entry>::new();
+
+    entries.push(kvm_msr_entry {
+        index: msr::MSR_IA32_SYSENTER_CS,
+        data: 0x0,
+        ..Default::default()
+    });
+    entries.push(kvm_msr_entry {
+        index: msr::MSR_IA32_SYSENTER_ESP,
+        data: 0x0,
+        ..Default::default()
+    });
+    entries.push(kvm_msr_entry {
+        index: msr::MSR_IA32_SYSENTER_EIP,
+        data: 0x0,
+        ..Default::default()
+    });
+    // x86_64 specific msrs, we only run on x86_64 not x86.
+ entries.push(kvm_msr_entry { + index: msr::MSR_STAR, + data: 0x0, + ..Default::default() + }); + entries.push(kvm_msr_entry { + index: msr::MSR_CSTAR, + data: 0x0, + ..Default::default() + }); + entries.push(kvm_msr_entry { + index: msr::MSR_KERNEL_GS_BASE, + data: 0x0, + ..Default::default() + }); + entries.push(kvm_msr_entry { + index: msr::MSR_SYSCALL_MASK, + data: 0x0, + ..Default::default() + }); + entries.push(kvm_msr_entry { + index: msr::MSR_LSTAR, + data: 0x0, + ..Default::default() + }); + // end of x86_64 specific code + entries.push(kvm_msr_entry { + index: msr::MSR_IA32_TSC, + data: 0x0, + ..Default::default() + }); + entries.push(kvm_msr_entry { + index: msr::MSR_IA32_MISC_ENABLE, + data: u64::from(msr::MSR_IA32_MISC_ENABLE_FAST_STRING), + ..Default::default() + }); + + entries +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::x86_64::gdt::gdt_entry; + use kvm_ioctls::Kvm; + use vm_memory::{Bytes, GuestAddress, GuestMemoryMmap}; + + const BOOT_GDT_OFFSET: u64 = 0x500; + const BOOT_IDT_OFFSET: u64 = 0x520; + const BOOT_STACK_POINTER: u64 = 0x100_0000; + const ZERO_PAGE_START: u64 = 0x7_C000; + const BOOT_GDT_MAX: usize = 4; + const PML4_START: u64 = 0x9000; + + fn create_guest_mem() -> GuestMemoryMmap { + GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap() + } + + fn read_u64(gm: &GuestMemoryMmap, offset: u64) -> u64 { + let read_addr = GuestAddress(offset); + gm.read_obj(read_addr).unwrap() + } + + fn validate_segments_and_sregs(gm: &GuestMemoryMmap, sregs: &kvm_sregs) { + assert_eq!(0x0, read_u64(gm, BOOT_GDT_OFFSET)); + assert_eq!(0xaf_9b00_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 8)); + assert_eq!(0xcf_9300_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 16)); + assert_eq!(0x8f_8b00_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 24)); + assert_eq!(0x0, read_u64(gm, BOOT_IDT_OFFSET)); + + assert_eq!(0, sregs.cs.base); + assert_eq!(0xfffff, sregs.ds.limit); + assert_eq!(0x10, sregs.es.selector); + assert_eq!(1, sregs.fs.present); + assert_eq!(1, sregs.gs.g); + assert_eq!(0, sregs.ss.avl); + assert_eq!(0, sregs.tr.base); + assert_eq!(0xfffff, sregs.tr.limit); + assert_eq!(0, sregs.tr.avl); + assert!(sregs.cr0 & X86_CR0_PE != 0); + assert!(sregs.efer & EFER_LME != 0 && sregs.efer & EFER_LMA != 0); + } + + #[test] + fn test_configure_segments_and_sregs() { + let mut sregs: kvm_sregs = Default::default(); + let gm = create_guest_mem(); + let gdt_table: [u64; BOOT_GDT_MAX] = [ + gdt_entry(0, 0, 0), // NULL + gdt_entry(0xa09b, 0, 0xfffff), // CODE + gdt_entry(0xc093, 0, 0xfffff), // DATA + gdt_entry(0x808b, 0, 0xfffff), // TSS + ]; + configure_segments_and_sregs( + &gm, + &mut sregs, + GuestAddress(PML4_START), + &gdt_table, + BOOT_GDT_OFFSET, + BOOT_IDT_OFFSET, + ) + .unwrap(); + + validate_segments_and_sregs(&gm, &sregs); + } + + #[test] + fn test_setup_fpu() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + setup_fpu(&vcpu).unwrap(); + + let expected_fpu: kvm_fpu = kvm_fpu { + fcw: 0x37f, + mxcsr: 0x1f80, + ..Default::default() + }; + let actual_fpu: kvm_fpu = vcpu.get_fpu().unwrap(); + assert_eq!(expected_fpu.fcw, actual_fpu.fcw); + // Setting the mxcsr register from kvm_fpu inside setup_fpu does not influence anything. + // See 'kvm_arch_vcpu_ioctl_set_fpu' from arch/x86/kvm/x86.c. + // The mxcsr will stay 0 and the assert below fails. Decide whether or not we should + // remove it at all. 
+ // assert!(expected_fpu.mxcsr == actual_fpu.mxcsr); + } + + #[test] + #[allow(clippy::cast_ptr_alignment)] + fn test_setup_msrs() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + setup_msrs(&vcpu).unwrap(); + + // This test will check against the last MSR entry configured (the tenth one). + // See create_msr_entries() for details. + let test_kvm_msrs_entry = [kvm_msr_entry { + index: msr::MSR_IA32_MISC_ENABLE, + ..Default::default() + }]; + let mut kvm_msrs = Msrs::from_entries(&test_kvm_msrs_entry).unwrap(); + + // kvm_ioctls::get_msrs() returns the number of msrs that it succeeded in reading. + // We only want to read one in this test case scenario. + let read_nmsrs = vcpu.get_msrs(&mut kvm_msrs).unwrap(); + // Validate it only read one. + assert_eq!(read_nmsrs, 1); + + // Official entries that were setup when we did setup_msrs. We need to assert that the + // tenth one (i.e the one with index msr_index::MSR_IA32_MISC_ENABLE has the data we + // expect. + let entry_vec = create_msr_entries(); + assert_eq!(entry_vec[9], kvm_msrs.as_slice()[0]); + } + + #[test] + fn test_setup_regs() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + + let expected_regs: kvm_regs = kvm_regs { + rflags: 0x0000_0000_0000_0002u64, + rip: 1, + rsp: BOOT_STACK_POINTER, + rbp: BOOT_STACK_POINTER, + rsi: ZERO_PAGE_START, + ..Default::default() + }; + + setup_regs( + &vcpu, + expected_regs.rip, + BOOT_STACK_POINTER, + BOOT_STACK_POINTER, + ZERO_PAGE_START, + ) + .unwrap(); + + let actual_regs: kvm_regs = vcpu.get_regs().unwrap(); + assert_eq!(actual_regs, expected_regs); + } +} diff --git a/src/dragonball/src/dbs_boot/Cargo.toml b/src/dragonball/src/dbs_boot/Cargo.toml new file mode 100644 index 000000000000..1ecac6421071 --- /dev/null +++ b/src/dragonball/src/dbs_boot/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "dbs-boot" +version = "0.4.0" +authors = ["Alibaba Dragonball Team"] +description = "Traits and structs for booting sandbox" +license = "Apache-2.0 AND BSD-3-Clause" +edition = "2018" +homepage = "https://github.com/openanolis/dragonball-sandbox" +repository = "https://github.com/openanolis/dragonball-sandbox" +keywords = ["dragonball", "boot", "VMM"] +readme = "README.md" + +[dependencies] +dbs-arch = { path = "../dbs_arch" } +kvm-bindings = { version = "0.6.0", features = ["fam-wrappers"] } +kvm-ioctls = "0.12.0" +lazy_static = "1" +libc = "0.2.39" +thiserror = "1" +vm-memory = "0.10.0" +vm-fdt = "0.2.0" + +[dev-dependencies] +vm-memory = { version = "0.10.0", features = ["backend-mmap"] } +device_tree = ">=1.1.0" +dbs-device = { path = "../dbs_device" } diff --git a/src/dragonball/src/dbs_boot/LICENSE b/src/dragonball/src/dbs_boot/LICENSE new file mode 120000 index 000000000000..30cff7403da0 --- /dev/null +++ b/src/dragonball/src/dbs_boot/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/dragonball/src/dbs_boot/README.md b/src/dragonball/src/dbs_boot/README.md new file mode 100644 index 000000000000..a55842d18e93 --- /dev/null +++ b/src/dragonball/src/dbs_boot/README.md @@ -0,0 +1,24 @@ +# dbs-boot + +## Design + +The `dbs-boot` crate is a collection of constants, structs and utilities used to boot virtual machines. 
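+
+As an illustration, the aarch64 FDT builder in this crate is driven roughly as in its unit
+tests (the setup of guest memory, vCPU information and the GIC device is elided here):
+
+```rust
+let dtb = create_fdt(
+    FdtVmInfo::new(&guest_mem, "console=tty0", None, vcpu_info),
+    FdtNumaInfo::default(),
+    FdtDeviceInfo::new(Some(&mmio_device_info), gic.as_ref()),
+)?;
+```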
+ +## Submodule List + +This repository contains the following submodules: +| Name | Arch| Description | +| --- | --- | --- | +| [`bootparam`](src/x86_64/bootparam.rs) | x86_64 | Magic addresses externally used to lay out x86_64 VMs | +| [fdt](src/aarch64/fdt.rs) | aarch64| Create FDT for Aarch64 systems | +| [layout](src/x86_64/layout.rs) | x86_64 | x86_64 layout constants | +| [layout](src/aarch64/layout.rs/) | aarch64 | aarch64 layout constants | +| [mptable](src/x86_64/mptable.rs) | x86_64 | MP Table configurations used for defining VM boot status | + +## Acknowledgement + +Part of the code is derived from the [Firecracker](https://github.com/firecracker-microvm/firecracker) project. + +## License + +This project is licensed under [Apache License](http://www.apache.org/licenses/LICENSE-2.0), Version 2.0. diff --git a/src/dragonball/src/dbs_boot/THIRD-PARTY b/src/dragonball/src/dbs_boot/THIRD-PARTY new file mode 120000 index 000000000000..301d0a498953 --- /dev/null +++ b/src/dragonball/src/dbs_boot/THIRD-PARTY @@ -0,0 +1 @@ +../../THIRD-PARTY \ No newline at end of file diff --git a/src/dragonball/src/dbs_boot/src/aarch64/fdt.rs b/src/dragonball/src/dbs_boot/src/aarch64/fdt.rs new file mode 100644 index 000000000000..6d6eeaaf3c26 --- /dev/null +++ b/src/dragonball/src/dbs_boot/src/aarch64/fdt.rs @@ -0,0 +1,608 @@ +// Copyright 2022 Alibaba Cloud. All Rights Reserved. +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Create Flatten Device Tree (FDT) for ARM64 systems. + +use std::collections::HashMap; +use std::fmt::Debug; + +use dbs_arch::gic::its::ItsType::{self, PciMsiIts, PlatformMsiIts}; +use dbs_arch::gic::GICDevice; +use dbs_arch::{pmu::VIRTUAL_PMU_IRQ, VpmuFeatureLevel}; +use dbs_arch::{DeviceInfoForFDT, DeviceType}; + +use vm_fdt::FdtWriter; +use vm_memory::GuestMemoryRegion; +use vm_memory::{Address, Bytes, GuestAddress, GuestMemory}; + +use super::fdt_utils::*; +use super::Error; +use crate::Result; + +// This is a value for uniquely identifying the FDT node declaring the interrupt controller. +const GIC_PHANDLE: u32 = 1; +// This is a value for uniquely identifying the FDT node containing the clock definition. +const CLOCK_PHANDLE: u32 = 2; +// This is a value for uniquely identifying the FDT node containing the plaform msi ITS definition. +const GIC_PLATFORM_MSI_ITS_PHANDLE: u32 = 3; +// This is a value for uniquely identifying the FDT node containing the pci msi ITS definition. +const GIC_PCI_MSI_ITS_PHANDLE: u32 = 4; +// According to the arm, gic-v3.txt document, ITS' #msi-cells is fixed at 1. +const GIC_PLATFORM_MSI_ITS_CELLS_SIZE: u32 = 1; + +// Read the documentation specified when appending the root node to the FDT. +const ADDRESS_CELLS: u32 = 0x2; +const SIZE_CELLS: u32 = 0x2; + +// As per kvm tool and +// https://www.kernel.org/doc/Documentation/devicetree/bindings/interrupt-controller/arm%2Cgic.txt +// Look for "The 1st cell..." +const GIC_FDT_IRQ_TYPE_SPI: u32 = 0; +const GIC_FDT_IRQ_TYPE_PPI: u32 = 1; + +// From https://elixir.bootlin.com/linux/v4.9.62/source/include/dt-bindings/interrupt-controller/irq.h#L17 +const IRQ_TYPE_EDGE_RISING: u32 = 1; +const IRQ_TYPE_LEVEL_HI: u32 = 4; + +/// Creates the flattened device tree for this aarch64 microVM. 
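+///
+/// The generated blob contains the root node plus `cpus`, `memory`, `chosen`, the interrupt
+/// controller (`intc`, with optional ITS sub-nodes), `timer`, `apb-pclk`, `psci`, any MMIO
+/// devices (serial, RTC, virtio) and, when the vPMU feature is enabled, a `pmu` node. The
+/// finished FDT is also written into guest memory at `get_fdt_addr()` before being returned.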
+pub fn create_fdt( + fdt_vm_info: FdtVmInfo, + _fdt_numa_info: FdtNumaInfo, + fdt_device_info: FdtDeviceInfo, +) -> Result> +where + T: DeviceInfoForFDT + Clone + Debug, +{ + let mut fdt = FdtWriter::new()?; + + // For an explanation why these nodes were introduced in the blob take a look at + // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/booting-without-of.txt#L845 + // Look for "Required nodes and properties". + + // Header or the root node as per above mentioned documentation. + let root_node = fdt.begin_node("")?; + fdt.property_string("compatible", "linux,dummy-virt")?; + // For info on #address-cells and size-cells read "Note about cells and address representation" + // from the above mentioned txt file. + fdt.property_u32("#address-cells", ADDRESS_CELLS)?; + fdt.property_u32("#size-cells", SIZE_CELLS)?; + // This is not mandatory but we use it to point the root node to the node + // containing description of the interrupt controller for this VM. + fdt.property_u32("interrupt-parent", GIC_PHANDLE)?; + create_cpu_nodes(&mut fdt, &fdt_vm_info)?; + create_memory_node(&mut fdt, fdt_vm_info.get_guest_memory())?; + create_chosen_node(&mut fdt, &fdt_vm_info)?; + create_gic_node(&mut fdt, fdt_device_info.get_irqchip())?; + create_timer_node(&mut fdt)?; + create_clock_node(&mut fdt)?; + create_psci_node(&mut fdt)?; + fdt_device_info + .get_mmio_device_info() + .map_or(Ok(()), |v| create_devices_node(&mut fdt, v))?; + create_pmu_node(&mut fdt, fdt_vm_info.get_vpmu_feature())?; + + // End Header node. + fdt.end_node(root_node)?; + + // Allocate another buffer so we can format and then write fdt to guest. + let fdt_final = fdt.finish()?; + + // Write FDT to memory. + let fdt_address = GuestAddress(super::get_fdt_addr(fdt_vm_info.get_guest_memory())); + fdt_vm_info + .get_guest_memory() + .write_slice(fdt_final.as_slice(), fdt_address)?; + Ok(fdt_final) +} + +// Following are the auxiliary function for creating the different nodes that we append to our FDT. +fn create_cpu_nodes(fdt: &mut FdtWriter, fdt_vm_info: &FdtVmInfo) -> Result<()> { + // See https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/arm/cpus.yaml. + let cpus_node = fdt.begin_node("cpus")?; + // As per documentation, on ARM v8 64-bit systems value should be set to 2. + fdt.property_u32("#address-cells", 0x02)?; + fdt.property_u32("#size-cells", 0x0)?; + let vcpu_mpidr = fdt_vm_info.get_vcpu_mpidr(); + let vcpu_boot_onlined = fdt_vm_info.get_boot_onlined(); + let num_cpus = vcpu_mpidr.len(); + + for (cpu_index, mpidr) in vcpu_mpidr.iter().enumerate().take(num_cpus) { + let cpu_name = format!("cpu@{cpu_index:x}"); + let cpu_node = fdt.begin_node(&cpu_name)?; + fdt.property_string("device_type", "cpu")?; + fdt.property_string("compatible", "arm,arm-v8")?; + if num_cpus > 1 { + // This is required on armv8 64-bit. See aforementioned documentation. + fdt.property_string("enable-method", "psci")?; + } + // boot-onlined attribute is used to indicate whether this cpu should be onlined at boot. + // 0 means offline, 1 means online. + fdt.property_u32("boot-onlined", vcpu_boot_onlined[cpu_index])?; + // Set the field to first 24 bits of the MPIDR - Multiprocessor Affinity Register. + // See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0488c/BABHBJCI.html. 
+ fdt.property_u64("reg", mpidr & 0x7FFFFF)?; + fdt.end_node(cpu_node)?; + } + fdt.end_node(cpus_node)?; + Ok(()) +} + +fn create_memory_node(fdt: &mut FdtWriter, guest_mem: &M) -> Result<()> { + // See https://github.com/torvalds/linux/blob/v5.9/Documentation/devicetree/booting-without-of.rst + for region in guest_mem.iter() { + let memory_name = format!("memory@{:x}", region.start_addr().raw_value()); + let mem_reg_prop = &[region.start_addr().raw_value(), region.len()]; + let memory_node = fdt.begin_node(&memory_name)?; + fdt.property_string("device_type", "memory")?; + fdt.property_array_u64("reg", mem_reg_prop)?; + fdt.end_node(memory_node)?; + } + Ok(()) +} + +fn create_chosen_node(fdt: &mut FdtWriter, fdt_vm_info: &FdtVmInfo) -> Result<()> { + let chosen_node = fdt.begin_node("chosen")?; + fdt.property_string("bootargs", fdt_vm_info.get_cmdline())?; + + if let Some(initrd_config) = fdt_vm_info.get_initrd_config() { + fdt.property_u64("linux,initrd-start", initrd_config.address.raw_value())?; + fdt.property_u64( + "linux,initrd-end", + initrd_config.address.raw_value() + initrd_config.size as u64, + )?; + } + + fdt.end_node(chosen_node)?; + + Ok(()) +} + +fn append_its_common_property(fdt: &mut FdtWriter, registers_prop: &[u64]) -> Result<()> { + fdt.property_string("compatible", "arm,gic-v3-its")?; + fdt.property_null("msi-controller")?; + fdt.property_array_u64("reg", registers_prop)?; + Ok(()) +} + +fn create_its_node( + fdt: &mut FdtWriter, + gic_device: &dyn GICDevice, + its_type: ItsType, +) -> Result<()> { + let reg = gic_device.get_its_reg_range(&its_type); + if let Some(registers) = reg { + // There are two types of its, pci_msi_its and platform_msi_its. + // If this is pci_msi_its, the fdt node of its is required to have no + // #msi-cells attribute. If this is platform_msi_its, the #msi-cells + // attribute of its fdt node is required, and the value is 1. + match its_type { + PlatformMsiIts => { + let its_node = fdt.begin_node("gic-platform-its")?; + append_its_common_property(fdt, ®isters)?; + fdt.property_u32("phandle", GIC_PLATFORM_MSI_ITS_PHANDLE)?; + fdt.property_u32("#msi-cells", GIC_PLATFORM_MSI_ITS_CELLS_SIZE)?; + fdt.end_node(its_node)?; + } + PciMsiIts => { + let its_node = fdt.begin_node("gic-pci-its")?; + append_its_common_property(fdt, ®isters)?; + fdt.property_u32("phandle", GIC_PCI_MSI_ITS_PHANDLE)?; + fdt.end_node(its_node)?; + } + } + } + Ok(()) +} + +fn create_gic_node(fdt: &mut FdtWriter, gic_device: &dyn GICDevice) -> Result<()> { + let gic_reg_prop = gic_device.device_properties(); + + let intc_node = fdt.begin_node("intc")?; + fdt.property_string("compatible", gic_device.fdt_compatibility())?; + fdt.property_null("interrupt-controller")?; + // "interrupt-cells" field specifies the number of cells needed to encode an + // interrupt source. The type shall be a and the value shall be 3 if no PPI affinity description + // is required. 
+ fdt.property_u32("#interrupt-cells", 3)?; + fdt.property_array_u64("reg", gic_reg_prop)?; + fdt.property_u32("phandle", GIC_PHANDLE)?; + fdt.property_u32("#address-cells", 2)?; + fdt.property_u32("#size-cells", 2)?; + fdt.property_null("ranges")?; + let gic_intr_prop = &[ + GIC_FDT_IRQ_TYPE_PPI, + gic_device.fdt_maint_irq(), + IRQ_TYPE_LEVEL_HI, + ]; + + fdt.property_array_u32("interrupts", gic_intr_prop)?; + create_its_node(fdt, gic_device, PlatformMsiIts)?; + create_its_node(fdt, gic_device, PciMsiIts)?; + fdt.end_node(intc_node)?; + + Ok(()) +} + +fn create_clock_node(fdt: &mut FdtWriter) -> Result<()> { + // The Advanced Peripheral Bus (APB) is part of the Advanced Microcontroller Bus Architecture + // (AMBA) protocol family. It defines a low-cost interface that is optimized for minimal power + // consumption and reduced interface complexity. + // PCLK is the clock source and this node defines exactly the clock for the APB. + let clock_node = fdt.begin_node("apb-pclk")?; + fdt.property_string("compatible", "fixed-clock")?; + fdt.property_u32("#clock-cells", 0x0)?; + fdt.property_u32("clock-frequency", 24000000)?; + fdt.property_string("clock-output-names", "clk24mhz")?; + fdt.property_u32("phandle", CLOCK_PHANDLE)?; + fdt.end_node(clock_node)?; + + Ok(()) +} + +fn create_timer_node(fdt: &mut FdtWriter) -> Result<()> { + // See + // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/interrupt-controller/arch_timer.txt + // These are fixed interrupt numbers for the timer device. + let irqs = [13, 14, 11, 10]; + let compatible = "arm,armv8-timer"; + + let mut timer_reg_cells: Vec = Vec::new(); + for &irq in irqs.iter() { + timer_reg_cells.push(GIC_FDT_IRQ_TYPE_PPI); + timer_reg_cells.push(irq); + timer_reg_cells.push(IRQ_TYPE_LEVEL_HI); + } + + let timer_node = fdt.begin_node("timer")?; + fdt.property_string("compatible", compatible)?; + fdt.property_null("always-on")?; + fdt.property_array_u32("interrupts", &timer_reg_cells)?; + fdt.end_node(timer_node)?; + + Ok(()) +} + +fn create_psci_node(fdt: &mut FdtWriter) -> Result<()> { + let compatible = "arm,psci-0.2"; + let psci_node = fdt.begin_node("psci")?; + fdt.property_string("compatible", compatible)?; + // Two methods available: hvc and smc. + // As per documentation, PSCI calls between a guest and hypervisor may use the HVC conduit instead of SMC. + // So, since we are using kvm, we need to use hvc. 
+ fdt.property_string("method", "hvc")?; + fdt.end_node(psci_node)?; + + Ok(()) +} + +fn create_virtio_node( + fdt: &mut FdtWriter, + dev_info: &T, +) -> Result<()> { + let device_reg_prop = &[dev_info.addr(), dev_info.length()]; + let irq_number = dev_info.irq().map_err(|_| Error::InvalidArguments)?; + let irq_property = &[GIC_FDT_IRQ_TYPE_SPI, irq_number, IRQ_TYPE_EDGE_RISING]; + + let virtio_mmio_node = fdt.begin_node(&format!("virtio_mmio@{:x}", dev_info.addr()))?; + fdt.property_string("compatible", "virtio,mmio")?; + fdt.property_array_u64("reg", device_reg_prop)?; + fdt.property_array_u32("interrupts", irq_property)?; + fdt.property_u32("interrupt-parent", GIC_PHANDLE)?; + fdt.end_node(virtio_mmio_node)?; + + Ok(()) +} + +fn create_serial_node( + fdt: &mut FdtWriter, + dev_info: &T, +) -> Result<()> { + let serial_reg_prop = &[dev_info.addr(), dev_info.length()]; + let irq_number = dev_info.irq().map_err(|_| Error::InvalidArguments)?; + let irq_property = &[GIC_FDT_IRQ_TYPE_SPI, irq_number, IRQ_TYPE_EDGE_RISING]; + + let uart_node = fdt.begin_node(&format!("uart@{:x}", dev_info.addr()))?; + fdt.property_string("compatible", "ns16550a")?; + fdt.property_array_u64("reg", serial_reg_prop)?; + fdt.property_u32("clocks", CLOCK_PHANDLE)?; + fdt.property_string("clock-names", "apb_pclk")?; + fdt.property_array_u32("interrupts", irq_property)?; + fdt.end_node(uart_node)?; + + Ok(()) +} + +fn create_rtc_node( + fdt: &mut FdtWriter, + dev_info: &T, +) -> Result<()> { + let compatible = b"arm,pl031\0arm,primecell\0"; + let rtc_reg_prop = &[dev_info.addr(), dev_info.length()]; + let irq_number = dev_info.irq().map_err(|_| Error::InvalidArguments)?; + let irq_property = &[GIC_FDT_IRQ_TYPE_SPI, irq_number, IRQ_TYPE_LEVEL_HI]; + + let rtc_node = fdt.begin_node(&format!("rtc@{:x}", dev_info.addr()))?; + fdt.property("compatible", compatible)?; + fdt.property_array_u64("reg", rtc_reg_prop)?; + fdt.property_array_u32("interrupts", irq_property)?; + fdt.property_u32("clocks", CLOCK_PHANDLE)?; + fdt.property_string("clock-names", "apb_pclk")?; + fdt.end_node(rtc_node)?; + + Ok(()) +} + +fn create_devices_node( + fdt: &mut FdtWriter, + dev_info: &HashMap<(DeviceType, String), T>, +) -> Result<()> { + // Serial devices need to be registered in order + let mut ordered_serial_device: Vec<&T> = Vec::new(); + // Create one temp Vec to store all virtio devices + let mut ordered_virtio_device: Vec<&T> = Vec::new(); + + for ((device_type, _device_id), info) in dev_info { + match device_type { + DeviceType::RTC => create_rtc_node(fdt, info)?, + DeviceType::Serial => { + ordered_serial_device.push(info); + } + DeviceType::Virtio(_) => { + ordered_virtio_device.push(info); + } + } + } + + // Sort out serial devices by address from low to high and insert them into fdt table. + ordered_serial_device.sort_by_key(|a| a.addr()); + for serial_device_info in ordered_serial_device.drain(..) { + create_serial_node(fdt, serial_device_info)?; + } + // Sort out virtio devices by address from low to high and insert them into fdt table. + ordered_virtio_device.sort_by_key(|a| a.addr()); + for ordered_device_info in ordered_virtio_device.drain(..) 
{ + create_virtio_node(fdt, ordered_device_info)?; + } + + Ok(()) +} + +fn create_pmu_node(fdt: &mut FdtWriter, vpmu_feature: VpmuFeatureLevel) -> Result<()> { + if vpmu_feature == VpmuFeatureLevel::Disabled { + return Ok(()); + }; + + let pmu_node = fdt.begin_node("pmu")?; + fdt.property_string("compatible", "arm,armv8-pmuv3")?; + let pmu_intr_prop = [GIC_FDT_IRQ_TYPE_PPI, VIRTUAL_PMU_IRQ, IRQ_TYPE_LEVEL_HI]; + fdt.property_array_u32("interrupts", &pmu_intr_prop)?; + fdt.end_node(pmu_node)?; + + Ok(()) +} + +#[cfg(test)] +mod tests { + use std::cmp::min; + use std::collections::HashMap; + use std::env; + use std::fs::OpenOptions; + use std::io::Write; + use std::path::PathBuf; + + use dbs_arch::{gic::create_gic, pmu::initialize_pmu}; + use device_tree::DeviceTree; + use kvm_bindings::{kvm_vcpu_init, KVM_ARM_VCPU_PMU_V3, KVM_ARM_VCPU_PSCI_0_2}; + use kvm_ioctls::{Kvm, VcpuFd, VmFd}; + use vm_memory::GuestMemoryMmap; + + use super::super::tests::MMIODeviceInfo; + use super::*; + use crate::layout::{DRAM_MEM_MAX_SIZE, DRAM_MEM_START, FDT_MAX_SIZE}; + use crate::InitrdConfig; + + const LEN: u64 = 4096; + + fn arch_memory_regions(size: usize) -> Vec<(GuestAddress, usize)> { + let dram_size = min(size as u64, DRAM_MEM_MAX_SIZE) as usize; + vec![(GuestAddress(DRAM_MEM_START), dram_size)] + } + + // The `load` function from the `device_tree` will mistakenly check the actual size + // of the buffer with the allocated size. This works around that. + fn set_size(buf: &mut [u8], pos: usize, val: usize) { + buf[pos] = ((val >> 24) & 0xff) as u8; + buf[pos + 1] = ((val >> 16) & 0xff) as u8; + buf[pos + 2] = ((val >> 8) & 0xff) as u8; + buf[pos + 3] = (val & 0xff) as u8; + } + + // Initialize vcpu for pmu test + fn initialize_vcpu_with_pmu(vm: &VmFd, vcpu: &VcpuFd) -> Result<()> { + let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default(); + vm.get_preferred_target(&mut kvi) + .expect("Cannot get preferred target"); + kvi.features[0] = 1 << KVM_ARM_VCPU_PSCI_0_2 | 1 << KVM_ARM_VCPU_PMU_V3; + vcpu.vcpu_init(&kvi).map_err(|_| Error::InvalidArguments)?; + initialize_pmu(vm, vcpu).map_err(|_| Error::InvalidArguments)?; + + Ok(()) + } + + // Create fdt dtb file + fn create_dtb_file(name: &str, dtb: &[u8]) { + // Control whether to create new dtb files for unit test. + // Usage: FDT_CREATE_DTB=1 cargo test + if env::var("FDT_CREATE_DTB").is_err() { + return; + } + + // Use this code when wanting to generate a new DTB sample. 
+ // Do manually check dtb files with dtc + // See https://git.kernel.org/pub/scm/utils/dtc/dtc.git/plain/Documentation/manual.txt + let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let mut output = OpenOptions::new() + .write(true) + .create(true) + .open(path.join(format!("src/aarch64/test/{name}"))) + .unwrap(); + output + .set_len(FDT_MAX_SIZE as u64) + .map_err(|_| Error::InvalidArguments) + .unwrap(); + output.write_all(dtb).unwrap(); + } + + #[test] + fn test_create_fdt_with_devices() { + let regions = arch_memory_regions(FDT_MAX_SIZE + 0x1000); + let mem = GuestMemoryMmap::<()>::from_ranges(®ions).expect("Cannot initialize memory"); + let dev_info: HashMap<(DeviceType, String), MMIODeviceInfo> = [ + ( + (DeviceType::Serial, DeviceType::Serial.to_string()), + MMIODeviceInfo::new(0, 1), + ), + ( + (DeviceType::Virtio(1), "virtio".to_string()), + MMIODeviceInfo::new(LEN, 2), + ), + ( + (DeviceType::RTC, "rtc".to_string()), + MMIODeviceInfo::new(2 * LEN, 3), + ), + ] + .iter() + .cloned() + .collect(); + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let gic = create_gic(&vm, 1).unwrap(); + let vpmu_feature = VpmuFeatureLevel::Disabled; + assert!(create_fdt( + FdtVmInfo::new( + &mem, + "console=tty0", + None, + FdtVcpuInfo::new(vec![0], vec![1], vpmu_feature, false) + ), + FdtNumaInfo::default(), + FdtDeviceInfo::new(Some(&dev_info), gic.as_ref()) + ) + .is_ok()) + } + + #[test] + fn test_create_fdt() { + let regions = arch_memory_regions(FDT_MAX_SIZE + 0x1000); + let mem = GuestMemoryMmap::<()>::from_ranges(®ions).expect("Cannot initialize memory"); + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let gic = create_gic(&vm, 1).unwrap(); + let vpmu_feature = VpmuFeatureLevel::Disabled; + let dtb = create_fdt( + FdtVmInfo::new( + &mem, + "console=tty0", + None, + FdtVcpuInfo::new(vec![0], vec![1], vpmu_feature, false), + ), + FdtNumaInfo::default(), + FdtDeviceInfo::::new(None, gic.as_ref()), + ) + .unwrap(); + + create_dtb_file("output.dtb", &dtb); + + let bytes = include_bytes!("test/output.dtb"); + let pos = 4; + let val = FDT_MAX_SIZE; + let mut buf = vec![]; + buf.extend_from_slice(bytes); + set_size(&mut buf, pos, val); + + let original_fdt = DeviceTree::load(&buf).unwrap(); + let generated_fdt = DeviceTree::load(&dtb).unwrap(); + assert_eq!(format!("{original_fdt:?}"), format!("{generated_fdt:?}")); + } + + #[test] + fn test_create_fdt_with_initrd() { + let regions = arch_memory_regions(FDT_MAX_SIZE + 0x1000); + let mem = GuestMemoryMmap::<()>::from_ranges(®ions).expect("Cannot initialize memory"); + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let gic = create_gic(&vm, 1).unwrap(); + let initrd = InitrdConfig { + address: GuestAddress(0x10000000), + size: 0x1000, + }; + let vpmu_feature = VpmuFeatureLevel::Disabled; + let dtb = create_fdt( + FdtVmInfo::new( + &mem, + "console=tty0", + Some(&initrd), + FdtVcpuInfo::new(vec![0], vec![1], vpmu_feature, false), + ), + FdtNumaInfo::default(), + FdtDeviceInfo::::new(None, gic.as_ref()), + ) + .unwrap(); + + create_dtb_file("output_with_initrd.dtb", &dtb); + + let bytes = include_bytes!("test/output_with_initrd.dtb"); + let pos = 4; + let val = FDT_MAX_SIZE; + let mut buf = vec![]; + buf.extend_from_slice(bytes); + set_size(&mut buf, pos, val); + + let original_fdt = DeviceTree::load(&buf).unwrap(); + let generated_fdt = DeviceTree::load(&dtb).unwrap(); + assert_eq!(format!("{original_fdt:?}"), format!("{generated_fdt:?}")); + } + + #[test] + fn 
test_create_fdt_with_pmu() { + let regions = arch_memory_regions(FDT_MAX_SIZE + 0x1000); + let mem = GuestMemoryMmap::<()>::from_ranges(®ions).expect("Cannot initialize memory"); + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + let gic = create_gic(&vm, 1).unwrap(); + + assert!(initialize_vcpu_with_pmu(&vm, &vcpu).is_ok()); + + let vpmu_feature = VpmuFeatureLevel::FullyEnabled; + let dtb = create_fdt( + FdtVmInfo::new( + &mem, + "console=tty0", + None, + FdtVcpuInfo::new(vec![0], vec![1], vpmu_feature, false), + ), + FdtNumaInfo::default(), + FdtDeviceInfo::::new(None, gic.as_ref()), + ) + .unwrap(); + + create_dtb_file("output_with_pmu.dtb", &dtb); + + let bytes = include_bytes!("test/output_with_pmu.dtb"); + let pos = 4; + let val = FDT_MAX_SIZE; + let mut buf = vec![]; + buf.extend_from_slice(bytes); + set_size(&mut buf, pos, val); + + let original_fdt = DeviceTree::load(&buf).unwrap(); + let generated_fdt = DeviceTree::load(&dtb).unwrap(); + assert_eq!(format!("{original_fdt:?}"), format!("{generated_fdt:?}")); + } +} diff --git a/src/dragonball/src/dbs_boot/src/aarch64/fdt_utils.rs b/src/dragonball/src/dbs_boot/src/aarch64/fdt_utils.rs new file mode 100644 index 000000000000..ceb6d8e79350 --- /dev/null +++ b/src/dragonball/src/dbs_boot/src/aarch64/fdt_utils.rs @@ -0,0 +1,373 @@ +// Copyright 2023 Alibaba Cloud. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! This module abstract some structs for constructing fdt. Instead of using +//! multiple parameters. + +use std::collections::HashMap; + +use dbs_arch::{gic::GICDevice, DeviceInfoForFDT, DeviceType, VpmuFeatureLevel}; +use vm_memory::mmap::GuestMemoryMmap; + +use crate::InitrdConfig; + +/// Struct to save vcpu information +pub struct FdtVcpuInfo { + /// vcpu mpidrs + vcpu_mpidr: Vec, + /// vcpu boot-onlined + vcpu_boot_onlined: Vec, + /// vpmu feature + vpmu_feature: VpmuFeatureLevel, + // TODO: #274 cache passthrough + /// cache passthrough + cache_passthrough_enabled: bool, +} + +impl FdtVcpuInfo { + /// Generate FdtVcpuInfo + pub fn new( + vcpu_mpidr: Vec, + vcpu_boot_onlined: Vec, + vpmu_feature: VpmuFeatureLevel, + cache_passthrough_enabled: bool, + ) -> Self { + FdtVcpuInfo { + vcpu_mpidr, + vcpu_boot_onlined, + vpmu_feature, + cache_passthrough_enabled, + } + } +} + +/// Struct to save vm information. +pub struct FdtVmInfo<'a> { + /// guest meory + guest_memory: &'a GuestMemoryMmap, + /// command line + cmdline: &'a str, + /// initrd config + initrd_config: Option<&'a InitrdConfig>, + /// vcpu information + vcpu_info: FdtVcpuInfo, +} + +impl FdtVmInfo<'_> { + /// Generate FdtVmInfo. + pub fn new<'a>( + guest_memory: &'a GuestMemoryMmap, + cmdline: &'a str, + initrd_config: Option<&'a InitrdConfig>, + vcpu_info: FdtVcpuInfo, + ) -> FdtVmInfo<'a> { + FdtVmInfo { + guest_memory, + cmdline, + initrd_config, + vcpu_info, + } + } + + /// Get guest_memory. + pub fn get_guest_memory(&self) -> &GuestMemoryMmap { + self.guest_memory + } + + /// Get cmdline. + pub fn get_cmdline(&self) -> &str { + self.cmdline + } + + /// Get initrd_config. + pub fn get_initrd_config(&self) -> Option<&InitrdConfig> { + self.initrd_config + } + + /// Get vcpu_mpidr. + pub fn get_vcpu_mpidr(&self) -> &[u64] { + self.vcpu_info.vcpu_mpidr.as_slice() + } + + /// Get vpmu_feature. + pub fn get_boot_onlined(&self) -> &[u32] { + self.vcpu_info.vcpu_boot_onlined.as_slice() + } + + /// Get vpmu_feature. 
+ pub fn get_vpmu_feature(&self) -> VpmuFeatureLevel { + self.vcpu_info.vpmu_feature + } + + /// Get cache_passthrough_enabled. + pub fn get_cache_passthrough_enabled(&self) -> bool { + self.vcpu_info.cache_passthrough_enabled + } +} + +// This struct is used for cache passthrough and numa passthrough +// TODO: #274 cache passthrough +// TODO: #275 numa passthrough +/// Struct to save numa information. +#[derive(Default)] +pub struct FdtNumaInfo { + /// vcpu -> pcpu maps + cpu_maps: Option>, + /// numa id map vector for memory + memory_numa_id_map: Option>, + /// numa id map vector for vcpu + vcpu_numa_id_map: Option>, +} + +impl FdtNumaInfo { + /// Generate FdtNumaInfo. + pub fn new( + cpu_maps: Option>, + memory_numa_id_map: Option>, + vcpu_numa_id_map: Option>, + ) -> Self { + FdtNumaInfo { + cpu_maps, + memory_numa_id_map, + vcpu_numa_id_map, + } + } + + /// Get cpu_maps struct. + pub fn get_cpu_maps(&self) -> Option> { + self.cpu_maps.clone() + } + + /// Get memory_numa_id_map struct. + pub fn get_memory_numa_id_map(&self) -> Option<&Vec> { + self.memory_numa_id_map.as_ref() + } + + /// Get vcpu_numa_id_map struct. + pub fn get_vcpu_numa_id_map(&self) -> Option<&Vec> { + self.vcpu_numa_id_map.as_ref() + } +} + +/// Struct to save device information. +pub struct FdtDeviceInfo<'a, T: DeviceInfoForFDT> { + /// mmio device information + mmio_device_info: Option<&'a HashMap<(DeviceType, String), T>>, + /// interrupt controller + irq_chip: &'a dyn GICDevice, +} + +impl FdtDeviceInfo<'_, T> { + /// Generate FdtDeviceInfo. + pub fn new<'a>( + mmio_device_info: Option<&'a HashMap<(DeviceType, String), T>>, + irq_chip: &'a dyn GICDevice, + ) -> FdtDeviceInfo<'a, T> { + FdtDeviceInfo { + mmio_device_info, + irq_chip, + } + } + + /// Get mmio device information. + pub fn get_mmio_device_info(&self) -> Option<&HashMap<(DeviceType, String), T>> { + self.mmio_device_info + } + + /// Get interrupt controller. 
+ pub fn get_irqchip(&self) -> &dyn GICDevice { + self.irq_chip + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use dbs_arch::gic::create_gic; + use vm_memory::{GuestAddress, GuestMemory}; + + const CMDLINE: &str = "console=tty0"; + const INITRD_CONFIG: InitrdConfig = InitrdConfig { + address: GuestAddress(0x10000000), + size: 0x1000, + }; + const VCPU_MPIDR: [u64; 1] = [0]; + const VCPU_BOOT_ONLINED: [u32; 1] = [1]; + const VPMU_FEATURE: VpmuFeatureLevel = VpmuFeatureLevel::Disabled; + const CACHE_PASSTHROUGH_ENABLED: bool = false; + + #[inline] + fn helper_generate_fdt_vm_info(guest_memory: &GuestMemoryMmap) -> FdtVmInfo<'_> { + FdtVmInfo::new( + guest_memory, + CMDLINE, + Some(&INITRD_CONFIG), + FdtVcpuInfo::new( + VCPU_MPIDR.to_vec(), + VCPU_BOOT_ONLINED.to_vec(), + VPMU_FEATURE, + CACHE_PASSTHROUGH_ENABLED, + ), + ) + } + + #[test] + fn test_fdtutils_fdt_vm_info() { + let ranges = vec![(GuestAddress(0x80000000), 0x40000)]; + let guest_memory: GuestMemoryMmap<()> = + GuestMemoryMmap::<()>::from_ranges(ranges.as_slice()) + .expect("Cannot initialize memory"); + let vm_info = helper_generate_fdt_vm_info(&guest_memory); + + assert_eq!( + guest_memory.check_address(GuestAddress(0x80001000)), + Some(GuestAddress(0x80001000)) + ); + assert_eq!(guest_memory.check_address(GuestAddress(0x80050000)), None); + assert!(guest_memory.check_range(GuestAddress(0x80000000), 0x40000)); + assert_eq!(vm_info.get_cmdline(), CMDLINE); + assert_eq!( + vm_info.get_initrd_config().unwrap().address, + INITRD_CONFIG.address + ); + assert_eq!( + vm_info.get_initrd_config().unwrap().size, + INITRD_CONFIG.size + ); + assert_eq!(vm_info.get_vcpu_mpidr(), VCPU_MPIDR.as_slice()); + assert_eq!(vm_info.get_boot_onlined(), VCPU_BOOT_ONLINED.as_slice()); + assert_eq!(vm_info.get_vpmu_feature(), VPMU_FEATURE); + assert_eq!( + vm_info.get_cache_passthrough_enabled(), + CACHE_PASSTHROUGH_ENABLED + ); + } + + const CPU_MAPS: [u8; 5] = [1, 2, 3, 4, 5]; + const MEMORY_VEC: [u32; 2] = [0, 1]; + const CPU_VEC: [u32; 5] = [0, 0, 0, 1, 1]; + + #[inline] + fn helper_generate_fdt_numa_info() -> FdtNumaInfo { + FdtNumaInfo::new( + Some(CPU_MAPS.to_vec()), + Some(MEMORY_VEC.to_vec()), + Some(CPU_VEC.to_vec()), + ) + } + + #[test] + fn test_fdtutils_fdt_numa_info() { + // test default + let numa_info = FdtNumaInfo::default(); + assert_eq!(numa_info.get_cpu_maps(), None); + assert_eq!(numa_info.get_memory_numa_id_map(), None); + assert_eq!(numa_info.get_vcpu_numa_id_map(), None); + + let numa_info = helper_generate_fdt_numa_info(); + assert_eq!( + numa_info.get_cpu_maps().unwrap().as_slice(), + CPU_MAPS.as_slice() + ); + assert_eq!( + numa_info.get_memory_numa_id_map().unwrap().as_slice(), + MEMORY_VEC.as_slice() + ); + assert_eq!( + numa_info.get_vcpu_numa_id_map().unwrap().as_slice(), + CPU_VEC.as_slice() + ); + } + + use dbs_arch::gic::its::ItsType; + use dbs_device::resources::{DeviceResources, Resource}; + use kvm_ioctls::Kvm; + + use super::super::tests::MMIODeviceInfo; + + const MEMORY_SIZE: u64 = 4096; + const ECAM_SPACE: [Resource; 1] = [Resource::MmioAddressRange { + base: 0x40000000, + size: 0x1000, + }]; + const BAR_SPACE: [Resource; 2] = [ + Resource::MmioAddressRange { + base: 0x40001000, + size: 0x1000, + }, + Resource::MmioAddressRange { + base: 0x40002000, + size: 0x1000, + }, + ]; + + #[test] + fn test_fdtutils_fdt_device_info() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let gic = create_gic(&vm, 0).unwrap(); + let mmio_device_info: Option> = Some( + [ + ( + (DeviceType::Serial, 
DeviceType::Serial.to_string()), + MMIODeviceInfo::new(0, 1), + ), + ( + (DeviceType::Virtio(1), "virtio".to_string()), + MMIODeviceInfo::new(MEMORY_SIZE, 2), + ), + ( + (DeviceType::RTC, "rtc".to_string()), + MMIODeviceInfo::new(2 * MEMORY_SIZE, 3), + ), + ] + .iter() + .cloned() + .collect(), + ); + let mut ecam_space = DeviceResources::new(); + ecam_space.append(ECAM_SPACE.as_slice()[0].clone()); + + let mut bar_space = DeviceResources::new(); + bar_space.append(BAR_SPACE.as_slice()[0].clone()); + bar_space.append(BAR_SPACE.as_slice()[1].clone()); + + let its_type1 = ItsType::PciMsiIts; + let its_type2 = ItsType::PlatformMsiIts; + + let device_info = FdtDeviceInfo::new(mmio_device_info.as_ref(), gic.as_ref()); + assert_eq!( + device_info.get_mmio_device_info(), + mmio_device_info.as_ref() + ); + assert_eq!( + format!("{:?}", device_info.get_irqchip().device_fd()), + format!("{:?}", gic.as_ref().device_fd()) + ); + assert_eq!( + device_info.get_irqchip().device_properties(), + gic.as_ref().device_properties() + ); + assert_eq!( + device_info.get_irqchip().fdt_compatibility(), + gic.as_ref().fdt_compatibility() + ); + assert_eq!( + device_info.get_irqchip().fdt_maint_irq(), + gic.as_ref().fdt_maint_irq() + ); + assert_eq!( + device_info.get_irqchip().vcpu_count(), + gic.as_ref().vcpu_count() + ); + assert_eq!( + device_info.get_irqchip().get_its_reg_range(&its_type1), + gic.as_ref().get_its_reg_range(&its_type1) + ); + assert_eq!( + device_info.get_irqchip().get_its_reg_range(&its_type2), + gic.as_ref().get_its_reg_range(&its_type2) + ); + } +} diff --git a/src/dragonball/src/dbs_boot/src/aarch64/layout.rs b/src/dragonball/src/dbs_boot/src/aarch64/layout.rs new file mode 100644 index 000000000000..6bc98d55f638 --- /dev/null +++ b/src/dragonball/src/dbs_boot/src/aarch64/layout.rs @@ -0,0 +1,94 @@ +// Copyright 2021-2022 Alibaba Cloud. All Rights Reserved. +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// ==== Address map in use in ARM development systems today ==== +// +// - 32-bit - - 36-bit - - 40-bit - +//1024GB + + +-------------------+ <- 40-bit +// | | DRAM | +// ~ ~ ~ ~ +// | | | +// | | | +// | | | +// | | | +//544GB + + +-------------------+ +// | | Hole or DRAM | +// | | | +//512GB + + +-------------------+ +// | | Mapped | +// | | I/O | +// ~ ~ ~ ~ +// | | | +//256GB + + +-------------------+ +// | | Reserved | +// ~ ~ ~ ~ +// | | | +//64GB + +-----------------------+-------------------+ <- 36-bit +// | | DRAM | +// ~ ~ ~ ~ +// | | | +// | | | +//34GB + +-----------------------+-------------------+ +// | | Hole or DRAM | +//32GB + +-----------------------+-------------------+ +// | | Mapped I/O | +// ~ ~ ~ ~ +// | | | +//16GB + +-----------------------+-------------------+ +// | | Reserved | +// ~ ~ ~ ~ +//4GB +-------------------+-----------------------+-------------------+ <- 32-bit +// | 2GB of DRAM | +// | | +//2GB +-------------------+-----------------------+-------------------+ +// | Mapped I/O | +//1GB +-------------------+-----------------------+-------------------+ +// | ROM & RAM & I/O | +//0GB +-------------------+-----------------------+-------------------+ 0 +// - 32-bit - - 36-bit - - 40-bit - +// +// Taken from (http://infocenter.arm.com/help/topic/com.arm.doc.den0001c/DEN0001C_principles_of_arm_memory_maps.pdf). + +/// Start of RAM on 64 bit ARM. +pub const DRAM_MEM_START: u64 = 0x8000_0000; // 2 GB. +/// The maximum addressable RAM address. 
+pub const DRAM_MEM_END: u64 = 0x00F8_0000_0000; // 1024 - 32 = 992 GB. +/// The maximum RAM size. +pub const DRAM_MEM_MAX_SIZE: u64 = DRAM_MEM_END - DRAM_MEM_START; + +/// Kernel command line maximum size. +/// As per `arch/arm64/include/uapi/asm/setup.h`. +pub const CMDLINE_MAX_SIZE: usize = 2048; + +/// Maximum size of the device tree blob as specified in https://www.kernel.org/doc/Documentation/arm64/booting.txt. +pub const FDT_MAX_SIZE: usize = 0x20_0000; + +// As per virt/kvm/arm/vgic/vgic-kvm-device.c we need +// the number of interrupts our GIC will support to be: +// * bigger than 32 +// * less than 1023 and +// * a multiple of 32. +// We are setting up our interrupt controller to support a maximum of 128 interrupts. +/// First usable interrupt on aarch64. +pub const IRQ_BASE: u32 = dbs_arch::gic::IRQ_BASE; + +/// Last usable interrupt on aarch64. +pub const IRQ_MAX: u32 = dbs_arch::gic::IRQ_MAX; + +/// Below this address will reside the GIC, above this address will reside the MMIO devices. +pub const MAPPED_IO_START: u64 = dbs_arch::gic::GIC_REG_END_ADDRESS; // 1 GB +/// End address (inclusive) of the MMIO window. +pub const MAPPED_IO_END: u64 = (2 << 30) - 1; // 1 GB + +/// Maximum guest physical address supported. +pub static GUEST_PHYS_END: &u64 = &((1u64 << 40) - 1); +/// Upper bound of guest memory. +pub static GUEST_MEM_END: &u64 = &(DRAM_MEM_END - 1); +/// Lower bound of guest memory. +pub const GUEST_MEM_START: u64 = DRAM_MEM_START; +/// Start address of the lower MMIO window. +pub const MMIO_LOW_START: u64 = MAPPED_IO_START; +/// End address (inclusive) of the lower MMIO window. +pub const MMIO_LOW_END: u64 = MAPPED_IO_END; +/// Size of memory below MMIO hole. +pub const GUEST_MEM_LOW_SIZE: u64 = 0u64; diff --git a/src/dragonball/src/dbs_boot/src/aarch64/mod.rs b/src/dragonball/src/dbs_boot/src/aarch64/mod.rs new file mode 100644 index 000000000000..c9aa5fdf6a9a --- /dev/null +++ b/src/dragonball/src/dbs_boot/src/aarch64/mod.rs @@ -0,0 +1,103 @@ +// Copyright 2021 Alibaba Cloud. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! VM boot related constants and utilities for `aarch64` architecture. + +use vm_fdt::Error as VmFdtError; +use vm_memory::{Address, GuestAddress, GuestMemory, GuestMemoryError}; + +/// Magic addresses externally used to lay out aarch64 VMs. +pub mod layout; + +/// FDT is used to inform the guest kernel of device tree information. +pub mod fdt; + +/// Helper structs for constructing fdt. +pub mod fdt_utils; + +/// Default (smallest) memory page size for the supported architectures. +pub const PAGE_SIZE: usize = 4096; + +/// Errors thrown while configuring the Flattened Device Tree for aarch64. +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// Failure in creating FDT + #[error("create fdt fail: {0}")] + CreateFdt(#[from] VmFdtError), + /// Failure in writing FDT in memory. + #[error("write fdt to memory fail: {0}")] + WriteFDTToMemory(#[from] GuestMemoryError), + /// Failed to compute the initrd address. + #[error("Failed to compute the initrd address.")] + InitrdAddress, + /// Invalid arguments + #[error("invalid arguments")] + InvalidArguments, +} + +/// Returns the memory address where the kernel could be loaded. +pub fn get_kernel_start() -> u64 { + layout::DRAM_MEM_START +} + +/// Auxiliary function to get the address where the device tree blob is loaded. 
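// Worked example for the two helpers below, using the layout constants above
// (illustrative only): with 1 GiB of DRAM at DRAM_MEM_START (0x8000_0000),
// last_addr() is 0xBFFF_FFFF, so the FDT is placed at
// 0xBFFF_FFFF - (FDT_MAX_SIZE - 1) = 0xBFE0_0000, and a page-aligned initrd of
// 0x1000 bytes loads just below it at 0xBFDF_F000. A guest with less memory
// than FDT_MAX_SIZE falls back to DRAM_MEM_START.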
+pub fn get_fdt_addr(mem: &M) -> u64 { + // If the memory allocated is smaller than the size allocated for the FDT, + // we return the start of the DRAM so that + // we allow the code to try and load the FDT. + if let Some(offset) = mem.last_addr().checked_sub(layout::FDT_MAX_SIZE as u64 - 1) { + if mem.address_in_range(offset) { + return offset.raw_value(); + } + } + layout::DRAM_MEM_START +} + +/// Returns the memory address where the initrd could be loaded. +pub fn initrd_load_addr(guest_mem: &M, initrd_size: u64) -> super::Result { + let round_to_pagesize = |size| (size + (PAGE_SIZE as u64 - 1)) & !(PAGE_SIZE as u64 - 1); + match GuestAddress(get_fdt_addr(guest_mem)).checked_sub(round_to_pagesize(initrd_size)) { + Some(offset) => { + if guest_mem.address_in_range(offset) { + Ok(offset.raw_value()) + } else { + Err(Error::InitrdAddress) + } + } + None => Err(Error::InitrdAddress), + } +} + +#[cfg(test)] +pub mod tests { + use dbs_arch::{DeviceInfoForFDT, Error as ArchError}; + + const LEN: u64 = 4096; + + #[derive(Clone, Debug, PartialEq)] + pub struct MMIODeviceInfo { + addr: u64, + irq: u32, + } + + impl MMIODeviceInfo { + pub fn new(addr: u64, irq: u32) -> Self { + MMIODeviceInfo { addr, irq } + } + } + + impl DeviceInfoForFDT for MMIODeviceInfo { + fn addr(&self) -> u64 { + self.addr + } + fn irq(&self) -> std::result::Result { + Ok(self.irq) + } + fn length(&self) -> u64 { + LEN + } + fn get_device_id(&self) -> Option { + None + } + } +} diff --git a/src/dragonball/src/dbs_boot/src/aarch64/test/output.dtb b/src/dragonball/src/dbs_boot/src/aarch64/test/output.dtb new file mode 100644 index 000000000000..8329528f3482 Binary files /dev/null and b/src/dragonball/src/dbs_boot/src/aarch64/test/output.dtb differ diff --git a/src/dragonball/src/dbs_boot/src/aarch64/test/output_with_initrd.dtb b/src/dragonball/src/dbs_boot/src/aarch64/test/output_with_initrd.dtb new file mode 100644 index 000000000000..6fe25cde1893 Binary files /dev/null and b/src/dragonball/src/dbs_boot/src/aarch64/test/output_with_initrd.dtb differ diff --git a/src/dragonball/src/dbs_boot/src/aarch64/test/output_with_pmu.dtb b/src/dragonball/src/dbs_boot/src/aarch64/test/output_with_pmu.dtb new file mode 100644 index 000000000000..16c554821b26 Binary files /dev/null and b/src/dragonball/src/dbs_boot/src/aarch64/test/output_with_pmu.dtb differ diff --git a/src/dragonball/src/dbs_boot/src/lib.rs b/src/dragonball/src/dbs_boot/src/lib.rs new file mode 100644 index 000000000000..e281b8d3cae6 --- /dev/null +++ b/src/dragonball/src/dbs_boot/src/lib.rs @@ -0,0 +1,27 @@ +// Copyright 2021-2022 Alibaba Cloud. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![deny(missing_docs)] + +//! Constants, Structs and Utilities to setup boot environment for virtual machines. + +#[cfg(target_arch = "x86_64")] +mod x86_64; +#[cfg(target_arch = "x86_64")] +pub use x86_64::*; + +#[cfg(target_arch = "aarch64")] +mod aarch64; +#[cfg(target_arch = "aarch64")] +pub use aarch64::*; + +/// Specialized [std::result::Result] for boot related operations. +pub type Result = std::result::Result; + +/// Type for passing information about the initrd in the guest memory. 
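// Sketch of the intended flow (helper names are from this patch; the loader side
// is assumed): write the initrd image into guest memory at the address returned
// by initrd_load_addr() above, record that address and size in an InitrdConfig,
// and hand it to fdt::create_fdt via FdtVmInfo so the FDT can describe the
// initrd to the guest kernel.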
+pub struct InitrdConfig { + /// Load address of initrd in guest memory + pub address: vm_memory::GuestAddress, + /// Size of initrd in guest memory + pub size: usize, +} diff --git a/src/dragonball/src/dbs_boot/src/vendor/bootparam.rs b/src/dragonball/src/dbs_boot/src/vendor/bootparam.rs new file mode 100644 index 000000000000..b093ef8420be --- /dev/null +++ b/src/dragonball/src/dbs_boot/src/vendor/bootparam.rs @@ -0,0 +1,41 @@ +// Copyright (C) 2023 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use arch_gen::x86::bootparam::{__u32, __u64}; +use vm_memory::bytes::Bytes; +use vm_memory::guest_memory::GuestAddress; +use vm_memory::{ByteValued, GuestMemory}; + +use super::layout; + +/// With reference to the x86_hardware_subarch enumeration type of the +/// kernel, we newly added the X86_SUBARCH_DRAGONBALL type and defined +/// it as 0xdbdbdb01 to mark this as a guest kernel. +#[allow(dead_code)] +pub enum X86HardwareSubarch { + X86SubarchPC = 0, + X86SubarchLGUEST = 1, + X86SubarchXEN = 2, + X86SubarchIntelMID = 3, + X86SubarchCE4100 = 4, + X86SubarchDragonball = 0xdbdbdb01, +} + +/// Recorded in subarch_data, used to verify the validity of dragonball subarch_data. +pub const DB_BOOT_PARAM_SIGNATURE: u64 = 0xdbdbb007700bbdbd; + +#[derive(Debug, PartialEq, thiserror::Error)] +pub enum Error { + /// Error dragonball boot parameter length + #[error("dragonball boot param exceeds max size")] + DragonballBootParamPastMaxSize, + + /// Error dragonball boot parameter location + #[error("dragonball boot param past ram end")] + DragonballBootParamPastRamEnd, + + /// Error writing dragonball boot parameter + #[error("dragonball boot param setup fail")] + WriteDragonballBootParam, +} + diff --git a/src/dragonball/src/dbs_boot/src/x86_64/bootparam.rs b/src/dragonball/src/dbs_boot/src/x86_64/bootparam.rs new file mode 100644 index 000000000000..db5d3d550d61 --- /dev/null +++ b/src/dragonball/src/dbs_boot/src/x86_64/bootparam.rs @@ -0,0 +1,4628 @@ +// Copyright 2021-2022 Alibaba Cloud. All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. 
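As a brief illustration of how the vendor constants above (X86HardwareSubarch and DB_BOOT_PARAM_SIGNATURE) are meant to be consumed, here is a minimal sketch of a hypothetical helper. It assumes the generated boot_params/setup_header bindings below expose hardware_subarch and hardware_subarch_data as in the upstream header; the import paths and the exact placement of the signature are assumptions, not part of this patch:

use crate::vendor::bootparam::{DB_BOOT_PARAM_SIGNATURE, X86HardwareSubarch};
use crate::x86_64::bootparam::boot_params; // assumed import path

fn mark_dragonball_guest(params: &mut boot_params) {
    // Tag the guest kernel as booting under the Dragonball subarch.
    params.hdr.hardware_subarch = X86HardwareSubarch::X86SubarchDragonball as u32;
    // Record the Dragonball signature via hardware_subarch_data so the guest
    // side can validate it (direct storage here is an assumption).
    params.hdr.hardware_subarch_data = DB_BOOT_PARAM_SIGNATURE;
}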
+ +/* + * automatically generated by rust-bindgen + * From upstream linux arch/x86/include/uapi/asm/bootparam.h at commit: + * 806276b7f07a39a1cc3f38bb1ef5c573d4594a38 + */ +#![allow(unused)] +#![allow(non_snake_case)] +#![allow(non_camel_case_types)] +#![allow(missing_docs)] +#![allow(deref_nullptr)] + +/* automatically generated by rust-bindgen 0.59.2 */ + +#[repr(C)] +#[derive(Default)] +pub struct __IncompleteArrayField(::std::marker::PhantomData, [T; 0]); +impl __IncompleteArrayField { + #[inline] + pub const fn new() -> Self { + __IncompleteArrayField(::std::marker::PhantomData, []) + } + #[inline] + pub fn as_ptr(&self) -> *const T { + self as *const _ as *const T + } + #[inline] + pub fn as_mut_ptr(&mut self) -> *mut T { + self as *mut _ as *mut T + } + #[allow(clippy::missing_safety_doc)] + #[inline] + pub unsafe fn as_slice(&self, len: usize) -> &[T] { + ::std::slice::from_raw_parts(self.as_ptr(), len) + } + #[allow(clippy::missing_safety_doc)] + #[inline] + pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] { + ::std::slice::from_raw_parts_mut(self.as_mut_ptr(), len) + } +} +impl ::std::fmt::Debug for __IncompleteArrayField { + fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + fmt.write_str("__IncompleteArrayField") + } +} +pub const SETUP_NONE: u32 = 0; +pub const SETUP_E820_EXT: u32 = 1; +pub const SETUP_DTB: u32 = 2; +pub const SETUP_PCI: u32 = 3; +pub const SETUP_EFI: u32 = 4; +pub const SETUP_APPLE_PROPERTIES: u32 = 5; +pub const SETUP_JAILHOUSE: u32 = 6; +pub const SETUP_INDIRECT: u32 = 2147483648; +pub const SETUP_TYPE_MAX: u32 = 2147483654; +pub const RAMDISK_IMAGE_START_MASK: u32 = 2047; +pub const RAMDISK_PROMPT_FLAG: u32 = 32768; +pub const RAMDISK_LOAD_FLAG: u32 = 16384; +pub const LOADED_HIGH: u32 = 1; +pub const KASLR_FLAG: u32 = 2; +pub const QUIET_FLAG: u32 = 32; +pub const KEEP_SEGMENTS: u32 = 64; +pub const CAN_USE_HEAP: u32 = 128; +pub const XLF_KERNEL_64: u32 = 1; +pub const XLF_CAN_BE_LOADED_ABOVE_4G: u32 = 2; +pub const XLF_EFI_HANDOVER_32: u32 = 4; +pub const XLF_EFI_HANDOVER_64: u32 = 8; +pub const XLF_EFI_KEXEC: u32 = 16; +pub const XLF_5LEVEL: u32 = 32; +pub const XLF_5LEVEL_ENABLED: u32 = 64; +pub const __BITS_PER_LONG: u32 = 64; +pub const __FD_SETSIZE: u32 = 1024; +pub const VIDEO_TYPE_MDA: u32 = 16; +pub const VIDEO_TYPE_CGA: u32 = 17; +pub const VIDEO_TYPE_EGAM: u32 = 32; +pub const VIDEO_TYPE_EGAC: u32 = 33; +pub const VIDEO_TYPE_VGAC: u32 = 34; +pub const VIDEO_TYPE_VLFB: u32 = 35; +pub const VIDEO_TYPE_PICA_S3: u32 = 48; +pub const VIDEO_TYPE_MIPS_G364: u32 = 49; +pub const VIDEO_TYPE_SGI: u32 = 51; +pub const VIDEO_TYPE_TGAC: u32 = 64; +pub const VIDEO_TYPE_SUN: u32 = 80; +pub const VIDEO_TYPE_SUNPCI: u32 = 81; +pub const VIDEO_TYPE_PMAC: u32 = 96; +pub const VIDEO_TYPE_EFI: u32 = 112; +pub const VIDEO_FLAGS_NOCURSOR: u32 = 1; +pub const VIDEO_CAPABILITY_SKIP_QUIRKS: u32 = 1; +pub const VIDEO_CAPABILITY_64BIT_BASE: u32 = 2; +pub const APM_STATE_READY: u32 = 0; +pub const APM_STATE_STANDBY: u32 = 1; +pub const APM_STATE_SUSPEND: u32 = 2; +pub const APM_STATE_OFF: u32 = 3; +pub const APM_STATE_BUSY: u32 = 4; +pub const APM_STATE_REJECT: u32 = 5; +pub const APM_STATE_OEM_SYS: u32 = 32; +pub const APM_STATE_OEM_DEV: u32 = 64; +pub const APM_STATE_DISABLE: u32 = 0; +pub const APM_STATE_ENABLE: u32 = 1; +pub const APM_STATE_DISENGAGE: u32 = 0; +pub const APM_STATE_ENGAGE: u32 = 1; +pub const APM_SYS_STANDBY: u32 = 1; +pub const APM_SYS_SUSPEND: u32 = 2; +pub const APM_NORMAL_RESUME: u32 = 3; +pub const 
APM_CRITICAL_RESUME: u32 = 4; +pub const APM_LOW_BATTERY: u32 = 5; +pub const APM_POWER_STATUS_CHANGE: u32 = 6; +pub const APM_UPDATE_TIME: u32 = 7; +pub const APM_CRITICAL_SUSPEND: u32 = 8; +pub const APM_USER_STANDBY: u32 = 9; +pub const APM_USER_SUSPEND: u32 = 10; +pub const APM_STANDBY_RESUME: u32 = 11; +pub const APM_CAPABILITY_CHANGE: u32 = 12; +pub const APM_USER_HIBERNATION: u32 = 13; +pub const APM_HIBERNATION_RESUME: u32 = 14; +pub const APM_SUCCESS: u32 = 0; +pub const APM_DISABLED: u32 = 1; +pub const APM_CONNECTED: u32 = 2; +pub const APM_NOT_CONNECTED: u32 = 3; +pub const APM_16_CONNECTED: u32 = 5; +pub const APM_16_UNSUPPORTED: u32 = 6; +pub const APM_32_CONNECTED: u32 = 7; +pub const APM_32_UNSUPPORTED: u32 = 8; +pub const APM_BAD_DEVICE: u32 = 9; +pub const APM_BAD_PARAM: u32 = 10; +pub const APM_NOT_ENGAGED: u32 = 11; +pub const APM_BAD_FUNCTION: u32 = 12; +pub const APM_RESUME_DISABLED: u32 = 13; +pub const APM_NO_ERROR: u32 = 83; +pub const APM_BAD_STATE: u32 = 96; +pub const APM_NO_EVENTS: u32 = 128; +pub const APM_NOT_PRESENT: u32 = 134; +pub const APM_DEVICE_BIOS: u32 = 0; +pub const APM_DEVICE_ALL: u32 = 1; +pub const APM_DEVICE_DISPLAY: u32 = 256; +pub const APM_DEVICE_STORAGE: u32 = 512; +pub const APM_DEVICE_PARALLEL: u32 = 768; +pub const APM_DEVICE_SERIAL: u32 = 1024; +pub const APM_DEVICE_NETWORK: u32 = 1280; +pub const APM_DEVICE_PCMCIA: u32 = 1536; +pub const APM_DEVICE_BATTERY: u32 = 32768; +pub const APM_DEVICE_OEM: u32 = 57344; +pub const APM_DEVICE_OLD_ALL: u32 = 65535; +pub const APM_DEVICE_CLASS: u32 = 255; +pub const APM_DEVICE_MASK: u32 = 65280; +pub const APM_MAX_BATTERIES: u32 = 2; +pub const APM_CAP_GLOBAL_STANDBY: u32 = 1; +pub const APM_CAP_GLOBAL_SUSPEND: u32 = 2; +pub const APM_CAP_RESUME_STANDBY_TIMER: u32 = 4; +pub const APM_CAP_RESUME_SUSPEND_TIMER: u32 = 8; +pub const APM_CAP_RESUME_STANDBY_RING: u32 = 16; +pub const APM_CAP_RESUME_SUSPEND_RING: u32 = 32; +pub const APM_CAP_RESUME_STANDBY_PCMCIA: u32 = 64; +pub const APM_CAP_RESUME_SUSPEND_PCMCIA: u32 = 128; +pub const _IOC_NRBITS: u32 = 8; +pub const _IOC_TYPEBITS: u32 = 8; +pub const _IOC_SIZEBITS: u32 = 14; +pub const _IOC_DIRBITS: u32 = 2; +pub const _IOC_NRMASK: u32 = 255; +pub const _IOC_TYPEMASK: u32 = 255; +pub const _IOC_SIZEMASK: u32 = 16383; +pub const _IOC_DIRMASK: u32 = 3; +pub const _IOC_NRSHIFT: u32 = 0; +pub const _IOC_TYPESHIFT: u32 = 8; +pub const _IOC_SIZESHIFT: u32 = 16; +pub const _IOC_DIRSHIFT: u32 = 30; +pub const _IOC_NONE: u32 = 0; +pub const _IOC_WRITE: u32 = 1; +pub const _IOC_READ: u32 = 2; +pub const IOC_IN: u32 = 1073741824; +pub const IOC_OUT: u32 = 2147483648; +pub const IOC_INOUT: u32 = 3221225472; +pub const IOCSIZE_MASK: u32 = 1073676288; +pub const IOCSIZE_SHIFT: u32 = 16; +pub const EDDNR: u32 = 489; +pub const EDDBUF: u32 = 3328; +pub const EDDMAXNR: u32 = 6; +pub const EDDEXTSIZE: u32 = 8; +pub const EDDPARMSIZE: u32 = 74; +pub const CHECKEXTENSIONSPRESENT: u32 = 65; +pub const GETDEVICEPARAMETERS: u32 = 72; +pub const LEGACYGETDEVICEPARAMETERS: u32 = 8; +pub const EDDMAGIC1: u32 = 21930; +pub const EDDMAGIC2: u32 = 43605; +pub const READ_SECTORS: u32 = 2; +pub const EDD_MBR_SIG_OFFSET: u32 = 440; +pub const EDD_MBR_SIG_BUF: u32 = 656; +pub const EDD_MBR_SIG_MAX: u32 = 16; +pub const EDD_MBR_SIG_NR_BUF: u32 = 490; +pub const EDD_EXT_FIXED_DISK_ACCESS: u32 = 1; +pub const EDD_EXT_DEVICE_LOCKING_AND_EJECTING: u32 = 2; +pub const EDD_EXT_ENHANCED_DISK_DRIVE_SUPPORT: u32 = 4; +pub const EDD_EXT_64BIT_EXTENSIONS: u32 = 8; +pub const 
EDD_INFO_DMA_BOUNDARY_ERROR_TRANSPARENT: u32 = 1; +pub const EDD_INFO_GEOMETRY_VALID: u32 = 2; +pub const EDD_INFO_REMOVABLE: u32 = 4; +pub const EDD_INFO_WRITE_VERIFY: u32 = 8; +pub const EDD_INFO_MEDIA_CHANGE_NOTIFICATION: u32 = 16; +pub const EDD_INFO_LOCKABLE: u32 = 32; +pub const EDD_INFO_NO_MEDIA_PRESENT: u32 = 64; +pub const EDD_INFO_USE_INT13_FN50: u32 = 128; +pub const E820_MAX_ENTRIES_ZEROPAGE: u32 = 128; +pub const JAILHOUSE_SETUP_REQUIRED_VERSION: u32 = 1; +pub const E820MAP: ::std::os::raw::c_uint = 720; +pub const E820MAX: ::std::os::raw::c_uint = 128; +pub const E820_X_MAX: ::std::os::raw::c_uint = 128; +pub const E820NR: ::std::os::raw::c_uint = 488; +pub const E820_RAM: ::std::os::raw::c_uint = 1; +pub const E820_RESERVED: ::std::os::raw::c_uint = 2; +pub const E820_ACPI: ::std::os::raw::c_uint = 3; +pub const E820_NVS: ::std::os::raw::c_uint = 4; +pub const E820_UNUSABLE: ::std::os::raw::c_uint = 5; +pub const E820_RESERVED_KERN: ::std::os::raw::c_uint = 128; +pub type __s8 = ::std::os::raw::c_schar; +pub type __u8 = ::std::os::raw::c_uchar; +pub type __s16 = ::std::os::raw::c_short; +pub type __u16 = ::std::os::raw::c_ushort; +pub type __s32 = ::std::os::raw::c_int; +pub type __u32 = ::std::os::raw::c_uint; +pub type __s64 = ::std::os::raw::c_longlong; +pub type __u64 = ::std::os::raw::c_ulonglong; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct __kernel_fd_set { + pub fds_bits: [::std::os::raw::c_ulong; 16usize], +} +#[test] +fn bindgen_test_layout___kernel_fd_set() { + assert_eq!( + ::std::mem::size_of::<__kernel_fd_set>(), + 128usize, + concat!("Size of: ", stringify!(__kernel_fd_set)) + ); + assert_eq!( + ::std::mem::align_of::<__kernel_fd_set>(), + 8usize, + concat!("Alignment of ", stringify!(__kernel_fd_set)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::<__kernel_fd_set>())).fds_bits) as *const _ + as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(__kernel_fd_set), + "::", + stringify!(fds_bits) + ) + ); +} +pub type __kernel_sighandler_t = + ::std::option::Option; +pub type __kernel_key_t = ::std::os::raw::c_int; +pub type __kernel_mqd_t = ::std::os::raw::c_int; +pub type __kernel_old_uid_t = ::std::os::raw::c_ushort; +pub type __kernel_old_gid_t = ::std::os::raw::c_ushort; +pub type __kernel_old_dev_t = ::std::os::raw::c_ulong; +pub type __kernel_long_t = ::std::os::raw::c_long; +pub type __kernel_ulong_t = ::std::os::raw::c_ulong; +pub type __kernel_ino_t = __kernel_ulong_t; +pub type __kernel_mode_t = ::std::os::raw::c_uint; +pub type __kernel_pid_t = ::std::os::raw::c_int; +pub type __kernel_ipc_pid_t = ::std::os::raw::c_int; +pub type __kernel_uid_t = ::std::os::raw::c_uint; +pub type __kernel_gid_t = ::std::os::raw::c_uint; +pub type __kernel_suseconds_t = __kernel_long_t; +pub type __kernel_daddr_t = ::std::os::raw::c_int; +pub type __kernel_uid32_t = ::std::os::raw::c_uint; +pub type __kernel_gid32_t = ::std::os::raw::c_uint; +pub type __kernel_size_t = __kernel_ulong_t; +pub type __kernel_ssize_t = __kernel_long_t; +pub type __kernel_ptrdiff_t = __kernel_long_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct __kernel_fsid_t { + pub val: [::std::os::raw::c_int; 2usize], +} +#[test] +fn bindgen_test_layout___kernel_fsid_t() { + assert_eq!( + ::std::mem::size_of::<__kernel_fsid_t>(), + 8usize, + concat!("Size of: ", stringify!(__kernel_fsid_t)) + ); + assert_eq!( + ::std::mem::align_of::<__kernel_fsid_t>(), + 4usize, + concat!("Alignment of ", stringify!(__kernel_fsid_t)) + ); + assert_eq!( + 
unsafe { + std::ptr::addr_of!((*(::std::ptr::null::<__kernel_fsid_t>())).val) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(__kernel_fsid_t), + "::", + stringify!(val) + ) + ); +} +pub type __kernel_off_t = __kernel_long_t; +pub type __kernel_loff_t = ::std::os::raw::c_longlong; +pub type __kernel_old_time_t = __kernel_long_t; +pub type __kernel_time_t = __kernel_long_t; +pub type __kernel_time64_t = ::std::os::raw::c_longlong; +pub type __kernel_clock_t = __kernel_long_t; +pub type __kernel_timer_t = ::std::os::raw::c_int; +pub type __kernel_clockid_t = ::std::os::raw::c_int; +pub type __kernel_caddr_t = *mut ::std::os::raw::c_char; +pub type __kernel_uid16_t = ::std::os::raw::c_ushort; +pub type __kernel_gid16_t = ::std::os::raw::c_ushort; +pub type __le16 = __u16; +pub type __be16 = __u16; +pub type __le32 = __u32; +pub type __be32 = __u32; +pub type __le64 = __u64; +pub type __be64 = __u64; +pub type __sum16 = __u16; +pub type __wsum = __u32; +pub type __poll_t = ::std::os::raw::c_uint; +#[repr(C, packed)] +#[derive(Debug, Copy, Clone, Default)] +pub struct screen_info { + pub orig_x: __u8, + pub orig_y: __u8, + pub ext_mem_k: __u16, + pub orig_video_page: __u16, + pub orig_video_mode: __u8, + pub orig_video_cols: __u8, + pub flags: __u8, + pub unused2: __u8, + pub orig_video_ega_bx: __u16, + pub unused3: __u16, + pub orig_video_lines: __u8, + pub orig_video_isVGA: __u8, + pub orig_video_points: __u16, + pub lfb_width: __u16, + pub lfb_height: __u16, + pub lfb_depth: __u16, + pub lfb_base: __u32, + pub lfb_size: __u32, + pub cl_magic: __u16, + pub cl_offset: __u16, + pub lfb_linelength: __u16, + pub red_size: __u8, + pub red_pos: __u8, + pub green_size: __u8, + pub green_pos: __u8, + pub blue_size: __u8, + pub blue_pos: __u8, + pub rsvd_size: __u8, + pub rsvd_pos: __u8, + pub vesapm_seg: __u16, + pub vesapm_off: __u16, + pub pages: __u16, + pub vesa_attributes: __u16, + pub capabilities: __u32, + pub ext_lfb_base: __u32, + pub _reserved: [__u8; 2usize], +} +#[test] +fn bindgen_test_layout_screen_info() { + assert_eq!( + ::std::mem::size_of::(), + 64usize, + concat!("Size of: ", stringify!(screen_info)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(screen_info)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).orig_x) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(orig_x) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).orig_y) as *const _ as usize + }, + 1usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(orig_y) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).ext_mem_k) as *const _ + as usize + }, + 2usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(ext_mem_k) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).orig_video_page) as *const _ + as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(orig_video_page) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).orig_video_mode) as *const _ + as usize + }, + 6usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(orig_video_mode) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).orig_video_cols) as *const _ + as usize + }, + 7usize, + 
concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(orig_video_cols) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).flags) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(flags) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).unused2) as *const _ as usize + }, + 9usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(unused2) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).orig_video_ega_bx) as *const _ + as usize + }, + 10usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(orig_video_ega_bx) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).unused3) as *const _ as usize + }, + 12usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(unused3) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).orig_video_lines) as *const _ + as usize + }, + 14usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(orig_video_lines) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).orig_video_isVGA) as *const _ + as usize + }, + 15usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(orig_video_isVGA) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).orig_video_points) as *const _ + as usize + }, + 16usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(orig_video_points) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).lfb_width) as *const _ + as usize + }, + 18usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(lfb_width) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).lfb_height) as *const _ + as usize + }, + 20usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(lfb_height) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).lfb_depth) as *const _ + as usize + }, + 22usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(lfb_depth) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).lfb_base) as *const _ as usize + }, + 24usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(lfb_base) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).lfb_size) as *const _ as usize + }, + 28usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(lfb_size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).cl_magic) as *const _ as usize + }, + 32usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(cl_magic) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).cl_offset) as *const _ + as usize + }, + 34usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(cl_offset) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).lfb_linelength) as *const _ + as usize + }, + 36usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(lfb_linelength) + ) + ); + assert_eq!( + unsafe { + 
std::ptr::addr_of!((*(::std::ptr::null::())).red_size) as *const _ as usize + }, + 38usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(red_size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).red_pos) as *const _ as usize + }, + 39usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(red_pos) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).green_size) as *const _ + as usize + }, + 40usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(green_size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).green_pos) as *const _ + as usize + }, + 41usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(green_pos) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).blue_size) as *const _ + as usize + }, + 42usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(blue_size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).blue_pos) as *const _ as usize + }, + 43usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(blue_pos) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).rsvd_size) as *const _ + as usize + }, + 44usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(rsvd_size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).rsvd_pos) as *const _ as usize + }, + 45usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(rsvd_pos) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).vesapm_seg) as *const _ + as usize + }, + 46usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(vesapm_seg) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).vesapm_off) as *const _ + as usize + }, + 48usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(vesapm_off) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).pages) as *const _ as usize + }, + 50usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(pages) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).vesa_attributes) as *const _ + as usize + }, + 52usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(vesa_attributes) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).capabilities) as *const _ + as usize + }, + 54usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(capabilities) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).ext_lfb_base) as *const _ + as usize + }, + 58usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(ext_lfb_base) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::()))._reserved) as *const _ + as usize + }, + 62usize, + concat!( + "Offset of field: ", + stringify!(screen_info), + "::", + stringify!(_reserved) + ) + ); +} +pub type apm_event_t = ::std::os::raw::c_ushort; +pub type apm_eventinfo_t = ::std::os::raw::c_ushort; +#[repr(C)] +#[derive(Debug, Copy, Clone, Default)] +pub struct apm_bios_info { + pub version: __u16, + pub 
cseg: __u16, + pub offset: __u32, + pub cseg_16: __u16, + pub dseg: __u16, + pub flags: __u16, + pub cseg_len: __u16, + pub cseg_16_len: __u16, + pub dseg_len: __u16, +} +#[test] +fn bindgen_test_layout_apm_bios_info() { + assert_eq!( + ::std::mem::size_of::(), + 20usize, + concat!("Size of: ", stringify!(apm_bios_info)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(apm_bios_info)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).version) as *const _ + as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(apm_bios_info), + "::", + stringify!(version) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).cseg) as *const _ as usize + }, + 2usize, + concat!( + "Offset of field: ", + stringify!(apm_bios_info), + "::", + stringify!(cseg) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).offset) as *const _ as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(apm_bios_info), + "::", + stringify!(offset) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).cseg_16) as *const _ + as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(apm_bios_info), + "::", + stringify!(cseg_16) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).dseg) as *const _ as usize + }, + 10usize, + concat!( + "Offset of field: ", + stringify!(apm_bios_info), + "::", + stringify!(dseg) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).flags) as *const _ as usize + }, + 12usize, + concat!( + "Offset of field: ", + stringify!(apm_bios_info), + "::", + stringify!(flags) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).cseg_len) as *const _ + as usize + }, + 14usize, + concat!( + "Offset of field: ", + stringify!(apm_bios_info), + "::", + stringify!(cseg_len) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).cseg_16_len) as *const _ + as usize + }, + 16usize, + concat!( + "Offset of field: ", + stringify!(apm_bios_info), + "::", + stringify!(cseg_16_len) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).dseg_len) as *const _ + as usize + }, + 18usize, + concat!( + "Offset of field: ", + stringify!(apm_bios_info), + "::", + stringify!(dseg_len) + ) + ); +} +#[repr(C, packed)] +#[derive(Copy, Clone)] +pub struct edd_device_params { + pub length: __u16, + pub info_flags: __u16, + pub num_default_cylinders: __u32, + pub num_default_heads: __u32, + pub sectors_per_track: __u32, + pub number_of_sectors: __u64, + pub bytes_per_sector: __u16, + pub dpte_ptr: __u32, + pub key: __u16, + pub device_path_info_length: __u8, + pub reserved2: __u8, + pub reserved3: __u16, + pub host_bus_type: [__u8; 4usize], + pub interface_type: [__u8; 8usize], + pub interface_path: edd_device_params__bindgen_ty_1, + pub device_path: edd_device_params__bindgen_ty_2, + pub reserved4: __u8, + pub checksum: __u8, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union edd_device_params__bindgen_ty_1 { + pub isa: edd_device_params__bindgen_ty_1__bindgen_ty_1, + pub pci: edd_device_params__bindgen_ty_1__bindgen_ty_2, + pub ibnd: edd_device_params__bindgen_ty_1__bindgen_ty_3, + pub xprs: edd_device_params__bindgen_ty_1__bindgen_ty_4, + pub htpt: edd_device_params__bindgen_ty_1__bindgen_ty_5, + pub unknown: edd_device_params__bindgen_ty_1__bindgen_ty_6, +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] 
+pub struct edd_device_params__bindgen_ty_1__bindgen_ty_1 { + pub base_address: __u16, + pub reserved1: __u16, + pub reserved2: __u32, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_1__bindgen_ty_1() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_1) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_1) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())) + .base_address + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_1), + "::", + stringify!(base_address) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved1 + ) as *const _ as usize + }, + 2usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_1), + "::", + stringify!(reserved1) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved2 + ) as *const _ as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_1), + "::", + stringify!(reserved2) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_1__bindgen_ty_2 { + pub bus: __u8, + pub slot: __u8, + pub function: __u8, + pub channel: __u8, + pub reserved: __u32, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_1__bindgen_ty_2() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_2) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_2) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).bus + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_2), + "::", + stringify!(bus) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).slot + ) as *const _ as usize + }, + 1usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_2), + "::", + stringify!(slot) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).function + ) as *const _ as usize + }, + 2usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_2), + "::", + stringify!(function) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).channel + ) as *const _ as usize + }, + 3usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_2), + "::", + stringify!(channel) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved + ) as *const _ as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_2), + "::", + stringify!(reserved) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_1__bindgen_ty_3 { + pub reserved: __u64, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_1__bindgen_ty_3() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_3) + ) + ); + 
assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_3) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_3), + "::", + stringify!(reserved) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_1__bindgen_ty_4 { + pub reserved: __u64, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_1__bindgen_ty_4() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_4) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_4) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_4), + "::", + stringify!(reserved) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_1__bindgen_ty_5 { + pub reserved: __u64, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_1__bindgen_ty_5() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_5) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_5) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_5), + "::", + stringify!(reserved) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_1__bindgen_ty_6 { + pub reserved: __u64, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_1__bindgen_ty_6() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_6) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_6) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1__bindgen_ty_6), + "::", + stringify!(reserved) + ) + ); +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_1() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(edd_device_params__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(edd_device_params__bindgen_ty_1)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).isa) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1), + "::", + stringify!(isa) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).pci) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1), + "::", + stringify!(pci) + ) + ); + assert_eq!( + unsafe 
{ + std::ptr::addr_of!((*(::std::ptr::null::())).ibnd) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1), + "::", + stringify!(ibnd) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).xprs) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1), + "::", + stringify!(xprs) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).htpt) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1), + "::", + stringify!(htpt) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).unknown) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_1), + "::", + stringify!(unknown) + ) + ); +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union edd_device_params__bindgen_ty_2 { + pub ata: edd_device_params__bindgen_ty_2__bindgen_ty_1, + pub atapi: edd_device_params__bindgen_ty_2__bindgen_ty_2, + pub scsi: edd_device_params__bindgen_ty_2__bindgen_ty_3, + pub usb: edd_device_params__bindgen_ty_2__bindgen_ty_4, + pub i1394: edd_device_params__bindgen_ty_2__bindgen_ty_5, + pub fibre: edd_device_params__bindgen_ty_2__bindgen_ty_6, + pub i2o: edd_device_params__bindgen_ty_2__bindgen_ty_7, + pub raid: edd_device_params__bindgen_ty_2__bindgen_ty_8, + pub sata: edd_device_params__bindgen_ty_2__bindgen_ty_9, + pub unknown: edd_device_params__bindgen_ty_2__bindgen_ty_10, +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_2__bindgen_ty_1 { + pub device: __u8, + pub reserved1: __u8, + pub reserved2: __u16, + pub reserved3: __u32, + pub reserved4: __u64, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_2__bindgen_ty_1() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_1) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_1) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).device + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_1), + "::", + stringify!(device) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved1 + ) as *const _ as usize + }, + 1usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_1), + "::", + stringify!(reserved1) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved2 + ) as *const _ as usize + }, + 2usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_1), + "::", + stringify!(reserved2) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved3 + ) as *const _ as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_1), + "::", + stringify!(reserved3) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved4 + ) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_1), + "::", + stringify!(reserved4) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, 
Clone)] +pub struct edd_device_params__bindgen_ty_2__bindgen_ty_2 { + pub device: __u8, + pub lun: __u8, + pub reserved1: __u8, + pub reserved2: __u8, + pub reserved3: __u32, + pub reserved4: __u64, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_2__bindgen_ty_2() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_2) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_2) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).device + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_2), + "::", + stringify!(device) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).lun + ) as *const _ as usize + }, + 1usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_2), + "::", + stringify!(lun) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved1 + ) as *const _ as usize + }, + 2usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_2), + "::", + stringify!(reserved1) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved2 + ) as *const _ as usize + }, + 3usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_2), + "::", + stringify!(reserved2) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved3 + ) as *const _ as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_2), + "::", + stringify!(reserved3) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved4 + ) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_2), + "::", + stringify!(reserved4) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_2__bindgen_ty_3 { + pub id: __u16, + pub lun: __u64, + pub reserved1: __u16, + pub reserved2: __u32, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_2__bindgen_ty_3() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_3) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_3) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).id + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_3), + "::", + stringify!(id) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).lun + ) as *const _ as usize + }, + 2usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_3), + "::", + stringify!(lun) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved1 + ) as *const _ as usize + }, + 10usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_3), + "::", + stringify!(reserved1) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved2 
+ ) as *const _ as usize + }, + 12usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_3), + "::", + stringify!(reserved2) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_2__bindgen_ty_4 { + pub serial_number: __u64, + pub reserved: __u64, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_2__bindgen_ty_4() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_4) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_4) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())) + .serial_number + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_4), + "::", + stringify!(serial_number) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved + ) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_4), + "::", + stringify!(reserved) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_2__bindgen_ty_5 { + pub eui: __u64, + pub reserved: __u64, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_2__bindgen_ty_5() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_5) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_5) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).eui + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_5), + "::", + stringify!(eui) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved + ) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_5), + "::", + stringify!(reserved) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_2__bindgen_ty_6 { + pub wwid: __u64, + pub lun: __u64, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_2__bindgen_ty_6() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_6) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_6) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).wwid + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_6), + "::", + stringify!(wwid) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).lun + ) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_6), + "::", + stringify!(lun) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_2__bindgen_ty_7 { + pub identity_tag: __u64, + pub reserved: __u64, +} +#[test] +fn 
bindgen_test_layout_edd_device_params__bindgen_ty_2__bindgen_ty_7() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_7) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_7) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())) + .identity_tag + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_7), + "::", + stringify!(identity_tag) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved + ) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_7), + "::", + stringify!(reserved) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_2__bindgen_ty_8 { + pub array_number: __u32, + pub reserved1: __u32, + pub reserved2: __u64, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_2__bindgen_ty_8() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_8) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_8) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())) + .array_number + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_8), + "::", + stringify!(array_number) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved1 + ) as *const _ as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_8), + "::", + stringify!(reserved1) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved2 + ) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_8), + "::", + stringify!(reserved2) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_2__bindgen_ty_9 { + pub device: __u8, + pub reserved1: __u8, + pub reserved2: __u16, + pub reserved3: __u32, + pub reserved4: __u64, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_2__bindgen_ty_9() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_9) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_9) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).device + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_9), + "::", + stringify!(device) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved1 + ) as *const _ as usize + }, + 1usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_9), + "::", + stringify!(reserved1) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved2 + ) as *const _ as usize + }, + 2usize, + concat!( + "Offset of field: ", + 
stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_9), + "::", + stringify!(reserved2) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved3 + ) as *const _ as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_9), + "::", + stringify!(reserved3) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved4 + ) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_9), + "::", + stringify!(reserved4) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct edd_device_params__bindgen_ty_2__bindgen_ty_10 { + pub reserved1: __u64, + pub reserved2: __u64, +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_2__bindgen_ty_10() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!( + "Size of: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_10) + ) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_10) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved1 + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_10), + "::", + stringify!(reserved1) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).reserved2 + ) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2__bindgen_ty_10), + "::", + stringify!(reserved2) + ) + ); +} +#[test] +fn bindgen_test_layout_edd_device_params__bindgen_ty_2() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(edd_device_params__bindgen_ty_2)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(edd_device_params__bindgen_ty_2)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).ata) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2), + "::", + stringify!(ata) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).atapi) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2), + "::", + stringify!(atapi) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).scsi) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2), + "::", + stringify!(scsi) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).usb) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2), + "::", + stringify!(usb) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).i1394) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2), + "::", + stringify!(i1394) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).fibre) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2), + "::", + stringify!(fibre) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).i2o) + as *const _ as usize + }, + 0usize, + concat!( + 
"Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2), + "::", + stringify!(i2o) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).raid) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2), + "::", + stringify!(raid) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).sata) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2), + "::", + stringify!(sata) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).unknown) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params__bindgen_ty_2), + "::", + stringify!(unknown) + ) + ); +} +#[test] +fn bindgen_test_layout_edd_device_params() { + assert_eq!( + ::std::mem::size_of::(), + 74usize, + concat!("Size of: ", stringify!(edd_device_params)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(edd_device_params)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).length) as *const _ + as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(length) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).info_flags) as *const _ + as usize + }, + 2usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(info_flags) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).num_default_cylinders) + as *const _ as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(num_default_cylinders) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).num_default_heads) + as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(num_default_heads) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).sectors_per_track) + as *const _ as usize + }, + 12usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(sectors_per_track) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).number_of_sectors) + as *const _ as usize + }, + 16usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(number_of_sectors) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).bytes_per_sector) + as *const _ as usize + }, + 24usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(bytes_per_sector) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).dpte_ptr) as *const _ + as usize + }, + 26usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(dpte_ptr) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).key) as *const _ + as usize + }, + 30usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(key) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).device_path_info_length) + as *const _ as usize + }, + 32usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(device_path_info_length) + ) + ); + assert_eq!( + unsafe { + 
std::ptr::addr_of!((*(::std::ptr::null::())).reserved2) as *const _ + as usize + }, + 33usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(reserved2) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).reserved3) as *const _ + as usize + }, + 34usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(reserved3) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).host_bus_type) + as *const _ as usize + }, + 36usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(host_bus_type) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).interface_type) + as *const _ as usize + }, + 40usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(interface_type) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).interface_path) + as *const _ as usize + }, + 48usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(interface_path) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).device_path) as *const _ + as usize + }, + 56usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(device_path) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).reserved4) as *const _ + as usize + }, + 72usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(reserved4) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).checksum) as *const _ + as usize + }, + 73usize, + concat!( + "Offset of field: ", + stringify!(edd_device_params), + "::", + stringify!(checksum) + ) + ); +} +#[repr(C, packed)] +#[derive(Copy, Clone)] +pub struct edd_info { + pub device: __u8, + pub version: __u8, + pub interface_support: __u16, + pub legacy_max_cylinder: __u16, + pub legacy_max_head: __u8, + pub legacy_sectors_per_track: __u8, + pub params: edd_device_params, +} +#[test] +fn bindgen_test_layout_edd_info() { + assert_eq!( + ::std::mem::size_of::(), + 82usize, + concat!("Size of: ", stringify!(edd_info)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(edd_info)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).device) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd_info), + "::", + stringify!(device) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).version) as *const _ as usize + }, + 1usize, + concat!( + "Offset of field: ", + stringify!(edd_info), + "::", + stringify!(version) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).interface_support) as *const _ + as usize + }, + 2usize, + concat!( + "Offset of field: ", + stringify!(edd_info), + "::", + stringify!(interface_support) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).legacy_max_cylinder) as *const _ + as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(edd_info), + "::", + stringify!(legacy_max_cylinder) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).legacy_max_head) as *const _ + as usize + }, + 6usize, + concat!( + "Offset of field: ", + stringify!(edd_info), + "::", + stringify!(legacy_max_head) + ) + ); + assert_eq!( + unsafe { + 
std::ptr::addr_of!((*(::std::ptr::null::())).legacy_sectors_per_track) + as *const _ as usize + }, + 7usize, + concat!( + "Offset of field: ", + stringify!(edd_info), + "::", + stringify!(legacy_sectors_per_track) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).params) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(edd_info), + "::", + stringify!(params) + ) + ); +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct edd { + pub mbr_signature: [::std::os::raw::c_uint; 16usize], + pub edd_info: [edd_info; 6usize], + pub mbr_signature_nr: ::std::os::raw::c_uchar, + pub edd_info_nr: ::std::os::raw::c_uchar, +} +#[test] +fn bindgen_test_layout_edd() { + assert_eq!( + ::std::mem::size_of::(), + 560usize, + concat!("Size of: ", stringify!(edd)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(edd)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).mbr_signature) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edd), + "::", + stringify!(mbr_signature) + ) + ); + assert_eq!( + unsafe { std::ptr::addr_of!((*(::std::ptr::null::())).edd_info) as *const _ as usize }, + 64usize, + concat!( + "Offset of field: ", + stringify!(edd), + "::", + stringify!(edd_info) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).mbr_signature_nr) as *const _ as usize + }, + 556usize, + concat!( + "Offset of field: ", + stringify!(edd), + "::", + stringify!(mbr_signature_nr) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).edd_info_nr) as *const _ as usize + }, + 557usize, + concat!( + "Offset of field: ", + stringify!(edd), + "::", + stringify!(edd_info_nr) + ) + ); +} +#[repr(C)] +#[derive(Debug, Copy, Clone, Default)] +pub struct ist_info { + pub signature: __u32, + pub command: __u32, + pub event: __u32, + pub perf_level: __u32, +} +#[test] +fn bindgen_test_layout_ist_info() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(ist_info)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(ist_info)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).signature) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(ist_info), + "::", + stringify!(signature) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).command) as *const _ as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(ist_info), + "::", + stringify!(command) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).event) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(ist_info), + "::", + stringify!(event) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).perf_level) as *const _ as usize + }, + 12usize, + concat!( + "Offset of field: ", + stringify!(ist_info), + "::", + stringify!(perf_level) + ) + ); +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct edid_info { + pub dummy: [::std::os::raw::c_uchar; 128usize], +} +#[test] +fn bindgen_test_layout_edid_info() { + assert_eq!( + ::std::mem::size_of::(), + 128usize, + concat!("Size of: ", stringify!(edid_info)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(edid_info)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).dummy) as 
*const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(edid_info), + "::", + stringify!(dummy) + ) + ); +} +#[repr(C)] +#[derive(Debug)] +pub struct setup_data { + pub next: __u64, + pub type_: __u32, + pub len: __u32, + pub data: __IncompleteArrayField<__u8>, +} +#[test] +fn bindgen_test_layout_setup_data() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(setup_data)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(setup_data)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).next) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(setup_data), + "::", + stringify!(next) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).type_) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(setup_data), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).len) as *const _ as usize + }, + 12usize, + concat!( + "Offset of field: ", + stringify!(setup_data), + "::", + stringify!(len) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).data) as *const _ as usize + }, + 16usize, + concat!( + "Offset of field: ", + stringify!(setup_data), + "::", + stringify!(data) + ) + ); +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct setup_indirect { + pub type_: __u32, + pub reserved: __u32, + pub len: __u64, + pub addr: __u64, +} +#[test] +fn bindgen_test_layout_setup_indirect() { + assert_eq!( + ::std::mem::size_of::(), + 24usize, + concat!("Size of: ", stringify!(setup_indirect)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(setup_indirect)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).type_) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(setup_indirect), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).reserved) as *const _ + as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(setup_indirect), + "::", + stringify!(reserved) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).len) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(setup_indirect), + "::", + stringify!(len) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).addr) as *const _ as usize + }, + 16usize, + concat!( + "Offset of field: ", + stringify!(setup_indirect), + "::", + stringify!(addr) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone, Default)] +pub struct setup_header { + pub setup_sects: __u8, + pub root_flags: __u16, + pub syssize: __u32, + pub ram_size: __u16, + pub vid_mode: __u16, + pub root_dev: __u16, + pub boot_flag: __u16, + pub jump: __u16, + pub header: __u32, + pub version: __u16, + pub realmode_swtch: __u32, + pub start_sys_seg: __u16, + pub kernel_version: __u16, + pub type_of_loader: __u8, + pub loadflags: __u8, + pub setup_move_size: __u16, + pub code32_start: __u32, + pub ramdisk_image: __u32, + pub ramdisk_size: __u32, + pub bootsect_kludge: __u32, + pub heap_end_ptr: __u16, + pub ext_loader_ver: __u8, + pub ext_loader_type: __u8, + pub cmd_line_ptr: __u32, + pub initrd_addr_max: __u32, + pub kernel_alignment: __u32, + pub relocatable_kernel: __u8, + pub min_alignment: __u8, + pub xloadflags: __u16, + pub 
cmdline_size: __u32, + pub hardware_subarch: __u32, + pub hardware_subarch_data: __u64, + pub payload_offset: __u32, + pub payload_length: __u32, + pub setup_data: __u64, + pub pref_address: __u64, + pub init_size: __u32, + pub handover_offset: __u32, + pub kernel_info_offset: __u32, +} +#[test] +fn bindgen_test_layout_setup_header() { + assert_eq!( + ::std::mem::size_of::(), + 123usize, + concat!("Size of: ", stringify!(setup_header)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(setup_header)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).setup_sects) as *const _ + as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(setup_sects) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).root_flags) as *const _ + as usize + }, + 1usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(root_flags) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).syssize) as *const _ as usize + }, + 3usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(syssize) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).ram_size) as *const _ + as usize + }, + 7usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(ram_size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).vid_mode) as *const _ + as usize + }, + 9usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(vid_mode) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).root_dev) as *const _ + as usize + }, + 11usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(root_dev) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).boot_flag) as *const _ + as usize + }, + 13usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(boot_flag) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).jump) as *const _ as usize + }, + 15usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(jump) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).header) as *const _ as usize + }, + 17usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(header) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).version) as *const _ as usize + }, + 21usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(version) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).realmode_swtch) as *const _ + as usize + }, + 23usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(realmode_swtch) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).start_sys_seg) as *const _ + as usize + }, + 27usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(start_sys_seg) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).kernel_version) as *const _ + as usize + }, + 29usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(kernel_version) + ) + ); + assert_eq!( + unsafe { + 
std::ptr::addr_of!((*(::std::ptr::null::())).type_of_loader) as *const _ + as usize + }, + 31usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(type_of_loader) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).loadflags) as *const _ + as usize + }, + 32usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(loadflags) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).setup_move_size) as *const _ + as usize + }, + 33usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(setup_move_size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).code32_start) as *const _ + as usize + }, + 35usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(code32_start) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).ramdisk_image) as *const _ + as usize + }, + 39usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(ramdisk_image) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).ramdisk_size) as *const _ + as usize + }, + 43usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(ramdisk_size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).bootsect_kludge) as *const _ + as usize + }, + 47usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(bootsect_kludge) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).heap_end_ptr) as *const _ + as usize + }, + 51usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(heap_end_ptr) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).ext_loader_ver) as *const _ + as usize + }, + 53usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(ext_loader_ver) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).ext_loader_type) as *const _ + as usize + }, + 54usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(ext_loader_type) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).cmd_line_ptr) as *const _ + as usize + }, + 55usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(cmd_line_ptr) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).initrd_addr_max) as *const _ + as usize + }, + 59usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(initrd_addr_max) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).kernel_alignment) as *const _ + as usize + }, + 63usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(kernel_alignment) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).relocatable_kernel) + as *const _ as usize + }, + 67usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(relocatable_kernel) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).min_alignment) as *const _ + as usize + }, + 68usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(min_alignment) + ) + ); + assert_eq!( + unsafe { + 
std::ptr::addr_of!((*(::std::ptr::null::())).xloadflags) as *const _ + as usize + }, + 69usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(xloadflags) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).cmdline_size) as *const _ + as usize + }, + 71usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(cmdline_size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).hardware_subarch) as *const _ + as usize + }, + 75usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(hardware_subarch) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).hardware_subarch_data) + as *const _ as usize + }, + 79usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(hardware_subarch_data) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).payload_offset) as *const _ + as usize + }, + 87usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(payload_offset) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).payload_length) as *const _ + as usize + }, + 91usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(payload_length) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).setup_data) as *const _ + as usize + }, + 95usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(setup_data) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).pref_address) as *const _ + as usize + }, + 103usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(pref_address) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).init_size) as *const _ + as usize + }, + 111usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(init_size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).handover_offset) as *const _ + as usize + }, + 115usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(handover_offset) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).kernel_info_offset) + as *const _ as usize + }, + 119usize, + concat!( + "Offset of field: ", + stringify!(setup_header), + "::", + stringify!(kernel_info_offset) + ) + ); +} +#[repr(C)] +#[derive(Debug, Copy, Clone, Default)] +pub struct sys_desc_table { + pub length: __u16, + pub table: [__u8; 14usize], +} +#[test] +fn bindgen_test_layout_sys_desc_table() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(sys_desc_table)) + ); + assert_eq!( + ::std::mem::align_of::(), + 2usize, + concat!("Alignment of ", stringify!(sys_desc_table)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).length) as *const _ + as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(sys_desc_table), + "::", + stringify!(length) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).table) as *const _ as usize + }, + 2usize, + concat!( + "Offset of field: ", + stringify!(sys_desc_table), + "::", + stringify!(table) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone, Default)] +pub struct olpc_ofw_header { + pub ofw_magic: __u32, + pub ofw_version: __u32, + pub 
cif_handler: __u32, + pub irq_desc_table: __u32, +} +#[test] +fn bindgen_test_layout_olpc_ofw_header() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(olpc_ofw_header)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(olpc_ofw_header)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).ofw_magic) as *const _ + as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(olpc_ofw_header), + "::", + stringify!(ofw_magic) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).ofw_version) as *const _ + as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(olpc_ofw_header), + "::", + stringify!(ofw_version) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).cif_handler) as *const _ + as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(olpc_ofw_header), + "::", + stringify!(cif_handler) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).irq_desc_table) + as *const _ as usize + }, + 12usize, + concat!( + "Offset of field: ", + stringify!(olpc_ofw_header), + "::", + stringify!(irq_desc_table) + ) + ); +} +#[repr(C)] +#[derive(Debug, Copy, Clone, Default)] +pub struct efi_info { + pub efi_loader_signature: __u32, + pub efi_systab: __u32, + pub efi_memdesc_size: __u32, + pub efi_memdesc_version: __u32, + pub efi_memmap: __u32, + pub efi_memmap_size: __u32, + pub efi_systab_hi: __u32, + pub efi_memmap_hi: __u32, +} +#[test] +fn bindgen_test_layout_efi_info() { + assert_eq!( + ::std::mem::size_of::(), + 32usize, + concat!("Size of: ", stringify!(efi_info)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(efi_info)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).efi_loader_signature) as *const _ + as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(efi_info), + "::", + stringify!(efi_loader_signature) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).efi_systab) as *const _ as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(efi_info), + "::", + stringify!(efi_systab) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).efi_memdesc_size) as *const _ + as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(efi_info), + "::", + stringify!(efi_memdesc_size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).efi_memdesc_version) as *const _ + as usize + }, + 12usize, + concat!( + "Offset of field: ", + stringify!(efi_info), + "::", + stringify!(efi_memdesc_version) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).efi_memmap) as *const _ as usize + }, + 16usize, + concat!( + "Offset of field: ", + stringify!(efi_info), + "::", + stringify!(efi_memmap) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).efi_memmap_size) as *const _ + as usize + }, + 20usize, + concat!( + "Offset of field: ", + stringify!(efi_info), + "::", + stringify!(efi_memmap_size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).efi_systab_hi) as *const _ + as usize + }, + 24usize, + concat!( + "Offset of field: ", + stringify!(efi_info), + "::", + stringify!(efi_systab_hi) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).efi_memmap_hi) as *const _ + as usize 
+ }, + 28usize, + concat!( + "Offset of field: ", + stringify!(efi_info), + "::", + stringify!(efi_memmap_hi) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone, Default)] +pub struct boot_e820_entry { + pub addr: __u64, + pub size: __u64, + pub type_: __u32, +} +#[test] +fn bindgen_test_layout_boot_e820_entry() { + assert_eq!( + ::std::mem::size_of::(), + 20usize, + concat!("Size of: ", stringify!(boot_e820_entry)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(boot_e820_entry)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).addr) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(boot_e820_entry), + "::", + stringify!(addr) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).size) as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(boot_e820_entry), + "::", + stringify!(size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).type_) as *const _ + as usize + }, + 16usize, + concat!( + "Offset of field: ", + stringify!(boot_e820_entry), + "::", + stringify!(type_) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct jailhouse_setup_data { + pub hdr: jailhouse_setup_data__bindgen_ty_1, + pub v1: jailhouse_setup_data__bindgen_ty_2, + pub v2: jailhouse_setup_data__bindgen_ty_3, +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct jailhouse_setup_data__bindgen_ty_1 { + pub version: __u16, + pub compatible_version: __u16, +} +#[test] +fn bindgen_test_layout_jailhouse_setup_data__bindgen_ty_1() { + assert_eq!( + ::std::mem::size_of::(), + 4usize, + concat!("Size of: ", stringify!(jailhouse_setup_data__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(jailhouse_setup_data__bindgen_ty_1) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).version + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(jailhouse_setup_data__bindgen_ty_1), + "::", + stringify!(version) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).compatible_version + ) as *const _ as usize + }, + 2usize, + concat!( + "Offset of field: ", + stringify!(jailhouse_setup_data__bindgen_ty_1), + "::", + stringify!(compatible_version) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct jailhouse_setup_data__bindgen_ty_2 { + pub pm_timer_address: __u16, + pub num_cpus: __u16, + pub pci_mmconfig_base: __u64, + pub tsc_khz: __u32, + pub apic_khz: __u32, + pub standard_ioapic: __u8, + pub cpu_ids: [__u8; 255usize], +} +#[test] +fn bindgen_test_layout_jailhouse_setup_data__bindgen_ty_2() { + assert_eq!( + ::std::mem::size_of::(), + 276usize, + concat!("Size of: ", stringify!(jailhouse_setup_data__bindgen_ty_2)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(jailhouse_setup_data__bindgen_ty_2) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).pm_timer_address + ) as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(jailhouse_setup_data__bindgen_ty_2), + "::", + stringify!(pm_timer_address) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).num_cpus + ) as *const _ as usize + }, + 2usize, + concat!( + "Offset of field: ", + stringify!(jailhouse_setup_data__bindgen_ty_2), + 
"::", + stringify!(num_cpus) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).pci_mmconfig_base + ) as *const _ as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(jailhouse_setup_data__bindgen_ty_2), + "::", + stringify!(pci_mmconfig_base) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).tsc_khz + ) as *const _ as usize + }, + 12usize, + concat!( + "Offset of field: ", + stringify!(jailhouse_setup_data__bindgen_ty_2), + "::", + stringify!(tsc_khz) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).apic_khz + ) as *const _ as usize + }, + 16usize, + concat!( + "Offset of field: ", + stringify!(jailhouse_setup_data__bindgen_ty_2), + "::", + stringify!(apic_khz) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).standard_ioapic + ) as *const _ as usize + }, + 20usize, + concat!( + "Offset of field: ", + stringify!(jailhouse_setup_data__bindgen_ty_2), + "::", + stringify!(standard_ioapic) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!( + (*(::std::ptr::null::())).cpu_ids + ) as *const _ as usize + }, + 21usize, + concat!( + "Offset of field: ", + stringify!(jailhouse_setup_data__bindgen_ty_2), + "::", + stringify!(cpu_ids) + ) + ); +} +#[repr(C, packed)] +#[derive(Debug, Copy, Clone)] +pub struct jailhouse_setup_data__bindgen_ty_3 { + pub flags: __u32, +} +#[test] +fn bindgen_test_layout_jailhouse_setup_data__bindgen_ty_3() { + assert_eq!( + ::std::mem::size_of::(), + 4usize, + concat!("Size of: ", stringify!(jailhouse_setup_data__bindgen_ty_3)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!( + "Alignment of ", + stringify!(jailhouse_setup_data__bindgen_ty_3) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).flags) + as *const _ as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(jailhouse_setup_data__bindgen_ty_3), + "::", + stringify!(flags) + ) + ); +} +#[test] +fn bindgen_test_layout_jailhouse_setup_data() { + assert_eq!( + ::std::mem::size_of::(), + 284usize, + concat!("Size of: ", stringify!(jailhouse_setup_data)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(jailhouse_setup_data)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).hdr) as *const _ + as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(jailhouse_setup_data), + "::", + stringify!(hdr) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).v1) as *const _ + as usize + }, + 4usize, + concat!( + "Offset of field: ", + stringify!(jailhouse_setup_data), + "::", + stringify!(v1) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).v2) as *const _ + as usize + }, + 280usize, + concat!( + "Offset of field: ", + stringify!(jailhouse_setup_data), + "::", + stringify!(v2) + ) + ); +} +#[repr(C, packed)] +#[derive(Copy, Clone)] +pub struct boot_params { + pub screen_info: screen_info, + pub apm_bios_info: apm_bios_info, + pub _pad2: [__u8; 4usize], + pub tboot_addr: __u64, + pub ist_info: ist_info, + pub acpi_rsdp_addr: __u64, + pub _pad3: [__u8; 8usize], + pub hd0_info: [__u8; 16usize], + pub hd1_info: [__u8; 16usize], + pub sys_desc_table: sys_desc_table, + pub olpc_ofw_header: olpc_ofw_header, + pub ext_ramdisk_image: __u32, + pub ext_ramdisk_size: __u32, + pub ext_cmd_line_ptr: __u32, + pub _pad4: [__u8; 116usize], + pub edid_info: edid_info, + pub 
efi_info: efi_info, + pub alt_mem_k: __u32, + pub scratch: __u32, + pub e820_entries: __u8, + pub eddbuf_entries: __u8, + pub edd_mbr_sig_buf_entries: __u8, + pub kbd_status: __u8, + pub secure_boot: __u8, + pub _pad5: [__u8; 2usize], + pub sentinel: __u8, + pub _pad6: [__u8; 1usize], + pub hdr: setup_header, + pub _pad7: [__u8; 36usize], + pub edd_mbr_sig_buffer: [__u32; 16usize], + pub e820_table: [boot_e820_entry; 128usize], + pub _pad8: [__u8; 48usize], + pub eddbuf: [edd_info; 6usize], + pub _pad9: [__u8; 276usize], +} +#[test] +fn bindgen_test_layout_boot_params() { + assert_eq!( + ::std::mem::size_of::(), + 4096usize, + concat!("Size of: ", stringify!(boot_params)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(boot_params)) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).screen_info) as *const _ + as usize + }, + 0usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(screen_info) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).apm_bios_info) as *const _ + as usize + }, + 64usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(apm_bios_info) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::()))._pad2) as *const _ as usize + }, + 84usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(_pad2) + ) + ); + + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).tboot_addr) as *const _ + as usize + }, + 88usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(tboot_addr) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).ist_info) as *const _ as usize + }, + 96usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(ist_info) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).acpi_rsdp_addr) as *const _ + as usize + }, + 112usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(acpi_rsdp_addr) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::()))._pad3) as *const _ as usize + }, + 120usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(_pad3) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).hd0_info) as *const _ as usize + }, + 128usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(hd0_info) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).hd1_info) as *const _ as usize + }, + 144usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(hd1_info) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).sys_desc_table) as *const _ + as usize + }, + 160usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(sys_desc_table) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).olpc_ofw_header) as *const _ + as usize + }, + 176usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(olpc_ofw_header) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).ext_ramdisk_image) as *const _ + as usize + }, + 192usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(ext_ramdisk_image) + ) + ); + assert_eq!( + unsafe { + 
std::ptr::addr_of!((*(::std::ptr::null::())).ext_ramdisk_size) as *const _ + as usize + }, + 196usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(ext_ramdisk_size) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).ext_cmd_line_ptr) as *const _ + as usize + }, + 200usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(ext_cmd_line_ptr) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::()))._pad4) as *const _ as usize + }, + 204usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(_pad4) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).edid_info) as *const _ + as usize + }, + 320usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(edid_info) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).efi_info) as *const _ as usize + }, + 448usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(efi_info) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).alt_mem_k) as *const _ + as usize + }, + 480usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(alt_mem_k) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).scratch) as *const _ as usize + }, + 484usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(scratch) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).e820_entries) as *const _ + as usize + }, + 488usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(e820_entries) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).eddbuf_entries) as *const _ + as usize + }, + 489usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(eddbuf_entries) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).edd_mbr_sig_buf_entries) + as *const _ as usize + }, + 490usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(edd_mbr_sig_buf_entries) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).kbd_status) as *const _ + as usize + }, + 491usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(kbd_status) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).secure_boot) as *const _ + as usize + }, + 492usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(secure_boot) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::()))._pad5) as *const _ as usize + }, + 493usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(_pad5) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).sentinel) as *const _ as usize + }, + 495usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(sentinel) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::()))._pad6) as *const _ as usize + }, + 496usize, + concat!( + "Offset of field: ", + stringify!(boot_params), + "::", + stringify!(_pad6) + ) + ); + assert_eq!( + unsafe { + std::ptr::addr_of!((*(::std::ptr::null::())).hdr) as *const _ as usize + }, + 497usize, + concat!( + "Offset of field: ", + 
stringify!(boot_params),
+            "::",
+            stringify!(hdr)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            std::ptr::addr_of!((*(::std::ptr::null::<boot_params>()))._pad7) as *const _ as usize
+        },
+        620usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(boot_params),
+            "::",
+            stringify!(_pad7)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            std::ptr::addr_of!((*(::std::ptr::null::<boot_params>())).edd_mbr_sig_buffer)
+                as *const _ as usize
+        },
+        656usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(boot_params),
+            "::",
+            stringify!(edd_mbr_sig_buffer)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            std::ptr::addr_of!((*(::std::ptr::null::<boot_params>())).e820_table) as *const _
+                as usize
+        },
+        720usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(boot_params),
+            "::",
+            stringify!(e820_table)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            std::ptr::addr_of!((*(::std::ptr::null::<boot_params>()))._pad8) as *const _ as usize
+        },
+        3280usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(boot_params),
+            "::",
+            stringify!(_pad8)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            std::ptr::addr_of!((*(::std::ptr::null::<boot_params>())).eddbuf) as *const _ as usize
+        },
+        3328usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(boot_params),
+            "::",
+            stringify!(eddbuf)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            std::ptr::addr_of!((*(::std::ptr::null::<boot_params>()))._pad9) as *const _ as usize
+        },
+        3820usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(boot_params),
+            "::",
+            stringify!(_pad9)
+        )
+    );
+}
+
+impl Default for boot_params {
+    fn default() -> Self {
+        unsafe { ::std::mem::zeroed() }
+    }
+}
+pub const X86_HARDWARE_SUBARCH_X86_SUBARCH_PC: x86_hardware_subarch = 0;
+pub const X86_HARDWARE_SUBARCH_X86_SUBARCH_LGUEST: x86_hardware_subarch = 1;
+pub const X86_HARDWARE_SUBARCH_X86_SUBARCH_XEN: x86_hardware_subarch = 2;
+pub const X86_HARDWARE_SUBARCH_X86_SUBARCH_INTEL_MID: x86_hardware_subarch = 3;
+pub const X86_HARDWARE_SUBARCH_X86_SUBARCH_CE4100: x86_hardware_subarch = 4;
+pub const X86_HARDWARE_SUBARCH_X86_NR_SUBARCHS: x86_hardware_subarch = 5;
+
+#[doc = " enum x86_hardware_subarch - x86 hardware subarchitecture"]
+#[doc = ""]
+#[doc = " The x86 hardware_subarch and hardware_subarch_data were added as of the x86"]
+#[doc = " boot protocol 2.07 to help distinguish and support custom x86 boot"]
+#[doc = " sequences. This enum represents accepted values for the x86"]
+#[doc = " hardware_subarch. Custom x86 boot sequences (not X86_SUBARCH_PC) do not"]
+#[doc = " have or simply *cannot* make use of natural stubs like BIOS or EFI, the"]
+#[doc = " hardware_subarch can be used on the Linux entry path to revector to a"]
+#[doc = " subarchitecture stub when needed. This subarchitecture stub can be used to"]
+#[doc = " set up Linux boot parameters or for special care to account for nonstandard"]
+#[doc = " handling of page tables."]
+#[doc = ""]
+#[doc = " These enums should only ever be used by x86 code, and the code that uses"]
+#[doc = " it should be well contained and compartmentalized."]
+#[doc = ""]
+#[doc = " KVM and Xen HVM do not have a subarch as these are expected to follow"]
+#[doc = " standard x86 boot entries. If there is a genuine need for \"hypervisor\" type"]
+#[doc = " that should be considered separately in the future. Future guest types"]
+#[doc = " should seriously consider working with standard x86 boot stubs such as"]
+#[doc = " the BIOS or EFI boot stubs."]
+#[doc = ""]
+#[doc = " WARNING: this enum is only used for legacy hacks, for platform features that"]
+#[doc = "\t are not easily enumerated or discoverable. You should not ever use"]
You should not ever use"] +#[doc = "\t this for new features."] +#[doc = ""] +#[doc = " @X86_SUBARCH_PC: Should be used if the hardware is enumerable using standard"] +#[doc = "\tPC mechanisms (PCI, ACPI) and doesn't need a special boot flow."] +#[doc = " @X86_SUBARCH_LGUEST: Used for x86 hypervisor demo, lguest, deprecated"] +#[doc = " @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV boot path,"] +#[doc = " \twhich start at asm startup_xen() entry point and later jump to the C"] +#[doc = " \txen_start_kernel() entry point. Both domU and dom0 type of guests are"] +#[doc = " \tcurrently supported through this PV boot path."] +#[doc = " @X86_SUBARCH_INTEL_MID: Used for Intel MID (Mobile Internet Device) platform"] +#[doc = "\tsystems which do not have the PCI legacy interfaces."] +#[doc = " @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100) SoC"] +#[doc = " \tfor settop boxes and media devices, the use of a subarch for CE4100"] +#[doc = " \tis more of a hack..."] +pub type x86_hardware_subarch = ::std::os::raw::c_uint; diff --git a/src/dragonball/src/dbs_boot/src/x86_64/layout.rs b/src/dragonball/src/dbs_boot/src/x86_64/layout.rs new file mode 100644 index 000000000000..4bd65dbe00d7 --- /dev/null +++ b/src/dragonball/src/dbs_boot/src/x86_64/layout.rs @@ -0,0 +1,100 @@ +// Copyright 2021-2022 Alibaba Cloud. All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use lazy_static::lazy_static; + +/// Magic addresses externally used to lay out x86_64 VMs. + +/// Global Descriptor Table Offset +pub const BOOT_GDT_OFFSET: u64 = 0x500; +/// Interrupt Descriptor Table Offset +pub const BOOT_IDT_OFFSET: u64 = 0x520; + +/// Address of Global Descriptor Table (GDT) +pub const BOOT_GDT_ADDRESS: u64 = 0x500; +/// Number of initial GDT entries. +pub const BOOT_GDT_MAX: usize = 4; + +/// Address of Interrupt Descriptor Table (IDT) +pub const BOOT_IDT_ADDRESS: u64 = 0x520; + +/// The 'zero page', a.k.a linux kernel bootparams. +pub const ZERO_PAGE_START: u64 = 0x7000; + +/// Initial stack for the boot CPU. +pub const BOOT_STACK_POINTER: u64 = 0x8ff0; + +/// Address of page table level 4 page +pub const PML4_START: u64 = 0x9000; +/// Address of page table level 3 page +pub const PDPTE_START: u64 = 0xa000; +/// Address of page table level 2 page +pub const PDE_START: u64 = 0xb000; + +/// Kernel command line start address. +pub const CMDLINE_START: u64 = 0x20000; +/// Kernel command line start address maximum size. +pub const CMDLINE_MAX_SIZE: usize = 0x10000; + +/// Kernel dragonball boot parameters start address. +pub const DB_BOOT_PARAM_START: u64 = 0x30000; +/// Kernel dragonball boot parameters length maximum size. +pub const DB_BOOT_PARAM_MAX_SIZE: u32 = 0x10000; + +/// Start of the high memory. +pub const HIMEM_START: u64 = 0x0010_0000; //1 MB. + +// Typically, on x86 systems 16 IRQs are used (0-15). +/// First usable IRQ ID for virtio device interrupts on x86_64. +pub const IRQ_BASE: u32 = 5; +/// Last usable IRQ ID for virtio device interrupts on x86_64. +pub const IRQ_MAX: u32 = 15; + +/// Address for the TSS setup. +pub const KVM_TSS_ADDRESS: u64 = 0xfffb_d000; + +/// Where BIOS/VGA magic would live on a real PC. +pub const EBDA_START: u64 = 0x9fc00; + +/// Start address of the lower MMIO window. 
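+
+// Aside (illustrative sketch, not part of the original patch): the fixed low-memory
+// layout defined above is strictly ordered and its windows do not overlap. A quick
+// sanity check of that ordering, written only against the constants in this file:
+#[test]
+fn low_memory_layout_is_ordered() {
+    assert!(BOOT_GDT_OFFSET < BOOT_IDT_OFFSET);
+    assert!(BOOT_IDT_OFFSET < ZERO_PAGE_START);
+    assert!(ZERO_PAGE_START < BOOT_STACK_POINTER);
+    assert!(BOOT_STACK_POINTER < PML4_START);
+    assert!(PML4_START < PDPTE_START && PDPTE_START < PDE_START);
+    assert!(PDE_START < CMDLINE_START);
+    // The kernel command line and dragonball boot parameter windows sit back to back.
+    assert!(CMDLINE_START + CMDLINE_MAX_SIZE as u64 <= DB_BOOT_PARAM_START);
+    assert!(DB_BOOT_PARAM_START + u64::from(DB_BOOT_PARAM_MAX_SIZE) <= HIMEM_START);
+}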
+pub const MMIO_LOW_START: u64 = 3u64 << 30; +/// End address (inclusive) of the lower MMIO window. +pub const MMIO_LOW_END: u64 = (4u64 << 30) - 1; +/// Lower bound of guest memory. +pub const GUEST_MEM_START: u64 = 0u64; +/// Size of memory below MMIO hole. +pub const GUEST_MEM_LOW_SIZE: u64 = MMIO_LOW_START - GUEST_MEM_START; + +/// Max retry times for reading /proc/cpuinfo +const CPUINFO_READ_RETRY: u64 = 5; + +lazy_static! { + /// Maximum guest physical address supported. + pub static ref GUEST_PHYS_END: u64 = { + for _ in 0..CPUINFO_READ_RETRY { + if let Ok(buf) = std::fs::read("/proc/cpuinfo") { + let content = String::from_utf8_lossy(&buf); + for line in content.lines() { + if line.starts_with("address sizes : ") { + if let Some(end) = line.find(" bits physical") { + if let Ok(size) = line[16..end].parse::() { + if (36..=64).contains(&size) { + return (1u64 << size) - 1; + } + } + } + } + } + } + } + panic!("Exceed max retry times. Cannot get physical address size from /proc/cpuinfo"); + }; + + /// Upper bound of guest memory. + pub static ref GUEST_MEM_END: u64 = *GUEST_PHYS_END >> 1; +} diff --git a/src/dragonball/src/dbs_boot/src/x86_64/mod.rs b/src/dragonball/src/dbs_boot/src/x86_64/mod.rs new file mode 100644 index 000000000000..50443ed14412 --- /dev/null +++ b/src/dragonball/src/dbs_boot/src/x86_64/mod.rs @@ -0,0 +1,325 @@ +// Copyright 2021 Alibaba Cloud. All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! VM boot related constants and utilities for `x86_64` architecture. + +use dbs_arch::gdt::gdt_entry; +use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryRegion}; + +use self::layout::{BOOT_GDT_ADDRESS, BOOT_GDT_MAX, BOOT_IDT_ADDRESS}; +use super::Result; + +/// Magic addresses externally used to lay out x86_64 VMs. +pub mod layout; + +/// Structure definitions for SMP machines following the Intel Multiprocessing Specification 1.1 and 1.4. +pub mod mpspec; + +/// MP Table configurations used for defining VM boot status. +pub mod mptable; + +/// Guest boot parameters used for config guest information. +pub mod bootparam; + +/// Default (smallest) memory page size for the supported architectures. +pub const PAGE_SIZE: usize = 4096; + +/// Boot parameters wrapper for ByteValue trait +// This is a workaround to the Rust enforcement specifying that any implementation of a foreign +// trait (in this case `ByteValued`) where: +// * the type that is implementing the trait is foreign or +// * all of the parameters being passed to the trait (if there are any) are also foreign +// is prohibited. +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +pub struct BootParamsWrapper(pub bootparam::boot_params); + +// It is safe to initialize BootParamsWrap which is a wrapper over `boot_params` (a series of ints). +unsafe impl ByteValued for BootParamsWrapper {} + +/// Errors thrown while configuring x86_64 system. +#[derive(Debug, Eq, PartialEq, thiserror::Error)] +pub enum Error { + /// Invalid e820 setup params. + #[error("invalid e820 setup parameters")] + E820Configuration, + + /// Error writing MP table to memory. + #[error("failed to write MP table to guest memory")] + MpTableSetup(#[source] mptable::Error), + + /// The zero page extends past the end of guest_mem. 
+ #[error("the guest zero page extends past the end of guest memory")] + ZeroPagePastRamEnd, + + /// Error writing the zero page of guest memory. + #[error("failed to write to guest zero page")] + ZeroPageSetup, + + /// Failed to compute initrd address. + #[error("invalid guest memory address for Initrd")] + InitrdAddress, + + /// boot parameter setup fail. + #[error("write boot parameter fail")] + BootParamSetup, + + /// Empty AddressSpace from parameters. + #[error("Empty AddressSpace from parameters")] + AddressSpace, + + /// Writing PDPTE to RAM failed. + #[error("Writing PDPTE to RAM failed.")] + WritePDPTEAddress, + + /// Writing PDE to RAM failed. + #[error("Writing PDE to RAM failed.")] + WritePDEAddress, + + #[error("Writing PML4 to RAM failed.")] + /// Writing PML4 to RAM failed. + WritePML4Address, +} + +/// Initialize the 1:1 identity mapping table for guest memory range [0..1G). +/// +/// Also, return the pml4 address for sregs setting and AP boot +pub fn setup_identity_mapping(mem: &M) -> Result { + // Puts PML4 right after zero page but aligned to 4k. + let boot_pml4_addr = GuestAddress(layout::PML4_START); + let boot_pdpte_addr = GuestAddress(layout::PDPTE_START); + let boot_pde_addr = GuestAddress(layout::PDE_START); + + // Entry covering VA [0..512GB) + mem.write_obj(boot_pdpte_addr.raw_value() | 0x03, boot_pml4_addr) + .map_err(|_| Error::WritePML4Address)?; + + // Entry covering VA [0..1GB) + mem.write_obj(boot_pde_addr.raw_value() | 0x03, boot_pdpte_addr) + .map_err(|_| Error::WritePDPTEAddress)?; + + // 512 2MB entries together covering VA [0..1GB). Note we are assuming + // CPU supports 2MB pages (/proc/cpuinfo has 'pse'). All modern CPUs do. + for i in 0..512 { + mem.write_obj((i << 21) + 0x83u64, boot_pde_addr.unchecked_add(i * 8)) + .map_err(|_| Error::WritePDEAddress)?; + } + + // return the pml4 address that could be used for AP boot up and later sreg setting process. + Ok(boot_pml4_addr) +} + +/// Get information to configure GDT/IDT. +pub fn get_descriptor_config_info() -> ([u64; BOOT_GDT_MAX], u64, u64) { + let gdt_table: [u64; BOOT_GDT_MAX] = [ + gdt_entry(0, 0, 0), // NULL + gdt_entry(0xa09b, 0, 0xfffff), // CODE + gdt_entry(0xc093, 0, 0xfffff), // DATA + gdt_entry(0x808b, 0, 0xfffff), // TSS + ]; + + (gdt_table, BOOT_GDT_ADDRESS, BOOT_IDT_ADDRESS) +} + +/// Returns the memory address where the initrd could be loaded. +pub fn initrd_load_addr(guest_mem: &M, initrd_size: u64) -> Result { + let lowmem_size = guest_mem + .find_region(GuestAddress(0)) + .ok_or(Error::InitrdAddress) + .map(|r| r.len())?; + + // For safety to avoid overlap, reserve 32M for kernel and boot params in low end. + if lowmem_size < initrd_size + (32 << 20) { + return Err(Error::InitrdAddress); + } + + let align_to_pagesize = |address| address & !(PAGE_SIZE as u64 - 1); + Ok(align_to_pagesize(lowmem_size - initrd_size)) +} + +/// Returns the memory address where the kernel could be loaded. +pub fn get_kernel_start() -> u64 { + layout::HIMEM_START +} + +/// Add an e820 region to the e820 map. +/// Returns Ok(()) if successful, or an error if there is no space left in the map. 
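+
+// Aside (illustrative sketch, not part of the original patch): setup_identity_mapping()
+// above fills the page directory with entries of the form (i << 21) + 0x83, i.e. a 2 MiB
+// "large page" entry with the Present and Writable bits set. The helper and flag names
+// below are hypothetical, used only to spell out that encoding:
+const PDE_PRESENT: u64 = 1 << 0;
+const PDE_WRITABLE: u64 = 1 << 1;
+const PDE_LARGE_PAGE: u64 = 1 << 7; // PS bit: the entry maps a 2 MiB page directly.
+
+fn pde_for_2mib_frame(index: u64) -> u64 {
+    // Entry `index` maps the 2 MiB physical frame starting at index * 2 MiB.
+    (index << 21) | PDE_PRESENT | PDE_WRITABLE | PDE_LARGE_PAGE
+}
+
+#[test]
+fn pde_encoding_matches_identity_mapping() {
+    assert_eq!(pde_for_2mib_frame(0), 0x83);
+    assert_eq!(pde_for_2mib_frame(1), (1u64 << 21) + 0x83);
+}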
+pub fn add_e820_entry( + params: &mut bootparam::boot_params, + addr: u64, + size: u64, + mem_type: u32, +) -> Result<()> { + if params.e820_entries >= params.e820_table.len() as u8 { + return Err(Error::E820Configuration); + } + + params.e820_table[params.e820_entries as usize].addr = addr; + params.e820_table[params.e820_entries as usize].size = size; + params.e820_table[params.e820_entries as usize].type_ = mem_type; + params.e820_entries += 1; + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::bootparam::{boot_e820_entry, boot_params}; + use crate::layout::{PDE_START, PDPTE_START, PML4_START}; + use kvm_bindings::kvm_sregs; + use kvm_ioctls::Kvm; + use vm_memory::GuestMemoryMmap; + + const BOOT_GDT_OFFSET: u64 = 0x500; + const BOOT_IDT_OFFSET: u64 = 0x520; + + fn read_u64(gm: &GuestMemoryMmap, offset: u64) -> u64 { + let read_addr = GuestAddress(offset); + gm.read_obj(read_addr).unwrap() + } + + #[test] + fn test_get_descriptor_config_info() { + let (gdt_table, gdt_addr, idt_addr) = get_descriptor_config_info(); + + assert_eq!(gdt_table.len(), BOOT_GDT_MAX); + assert_eq!(gdt_addr, BOOT_GDT_ADDRESS); + assert_eq!(idt_addr, BOOT_IDT_ADDRESS); + } + + #[test] + fn test_setup_identity_mapping() { + let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + setup_identity_mapping(&gm).unwrap(); + assert_eq!(0xa003, read_u64(&gm, PML4_START)); + assert_eq!(0xb003, read_u64(&gm, PDPTE_START)); + for i in 0..512 { + assert_eq!((i << 21) + 0x83u64, read_u64(&gm, PDE_START + (i * 8))); + } + } + + #[test] + fn test_write_boot_param() { + const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55; + const KERNEL_HDR_MAGIC: u32 = 0x5372_6448; + const KERNEL_LOADER_OTHER: u8 = 0xff; + const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x0100_0000; // Must be non-zero. 
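+
+// Aside (illustrative usage sketch, not part of the original patch): a VMM would typically
+// call add_e820_entry() while building the zero page to describe guest RAM to the kernel.
+// `describe_low_memory` is a hypothetical helper; the memory type value 1 denotes
+// "usable RAM" in the e820 convention.
+fn describe_low_memory(params: &mut bootparam::boot_params, himem_size: u64) -> Result<()> {
+    // Conventional memory below the EBDA.
+    add_e820_entry(params, 0, layout::EBDA_START, 1)?;
+    // RAM above 1 MiB, where the kernel image is loaded.
+    add_e820_entry(params, layout::HIMEM_START, himem_size, 1)
+}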
+ let mut params: BootParamsWrapper = BootParamsWrapper(bootparam::boot_params::default()); + + params.0.hdr.type_of_loader = KERNEL_LOADER_OTHER; + params.0.hdr.boot_flag = KERNEL_BOOT_FLAG_MAGIC; + params.0.hdr.header = KERNEL_HDR_MAGIC; + params.0.hdr.kernel_alignment = KERNEL_MIN_ALIGNMENT_BYTES; + + assert_eq!(params.0.hdr.type_of_loader, KERNEL_LOADER_OTHER); + assert_eq!( + unsafe { std::ptr::addr_of!(params.0.hdr.boot_flag).read_unaligned() }, + KERNEL_BOOT_FLAG_MAGIC + ); + assert_eq!( + unsafe { std::ptr::addr_of!(params.0.hdr.header).read_unaligned() }, + KERNEL_HDR_MAGIC + ); + assert_eq!( + unsafe { std::ptr::addr_of!(params.0.hdr.kernel_alignment).read_unaligned() }, + KERNEL_MIN_ALIGNMENT_BYTES + ); + } + + fn validate_page_tables( + gm: &GuestMemoryMmap, + sregs: &kvm_sregs, + existing_pgtable: Option, + ) { + assert_eq!(0xa003, read_u64(gm, PML4_START)); + assert_eq!(0xb003, read_u64(gm, PDPTE_START)); + for i in 0..512 { + assert_eq!((i << 21) + 0x83u64, read_u64(gm, PDE_START + (i * 8))); + } + + if let Some(pgtable_base) = existing_pgtable { + assert_eq!(pgtable_base.raw_value(), sregs.cr3); + } else { + assert_eq!(PML4_START, sregs.cr3); + } + assert!(sregs.cr4 & dbs_arch::regs::X86_CR4_PAE != 0); + assert!(sregs.cr0 & dbs_arch::regs::X86_CR0_PG != 0); + } + + fn create_guest_mem() -> GuestMemoryMmap { + GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap() + } + + #[test] + fn test_setup_page_tables() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + let gm = create_guest_mem(); + let gdt_table: [u64; layout::BOOT_GDT_MAX] = [ + gdt_entry(0, 0, 0), // NULL + gdt_entry(0xa09b, 0, 0xfffff), // CODE + gdt_entry(0xc093, 0, 0xfffff), // DATA + gdt_entry(0x808b, 0, 0xfffff), // TSS + ]; + + let page_address = setup_identity_mapping(&gm).unwrap(); + dbs_arch::regs::setup_sregs( + &gm, + &vcpu, + page_address, + &gdt_table, + BOOT_GDT_OFFSET, + BOOT_IDT_OFFSET, + ) + .unwrap(); + let sregs: kvm_sregs = vcpu.get_sregs().unwrap(); + validate_page_tables(&gm, &sregs, Some(page_address)); + } + + #[test] + fn test_add_e820_entry() { + let e820_table = [(boot_e820_entry { + addr: 0x1, + size: 4, + type_: 1, + }); 128]; + + let expected_params = boot_params { + e820_table, + e820_entries: 1, + ..Default::default() + }; + + let mut params: boot_params = Default::default(); + add_e820_entry( + &mut params, + e820_table[0].addr, + e820_table[0].size, + e820_table[0].type_, + ) + .unwrap(); + assert_eq!( + format!("{:?}", params.e820_table[0]), + format!("{:?}", expected_params.e820_table[0]) + ); + assert_eq!(params.e820_entries, expected_params.e820_entries); + + // Exercise the scenario where the field storing the length of the e820 entry table is + // is bigger than the allocated memory. + params.e820_entries = params.e820_table.len() as u8 + 1; + assert!(add_e820_entry( + &mut params, + e820_table[0].addr, + e820_table[0].size, + e820_table[0].type_ + ) + .is_err()); + } +} diff --git a/src/dragonball/src/dbs_boot/src/x86_64/mpspec.rs b/src/dragonball/src/dbs_boot/src/x86_64/mpspec.rs new file mode 100644 index 000000000000..98c9d91b7487 --- /dev/null +++ b/src/dragonball/src/dbs_boot/src/x86_64/mpspec.rs @@ -0,0 +1,936 @@ +// Copyright 2023 Alibaba Cloud. All Rights Reserved. +// Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. +// SPDX-License-Identifier: Apache-2.0 + +//! 
Structure definitions for SMP machines following the Intel Multiprocessing Specification 1.1 and 1.4. + +/* automatically generated by rust-bindgen */ + +#![allow(missing_docs)] +#![allow(non_camel_case_types)] +#![allow(non_upper_case_globals)] +#![allow(deref_nullptr)] + +pub const MPC_SIGNATURE: &[u8; 5usize] = b"PCMP\x00"; +pub const MP_PROCESSOR: ::std::os::raw::c_uint = 0; +pub const MP_BUS: ::std::os::raw::c_uint = 1; +pub const MP_IOAPIC: ::std::os::raw::c_uint = 2; +pub const MP_INTSRC: ::std::os::raw::c_uint = 3; +pub const MP_LINTSRC: ::std::os::raw::c_uint = 4; +pub const MP_TRANSLATION: ::std::os::raw::c_uint = 192; +pub const CPU_ENABLED: ::std::os::raw::c_uint = 1; +pub const CPU_BOOTPROCESSOR: ::std::os::raw::c_uint = 2; +pub const CPU_STEPPING_MASK: ::std::os::raw::c_uint = 15; +pub const CPU_MODEL_MASK: ::std::os::raw::c_uint = 240; +pub const CPU_FAMILY_MASK: ::std::os::raw::c_uint = 3840; +pub const BUSTYPE_EISA: &[u8; 5usize] = b"EISA\x00"; +pub const BUSTYPE_ISA: &[u8; 4usize] = b"ISA\x00"; +pub const BUSTYPE_INTERN: &[u8; 7usize] = b"INTERN\x00"; +pub const BUSTYPE_MCA: &[u8; 4usize] = b"MCA\x00"; +pub const BUSTYPE_VL: &[u8; 3usize] = b"VL\x00"; +pub const BUSTYPE_PCI: &[u8; 4usize] = b"PCI\x00"; +pub const BUSTYPE_PCMCIA: &[u8; 7usize] = b"PCMCIA\x00"; +pub const BUSTYPE_CBUS: &[u8; 5usize] = b"CBUS\x00"; +pub const BUSTYPE_CBUSII: &[u8; 7usize] = b"CBUSII\x00"; +pub const BUSTYPE_FUTURE: &[u8; 7usize] = b"FUTURE\x00"; +pub const BUSTYPE_MBI: &[u8; 4usize] = b"MBI\x00"; +pub const BUSTYPE_MBII: &[u8; 5usize] = b"MBII\x00"; +pub const BUSTYPE_MPI: &[u8; 4usize] = b"MPI\x00"; +pub const BUSTYPE_MPSA: &[u8; 5usize] = b"MPSA\x00"; +pub const BUSTYPE_NUBUS: &[u8; 6usize] = b"NUBUS\x00"; +pub const BUSTYPE_TC: &[u8; 3usize] = b"TC\x00"; +pub const BUSTYPE_VME: &[u8; 4usize] = b"VME\x00"; +pub const BUSTYPE_XPRESS: &[u8; 7usize] = b"XPRESS\x00"; +pub const MPC_APIC_USABLE: ::std::os::raw::c_uint = 1; +pub const MP_IRQDIR_DEFAULT: ::std::os::raw::c_uint = 0; +pub const MP_IRQDIR_HIGH: ::std::os::raw::c_uint = 1; +pub const MP_IRQDIR_LOW: ::std::os::raw::c_uint = 3; +pub const MP_APIC_ALL: ::std::os::raw::c_uint = 255; +pub const MPC_OEM_SIGNATURE: &[u8; 5usize] = b"_OEM\x00"; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct mpf_intel { + pub signature: [::std::os::raw::c_char; 4usize], + pub physptr: ::std::os::raw::c_uint, + pub length: ::std::os::raw::c_uchar, + pub specification: ::std::os::raw::c_uchar, + pub checksum: ::std::os::raw::c_uchar, + pub feature1: ::std::os::raw::c_uchar, + pub feature2: ::std::os::raw::c_uchar, + pub feature3: ::std::os::raw::c_uchar, + pub feature4: ::std::os::raw::c_uchar, + pub feature5: ::std::os::raw::c_uchar, +} + +#[test] +fn default_mpf_intel() { + let mpf_intel = mpf_intel::default(); + assert_eq!(mpf_intel.signature, [0i8, 0i8, 0i8, 0i8]); + assert_eq!(mpf_intel.physptr, 0u32); + assert_eq!(mpf_intel.length, 0u8); + assert_eq!(mpf_intel.specification, 0u8); + assert_eq!(mpf_intel.checksum, 0u8); + assert_eq!(mpf_intel.feature1, 0u8); + assert_eq!(mpf_intel.feature2, 0u8); + assert_eq!(mpf_intel.feature3, 0u8); + assert_eq!(mpf_intel.feature4, 0u8); + assert_eq!(mpf_intel.feature5, 0u8); +} + +#[test] +fn bindgen_test_layout_mpf_intel() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(mpf_intel)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(mpf_intel)) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).signature as *const _ as 
usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(mpf_intel), + "::", + stringify!(signature) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).physptr as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(mpf_intel), + "::", + stringify!(physptr) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).length as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(mpf_intel), + "::", + stringify!(length) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).specification as *const _ as usize }, + 9usize, + concat!( + "Alignment of field: ", + stringify!(mpf_intel), + "::", + stringify!(specification) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).checksum as *const _ as usize }, + 10usize, + concat!( + "Alignment of field: ", + stringify!(mpf_intel), + "::", + stringify!(checksum) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).feature1 as *const _ as usize }, + 11usize, + concat!( + "Alignment of field: ", + stringify!(mpf_intel), + "::", + stringify!(feature1) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).feature2 as *const _ as usize }, + 12usize, + concat!( + "Alignment of field: ", + stringify!(mpf_intel), + "::", + stringify!(feature2) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).feature3 as *const _ as usize }, + 13usize, + concat!( + "Alignment of field: ", + stringify!(mpf_intel), + "::", + stringify!(feature3) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).feature4 as *const _ as usize }, + 14usize, + concat!( + "Alignment of field: ", + stringify!(mpf_intel), + "::", + stringify!(feature4) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).feature5 as *const _ as usize }, + 15usize, + concat!( + "Alignment of field: ", + stringify!(mpf_intel), + "::", + stringify!(feature5) + ) + ); +} + +impl Clone for mpf_intel { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct mpc_table { + pub signature: [::std::os::raw::c_char; 4usize], + pub length: ::std::os::raw::c_ushort, + pub spec: ::std::os::raw::c_char, + pub checksum: ::std::os::raw::c_char, + pub oem: [::std::os::raw::c_char; 8usize], + pub productid: [::std::os::raw::c_char; 12usize], + pub oemptr: ::std::os::raw::c_uint, + pub oemsize: ::std::os::raw::c_ushort, + pub oemcount: ::std::os::raw::c_ushort, + pub lapic: ::std::os::raw::c_uint, + pub reserved: ::std::os::raw::c_uint, +} + +#[test] +fn default_mpc_table() { + let mpc_table = mpc_table::default(); + assert_eq!(mpc_table.signature, [0i8, 0i8, 0i8, 0i8]); + assert_eq!(mpc_table.length, 0u16); + assert_eq!(mpc_table.spec, 0i8); + assert_eq!(mpc_table.checksum, 0i8); + assert_eq!(mpc_table.oem, [0i8, 0i8, 0i8, 0i8, 0i8, 0i8, 0i8, 0i8]); + assert_eq!( + mpc_table.productid, + [0i8, 0i8, 0i8, 0i8, 0i8, 0i8, 0i8, 0i8, 0i8, 0i8, 0i8, 0i8] + ); + assert_eq!(mpc_table.oemptr, 0u32); + assert_eq!(mpc_table.oemsize, 0u16); + assert_eq!(mpc_table.oemcount, 0u16); + assert_eq!(mpc_table.lapic, 0u32); + assert_eq!(mpc_table.reserved, 0u32); +} +#[test] +fn bindgen_test_layout_mpc_table() { + assert_eq!( + ::std::mem::size_of::(), + 44usize, + concat!("Size of: ", stringify!(mpc_table)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(mpc_table)) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).signature as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(mpc_table), + "::", + 
stringify!(signature) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).length as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(mpc_table), + "::", + stringify!(length) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).spec as *const _ as usize }, + 6usize, + concat!( + "Alignment of field: ", + stringify!(mpc_table), + "::", + stringify!(spec) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).checksum as *const _ as usize }, + 7usize, + concat!( + "Alignment of field: ", + stringify!(mpc_table), + "::", + stringify!(checksum) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).oem as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(mpc_table), + "::", + stringify!(oem) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).productid as *const _ as usize }, + 16usize, + concat!( + "Alignment of field: ", + stringify!(mpc_table), + "::", + stringify!(productid) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).oemptr as *const _ as usize }, + 28usize, + concat!( + "Alignment of field: ", + stringify!(mpc_table), + "::", + stringify!(oemptr) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).oemsize as *const _ as usize }, + 32usize, + concat!( + "Alignment of field: ", + stringify!(mpc_table), + "::", + stringify!(oemsize) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).oemcount as *const _ as usize }, + 34usize, + concat!( + "Alignment of field: ", + stringify!(mpc_table), + "::", + stringify!(oemcount) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).lapic as *const _ as usize }, + 36usize, + concat!( + "Alignment of field: ", + stringify!(mpc_table), + "::", + stringify!(lapic) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).reserved as *const _ as usize }, + 40usize, + concat!( + "Alignment of field: ", + stringify!(mpc_table), + "::", + stringify!(reserved) + ) + ); +} +impl Clone for mpc_table { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct mpc_cpu { + pub type_: ::std::os::raw::c_uchar, + pub apicid: ::std::os::raw::c_uchar, + pub apicver: ::std::os::raw::c_uchar, + pub cpuflag: ::std::os::raw::c_uchar, + pub cpufeature: ::std::os::raw::c_uint, + pub featureflag: ::std::os::raw::c_uint, + pub reserved: [::std::os::raw::c_uint; 2usize], +} +#[test] +fn default_mpc_cpu() { + let mpc_cpu = mpc_cpu::default(); + assert_eq!(mpc_cpu.type_, 0u8); + assert_eq!(mpc_cpu.apicid, 0u8); + assert_eq!(mpc_cpu.apicver, 0u8); + assert_eq!(mpc_cpu.cpuflag, 0u8); + assert_eq!(mpc_cpu.cpufeature, 0u32); + assert_eq!(mpc_cpu.featureflag, 0u32); + assert_eq!(mpc_cpu.reserved, [0u32, 0u32]); +} +#[test] +fn bindgen_test_layout_mpc_cpu() { + assert_eq!( + ::std::mem::size_of::(), + 20usize, + concat!("Size of: ", stringify!(mpc_cpu)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(mpc_cpu)) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).type_ as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(mpc_cpu), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).apicid as *const _ as usize }, + 1usize, + concat!( + "Alignment of field: ", + stringify!(mpc_cpu), + "::", + stringify!(apicid) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).apicver as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(mpc_cpu), + "::", + stringify!(apicver) + ) + ); + 
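+
+// Aside (not part of the original patch): the bindgen-generated layout tests in this file
+// assert field offsets through a null-pointer cast, which is how older bindgen emitted
+// them. On newer Rust (1.77+) the same check can be written with `core::mem::offset_of!`;
+// a minimal sketch on a hypothetical #[repr(C)] struct:
+#[repr(C)]
+struct OffsetExample {
+    a: u8,
+    b: u32,
+}
+
+#[test]
+fn offset_example_layout() {
+    assert_eq!(::std::mem::size_of::<OffsetExample>(), 8);
+    assert_eq!(core::mem::offset_of!(OffsetExample, a), 0);
+    // `b` needs 4-byte alignment, so padding pushes it to offset 4.
+    assert_eq!(core::mem::offset_of!(OffsetExample, b), 4);
+}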
assert_eq!( + unsafe { &(*(std::ptr::null::())).cpuflag as *const _ as usize }, + 3usize, + concat!( + "Alignment of field: ", + stringify!(mpc_cpu), + "::", + stringify!(cpuflag) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).cpufeature as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(mpc_cpu), + "::", + stringify!(cpufeature) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).featureflag as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(mpc_cpu), + "::", + stringify!(featureflag) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).reserved as *const _ as usize }, + 12usize, + concat!( + "Alignment of field: ", + stringify!(mpc_cpu), + "::", + stringify!(reserved) + ) + ); +} +impl Clone for mpc_cpu { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct mpc_bus { + pub type_: ::std::os::raw::c_uchar, + pub busid: ::std::os::raw::c_uchar, + pub bustype: [::std::os::raw::c_uchar; 6usize], +} +#[test] +fn default_mpc_bus() { + let mpc_bus = mpc_bus::default(); + assert_eq!(mpc_bus.type_, 0u8); + assert_eq!(mpc_bus.busid, 0u8); + assert_eq!(mpc_bus.bustype, [0u8, 0u8, 0u8, 0u8, 0u8, 0u8]); +} +#[test] +fn bindgen_test_layout_mpc_bus() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(mpc_bus)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(mpc_bus)) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).type_ as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(mpc_bus), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).busid as *const _ as usize }, + 1usize, + concat!( + "Alignment of field: ", + stringify!(mpc_bus), + "::", + stringify!(busid) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).bustype as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(mpc_bus), + "::", + stringify!(bustype) + ) + ); +} +impl Clone for mpc_bus { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct mpc_ioapic { + pub type_: ::std::os::raw::c_uchar, + pub apicid: ::std::os::raw::c_uchar, + pub apicver: ::std::os::raw::c_uchar, + pub flags: ::std::os::raw::c_uchar, + pub apicaddr: ::std::os::raw::c_uint, +} +#[test] +fn default_mpc_ioapic() { + let mpc_ioapic = mpc_ioapic::default(); + assert_eq!(mpc_ioapic.type_, 0u8); + assert_eq!(mpc_ioapic.apicid, 0u8); + assert_eq!(mpc_ioapic.apicver, 0u8); + assert_eq!(mpc_ioapic.flags, 0u8); + assert_eq!(mpc_ioapic.apicaddr, 0u32); +} +#[test] +fn bindgen_test_layout_mpc_ioapic() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(mpc_ioapic)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(mpc_ioapic)) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).type_ as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(mpc_ioapic), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).apicid as *const _ as usize }, + 1usize, + concat!( + "Alignment of field: ", + stringify!(mpc_ioapic), + "::", + stringify!(apicid) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).apicver as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(mpc_ioapic), + "::", + stringify!(apicver) + ) + ); + assert_eq!( + unsafe { 
&(*(std::ptr::null::())).flags as *const _ as usize }, + 3usize, + concat!( + "Alignment of field: ", + stringify!(mpc_ioapic), + "::", + stringify!(flags) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).apicaddr as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(mpc_ioapic), + "::", + stringify!(apicaddr) + ) + ); +} +impl Clone for mpc_ioapic { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct mpc_intsrc { + pub type_: ::std::os::raw::c_uchar, + pub irqtype: ::std::os::raw::c_uchar, + pub irqflag: ::std::os::raw::c_ushort, + pub srcbus: ::std::os::raw::c_uchar, + pub srcbusirq: ::std::os::raw::c_uchar, + pub dstapic: ::std::os::raw::c_uchar, + pub dstirq: ::std::os::raw::c_uchar, +} +#[test] +fn default_mpc_intsrc() { + let mpc_intsrc = mpc_intsrc::default(); + assert_eq!(mpc_intsrc.type_, 0u8); + assert_eq!(mpc_intsrc.irqtype, 0u8); + assert_eq!(mpc_intsrc.irqflag, 0u16); + assert_eq!(mpc_intsrc.srcbus, 0u8); + assert_eq!(mpc_intsrc.srcbusirq, 0u8); + assert_eq!(mpc_intsrc.dstapic, 0u8); + assert_eq!(mpc_intsrc.dstirq, 0u8); +} +#[test] +fn bindgen_test_layout_mpc_intsrc() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(mpc_intsrc)) + ); + assert_eq!( + ::std::mem::align_of::(), + 2usize, + concat!("Alignment of ", stringify!(mpc_intsrc)) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).type_ as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(mpc_intsrc), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).irqtype as *const _ as usize }, + 1usize, + concat!( + "Alignment of field: ", + stringify!(mpc_intsrc), + "::", + stringify!(irqtype) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).irqflag as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(mpc_intsrc), + "::", + stringify!(irqflag) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).srcbus as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(mpc_intsrc), + "::", + stringify!(srcbus) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).srcbusirq as *const _ as usize }, + 5usize, + concat!( + "Alignment of field: ", + stringify!(mpc_intsrc), + "::", + stringify!(srcbusirq) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).dstapic as *const _ as usize }, + 6usize, + concat!( + "Alignment of field: ", + stringify!(mpc_intsrc), + "::", + stringify!(dstapic) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).dstirq as *const _ as usize }, + 7usize, + concat!( + "Alignment of field: ", + stringify!(mpc_intsrc), + "::", + stringify!(dstirq) + ) + ); +} +impl Clone for mpc_intsrc { + fn clone(&self) -> Self { + *self + } +} +pub const mp_irq_source_types_mp_INT: mp_irq_source_types = 0; +pub const mp_irq_source_types_mp_NMI: mp_irq_source_types = 1; +pub const mp_irq_source_types_mp_SMI: mp_irq_source_types = 2; +pub const mp_irq_source_types_mp_ExtINT: mp_irq_source_types = 3; +pub type mp_irq_source_types = ::std::os::raw::c_uint; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct mpc_lintsrc { + pub type_: ::std::os::raw::c_uchar, + pub irqtype: ::std::os::raw::c_uchar, + pub irqflag: ::std::os::raw::c_ushort, + pub srcbusid: ::std::os::raw::c_uchar, + pub srcbusirq: ::std::os::raw::c_uchar, + pub destapic: ::std::os::raw::c_uchar, + pub destapiclint: ::std::os::raw::c_uchar, +} +#[test] +fn default_mpc_lintsrc() { + let 
mpc_lintsrc = mpc_lintsrc::default(); + assert_eq!(mpc_lintsrc.type_, 0u8); + assert_eq!(mpc_lintsrc.irqtype, 0u8); + assert_eq!(mpc_lintsrc.irqflag, 0u16); + assert_eq!(mpc_lintsrc.srcbusid, 0u8); + assert_eq!(mpc_lintsrc.srcbusirq, 0u8); + assert_eq!(mpc_lintsrc.destapic, 0u8); + assert_eq!(mpc_lintsrc.destapiclint, 0u8); +} +#[test] +fn bindgen_test_layout_mpc_lintsrc() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(mpc_lintsrc)) + ); + assert_eq!( + ::std::mem::align_of::(), + 2usize, + concat!("Alignment of ", stringify!(mpc_lintsrc)) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).type_ as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(mpc_lintsrc), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).irqtype as *const _ as usize }, + 1usize, + concat!( + "Alignment of field: ", + stringify!(mpc_lintsrc), + "::", + stringify!(irqtype) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).irqflag as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(mpc_lintsrc), + "::", + stringify!(irqflag) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).srcbusid as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(mpc_lintsrc), + "::", + stringify!(srcbusid) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).srcbusirq as *const _ as usize }, + 5usize, + concat!( + "Alignment of field: ", + stringify!(mpc_lintsrc), + "::", + stringify!(srcbusirq) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).destapic as *const _ as usize }, + 6usize, + concat!( + "Alignment of field: ", + stringify!(mpc_lintsrc), + "::", + stringify!(destapic) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).destapiclint as *const _ as usize }, + 7usize, + concat!( + "Alignment of field: ", + stringify!(mpc_lintsrc), + "::", + stringify!(destapiclint) + ) + ); +} +impl Clone for mpc_lintsrc { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct mpc_oemtable { + pub signature: [::std::os::raw::c_char; 4usize], + pub length: ::std::os::raw::c_ushort, + pub rev: ::std::os::raw::c_char, + pub checksum: ::std::os::raw::c_char, + pub mpc: [::std::os::raw::c_char; 8usize], +} +#[test] +fn default_mpc_oemtable() { + let mpc_oemtable = mpc_oemtable::default(); + assert_eq!(mpc_oemtable.signature, [0i8, 0i8, 0i8, 0i8]); + assert_eq!(mpc_oemtable.length, 0u16); + assert_eq!(mpc_oemtable.rev, 0i8); + assert_eq!(mpc_oemtable.checksum, 0i8); + assert_eq!(mpc_oemtable.mpc, [0i8, 0i8, 0i8, 0i8, 0i8, 0i8, 0i8, 0i8]); +} +#[test] +fn bindgen_test_layout_mpc_oemtable() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(mpc_oemtable)) + ); + assert_eq!( + ::std::mem::align_of::(), + 2usize, + concat!("Alignment of ", stringify!(mpc_oemtable)) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).signature as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(mpc_oemtable), + "::", + stringify!(signature) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).length as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(mpc_oemtable), + "::", + stringify!(length) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).rev as *const _ as usize }, + 6usize, + concat!( + "Alignment of field: ", + stringify!(mpc_oemtable), + "::", + stringify!(rev) + ) + ); + assert_eq!( + unsafe { 
&(*(std::ptr::null::())).checksum as *const _ as usize }, + 7usize, + concat!( + "Alignment of field: ", + stringify!(mpc_oemtable), + "::", + stringify!(checksum) + ) + ); + assert_eq!( + unsafe { &(*(std::ptr::null::())).mpc as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(mpc_oemtable), + "::", + stringify!(mpc) + ) + ); +} +impl Clone for mpc_oemtable { + fn clone(&self) -> Self { + *self + } +} + +pub const mp_bustype_MP_BUS_ISA: mp_bustype = 1; +pub const mp_bustype_MP_BUS_EISA: mp_bustype = 2; +pub const mp_bustype_MP_BUS_PCI: mp_bustype = 3; +pub type mp_bustype = ::std::os::raw::c_uint; diff --git a/src/dragonball/src/dbs_boot/src/x86_64/mptable.rs b/src/dragonball/src/dbs_boot/src/x86_64/mptable.rs new file mode 100644 index 000000000000..008e972a521c --- /dev/null +++ b/src/dragonball/src/dbs_boot/src/x86_64/mptable.rs @@ -0,0 +1,523 @@ +// Copyright 2022 Alibaba Cloud. All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! MP Table configurations used for defining VM boot status. + +use libc::c_char; +use std::io; +use std::mem; +use std::result; +use std::slice; + +use super::mpspec; +use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemory}; + +// This is a workaround to the Rust enforcement specifying that any implementation of a foreign +// trait (in this case `ByteValued`) where: +// * the type that is implementing the trait is foreign or +// * all of the parameters being passed to the trait (if there are any) are also foreign +// is prohibited. +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +struct MpcBusWrapper(mpspec::mpc_bus); +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +struct MpcCpuWrapper(mpspec::mpc_cpu); +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +struct MpcIntsrcWrapper(mpspec::mpc_intsrc); +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +struct MpcIoapicWrapper(mpspec::mpc_ioapic); +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +struct MpcTableWrapper(mpspec::mpc_table); +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +struct MpcLintsrcWrapper(mpspec::mpc_lintsrc); +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +struct MpfIntelWrapper(mpspec::mpf_intel); + +// These `mpspec` wrapper types are only data, reading them from data is a safe initialization. +unsafe impl ByteValued for MpcBusWrapper {} +unsafe impl ByteValued for MpcCpuWrapper {} +unsafe impl ByteValued for MpcIntsrcWrapper {} +unsafe impl ByteValued for MpcIoapicWrapper {} +unsafe impl ByteValued for MpcTableWrapper {} +unsafe impl ByteValued for MpcLintsrcWrapper {} +unsafe impl ByteValued for MpfIntelWrapper {} + +// MPTABLE, describing VCPUS. +const MPTABLE_START: u64 = 0x9fc00; + +#[derive(Debug, Eq, PartialEq, thiserror::Error)] +/// MP Table related errors +pub enum Error { + /// There was too little guest memory to store the entire MP table. + #[error("too little guest memory to store the entire MP table")] + NotEnoughMemory, + + /// The MP table has too little address space to be stored. + #[error("the MP table has no enough space")] + AddressOverflow, + + /// Failure while zeroing out the memory for the MP table. 
+ #[error("failure while zeroing out the memory for the MP table")] + Clear, + + /// Number of CPUs exceeds the maximum supported CPUs + #[error("number of CPUs exceeds the maximum supported CPUs")] + TooManyCpus, + + /// Number of CPUs exceeds the maximum supported CPUs + #[error("number of boot CPUs exceeds the maximum number of CPUs")] + TooManyBootCpus, + + /// Failure to write the MP floating pointer. + #[error("failure to write the MP floating pointer")] + WriteMpfIntel, + + /// Failure to write MP CPU entry. + #[error("failure to write MP CPU entry")] + WriteMpcCpu, + + /// Failure to write MP ioapic entry. + #[error("failure to write MP ioapic entry")] + WriteMpcIoapic, + + /// Failure to write MP bus entry. + #[error("failure to write MP bus entry")] + WriteMpcBus, + + /// Failure to write MP interrupt source entry. + #[error("failure to write MP interrupt source entry")] + WriteMpcIntsrc, + + /// Failure to write MP local interrupt source entry. + #[error("failure to write MP local interrupt source entry")] + WriteMpcLintsrc, + + /// Failure to write MP OEM table entry. + #[error("failure to write MP OEM table entry")] + WriteMpcOemtable, + + /// Failure to write MP table header. + #[error("failure to write MP table header")] + WriteMpcTable, +} + +/// Generic type for MP Table Results. +pub type Result = result::Result; + +/// With APIC/xAPIC, there are only 255 APIC IDs available. And IOAPIC occupies +/// one APIC ID, so only 254 CPUs at maximum may be supported. Actually it's +/// a large number for Dragonball usecases. +pub const MAX_SUPPORTED_CPUS: u32 = 254; + +// Convenience macro for making arrays of diverse character types. +macro_rules! char_array { + ($t:ty; $( $c:expr ),*) => ( [ $( $c as $t ),* ] ) +} + +// Most of these variables are sourced from the Intel MP Spec 1.4. +const SMP_MAGIC_IDENT: [c_char; 4] = char_array!(c_char; '_', 'M', 'P', '_'); +const MPC_SIGNATURE: [c_char; 4] = char_array!(c_char; 'P', 'C', 'M', 'P'); +const MPC_SPEC: i8 = 4; +const MPC_OEM: [c_char; 8] = char_array!(c_char; 'A', 'L', 'I', 'C', 'L', 'O', 'U', 'D'); +const MPC_PRODUCT_ID: [c_char; 12] = + char_array!(c_char; 'D', 'R', 'A', 'G', 'O', 'N', 'B', 'A', 'L', 'L', '1', '0'); +const BUS_TYPE_ISA: [u8; 6] = char_array!(u8; 'I', 'S', 'A', ' ', ' ', ' '); +const IO_APIC_DEFAULT_PHYS_BASE: u32 = 0xfec0_0000; // source: linux/arch/x86/include/asm/apicdef.h +const APIC_DEFAULT_PHYS_BASE: u32 = 0xfee0_0000; // source: linux/arch/x86/include/asm/apicdef.h + +/// APIC version in mptable +pub const APIC_VERSION: u8 = 0x14; + +const CPU_STEPPING: u32 = 0x600; +const CPU_FEATURE_APIC: u32 = 0x200; +const CPU_FEATURE_FPU: u32 = 0x001; + +const BUS_ID_ISA: u8 = 0; + +fn compute_checksum(v: &T) -> u8 { + // Safe because we are only reading the bytes within the size of the `T` reference `v`. 
+ let v_slice = unsafe { slice::from_raw_parts(v as *const T as *const u8, mem::size_of::()) }; + let mut checksum: u8 = 0; + for i in v_slice.iter() { + checksum = checksum.wrapping_add(*i); + } + checksum +} + +fn mpf_intel_compute_checksum(v: &mpspec::mpf_intel) -> u8 { + let checksum = compute_checksum(v).wrapping_sub(v.checksum); + (!checksum).wrapping_add(1) +} + +fn compute_mp_size(num_cpus: u8) -> usize { + mem::size_of::() + + mem::size_of::() + + mem::size_of::() * (num_cpus as usize) + + mem::size_of::() + + mem::size_of::() * 2 + + mem::size_of::() * 16 + + mem::size_of::() * 2 +} + +/// Performs setup of the MP table for the given `num_cpus` +pub fn setup_mptable(mem: &M, boot_cpus: u8, max_cpus: u8) -> Result<()> { + if boot_cpus > max_cpus { + return Err(Error::TooManyBootCpus); + } + if u32::from(max_cpus) > MAX_SUPPORTED_CPUS { + return Err(Error::TooManyCpus); + } + + // Used to keep track of the next base pointer into the MP table. + let mut base_mp = GuestAddress(MPTABLE_START); + + let mp_size = compute_mp_size(max_cpus); + + let mut checksum: u8 = 0; + let ioapicid: u8 = max_cpus + 1; + + // The checked_add here ensures the all of the following base_mp.unchecked_add's will be without + // overflow. + if let Some(end_mp) = base_mp.checked_add((mp_size - 1) as u64) { + if !mem.address_in_range(end_mp) { + return Err(Error::NotEnoughMemory); + } + } else { + return Err(Error::AddressOverflow); + } + + mem.read_from(base_mp, &mut io::repeat(0), mp_size) + .map_err(|_| Error::Clear)?; + + { + let mut mpf_intel = MpfIntelWrapper(mpspec::mpf_intel::default()); + let size = mem::size_of::() as u64; + mpf_intel.0.signature = SMP_MAGIC_IDENT; + mpf_intel.0.length = 1; + mpf_intel.0.specification = 4; + mpf_intel.0.physptr = (base_mp.raw_value() + size) as u32; + mpf_intel.0.checksum = mpf_intel_compute_checksum(&mpf_intel.0); + mem.write_obj(mpf_intel, base_mp) + .map_err(|_| Error::WriteMpfIntel)?; + base_mp = base_mp.unchecked_add(size); + } + + // We set the location of the mpc_table here but we can't fill it out until we have the length + // of the entire table later. 
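+
+    // Aside (illustrative, not part of the original patch): every MP structure checksum in
+    // this function is finalized as the two's complement of the accumulated byte sum, so
+    // that all bytes of the finished structure sum to zero modulo 256. A tiny worked
+    // example of that rule:
+    #[test]
+    fn checksum_makes_bytes_sum_to_zero() {
+        let payload: [u8; 4] = [0x12, 0x34, 0x56, 0x78];
+        let sum: u8 = payload.iter().fold(0u8, |acc, b| acc.wrapping_add(*b)); // 0x14
+        // Same rule as `(!checksum).wrapping_add(1)` used for mpc_table below.
+        let checksum = (!sum).wrapping_add(1); // 0xec
+        assert_eq!(sum.wrapping_add(checksum), 0);
+    }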
+ let table_base = base_mp; + base_mp = base_mp.unchecked_add(mem::size_of::() as u64); + + { + let size = mem::size_of::() as u64; + for cpu_id in 0..max_cpus { + let mut mpc_cpu = MpcCpuWrapper(mpspec::mpc_cpu::default()); + mpc_cpu.0.type_ = mpspec::MP_PROCESSOR as u8; + mpc_cpu.0.apicid = cpu_id; + mpc_cpu.0.apicver = APIC_VERSION; + if cpu_id < boot_cpus { + mpc_cpu.0.cpuflag |= mpspec::CPU_ENABLED as u8; + } + if cpu_id == 0 { + mpc_cpu.0.cpuflag |= mpspec::CPU_BOOTPROCESSOR as u8; + } + mpc_cpu.0.cpufeature = CPU_STEPPING; + mpc_cpu.0.featureflag = CPU_FEATURE_APIC | CPU_FEATURE_FPU; + mem.write_obj(mpc_cpu, base_mp) + .map_err(|_| Error::WriteMpcCpu)?; + base_mp = base_mp.unchecked_add(size); + checksum = checksum.wrapping_add(compute_checksum(&mpc_cpu.0)); + } + } + + { + let size = mem::size_of::() as u64; + let mut mpc_bus = MpcBusWrapper(mpspec::mpc_bus::default()); + mpc_bus.0.type_ = mpspec::MP_BUS as u8; + mpc_bus.0.busid = BUS_ID_ISA; + mpc_bus.0.bustype = BUS_TYPE_ISA; + mem.write_obj(mpc_bus, base_mp) + .map_err(|_| Error::WriteMpcBus)?; + base_mp = base_mp.unchecked_add(size); + checksum = checksum.wrapping_add(compute_checksum(&mpc_bus.0)); + } + + { + let size = mem::size_of::() as u64; + let mut mpc_ioapic = MpcIoapicWrapper(mpspec::mpc_ioapic::default()); + mpc_ioapic.0.type_ = mpspec::MP_IOAPIC as u8; + mpc_ioapic.0.apicid = ioapicid; + mpc_ioapic.0.apicver = APIC_VERSION; + mpc_ioapic.0.flags = mpspec::MPC_APIC_USABLE as u8; + mpc_ioapic.0.apicaddr = IO_APIC_DEFAULT_PHYS_BASE; + mem.write_obj(mpc_ioapic, base_mp) + .map_err(|_| Error::WriteMpcIoapic)?; + base_mp = base_mp.unchecked_add(size); + checksum = checksum.wrapping_add(compute_checksum(&mpc_ioapic.0)); + } + // Per kvm_setup_default_irq_routing() in kernel + for i in 0..16 { + let size = mem::size_of::() as u64; + let mut mpc_intsrc = MpcIntsrcWrapper(mpspec::mpc_intsrc::default()); + mpc_intsrc.0.type_ = mpspec::MP_INTSRC as u8; + mpc_intsrc.0.irqtype = mpspec::mp_irq_source_types_mp_INT as u8; + mpc_intsrc.0.irqflag = mpspec::MP_IRQDIR_DEFAULT as u16; + mpc_intsrc.0.srcbus = BUS_ID_ISA; + mpc_intsrc.0.srcbusirq = i; + mpc_intsrc.0.dstapic = ioapicid; + mpc_intsrc.0.dstirq = i; + mem.write_obj(mpc_intsrc, base_mp) + .map_err(|_| Error::WriteMpcIntsrc)?; + base_mp = base_mp.unchecked_add(size); + checksum = checksum.wrapping_add(compute_checksum(&mpc_intsrc.0)); + } + { + let size = mem::size_of::() as u64; + let mut mpc_lintsrc = MpcLintsrcWrapper(mpspec::mpc_lintsrc::default()); + mpc_lintsrc.0.type_ = mpspec::MP_LINTSRC as u8; + mpc_lintsrc.0.irqtype = mpspec::mp_irq_source_types_mp_ExtINT as u8; + mpc_lintsrc.0.irqflag = mpspec::MP_IRQDIR_DEFAULT as u16; + mpc_lintsrc.0.srcbusid = 0; + mpc_lintsrc.0.srcbusirq = 0; + mpc_lintsrc.0.destapic = 0; + mpc_lintsrc.0.destapiclint = 0; + mem.write_obj(mpc_lintsrc, base_mp) + .map_err(|_| Error::WriteMpcLintsrc)?; + base_mp = base_mp.unchecked_add(size); + checksum = checksum.wrapping_add(compute_checksum(&mpc_lintsrc.0)); + } + { + let size = mem::size_of::() as u64; + let mut mpc_lintsrc = MpcLintsrcWrapper(mpspec::mpc_lintsrc::default()); + mpc_lintsrc.0.type_ = mpspec::MP_LINTSRC as u8; + mpc_lintsrc.0.irqtype = mpspec::mp_irq_source_types_mp_NMI as u8; + mpc_lintsrc.0.irqflag = mpspec::MP_IRQDIR_DEFAULT as u16; + mpc_lintsrc.0.srcbusid = 0; + mpc_lintsrc.0.srcbusirq = 0; + mpc_lintsrc.0.destapic = 0xFF; /* to all local APICs */ + mpc_lintsrc.0.destapiclint = 1; + mem.write_obj(mpc_lintsrc, base_mp) + .map_err(|_| Error::WriteMpcLintsrc)?; + base_mp = 
base_mp.unchecked_add(size); + checksum = checksum.wrapping_add(compute_checksum(&mpc_lintsrc.0)); + } + + // At this point we know the size of the mp_table. + let table_end = base_mp; + + let mpc_table_size = mem::size_of::() as u64; + base_mp = base_mp.unchecked_add(mpc_table_size); + let oem_count = 0; + let oem_size = 0; + let oem_ptr = base_mp; + + { + let mut mpc_table = MpcTableWrapper(mpspec::mpc_table::default()); + mpc_table.0.signature = MPC_SIGNATURE; + // it's safe to use unchecked_offset_from because + // table_end > table_base + mpc_table.0.length = table_end.unchecked_offset_from(table_base) as u16; + mpc_table.0.spec = MPC_SPEC; + mpc_table.0.oem = MPC_OEM; + mpc_table.0.oemcount = oem_count; + mpc_table.0.oemptr = oem_ptr.0 as u32; + mpc_table.0.oemsize = oem_size as u16; + mpc_table.0.productid = MPC_PRODUCT_ID; + mpc_table.0.lapic = APIC_DEFAULT_PHYS_BASE; + checksum = checksum.wrapping_add(compute_checksum(&mpc_table.0)); + mpc_table.0.checksum = (!checksum).wrapping_add(1) as i8; + mem.write_obj(mpc_table, table_base) + .map_err(|_| Error::WriteMpcTable)?; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use vm_memory::{Bytes, GuestMemoryMmap}; + + fn table_entry_size(type_: u8) -> usize { + match u32::from(type_) { + mpspec::MP_PROCESSOR => mem::size_of::(), + mpspec::MP_BUS => mem::size_of::(), + mpspec::MP_IOAPIC => mem::size_of::(), + mpspec::MP_INTSRC => mem::size_of::(), + mpspec::MP_LINTSRC => mem::size_of::(), + _ => panic!("unrecognized mpc table entry type: {}", type_), + } + } + + #[test] + fn bounds_check() { + let num_cpus = 4; + let mem = GuestMemoryMmap::<()>::from_ranges(&[( + GuestAddress(MPTABLE_START), + compute_mp_size(num_cpus), + )]) + .unwrap(); + + setup_mptable(&mem, num_cpus, num_cpus).unwrap(); + } + + #[test] + fn bounds_check_fails() { + let num_cpus = 4; + let mem = GuestMemoryMmap::<()>::from_ranges(&[( + GuestAddress(MPTABLE_START), + compute_mp_size(num_cpus) - 1, + )]) + .unwrap(); + + assert!(setup_mptable(&mem, num_cpus, num_cpus).is_err()); + } + + #[test] + fn mpf_intel_checksum() { + let num_cpus = 1; + let mem = GuestMemoryMmap::<()>::from_ranges(&[( + GuestAddress(MPTABLE_START), + compute_mp_size(num_cpus), + )]) + .unwrap(); + + setup_mptable(&mem, num_cpus, num_cpus).unwrap(); + + let mpf_intel: MpfIntelWrapper = mem.read_obj(GuestAddress(MPTABLE_START)).unwrap(); + + assert_eq!( + mpf_intel_compute_checksum(&mpf_intel.0), + mpf_intel.0.checksum + ); + } + + #[test] + fn mpc_table_checksum() { + let num_cpus = 4; + let mem = GuestMemoryMmap::<()>::from_ranges(&[( + GuestAddress(MPTABLE_START), + compute_mp_size(num_cpus), + )]) + .unwrap(); + + setup_mptable(&mem, num_cpus, num_cpus).unwrap(); + + let mpf_intel: MpfIntelWrapper = mem.read_obj(GuestAddress(MPTABLE_START)).unwrap(); + let mpc_offset = GuestAddress(u64::from(mpf_intel.0.physptr)); + let mpc_table: MpcTableWrapper = mem.read_obj(mpc_offset).unwrap(); + + struct Sum(u8); + impl io::Write for Sum { + fn write(&mut self, buf: &[u8]) -> io::Result { + for v in buf.iter() { + self.0 = self.0.wrapping_add(*v); + } + Ok(buf.len()) + } + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } + } + + let mut sum = Sum(0); + mem.write_to(mpc_offset, &mut sum, mpc_table.0.length as usize) + .unwrap(); + assert_eq!(sum.0, 0); + } + + #[test] + fn max_cpu_entry_count() { + let mem = GuestMemoryMmap::<()>::from_ranges(&[( + GuestAddress(MPTABLE_START), + compute_mp_size(MAX_SUPPORTED_CPUS as u8), + )]) + .unwrap(); + + for i in 0..MAX_SUPPORTED_CPUS as u8 { + 
setup_mptable(&mem, i, i).unwrap(); + + let mpf_intel: MpfIntelWrapper = mem.read_obj(GuestAddress(MPTABLE_START)).unwrap(); + let mpc_offset = GuestAddress(u64::from(mpf_intel.0.physptr)); + let mpc_table: MpcTableWrapper = mem.read_obj(mpc_offset).unwrap(); + let mpc_end = mpc_offset + .checked_add(u64::from(mpc_table.0.length)) + .unwrap(); + + let mut entry_offset = mpc_offset + .checked_add(mem::size_of::() as u64) + .unwrap(); + let mut max_cpu_count = 0; + while entry_offset < mpc_end { + let entry_type: u8 = mem.read_obj(entry_offset).unwrap(); + entry_offset = entry_offset + .checked_add(table_entry_size(entry_type) as u64) + .unwrap(); + assert!(entry_offset <= mpc_end); + if u32::from(entry_type) == mpspec::MP_PROCESSOR { + max_cpu_count += 1; + } + } + assert_eq!(max_cpu_count, i); + } + } + + #[test] + fn boot_cpu_entry_count() { + let mem = GuestMemoryMmap::<()>::from_ranges(&[( + GuestAddress(MPTABLE_START), + compute_mp_size(MAX_SUPPORTED_CPUS as u8), + )]) + .unwrap(); + + for i in 0..MAX_SUPPORTED_CPUS as u8 { + setup_mptable(&mem, i, MAX_SUPPORTED_CPUS as u8).unwrap(); + + let mpf_intel: MpfIntelWrapper = mem.read_obj(GuestAddress(MPTABLE_START)).unwrap(); + let mpc_offset = GuestAddress(u64::from(mpf_intel.0.physptr)); + let mpc_table: MpcTableWrapper = mem.read_obj(mpc_offset).unwrap(); + let mpc_end = mpc_offset + .checked_add(u64::from(mpc_table.0.length)) + .unwrap(); + + let mut entry_offset = mpc_offset + .checked_add(mem::size_of::() as u64) + .unwrap(); + let mut boot_cpu_count = 0; + for _ in 0..MAX_SUPPORTED_CPUS { + let mpc_cpu: MpcCpuWrapper = mem.read_obj(entry_offset).unwrap(); + if mpc_cpu.0.cpuflag & mpspec::CPU_ENABLED as u8 != 0 { + boot_cpu_count += 1; + } + entry_offset = entry_offset + .checked_add(table_entry_size(mpc_cpu.0.type_) as u64) + .unwrap(); + assert!(entry_offset <= mpc_end); + } + assert_eq!(boot_cpu_count, i); + } + } + + #[test] + fn cpu_entry_count_max() { + let cpus = MAX_SUPPORTED_CPUS + 1; + let mem = GuestMemoryMmap::<()>::from_ranges(&[( + GuestAddress(MPTABLE_START), + compute_mp_size(cpus as u8), + )]) + .unwrap(); + + let result = setup_mptable(&mem, cpus as u8, cpus as u8).unwrap_err(); + assert_eq!(result, Error::TooManyCpus); + } +} diff --git a/src/dragonball/src/dbs_device/Cargo.toml b/src/dragonball/src/dbs_device/Cargo.toml new file mode 100644 index 000000000000..594698ce2976 --- /dev/null +++ b/src/dragonball/src/dbs_device/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "dbs-device" +version = "0.2.0" +authors = ["Alibaba Dragonball Team"] +description = "Device model for Dragonball Sandbox" +license = "Apache-2.0" +edition = "2018" +homepage = "https://github.com/openanolis/dragonball-sandbox" +repository = "https://github.com/openanolis/dragonball-sandbox/tree/main/crates/dbs-device" +keywords = ["dragonball", "secure-sandbox", "device", "resource"] +readme = "README.md" + +[dependencies] +thiserror = "1" diff --git a/src/dragonball/src/dbs_device/LICENSE b/src/dragonball/src/dbs_device/LICENSE new file mode 120000 index 000000000000..30cff7403da0 --- /dev/null +++ b/src/dragonball/src/dbs_device/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/dragonball/src/dbs_device/README.md b/src/dragonball/src/dbs_device/README.md new file mode 100644 index 000000000000..5f13c00cd300 --- /dev/null +++ b/src/dragonball/src/dbs_device/README.md @@ -0,0 +1,141 @@ +# dbs-device + +The `dbs-device` crate, as a counterpart of [`vm-device`], defines device model for the Dragonball Secure Sandbox. 
+The `dbs-device` crate shares some common concepts and data structures with [`vm-device`], but it also diverges from +[`vm-device`] due to different VMM designs. + +The dbs-device crate provides: + +- [`DeviceIo`] and [`DeviceIoMut`]: trait for device to handle trapped MMIO/PIO access requests. +- [`IoManager`]: IO manager to handle trapped MMIO/PIO access requests. +- [`IoManagerContext`]: trait for IO manager context object to support device hotplug at runtime. +- [`ResourceConstraint`], [Resource] and [`DeviceResources`]: resource allocation requirements and constraints. + +## Design + +The dbs-device crate is designed to support the virtual machine's device model. + +The core concepts of device model are [Port I/O](https://wiki.osdev.org/I/O_Ports) and +[Memory-mapped I/O](https://en.wikipedia.org/wiki/Memory-mapped_I/O), +which are two main methods of performing I/O between CPU and devices. + +The device model provided by the dbs-device crate works as below: +- The VMM creates a global resource manager, device manager and IO manager. +- The device manager creates virtual devices configured by the VMM + - create device object + - query device allocation requirements and constraints, the device returns an array of [`ResourceConstraint`]. + - allocate resources for device from the resource manager, resource manager returns a [`DeviceResources`] object. + - assign the allocated resources to the device. + - The device manager register devices to the IO manager. + - query trapped address ranges by calling [`DeviceIo::get_trapped_io_resources()`] + - register the device to the IO manager with trapped address range + - The guest access those trapped MMIO/PIO address ranges, and triggers VM IO Exit events to trap into the VMM. + - The VMM parses the VM exit events and dispatch those events to the IO manager. + - The IO manager looks up device by searching trapped address ranges, and call the device's [`DeviceIO`] + handler to process those trapped MMIO/PIO access requests. + +## Usage + +First, a VM needs to create an [`IoManager`] to help it dispatch I/O events to devices. +And an [`IoManager`] has two types of bus, the PIO bus and the MMIO bus, to handle different types of IO. + +Then, when creating a device, it needs to implement the [`DeviceIo`] or [`DeviceIoMut`] trait to receive read or write +events send by driver in guest OS: +- `read()` and `write()` methods is used to deal with MMIO events +- `pio_read()` and `pio_write()` methods is used to deal with PIO events +- `get_assigned_resources()` method is used to get all resources assigned to the device +- `get_trapped_io_resources()` method is used to get only MMIO/PIO resources assigned to the device + +The difference of [`DeviceIo`] and [`DeviceIoMut`] is the reference type of `self` passed to method: +- [`DeviceIo`] trait would pass a immutable reference `&self` to method, so the implementation of device would provide + interior mutability and thread-safe protection itself +- [`DeviceIoMut`] trait would pass a mutable reference `&mut self` to method, and it can give mutability to device + which is wrapped by `Mutex` directly to simplify the difficulty of achieving interior mutability. + +Additionally, the [`DeviceIo`] trait has an auto implement for `Mutex` + +Last, the device needs to be added to [`IoManager`] by using `register_device_io()`, and the function would add device +to PIO bus and/or MMIO bus by the resources it have. 
If a device has not only MMIO resource but PIO resource, +it would be added to both pio bus and mmio bus. So the device would wrapped by `Arc`. + +From now on, the [`IoManager`] will dispatch I/O requests for the registered address ranges to the device. + +## Examples + + +```rust +use std::sync::Arc; + +use dbs_device::device_manager::IoManager; +use dbs_device::resources::{DeviceResources, Resource}; +use dbs_device::{DeviceIo, IoAddress, PioAddress}; + +struct DummyDevice {} + +impl DeviceIo for DummyDevice { + fn read(&self, base: IoAddress, offset: IoAddress, data: &mut [u8]) { + println!( + "mmio read, base: 0x{:x}, offset: 0x{:x}", + base.raw_value(), + offset.raw_value() + ); + } + + fn write(&self, base: IoAddress, offset: IoAddress, data: &[u8]) { + println!( + "mmio write, base: 0x{:x}, offset: 0x{:x}", + base.raw_value(), + offset.raw_value() + ); + } + + fn pio_read(&self, base: PioAddress, offset: PioAddress, data: &mut [u8]) { + println!( + "pio read, base: 0x{:x}, offset: 0x{:x}", + base.raw_value(), + offset.raw_value() + ); + } + + fn pio_write(&self, base: PioAddress, offset: PioAddress, data: &[u8]) { + println!( + "pio write, base: 0x{:x}, offset: 0x{:x}", + base.raw_value(), + offset.raw_value() + ); + } +} + +// Allocate resources for device +let mut resources = DeviceResources::new(); +resources.append(Resource::MmioAddressRange { + base: 0, + size: 4096, +}); +resources.append(Resource::PioAddressRange { base: 0, size: 32 }); + +// Register device to `IoManager` with resources +let device = Arc::new(DummyDevice {}); +let mut manager = IoManager::new(); +manager.register_device_io(device, &resources).unwrap(); + +// Dispatch I/O event from `IoManager` to device +manager.mmio_write(0, &vec![0, 1]).unwrap(); + +let mut buffer = vec![0; 4]; +manager.pio_read(0, &mut buffer); +``` + +## License + +This project is licensed under [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). + +[DeviceIo::get_trapped_io_resources()]: https://docs.rs/dbs-device/0.1.0/dbs_device/trait.DeviceIo.html#method.get_trapped_io_resources +[DeviceIo]: src/lib.rs +[DeviceIoMut]: src/lib.rs +[IoManager]: src/device_manager.rs +[IoManagerContext]: src/device_manager.rs +[ResourceConstraint]: src/resources.rs +[Resource]: src/resources.rs +[DeviceResources]: src/resources.rs +[vm-device]: https://github.com/rust-vmm/vm-device diff --git a/src/dragonball/src/dbs_device/src/device_manager.rs b/src/dragonball/src/dbs_device/src/device_manager.rs new file mode 100644 index 000000000000..63aef81e9c93 --- /dev/null +++ b/src/dragonball/src/dbs_device/src/device_manager.rs @@ -0,0 +1,695 @@ +// Copyright 2020-2022 Alibaba Cloud. All Rights Reserved. +// Copyright © 2019 Intel Corporation. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! IO Device Manager to handle trapped MMIO/PIO access requests. +//! +//! The [IoManager](self::IoManager) is responsible for managing all trapped MMIO/PIO accesses for +//! virtual devices. It cooperates with the Secure Sandbox/VMM and device drivers to handle trapped +//! accesses. The flow is as below: +//! - device drivers allocate resources from the VMM/resource manager, including trapped MMIO/PIO +//! address ranges. +//! - the device manager registers devices to the [IoManager](self::IoManager) with trapped MMIO/PIO +//! address ranges. +//! - VM IO Exit events get triggered when the guest accesses those trapped address ranges. +//! - the vmm handle those VM IO Exit events, and dispatch them to the [IoManager]. +//! 
- the [IoManager] invokes registered callbacks/device drivers to handle those accesses, if there +//! is a device registered for the address. +//! +//! # Examples +//! +//! Creating a dummy deivce which implement DeviceIo trait, and register it to [IoManager] with +//! trapped MMIO/PIO address ranges: +//! +//! ``` +//! use std::sync::Arc; +//! use std::any::Any; +//! +//! use dbs_device::device_manager::IoManager; +//! use dbs_device::resources::{DeviceResources, Resource}; +//! use dbs_device::{DeviceIo, IoAddress, PioAddress}; +//! +//! struct DummyDevice {} +//! +//! impl DeviceIo for DummyDevice { +//! fn read(&self, base: IoAddress, offset: IoAddress, data: &mut [u8]) { +//! println!( +//! "mmio read, base: 0x{:x}, offset: 0x{:x}", +//! base.raw_value(), +//! offset.raw_value() +//! ); +//! } +//! +//! fn write(&self, base: IoAddress, offset: IoAddress, data: &[u8]) { +//! println!( +//! "mmio write, base: 0x{:x}, offset: 0x{:x}", +//! base.raw_value(), +//! offset.raw_value() +//! ); +//! } +//! +//! fn pio_read(&self, base: PioAddress, offset: PioAddress, data: &mut [u8]) { +//! println!( +//! "pio read, base: 0x{:x}, offset: 0x{:x}", +//! base.raw_value(), +//! offset.raw_value() +//! ); +//! } +//! +//! fn pio_write(&self, base: PioAddress, offset: PioAddress, data: &[u8]) { +//! println!( +//! "pio write, base: 0x{:x}, offset: 0x{:x}", +//! base.raw_value(), +//! offset.raw_value() +//! ); +//! } +//! +//! fn as_any(&self) -> &dyn Any { +//! self +//! } +//! } +//! +//! // Allocate resources for device +//! let mut resources = DeviceResources::new(); +//! resources.append(Resource::MmioAddressRange { +//! base: 0, +//! size: 4096, +//! }); +//! resources.append(Resource::PioAddressRange { base: 0, size: 32 }); +//! +//! // Register device to `IoManager` with resources +//! let device = Arc::new(DummyDevice {}); +//! let mut manager = IoManager::new(); +//! manager.register_device_io(device, &resources).unwrap(); +//! +//! // Dispatch I/O event from `IoManager` to device +//! manager.mmio_write(0, &vec![0, 1]).unwrap(); +//! { +//! let mut buffer = vec![0; 4]; +//! manager.pio_read(0, &mut buffer); +//! } +//! ``` + +use std::cmp::{Ord, Ordering, PartialEq, PartialOrd}; +use std::collections::btree_map::BTreeMap; +use std::ops::Deref; +use std::result; +use std::sync::Arc; + +use thiserror::Error; + +use crate::resources::Resource; +use crate::{DeviceIo, IoAddress, IoSize, PioAddress}; + +/// Error types for `IoManager` related operations. +#[derive(Error, Debug)] +pub enum Error { + /// The inserting device overlaps with a current device. + #[error("device address conflicts with existing devices")] + DeviceOverlap, + /// The device doesn't exist. + #[error("no such device")] + NoDevice, +} + +/// A specialized version of [std::result::Result] for [IoManager] realted operations. +pub type Result = result::Result; + +/// Structure representing an IO address range. 
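+///
+/// Note: equality and ordering are intentionally defined on `base` only (see the `PartialEq` and
+/// `Ord` implementations below), so an `IoRange` can be used as a `BTreeMap` key and looked up by
+/// address regardless of its size.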
+#[derive(Debug, Copy, Clone, Eq)] +pub struct IoRange { + base: IoAddress, + size: IoSize, +} + +impl IoRange { + fn new_pio_range(base: u16, size: u16) -> Self { + IoRange { + base: IoAddress(base as u64), + size: IoSize(size as u64), + } + } + + fn new_mmio_range(base: u64, size: u64) -> Self { + IoRange { + base: IoAddress(base), + size: IoSize(size), + } + } +} + +impl PartialEq for IoRange { + fn eq(&self, other: &IoRange) -> bool { + self.base == other.base + } +} + +impl Ord for IoRange { + fn cmp(&self, other: &IoRange) -> Ordering { + self.base.cmp(&other.base) + } +} + +impl PartialOrd for IoRange { + fn partial_cmp(&self, other: &IoRange) -> Option { + self.base.partial_cmp(&other.base) + } +} + +/// IO manager to handle all trapped MMIO/PIO access requests. +/// +/// All devices handling trapped MMIO/PIO accesses should register themself to the IO manager +/// with trapped address ranges. When guest vm accesses those trapped MMIO/PIO address ranges, +/// VM IO Exit events will be triggered and the VMM dispatches those events to IO manager. +/// And then the registered callbacks will invoked by IO manager. +#[derive(Clone, Default)] +pub struct IoManager { + /// Range mapping for VM exit pio operations. + pio_bus: BTreeMap>, + /// Range mapping for VM exit mmio operations. + mmio_bus: BTreeMap>, +} + +impl IoManager { + /// Create a new instance of [IoManager]. + pub fn new() -> Self { + IoManager::default() + } + + /// Register a new device to the [IoManager], with trapped MMIO/PIO address ranges. + /// + /// # Arguments + /// + /// * `device`: device object to handle trapped IO access requests + /// * `resources`: resources representing trapped MMIO/PIO address ranges. Only MMIO/PIO address + /// ranges will be handled, and other types of resource will be ignored. So the caller does + /// not need to filter out non-MMIO/PIO resources. + pub fn register_device_io( + &mut self, + device: Arc, + resources: &[Resource], + ) -> Result<()> { + for (idx, res) in resources.iter().enumerate() { + match *res { + Resource::PioAddressRange { base, size } => { + if self + .pio_bus + .insert(IoRange::new_pio_range(base, size), device.clone()) + .is_some() + { + // Rollback registered resources. + self.unregister_device_io(&resources[0..idx]) + .expect("failed to unregister devices"); + + return Err(Error::DeviceOverlap); + } + } + Resource::MmioAddressRange { base, size } => { + if self + .mmio_bus + .insert(IoRange::new_mmio_range(base, size), device.clone()) + .is_some() + { + // Rollback registered resources. + self.unregister_device_io(&resources[0..idx]) + .expect("failed to unregister devices"); + + return Err(Error::DeviceOverlap); + } + } + _ => continue, + } + } + Ok(()) + } + + /// Unregister a device from `IoManager`. + /// + /// # Arguments + /// + /// * `resources`: resource list containing all trapped address ranges for the device. + pub fn unregister_device_io(&mut self, resources: &[Resource]) -> Result<()> { + for res in resources.iter() { + match *res { + Resource::PioAddressRange { base, size } => { + self.pio_bus.remove(&IoRange::new_pio_range(base, size)); + } + Resource::MmioAddressRange { base, size } => { + self.mmio_bus.remove(&IoRange::new_mmio_range(base, size)); + } + _ => continue, + } + } + Ok(()) + } + + /// Handle VM IO Exit events triggered by trapped MMIO read accesses. + /// + /// Return error if failed to get the device. 
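+    ///
+    /// `addr` is the absolute guest address of the access; the matching device is invoked with its
+    /// registered range base and the offset of `addr` within that range.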
+ pub fn mmio_read(&self, addr: u64, data: &mut [u8]) -> Result<()> { + self.get_mmio_device(IoAddress(addr)) + .map(|(device, base)| device.read(base, IoAddress(addr - base.raw_value()), data)) + .ok_or(Error::NoDevice) + } + + /// Handle VM IO Exit events triggered by trapped MMIO write accesses. + /// + /// Return error if failed to get the device. + pub fn mmio_write(&self, addr: u64, data: &[u8]) -> Result<()> { + self.get_mmio_device(IoAddress(addr)) + .map(|(device, base)| device.write(base, IoAddress(addr - base.raw_value()), data)) + .ok_or(Error::NoDevice) + } + + /// Get the registered device handling the trapped MMIO address `addr`. + fn get_mmio_device(&self, addr: IoAddress) -> Option<(&Arc, IoAddress)> { + let range = IoRange::new_mmio_range(addr.raw_value(), 0); + if let Some((range, dev)) = self.mmio_bus.range(..=&range).nth_back(0) { + if (addr.raw_value() - range.base.raw_value()) < range.size.raw_value() { + return Some((dev, range.base)); + } + } + None + } +} + +impl IoManager { + /// Handle VM IO Exit events triggered by trapped PIO read accesses. + /// + /// Return error if failed to get the device. + pub fn pio_read(&self, addr: u16, data: &mut [u8]) -> Result<()> { + self.get_pio_device(PioAddress(addr)) + .map(|(device, base)| device.pio_read(base, PioAddress(addr - base.raw_value()), data)) + .ok_or(Error::NoDevice) + } + + /// Handle VM IO Exit events triggered by trapped PIO write accesses. + /// + /// Return error if failed to get the device. + pub fn pio_write(&self, addr: u16, data: &[u8]) -> Result<()> { + self.get_pio_device(PioAddress(addr)) + .map(|(device, base)| device.pio_write(base, PioAddress(addr - base.raw_value()), data)) + .ok_or(Error::NoDevice) + } + + /// Get the registered device handling the trapped PIO address `addr`. + fn get_pio_device(&self, addr: PioAddress) -> Option<(&Arc, PioAddress)> { + let range = IoRange::new_pio_range(addr.raw_value(), 0); + if let Some((range, dev)) = self.pio_bus.range(..=&range).nth_back(0) { + if (addr.raw_value() as u64 - range.base.raw_value()) < range.size.raw_value() { + return Some((dev, PioAddress(range.base.0 as u16))); + } + } + None + } +} + +impl PartialEq for IoManager { + fn eq(&self, other: &IoManager) -> bool { + if self.pio_bus.len() != other.pio_bus.len() { + return false; + } + if self.mmio_bus.len() != other.mmio_bus.len() { + return false; + } + + for (io_range, device_io) in self.pio_bus.iter() { + if !other.pio_bus.contains_key(io_range) { + return false; + } + let other_device_io = &other.pio_bus[io_range]; + if device_io.get_trapped_io_resources() != other_device_io.get_trapped_io_resources() { + return false; + } + } + + for (io_range, device_io) in self.mmio_bus.iter() { + if !other.mmio_bus.contains_key(io_range) { + return false; + } + let other_device_io = &other.mmio_bus[io_range]; + if device_io.get_trapped_io_resources() != other_device_io.get_trapped_io_resources() { + return false; + } + } + + true + } +} + +/// Trait for IO manager context object to support device hotplug at runtime. +/// +/// The `IoManagerContext` objects are passed to devices by the IO manager, so the devices could +/// use it to hot-add/hot-remove other devices at runtime. It provides a transaction mechanism +/// to hot-add/hot-remove devices. +pub trait IoManagerContext { + /// Type of context object passed to the callbacks. + type Context; + + /// Begin a transaction and return a context object. + /// + /// The returned context object must be passed to commit_tx() or cancel_tx() later. 
+ fn begin_tx(&self) -> Self::Context; + + /// Commit the transaction. + fn commit_tx(&self, ctx: Self::Context); + + /// Cancel the transaction. + fn cancel_tx(&self, ctx: Self::Context); + + /// Register a new device with its associated resources to the IO manager. + /// + /// # Arguments + /// + /// * `ctx`: context object returned by begin_tx(). + /// * `device`: device instance object to be registered + /// * `resources`: resources representing trapped MMIO/PIO address ranges. Only MMIO/PIO address + /// ranges will be handled, and other types of resource will be ignored. So the caller does + /// not need to filter out non-MMIO/PIO resources. + fn register_device_io( + &self, + ctx: &mut Self::Context, + device: Arc, + resources: &[Resource], + ) -> Result<()>; + + /// Unregister a device from the IO manager. + /// + /// # Arguments + /// + /// * `ctx`: context object returned by begin_tx(). + /// * `resources`: resource list containing all trapped address ranges for the device. + fn unregister_device_io(&self, ctx: &mut Self::Context, resources: &[Resource]) -> Result<()>; +} + +impl IoManagerContext for Arc { + type Context = T::Context; + + fn begin_tx(&self) -> Self::Context { + self.deref().begin_tx() + } + + fn commit_tx(&self, ctx: Self::Context) { + self.deref().commit_tx(ctx) + } + + fn cancel_tx(&self, ctx: Self::Context) { + self.deref().cancel_tx(ctx) + } + + fn register_device_io( + &self, + ctx: &mut Self::Context, + device: Arc, + resources: &[Resource], + ) -> std::result::Result<(), Error> { + self.deref().register_device_io(ctx, device, resources) + } + + fn unregister_device_io( + &self, + ctx: &mut Self::Context, + resources: &[Resource], + ) -> std::result::Result<(), Error> { + self.deref().unregister_device_io(ctx, resources) + } +} + +#[cfg(test)] +mod tests { + use std::error::Error; + use std::sync::Mutex; + + use super::*; + use crate::resources::DeviceResources; + + const PIO_ADDRESS_SIZE: u16 = 4; + const PIO_ADDRESS_BASE: u16 = 0x40; + const MMIO_ADDRESS_SIZE: u64 = 0x8765_4321; + const MMIO_ADDRESS_BASE: u64 = 0x1234_5678; + const LEGACY_IRQ: u32 = 4; + const CONFIG_DATA: u32 = 0x1234; + + struct DummyDevice { + config: Mutex, + } + + impl DummyDevice { + fn new(config: u32) -> Self { + DummyDevice { + config: Mutex::new(config), + } + } + } + + impl DeviceIo for DummyDevice { + fn read(&self, _base: IoAddress, _offset: IoAddress, data: &mut [u8]) { + if data.len() > 4 { + return; + } + for (idx, iter) in data.iter_mut().enumerate() { + let config = self.config.lock().expect("failed to acquire lock"); + *iter = (*config >> (idx * 8) & 0xff) as u8; + } + } + + fn write(&self, _base: IoAddress, _offset: IoAddress, data: &[u8]) { + let mut config = self.config.lock().expect("failed to acquire lock"); + *config = u32::from(data[0]) & 0xff; + } + + fn pio_read(&self, _base: PioAddress, _offset: PioAddress, data: &mut [u8]) { + if data.len() > 4 { + return; + } + for (idx, iter) in data.iter_mut().enumerate() { + let config = self.config.lock().expect("failed to acquire lock"); + *iter = (*config >> (idx * 8) & 0xff) as u8; + } + } + + fn pio_write(&self, _base: PioAddress, _offset: PioAddress, data: &[u8]) { + let mut config = self.config.lock().expect("failed to acquire lock"); + *config = u32::from(data[0]) & 0xff; + } + fn as_any(&self) -> &dyn std::any::Any { + self + } + } + + #[test] + fn test_clone_io_manager() { + let mut io_mgr = IoManager::new(); + let dummy = DummyDevice::new(0); + let dum = Arc::new(dummy); + + let mut resource: Vec = Vec::new(); 
+ let mmio = Resource::MmioAddressRange { + base: MMIO_ADDRESS_BASE, + size: MMIO_ADDRESS_SIZE, + }; + let irq = Resource::LegacyIrq(LEGACY_IRQ); + + resource.push(mmio); + resource.push(irq); + + let pio = Resource::PioAddressRange { + base: PIO_ADDRESS_BASE, + size: PIO_ADDRESS_SIZE, + }; + resource.push(pio); + + assert!(io_mgr.register_device_io(dum.clone(), &resource).is_ok()); + + let io_mgr2 = io_mgr.clone(); + assert_eq!(io_mgr2.mmio_bus.len(), 1); + + assert_eq!(io_mgr2.pio_bus.len(), 1); + + let (dev, addr) = io_mgr2 + .get_mmio_device(IoAddress(MMIO_ADDRESS_BASE + 1)) + .unwrap(); + assert_eq!(Arc::strong_count(dev), 5); + + assert_eq!(addr, IoAddress(MMIO_ADDRESS_BASE)); + + drop(io_mgr); + assert_eq!(Arc::strong_count(dev), 3); + + drop(io_mgr2); + assert_eq!(Arc::strong_count(&dum), 1); + } + + #[test] + fn test_register_unregister_device_io() { + let mut io_mgr = IoManager::new(); + let dummy = DummyDevice::new(0); + let dum = Arc::new(dummy); + + let mut resources = DeviceResources::new(); + let mmio = Resource::MmioAddressRange { + base: MMIO_ADDRESS_BASE, + size: MMIO_ADDRESS_SIZE, + }; + let pio = Resource::PioAddressRange { + base: PIO_ADDRESS_BASE, + size: PIO_ADDRESS_SIZE, + }; + let irq = Resource::LegacyIrq(LEGACY_IRQ); + + resources.append(mmio); + resources.append(pio); + resources.append(irq); + + assert!(io_mgr.register_device_io(dum.clone(), &resources).is_ok()); + assert!(io_mgr.register_device_io(dum, &resources).is_err()); + assert!(io_mgr.unregister_device_io(&resources).is_ok()) + } + + #[test] + fn test_mmio_read_write() { + let mut io_mgr: IoManager = Default::default(); + let dum = Arc::new(DummyDevice::new(CONFIG_DATA)); + let mut resource: Vec = Vec::new(); + + let mmio = Resource::MmioAddressRange { + base: MMIO_ADDRESS_BASE, + size: MMIO_ADDRESS_SIZE, + }; + resource.push(mmio); + assert!(io_mgr.register_device_io(dum.clone(), &resource).is_ok()); + + let mut data = [0; 4]; + assert!(io_mgr.mmio_read(MMIO_ADDRESS_BASE, &mut data).is_ok()); + assert_eq!(data, [0x34, 0x12, 0, 0]); + + assert!(io_mgr + .mmio_read(MMIO_ADDRESS_BASE + MMIO_ADDRESS_SIZE, &mut data) + .is_err()); + + data = [0; 4]; + assert!(io_mgr.mmio_write(MMIO_ADDRESS_BASE, &data).is_ok()); + assert_eq!(*dum.config.lock().unwrap(), 0); + + assert!(io_mgr + .mmio_write(MMIO_ADDRESS_BASE + MMIO_ADDRESS_SIZE, &data) + .is_err()); + } + + #[test] + fn test_pio_read_write() { + let mut io_mgr: IoManager = Default::default(); + let dum = Arc::new(DummyDevice::new(CONFIG_DATA)); + let mut resource: Vec = Vec::new(); + + let pio = Resource::PioAddressRange { + base: PIO_ADDRESS_BASE, + size: PIO_ADDRESS_SIZE, + }; + resource.push(pio); + assert!(io_mgr.register_device_io(dum.clone(), &resource).is_ok()); + + let mut data = [0; 4]; + assert!(io_mgr.pio_read(PIO_ADDRESS_BASE, &mut data).is_ok()); + assert_eq!(data, [0x34, 0x12, 0, 0]); + + assert!(io_mgr + .pio_read(PIO_ADDRESS_BASE + PIO_ADDRESS_SIZE, &mut data) + .is_err()); + + data = [0; 4]; + assert!(io_mgr.pio_write(PIO_ADDRESS_BASE, &data).is_ok()); + assert_eq!(*dum.config.lock().unwrap(), 0); + + assert!(io_mgr + .pio_write(PIO_ADDRESS_BASE + PIO_ADDRESS_SIZE, &data) + .is_err()); + } + + #[test] + fn test_device_manager_data_structs() { + let range1 = IoRange::new_mmio_range(0x1000, 0x1000); + let range2 = IoRange::new_mmio_range(0x1000, 0x2000); + let range3 = IoRange::new_mmio_range(0x2000, 0x1000); + + assert_eq!(range1, range1.clone()); + assert_eq!(range1, range2); + assert!(range1 < range3); + } + + #[test] + fn test_error_code() 
{ + let err = super::Error::DeviceOverlap; + + assert!(err.source().is_none()); + assert_eq!( + format!("{err}"), + "device address conflicts with existing devices" + ); + + let err = super::Error::NoDevice; + assert!(err.source().is_none()); + assert_eq!(format!("{err:#?}"), "NoDevice"); + } + + #[test] + fn test_io_manager_partial_eq() { + let mut io_mgr1 = IoManager::new(); + let mut io_mgr2 = IoManager::new(); + let dummy1 = Arc::new(DummyDevice::new(0)); + let dummy2 = Arc::new(DummyDevice::new(0)); + + let mut resources1 = DeviceResources::new(); + let mut resources2 = DeviceResources::new(); + + let mmio = Resource::MmioAddressRange { + base: MMIO_ADDRESS_BASE, + size: MMIO_ADDRESS_SIZE, + }; + let pio = Resource::PioAddressRange { + base: PIO_ADDRESS_BASE, + size: PIO_ADDRESS_SIZE, + }; + + resources1.append(mmio.clone()); + resources1.append(pio.clone()); + + resources2.append(mmio); + resources2.append(pio); + + io_mgr1.register_device_io(dummy1, &resources1).unwrap(); + io_mgr2.register_device_io(dummy2, &resources2).unwrap(); + + assert!(io_mgr1 == io_mgr2); + } + + #[test] + fn test_io_manager_partial_neq() { + let mut io_mgr1 = IoManager::new(); + let mut io_mgr2 = IoManager::new(); + let dummy1 = Arc::new(DummyDevice::new(0)); + let dummy2 = Arc::new(DummyDevice::new(0)); + + let mut resources1 = DeviceResources::new(); + let mut resources2 = DeviceResources::new(); + + let mmio = Resource::MmioAddressRange { + base: MMIO_ADDRESS_BASE, + size: MMIO_ADDRESS_SIZE, + }; + let pio = Resource::PioAddressRange { + base: PIO_ADDRESS_BASE, + size: PIO_ADDRESS_SIZE, + }; + + resources1.append(mmio.clone()); + resources1.append(pio); + + resources2.append(mmio); + + io_mgr1.register_device_io(dummy1, &resources1).unwrap(); + io_mgr2.register_device_io(dummy2, &resources2).unwrap(); + + assert!(io_mgr1 != io_mgr2); + } +} diff --git a/src/dragonball/src/dbs_device/src/lib.rs b/src/dragonball/src/dbs_device/src/lib.rs new file mode 100644 index 000000000000..a482299620bb --- /dev/null +++ b/src/dragonball/src/dbs_device/src/lib.rs @@ -0,0 +1,420 @@ +// Copyright 2020 Alibaba Cloud. All Rights Reserved. +// Copyright © 2019 Intel Corporation. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![deny(missing_docs)] + +//! Device model for Dragonball Secure Sandbox. +//! +//! The `dbs-device` crate, as a counterpart of [vm-device], defines device model for the +//! Dragonball Secure Sandbox. The `dbs-device` crate shares some common concepts and data structures +//! with [vm-device], but it also diverges from [vm-device] due to different VMM designs. +//! +//! [vm-device]: https://github.com/rust-vmm/vm-device + +use std::any::Any; +use std::cmp::{Ord, PartialOrd}; +use std::convert::TryFrom; +use std::sync::Mutex; + +use self::resources::DeviceResources; + +pub mod device_manager; +pub mod resources; + +/// Size of MMIO range/access request. +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub struct IoSize(pub u64); + +impl IoSize { + /// Get the raw value as u64 to make operation simple. + #[inline] + pub fn raw_value(self) -> u64 { + self.0 + } +} + +impl From for IoSize { + #[inline] + fn from(size: u64) -> Self { + IoSize(size) + } +} + +impl From for u64 { + #[inline] + fn from(size: IoSize) -> Self { + size.0 + } +} + +/// Memory Mapped IO (MMIO) address. +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub struct IoAddress(pub u64); + +impl IoAddress { + /// Get the raw value of IO Address to make operation simple. 
+ #[inline] + pub fn raw_value(self) -> u64 { + self.0 + } +} + +impl From for IoAddress { + #[inline] + fn from(addr: u64) -> Self { + IoAddress(addr) + } +} + +impl From for u64 { + #[inline] + fn from(addr: IoAddress) -> Self { + addr.0 + } +} + +type PioAddressType = u16; + +/// Size of Port I/O range/request. +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub struct PioSize(pub PioAddressType); + +impl PioSize { + /// Get the raw value as u64 to make operation simple. + #[inline] + pub fn raw_value(self) -> PioAddressType { + self.0 + } +} + +impl From for PioSize { + #[inline] + fn from(size: PioAddressType) -> Self { + PioSize(size) + } +} + +impl From for PioAddressType { + #[inline] + fn from(size: PioSize) -> Self { + size.0 + } +} + +impl TryFrom for PioSize { + type Error = IoSize; + + #[inline] + fn try_from(size: IoSize) -> Result { + if size.raw_value() <= std::u16::MAX as u64 { + Ok(PioSize(size.raw_value() as PioAddressType)) + } else { + Err(size) + } + } +} + +impl From for IoSize { + #[inline] + fn from(size: PioSize) -> Self { + IoSize(size.raw_value() as u64) + } +} + +/// Port IO (PIO) address. +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub struct PioAddress(pub PioAddressType); + +impl PioAddress { + /// Get the raw value of IO Address to make operation simple. + #[inline] + pub fn raw_value(self) -> PioAddressType { + self.0 + } +} + +impl From for PioAddress { + #[inline] + fn from(addr: PioAddressType) -> Self { + PioAddress(addr) + } +} + +impl From for PioAddressType { + #[inline] + fn from(addr: PioAddress) -> Self { + addr.0 + } +} + +impl TryFrom for PioAddress { + type Error = IoAddress; + + #[inline] + fn try_from(addr: IoAddress) -> Result { + if addr.0 <= std::u16::MAX as u64 { + Ok(PioAddress(addr.raw_value() as PioAddressType)) + } else { + Err(addr) + } + } +} + +impl From for IoAddress { + #[inline] + fn from(addr: PioAddress) -> Self { + IoAddress(addr.raw_value() as u64) + } +} + +/// Trait for device to handle trapped MMIO/PIO access requests with interior mutability +/// for high performance. +/// +/// Any device which needs to trap MMIO/PIO access requests should implement the [DeviceIo] or +/// [DeviceIoMut] trait and register itself to the [IoManager](crate::device_manager::IoManager) +/// with those trapped IO address ranges. When the guest access those trapped address ranges, +/// the access request will be routed to the registered callbacks. +/// +/// The [DeviceIo] trait adopts the interior mutability pattern so we can get a real concurrent +/// multiple threads handling. For device backend drivers not focusing on high performance, +/// the Mutex adapter may be used to simplify the implementation. +#[allow(unused_variables)] +pub trait DeviceIo: Send + Sync { + /// Read from the MMIO address `base + offset` into `data`. + fn read(&self, base: IoAddress, offset: IoAddress, data: &mut [u8]) {} + + /// Write from `data` to the MMIO address `base + offset`. + fn write(&self, base: IoAddress, offset: IoAddress, data: &[u8]) {} + + /// Read from port `base + offset` into `data`. + fn pio_read(&self, base: PioAddress, offset: PioAddress, data: &mut [u8]) {} + + /// Write from `data` to the port `base + offset`. + fn pio_write(&self, base: PioAddress, offset: PioAddress, data: &[u8]) {} + + /// Get resources assigned to the device. + fn get_assigned_resources(&self) -> DeviceResources { + DeviceResources::new() + } + + /// Get the trapped IO address ranges for the device. 
+ /// + /// Only MMIO/PIO address ranges in the resource list will be handled, other resources will be + /// ignored. So the device does not need to filter out non-MMIO/PIO resources. + fn get_trapped_io_resources(&self) -> DeviceResources { + self.get_assigned_resources() + } + + /// Used to downcast to the specific type. + fn as_any(&self) -> &dyn Any; +} + +/// Trait for device to handle trapped MMIO/PIO access requests. +/// +/// Many device backend drivers will mutate itself when handling IO requests. The [DeviceIo] trait +/// assumes interior mutability, but it's a little complex to support interior mutability. +/// So the Mutex adapter may be used to ease device backend driver implementations. +/// +/// The Mutex adapter is an zero overhead abstraction without performance penalty. +#[allow(unused_variables)] +pub trait DeviceIoMut { + /// Read from the MMIO address `base + offset` into `data`. + fn read(&mut self, base: IoAddress, offset: IoAddress, data: &mut [u8]) {} + + /// Write from `data` to the MMIO address `base + offset`. + fn write(&mut self, base: IoAddress, offset: IoAddress, data: &[u8]) {} + + /// Read from port `base + offset` into `data`. + fn pio_read(&mut self, base: PioAddress, offset: PioAddress, data: &mut [u8]) {} + + /// Write from `data` to the port `base + offset`. + fn pio_write(&mut self, base: PioAddress, offset: PioAddress, data: &[u8]) {} + + /// Get resources assigned to the device. + fn get_assigned_resources(&self) -> DeviceResources { + DeviceResources::new() + } + + /// Get the trapped IO address ranges for the device. + /// + /// Only MMIO/PIO address ranges in the resource list will be handled, other resources will be + /// ignored. So the device does not need to filter out non-MMIO/PIO resources. + fn get_trapped_io_resources(&self) -> DeviceResources { + self.get_assigned_resources() + } +} + +impl DeviceIo for Mutex { + fn read(&self, base: IoAddress, offset: IoAddress, data: &mut [u8]) { + // Safe to unwrap() because we don't expect poisoned lock here. + self.lock().unwrap().read(base, offset, data) + } + + fn write(&self, base: IoAddress, offset: IoAddress, data: &[u8]) { + // Safe to unwrap() because we don't expect poisoned lock here. + self.lock().unwrap().write(base, offset, data) + } + + fn pio_read(&self, base: PioAddress, offset: PioAddress, data: &mut [u8]) { + // Safe to unwrap() because we don't expect poisoned lock here. + self.lock().unwrap().pio_read(base, offset, data) + } + + fn pio_write(&self, base: PioAddress, offset: PioAddress, data: &[u8]) { + // Safe to unwrap() because we don't expect poisoned lock here. + self.lock().unwrap().pio_write(base, offset, data) + } + + fn get_assigned_resources(&self) -> DeviceResources { + // Safe to unwrap() because we don't expect poisoned lock here. + self.lock().unwrap().get_assigned_resources() + } + + fn get_trapped_io_resources(&self) -> DeviceResources { + // Safe to unwrap() because we don't expect poisoned lock here. 
+ self.lock().unwrap().get_trapped_io_resources() + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +#[cfg(test)] +mod tests { + use std::convert::TryFrom; + use std::sync::Arc; + + use super::*; + + #[derive(Default)] + struct MockDevice { + data: Mutex, + } + + impl DeviceIo for MockDevice { + fn read(&self, _base: IoAddress, _offset: IoAddress, data: &mut [u8]) { + data[0] = *self.data.lock().unwrap(); + } + + fn write(&self, _base: IoAddress, _offset: IoAddress, data: &[u8]) { + *self.data.lock().unwrap() = data[0]; + } + + fn pio_read(&self, _base: PioAddress, _offset: PioAddress, data: &mut [u8]) { + data[0] = *self.data.lock().unwrap(); + } + + fn pio_write(&self, _base: PioAddress, _offset: PioAddress, data: &[u8]) { + *self.data.lock().unwrap() = data[0]; + } + fn as_any(&self) -> &dyn Any { + self + } + } + + #[derive(Default)] + struct MockDeviceMut { + data: u8, + } + + impl DeviceIoMut for MockDeviceMut { + fn read(&mut self, _base: IoAddress, _offset: IoAddress, data: &mut [u8]) { + data[0] = self.data; + } + + fn write(&mut self, _base: IoAddress, _offset: IoAddress, data: &[u8]) { + self.data = data[0]; + } + + fn pio_read(&mut self, _base: PioAddress, _offset: PioAddress, data: &mut [u8]) { + data[0] = self.data; + } + + fn pio_write(&mut self, _base: PioAddress, _offset: PioAddress, data: &[u8]) { + self.data = data[0]; + } + } + + fn register_device(device: Arc) { + device.write(IoAddress(0), IoAddress(0), &[0x10u8]); + let mut buf = [0x0u8]; + device.read(IoAddress(0), IoAddress(0), &mut buf); + assert_eq!(buf[0], 0x10); + + { + device.pio_write(PioAddress(0), PioAddress(0), &[0x10u8]); + let mut buf = [0x0u8]; + device.pio_read(PioAddress(0), PioAddress(0), &mut buf); + assert_eq!(buf[0], 0x10); + } + + // test trait's default implementation + let resource = DeviceResources::new(); + assert_eq!(resource, device.get_assigned_resources()); + assert_eq!(resource, device.get_trapped_io_resources()); + } + + #[test] + fn test_device_io_adapter() { + let device = Arc::new(MockDevice::default()); + + register_device(device.clone()); + assert_eq!(*device.data.lock().unwrap(), 0x010); + } + + #[test] + fn test_device_io_mut_adapter() { + let device_mut = Arc::new(Mutex::new(MockDeviceMut::default())); + + register_device(device_mut.clone()); + assert_eq!(device_mut.lock().unwrap().data, 0x010); + } + + #[test] + fn test_io_data_struct() { + let io_size = IoSize::from(0x1111u64); + assert_eq!(io_size.raw_value(), 0x1111u64); + assert_eq!(u64::from(io_size), 0x1111u64); + assert_eq!(io_size, io_size.clone()); + let io_size1 = IoSize::from(0x1112u64); + assert!(io_size < io_size1); + + let io_addr = IoAddress::from(0x1234u64); + assert_eq!(io_addr.raw_value(), 0x1234u64); + assert_eq!(u64::from(io_addr), 0x1234u64); + assert_eq!(io_addr, io_addr.clone()); + let io_addr1 = IoAddress::from(0x1235u64); + assert!(io_addr < io_addr1); + } + + #[test] + fn test_pio_data_struct() { + let pio_size = PioSize::from(0x1111u16); + assert_eq!(pio_size.raw_value(), 0x1111u16); + assert_eq!(u16::from(pio_size), 0x1111u16); + assert_eq!(pio_size, pio_size.clone()); + let pio_size1 = PioSize::from(0x1112u16); + assert!(pio_size < pio_size1); + + let pio_size = PioSize::try_from(IoSize(0x1111u64)).unwrap(); + assert_eq!(pio_size.raw_value(), 0x1111u16); + + assert!(PioSize::try_from(IoSize(std::u16::MAX as u64 + 1)).is_err()); + + let io_size = IoSize::from(PioSize::from(0x1111u16)); + assert_eq!(io_size.raw_value(), 0x1111u64); + + let pio_addr = PioAddress::from(0x1234u16); + 
assert_eq!(pio_addr.raw_value(), 0x1234u16); + assert_eq!(u16::from(pio_addr), 0x1234u16); + assert_eq!(pio_addr, pio_addr.clone()); + let pio_addr1 = PioAddress::from(0x1235u16); + assert!(pio_addr < pio_addr1); + + assert!(PioAddress::try_from(IoAddress::from(0x12_3456u64)).is_err()); + assert!(PioAddress::try_from(IoAddress::from(0x1234u64)).is_ok()); + assert_eq!(IoAddress::from(pio_addr).raw_value(), 0x1234u64); + } +} diff --git a/src/dragonball/src/dbs_device/src/resources.rs b/src/dragonball/src/dbs_device/src/resources.rs new file mode 100644 index 000000000000..e87b0fe87453 --- /dev/null +++ b/src/dragonball/src/dbs_device/src/resources.rs @@ -0,0 +1,649 @@ +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Descriptors representing device resource allocation requirements and assigned resources. +//! +//! There are several components related to resource management: +//! - the Dragonball Secure Sandbox (VMM), which is responsible for creating and registering devices +//! to the device manager. +//! - the device manager, which manages all devices of a Dragonball Secure Sandbox instance. +//! - the devices, which implement virtual device backends for the guest. +//! +//! They cooperate with each to provide resources required by each device. The high level flow of +//! resource management is as below: +//! 1) the VMM creates a new device object. +//! 2) the device returns an array of [ResourceConstraint](self::ResourceConstraint), +//! describing the required resources and resource allocation constraints. +//! 3) the VMM allocates required resources from a resource manager, +//! 4) the VMM passes the allocated resources [DeviceResources](self::DeviceResources), +//! which is an array of [Resource](self::Resource), to the device object. +//! 5) the VMM registers the new device onto corresponding device managers according the allocated +//! resources. + +use std::ops::Deref; + +/// Enumeration describing a device's resource allocation requirements and constraints. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum ResourceConstraint { + /// Constraint for an IO Port address range. + PioAddress { + /// Allocating resource within the range [`min`, `max`] if specified. + range: Option<(u16, u16)>, + /// Alignment for the allocated address. + align: u16, + /// Size for the allocated address range. + size: u16, + }, + /// Constraint for a Memory Mapped IO address range. + MmioAddress { + /// Allocating resource within the range [`min`, `max`] if specified. + range: Option<(u64, u64)>, + /// Alignment for the allocated address. + align: u64, + /// Size for the allocated address range. + size: u64, + }, + /// Constraint for a Guest Mem address range. + MemAddress { + /// Allocating resource within the range [`min`, `max`] if specified. + range: Option<(u64, u64)>, + /// Alignment for the allocated address. + align: u64, + /// Size for the allocated address range. + size: u64, + }, + /// Constraint for a legacy IRQ. + LegacyIrq { + /// Reserving the pre-allocated IRQ if it's specified. + irq: Option, + }, + /// Constraint for PCI MSI IRQs. + PciMsiIrq { + /// Number of Irqs to allocate. + size: u32, + }, + /// Constraint for PCI MSIx IRQs. + PciMsixIrq { + /// Number of Irqs to allocate. + size: u32, + }, + /// Constraint for generic IRQs. + GenericIrq { + /// Number of Irqs to allocate. + size: u32, + }, + /// Constraint for KVM mem_slot indexes to map memory into the guest. 
+ KvmMemSlot { + /// Allocating kvm memory slots starting from the index `slot` if + /// specified. + slot: Option, + /// Number of slots to allocate. + size: u32, + }, +} + +impl ResourceConstraint { + /// Create a new PIO address constraint object with default configuration. + pub fn new_pio(size: u16) -> Self { + ResourceConstraint::PioAddress { + range: None, + align: 0x1, + size, + } + } + + /// Create a new PIO address constraint object. + pub fn pio_with_constraints(size: u16, range: Option<(u16, u16)>, align: u16) -> Self { + ResourceConstraint::PioAddress { range, align, size } + } + + /// Create a new MMIO address constraint object with default configuration. + pub fn new_mmio(size: u64) -> Self { + ResourceConstraint::MmioAddress { + range: None, + align: 0x1000, + size, + } + } + + /// Create a new MMIO address constraint object. + pub fn mmio_with_constraints(size: u64, range: Option<(u64, u64)>, align: u64) -> Self { + ResourceConstraint::MmioAddress { range, align, size } + } + + /// Create a new Mem address constraint object with default configuration. + pub fn new_mem(size: u64) -> Self { + ResourceConstraint::MemAddress { + range: None, + align: 0x1000, + size, + } + } + + /// Create a new Mem address constraint object. + pub fn mem_with_constraints(size: u64, range: Option<(u64, u64)>, align: u64) -> Self { + ResourceConstraint::MemAddress { range, align, size } + } + + /// Create a new legacy IRQ constraint object. + /// + /// Allocating the pre-allocated legacy Irq `irq` if specified. + pub fn new_legacy_irq(irq: Option) -> Self { + ResourceConstraint::LegacyIrq { irq } + } + + /// Create a new PCI MSI IRQ constraint object. + pub fn new_pci_msi_irq(size: u32) -> Self { + ResourceConstraint::PciMsiIrq { size } + } + + /// Create a new PCI MSIX IRQ constraint object. + pub fn new_pci_msix_irq(size: u32) -> Self { + ResourceConstraint::PciMsixIrq { size } + } + + /// Create a new Generic IRQ constraint object. + pub fn new_generic_irq(size: u32) -> Self { + ResourceConstraint::GenericIrq { size } + } + + /// Create a new KVM memory slot constraint object. + /// + /// Allocating kvm memory slots starting from the index `slot` if specified. + pub fn new_kvm_mem_slot(size: u32, slot: Option) -> Self { + ResourceConstraint::KvmMemSlot { slot, size } + } +} + +/// Type of Message Singaled Interrupt +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum MsiIrqType { + /// PCI MSI IRQ numbers. + PciMsi, + /// PCI MSIx IRQ numbers. + PciMsix, + /// Generic MSI IRQ numbers. + GenericMsi, +} + +/// Enumeration for device resources. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum Resource { + /// IO Port resource range. + PioAddressRange { + /// Pio resource base + base: u16, + /// Pio resource size + size: u16, + }, + /// Memory Mapped IO resource range. + MmioAddressRange { + /// Mmio resource base + base: u64, + /// Mmio resource size + size: u64, + }, + /// Guest Mem resource range. + MemAddressRange { + /// Mem resource base + base: u64, + /// Mem resource size + size: u64, + }, + /// Legacy IRQ number. + LegacyIrq(u32), + /// Message Signaled Interrupt + MsiIrq { + /// Msi irq type + ty: MsiIrqType, + /// Msi irq base + base: u32, + /// Msi irq size + size: u32, + }, + /// Network Interface Card MAC address. + MacAddresss(String), + /// KVM memslot index. + KvmMemSlot(u32), +} + +/// Newtype to store a set of device resources. 
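+///
+/// A short usage sketch (the address range and IRQ number below are arbitrary example values):
+///
+/// ```
+/// use dbs_device::resources::{DeviceResources, Resource};
+///
+/// let mut resources = DeviceResources::new();
+/// resources.append(Resource::MmioAddressRange { base: 0x1000_0000, size: 0x1000 });
+/// resources.append(Resource::LegacyIrq(5));
+///
+/// assert_eq!(resources.get_mmio_address_ranges(), vec![(0x1000_0000, 0x1000)]);
+/// assert_eq!(resources.get_legacy_irq(), Some(5));
+/// ```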
+#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DeviceResources(Vec); + +impl DeviceResources { + /// Create a container object to store device resources. + pub fn new() -> Self { + DeviceResources(Vec::new()) + } + + /// Append a device resource to the container object. + pub fn append(&mut self, entry: Resource) { + self.0.push(entry); + } + + /// Get the IO port address resources. + pub fn get_pio_address_ranges(&self) -> Vec<(u16, u16)> { + let mut vec = Vec::new(); + for entry in self.0.iter().as_ref() { + if let Resource::PioAddressRange { base, size } = entry { + vec.push((*base, *size)); + } + } + vec + } + + /// Get the Memory Mapped IO address resources. + pub fn get_mmio_address_ranges(&self) -> Vec<(u64, u64)> { + let mut vec = Vec::new(); + for entry in self.0.iter().as_ref() { + if let Resource::MmioAddressRange { base, size } = entry { + vec.push((*base, *size)); + } + } + vec + } + + /// Get the Guest Memory address resources. + pub fn get_mem_address_ranges(&self) -> Vec<(u64, u64)> { + let mut vec = Vec::new(); + for entry in self.0.iter().as_ref() { + if let Resource::MemAddressRange { base, size } = entry { + vec.push((*base, *size)); + } + } + vec + } + + /// Get the first legacy interrupt number(IRQ). + pub fn get_legacy_irq(&self) -> Option { + for entry in self.0.iter().as_ref() { + if let Resource::LegacyIrq(base) = entry { + return Some(*base); + } + } + None + } + + /// Get information about the first PCI MSI interrupt resource. + pub fn get_pci_msi_irqs(&self) -> Option<(u32, u32)> { + self.get_msi_irqs(MsiIrqType::PciMsi) + } + + /// Get information about the first PCI MSIx interrupt resource. + pub fn get_pci_msix_irqs(&self) -> Option<(u32, u32)> { + self.get_msi_irqs(MsiIrqType::PciMsix) + } + + /// Get information about the first Generic MSI interrupt resource. + pub fn get_generic_msi_irqs(&self) -> Option<(u32, u32)> { + self.get_msi_irqs(MsiIrqType::GenericMsi) + } + + fn get_msi_irqs(&self, ty: MsiIrqType) -> Option<(u32, u32)> { + for entry in self.0.iter().as_ref() { + if let Resource::MsiIrq { + ty: msi_type, + base, + size, + } = entry + { + if ty == *msi_type { + return Some((*base, *size)); + } + } + } + None + } + + /// Get the KVM memory slots to map memory into the guest. + pub fn get_kvm_mem_slots(&self) -> Vec { + let mut vec = Vec::new(); + for entry in self.0.iter().as_ref() { + if let Resource::KvmMemSlot(index) = entry { + vec.push(*index); + } + } + vec + } + + /// Get the first resource information for NIC MAC address. + pub fn get_mac_address(&self) -> Option { + for entry in self.0.iter().as_ref() { + if let Resource::MacAddresss(addr) = entry { + return Some(addr.clone()); + } + } + None + } + + /// Get immutable reference to all the resources. 
+ pub fn get_all_resources(&self) -> &[Resource] { + &self.0 + } +} + +impl Deref for DeviceResources { + type Target = [Resource]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + + const PIO_ADDRESS_SIZE: u16 = 5; + const PIO_ADDRESS_BASE: u16 = 0; + const MMIO_ADDRESS_SIZE: u64 = 0x8765_4321; + const MMIO_ADDRESS_BASE: u64 = 0x1234_5678; + const MEM_ADDRESS_SIZE: u64 = 0x8765_4321; + const MEM_ADDRESS_BASE: u64 = 0x1234_5678; + const LEGACY_IRQ: u32 = 0x168; + const PCI_MSI_IRQ_SIZE: u32 = 0x8888; + const PCI_MSI_IRQ_BASE: u32 = 0x6666; + const PCI_MSIX_IRQ_SIZE: u32 = 0x16666; + const PCI_MSIX_IRQ_BASE: u32 = 0x8888; + const GENERIC_MSI_IRQS_SIZE: u32 = 0x16888; + const GENERIC_MSI_IRQS_BASE: u32 = 0x16688; + const MAC_ADDRESS: &str = "00:08:63:66:86:88"; + const KVM_SLOT_ID: u32 = 0x0100; + + pub fn get_device_resource() -> DeviceResources { + let mut resource = DeviceResources::new(); + + let entry = Resource::PioAddressRange { + base: PIO_ADDRESS_BASE, + size: PIO_ADDRESS_SIZE, + }; + resource.append(entry.clone()); + assert_eq!(entry, resource[0]); + + let entry = Resource::MmioAddressRange { + base: MMIO_ADDRESS_BASE, + size: MMIO_ADDRESS_SIZE, + }; + resource.append(entry.clone()); + assert_eq!(entry, resource[1]); + + let entry = Resource::MemAddressRange { + base: MEM_ADDRESS_BASE, + size: MEM_ADDRESS_SIZE, + }; + resource.append(entry.clone()); + assert_eq!(entry, resource[2]); + + let entry = Resource::LegacyIrq(LEGACY_IRQ); + resource.append(entry.clone()); + assert_eq!(entry, resource[3]); + + let entry = Resource::MsiIrq { + ty: MsiIrqType::PciMsi, + base: PCI_MSI_IRQ_BASE, + size: PCI_MSI_IRQ_SIZE, + }; + resource.append(entry.clone()); + assert_eq!(entry, resource[4]); + + let entry = Resource::MsiIrq { + ty: MsiIrqType::PciMsix, + base: PCI_MSIX_IRQ_BASE, + size: PCI_MSIX_IRQ_SIZE, + }; + resource.append(entry.clone()); + assert_eq!(entry, resource[5]); + + let entry = Resource::MsiIrq { + ty: MsiIrqType::GenericMsi, + base: GENERIC_MSI_IRQS_BASE, + size: GENERIC_MSI_IRQS_SIZE, + }; + resource.append(entry.clone()); + assert_eq!(entry, resource[6]); + + let entry = Resource::MacAddresss(MAC_ADDRESS.to_string()); + resource.append(entry.clone()); + assert_eq!(entry, resource[7]); + + let entry = Resource::KvmMemSlot(KVM_SLOT_ID); + resource.append(entry.clone()); + assert_eq!(entry, resource[8]); + + resource + } + + #[test] + fn get_pio_address_ranges() { + let resources = get_device_resource(); + assert!( + resources.get_pio_address_ranges()[0].0 == PIO_ADDRESS_BASE + && resources.get_pio_address_ranges()[0].1 == PIO_ADDRESS_SIZE + ); + assert_eq!( + resources[0], + Resource::PioAddressRange { + base: PIO_ADDRESS_BASE, + size: PIO_ADDRESS_SIZE, + } + ); + assert_ne!(resources[0], resources[1]); + + let resources2 = resources.clone(); + assert_eq!(resources.len(), resources2.len()); + drop(resources); + assert_eq!( + resources2[0], + Resource::PioAddressRange { + base: PIO_ADDRESS_BASE, + size: PIO_ADDRESS_SIZE, + } + ); + } + + #[test] + fn test_get_mmio_address_ranges() { + let resources = get_device_resource(); + assert!( + resources.get_mmio_address_ranges()[0].0 == MMIO_ADDRESS_BASE + && resources.get_mmio_address_ranges()[0].1 == MMIO_ADDRESS_SIZE + ); + } + + #[test] + fn test_get_mem_address_ranges() { + let resources = get_device_resource(); + assert!( + resources.get_mem_address_ranges()[0].0 == MEM_ADDRESS_BASE + && resources.get_mem_address_ranges()[0].1 == MEM_ADDRESS_SIZE + ); + } + + #[test] 
+ fn test_get_legacy_irq() { + let resources = get_device_resource(); + assert!(resources.get_legacy_irq().unwrap() == LEGACY_IRQ); + + // None case. + let resources = DeviceResources::new(); + assert!(resources.get_legacy_irq().is_none()); + } + + #[test] + fn test_get_pci_msi_irqs() { + let resources = get_device_resource(); + assert!( + resources.get_pci_msi_irqs().unwrap().0 == PCI_MSI_IRQ_BASE + && resources.get_pci_msi_irqs().unwrap().1 == PCI_MSI_IRQ_SIZE + ); + + // None case. + let resources = DeviceResources::new(); + assert!(resources.get_generic_msi_irqs().is_none()); + } + + #[test] + fn test_get_pci_msix_irqs() { + let resources = get_device_resource(); + assert!( + resources.get_pci_msix_irqs().unwrap().0 == PCI_MSIX_IRQ_BASE + && resources.get_pci_msix_irqs().unwrap().1 == PCI_MSIX_IRQ_SIZE + ); + + // None case. + let resources = DeviceResources::new(); + assert!(resources.get_generic_msi_irqs().is_none()); + } + + #[test] + fn test_get_generic_msi_irqs() { + let resources = get_device_resource(); + assert!( + resources.get_generic_msi_irqs().unwrap().0 == GENERIC_MSI_IRQS_BASE + && resources.get_generic_msi_irqs().unwrap().1 == GENERIC_MSI_IRQS_SIZE + ); + + // None case. + let resources = DeviceResources::new(); + assert!(resources.get_generic_msi_irqs().is_none()); + } + + #[test] + fn test_get_mac_address() { + let resources = get_device_resource(); + assert_eq!(resources.get_mac_address().unwrap(), MAC_ADDRESS); + + // None case. + let resources = DeviceResources::new(); + assert!(resources.get_mac_address().is_none()); + } + + #[test] + fn test_get_kvm_slot() { + let resources = get_device_resource(); + assert_eq!(resources.get_kvm_mem_slots(), vec![KVM_SLOT_ID]); + } + + #[test] + fn test_get_all_resources() { + let resources = get_device_resource(); + assert_eq!(resources.get_all_resources().len(), 9); + } + + #[test] + fn test_resource_constraint() { + let pio = ResourceConstraint::new_pio(2); + let pio2 = pio; + let mmio = ResourceConstraint::new_mmio(0x1000); + assert_eq!(pio, pio2); + assert_ne!(pio, mmio); + + if let ResourceConstraint::PioAddress { range, align, size } = + ResourceConstraint::new_pio(2) + { + assert_eq!(range, None); + assert_eq!(align, 1); + assert_eq!(size, 2); + } else { + panic!("Pio resource constraint is invalid."); + } + + if let ResourceConstraint::PioAddress { range, align, size } = + ResourceConstraint::pio_with_constraints(2, Some((15, 16)), 2) + { + assert_eq!(range, Some((15, 16))); + assert_eq!(align, 2); + assert_eq!(size, 2); + } else { + panic!("Pio resource constraint is invalid."); + } + + if let ResourceConstraint::MmioAddress { range, align, size } = + ResourceConstraint::new_mmio(0x2000) + { + assert_eq!(range, None); + assert_eq!(align, 0x1000); + assert_eq!(size, 0x2000); + } else { + panic!("Mmio resource constraint is invalid."); + } + + if let ResourceConstraint::MmioAddress { range, align, size } = + ResourceConstraint::mmio_with_constraints(0x2000, Some((0x0, 0x2000)), 0x2000) + { + assert_eq!(range, Some((0x0, 0x2000))); + assert_eq!(align, 0x2000); + assert_eq!(size, 0x2000); + } else { + panic!("Mmio resource constraint is invalid."); + } + + if let ResourceConstraint::MemAddress { range, align, size } = + ResourceConstraint::new_mem(0x2000) + { + assert_eq!(range, None); + assert_eq!(align, 0x1000); + assert_eq!(size, 0x2000); + } else { + panic!("Mem resource constraint is invalid."); + } + + if let ResourceConstraint::MemAddress { range, align, size } = + ResourceConstraint::mem_with_constraints(0x2000, 
Some((0x0, 0x2000)), 0x2000) + { + assert_eq!(range, Some((0x0, 0x2000))); + assert_eq!(align, 0x2000); + assert_eq!(size, 0x2000); + } else { + panic!("Mem resource constraint is invalid."); + } + + if let ResourceConstraint::LegacyIrq { irq } = + ResourceConstraint::new_legacy_irq(Some(0x123)) + { + assert_eq!(irq, Some(0x123)); + } else { + panic!("IRQ resource constraint is invalid."); + } + + if let ResourceConstraint::PciMsiIrq { size } = ResourceConstraint::new_pci_msi_irq(0x123) { + assert_eq!(size, 0x123); + } else { + panic!("Pci MSI irq resource constraint is invalid."); + } + + if let ResourceConstraint::PciMsixIrq { size } = ResourceConstraint::new_pci_msix_irq(0x123) + { + assert_eq!(size, 0x123); + } else { + panic!("Pci MSIx irq resource constraint is invalid."); + } + + if let ResourceConstraint::GenericIrq { size } = ResourceConstraint::new_generic_irq(0x123) + { + assert_eq!(size, 0x123); + } else { + panic!("generic irq resource constraint is invalid."); + } + + if let ResourceConstraint::KvmMemSlot { slot, size } = + ResourceConstraint::new_kvm_mem_slot(0x1000, Some(0x2000)) + { + assert_eq!(slot, Some(0x2000)); + assert_eq!(size, 0x1000); + } else { + panic!("KVM slot resource constraint is invalid."); + } + } + + #[test] + fn test_resources_deref() { + let resources = get_device_resource(); + let mut count = 0; + for _res in resources.iter() { + count += 1; + } + assert_eq!(count, resources.0.len()); + } +} diff --git a/src/dragonball/src/dbs_interrupt/Cargo.toml b/src/dragonball/src/dbs_interrupt/Cargo.toml new file mode 100644 index 000000000000..20d5d46e4764 --- /dev/null +++ b/src/dragonball/src/dbs_interrupt/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "dbs-interrupt" +version = "0.2.2" +authors = ["Alibaba Dragonball Team"] +description = "Traits and structs to manage interrupts for virtual devices" +license = "Apache-2.0" +edition = "2018" +homepage = "https://github.com/openanolis/dragonball-sandbox" +repository = "https://github.com/openanolis/dragonball-sandbox/tree/main/crates/dbs-interrupt" +keywords = ["dragonball", "secure-sandbox", "device", "interrupt"] +readme = "README.md" + +[dependencies] +dbs-device = { path = "../dbs_device" } +dbs-arch = { path = "../dbs_arch" } +kvm-bindings = { version = "0.6.0", optional = true } +kvm-ioctls = { version = "0.12.0", optional = true } +libc = "0.2" +vmm-sys-util = "0.11.0" + +[features] +default = ["legacy-irq", "msi-irq"] + +legacy-irq = [] +msi-irq = [] + +kvm-irq = ["kvm-ioctls", "kvm-bindings"] +kvm-legacy-irq = ["legacy-irq", "kvm-irq"] +kvm-msi-generic = ["msi-irq", "kvm-irq"] +kvm-msi-irq = ["kvm-msi-generic"] diff --git a/src/dragonball/src/dbs_interrupt/LICENSE b/src/dragonball/src/dbs_interrupt/LICENSE new file mode 120000 index 000000000000..30cff7403da0 --- /dev/null +++ b/src/dragonball/src/dbs_interrupt/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/dragonball/src/dbs_interrupt/README.md b/src/dragonball/src/dbs_interrupt/README.md new file mode 100644 index 000000000000..3ddd354cde0d --- /dev/null +++ b/src/dragonball/src/dbs_interrupt/README.md @@ -0,0 +1,73 @@ +# dbs-interrupt + +Interrupts are used by hardware devices to indicate asynchronous events to the processor. +The `dbs-interrupt` crate provides traits and data structures for the `Dragonball Sandbox` to manage +interrupts for virtual and physical devices. + +An interrupt alerts the processor to a high-priority condition requiring the interruption of +the current code the processor is executing. 
The processor responds by suspending its current activities,
+saving its state, and executing a function called an interrupt handler (or interrupt service routine, ISR)
+to deal with the event. The interruption is temporary: unless handling the interrupt raised a fatal error,
+the processor resumes normal activities once the interrupt handler finishes.
+
+Hardware interrupts are used by devices to communicate that they require attention from the
+operating system, or from a bare-metal program running on the CPU if there is no OS. The act of
+initiating a hardware interrupt is referred to as an interrupt request (IRQ). Different devices are
+usually associated with different interrupts, each identified by a unique value, which makes it
+possible to know which hardware device caused which interrupt. These interrupt values are often
+called IRQ lines, or just interrupt lines.
+
+Nowadays, IRQ lines are not the only mechanism to deliver device interrupts to processors. MSI
+(Message Signaled Interrupt) is a commonly used alternative, which uses special in-band messages to
+replace the traditional out-of-band assertion of dedicated interrupt lines. While more complex to
+implement in a device, message signaled interrupts have some significant advantages over pin-based
+out-of-band interrupt signaling. Message signaled interrupts have been supported by the PCI bus
+since version 2.2, and by the later PCI Express bus. Some non-PCI architectures also use message
+signaled interrupts.
+
+While IRQ is the term commonly used by operating systems when dealing with hardware interrupts, the
+IRQ numbers managed by an OS are independent of the ones managed by the VMM. For simplicity's sake,
+the term Interrupt Source is used instead of IRQ to represent both pin-based interrupts and MSI
+interrupts.
+
+A device may support multiple types of interrupts, and each type of interrupt may support one or
+multiple interrupt sources. For example, a PCI device may support:
+
+- Legacy Irq: exactly one interrupt source.
+- PCI MSI Irq: 1, 2, 4, 8, 16 or 32 interrupt sources.
+- PCI MSIx Irq: 2^n (n = 0-11) interrupt sources.
+
+A distinct Interrupt Source Identifier (ISID) is assigned to each interrupt source. An external ID
+allocator is used to allocate and free Interrupt Source Identifiers for devices. To stay decoupled
+from that allocator, this crate does not allocate or free Interrupt Source IDs itself; it only makes
+use of IDs that have already been assigned.
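+
+The snippet below is only an illustration of what such an external allocator might look like; the
+`IsidAllocator` type is hypothetical and is not part of this crate:
+
+```rust
+/// Hypothetical allocator for Interrupt Source Identifiers (ISIDs), owned by the VMM or its
+/// resource manager rather than by dbs-interrupt itself.
+struct IsidAllocator {
+    next: u32,
+    recycled: Vec<u32>,
+}
+
+impl IsidAllocator {
+    fn new(base: u32) -> Self {
+        IsidAllocator { next: base, recycled: Vec::new() }
+    }
+
+    /// Allocate one ISID, reusing a previously freed one when possible.
+    /// (Real allocators would also hand out contiguous ranges for multi-vector MSI devices.)
+    fn allocate(&mut self) -> u32 {
+        self.recycled.pop().unwrap_or_else(|| {
+            let id = self.next;
+            self.next += 1;
+            id
+        })
+    }
+
+    /// Return an ISID to the pool.
+    fn free(&mut self, id: u32) {
+        self.recycled.push(id);
+    }
+}
+```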
+ +The overall flow to deal with interrupts is: + +- the VMM creates an interrupt manager +- the VMM creates a device manager, passing on an reference to the interrupt manager +- the device manager passes on an reference to the interrupt manager to all registered devices +- guest kernel loads drivers for virtual devices +- guest device driver determines the type and number of interrupts needed, and update the device + configuration +- the virtual device backend requests the interrupt manager to create an interrupt group according to guest configuration information + +The dbs-device crate provides: + +- [trait `InterruptManager`]: manage interrupt sources for virtual device backend +- [struct `DeviceInterruptManager`]: an implementation of [`InterruptManager`], manage interrupts and interrupt modes for a device +- [trait `InterruptSourceGroup`]: manage a group of interrupt sources for a device, provide methods to control the interrupts +- [enum `InterruptSourceType`]: type of interrupt source +- [enum `InterruptSourceConfig`], [struct `LegacyIrqSourceConfig`] and [struct `MsiIrqSourceConfig`]: configuration data for interrupt sources + +## License + +This project is licensed under [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). + +[trait InterruptManager]: src/lib.rs +[struct DeviceInterruptManager]: src/manager.rs +[trait InterruptSourceGroup]: src/lib.rs +[enum InterruptSourceType]: src/lib.rs +[enum InterruptSourceConfig]: src/lib.rs +[struct LegacyIrqSourceConfig]: src/lib.rs +[struct MsiIrqSourceConfig]: src/lib.rs diff --git a/src/dragonball/src/dbs_interrupt/src/kvm/legacy_irq.rs b/src/dragonball/src/dbs_interrupt/src/kvm/legacy_irq.rs new file mode 100644 index 000000000000..3fb6b02474f0 --- /dev/null +++ b/src/dragonball/src/dbs_interrupt/src/kvm/legacy_irq.rs @@ -0,0 +1,351 @@ +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Manage virtual device's legacy interrupts based on Linux KVM framework. +//! +//! On x86 platforms, legacy interrupts are those managed by the Master PIC, the slave PIC and +//! IOAPICs. + +use kvm_bindings::KVM_IRQ_ROUTING_IRQCHIP; +#[cfg(target_arch = "x86_64")] +use kvm_bindings::{KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE}; +use vmm_sys_util::eventfd::EFD_NONBLOCK; + +use super::*; + +#[cfg(target_arch = "x86_64")] +/// Maximum number of legacy interrupts supported. +pub const MAX_LEGACY_IRQS: u32 = 24; + +#[cfg(target_arch = "aarch64")] +/// Maximum number of legacy interrupts supported. +pub const MAX_LEGACY_IRQS: u32 = 128; + +pub(super) struct LegacyIrq { + base: u32, + vmfd: Arc, + irqfd: EventFd, +} + +impl LegacyIrq { + pub(super) fn new( + base: InterruptIndex, + count: InterruptIndex, + vmfd: Arc, + _routes: Arc, + ) -> Result { + if count != 1 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + if base >= MAX_LEGACY_IRQS { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + Ok(LegacyIrq { + base, + vmfd, + irqfd: EventFd::new(EFD_NONBLOCK)?, + }) + } + + #[cfg(target_arch = "x86_64")] + fn add_legacy_entry( + gsi: u32, + chip: u32, + pin: u32, + routes: &mut HashMap, + ) -> Result<()> { + let mut entry = kvm_irq_routing_entry { + gsi, + type_: KVM_IRQ_ROUTING_IRQCHIP, + ..Default::default() + }; + // Safe because we are initializing all fields of the `irqchip` struct. 
+ entry.u.irqchip.irqchip = chip; + entry.u.irqchip.pin = pin; + routes.insert(hash_key(&entry), entry); + + Ok(()) + } + + /// Build routings for IRQs connected to the master PIC, the slave PIC or the first IOAPIC. + #[cfg(target_arch = "x86_64")] + pub(super) fn initialize_legacy( + routes: &mut HashMap, + ) -> Result<()> { + // Build routings for the master PIC + for i in 0..8 { + if i != 2 { + Self::add_legacy_entry(i, KVM_IRQCHIP_PIC_MASTER, i, routes)?; + } + } + + // Build routings for the slave PIC + for i in 8..16 { + Self::add_legacy_entry(i, KVM_IRQCHIP_PIC_SLAVE, i - 8, routes)?; + } + + // Build routings for the first IOAPIC + for i in 0..MAX_LEGACY_IRQS { + if i == 0 { + Self::add_legacy_entry(i, KVM_IRQCHIP_IOAPIC, 2, routes)?; + } else if i != 2 { + Self::add_legacy_entry(i, KVM_IRQCHIP_IOAPIC, i, routes)?; + }; + } + + Ok(()) + } + + #[cfg(target_arch = "aarch64")] + pub(super) fn initialize_legacy( + routes: &mut HashMap, + ) -> Result<()> { + for i in 0..MAX_LEGACY_IRQS { + let mut entry = kvm_irq_routing_entry { + gsi: i, + type_: KVM_IRQ_ROUTING_IRQCHIP, + ..Default::default() + }; + entry.u.irqchip.irqchip = 0; + entry.u.irqchip.pin = i; + routes.insert(hash_key(&entry), entry); + } + Ok(()) + } +} + +impl InterruptSourceGroup for LegacyIrq { + fn interrupt_type(&self) -> InterruptSourceType { + InterruptSourceType::LegacyIrq + } + + fn len(&self) -> u32 { + 1 + } + + fn base(&self) -> u32 { + self.base + } + + fn enable(&self, configs: &[InterruptSourceConfig]) -> Result<()> { + if configs.len() != 1 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + // The IRQ routings for legacy IRQs have been configured during KvmIrqManager::initialize(), + // so only need to register irqfd to the KVM driver. + self.vmfd + .register_irqfd(&self.irqfd, self.base) + .map_err(from_sys_util_errno) + } + + fn disable(&self) -> Result<()> { + self.vmfd + .unregister_irqfd(&self.irqfd, self.base) + .map_err(from_sys_util_errno) + } + + fn update(&self, index: InterruptIndex, _config: &InterruptSourceConfig) -> Result<()> { + // For legacy interrupts, the routing configuration is managed by the PIC/IOAPIC interrupt + // controller drivers, so nothing to do here. + if index != 0 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + Ok(()) + } + + fn notifier(&self, index: InterruptIndex) -> Option<&EventFd> { + if index != 0 { + None + } else { + Some(&self.irqfd) + } + } + + fn trigger(&self, index: InterruptIndex) -> Result<()> { + if index != 0 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + self.irqfd.write(1) + } + + fn mask(&self, index: InterruptIndex) -> Result<()> { + if index > 1 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + self.vmfd + .unregister_irqfd(&self.irqfd, self.base + index) + .map_err(from_sys_util_errno)?; + + Ok(()) + } + + fn unmask(&self, index: InterruptIndex) -> Result<()> { + if index > 1 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + self.vmfd + .register_irqfd(&self.irqfd, self.base + index) + .map_err(from_sys_util_errno)?; + + Ok(()) + } + + fn get_pending_state(&self, index: InterruptIndex) -> bool { + if index > 1 { + return false; + } + + // Peak the EventFd.count by reading and writing back. + // The irqfd must be in NON-BLOCKING mode. 
+ match self.irqfd.read() { + Err(_) => false, + Ok(count) => { + if count != 0 && self.irqfd.write(count).is_err() { + // Hope the caller will handle the pending state corrrectly, + // then no interrupt will be lost. + } + count != 0 + } + } + } +} + +#[cfg(test)] +#[cfg(target_arch = "x86_64")] +mod test { + use super::*; + use crate::manager::tests::create_vm_fd; + + const MASTER_PIC: usize = 7; + const SLAVE_PIC: usize = 8; + const IOAPIC: usize = 23; + + #[test] + #[allow(unreachable_patterns)] + fn test_legacy_interrupt_group() { + let vmfd = Arc::new(create_vm_fd()); + let rounting = Arc::new(KvmIrqRouting::new(vmfd.clone())); + let base = 0; + let count = 1; + let group = LegacyIrq::new(base, count, vmfd.clone(), rounting.clone()).unwrap(); + + let legacy_fds = vec![InterruptSourceConfig::LegacyIrq(LegacyIrqSourceConfig {})]; + + match group.interrupt_type() { + InterruptSourceType::LegacyIrq => {} + _ => { + panic!(); + } + } + vmfd.create_irq_chip().unwrap(); + assert_eq!(group.len(), 1); + assert_eq!(group.base(), base); + group.enable(&legacy_fds).unwrap(); + group.notifier(0).unwrap().write(1).unwrap(); + group.trigger(0).unwrap(); + assert!(group.trigger(1).is_err()); + group + .update( + 0, + &InterruptSourceConfig::LegacyIrq(LegacyIrqSourceConfig {}), + ) + .unwrap(); + group.disable().unwrap(); + + assert!(LegacyIrq::new(base, 2, vmfd.clone(), rounting.clone()).is_err()); + assert!(LegacyIrq::new(110, 1, vmfd, rounting).is_err()); + } + + #[test] + fn test_irq_routing_initialize_legacy() { + let vmfd = Arc::new(create_vm_fd()); + let routing = KvmIrqRouting::new(vmfd.clone()); + + // this would ok on 4.9 kernel + assert!(routing.initialize().is_err()); + + vmfd.create_irq_chip().unwrap(); + routing.initialize().unwrap(); + + let routes = &routing.routes.lock().unwrap(); + assert_eq!(routes.len(), MASTER_PIC + SLAVE_PIC + IOAPIC); + } + + #[test] + fn test_routing_opt() { + let vmfd = Arc::new(create_vm_fd()); + let routing = KvmIrqRouting::new(vmfd.clone()); + + // this would ok on 4.9 kernel + assert!(routing.initialize().is_err()); + + vmfd.create_irq_chip().unwrap(); + routing.initialize().unwrap(); + + let mut entry = kvm_irq_routing_entry { + gsi: 8, + type_: kvm_bindings::KVM_IRQ_ROUTING_IRQCHIP, + ..Default::default() + }; + + // Safe because we are initializing all fields of the `irqchip` struct. 
+ entry.u.irqchip.irqchip = 0; + entry.u.irqchip.pin = 3; + + let entrys = vec![entry]; + + assert!(routing.modify(&entry).is_err()); + routing.add(&entrys).unwrap(); + entry.u.irqchip.pin = 4; + routing.modify(&entry).unwrap(); + routing.remove(&entrys).unwrap(); + assert!(routing.modify(&entry).is_err()); + } + + #[test] + fn test_routing_set_routing() { + let vmfd = Arc::new(create_vm_fd()); + let routing = KvmIrqRouting::new(vmfd.clone()); + + // this would ok on 4.9 kernel + assert!(routing.initialize().is_err()); + + vmfd.create_irq_chip().unwrap(); + routing.initialize().unwrap(); + + let mut entry = kvm_irq_routing_entry { + gsi: 8, + type_: kvm_bindings::KVM_IRQ_ROUTING_IRQCHIP, + ..Default::default() + }; + entry.u.irqchip.irqchip = 0; + entry.u.irqchip.pin = 3; + + routing + .routes + .lock() + .unwrap() + .insert(hash_key(&entry), entry); + let routes = routing.routes.lock().unwrap(); + routing.set_routing(&routes).unwrap(); + } + + #[test] + fn test_has_key() { + let gsi = 4; + let mut entry = kvm_irq_routing_entry { + gsi, + type_: kvm_bindings::KVM_IRQ_ROUTING_IRQCHIP, + ..Default::default() + }; + // Safe because we are initializing all fields of the `irqchip` struct. + entry.u.irqchip.irqchip = kvm_bindings::KVM_IRQCHIP_PIC_MASTER; + entry.u.irqchip.pin = gsi; + assert_eq!(hash_key(&entry), 0x0001_0000_0004); + } +} diff --git a/src/dragonball/src/dbs_interrupt/src/kvm/mod.rs b/src/dragonball/src/dbs_interrupt/src/kvm/mod.rs new file mode 100644 index 000000000000..435bb20a7363 --- /dev/null +++ b/src/dragonball/src/dbs_interrupt/src/kvm/mod.rs @@ -0,0 +1,340 @@ +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Manage virtual device's interrupts based on the Linux KVM framework. +//! +//! When updaing KVM IRQ routing by ioctl(KVM_SET_GSI_ROUTING), all interrupts of the virtual +//! machine must be updated all together. The [KvmIrqRouting](struct.KvmIrqRouting.html) structure +//! is to maintain the global interrupt routing table. +//! +//! It deserves a good documentation about the way that KVM based vmms manages interrupts. From the +//! KVM hypervisor side, it provides three mechanism to support injecting interrupts into guests: +//! 1) Irqfd. When data is written to an irqfd, it triggers KVM to inject an interrupt into guest. +//! 2) Irq routing. Irq routing determines the way to inject an irq into guest. +//! 3) Signal MSI. Vmm can inject an MSI interrupt into guest by issuing KVM_SIGNAL_MSI ioctl. +//! +//! Most VMMs use irqfd + irq routing to support interrupt injecting, so we will focus on this mode. +//! The flow to enable interrupt injecting is: +//! 1) VMM creates an irqfd +//! 2) VMM invokes KVM_IRQFD to bind the irqfd to an interrupt source +//! 3) VMM invokes KVM_SET_GSI_ROUTING to configure the way to inject the interrupt into guest +//! 4) device backend driver writes to the irqfd +//! 5) an interurpt is injected into the guest + +use std::collections::HashMap; +use std::io::{Error, ErrorKind}; +use std::sync::{Arc, Mutex}; + +use kvm_bindings::{kvm_irq_routing, kvm_irq_routing_entry}; +use kvm_ioctls::VmFd; + +use super::*; + +#[cfg(feature = "kvm-legacy-irq")] +use legacy_irq::LegacyIrq; +#[cfg(feature = "kvm-msi-irq")] +use msi_irq::MsiIrq; + +#[cfg(feature = "kvm-legacy-irq")] +mod legacy_irq; +#[cfg(feature = "kvm-msi-generic")] +mod msi_generic; +#[cfg(feature = "kvm-msi-irq")] +mod msi_irq; + +/// Maximum number of global interrupt sources. 
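
The irqfd flow described in the module documentation above can be exercised with `kvm-ioctls` directly. A minimal sketch of steps 1, 2 and 4 (illustrative only; it relies on the default legacy routing installed along with the in-kernel irqchip on x86_64, and the GSI is an arbitrary choice):

```rust
use std::sync::Arc;

use kvm_ioctls::Kvm;
use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK};

fn main() -> std::io::Result<()> {
    let kvm = Kvm::new().unwrap();
    let vmfd = Arc::new(kvm.create_vm().unwrap());
    vmfd.create_irq_chip().unwrap();

    // Step 1: create an irqfd.
    let irqfd = EventFd::new(EFD_NONBLOCK)?;
    // Step 2: bind the irqfd to a GSI (KVM_IRQFD).
    vmfd.register_irqfd(&irqfd, 5).unwrap();
    // Step 4: the device backend signals the irqfd; KVM injects the interrupt (step 5).
    irqfd.write(1)?;
    Ok(())
}
```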
+pub const MAX_IRQS: InterruptIndex = 1024; + +/// Default maximum number of Message Signaled Interrupts per device. +pub const DEFAULT_MAX_MSI_IRQS_PER_DEVICE: InterruptIndex = 256; + +/// Structure to manage interrupt sources for a virtual machine based on the Linux KVM framework. +/// +/// The KVM framework provides methods to inject interrupts into the target virtual machines, which +/// uses irqfd to notity the KVM kernel module for injecting interrupts. When the interrupt source, +/// usually a virtual device backend in userspace, writes to the irqfd file descriptor, the KVM +/// kernel module will inject a corresponding interrupt into the target VM according to the IRQ +/// routing configuration. +pub struct KvmIrqManager { + mgr: Mutex, +} + +impl KvmIrqManager { + /// Create a new interrupt manager based on the Linux KVM framework. + /// + /// # Arguments + /// * `vmfd`: The KVM VM file descriptor, which will be used to access the KVM subsystem. + pub fn new(vmfd: Arc) -> Self { + KvmIrqManager { + mgr: Mutex::new(KvmIrqManagerObj { + vmfd: vmfd.clone(), + groups: HashMap::new(), + routes: Arc::new(KvmIrqRouting::new(vmfd)), + max_msi_irqs: DEFAULT_MAX_MSI_IRQS_PER_DEVICE, + }), + } + } + + /// Prepare the interrupt manager for generating interrupts into the target VM. + pub fn initialize(&self) -> Result<()> { + // Safe to unwrap because there's no legal way to break the mutex. + let mgr = self.mgr.lock().unwrap(); + mgr.initialize() + } + + /// Set maximum supported MSI interrupts per device. + pub fn set_max_msi_irqs(&self, max_msi_irqs: InterruptIndex) { + let mut mgr = self.mgr.lock().unwrap(); + mgr.max_msi_irqs = max_msi_irqs; + } +} + +impl InterruptManager for KvmIrqManager { + fn create_group( + &self, + ty: InterruptSourceType, + base: InterruptIndex, + count: u32, + ) -> Result>> { + // Safe to unwrap because there's no legal way to break the mutex. + let mut mgr = self.mgr.lock().unwrap(); + mgr.create_group(ty, base, count) + } + + fn destroy_group(&self, group: Arc>) -> Result<()> { + // Safe to unwrap because there's no legal way to break the mutex. + let mut mgr = self.mgr.lock().unwrap(); + mgr.destroy_group(group) + } +} + +struct KvmIrqManagerObj { + vmfd: Arc, + routes: Arc, + groups: HashMap>>, + max_msi_irqs: InterruptIndex, +} + +impl KvmIrqManagerObj { + fn initialize(&self) -> Result<()> { + self.routes.initialize()?; + Ok(()) + } + + fn create_group( + &mut self, + ty: InterruptSourceType, + base: InterruptIndex, + count: u32, + ) -> Result>> { + #[allow(unreachable_patterns)] + let group: Arc> = match ty { + #[cfg(feature = "kvm-legacy-irq")] + InterruptSourceType::LegacyIrq => Arc::new(Box::new(LegacyIrq::new( + base, + count, + self.vmfd.clone(), + self.routes.clone(), + )?)), + #[cfg(feature = "kvm-msi-irq")] + InterruptSourceType::MsiIrq => Arc::new(Box::new(MsiIrq::new( + base, + count, + self.max_msi_irqs, + self.vmfd.clone(), + self.routes.clone(), + )?)), + _ => return Err(Error::from(ErrorKind::InvalidInput)), + }; + + self.groups.insert(base, group.clone()); + + Ok(group) + } + + fn destroy_group(&mut self, group: Arc>) -> Result<()> { + self.groups.remove(&group.base()); + Ok(()) + } +} + +// Use (entry.type, entry.gsi) as the hash key because entry.gsi can't uniquely identify an +// interrupt source on x86 platforms. The PIC and IOAPIC may share the same GSI on x86 platforms. 
+fn hash_key(entry: &kvm_irq_routing_entry) -> u64 { + let type1 = match entry.type_ { + #[cfg(feature = "kvm-legacy-irq")] + kvm_bindings::KVM_IRQ_ROUTING_IRQCHIP => unsafe { entry.u.irqchip.irqchip }, + _ => 0u32, + }; + (u64::from(type1) << 48 | u64::from(entry.type_) << 32) | u64::from(entry.gsi) +} + +pub(super) struct KvmIrqRouting { + vm_fd: Arc, + routes: Mutex>, +} + +impl KvmIrqRouting { + pub(super) fn new(vm_fd: Arc) -> Self { + KvmIrqRouting { + vm_fd, + routes: Mutex::new(HashMap::new()), + } + } + + pub(super) fn initialize(&self) -> Result<()> { + // Safe to unwrap because there's no legal way to break the mutex. + #[allow(unused_mut)] + let mut routes = self.routes.lock().unwrap(); + + #[cfg(feature = "kvm-legacy-irq")] + LegacyIrq::initialize_legacy(&mut routes)?; + + self.set_routing(&routes)?; + + Ok(()) + } + + fn set_routing(&self, routes: &HashMap) -> Result<()> { + // Allocate enough buffer memory. + let elem_sz = std::mem::size_of::(); + let total_sz = std::mem::size_of::() * routes.len() + elem_sz; + let elem_cnt = (total_sz + elem_sz - 1) / elem_sz; + let mut irq_routings = Vec::::with_capacity(elem_cnt); + irq_routings.resize_with(elem_cnt, Default::default); + + // Prepare the irq_routing header. + let irq_routing = &mut irq_routings[0]; + irq_routing.nr = routes.len() as u32; + irq_routing.flags = 0; + + // Safe because we have just allocated enough memory above. + let irq_routing_entries = unsafe { irq_routing.entries.as_mut_slice(routes.len()) }; + for (idx, entry) in routes.values().enumerate() { + irq_routing_entries[idx] = *entry; + } + + self.vm_fd + .set_gsi_routing(irq_routing) + .map_err(from_sys_util_errno)?; + + Ok(()) + } +} + +#[cfg(feature = "kvm-msi-generic")] +impl KvmIrqRouting { + pub(super) fn add(&self, entries: &[kvm_irq_routing_entry]) -> Result<()> { + // Safe to unwrap because there's no legal way to break the mutex. + let mut routes = self.routes.lock().unwrap(); + for entry in entries { + if entry.gsi >= MAX_IRQS { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } else if routes.contains_key(&hash_key(entry)) { + return Err(std::io::Error::from_raw_os_error(libc::EEXIST)); + } + } + + for entry in entries { + let _ = routes.insert(hash_key(entry), *entry); + } + self.set_routing(&routes) + } + + pub(super) fn remove(&self, entries: &[kvm_irq_routing_entry]) -> Result<()> { + // Safe to unwrap because there's no legal way to break the mutex. + let mut routes = self.routes.lock().unwrap(); + for entry in entries { + let _ = routes.remove(&hash_key(entry)); + } + self.set_routing(&routes) + } + + pub(super) fn modify(&self, entry: &kvm_irq_routing_entry) -> Result<()> { + // Safe to unwrap because there's no legal way to break the mutex. + let mut routes = self.routes.lock().unwrap(); + if !routes.contains_key(&hash_key(entry)) { + return Err(std::io::Error::from_raw_os_error(libc::ENOENT)); + } + + let _ = routes.insert(hash_key(entry), *entry); + self.set_routing(&routes) + } +} + +/// Helper function convert from vmm_sys_util::errno::Error to std::io::Error. 
+pub fn from_sys_util_errno(e: vmm_sys_util::errno::Error) -> std::io::Error { + std::io::Error::from_raw_os_error(e.errno()) +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + use crate::manager::tests::create_vm_fd; + + fn create_irq_group( + manager: Arc, + _vmfd: Arc, + ) -> Arc> { + let base = 0; + let count = 1; + + manager + .create_group(InterruptSourceType::LegacyIrq, base, count) + .unwrap() + } + + fn create_msi_group( + manager: Arc, + _vmfd: Arc, + ) -> Arc> { + let base = 168; + let count = 32; + + manager + .create_group(InterruptSourceType::MsiIrq, base, count) + .unwrap() + } + + pub fn create_kvm_irq_manager() -> (Arc, KvmIrqManager) { + let vmfd = Arc::new(create_vm_fd()); + let manager = KvmIrqManager::new(vmfd.clone()); + vmfd.create_irq_chip().unwrap(); + manager.initialize().unwrap(); + (vmfd, manager) + } + + #[test] + fn test_create_kvm_irq_manager() { + let _ = create_kvm_irq_manager(); + } + + #[test] + fn test_kvm_irq_manager_opt() { + let vmfd = Arc::new(create_vm_fd()); + vmfd.create_irq_chip().unwrap(); + let manager = Arc::new(KvmIrqManager::new(vmfd.clone())); + manager.initialize().unwrap(); + + // set max irqs + manager.set_max_msi_irqs(0x128); + assert_eq!(manager.mgr.lock().unwrap().max_msi_irqs, 0x128); + + // irq + let group = create_irq_group(manager.clone(), vmfd.clone()); + let _ = group.clone(); + manager.destroy_group(group).unwrap(); + + // msi + let group = create_msi_group(manager.clone(), vmfd); + let _ = group.clone(); + manager.destroy_group(group).unwrap(); + } + + #[test] + fn test_from_sys_util_errno() { + let error = vmm_sys_util::errno::Error::new(1); + let io_error = from_sys_util_errno(error); + assert_eq!(io_error.kind(), std::io::ErrorKind::PermissionDenied); + } +} diff --git a/src/dragonball/src/dbs_interrupt/src/kvm/msi_generic.rs b/src/dragonball/src/dbs_interrupt/src/kvm/msi_generic.rs new file mode 100644 index 000000000000..eedef67d0cd1 --- /dev/null +++ b/src/dragonball/src/dbs_interrupt/src/kvm/msi_generic.rs @@ -0,0 +1,132 @@ +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Helper utilities for handling MSI interrupts. 
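
For orientation, the MSI message that these helpers translate into a KVM routing entry has roughly the following shape (values are illustrative; on x86_64 the MSI address window sits at 0xFEE0_0000 and the low bits of `data` carry the vector):

```rust
use dbs_interrupt::MsiIrqSourceConfig;

fn main() {
    let cfg = MsiIrqSourceConfig {
        high_addr: 0x0,
        low_addr: 0xfee0_0000, // x86_64 MSI address window
        data: 0x20,            // vector 32
        msg_ctl: 0,
        device_id: None,
    };
    assert_eq!(cfg.low_addr & 0xfff0_0000, 0xfee0_0000);
}
```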
+ +use kvm_bindings::{kvm_irq_routing_entry, KVM_IRQ_ROUTING_MSI}; +use vmm_sys_util::eventfd::EFD_NONBLOCK; + +use super::*; + +pub(crate) struct MsiConfig { + pub(super) irqfd: EventFd, + pub(crate) config: Mutex, +} + +impl MsiConfig { + pub(crate) fn new() -> Self { + MsiConfig { + irqfd: EventFd::new(EFD_NONBLOCK).unwrap(), + config: Mutex::new(Default::default()), + } + } +} + +pub(super) fn new_msi_routing_entry( + gsi: InterruptIndex, + msicfg: &MsiIrqSourceConfig, +) -> kvm_irq_routing_entry { + let mut entry = kvm_irq_routing_entry { + gsi, + type_: KVM_IRQ_ROUTING_MSI, + ..Default::default() + }; + entry.u.msi.address_hi = msicfg.high_addr; + entry.u.msi.address_lo = msicfg.low_addr; + entry.u.msi.data = msicfg.data; + if let Some(dev_id) = msicfg.device_id { + entry.u.msi.__bindgen_anon_1.devid = dev_id; + entry.flags = kvm_bindings::KVM_MSI_VALID_DEVID; + } + + entry +} + +#[allow(irrefutable_let_patterns)] +pub(super) fn create_msi_routing_entries( + base: InterruptIndex, + configs: &[InterruptSourceConfig], +) -> Result> { + let _ = base + .checked_add(configs.len() as u32) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::EINVAL))?; + let mut entries = Vec::with_capacity(configs.len()); + for (i, ref val) in configs.iter().enumerate() { + if let InterruptSourceConfig::MsiIrq(msicfg) = val { + let entry = new_msi_routing_entry(base + i as u32, msicfg); + entries.push(entry); + } else { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + } + + Ok(entries) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_create_msiconfig() { + let config = MsiConfig::new(); + config.irqfd.write(1).unwrap(); + } + + #[test] + fn test_new_msi_routing_single() { + let test_gsi = 4; + let msi_source_config = MsiIrqSourceConfig { + high_addr: 0x1234, + low_addr: 0x5678, + data: 0x9876, + msg_ctl: 0, + device_id: None, + }; + let entry = new_msi_routing_entry(test_gsi, &msi_source_config); + assert_eq!(entry.gsi, test_gsi); + assert_eq!(entry.type_, KVM_IRQ_ROUTING_MSI); + unsafe { + assert_eq!(entry.u.msi.address_hi, msi_source_config.high_addr); + assert_eq!(entry.u.msi.address_lo, msi_source_config.low_addr); + assert_eq!(entry.u.msi.data, msi_source_config.data); + } + } + + #[cfg(all(feature = "legacy_irq", target_arch = "x86_64"))] + #[test] + fn test_new_msi_routing_multi() { + let mut msi_fds = Vec::with_capacity(16); + for _ in 0..16 { + msi_fds.push(InterruptSourceConfig::MsiIrq(MsiIrqSourceConfig { + high_addr: 0x1234, + low_addr: 0x5678, + data: 0x9876, + msg_ctl: 0, + device_id: None, + })); + } + let mut legacy_fds = Vec::with_capacity(16); + for _ in 0..16 { + legacy_fds.push(InterruptSourceConfig::LegacyIrq(LegacyIrqSourceConfig {})); + } + + let base = 0; + let entrys = create_msi_routing_entries(0, &msi_fds).unwrap(); + + for (i, entry) in entrys.iter().enumerate() { + assert_eq!(entry.gsi, (base + i) as u32); + assert_eq!(entry.type_, KVM_IRQ_ROUTING_MSI); + if let InterruptSourceConfig::MsiIrq(config) = &msi_fds[i] { + unsafe { + assert_eq!(entry.u.msi.address_hi, config.high_addr); + assert_eq!(entry.u.msi.address_lo, config.low_addr); + assert_eq!(entry.u.msi.data, config.data); + } + } + } + + assert!(create_msi_routing_entries(0, &legacy_fds).is_err()); + assert!(create_msi_routing_entries(!0, &msi_fds).is_err()); + } +} diff --git a/src/dragonball/src/dbs_interrupt/src/kvm/msi_irq.rs b/src/dragonball/src/dbs_interrupt/src/kvm/msi_irq.rs new file mode 100644 index 000000000000..50e1cdb3388a --- /dev/null +++ 
b/src/dragonball/src/dbs_interrupt/src/kvm/msi_irq.rs @@ -0,0 +1,276 @@ +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Manage virtual device's PCI MSI/PCI MSIx interrupts based on Linux KVM framework. +//! +//! To optimize for performance by avoiding unnecessary locking and state checking, we assume that +//! the caller will take the responsibility to maintain the interrupt states and only issue valid +//! requests to this driver. If the caller doesn't obey the contract, only the current virtual +//! machine will be affected, it shouldn't break the host or other virtual machines. + +use super::msi_generic::{create_msi_routing_entries, new_msi_routing_entry, MsiConfig}; +use super::*; + +pub(super) struct MsiIrq { + base: InterruptIndex, + count: InterruptIndex, + vmfd: Arc, + irq_routing: Arc, + msi_configs: Vec, +} + +impl MsiIrq { + pub(super) fn new( + base: InterruptIndex, + count: InterruptIndex, + max_msi_irqs: InterruptIndex, + vmfd: Arc, + irq_routing: Arc, + ) -> Result { + if count > max_msi_irqs || base >= MAX_IRQS || base + count > MAX_IRQS { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + let mut msi_configs = Vec::with_capacity(count as usize); + for _ in 0..count { + msi_configs.push(MsiConfig::new()); + } + + Ok(MsiIrq { + base, + count, + vmfd, + irq_routing, + msi_configs, + }) + } +} + +impl InterruptSourceGroup for MsiIrq { + fn interrupt_type(&self) -> InterruptSourceType { + InterruptSourceType::MsiIrq + } + + fn len(&self) -> u32 { + self.count + } + + fn base(&self) -> u32 { + self.base + } + + fn enable(&self, configs: &[InterruptSourceConfig]) -> Result<()> { + if configs.len() != self.count as usize { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + // First add IRQ routings for all the MSI interrupts. + let entries = create_msi_routing_entries(self.base, configs)?; + + self.irq_routing + .add(&entries) + .or_else(|err| match err.kind() { + // The irq_routing was already restored when the snapshot was restored, so the AlreadyExists error is ignored here. + std::io::ErrorKind::AlreadyExists => Ok(()), + _ => Err(err), + })?; + + // Then register irqfds to the KVM module. + for i in 0..self.count { + let irqfd = &self.msi_configs[i as usize].irqfd; + self.vmfd + .register_irqfd(irqfd, self.base + i) + .map_err(from_sys_util_errno)?; + } + + Ok(()) + } + + fn disable(&self) -> Result<()> { + // First unregister all irqfds, so it won't trigger anymore. + for i in 0..self.count { + let irqfd = &self.msi_configs[i as usize].irqfd; + self.vmfd + .unregister_irqfd(irqfd, self.base + i) + .map_err(from_sys_util_errno)?; + } + + // Then tear down the IRQ routings for all the MSI interrupts. + let mut entries = Vec::with_capacity(self.count as usize); + for i in 0..self.count { + // Safe to unwrap because there's no legal way to break the mutex. + let msicfg = self.msi_configs[i as usize].config.lock().unwrap(); + let entry = new_msi_routing_entry(self.base + i, &msicfg); + entries.push(entry); + } + self.irq_routing.remove(&entries)?; + + Ok(()) + } + + #[allow(irrefutable_let_patterns)] + fn update(&self, index: InterruptIndex, config: &InterruptSourceConfig) -> Result<()> { + if index >= self.count { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + if let InterruptSourceConfig::MsiIrq(ref cfg) = config { + // Safe to unwrap because there's no legal way to break the mutex. 
+ let entry = { + let mut msicfg = self.msi_configs[index as usize].config.lock().unwrap(); + msicfg.high_addr = cfg.high_addr; + msicfg.low_addr = cfg.low_addr; + msicfg.data = cfg.data; + msicfg.device_id = cfg.device_id; + new_msi_routing_entry(self.base + index, &msicfg) + }; + self.irq_routing.modify(&entry) + } else { + Err(std::io::Error::from_raw_os_error(libc::EINVAL)) + } + } + + fn notifier(&self, index: InterruptIndex) -> Option<&EventFd> { + if index >= self.count { + None + } else { + let msi_config = &self.msi_configs[index as usize]; + Some(&msi_config.irqfd) + } + } + + fn trigger(&self, index: InterruptIndex) -> Result<()> { + // Assume that the caller will maintain the interrupt states and only call this function + // when suitable. + if index >= self.count { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + let msi_config = &self.msi_configs[index as usize]; + msi_config.irqfd.write(1) + } + + fn mask(&self, index: InterruptIndex) -> Result<()> { + if index >= self.count { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + let irqfd = &self.msi_configs[index as usize].irqfd; + self.vmfd + .unregister_irqfd(irqfd, self.base + index) + .map_err(from_sys_util_errno)?; + + Ok(()) + } + + fn unmask(&self, index: InterruptIndex) -> Result<()> { + if index >= self.count { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + let irqfd = &self.msi_configs[index as usize].irqfd; + self.vmfd + .register_irqfd(irqfd, self.base + index) + .map_err(from_sys_util_errno)?; + + Ok(()) + } + + fn get_pending_state(&self, index: InterruptIndex) -> bool { + if index >= self.count { + return false; + } + + // Peak the EventFd.count by reading and writing back. + // The irqfd must be in NON-BLOCKING mode. + let irqfd = &self.msi_configs[index as usize].irqfd; + match irqfd.read() { + Err(_) => false, + Ok(count) => { + if count != 0 && irqfd.write(count).is_err() { + // Hope the caller will handle the pending state corrrectly, + // then no interrupt will be lost. + // Really no way to recover here! 
+ } + count != 0 + } + } + } +} + +#[cfg(target_arch = "x86_64")] +#[cfg(test)] +mod test { + use super::*; + use crate::manager::tests::create_vm_fd; + + #[test] + #[allow(unreachable_patterns)] + fn test_msi_interrupt_group() { + let vmfd = Arc::new(create_vm_fd()); + vmfd.create_irq_chip().unwrap(); + + let rounting = Arc::new(KvmIrqRouting::new(vmfd.clone())); + rounting.initialize().unwrap(); + + let base = 168; + let count = 32; + let group = MsiIrq::new( + base, + count, + DEFAULT_MAX_MSI_IRQS_PER_DEVICE, + vmfd.clone(), + rounting.clone(), + ) + .unwrap(); + let mut msi_fds = Vec::with_capacity(count as usize); + + match group.interrupt_type() { + InterruptSourceType::MsiIrq => {} + _ => { + panic!(); + } + } + + for _ in 0..count { + let msi_source_config = MsiIrqSourceConfig { + high_addr: 0x1234, + low_addr: 0x5678, + data: 0x9876, + msg_ctl: 0x6789, + device_id: None, + }; + msi_fds.push(InterruptSourceConfig::MsiIrq(msi_source_config)); + } + + group.enable(&msi_fds).unwrap(); + assert_eq!(group.len(), count); + assert_eq!(group.base(), base); + + for i in 0..count { + let msi_source_config = MsiIrqSourceConfig { + high_addr: i + 0x1234, + low_addr: i + 0x5678, + data: i + 0x9876, + msg_ctl: i + 0x6789, + device_id: None, + }; + group.notifier(i).unwrap().write(1).unwrap(); + group.trigger(i).unwrap(); + group + .update(0, &InterruptSourceConfig::MsiIrq(msi_source_config)) + .unwrap(); + } + assert!(group.trigger(33).is_err()); + group.disable().unwrap(); + + assert!(MsiIrq::new( + base, + DEFAULT_MAX_MSI_IRQS_PER_DEVICE + 1, + DEFAULT_MAX_MSI_IRQS_PER_DEVICE, + vmfd.clone(), + rounting.clone() + ) + .is_err()); + assert!(MsiIrq::new(1100, 1, DEFAULT_MAX_MSI_IRQS_PER_DEVICE, vmfd, rounting).is_err()); + } +} diff --git a/src/dragonball/src/dbs_interrupt/src/lib.rs b/src/dragonball/src/dbs_interrupt/src/lib.rs new file mode 100644 index 000000000000..fab0123b24d7 --- /dev/null +++ b/src/dragonball/src/dbs_interrupt/src/lib.rs @@ -0,0 +1,244 @@ +// Copyright (C) 2019-2020 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Traits and Structs to manage interrupt sources for devices. +//! +//! Software indicating an event that needs immediate attention. An interrupt alerts the processor +//! to a high-priority condition requiring the interruption of the current code the processor is +//! executing. The processor responds by suspending its current activities, saving its state, and +//! executing a function called an interrupt handler (or an interrupt service routine, ISR) to deal +//! with the event. This interruption is temporary, and, after the interrupt handler finishes, +//! unless handling the interrupt has emitted a fatal error, the processor resumes normal +//! activities. +//! +//! Hardware interrupts are used by devices to communicate that they require attention from the +//! operating system, or a bare-metal program running on the CPU if there are no OSes. The act of +//! initiating a hardware interrupt is referred to as an interrupt request (IRQ). Different devices +//! are usually associated with different interrupts using a unique value associated with each +//! interrupt. This makes it possible to know which hardware device caused which interrupts. These +//! interrupt values are often called IRQ lines, or just interrupt lines. +//! +//! Nowadays, IRQ lines is not the only mechanism to deliver device interrupts to processors. MSI +//! [(Message Signaled Interrupt)](https://en.wikipedia.org/wiki/Message_Signaled_Interrupts) is +//! 
another commonly used alternative in-band method of signaling an interrupt, using special +//! in-band messages to replace traditional out-of-band assertion of dedicated interrupt lines. +//! While more complex to implement in a device, message signaled interrupts have some significant +//! advantages over pin-based out-of-band interrupt signaling. Message signaled interrupts are +//! supported in PCI bus since its version 2.2, and in later available PCI Express bus. Some non-PCI +//! architectures also use message signaled interrupts. +//! +//! While IRQ is a term commonly used by Operating Systems when dealing with hardware interrupts, +//! the IRQ numbers managed by OSes are independent of the ones managed by VMM. For simplicity sake, +//! the term `Interrupt Source` is used instead of IRQ to represent both pin-based interrupts and +//! MSI interrupts. +//! +//! A device may support multiple types of interrupts, and each type of interrupt may support one or +//! multiple interrupt sources. For example, a PCI device may support: +//! * Legacy Irq: exactly one interrupt source. +//! * PCI MSI Irq: 1,2,4,8,16,32 interrupt sources. +//! * PCI MSIx Irq: 2^n(n=0-11) interrupt sources. +//! +//! A distinct Interrupt Source Identifier (ISID) will be assigned to each interrupt source. An ID +//! allocator will be used to allocate and free Interrupt Source Identifiers for devices. To +//! decouple this crate from the ID allocator, here we doesn't take the responsibility to +//! allocate/free Interrupt Source IDs but only makes use of assigned IDs. +//! +//! The overall flow to deal with interrupts is: +//! * the VMM creates an interrupt manager +//! * the VMM creates a device manager, passing on an reference to the interrupt manager +//! * the device manager passes on an reference to the interrupt manager to all registered devices +//! * guest kernel loads drivers for virtual devices +//! * guest device driver determines the type and number of interrupts needed, and update the device +//! configuration +//! * the virtual device backend requests the interrupt manager to create an interrupt group +//! according to guest configuration information + +use std::io::Error; +use std::ops::Deref; +use std::sync::Arc; + +use vmm_sys_util::eventfd::EventFd; + +mod manager; +pub use manager::MSI_DEVICE_ID_SHIFT; +pub use manager::{DeviceInterruptManager, DeviceInterruptMode, InterruptStatusRegister32}; + +mod notifier; +pub use self::notifier::*; + +#[cfg(feature = "kvm-irq")] +pub mod kvm; +#[cfg(feature = "kvm-irq")] +pub use self::kvm::KvmIrqManager; + +/// Reuse std::io::Result to simplify interoperability among crates. +pub type Result = std::io::Result; + +/// Data type to store an interrupt source identifier. +pub type InterruptIndex = u32; + +/// Type of interrupt source. +#[derive(Clone, Eq, PartialEq, Debug)] +pub enum InterruptSourceType { + #[cfg(feature = "legacy-irq")] + /// Legacy Pin-based Interrupt. + /// On x86 platforms, legacy interrupts are routed through 8259 PICs and/or IOAPICs. + LegacyIrq, + #[cfg(feature = "msi-irq")] + /// Message Signaled Interrupt (PCI MSI/PCI MSIx etc). + /// Some non-PCI devices (like HPET on x86) make use of generic MSI in platform specific ways. + MsiIrq, +} + +/// Configuration data for an interrupt source. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum InterruptSourceConfig { + #[cfg(feature = "legacy-irq")] + /// Configuration data for Legacy interrupts. 
+ LegacyIrq(LegacyIrqSourceConfig), + #[cfg(feature = "msi-irq")] + /// Configuration data for PciMsi, PciMsix and generic MSI interrupts. + MsiIrq(MsiIrqSourceConfig), +} + +/// Configuration data for legacy interrupts. +/// +/// On x86 platforms, legacy interrupts means those interrupts routed through PICs or IOAPICs. +#[cfg(feature = "legacy-irq")] +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct LegacyIrqSourceConfig {} + +/// Configuration data for GenericMsi, PciMsi, PciMsix interrupts. +#[cfg(feature = "msi-irq")] +#[derive(Default, Clone, Debug, Eq, PartialEq)] +pub struct MsiIrqSourceConfig { + /// High address to deliver message signaled interrupt. + pub high_addr: u32, + /// Low address to deliver message signaled interrupt. + pub low_addr: u32, + /// Data to write to deliver message signaled interrupt. + pub data: u32, + /// Interrupt control state. + pub msg_ctl: u32, + /// Device id indicate the device who triggers this msi irq. + pub device_id: Option, +} + +/// Trait to manage interrupt sources for virtual device backends. +/// +/// The InterruptManager implementations should protect itself from concurrent accesses internally, +/// so it could be invoked from multi-threaded context. +pub trait InterruptManager { + /// Create an [InterruptSourceGroup](trait.InterruptSourceGroup.html) object to manage interrupt + /// sources for a virtual device. + /// + /// An [InterruptSourceGroup](trait.InterruptSourceGroup.html) object manages all interrupt + /// sources of the same type for a virtual device. + /// + /// # Arguments + /// * type_: type of interrupt source. + /// * base: base Interrupt Source ID to be managed by the group object. + /// * count: number of Interrupt Sources to be managed by the group object. + fn create_group( + &self, + type_: InterruptSourceType, + base: InterruptIndex, + count: InterruptIndex, + ) -> Result>>; + + /// Destroy an [InterruptSourceGroup](trait.InterruptSourceGroup.html) object created by + /// [create_group()](trait.InterruptManager.html#tymethod.create_group). + /// + /// Assume the caller takes the responsibility to disable all interrupt sources of the group + /// before calling destroy_group(). This assumption helps to simplify InterruptSourceGroup + /// implementations. + fn destroy_group(&self, group: Arc>) -> Result<()>; +} + +impl InterruptManager for Arc { + fn create_group( + &self, + type_: InterruptSourceType, + base: u32, + count: u32, + ) -> std::result::Result>, Error> { + self.deref().create_group(type_, base, count) + } + + fn destroy_group( + &self, + group: Arc>, + ) -> std::result::Result<(), Error> { + self.deref().destroy_group(group) + } +} + +/// Trait to manage a group of interrupt sources for a device. +/// +/// A device may support several types of interrupts, and each type of interrupt may contain one or +/// multiple continuous interrupt sources. For example, a PCI device may concurrently support: +/// * Legacy Irq: exactly one interrupt source. +/// * PCI MSI Irq: 1,2,4,8,16,32 interrupt sources. +/// * PCI MSIx Irq: 2^n(n=0-11) interrupt sources. +/// +/// PCI MSI interrupts of a device may not be configured individually, and must configured as a +/// whole block. So all interrupts of the same type of a device are abstracted as an +/// [InterruptSourceGroup](trait.InterruptSourceGroup.html) object, instead of abstracting each +/// interrupt source as a distinct InterruptSource. 
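
As an illustration of the trait declared next, a hypothetical test double that merely counts triggers could look like this (a sketch; it assumes the default `msi-irq` feature for the `InterruptSourceType::MsiIrq` variant and keeps the defaulted methods as-is):

```rust
use std::io::Result;
use std::sync::atomic::{AtomicU32, Ordering};

use dbs_interrupt::{
    InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, InterruptSourceType,
};

struct CountingGroup {
    base: InterruptIndex,
    counts: Vec<AtomicU32>,
}

impl InterruptSourceGroup for CountingGroup {
    fn interrupt_type(&self) -> InterruptSourceType {
        InterruptSourceType::MsiIrq
    }
    fn len(&self) -> InterruptIndex {
        self.counts.len() as InterruptIndex
    }
    fn base(&self) -> InterruptIndex {
        self.base
    }
    fn enable(&self, _configs: &[InterruptSourceConfig]) -> Result<()> {
        Ok(())
    }
    fn disable(&self) -> Result<()> {
        Ok(())
    }
    fn update(&self, _index: InterruptIndex, _config: &InterruptSourceConfig) -> Result<()> {
        Ok(())
    }
    fn trigger(&self, index: InterruptIndex) -> Result<()> {
        // Record the injection instead of poking KVM.
        self.counts[index as usize].fetch_add(1, Ordering::SeqCst);
        Ok(())
    }
}
```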
+#[allow(clippy::len_without_is_empty)] +pub trait InterruptSourceGroup: Send + Sync { + /// Get type of interrupt sources managed by the group. + fn interrupt_type(&self) -> InterruptSourceType; + + /// Get number of interrupt sources managed by the group. + fn len(&self) -> InterruptIndex; + + /// Get base of the assigned Interrupt Source Identifiers. + fn base(&self) -> InterruptIndex; + + /// Enable the interrupt sources in the group to generate interrupts. + fn enable(&self, configs: &[InterruptSourceConfig]) -> Result<()>; + + /// Disable the interrupt sources in the group to generate interrupts. + fn disable(&self) -> Result<()>; + + /// Update the interrupt source group configuration. + /// + /// # Arguments + /// * index: sub-index into the group. + /// * config: configuration data for the interrupt source. + fn update(&self, index: InterruptIndex, config: &InterruptSourceConfig) -> Result<()>; + + /// Returns an interrupt notifier from this interrupt. + /// + /// An interrupt notifier allows for external components and processes to inject interrupts into + /// guest, by writing to the file returned by this method. + fn notifier(&self, _index: InterruptIndex) -> Option<&EventFd> { + None + } + + /// Inject an interrupt from this interrupt source into the guest. + /// + /// If the interrupt has an associated `interrupt_status` register, all bits set in `flag` will + /// be atomically ORed into the `interrupt_status` register. + fn trigger(&self, index: InterruptIndex) -> Result<()>; + + /// Mask an interrupt from this interrupt source. + fn mask(&self, _index: InterruptIndex) -> Result<()> { + // Not all interrupt sources can be disabled. + // To accommodate this, we can have a no-op here. + Ok(()) + } + + /// Unmask an interrupt from this interrupt source. + fn unmask(&self, _index: InterruptIndex) -> Result<()> { + // Not all interrupt sources can be disabled. + // To accommodate this, we can have a no-op here. + Ok(()) + } + + /// Check whether there's pending interrupt. + fn get_pending_state(&self, _index: InterruptIndex) -> bool { + false + } +} diff --git a/src/dragonball/src/dbs_interrupt/src/manager.rs b/src/dragonball/src/dbs_interrupt/src/manager.rs new file mode 100644 index 000000000000..bec623dc8768 --- /dev/null +++ b/src/dragonball/src/dbs_interrupt/src/manager.rs @@ -0,0 +1,794 @@ +// Copyright (C) 2019-2020 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Interrupt manager to manage and switch device interrupt modes. +//! +//! A device may support multiple interrupt modes. For example, a PCI device may support legacy, PCI +//! MSI and PCI MSIx interrupts. This interrupt manager helps a device backend driver to manage its +//! interrupts and provides interfaces to switch interrupt working modes. +use std::io::{Error, Result}; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::Arc; + +use dbs_device::resources::DeviceResources; + +#[cfg(feature = "legacy-irq")] +use super::LegacyIrqSourceConfig; +#[cfg(feature = "msi-irq")] +use super::MsiIrqSourceConfig; +use super::{InterruptManager, InterruptSourceConfig, InterruptSourceGroup, InterruptSourceType}; + +/// Defines the offset when device_id is recorded to msi. +/// +/// For the origin of this value, please refer to the comment of set_msi_device_id function. 
+pub const MSI_DEVICE_ID_SHIFT: u8 = 3; + +#[cfg(feature = "legacy-irq")] +const LEGACY_CONFIGS: [InterruptSourceConfig; 1] = + [InterruptSourceConfig::LegacyIrq(LegacyIrqSourceConfig {})]; + +#[cfg(feature = "msi-irq")] +const MSI_INT_MASK_BIT: u8 = 0; +#[cfg(feature = "msi-irq")] +const MSI_INT_MASK: u32 = (1 << MSI_INT_MASK_BIT) as u32; + +/// Device interrupt working modes. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum DeviceInterruptMode { + /// The device interrupt manager has been disabled. + Disabled = 0, + /// The device interrupt manager works in legacy irq mode. + LegacyIrq = 1, + /// The device interrupt manager works in generic MSI mode. + GenericMsiIrq = 2, + /// The device interrupt manager works in PCI MSI mode. + PciMsiIrq = 3, + /// The device interrupt manager works in PCI MSI-x mode. + PciMsixIrq = 4, +} + +/// A struct to manage interrupts and interrupt modes for a device. +/// +/// The interrupt manager may support multiple working mode. For example, an interrupt manager for a +/// PCI device may work in legacy mode, PCI MSI mode or PCI MSIx mode. Under certain conditions, the +/// interrupt manager may switch between interrupt working modes. To simplify implementation, +/// switching working mode is only supported at configuration stage and will be disabled at runtime +/// stage. The DeviceInterruptManager::enable() switches the interrupt manager from configuration +/// stage into runtime stage. And DeviceInterruptManager::reset() switches from runtime stage back +/// to initial configuration stage. +pub struct DeviceInterruptManager { + mode: DeviceInterruptMode, + activated: bool, + current_idx: usize, + mode2idx: [usize; 5], + intr_mgr: T, + intr_groups: Vec>>, + #[cfg(feature = "msi-irq")] + msi_config: Vec, + /// Device id indicate the device who triggers msi irq. + device_id: Option, +} + +impl DeviceInterruptManager { + /// Create an interrupt manager for a device. + /// + /// # Arguments + /// * `intr_mgr`: underline interrupt manager to allocate/free interrupt groups. + /// * `resources`: resources assigned to the device, including assigned interrupt resources. 
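
A sketch of how a device backend might wire this up (hypothetical helper; it assumes the KVM backend features, that the in-kernel irqchip already exists on `vmfd`, and that a legacy IRQ is among the device's assigned resources):

```rust
use std::sync::Arc;

use dbs_device::resources::{DeviceResources, Resource};
use dbs_interrupt::{DeviceInterruptManager, KvmIrqManager};

fn setup(vmfd: Arc<kvm_ioctls::VmFd>) -> std::io::Result<()> {
    let intr_mgr = Arc::new(KvmIrqManager::new(vmfd));
    intr_mgr.initialize()?;

    // The device was assigned legacy IRQ 5; MSI ranges could be appended the same way.
    let mut resources = DeviceResources::new();
    resources.append(Resource::LegacyIrq(5));

    let mut dev_intr_mgr = DeviceInterruptManager::new(intr_mgr, &resources)?;
    dev_intr_mgr.enable()?; // enters legacy mode by default
    dev_intr_mgr.get_group().unwrap().trigger(0)?;
    Ok(())
}
```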
+ pub fn new(intr_mgr: T, resources: &DeviceResources) -> Result { + let mut mgr = DeviceInterruptManager { + mode: DeviceInterruptMode::Disabled, + activated: false, + current_idx: usize::MAX, + mode2idx: [usize::MAX; 5], + intr_mgr, + intr_groups: Vec::new(), + #[cfg(feature = "msi-irq")] + msi_config: Vec::new(), + device_id: None, + }; + + #[cfg(feature = "legacy-irq")] + { + if let Some(irq) = resources.get_legacy_irq() { + let group = mgr + .intr_mgr + .create_group(InterruptSourceType::LegacyIrq, irq, 1)?; + mgr.mode2idx[DeviceInterruptMode::LegacyIrq as usize] = mgr.intr_groups.len(); + mgr.intr_groups.push(group); + } + } + + #[cfg(feature = "msi-irq")] + { + if let Some(msi) = resources.get_generic_msi_irqs() { + let group = mgr + .intr_mgr + .create_group(InterruptSourceType::MsiIrq, msi.0, msi.1)?; + mgr.resize_msi_config_space(group.len()); + mgr.mode2idx[DeviceInterruptMode::GenericMsiIrq as usize] = mgr.intr_groups.len(); + mgr.intr_groups.push(group); + } + + if let Some(msi) = resources.get_pci_msi_irqs() { + let group = mgr + .intr_mgr + .create_group(InterruptSourceType::MsiIrq, msi.0, msi.1)?; + mgr.resize_msi_config_space(group.len()); + mgr.mode2idx[DeviceInterruptMode::PciMsiIrq as usize] = mgr.intr_groups.len(); + mgr.intr_groups.push(group); + } + + if let Some(msi) = resources.get_pci_msix_irqs() { + let group = mgr + .intr_mgr + .create_group(InterruptSourceType::MsiIrq, msi.0, msi.1)?; + mgr.resize_msi_config_space(group.len()); + mgr.mode2idx[DeviceInterruptMode::PciMsixIrq as usize] = mgr.intr_groups.len(); + mgr.intr_groups.push(group); + } + } + + Ok(mgr) + } + + /// Set device_id for MSI routing + pub fn set_device_id(&mut self, device_id: Option) { + self.device_id = device_id; + } + + /// Check whether the interrupt manager has been activated. + pub fn is_enabled(&self) -> bool { + self.activated + } + + /// Switch the interrupt manager from configuration stage into runtime stage. + /// + /// The working mode could only be changed at configuration stage, and all requests to change + /// working mode at runtime stage will be rejected. + /// + /// If the interrupt manager is still in DISABLED mode when DeviceInterruptManager::enable() is + /// called, it will be put into LEGACY mode if LEGACY mode is supported. + pub fn enable(&mut self) -> Result<()> { + if self.activated { + return Ok(()); + } + + // Enter Legacy mode by default if Legacy mode is supported. + if self.mode == DeviceInterruptMode::Disabled + && self.mode2idx[DeviceInterruptMode::LegacyIrq as usize] != usize::MAX + { + self.set_working_mode(DeviceInterruptMode::LegacyIrq)?; + } + if self.mode == DeviceInterruptMode::Disabled { + return Err(Error::from_raw_os_error(libc::EINVAL)); + } + + self.intr_groups[self.current_idx].enable(self.get_configs(self.mode))?; + self.activated = true; + + Ok(()) + } + + /// Switch the interrupt manager from runtime stage back into initial configuration stage. + /// + /// Currently we doesn't track the usage of interrupt group object given out by `get_group()`, + /// so the the caller needs to take the responsibility to release all interrupt group object + /// reference before calling DeviceInterruptManager::reset(). + pub fn reset(&mut self) -> Result<()> { + if self.activated { + self.activated = false; + self.intr_groups[self.current_idx].disable()?; + } + self.set_working_mode(DeviceInterruptMode::Disabled)?; + + Ok(()) + } + + /// Get the current interrupt working mode. 
+ pub fn get_working_mode(&mut self) -> DeviceInterruptMode { + self.mode + } + + /// Switch interrupt working mode. + /// + /// Currently switching working mode is only supported during device configuration stage and + /// will always return failure if called during device runtime stage. The device switches from + /// configuration stage to runtime stage by invoking `DeviceInterruptManager::enable()`. With + /// this constraint, the device drivers may call `DeviceInterruptManager::get_group()` to get + /// the underline active interrupt group object, and directly calls the interrupt group object's + /// methods to trigger/acknowledge interrupts. + /// + /// This is a key design decision for optimizing performance. Though the DeviceInterruptManager + /// object itself is not multi-thread safe and must be protected from concurrent access by the + /// caller, the interrupt source group object is multi-thread safe and could be called + /// concurrently to trigger/acknowledge interrupts. This design may help to improve performance + /// for MSI interrupts. + /// + /// # Arguments + /// * `mode`: target working mode. + pub fn set_working_mode(&mut self, mode: DeviceInterruptMode) -> Result<()> { + // Can't switch mode agian once enabled. + if self.activated { + return Err(Error::from_raw_os_error(libc::EINVAL)); + } + + if mode != self.mode { + // Supported state transitions: + // - other state -> DISABLED + // - DISABLED -> other + // - non-legacy -> legacy + // - legacy -> non-legacy + if self.mode != DeviceInterruptMode::Disabled + && self.mode != DeviceInterruptMode::LegacyIrq + && mode != DeviceInterruptMode::LegacyIrq + && mode != DeviceInterruptMode::Disabled + { + return Err(Error::from_raw_os_error(libc::EINVAL)); + } + + // Then enter new state + if mode != DeviceInterruptMode::Disabled { + self.current_idx = self.mode2idx[mode as usize]; + } else { + // We should reset irq configs when disable interrupt + self.reset_configs(mode); + } + self.mode = mode; + } + + Ok(()) + } + + /// Get the underline interrupt source group object, so the device driver could concurrently + /// trigger/acknowledge interrupts by using the returned group object. + pub fn get_group(&self) -> Option>> { + if !self.activated || self.mode == DeviceInterruptMode::Disabled { + None + } else { + Some(self.intr_groups[self.current_idx].clone()) + } + } + + /// Get the underline interrupt source group object, ignore the mode + pub fn get_group_unchecked(&self) -> Arc> { + self.intr_groups[self.current_idx].clone() + } + + /// Reconfigure a specific interrupt in current working mode at configuration or runtime stage. + /// + /// It's mainly used to reconfigure Generic MSI/PCI MSI/PCI MSIx interrupts. Actually legacy + /// interrupts don't support reconfiguration yet. 
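
The transition rules above boil down to: any mode may move to `Disabled` or `LegacyIrq`, while an MSI flavour may only be entered from `Disabled` or `LegacyIrq`, and nothing may change once `enable()` has been called. A small sketch (generic over the backend, and assuming the device's resources actually include a PCI MSI-X range):

```rust
use dbs_interrupt::{DeviceInterruptManager, DeviceInterruptMode, InterruptManager};

fn pick_msix<T: InterruptManager>(mgr: &mut DeviceInterruptManager<T>) -> std::io::Result<()> {
    // Allowed while still in the configuration stage.
    mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq)?;
    mgr.enable()?;
    // Rejected once enabled: the working mode is frozen at runtime.
    assert!(mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq).is_err());
    Ok(())
}
```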
+ #[allow(unused_variables)] + pub fn update(&mut self, index: u32) -> Result<()> { + if !self.activated { + return Err(Error::from_raw_os_error(libc::EINVAL)); + } + + match self.mode { + #[cfg(feature = "msi-irq")] + DeviceInterruptMode::GenericMsiIrq + | DeviceInterruptMode::PciMsiIrq + | DeviceInterruptMode::PciMsixIrq => { + let group = &self.intr_groups[self.current_idx]; + if index >= group.len() || index >= self.msi_config.len() as u32 { + return Err(Error::from_raw_os_error(libc::EINVAL)); + } + group.update(index, &self.msi_config[index as usize])?; + Ok(()) + } + _ => Err(Error::from_raw_os_error(libc::EINVAL)), + } + } + + fn get_configs(&self, mode: DeviceInterruptMode) -> &[InterruptSourceConfig] { + match mode { + #[cfg(feature = "legacy-irq")] + DeviceInterruptMode::LegacyIrq => &LEGACY_CONFIGS[..], + #[cfg(feature = "msi-irq")] + DeviceInterruptMode::GenericMsiIrq + | DeviceInterruptMode::PciMsiIrq + | DeviceInterruptMode::PciMsixIrq => { + let idx = self.mode2idx[mode as usize]; + let group_len = self.intr_groups[idx].len() as usize; + &self.msi_config[0..group_len] + } + _ => panic!("unhandled interrupt type in get_configs()"), + } + } + + fn reset_configs(&mut self, mode: DeviceInterruptMode) { + match mode { + #[cfg(feature = "msi-irq")] + DeviceInterruptMode::GenericMsiIrq + | DeviceInterruptMode::PciMsiIrq + | DeviceInterruptMode::PciMsixIrq => { + self.msi_config = vec![ + InterruptSourceConfig::MsiIrq(MsiIrqSourceConfig::default()); + self.msi_config.len() + ]; + } + _ => {} + } + } +} + +#[cfg(feature = "msi-irq")] +impl DeviceInterruptManager { + /// Set the high address for a MSI message. + #[allow(irrefutable_let_patterns)] + pub fn set_msi_high_address(&mut self, index: u32, data: u32) -> Result<()> { + if (index as usize) < self.msi_config.len() { + if let InterruptSourceConfig::MsiIrq(ref mut msi) = self.msi_config[index as usize] { + msi.high_addr = data; + return Ok(()); + } + } + Err(Error::from_raw_os_error(libc::EINVAL)) + } + + /// Set the low address for a MSI message. + #[allow(irrefutable_let_patterns)] + pub fn set_msi_low_address(&mut self, index: u32, data: u32) -> Result<()> { + if (index as usize) < self.msi_config.len() { + if let InterruptSourceConfig::MsiIrq(ref mut msi) = self.msi_config[index as usize] { + msi.low_addr = data; + return Ok(()); + } + } + Err(Error::from_raw_os_error(libc::EINVAL)) + } + + /// Set the data for a MSI message. 
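
Taken together, these MSI helpers are typically used as follows when a guest programs one vector (a sketch; generic over the backend, assuming MSI mode is available in the device's resources, `index` lies within its MSI range, and the address/data values are purely illustrative):

```rust
use dbs_interrupt::{DeviceInterruptManager, DeviceInterruptMode, InterruptManager};

fn program_msi<T: InterruptManager>(
    mgr: &mut DeviceInterruptManager<T>,
    index: u32,
) -> std::io::Result<()> {
    // Pick the MSI working mode while still in the configuration stage.
    mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq)?;
    mgr.set_msi_high_address(index, 0x0)?;
    mgr.set_msi_low_address(index, 0xfee0_0000)?;
    mgr.set_msi_data(index, 0x41)?;
    mgr.enable()?;

    // If the guest later rewrites the vector, push the change into the KVM routing.
    mgr.set_msi_data(index, 0x42)?;
    mgr.update(index)
}
```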
+ #[allow(irrefutable_let_patterns)] + pub fn set_msi_data(&mut self, index: u32, data: u32) -> Result<()> { + if (index as usize) < self.msi_config.len() { + if let InterruptSourceConfig::MsiIrq(ref mut msi) = self.msi_config[index as usize] { + msi.data = data; + return Ok(()); + } + } + Err(Error::from_raw_os_error(libc::EINVAL)) + } + + /// Set msi irq MASK bit + #[allow(irrefutable_let_patterns)] + pub fn set_msi_mask(&mut self, index: u32, mask: bool) -> Result<()> { + if (index as usize) < self.msi_config.len() { + if let InterruptSourceConfig::MsiIrq(ref mut msi) = self.msi_config[index as usize] { + let mut msg_ctl = msi.msg_ctl; + msg_ctl &= !MSI_INT_MASK; + if mask { + msg_ctl |= MSI_INT_MASK; + } + msi.msg_ctl = msg_ctl; + return Ok(()); + } + } + Err(Error::from_raw_os_error(libc::EINVAL)) + } + + /// Get msi irq MASK state + #[allow(irrefutable_let_patterns)] + pub fn get_msi_mask(&self, index: u32) -> Result { + if (index as usize) < self.msi_config.len() { + if let InterruptSourceConfig::MsiIrq(ref msi) = self.msi_config[index as usize] { + return Ok((msi.msg_ctl & MSI_INT_MASK) == MSI_INT_MASK); + } + } + Err(Error::from_raw_os_error(libc::EINVAL)) + } + + #[cfg(target_arch = "aarch64")] + /// Set the device id for a MSI irq + pub fn set_msi_device_id(&mut self, index: u32) -> Result<()> { + if (index as usize) < self.msi_config.len() { + if let InterruptSourceConfig::MsiIrq(ref mut msi) = self.msi_config[index as usize] { + msi.device_id = self.device_id.map(|dev_id| { + // An pci device attach to ITS will have a new device id which is use for msi + // irq routing. It is calculated according to kernel function PCI_DEVID(), + // new_dev_id = (bus << 8) | devfn. In addition, devfn = device_id << 3, + // according to pci-host-ecam-generic's spec, and we implement bus = 0. + dev_id << MSI_DEVICE_ID_SHIFT + }); + return Ok(()); + } + } + Err(Error::from_raw_os_error(libc::EINVAL)) + } + + fn resize_msi_config_space(&mut self, size: u32) { + if self.msi_config.len() < size as usize { + self.msi_config = + vec![InterruptSourceConfig::MsiIrq(MsiIrqSourceConfig::default()); size as usize]; + } + } +} + +/// Struct to implement a 32-bit interrupt status register. +#[derive(Default, Debug)] +pub struct InterruptStatusRegister32 { + status: AtomicU32, +} + +impl InterruptStatusRegister32 { + /// Create a status register instance. + pub fn new() -> Self { + InterruptStatusRegister32 { + status: AtomicU32::new(0), + } + } + + /// Read current value of the status register. + pub fn read(&self) -> u32 { + self.status.load(Ordering::SeqCst) + } + + /// Write value to the status register. + pub fn write(&self, value: u32) { + self.status.store(value, Ordering::SeqCst); + } + + /// Read current value and reset the status register to 0. + pub fn read_and_clear(&self) -> u32 { + self.status.swap(0, Ordering::SeqCst) + } + + /// Set bits into `value`. + pub fn set_bits(&self, value: u32) { + self.status.fetch_or(value, Ordering::SeqCst); + } + + /// Clear bits present in `value`. 
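
A quick illustration of the status register, in the way a virtio-style backend might use it: the backend sets bits before triggering the interrupt, and the guest's read clears them (the bit meaning below is made up for the example):

```rust
use dbs_interrupt::InterruptStatusRegister32;

const QUEUE_INTR: u32 = 0x01; // hypothetical "queue has new buffers" bit

fn main() {
    let isr = InterruptStatusRegister32::new();
    isr.set_bits(QUEUE_INTR); // device backend records the interrupt reason
    assert_eq!(isr.read(), QUEUE_INTR);
    assert_eq!(isr.read_and_clear(), QUEUE_INTR); // guest reads and acknowledges
    assert_eq!(isr.read(), 0);
}
```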
+ pub fn clear_bits(&self, value: u32) { + self.status.fetch_and(!value, Ordering::SeqCst); + } +} + +#[cfg(all(test, feature = "kvm-legacy-irq", feature = "kvm-msi-irq"))] +pub(crate) mod tests { + use std::sync::Arc; + + use dbs_device::resources::{DeviceResources, MsiIrqType, Resource}; + use kvm_ioctls::{Kvm, VmFd}; + + use super::*; + use crate::KvmIrqManager; + + pub(crate) fn create_vm_fd() -> VmFd { + let kvm = Kvm::new().unwrap(); + kvm.create_vm().unwrap() + } + + fn create_init_resources() -> DeviceResources { + let mut resources = DeviceResources::new(); + + resources.append(Resource::MmioAddressRange { + base: 0xd000_0000, + size: 0x10_0000, + }); + resources.append(Resource::LegacyIrq(0)); + resources.append(Resource::MsiIrq { + ty: MsiIrqType::GenericMsi, + base: 0x200, + size: 0x10, + }); + resources.append(Resource::MsiIrq { + ty: MsiIrqType::PciMsi, + base: 0x100, + size: 0x20, + }); + resources.append(Resource::MsiIrq { + ty: MsiIrqType::PciMsix, + base: 0x300, + size: 0x30, + }); + + resources + } + + fn create_interrupt_manager() -> DeviceInterruptManager> { + let vmfd = Arc::new(create_vm_fd()); + #[cfg(target_arch = "x86_64")] + vmfd.create_irq_chip().unwrap(); + #[cfg(target_arch = "aarch64")] + let _ = dbs_arch::gic::create_gic(&vmfd, 1); + let intr_mgr = Arc::new(KvmIrqManager::new(vmfd)); + + let resource = create_init_resources(); + intr_mgr.initialize().unwrap(); + DeviceInterruptManager::new(intr_mgr, &resource).unwrap() + } + + #[test] + fn test_create_device_interrupt_manager() { + let mut mgr = create_interrupt_manager(); + + assert_eq!(mgr.mode, DeviceInterruptMode::Disabled); + assert!(!mgr.activated); + assert_eq!(mgr.current_idx, usize::MAX); + assert_eq!(mgr.intr_groups.len(), 4); + assert!(!mgr.is_enabled()); + assert!(mgr.get_group().is_none()); + + // Enter legacy mode by default + mgr.enable().unwrap(); + assert!(mgr.is_enabled()); + assert_eq!( + mgr.mode2idx[DeviceInterruptMode::LegacyIrq as usize], + mgr.current_idx + ); + assert!(mgr.get_group().is_some()); + assert_eq!( + mgr.get_group_unchecked().interrupt_type(), + InterruptSourceType::LegacyIrq + ); + + // Disable interrupt manager + mgr.reset().unwrap(); + assert!(!mgr.is_enabled()); + assert_eq!( + mgr.mode2idx[DeviceInterruptMode::LegacyIrq as usize], + mgr.current_idx + ); + assert_eq!(mgr.get_working_mode(), DeviceInterruptMode::Disabled); + assert!(mgr.get_group().is_none()); + } + + #[test] + fn test_device_interrupt_manager_switch_mode() { + let mut mgr = create_interrupt_manager(); + + // Can't switch working mode in enabled state. 
+ mgr.enable().unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap_err(); + mgr.reset().unwrap(); + + // Switch from LEGACY to PciMsi mode + mgr.set_working_mode(DeviceInterruptMode::LegacyIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::LegacyIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap_err(); + + // Switch from LEGACY to PciMsix mode + mgr.set_working_mode(DeviceInterruptMode::LegacyIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap_err(); + + // Switch from LEGACY to GenericMsi mode + mgr.set_working_mode(DeviceInterruptMode::LegacyIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap_err(); + + // Switch from DISABLED to PciMsi mode + mgr.set_working_mode(DeviceInterruptMode::Disabled).unwrap(); + mgr.set_working_mode(DeviceInterruptMode::Disabled).unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap_err(); + + // Switch from DISABLED to PciMsix mode + mgr.set_working_mode(DeviceInterruptMode::Disabled).unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap_err(); + + // Switch from DISABLED to GenericMsi mode + mgr.set_working_mode(DeviceInterruptMode::Disabled).unwrap(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap_err(); + + mgr.set_working_mode(DeviceInterruptMode::Disabled).unwrap(); + mgr.set_working_mode(DeviceInterruptMode::Disabled).unwrap(); + } + + #[test] + fn test_msi_config() { + let mut interrupt_manager = create_interrupt_manager(); + + assert!(interrupt_manager.set_msi_data(512, 0).is_err()); + interrupt_manager.set_msi_data(0, 0).unwrap(); + assert!(interrupt_manager.set_msi_high_address(512, 0).is_err()); + interrupt_manager.set_msi_high_address(0, 0).unwrap(); + assert!(interrupt_manager.set_msi_low_address(512, 0).is_err()); + interrupt_manager.set_msi_low_address(0, 0).unwrap(); + assert!(interrupt_manager.get_msi_mask(512).is_err()); + assert!(!interrupt_manager.get_msi_mask(0).unwrap()); + assert!(interrupt_manager.set_msi_mask(512, true).is_err()); + interrupt_manager.set_msi_mask(0, true).unwrap(); + assert!(interrupt_manager.get_msi_mask(0).unwrap()); + } + + #[test] + fn test_set_working_mode_after_activated() { + let mut interrupt_manager = 
create_interrupt_manager(); + interrupt_manager.activated = true; + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::Disabled) + .is_err()); + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .is_err()); + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::LegacyIrq) + .is_err()); + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::PciMsiIrq) + .is_err()); + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::PciMsixIrq) + .is_err()); + } + + #[test] + fn test_disable2legacy() { + let mut interrupt_manager = create_interrupt_manager(); + interrupt_manager.activated = false; + interrupt_manager.mode = DeviceInterruptMode::Disabled; + interrupt_manager + .set_working_mode(DeviceInterruptMode::LegacyIrq) + .unwrap(); + } + + #[test] + fn test_disable2nonlegacy() { + let mut interrupt_manager = create_interrupt_manager(); + interrupt_manager.activated = false; + interrupt_manager.mode = DeviceInterruptMode::Disabled; + interrupt_manager + .set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap(); + } + + #[test] + fn test_legacy2nonlegacy() { + let mut interrupt_manager = create_interrupt_manager(); + interrupt_manager.activated = false; + interrupt_manager.mode = DeviceInterruptMode::Disabled; + interrupt_manager + .set_working_mode(DeviceInterruptMode::LegacyIrq) + .unwrap(); + interrupt_manager + .set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap(); + } + + #[test] + fn test_nonlegacy2legacy() { + let mut interrupt_manager = create_interrupt_manager(); + interrupt_manager.activated = false; + interrupt_manager.mode = DeviceInterruptMode::Disabled; + interrupt_manager + .set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap(); + interrupt_manager + .set_working_mode(DeviceInterruptMode::LegacyIrq) + .unwrap(); + } + + #[test] + fn test_update() { + let mut interrupt_manager = create_interrupt_manager(); + interrupt_manager + .set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap(); + interrupt_manager.enable().unwrap(); + assert!(interrupt_manager.update(0x10).is_err()); + interrupt_manager.update(0x01).unwrap(); + interrupt_manager.reset().unwrap(); + interrupt_manager + .set_working_mode(DeviceInterruptMode::LegacyIrq) + .unwrap(); + assert!(interrupt_manager.update(0x10).is_err()); + } + + #[test] + fn test_get_configs() { + // legacy irq config + { + let interrupt_manager = create_interrupt_manager(); + + let legacy_config = interrupt_manager.get_configs(DeviceInterruptMode::LegacyIrq); + assert_eq!(legacy_config, LEGACY_CONFIGS); + } + + // generic irq config + { + let mut interrupt_manager = create_interrupt_manager(); + interrupt_manager + .set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap(); + let msi_config = interrupt_manager.get_configs(DeviceInterruptMode::GenericMsiIrq); + assert_eq!(msi_config.len(), 0x10); + } + + // msi irq config + { + let mut interrupt_manager = create_interrupt_manager(); + interrupt_manager + .set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap(); + let msi_config = interrupt_manager.get_configs(DeviceInterruptMode::PciMsiIrq); + assert_eq!(msi_config.len(), 0x20); + } + + // msix irq config + { + let mut interrupt_manager = create_interrupt_manager(); + interrupt_manager + .set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap(); + let msi_config = interrupt_manager.get_configs(DeviceInterruptMode::PciMsixIrq); + assert_eq!(msi_config.len(), 0x30); + } + } + + #[test] + fn 
test_reset_configs() { + let mut interrupt_manager = create_interrupt_manager(); + + interrupt_manager.reset_configs(DeviceInterruptMode::LegacyIrq); + interrupt_manager.reset_configs(DeviceInterruptMode::LegacyIrq); + + interrupt_manager.set_msi_data(0, 100).unwrap(); + interrupt_manager.set_msi_high_address(0, 200).unwrap(); + interrupt_manager.set_msi_low_address(0, 300).unwrap(); + + interrupt_manager.reset_configs(DeviceInterruptMode::GenericMsiIrq); + assert_eq!( + interrupt_manager.msi_config[0], + InterruptSourceConfig::MsiIrq(MsiIrqSourceConfig::default()) + ); + } + + #[test] + fn test_interrupt_status_register() { + let status = InterruptStatusRegister32::new(); + + assert_eq!(status.read(), 0); + status.write(0x13); + assert_eq!(status.read(), 0x13); + status.clear_bits(0x11); + assert_eq!(status.read(), 0x2); + status.set_bits(0x100); + assert_eq!(status.read_and_clear(), 0x102); + assert_eq!(status.read(), 0); + } +} diff --git a/src/dragonball/src/dbs_interrupt/src/notifier.rs b/src/dragonball/src/dbs_interrupt/src/notifier.rs new file mode 100644 index 000000000000..0589f9e29e1b --- /dev/null +++ b/src/dragonball/src/dbs_interrupt/src/notifier.rs @@ -0,0 +1,230 @@ +// Copyright 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +//! Event notifier to inject device interrupts to virtual machines. + +use std::any::Any; +use std::io::Error; +use std::sync::Arc; + +use vmm_sys_util::eventfd::EventFd; + +use crate::{InterruptIndex, InterruptSourceGroup, InterruptStatusRegister32}; + +#[cfg(feature = "legacy-irq")] +pub use self::legacy::*; +#[cfg(feature = "msi-irq")] +pub use self::msi::*; + +/// Trait to inject device interrupts to virtual machines. +pub trait InterruptNotifier: Send + Sync { + /// Inject a device interrupt to the virtual machine. + fn notify(&self) -> Result<(), Error>; + + /// Get the optional `EventFd` object to inject interrupt to the virtual machine. + fn notifier(&self) -> Option<&EventFd>; + + /// Clone a boxed dyn trait object. + fn clone_boxed(&self) -> Box; + + /// Convert `self` to `std::any::Any`. + fn as_any(&self) -> &dyn Any; +} + +#[cfg(feature = "legacy-irq")] +mod legacy { + use super::*; + + /// Struct to inject legacy interrupt to guest. + #[derive(Clone)] + pub struct LegacyNotifier { + pub(crate) intr_group: Arc>, + pub(crate) intr_status: Arc, + pub(crate) status_bits: u32, + } + + impl LegacyNotifier { + /// Create a legacy notifier. + pub fn new( + intr_group: Arc>, + intr_status: Arc, + status_bits: u32, + ) -> Self { + Self { + intr_group, + intr_status, + status_bits, + } + } + } + + impl InterruptNotifier for LegacyNotifier { + fn notify(&self) -> Result<(), Error> { + self.intr_status.set_bits(self.status_bits); + self.intr_group.trigger(0) + } + + fn notifier(&self) -> Option<&EventFd> { + self.intr_group.notifier(0) + } + + fn clone_boxed(&self) -> Box { + Box::new(self.clone()) + } + + fn as_any(&self) -> &dyn Any { + self + } + } +} + +#[cfg(feature = "msi-irq")] +mod msi { + use super::*; + + /// Struct to inject message signalled interrupt to guest. + #[derive(Clone)] + pub struct MsiNotifier { + pub(crate) intr_group: Arc>, + pub(crate) intr_index: InterruptIndex, + } + + impl MsiNotifier { + /// Create a notifier to inject message signalled interrupt to guest. 
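+        ///
+        /// Illustrative sketch (`group` stands for an MSI interrupt source
+        /// group created elsewhere; not compiled, hence `ignore`):
+        ///
+        /// ```ignore
+        /// let notifier = MsiNotifier::new(group.clone(), 0);
+        /// notifier.notify()?;
+        /// ```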
+ pub fn new( + intr_group: Arc>, + intr_index: InterruptIndex, + ) -> Self { + MsiNotifier { + intr_group, + intr_index, + } + } + } + + impl InterruptNotifier for MsiNotifier { + fn notify(&self) -> Result<(), Error> { + self.intr_group.trigger(self.intr_index) + } + + fn notifier(&self) -> Option<&EventFd> { + self.intr_group.notifier(self.intr_index) + } + + fn clone_boxed(&self) -> Box { + Box::new(self.clone()) + } + + fn as_any(&self) -> &dyn Any { + self + } + } +} + +/// Struct to discard interrupts. +#[derive(Copy, Clone, Debug, Default)] +pub struct NoopNotifier {} + +impl NoopNotifier { + /// Create a noop notifier to discard interrupts. + pub fn new() -> Self { + NoopNotifier {} + } +} + +impl InterruptNotifier for NoopNotifier { + fn notify(&self) -> Result<(), Error> { + Ok(()) + } + + fn notifier(&self) -> Option<&EventFd> { + None + } + + fn clone_boxed(&self) -> Box { + Box::new(*self) + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +/// Clone a boxed interrupt notifier object. +pub fn clone_notifier(notifier: &dyn InterruptNotifier) -> Box { + notifier.clone_boxed() +} + +#[cfg(test)] +mod tests { + #![allow(unused_imports)] + #![allow(dead_code)] + use super::*; + + use crate::{InterruptManager, InterruptSourceType}; + + const VIRTIO_INTR_VRING: u32 = 0x01; + const VIRTIO_INTR_CONFIG: u32 = 0x02; + + #[test] + fn create_virtio_null_notifier() { + let notifier = NoopNotifier::new(); + + notifier.notify().unwrap(); + assert!(notifier.notifier().is_none()); + } + + #[cfg(feature = "kvm-legacy-irq")] + #[test] + fn test_create_legacy_notifier() { + let (_vmfd, irq_manager) = crate::kvm::tests::create_kvm_irq_manager(); + let group = irq_manager + .create_group(InterruptSourceType::LegacyIrq, 0, 1) + .unwrap(); + let status = Arc::new(InterruptStatusRegister32::new()); + assert_eq!(status.read(), 0); + + let notifer = LegacyNotifier::new(group.clone(), status.clone(), VIRTIO_INTR_CONFIG); + notifer.notify().unwrap(); + assert!(notifer.notifier().is_some()); + assert_eq!(notifer.status_bits, VIRTIO_INTR_CONFIG); + assert_eq!(status.read_and_clear(), VIRTIO_INTR_CONFIG); + assert_eq!(status.read(), 0); + + let notifier = LegacyNotifier::new(group.clone(), status.clone(), VIRTIO_INTR_VRING); + notifier.notify().unwrap(); + assert!(notifier.notifier().is_some()); + assert_eq!(status.read(), VIRTIO_INTR_VRING); + status.clear_bits(VIRTIO_INTR_VRING); + assert_eq!(status.read(), 0); + let eventfd = notifier.notifier().unwrap(); + assert_eq!(eventfd.read().unwrap(), 2); + + let clone = clone_notifier(¬ifier); + assert_eq!(clone.as_any().type_id(), notifier.as_any().type_id()); + } + + #[cfg(feature = "kvm-msi-irq")] + #[test] + fn test_virtio_msi_notifier() { + let (_vmfd, irq_manager) = crate::kvm::tests::create_kvm_irq_manager(); + let group = irq_manager + .create_group(InterruptSourceType::MsiIrq, 0, 3) + .unwrap(); + let notifier1 = MsiNotifier::new(group.clone(), 1); + let notifier2 = MsiNotifier::new(group.clone(), 2); + let notifier3 = MsiNotifier::new(group.clone(), 3); + assert!(notifier1.notifier().is_some()); + assert!(notifier2.notifier().is_some()); + assert!(notifier3.notifier().is_none()); + + notifier1.notify().unwrap(); + notifier1.notify().unwrap(); + notifier2.notify().unwrap(); + assert_eq!(notifier1.notifier().unwrap().read().unwrap(), 2); + assert_eq!(notifier2.notifier().unwrap().read().unwrap(), 1); + + let clone = clone_notifier(¬ifier1); + assert_eq!(clone.as_any().type_id(), notifier1.as_any().type_id()); + } +} diff --git 
a/src/dragonball/src/dbs_legacy_devices/Cargo.toml b/src/dragonball/src/dbs_legacy_devices/Cargo.toml new file mode 100644 index 000000000000..8655783f7db1 --- /dev/null +++ b/src/dragonball/src/dbs_legacy_devices/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "dbs-legacy-devices" +version = "0.1.1" +authors = ["Alibaba Dragonball Team"] +license = "Apache-2.0 AND BSD-3-Clause" +edition = "2018" +description = "dbs-legacy-devices provides emulation for legacy devices." +homepage = "https://github.com/openanolis/dragonball-sandbox" +repository = "https://github.com/openanolis/dragonball-sandbox" +keywords = ["dragonball", "secure-sandbox", "devices", "legacy"] +readme = "README.md" + +[dependencies] +dbs-device = { path = "../dbs_device" } +dbs-utils = { path = "../dbs_utils" } +libc = "0.2.39" +log = "0.4.14" +serde = { version = "1.0.27", features = ["derive", "rc"] } +vm-superio = "0.5.0" +vmm-sys-util = "0.11.0" + +[dev-dependencies] +libc = "0.2.39" diff --git a/src/dragonball/src/dbs_legacy_devices/LICENSE b/src/dragonball/src/dbs_legacy_devices/LICENSE new file mode 120000 index 000000000000..30cff7403da0 --- /dev/null +++ b/src/dragonball/src/dbs_legacy_devices/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/dragonball/src/dbs_legacy_devices/README.md b/src/dragonball/src/dbs_legacy_devices/README.md new file mode 100644 index 000000000000..e1271995dcc0 --- /dev/null +++ b/src/dragonball/src/dbs_legacy_devices/README.md @@ -0,0 +1,26 @@ +# dbs-legacy-devices + +`dbs-legacy-devices` provides emulation for legacy devices. + +## Serial Devices + +Defined a wrapper over the Serial of [`vm-superio`](https://github.com/rust-vmm/vm-superio). +This wrapper is needed because [Orphan rules](https://doc.rust-lang.org/reference/items/implementations.html#orphan-rules), +which is one crate can not implement a trait for a struct defined in +another crate. This wrapper also contains the input field that is +missing from upstream implementation. + +## i8042 Devices + +Defined a wrapper over the `i8042 PS/2 Controller` of [`vm-superio`](https://github.com/rust-vmm/vm-superio). +The i8042 PS/2 controller emulates, at this point, only the CPU reset command which is needed for announcing the VMM about the guest's shutdown. + +### Acknowledgement + +Part of the code is derived from the [Firecracker](https://github.com/firecracker-microvm/firecracker) project. +And modified to use [`DeviceIoMut`](../dbs_device/src/lib.rs) to support serial port to Bus. + + +## License + +This project is licensed under [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). diff --git a/src/dragonball/src/dbs_legacy_devices/THIRD-PARTY b/src/dragonball/src/dbs_legacy_devices/THIRD-PARTY new file mode 120000 index 000000000000..301d0a498953 --- /dev/null +++ b/src/dragonball/src/dbs_legacy_devices/THIRD-PARTY @@ -0,0 +1 @@ +../../THIRD-PARTY \ No newline at end of file diff --git a/src/dragonball/src/dbs_legacy_devices/src/cmos.rs b/src/dragonball/src/dbs_legacy_devices/src/cmos.rs new file mode 100644 index 000000000000..4ac59cdfacba --- /dev/null +++ b/src/dragonball/src/dbs_legacy_devices/src/cmos.rs @@ -0,0 +1,137 @@ +// Copyright 2023 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. 
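+
+// The device follows the classic CMOS index/data protocol: the guest first
+// writes a register index to the index port (offset 0 from the PIO base,
+// typically 0x70) and then accesses the value through the data port (offset
+// 1, typically 0x71). A rough sketch of driving it through `DeviceIoMut`
+// (`cmos` and `base` are illustrative):
+//
+//     let mut value = [0u8; 1];
+//     cmos.pio_write(base, PioAddress(0), &[0x34]); // select register 0x34
+//     cmos.pio_read(base, PioAddress(1), &mut value); // read its contents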
+ +use std::cmp::min; +use std::mem; + +use libc::{clock_gettime, gmtime_r, timespec, tm, CLOCK_REALTIME}; +use vmm_sys_util::eventfd::EventFd; + +use dbs_device::{DeviceIoMut, PioAddress}; + +/// The value of index offset register is always guaranteed to be in range via INDEX_MASK. +const INDEX_MASK: u8 = 0x7f; +/// Offset of index offset register. +const INDEX_OFFSET: u16 = 0x0; +/// Offset of data offset register. +const DATA_OFFSET: u16 = 0x1; +/// Length of Cmos memory. +const DATA_LEN: usize = 128; + +/// A CMOS/RTC device commonly seen on x86 I/O port 0x70/0x71. +pub struct CmosDevice { + index: u8, + data: [u8; DATA_LEN], + reset_evt: EventFd, +} + +impl CmosDevice { + /// Constructs a CMOS/RTC device with initial data. + /// `mem_below_4g` is the size of memory in bytes below the 32-bit gap. + /// `mem_above_4g` is the size of memory in bytes above the 32-bit gap. + pub fn new(mem_below_4g: u64, mem_above_4g: u64, reset_evt: EventFd) -> CmosDevice { + let mut data = [0u8; DATA_LEN]; + // Extended memory from 16 MB to 4 GB in units of 64 KB + let ext_mem = min( + 0xFFFF, + mem_below_4g.saturating_sub(16 * 1024 * 1024) / (64 * 1024), + ); + data[0x34] = ext_mem as u8; + data[0x35] = (ext_mem >> 8) as u8; + // High memory (> 4GB) in units of 64 KB + let high_mem = min(0x00FF_FFFF, mem_above_4g / (64 * 1024)); + data[0x5b] = high_mem as u8; + data[0x5c] = (high_mem >> 8) as u8; + data[0x5d] = (high_mem >> 16) as u8; + CmosDevice { + index: 0, + data, + reset_evt, + } + } +} +impl DeviceIoMut for CmosDevice { + fn pio_write(&mut self, _base: PioAddress, offset: PioAddress, data: &[u8]) { + if data.len() != 1 { + return; + } + match offset.raw_value() { + INDEX_OFFSET => self.index = data[0], + DATA_OFFSET => { + if self.index == 0x8f && data[0] == 0 { + self.reset_evt.write(1).unwrap(); + } else { + self.data[(self.index & INDEX_MASK) as usize] = data[0] + } + } + _ => {} + }; + } + fn pio_read(&mut self, _base: PioAddress, offset: PioAddress, data: &mut [u8]) { + fn to_bcd(v: u8) -> u8 { + assert!(v < 100); + ((v / 10) << 4) | (v % 10) + } + if data.len() != 1 { + return; + } + data[0] = match offset.raw_value() { + INDEX_OFFSET => self.index, + DATA_OFFSET => { + let seconds; + let minutes; + let hours; + let week_day; + let day; + let month; + let year; + // The clock_gettime and gmtime_r calls are safe as long as the structs they are + // given are large enough, and neither of them fail. It is safe to zero initialize + // the tm and timespec struct because it contains only plain data. + let update_in_progress = unsafe { + let mut timespec: timespec = mem::zeroed(); + clock_gettime(CLOCK_REALTIME, &mut timespec as *mut _); + let now = timespec.tv_sec; + let mut tm: tm = mem::zeroed(); + gmtime_r(&now, &mut tm as *mut _); + // The following lines of code are safe but depend on tm being in scope. + seconds = tm.tm_sec; + minutes = tm.tm_min; + hours = tm.tm_hour; + week_day = tm.tm_wday + 1; + day = tm.tm_mday; + month = tm.tm_mon + 1; + year = tm.tm_year; + // Update in Progress bit held for last 224us of each second + const NANOSECONDS_PER_SECOND: i64 = 1_000_000_000; + const UIP_HOLD_LENGTH: i64 = 8 * NANOSECONDS_PER_SECOND / 32768; + timespec.tv_nsec >= (NANOSECONDS_PER_SECOND - UIP_HOLD_LENGTH) + }; + match self.index { + 0x00 => to_bcd(seconds as u8), + 0x02 => to_bcd(minutes as u8), + 0x04 => to_bcd(hours as u8), + 0x06 => to_bcd(week_day as u8), + 0x07 => to_bcd(day as u8), + 0x08 => to_bcd(month as u8), + 0x09 => to_bcd((year % 100) as u8), + // Bit 5 for 32kHz clock. 
Bit 7 for Update in Progress + 0x0a => 1 << 5 | (update_in_progress as u8) << 7, + // Bit 0-6 are reserved and must be 0. + // Bit 7 must be 1 (CMOS has power) + 0x0d => 1 << 7, + 0x32 => to_bcd(((year + 1900) / 100) as u8), + _ => { + // self.index is always guaranteed to be in range via INDEX_MASK. + self.data[(self.index & INDEX_MASK) as usize] + } + } + } + _ => 0, + } + } +} diff --git a/src/dragonball/src/dbs_legacy_devices/src/i8042.rs b/src/dragonball/src/dbs_legacy_devices/src/i8042.rs new file mode 100644 index 000000000000..b3f8a859e1e3 --- /dev/null +++ b/src/dragonball/src/dbs_legacy_devices/src/i8042.rs @@ -0,0 +1,136 @@ +// Copyright 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; + +use dbs_device::{DeviceIoMut, PioAddress}; +use dbs_utils::metric::{IncMetric, SharedIncMetric}; +use log::error; +use serde::Serialize; +use vm_superio::{I8042Device as I8042Dev, Trigger}; + +use crate::EventFdTrigger; + +/// Metrics specific to the i8042 device. +#[derive(Default, Serialize)] +pub struct I8042DeviceMetrics { + /// Errors triggered while using the i8042 device. + pub error_count: SharedIncMetric, + /// Number of superfluous read intents on this i8042 device. + pub missed_read_count: SharedIncMetric, + /// Number of superfluous read intents on this i8042 device. + pub missed_write_count: SharedIncMetric, + /// Bytes read by this device. + pub read_count: SharedIncMetric, + /// Bytes written by this device. + pub write_count: SharedIncMetric, +} + +pub type I8042Device = I8042Wrapper; + +pub struct I8042Wrapper { + pub device: I8042Dev, + pub metrics: Arc, +} + +impl I8042Device { + pub fn new(event: EventFdTrigger, metrics: Arc) -> Self { + Self { + device: I8042Dev::new(event), + metrics, + } + } +} + +impl DeviceIoMut for I8042Wrapper { + fn pio_read(&mut self, _base: PioAddress, offset: PioAddress, data: &mut [u8]) { + if data.len() != 1 { + self.metrics.missed_read_count.inc(); + return; + } + data[0] = self.device.read(offset.raw_value() as u8); + self.metrics.read_count.inc(); + } + + fn pio_write(&mut self, _base: PioAddress, offset: PioAddress, data: &[u8]) { + if data.len() != 1 { + self.metrics.missed_write_count.inc(); + return; + } + if let Err(e) = self.device.write(offset.raw_value() as u8, data[0]) { + self.metrics.error_count.inc(); + error!("Failed to trigger i8042 reset event: {:?}", e); + } else { + self.metrics.write_count.inc(); + } + } +} + +#[cfg(test)] +mod tests { + use std::os::unix::prelude::FromRawFd; + + use vmm_sys_util::eventfd::EventFd; + + use super::*; + + const COMMAND_OFFSET: u8 = 4; + const CMD_RESET_CPU: u8 = 0xFE; + + #[test] + fn test_i8042_valid_ops() { + let reset_evt = EventFdTrigger::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()); + let metrics = Arc::new(I8042DeviceMetrics::default()); + let mut i8042 = I8042Device::new(reset_evt.try_clone().unwrap(), metrics); + + let mut v = [0x00u8; 1]; + i8042.pio_read(PioAddress(0), PioAddress(0), &mut v); + assert_eq!(v[0], 0); + assert_eq!(i8042.metrics.read_count.count(), 1); + + // Check if reset works. 
+ i8042.pio_write( + PioAddress(0), + PioAddress(COMMAND_OFFSET as u16), + &[CMD_RESET_CPU], + ); + assert_eq!(i8042.metrics.write_count.count(), 1); + reset_evt.read().unwrap(); + } + + #[test] + fn test_i8042_invalid_ops() { + let reset_evt = EventFdTrigger::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()); + let metrics = Arc::new(I8042DeviceMetrics::default()); + let mut i8042 = I8042Device::new(reset_evt.try_clone().unwrap(), metrics); + + let mut v = [0x00u8; 2]; + i8042.pio_read(PioAddress(0), PioAddress(0), &mut v); + assert_eq!(v[0], 0); + assert_eq!(i8042.metrics.read_count.count(), 0); + assert_eq!(i8042.metrics.missed_read_count.count(), 1); + + i8042.pio_write( + PioAddress(0), + PioAddress(COMMAND_OFFSET as u16), + &[CMD_RESET_CPU, 0], + ); + assert_eq!(i8042.metrics.write_count.count(), 0); + assert_eq!(i8042.metrics.missed_write_count.count(), 1); + } + + #[test] + fn test_i8042_reset_err() { + let reset_evt = EventFdTrigger::new(unsafe { EventFd::from_raw_fd(i32::MAX) }); + let metrics = Arc::new(I8042DeviceMetrics::default()); + let mut i8042 = I8042Device::new(reset_evt, metrics); + i8042.pio_write( + PioAddress(0), + PioAddress(COMMAND_OFFSET as u16), + &[CMD_RESET_CPU], + ); + assert_eq!(i8042.metrics.write_count.count(), 0); + assert_eq!(i8042.metrics.error_count.count(), 1); + } +} diff --git a/src/dragonball/src/dbs_legacy_devices/src/lib.rs b/src/dragonball/src/dbs_legacy_devices/src/lib.rs new file mode 100644 index 000000000000..40c865ea745b --- /dev/null +++ b/src/dragonball/src/dbs_legacy_devices/src/lib.rs @@ -0,0 +1,76 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Emulates virtual and hardware devices. +mod serial; +pub use self::serial::*; + +#[cfg(target_arch = "x86_64")] +mod cmos; +#[cfg(target_arch = "x86_64")] +pub use self::cmos::*; +#[cfg(target_arch = "x86_64")] +mod i8042; +#[cfg(target_arch = "x86_64")] +pub use self::i8042::*; + +#[cfg(target_arch = "aarch64")] +mod rtc_pl031; +#[cfg(target_arch = "aarch64")] +pub use self::rtc_pl031::*; + +use vm_superio::Trigger; +use vmm_sys_util::eventfd::EventFd; +/// Newtype for implementing the trigger functionality for `EventFd`. +/// +/// The trigger is used for handling events in the legacy devices. 
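+///
+/// Illustrative sketch (not compiled, hence `ignore`):
+///
+/// ```ignore
+/// let trigger = EventFdTrigger::new(EventFd::new(libc::EFD_NONBLOCK)?);
+/// trigger.trigger()?; // raises the wrapped eventfd
+/// ```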
+pub struct EventFdTrigger(EventFd); + +impl Trigger for EventFdTrigger { + type E = std::io::Error; + + fn trigger(&self) -> std::io::Result<()> { + self.write(1) + } +} +impl std::ops::Deref for EventFdTrigger { + type Target = EventFd; + fn deref(&self) -> &Self::Target { + &self.0 + } +} +impl EventFdTrigger { + pub fn try_clone(&self) -> std::io::Result { + Ok(EventFdTrigger((**self).try_clone()?)) + } + pub fn new(evt: EventFd) -> Self { + Self(evt) + } + + pub fn get_event(&self) -> EventFd { + self.0.try_clone().unwrap() + } +} + +#[cfg(test)] +mod tests { + use std::ops::Deref; + + use vmm_sys_util::eventfd::EventFd; + + use super::*; + + #[test] + fn test_eventfd_trigger() { + let intr_evt = EventFdTrigger::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()); + intr_evt.trigger().unwrap(); + assert_eq!(intr_evt.get_event().read().unwrap(), 1); + intr_evt.try_clone().unwrap().trigger().unwrap(); + assert_eq!(intr_evt.deref().read().unwrap(), 1); + } +} diff --git a/src/dragonball/src/dbs_legacy_devices/src/rtc_pl031.rs b/src/dragonball/src/dbs_legacy_devices/src/rtc_pl031.rs new file mode 100644 index 000000000000..3d2d04dae29c --- /dev/null +++ b/src/dragonball/src/dbs_legacy_devices/src/rtc_pl031.rs @@ -0,0 +1,128 @@ +// Copyright 2022 Alibaba Cloud. All Rights Reserved. +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! ARM PL031 Real Time Clock +//! +//! This module implements a PL031 Real Time Clock (RTC) that provides to provides long time base counter. +use std::convert::TryInto; + +use dbs_device::{DeviceIoMut, IoAddress}; +use dbs_utils::metric::{IncMetric, SharedIncMetric}; +use log::warn; +use vm_superio::rtc_pl031::{Rtc, RtcEvents}; + +/// Metrics specific to the RTC device +#[derive(Default)] +pub struct RTCDeviceMetrics { + /// Errors triggered while using the RTC device. + pub error_count: SharedIncMetric, + /// Number of superfluous read intents on this RTC device. + pub missed_read_count: SharedIncMetric, + /// Number of superfluous write intents on this RTC device. + pub missed_write_count: SharedIncMetric, +} + +impl RtcEvents for RTCDeviceMetrics { + fn invalid_read(&self) { + self.missed_read_count.inc(); + self.error_count.inc(); + } + + fn invalid_write(&self) { + self.missed_write_count.inc(); + self.error_count.inc(); + } +} + +/// The wrapper of Rtc struct to implement DeviceIoMut trait. 
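+///
+/// Reads and writes must be exactly four bytes wide; any other access size is
+/// reported through the `RtcEvents` metrics as an invalid access.
+///
+/// Illustrative sketch (not compiled, hence `ignore`):
+///
+/// ```ignore
+/// let mut rtc = RTCDevice::new();
+/// let mut data = [0u8; 4];
+/// // Read the data register (DR) at offset 0x000 via the DeviceIoMut trait.
+/// rtc.read(IoAddress::from(0), IoAddress::from(0x000), &mut data);
+/// ```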
+pub struct RTCDevice { + pub rtc: Rtc, +} + +impl Default for RTCDevice { + fn default() -> Self { + Self::new() + } +} + +impl RTCDevice { + pub fn new() -> Self { + let metrics = RTCDeviceMetrics::default(); + Self { + rtc: Rtc::with_events(metrics), + } + } +} + +impl DeviceIoMut for RTCDevice { + fn read(&mut self, _base: IoAddress, offset: IoAddress, data: &mut [u8]) { + if data.len() == 4 { + self.rtc + .read(offset.raw_value() as u16, data.try_into().unwrap()) + } else { + warn!( + "Invalid RTC PL031 read: offset {}, data length {}", + offset.raw_value(), + data.len() + ); + self.rtc.events().invalid_read(); + } + } + + fn write(&mut self, _base: IoAddress, offset: IoAddress, data: &[u8]) { + if data.len() == 4 { + self.rtc + .write(offset.raw_value() as u16, data.try_into().unwrap()) + } else { + warn!( + "Invalid RTC PL031 write: offset {}, data length {}", + offset.raw_value(), + data.len() + ); + self.rtc.events().invalid_write(); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + impl RTCDevice { + fn read(&mut self, offset: u64, data: &mut [u8]) { + DeviceIoMut::read(self, IoAddress::from(0), IoAddress::from(offset), data) + } + + fn write(&mut self, offset: u64, data: &[u8]) { + DeviceIoMut::write(self, IoAddress::from(0), IoAddress::from(offset), data) + } + } + + #[test] + fn test_rtc_read_write_and_event() { + let mut rtc_device = RTCDevice::new(); + let data = [0; 4]; + + // Write to the DR register. Since this is a RO register, the write + // function should fail. + let invalid_writes_before = rtc_device.rtc.events().missed_write_count.count(); + let error_count_before = rtc_device.rtc.events().error_count.count(); + rtc_device.rtc.write(0x000, &data); + let invalid_writes_after = rtc_device.rtc.events().missed_write_count.count(); + let error_count_after = rtc_device.rtc.events().error_count.count(); + assert_eq!(invalid_writes_after - invalid_writes_before, 1); + assert_eq!(error_count_after - error_count_before, 1); + + let write_data_good = 123u32.to_le_bytes(); + let mut data_bad = [0; 2]; + let mut read_data_good = [0; 4]; + + rtc_device.write(0x008, &write_data_good); + rtc_device.write(0x008, &data_bad); + rtc_device.read(0x008, &mut read_data_good); + rtc_device.read(0x008, &mut data_bad); + assert_eq!(u32::from_le_bytes(read_data_good), 123); + assert_eq!(u16::from_le_bytes(data_bad), 0); + } +} diff --git a/src/dragonball/src/dbs_legacy_devices/src/serial.rs b/src/dragonball/src/dbs_legacy_devices/src/serial.rs new file mode 100644 index 000000000000..ba203e2b2731 --- /dev/null +++ b/src/dragonball/src/dbs_legacy_devices/src/serial.rs @@ -0,0 +1,291 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. +use std::io::Write; +use std::sync::{Arc, Mutex}; + +use dbs_device::{DeviceIoMut, IoAddress, PioAddress}; +use dbs_utils::metric::{IncMetric, SharedIncMetric}; +use log::error; +use serde::Serialize; +use vm_superio::{serial::SerialEvents, Serial, Trigger}; +use vmm_sys_util::eventfd::EventFd; + +use crate::EventFdTrigger; + +/// Trait for devices that handle raw non-blocking I/O requests. +pub trait ConsoleHandler { + /// Send raw input to this emulated device. 
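+    ///
+    /// The default implementation discards the input and reports zero bytes
+    /// consumed. Illustrative sketch (`handler` is any `ConsoleHandler`
+    /// implementation; not compiled, hence `ignore`):
+    ///
+    /// ```ignore
+    /// let written = handler.raw_input(b"ls\n")?;
+    /// ```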
+ fn raw_input(&mut self, _data: &[u8]) -> std::io::Result { + Ok(0) + } + + /// Set the stream to receive raw output from this emulated device. + fn set_output_stream(&mut self, out: Option>); +} + +/// Metrics specific to the UART device. +#[derive(Default, Serialize)] +pub struct SerialDeviceMetrics { + /// Errors triggered while using the UART device. + pub error_count: SharedIncMetric, + /// Number of flush operations. + pub flush_count: SharedIncMetric, + /// Number of read calls that did not trigger a read. + pub missed_read_count: SharedIncMetric, + /// Number of write calls that did not trigger a write. + pub missed_write_count: SharedIncMetric, + /// Number of succeeded read calls. + pub read_count: SharedIncMetric, + /// Number of succeeded write calls. + pub write_count: SharedIncMetric, +} + +pub struct SerialEventsWrapper { + pub metrics: Arc, + pub buffer_ready_event_fd: Option, +} + +impl SerialEvents for SerialEventsWrapper { + fn buffer_read(&self) { + self.metrics.read_count.inc(); + } + + fn out_byte(&self) { + self.metrics.write_count.inc(); + } + + fn tx_lost_byte(&self) { + self.metrics.missed_write_count.inc(); + } + + fn in_buffer_empty(&self) { + match self + .buffer_ready_event_fd + .as_ref() + .map_or(Ok(()), |buf_ready| buf_ready.write(1)) + { + Ok(_) => (), + Err(err) => error!( + "Could not signal that serial device buffer is ready: {:?}", + err + ), + } + } +} + +pub type SerialDevice = SerialWrapper; + +impl SerialDevice { + /// Creates a new SerialDevice instance. + pub fn new(event: EventFd) -> Self { + let out = Arc::new(Mutex::new(None)); + Self { + serial: Serial::with_events( + EventFdTrigger::new(event), + SerialEventsWrapper { + metrics: Arc::new(SerialDeviceMetrics::default()), + buffer_ready_event_fd: None, + }, + AdapterWriter(out.clone()), + ), + out, + } + } +} + +pub struct SerialWrapper { + pub serial: Serial, + pub out: Arc>>>, +} + +impl ConsoleHandler for SerialWrapper { + fn raw_input(&mut self, data: &[u8]) -> std::io::Result { + self.serial + .enqueue_raw_bytes(data) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{e:?}"))) + } + + fn set_output_stream(&mut self, out: Option>) { + *self.out.lock().unwrap() = out; + } +} + +impl DeviceIoMut for SerialWrapper { + fn pio_read(&mut self, _base: PioAddress, offset: PioAddress, data: &mut [u8]) { + if data.len() != 1 { + self.serial.events().metrics.missed_read_count.inc(); + return; + } + data[0] = self.serial.read(offset.raw_value() as u8); + } + fn pio_write(&mut self, _base: PioAddress, offset: PioAddress, data: &[u8]) { + if data.len() != 1 { + self.serial.events().metrics.missed_write_count.inc(); + return; + } + if let Err(e) = self.serial.write(offset.raw_value() as u8, data[0]) { + error!("Failed the pio write to serial: {:?}", e); + self.serial.events().metrics.error_count.inc(); + } + } + + fn read(&mut self, _base: IoAddress, offset: IoAddress, data: &mut [u8]) { + if data.len() != 1 { + self.serial.events().metrics.missed_read_count.inc(); + return; + } + data[0] = self.serial.read(offset.raw_value() as u8); + } + fn write(&mut self, _base: IoAddress, offset: IoAddress, data: &[u8]) { + if data.len() != 1 { + self.serial.events().metrics.missed_write_count.inc(); + return; + } + if let Err(e) = self.serial.write(offset.raw_value() as u8, data[0]) { + error!("Failed the write to serial: {:?}", e); + self.serial.events().metrics.error_count.inc(); + } + } +} + +pub struct AdapterWriter(pub Arc>>>); + +impl Write for AdapterWriter { + fn write(&mut self, buf: 
&[u8]) -> std::io::Result { + if let Some(w) = self.0.lock().unwrap().as_mut() { + w.write(buf) + } else { + Ok(buf.len()) + } + } + + fn flush(&mut self) -> std::io::Result<()> { + if let Some(w) = self.0.lock().unwrap().as_mut() { + w.flush() + } else { + Ok(()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io; + use std::sync::{Arc, Mutex}; + + #[derive(Clone)] + struct SharedBuffer { + buf: Arc>>, + } + + impl io::Write for SharedBuffer { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.buf.lock().unwrap().write(buf) + } + fn flush(&mut self) -> io::Result<()> { + self.buf.lock().unwrap().flush() + } + } + + #[test] + fn test_serial_bus_write() { + let serial_out_buf = Arc::new(Mutex::new(Vec::new())); + let serial_out = Box::new(SharedBuffer { + buf: serial_out_buf.clone(), + }); + let intr_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + + let mut serial = SerialDevice::new(intr_evt); + let metrics = serial.serial.events().metrics.clone(); + + serial.set_output_stream(Some(serial_out)); + + let invalid_writes_before = serial.serial.events().metrics.missed_write_count.count(); + ::pio_write(&mut serial, PioAddress(0), PioAddress(0), &[b'x', b'y']); + let writes_before = metrics.write_count.count(); + + let invalid_writes_after = metrics.missed_write_count.count(); + assert_eq!(invalid_writes_before + 1, invalid_writes_after); + + assert_eq!(serial_out_buf.lock().unwrap().as_slice().len(), 0); + ::write(&mut serial, IoAddress(0), IoAddress(0), &[b'x', b'y']); + assert_eq!(serial_out_buf.lock().unwrap().as_slice().len(), 0); + + let invalid_writes_after = metrics.missed_write_count.count(); + assert_eq!(invalid_writes_before + 2, invalid_writes_after); + + ::pio_write(&mut serial, PioAddress(0), PioAddress(0), &[b'a']); + ::pio_write(&mut serial, PioAddress(0), PioAddress(0), &[b'b']); + ::write(&mut serial, IoAddress(0), IoAddress(0), &[b'c']); + assert_eq!( + serial_out_buf.lock().unwrap().as_slice(), + &[b'a', b'b', b'c'] + ); + + let invalid_writes_after_2 = metrics.missed_write_count.count(); + let writes_after = metrics.write_count.count(); + // The `invalid_write_count` metric should be the same as before the one-byte writes. 
+ assert_eq!(invalid_writes_after_2, invalid_writes_after); + assert_eq!(writes_after, writes_before + 3); + } + + #[test] + fn test_serial_bus_read() { + let intr_evt = EventFdTrigger::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()); + + let metrics = Arc::new(SerialDeviceMetrics::default()); + + let out: Arc>>> = + Arc::new(Mutex::new(Some(Box::new(std::io::sink())))); + let mut serial = SerialDevice { + serial: Serial::with_events( + intr_evt, + SerialEventsWrapper { + metrics: metrics.clone(), + buffer_ready_event_fd: None, + }, + AdapterWriter(out.clone()), + ), + out, + }; + serial + .serial + .enqueue_raw_bytes(&[b'a', b'b', b'c']) + .unwrap(); + + let invalid_reads_before = metrics.missed_read_count.count(); + + let mut v = [0x00; 2]; + ::pio_read(&mut serial, PioAddress(0), PioAddress(0), &mut v); + assert_eq!(v[0], b'\0'); + + let invalid_reads_after = metrics.missed_read_count.count(); + assert_eq!(invalid_reads_before + 1, invalid_reads_after); + + ::read(&mut serial, IoAddress(0), IoAddress(0), &mut v); + assert_eq!(v[0], b'\0'); + + let invalid_reads_after = metrics.missed_read_count.count(); + assert_eq!(invalid_reads_before + 2, invalid_reads_after); + + let mut v = [0x00; 1]; + ::pio_read(&mut serial, PioAddress(0), PioAddress(0), &mut v); + assert_eq!(v[0], b'a'); + + ::pio_read(&mut serial, PioAddress(0), PioAddress(0), &mut v); + assert_eq!(v[0], b'b'); + + ::read(&mut serial, IoAddress(0), IoAddress(0), &mut v); + assert_eq!(v[0], b'c'); + + let invalid_reads_after_2 = metrics.missed_read_count.count(); + // The `invalid_read_count` metric should be the same as before the one-byte reads. + assert_eq!(invalid_reads_after_2, invalid_reads_after); + } +} diff --git a/src/dragonball/src/dbs_tdx/Cargo.toml b/src/dragonball/src/dbs_tdx/Cargo.toml new file mode 100644 index 000000000000..2643c6e48595 --- /dev/null +++ b/src/dragonball/src/dbs_tdx/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "dbs-tdx" +version = "0.1.0" +authors = ["Alibaba Dragonball Team"] +description = "helpers and utilities to create TDX VM" +license = "Apache-2.0 AND BSD-3-Clause" +edition = "2018" +homepage = "https://github.com/openanolis/dragonball-sandbox" +repository = "https://github.com/openanolis/dragonball-sandbox" +keywords = ["dragonball", "secure-sandbox", "TDX", "confidential container"] +readme = "README.md" + +[dependencies] +thiserror = "1.0" +kvm-bindings = { version = "0.6.0", features = ["fam-wrappers"] } +vmm-sys-util = "0.11.0" + +[dev-dependencies] +serde_json = "1.0.9" diff --git a/src/dragonball/src/dbs_tdx/README.md b/src/dragonball/src/dbs_tdx/README.md new file mode 100644 index 000000000000..3bc735ab300c --- /dev/null +++ b/src/dragonball/src/dbs_tdx/README.md @@ -0,0 +1,14 @@ +# dbs-tdx + +This crate is a collection of modules that provides helpers and utilities to create a TDX Dragonball VM. + +Currently this crate involves: +- tdx-ioctls + +## Acknowledgement + +Part of the code is derived from the [Cloud Hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor) project. + +## License + +This project is licensed under [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). diff --git a/src/dragonball/src/dbs_tdx/src/lib.rs b/src/dragonball/src/dbs_tdx/src/lib.rs new file mode 100644 index 000000000000..6b67688e57f8 --- /dev/null +++ b/src/dragonball/src/dbs_tdx/src/lib.rs @@ -0,0 +1,5 @@ +// Copyright (C) 2023 Alibaba Cloud. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#[cfg(target_arch = "x86_64")] +pub mod tdx_ioctls; diff --git a/src/dragonball/src/dbs_tdx/src/tdx_ioctls.rs b/src/dragonball/src/dbs_tdx/src/tdx_ioctls.rs new file mode 100644 index 000000000000..c9323fa42dab --- /dev/null +++ b/src/dragonball/src/dbs_tdx/src/tdx_ioctls.rs @@ -0,0 +1,220 @@ +// Copyright © 2019 Intel Corporation +// +// Copyright (c) 2023 Alibaba Cloud. +// +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause +// + +use std::os::unix::io::RawFd; + +use kvm_bindings::{CpuId, __IncompleteArrayField, KVMIO}; +use thiserror::Error; +use vmm_sys_util::fam::{FamStruct, FamStructWrapper}; +use vmm_sys_util::ioctl::ioctl_with_val; +use vmm_sys_util::{generate_fam_struct_impl, ioctl_ioc_nr, ioctl_iowr_nr}; + +/// Tdx capability list. +pub type TdxCaps = FamStructWrapper; + +/// Cpuid configs entry counts. +const TDX1_MAX_NR_CPUID_CONFIGS: usize = 6; + +generate_fam_struct_impl!( + TdxCapabilities, + TdxCpuidConfig, + cpuid_configs, + u32, + nr_cpuid_configs, + TDX1_MAX_NR_CPUID_CONFIGS +); + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, PartialEq)] +/// Tdx cpuid config. +pub struct TdxCpuidConfig { + /// cpuid leaf + pub leaf: u32, + /// cpuid sub leaf + pub sub_leaf: u32, + /// eax + pub eax: u32, + /// ebx + pub ebx: u32, + /// ecx + pub ecx: u32, + /// edx + pub edx: u32, +} + +#[repr(C)] +#[derive(Default)] +/// Tdx capabilities. +pub struct TdxCapabilities { + /// cpuid bits need to be fixed to 0. + pub attrs_fixed0: u64, + /// cpuid bits need to be fixed to 1. + pub attrs_fixed1: u64, + /// xfam bits need to be fixed to 0. + pub xfam_fixed0: u64, + /// xfam bits need to be fixed to 1. + pub xfam_fixed1: u64, + /// cpuid configs entry number. + pub nr_cpuid_configs: u32, + /// padding. + pub padding: u32, + /// cpuid config list + pub cpuid_configs: __IncompleteArrayField, +} + +ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong); +/// TDX module related errors. +#[derive(Error, Debug)] +pub enum TdxIoctlError { + /// Failed to create TdxCaps + #[error("Failed to create TdxCaps")] + TdxCapabilitiesCreate, + /// Failed to get TDX Capbilities + #[error("Failed to get TDX Capbilities: {0}")] + TdxCapabilities(#[source] std::io::Error), + /// Failed to init TDX. + #[error("Failed to init TDX: {0}")] + TdxInit(#[source] std::io::Error), + /// Failed to finalize TDX. + #[error("Failed to finalize TDX: {0}")] + TdxFinalize(#[source] std::io::Error), + /// Failed to init TDX memory region. + #[error("Failed to init TDX memory region: {0}")] + TdxInitMemRegion(#[source] std::io::Error), + /// Failed to init TDX vcpu. 
+ #[error("Failed to init TDX vcpu: {0}")] + TdxInitVcpu(#[source] std::io::Error), +} + +/// TDX related ioctl command +#[repr(u32)] +enum TdxCommand { + /// Get Capability + Capabilities = 0, + /// Init TD + InitVm = 1, + /// Init vcpu for TD + InitVcpu = 2, + /// Init memory region for TD + InitMemRegion = 3, + /// Finalize TD + Finalize = 4, +} + +/// TDX related ioctl command +fn tdx_command( + fd: &RawFd, + command: TdxCommand, + metadata: u32, + data: u64, +) -> std::result::Result<(), std::io::Error> { + #[repr(C)] + struct TdxIoctlCmd { + command: TdxCommand, + metadata: u32, + data: u64, + } + let cmd = TdxIoctlCmd { + command, + metadata, + data, + }; + let ret = unsafe { + ioctl_with_val( + fd, + KVM_MEMORY_ENCRYPT_OP(), + &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong, + ) + }; + if ret < 0 { + return Err(std::io::Error::last_os_error()); + } + Ok(()) +} + +/// Init TDX +pub fn tdx_init( + vm_fd: &RawFd, + cpu_id: &CpuId, + max_vcpus: u32, +) -> std::result::Result<(), TdxIoctlError> { + #[repr(C)] + struct TdxInitVm { + max_vcpus: u32, + tsc_khz: u32, + attributes: u64, + cpuid: u64, + mrconfigid: [u64; 6], + mrowner: [u64; 6], + mrownerconfig: [u64; 6], + reserved: [u64; 43], + } + let data = TdxInitVm { + max_vcpus, + tsc_khz: 0, + attributes: 0, // TDX1_TD_ATTRIBUTE_DEBUG, + cpuid: cpu_id.as_fam_struct_ptr() as u64, + mrconfigid: [0; 6], + mrowner: [0; 6], + mrownerconfig: [0; 6], + reserved: [0; 43], + }; + tdx_command(vm_fd, TdxCommand::InitVm, 0, &data as *const _ as u64) + .map_err(TdxIoctlError::TdxInit) +} + +/// Finalize the TDX setup for this VM +pub fn tdx_finalize(vm_fd: &RawFd) -> std::result::Result<(), TdxIoctlError> { + tdx_command(vm_fd, TdxCommand::Finalize, 0, 0).map_err(TdxIoctlError::TdxFinalize) +} + +/// Initialize TDX memory Region +pub fn tdx_init_memory_region( + vm_fd: &RawFd, + host_address: u64, + guest_address: u64, + size: u64, + measure: bool, +) -> std::result::Result<(), TdxIoctlError> { + #[repr(C)] + struct TdxInitMemRegion { + host_address: u64, + guest_address: u64, + pages: u64, + } + let data = TdxInitMemRegion { + host_address, + guest_address, + pages: size / 4096, + }; + tdx_command( + vm_fd, + TdxCommand::InitMemRegion, + if measure { 1 } else { 0 }, + &data as *const _ as u64, + ) + .map_err(TdxIoctlError::TdxInitMemRegion) +} + +/// Initialize TDX vcpu +pub fn tdx_init_vcpu(vcpu_fd: &RawFd, hob_address: u64) -> std::result::Result<(), TdxIoctlError> { + tdx_command(vcpu_fd, TdxCommand::InitVcpu, 0, hob_address).map_err(TdxIoctlError::TdxInitVcpu) +} + +/// Get tdx capabilities. 
+pub fn tdx_get_caps(kvm_fd: &RawFd) -> std::result::Result { + let mut tdx_caps = TdxCaps::new(TDX1_MAX_NR_CPUID_CONFIGS) + .map_err(|_| TdxIoctlError::TdxCapabilitiesCreate)?; + tdx_command( + kvm_fd, + TdxCommand::Capabilities, + 0, + tdx_caps.as_mut_fam_struct_ptr() as *const _ as u64, + ) + .map_err(TdxIoctlError::TdxCapabilities)?; + Ok(tdx_caps) +} diff --git a/src/dragonball/src/dbs_upcall/Cargo.toml b/src/dragonball/src/dbs_upcall/Cargo.toml new file mode 100755 index 000000000000..b650510920d8 --- /dev/null +++ b/src/dragonball/src/dbs_upcall/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "dbs-upcall" +version = "0.3.0" +authors = ["Alibaba Dragonball Team"] +license = "Apache-2.0" +edition = "2018" +description = "dbs-upcall is a direct communication tool between VMM and guest" +homepage = "https://github.com/openanolis/dragonball-sandbox" +repository = "https://github.com/openanolis/dragonball-sandbox/tree/main/crates/dbs-virtio-devices" +keywords = ["dragonball", "secure-sandbox", "devices", "upcall", "virtio"] +readme = "README.md" + +[dependencies] +anyhow = "1" +log = "0.4.14" +thiserror = "1" +timerfd = "1.2.0" + +dbs-utils = { path = "../dbs_utils" } +dbs-virtio-devices = { path = "../dbs_virtio_devices", features = ["virtio-vsock"] } diff --git a/src/dragonball/src/dbs_upcall/LICENSE b/src/dragonball/src/dbs_upcall/LICENSE new file mode 120000 index 000000000000..30cff7403da0 --- /dev/null +++ b/src/dragonball/src/dbs_upcall/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/dragonball/src/dbs_upcall/README.md b/src/dragonball/src/dbs_upcall/README.md new file mode 100755 index 000000000000..0dd7b4cf0a18 --- /dev/null +++ b/src/dragonball/src/dbs_upcall/README.md @@ -0,0 +1,69 @@ +# dbs-upcall + +`dbs-upcall` is a direct communication tool between VMM and guest developed upon VSOCK. The server side of the upcall is a driver in guest kernel (kernel patches are needed for this feature) and it'll start to serve the requests after the kernel starts. And the client side is in VMM , it'll be a thread that communicates with VSOCK through UDS. + +We have accomplished device hotplug / hot-unplug directly through upcall in order to avoid virtualization of ACPI to minimize virtual machines overhead. And there could be many other usage through this direct communication channel. + +## Design + +### Server Design + +The server side of upcall is a driver in guest kernel and the VSOCK port is 0xDB. +After the VSOCK is connected, upcall related service will be registered and a kernel thread providing corresponding service will be created. +The upcall service thread will first send a message with message type Connect to try to connect with the client side (VMM). After service successfully connects, the service thread will get into a loop for continuously receiving requests from the client side and processing the requests until the service stops. + +The service we currently support: +1. device manager : supports CPU hotplug / hot-unplug, virtio-mmio devices hotplug / hot-unplug + +### Client Design +The client side is in VMM and we abstract related logic into this crate `dbs-upcall`. + +The upcall state machine for the client side: +![Upcall State Machine](./images/upcall_state_machine.png) + +The client side's workflow: +1. [Current State: `WaitingServer`] Check the connection with VSOCK server. +2. [Current State: `WaitingService`]Check the connection with upcall server side in the guest kernel for message type Connect and magic version. +3. 
[Current State: `ServiceConnected`] The request could be sent through upcall in this state. + +If step 1 and 2 failed, upcall will try to reconnect. +If request is sent in step 3, upcall state will change to `ServiceBusy` and upcall will not process other requests in this state. + +### Message Design +There are two parts for the upcall request message : message header and message load. +And there are three parts for the upcall reply message: message header, result and message load. + +Message Header contains following information and it remains the same for the request and the reply : +1. magic_version(u32): magic version for identifying upcall and the service type +2. msg_size(u32): size of the message load +3. msg_type(u32): type for the message to identify its usage (e.g. ADD_CPU) +4. msg_flags(u32): reserved + +For the upcall request message, message load currently contains two kinds of msg_load. +msg_load type 1: add_mmio_dev - for virtio-mmio hotplug / hot-unplug request: +1. mmio_base +2. mmio_size +3. mmio_irq + +msg_load type 2: `cpu_dev_info`` - for CPU hotplug / hot-unplug request: +1. count +2. `apic_ver` +3. `apic_ids[256]` + +For the upcall reply message, reply contains result and two kinds of msg_load. +If result is 0, the operation is successful. +If result is not 0, result refers to the error code. + +msg_load type 1: add_mmio_dev - for virtio-mmio reply: +currently empty + +msg_load type 2: `cpu_dev_reply_info`` - for CPU hotplug / hot-unplug reply: +1. `apic_index` + +## Kernel Patches + +Kernel patches are needed for dbs-upcall. You could go to [Upcall Kernel Patches](/tools/packaging/kernel/patches/5.10.x/dragonball-experimental) to get the upcall patches. + +## License + +This project is licensed under [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). \ No newline at end of file diff --git a/src/dragonball/src/dbs_upcall/images/upcall_state_machine.png b/src/dragonball/src/dbs_upcall/images/upcall_state_machine.png new file mode 100755 index 000000000000..8f7256b30b97 Binary files /dev/null and b/src/dragonball/src/dbs_upcall/images/upcall_state_machine.png differ diff --git a/src/dragonball/src/dbs_upcall/src/dev_mgr_service.rs b/src/dragonball/src/dbs_upcall/src/dev_mgr_service.rs new file mode 100755 index 000000000000..f618828108ea --- /dev/null +++ b/src/dragonball/src/dbs_upcall/src/dev_mgr_service.rs @@ -0,0 +1,562 @@ +// Copyright 2022 Alibaba Corporation. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! # Upcall Device Manager Service. +//! +//! Provides basic operations for the upcall device manager, include: +//! - CPU / Mmio-Virtio Device's hot-plug +//! - CPU Device's hot-unplug + +use std::fmt; +use std::mem; + +use dbs_virtio_devices::vsock::backend::VsockStream; + +use crate::{ + Result, UpcallClientError, UpcallClientRequest, UpcallClientResponse, UpcallClientService, +}; + +const DEV_MGR_MSG_SIZE: usize = 0x400; +const DEV_MGR_MAGIC_VERSION: u32 = 0x444D0100; +const DEV_MGR_BYTE: &[u8; 1usize] = b"d"; + +/// Device manager's op code. +#[allow(dead_code)] +#[repr(u32)] +enum DevMgrMsgType { + Connect = 0x00000000, + AddCpu = 0x00000001, + DelCpu = 0x00000002, + AddMem = 0x00000003, + DelMem = 0x00000004, + AddMmio = 0x00000005, + DelMmio = 0x00000006, + AddPci = 0x00000007, + DelPci = 0x00000008, +} + +/// Device manager's header for messages. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +struct DevMgrMsgHeader { + pub magic_version: u32, + pub msg_size: u32, + pub msg_type: u32, + pub msg_flags: u32, +} + +/// Command struct to add/del a MMIO Virtio Device. +#[repr(C)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct MmioDevRequest { + /// base address of the virtio MMIO configuration window. + pub mmio_base: u64, + /// size of the virtio MMIO configuration window. + pub mmio_size: u64, + /// Interrupt number assigned to the MMIO virito device. + pub mmio_irq: u32, +} + +/// Command struct to add/del a vCPU. +#[repr(C)] +#[derive(Clone)] +pub struct CpuDevRequest { + /// hotplug or hot unplug cpu count + pub count: u8, + #[cfg(target_arch = "x86_64")] + /// apic version + pub apic_ver: u8, + #[cfg(target_arch = "x86_64")] + /// apic id array + pub apic_ids: [u8; 256], +} + +impl PartialEq for CpuDevRequest { + #[cfg(target_arch = "x86_64")] + fn eq(&self, other: &CpuDevRequest) -> bool { + self.count == other.count + && self.apic_ver == other.apic_ver + && self + .apic_ids + .iter() + .zip(other.apic_ids.iter()) + .all(|(s, o)| s == o) + } + + #[cfg(target_arch = "aarch64")] + fn eq(&self, other: &CpuDevRequest) -> bool { + self.count == other.count + } +} + +impl fmt::Debug for CpuDevRequest { + #[cfg(target_arch = "x86_64")] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use std::fmt::Write as _; + let mut apic_ids = String::from("[ "); + for apic_id in self.apic_ids.iter() { + if apic_id == &0 { + break; + } + let _ = write!(apic_ids, "{apic_id}"); + apic_ids.push(' '); + } + apic_ids.push_str(" ]"); + f.debug_struct("CpuDevRequest") + .field("count", &self.count) + .field("apic_ver", &self.apic_ver) + .field("apic_ids", &apic_ids) + .finish() + } + + #[cfg(target_arch = "aarch64")] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CpuDevRequest") + .field("count", &self.count) + .finish() + } +} + +/// Device manager's request representation in client side. +#[derive(Clone, PartialEq, Debug)] +pub enum DevMgrRequest { + /// Add a MMIO virtio device + AddMmioDev(MmioDevRequest), + /// Del a MMIO device device + DelMmioDev(MmioDevRequest), + /// Add a VCPU + AddVcpu(CpuDevRequest), + /// Del a VCPU + DelVcpu(CpuDevRequest), +} + +impl DevMgrRequest { + /// Convert client side's representation into server side's representation. 
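+    ///
+    /// Illustrative sketch (the MMIO window values are arbitrary):
+    ///
+    /// ```ignore
+    /// let req = DevMgrRequest::AddMmioDev(MmioDevRequest {
+    ///     mmio_base: 0xc000_0000,
+    ///     mmio_size: 0x1000,
+    ///     mmio_irq: 5,
+    /// });
+    /// let buf = req.build(); // Box<[u8; DEV_MGR_MSG_SIZE]>, ready to send
+    /// ```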
+ pub fn build(&self) -> Box<[u8; DEV_MGR_MSG_SIZE]> { + let buffer = Box::new([0; DEV_MGR_MSG_SIZE]); + let size_hdr = mem::size_of::(); + let msg_hdr = unsafe { &mut *(buffer.as_ptr() as *mut DevMgrMsgHeader) }; + + msg_hdr.magic_version = DEV_MGR_MAGIC_VERSION; + msg_hdr.msg_flags = 0; + + match self { + DevMgrRequest::AddMmioDev(s) => { + msg_hdr.msg_type = DevMgrMsgType::AddMmio as u32; + msg_hdr.msg_size = mem::size_of::() as u32; + let mmio_dev = + unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut MmioDevRequest) }; + *mmio_dev = *s; + } + DevMgrRequest::DelMmioDev(s) => { + msg_hdr.msg_type = DevMgrMsgType::DelMmio as u32; + msg_hdr.msg_size = mem::size_of::() as u32; + let mmio_dev = + unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut MmioDevRequest) }; + *mmio_dev = *s; + } + DevMgrRequest::AddVcpu(s) => { + msg_hdr.msg_type = DevMgrMsgType::AddCpu as u32; + msg_hdr.msg_size = mem::size_of::() as u32; + let vcpu_dev = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut CpuDevRequest) }; + *vcpu_dev = s.clone(); + } + DevMgrRequest::DelVcpu(s) => { + msg_hdr.msg_type = DevMgrMsgType::DelCpu as u32; + msg_hdr.msg_size = mem::size_of::() as u32; + let vcpu_dev = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut CpuDevRequest) }; + *vcpu_dev = s.clone(); + } + } + + buffer + } +} + +/// Device manager's response from cpu device. +#[repr(C)] +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct CpuDevResponse { + #[cfg(target_arch = "x86_64")] + /// apic id index of last act cpu + pub apic_id_index: u32, + #[cfg(target_arch = "aarch64")] + /// cpu id of last act cpu + pub cpu_id: u32, +} + +/// Device manager's response inner message. +#[derive(Debug, Eq, PartialEq)] +pub struct DevMgrResponseInfo { + /// 0 means success and other result is the error code. + pub result: i32, + /// Additional info returned by device. + pub info: I, +} + +/// Device manager's response representation in client side. +#[derive(Debug, Eq, PartialEq)] +pub enum DevMgrResponse { + /// Add mmio device's response (no response body) + AddMmioDev(DevMgrResponseInfo<()>), + /// Add / Del cpu device's response + CpuDev(DevMgrResponseInfo), + /// Other response + Other(DevMgrResponseInfo<()>), +} + +impl DevMgrResponse { + /// Convert server side's representation into client side's representation. + fn make(buffer: &[u8]) -> Result { + let size_hdr = mem::size_of::(); + let msg_hdr = unsafe { &mut *(buffer.as_ptr() as *mut DevMgrMsgHeader) }; + let result = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut i32) }; + + match msg_hdr.msg_type { + msg_type + if msg_type == DevMgrMsgType::AddCpu as u32 + || msg_type == DevMgrMsgType::DelCpu as u32 => + { + let response = unsafe { + &mut *(buffer[(size_hdr + mem::size_of::())..].as_ptr() + as *mut CpuDevResponse) + }; + Ok(DevMgrResponse::CpuDev(DevMgrResponseInfo { + result: *result, + info: response.clone(), + })) + } + msg_type if msg_type == DevMgrMsgType::AddMmio as u32 => { + Ok(DevMgrResponse::AddMmioDev(DevMgrResponseInfo { + result: *result, + info: (), + })) + } + _ => Ok(DevMgrResponse::Other(DevMgrResponseInfo { + result: *result, + info: (), + })), + } + } +} + +/// Device manager service, realized upcall client service. 
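+///
+/// A minimal usage sketch (illustrative only, not compiled as a doctest); it
+/// assumes `stream` is an already connected `Box<dyn VsockStream>` obtained
+/// from the vsock backend, and the MMIO values are placeholders:
+///
+/// ```ignore
+/// let service = DevMgrService {};
+/// // Select the device manager service on the freshly connected stream.
+/// service.connection_start(&mut stream)?;
+/// service.connection_check(&mut stream)?;
+/// // Hot-plug a virtio-mmio device.
+/// let req = UpcallClientRequest::DevMgr(DevMgrRequest::AddMmioDev(MmioDevRequest {
+///     mmio_base: 0xd000_0000,
+///     mmio_size: 0x1000,
+///     mmio_irq: 5,
+/// }));
+/// service.send_request(&mut stream, req)?;
+/// let response = service.handle_response(&mut stream)?;
+/// ```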
+#[derive(Default)] +pub struct DevMgrService {} + +impl UpcallClientService for DevMgrService { + fn connection_start(&self, stream: &mut Box) -> Result<()> { + stream + .write_all(DEV_MGR_BYTE) + .map_err(UpcallClientError::ServiceConnect) + } + + fn connection_check(&self, stream: &mut Box) -> Result<()> { + let mut buf = [0; DEV_MGR_MSG_SIZE]; + stream + .read_exact(&mut buf) + .map_err(UpcallClientError::ServiceConnect)?; + let hdr = unsafe { &*(buf.as_ptr() as *const DevMgrMsgHeader) }; + if hdr.magic_version == DEV_MGR_MAGIC_VERSION + && hdr.msg_size == 0 + && hdr.msg_flags == 0 + && hdr.msg_type == DevMgrMsgType::Connect as u32 + { + Ok(()) + } else { + Err(UpcallClientError::InvalidMessage(format!( + "upcall device manager expect msg_type {:?}, but received {}", + DevMgrMsgType::Connect as u32, + hdr.msg_type + ))) + } + } + + fn send_request( + &self, + stream: &mut Box, + request: UpcallClientRequest, + ) -> Result<()> { + let msg = match request { + UpcallClientRequest::DevMgr(req) => req.build(), + // we don't have other message type yet + #[cfg(test)] + UpcallClientRequest::FakeRequest => unimplemented!(), + }; + stream + .write_all(&*msg) + .map_err(UpcallClientError::SendRequest) + } + + fn handle_response(&self, stream: &mut Box) -> Result { + let mut buf = [0; DEV_MGR_MSG_SIZE]; + stream + .read_exact(&mut buf) + .map_err(UpcallClientError::GetResponse)?; + let response = DevMgrResponse::make(&buf)?; + + Ok(UpcallClientResponse::DevMgr(response)) + } +} + +#[cfg(test)] +mod tests { + use dbs_virtio_devices::vsock::backend::{VsockBackend, VsockInnerBackend}; + + use super::*; + + #[test] + fn test_build_dev_mgr_request() { + let size_hdr = mem::size_of::(); + // add mmio dev request + { + let add_mmio_dev_request = MmioDevRequest { + mmio_base: 0, + mmio_size: 1, + mmio_irq: 2, + }; + let dev_mgr_request = DevMgrRequest::AddMmioDev(add_mmio_dev_request); + let buffer = dev_mgr_request.build(); + + // valid total size + assert_eq!(buffer.len(), DEV_MGR_MSG_SIZE); + + // valid header + let msg_hdr = unsafe { &mut *(buffer.as_ptr() as *mut DevMgrMsgHeader) }; + assert_eq!(msg_hdr.magic_version, DEV_MGR_MAGIC_VERSION); + assert_eq!(msg_hdr.msg_flags, 0); + assert_eq!(msg_hdr.msg_type, DevMgrMsgType::AddMmio as u32); + assert_eq!(msg_hdr.msg_size, mem::size_of::() as u32); + + // valid request + let mmio_dev_req = + unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut MmioDevRequest) }; + assert_eq!(mmio_dev_req, &add_mmio_dev_request); + } + + // add vcpu dev request + { + let cpu_dev_request = CpuDevRequest { + count: 1, + #[cfg(target_arch = "x86_64")] + apic_ver: 2, + #[cfg(target_arch = "x86_64")] + apic_ids: [3; 256], + }; + let dev_mgr_request = DevMgrRequest::AddVcpu(cpu_dev_request.clone()); + let buffer = dev_mgr_request.build(); + + // valid total size + assert_eq!(buffer.len(), DEV_MGR_MSG_SIZE); + + // valid header + let msg_hdr = unsafe { &mut *(buffer.as_ptr() as *mut DevMgrMsgHeader) }; + assert_eq!(msg_hdr.magic_version, DEV_MGR_MAGIC_VERSION); + assert_eq!(msg_hdr.msg_flags, 0); + assert_eq!(msg_hdr.msg_type, DevMgrMsgType::AddCpu as u32); + assert_eq!(msg_hdr.msg_size, mem::size_of::() as u32); + + // valid request + let cpu_dev_req = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut CpuDevRequest) }; + assert_eq!(cpu_dev_req, &cpu_dev_request); + } + + // del vcpu dev request + { + let cpu_dev_request = CpuDevRequest { + count: 1, + #[cfg(target_arch = "x86_64")] + apic_ver: 2, + #[cfg(target_arch = "x86_64")] + apic_ids: [3; 256], + }; + let 
dev_mgr_request = DevMgrRequest::DelVcpu(cpu_dev_request.clone()); + let buffer = dev_mgr_request.build(); + + // valid total size + assert_eq!(buffer.len(), DEV_MGR_MSG_SIZE); + + // valid header + let msg_hdr = unsafe { &mut *(buffer.as_ptr() as *mut DevMgrMsgHeader) }; + assert_eq!(msg_hdr.magic_version, DEV_MGR_MAGIC_VERSION); + assert_eq!(msg_hdr.msg_flags, 0); + assert_eq!(msg_hdr.msg_type, DevMgrMsgType::DelCpu as u32); + assert_eq!(msg_hdr.msg_size, mem::size_of::() as u32); + + // valid request + let cpu_dev_req = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut CpuDevRequest) }; + assert_eq!(cpu_dev_req, &cpu_dev_request); + } + } + + #[test] + fn test_make_dev_mgr_response() { + let size_hdr = mem::size_of::(); + + // test cpu response + { + let buffer = [0; DEV_MGR_MSG_SIZE]; + let msg_hdr = unsafe { &mut *(buffer.as_ptr() as *mut DevMgrMsgHeader) }; + + msg_hdr.magic_version = DEV_MGR_MAGIC_VERSION; + + msg_hdr.msg_type = DevMgrMsgType::AddCpu as u32; + msg_hdr.msg_size = mem::size_of::() as u32; + msg_hdr.msg_flags = 0; + + let result = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut i32) }; + *result = 0; + + let vcpu_result = unsafe { + &mut *(buffer[(size_hdr + mem::size_of::())..].as_ptr() as *mut CpuDevResponse) + }; + + #[cfg(target_arch = "x86_64")] + { + vcpu_result.apic_id_index = 1; + } + #[cfg(target_arch = "aarch64")] + { + vcpu_result.cpu_id = 1; + } + + match DevMgrResponse::make(&buffer).unwrap() { + DevMgrResponse::CpuDev(resp) => { + assert_eq!(resp.result, 0); + #[cfg(target_arch = "x86_64")] + assert_eq!(resp.info.apic_id_index, 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(resp.info.cpu_id, 1); + } + _ => unreachable!(), + } + } + + // test add mmio response + { + let buffer = [0; DEV_MGR_MSG_SIZE]; + let msg_hdr = unsafe { &mut *(buffer.as_ptr() as *mut DevMgrMsgHeader) }; + + msg_hdr.magic_version = DEV_MGR_MAGIC_VERSION; + + msg_hdr.msg_type = DevMgrMsgType::AddMmio as u32; + msg_hdr.msg_size = 0; + msg_hdr.msg_flags = 0; + + let result = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut i32) }; + *result = 0; + + match DevMgrResponse::make(&buffer).unwrap() { + DevMgrResponse::AddMmioDev(resp) => { + assert_eq!(resp.result, 0); + } + _ => unreachable!(), + } + } + + // test result error + { + let buffer = [0; DEV_MGR_MSG_SIZE]; + let msg_hdr = unsafe { &mut *(buffer.as_ptr() as *mut DevMgrMsgHeader) }; + + msg_hdr.magic_version = DEV_MGR_MAGIC_VERSION; + + msg_hdr.msg_type = DevMgrMsgType::AddMmio as u32; + msg_hdr.msg_size = 0; + msg_hdr.msg_flags = 0; + + let result = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut i32) }; + *result = 1; + + match DevMgrResponse::make(&buffer).unwrap() { + DevMgrResponse::AddMmioDev(resp) => { + assert_eq!(resp.result, 1); + } + _ => unreachable!(), + } + } + + // test invalid unknown msg flag + { + let buffer = [0; DEV_MGR_MSG_SIZE]; + let msg_hdr = unsafe { &mut *(buffer.as_ptr() as *mut DevMgrMsgHeader) }; + + msg_hdr.magic_version = DEV_MGR_MAGIC_VERSION; + + msg_hdr.msg_type = 0xabcd1234; + msg_hdr.msg_size = 0; + msg_hdr.msg_flags = 0; + + let result = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut i32) }; + *result = 1; + + match DevMgrResponse::make(&buffer).unwrap() { + DevMgrResponse::Other(resp) => { + assert_eq!(resp.result, 1); + } + _ => unreachable!(), + } + } + } + + fn get_vsock_inner_backend_stream_pair() -> (Box, Box) { + let mut vsock_backend = VsockInnerBackend::new().unwrap(); + let connector = vsock_backend.get_connector(); + let outer_stream = 
connector.connect().unwrap(); + let inner_stream = vsock_backend.accept().unwrap(); + + (inner_stream, outer_stream) + } + + #[test] + fn test_dev_mgr_service_connection_start() { + let (mut inner_stream, mut outer_stream) = get_vsock_inner_backend_stream_pair(); + let dev_mgr_service = DevMgrService {}; + + assert!(dev_mgr_service.connection_start(&mut inner_stream).is_ok()); + let mut reader_buf = [0; 1]; + outer_stream.read_exact(&mut reader_buf).unwrap(); + assert_eq!(reader_buf, [b'd']); + } + + #[test] + fn test_dev_mgr_service_send_request() { + let (mut inner_stream, mut outer_stream) = get_vsock_inner_backend_stream_pair(); + let dev_mgr_service = DevMgrService {}; + + let add_mmio_dev_request = DevMgrRequest::AddMmioDev(MmioDevRequest { + mmio_base: 0, + mmio_size: 1, + mmio_irq: 2, + }); + let request = UpcallClientRequest::DevMgr(add_mmio_dev_request.clone()); + + assert!(dev_mgr_service + .send_request(&mut outer_stream, request) + .is_ok()); + + let mut reader_buf = [0; DEV_MGR_MSG_SIZE]; + inner_stream.read_exact(&mut reader_buf).unwrap(); + + assert!(add_mmio_dev_request + .build() + .iter() + .zip(reader_buf.iter()) + .all(|(req, buf)| req == buf)); + } + + #[test] + fn test_dev_mgr_service_handle_response() { + let (mut inner_stream, mut outer_stream) = get_vsock_inner_backend_stream_pair(); + let dev_mgr_service = DevMgrService {}; + + let buffer = [0; DEV_MGR_MSG_SIZE]; + let msg_hdr = unsafe { &mut *(buffer.as_ptr() as *mut DevMgrMsgHeader) }; + msg_hdr.magic_version = DEV_MGR_MAGIC_VERSION; + msg_hdr.msg_type = DevMgrMsgType::AddMmio as u32; + msg_hdr.msg_size = 0; + + inner_stream.write_all(&buffer).unwrap(); + assert!(dev_mgr_service.handle_response(&mut outer_stream).is_ok()); + } +} diff --git a/src/dragonball/src/dbs_upcall/src/lib.rs b/src/dragonball/src/dbs_upcall/src/lib.rs new file mode 100755 index 000000000000..8e03c4e01236 --- /dev/null +++ b/src/dragonball/src/dbs_upcall/src/lib.rs @@ -0,0 +1,1141 @@ +// Copyright 2022 Alibaba Corporation. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![deny(missing_docs)] + +//! # Upcall Client's Implementation +//! +//! Provides basic operations for upcall client, include: +//! - Connect to upcall server and service +//! - Send data to server +//! - Receive data from server + +mod dev_mgr_service; + +use std::io::Write; +use std::os::unix::io::AsRawFd; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +use dbs_utils::epoll_manager::{EpollManager, EventOps, EventSet, Events, MutEventSubscriber}; +use dbs_virtio_devices::vsock::backend::{VsockInnerConnector, VsockStream}; +use log::{debug, error, info, trace, warn}; +use timerfd::{SetTimeFlags, TimerFd, TimerState}; + +pub use crate::dev_mgr_service::{ + CpuDevRequest, DevMgrRequest, DevMgrResponse, DevMgrService, MmioDevRequest, +}; + +const SERVER_PORT: u32 = 0xDB; +const SERVER_RECONNECT_DURATION_MS: u64 = 10; +const SERVER_MAX_RECONNECT_TIME: u32 = 500; + +/// Upcall client error. +#[derive(Debug, thiserror::Error)] +pub enum UpcallClientError { + /// Received invalid upcall message. + #[error("received invalid upcall message: {0}")] + InvalidMessage(String), + /// Upcall server connect error. + #[error("upcall server connect error: {0}")] + ServerConnect(#[source] std::io::Error), + /// Upcall service connect error. + #[error("upcall service connect error: {0}")] + ServiceConnect(#[source] std::io::Error), + /// Upcall send request error. 
+ #[error("upcall send request error: {0}")] + SendRequest(#[source] std::io::Error), + /// Upcall get response error. + #[error("upcall get response error: {0}")] + GetResponse(#[source] std::io::Error), + /// Errors with timerfd. + #[error("timerfd error: {0}")] + TimerFd(#[source] std::io::Error), + /// Upcall is not connected. + #[error("upcall is not connected")] + UpcallIsNotConnected, + /// Upcall is busy now. + #[error("upcall is busy now")] + UpcallIsBusy, +} + +/// Upcall client result. +pub type Result = std::result::Result; + +/// Upcall client state, used by upcall client state machine. +/// +// NOTE: here's not a state like `ServerDisconnect`, because we always connect +// to server immediately when constructing the connection or disconnected from +// server. +#[derive(Clone, Eq, PartialEq, Debug)] +pub enum UpcallClientState { + /// There are two possible scenarios for a connection in this state: + /// - Server's connection is broken, waiting for reconnect. + /// - Server connection request sent, waiting for server's response. + WaitingServer, + /// Service connection request sent, waiting for service's response. + WaitingService, + /// The upcall service is connected. + ServiceConnected, + /// The upcall channl is busy (request has been sent, but response has not + /// been received). + ServiceBusy, + /// Error state that cannot just reconnect to server. + ReconnectError, +} + +#[allow(clippy::large_enum_variant)] +/// Upcall client request of different services. +pub enum UpcallClientRequest { + /// Device manager's request. + DevMgr(DevMgrRequest), + #[cfg(test)] + /// Fake service's request. + FakeRequest, +} + +/// Upcall client response of different services. +#[derive(Debug, Eq, PartialEq)] +pub enum UpcallClientResponse { + /// Device manager's response. + DevMgr(DevMgrResponse), + /// Upcall client disconnected, and need to reconnect. + UpcallReset, + #[cfg(test)] + /// Fake service's response + FakeResponse, +} + +/// Shared info between upcall client and upcall epoll handler. 
+struct UpcallClientInfo { + service: S, + connector: VsockInnerConnector, + stream: Option>, + state: UpcallClientState, + result_callback: Option>, +} + +impl UpcallClientInfo { + fn server_connection_start(&mut self) -> Result<()> { + let mut stream = self + .connector + .connect() + .map_err(UpcallClientError::ServerConnect)?; + stream + .set_nonblocking(true) + .map_err(UpcallClientError::ServerConnect)?; + + let cmd = format!("CONNECT {SERVER_PORT}\n"); + stream + .write_all(&cmd.into_bytes()) + .map_err(UpcallClientError::ServerConnect)?; + + // drop the old stream + let _ = self.stream.replace(stream); + + Ok(()) + } + + fn server_connection_check(&mut self) -> Result<()> { + let mut buffer = [0; 50]; + let len = self + .stream + .as_mut() + .unwrap() + .read(&mut buffer) + .map_err(UpcallClientError::ServerConnect)?; + + if !(len > 2 && buffer[0..2] == [b'O', b'K']) { + return Err(UpcallClientError::InvalidMessage(format!( + "upcall server expect ok, but received {}", + String::from_utf8_lossy(&buffer[0..2]), + ))); + } + + Ok(()) + } + + fn service_connection_start(&mut self) -> Result<()> { + self.service.connection_start(self.stream.as_mut().unwrap()) + } + + fn service_connection_check(&mut self) -> Result<()> { + self.service.connection_check(self.stream.as_mut().unwrap()) + } + + fn send_request(&mut self, request: UpcallClientRequest) -> Result<()> { + self.service + .send_request(self.stream.as_mut().unwrap(), request) + } + + fn handle_response(&mut self) -> Result { + self.service.handle_response(self.stream.as_mut().unwrap()) + } + + fn set_state(&mut self, state: UpcallClientState) { + self.state = state; + } + + fn set_callback(&mut self, callback: Box) { + self.result_callback.replace(callback); + } + + fn consume_callback(&mut self, response: UpcallClientResponse) { + if let Some(cb) = self.result_callback.take() { + cb(response) + }; + } +} + +/// Upcall client's Implementation. +pub struct UpcallClient { + epoll_manager: EpollManager, + info: Arc>>, +} + +impl UpcallClient { + /// Create a new Upcall Client instance. + pub fn new( + connector: VsockInnerConnector, + epoll_manager: EpollManager, + service: S, + ) -> Result { + let info = UpcallClientInfo { + connector, + stream: None, + state: UpcallClientState::WaitingServer, + service, + result_callback: None, + }; + Ok(UpcallClient { + epoll_manager, + info: Arc::new(Mutex::new(info)), + }) + } + + /// Connect upcall client to upcall server. + pub fn connect(&mut self) -> Result<()> { + let handler = Box::new(UpcallEpollHandler::new(self.info.clone())?); + self.epoll_manager.add_subscriber(handler); + + Ok(()) + } + + fn send_request_inner( + &self, + request: UpcallClientRequest, + callback: Option>, + ) -> Result<()> { + let mut info = self.info.lock().unwrap(); + match info.state { + UpcallClientState::WaitingServer + | UpcallClientState::WaitingService + | UpcallClientState::ReconnectError => Err(UpcallClientError::UpcallIsNotConnected), + UpcallClientState::ServiceBusy => Err(UpcallClientError::UpcallIsBusy), + UpcallClientState::ServiceConnected => { + info.send_request(request)?; + info.set_state(UpcallClientState::ServiceBusy); + if let Some(cb) = callback { + info.set_callback(cb) + }; + Ok(()) + } + } + } + + /// Send request to upcall server, and get the response from callback + /// function. 
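+    ///
+    /// On success the client moves to `ServiceBusy` until the response is
+    /// received; a second request issued in that window fails with
+    /// `UpcallClientError::UpcallIsBusy`.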
+ pub fn send_request( + &self, + request: UpcallClientRequest, + callback: Box, + ) -> Result<()> { + self.send_request_inner(request, Some(callback)) + } + + /// Only send request to upcall server, and discard the response. + pub fn send_request_without_result(&self, request: UpcallClientRequest) -> Result<()> { + self.send_request_inner(request, None) + } + + /// Get the link state of upcall client. + pub fn get_state(&self) -> UpcallClientState { + self.info.lock().unwrap().state.clone() + } + + /// The upcall client is ready to send request to upcall server or not. + pub fn is_ready(&self) -> bool { + self.get_state() == UpcallClientState::ServiceConnected + } +} + +/// Event handler of upcall client. +pub struct UpcallEpollHandler { + info: Arc>>, + reconnect_timer: TimerFd, + reconnect_time: u32, + in_reconnect: bool, +} + +impl UpcallEpollHandler { + fn new(info: Arc>>) -> Result { + let handler = UpcallEpollHandler { + info, + reconnect_timer: TimerFd::new().map_err(UpcallClientError::TimerFd)?, + reconnect_time: 0, + in_reconnect: false, + }; + let info = handler.info.clone(); + info.lock().unwrap().server_connection_start()?; + + Ok(handler) + } + + fn set_reconnect(&mut self) -> Result<()> { + if self.in_reconnect { + info!("upcall server is waiting for reconnect"); + return Ok(()); + } + self.in_reconnect = true; + + self.reconnect_timer + .set_state(TimerState::Disarmed, SetTimeFlags::Default); + + if self.reconnect_time > SERVER_MAX_RECONNECT_TIME { + error!("upcall server's max reconnect time exceed"); + return Ok(()); + } + + self.reconnect_timer.set_state( + TimerState::Oneshot(Duration::from_millis(SERVER_RECONNECT_DURATION_MS)), + SetTimeFlags::Default, + ); + + self.reconnect_time += 1; + Ok(()) + } + + fn handle_stream_event(&mut self, ops: &mut EventOps) { + let info = self.info.clone(); + let mut info = info.lock().unwrap(); + match info.state { + UpcallClientState::WaitingServer => { + if let Err(e) = info.server_connection_check() { + debug!("upcall connect server check failed, {}", e); + info.set_state(UpcallClientState::WaitingServer); + if let Err(e) = self.set_reconnect() { + error!("set reconnect error: {}", e); + info.set_state(UpcallClientState::ReconnectError); + } + } else { + info!("upcall connect server success"); + // It's time to connect to service when server is connected. 
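+                // The service handshake reuses the stream that was just
+                // validated by the server; if it fails we fall back to the
+                // reconnect path.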
+ if let Err(e) = info.service_connection_start() { + warn!("upcall connect service start failed {}", e); + info.set_state(UpcallClientState::WaitingServer); + if let Err(e) = self.set_reconnect() { + error!("set reconnect error: {}", e); + info.set_state(UpcallClientState::ReconnectError); + } + } else { + // only if both server connection check and service connection start are ok, change to next state + info.state = UpcallClientState::WaitingService; + } + } + } + UpcallClientState::WaitingService => { + if let Err(e) = info.service_connection_check() { + warn!("upcall connect service check failed, {}", e); + info.set_state(UpcallClientState::WaitingServer); + if let Err(e) = self.set_reconnect() { + error!("set reconnect error: {}", e); + info.set_state(UpcallClientState::ReconnectError); + } + } else { + info!("upcall connect service success"); + info.set_state(UpcallClientState::ServiceConnected); + } + } + UpcallClientState::ServiceBusy => match info.handle_response() { + Ok(response) => { + trace!("upcall handle response success"); + info.set_state(UpcallClientState::ServiceConnected); + info.consume_callback(response); + } + Err(e) => { + warn!("upcall response failed {}", e); + info.set_state(UpcallClientState::WaitingServer); + if let Err(e) = self.set_reconnect() { + error!("set reconnect error: {}", e); + info.set_state(UpcallClientState::ReconnectError); + } + } + }, + UpcallClientState::ServiceConnected | UpcallClientState::ReconnectError => { + error!("we should get message from event handler when connection state is `ServiceConnected`"); + } + } + + if self.in_reconnect { + // remove the old stream's fd in epoll and drop the old stream + if let Some(stream) = info.stream.as_ref() { + ops.remove(Events::new_raw(stream.as_raw_fd(), EventSet::IN)) + .unwrap(); + } + let _ = info.stream.take(); + + // consume the result callback before reconnect + info.consume_callback(UpcallClientResponse::UpcallReset); + } + } + + fn handle_reconnect_event(&mut self, ops: &mut EventOps) { + // we should clear the reconnect timer and flag first + self.in_reconnect = false; + self.reconnect_timer + .set_state(TimerState::Disarmed, SetTimeFlags::Default); + + let info = self.info.clone(); + let mut info = info.lock().unwrap(); + // reconnect to server + if let Err(e) = info.server_connection_start() { + warn!("upcall reconnect server /failed: {}", e); + if let Err(e) = self.set_reconnect() { + error!("set reconnect error: {}", e); + } + } + debug!("upcall reconnect server..."); + // add new stream's fn to epoll + if let Some(stream) = info.stream.as_ref() { + ops.add(Events::new_raw(stream.as_raw_fd(), EventSet::IN)) + .unwrap(); + } + } +} + +impl MutEventSubscriber for UpcallEpollHandler +where + S: UpcallClientService + Send + 'static, +{ + fn process(&mut self, events: Events, ops: &mut EventOps) { + trace!("UpcallEpollHandler: process"); + + let info = self.info.lock().unwrap(); + let stream_fd = info.stream.as_ref().map(|s| s.as_raw_fd()); + drop(info); + + let reconnect_fd = self.reconnect_timer.as_raw_fd(); + match events.fd() { + fd if Some(fd) == stream_fd => self.handle_stream_event(ops), + fd if fd == reconnect_fd => { + self.handle_reconnect_event(ops); + } + _ => error!("upcall epoll handler: unknown event"), + } + } + + fn init(&mut self, ops: &mut EventOps) { + trace!("UpcallEpollHandler: init"); + // add the reconnect time fd into epoll manager + ops.add(Events::new(&self.reconnect_timer, EventSet::IN)) + .unwrap(); + // add the first stream into epoll manager + let info = 
self.info.lock().unwrap(); + ops.add(Events::new_raw( + info.stream.as_ref().unwrap().as_raw_fd(), + EventSet::IN, + )) + .unwrap(); + } +} + +/// The definition of upcall client service. +pub trait UpcallClientService { + /// Start to connect to service. + fn connection_start(&self, stream: &mut Box) -> Result<()>; + /// Check service's connection callback. + fn connection_check(&self, stream: &mut Box) -> Result<()>; + /// Send request to service. + fn send_request( + &self, + stream: &mut Box, + request: UpcallClientRequest, + ) -> Result<()>; + /// Service's response callback. + fn handle_response(&self, stream: &mut Box) -> Result; +} + +#[cfg(test)] +mod tests { + use dbs_utils::epoll_manager::SubscriberOps; + use dbs_virtio_devices::vsock::backend::{VsockBackend, VsockInnerBackend}; + + use super::*; + + #[derive(Default)] + struct FakeService { + connection_start_err: bool, + connection_check_err: bool, + handle_response_err: bool, + } + + impl UpcallClientService for FakeService { + fn connection_start(&self, stream: &mut Box) -> Result<()> { + if self.connection_start_err { + return Err(UpcallClientError::InvalidMessage(String::from( + "test failed", + ))); + } + stream + .write_all(&String::from("CONN START").into_bytes()) + .unwrap(); + Ok(()) + } + fn connection_check(&self, stream: &mut Box) -> Result<()> { + if self.connection_check_err { + return Err(UpcallClientError::InvalidMessage(String::from( + "test failed", + ))); + } + let mut buffer = [0; 10]; + stream.read_exact(&mut buffer).unwrap(); + assert_eq!(buffer, String::from("CONN CHECK").into_bytes().as_slice()); + Ok(()) + } + fn send_request( + &self, + stream: &mut Box, + _request: UpcallClientRequest, + ) -> Result<()> { + stream + .write_all(&String::from("TEST REQ").into_bytes()) + .unwrap(); + Ok(()) + } + + fn handle_response( + &self, + stream: &mut Box, + ) -> Result { + if self.handle_response_err { + return Err(UpcallClientError::InvalidMessage(String::from( + "test failed", + ))); + } + let mut buffer = [0; 9]; + stream.read_exact(&mut buffer).unwrap(); + assert_eq!(buffer, String::from("TEST RESP").into_bytes().as_slice()); + Ok(UpcallClientResponse::FakeResponse) + } + } + + fn get_upcall_client_info() -> (VsockInnerBackend, UpcallClientInfo) { + let vsock_backend = VsockInnerBackend::new().unwrap(); + let connector = vsock_backend.get_connector(); + let upcall_client_info = UpcallClientInfo { + service: FakeService::default(), + connector, + stream: None, + state: UpcallClientState::WaitingServer, + result_callback: None, + }; + (vsock_backend, upcall_client_info) + } + + #[test] + fn test_upcall_client_info_server_connection_start_and_check() { + let (mut vsock_backend, mut info) = get_upcall_client_info(); + + assert!(info.server_connection_start().is_ok()); + assert!(info.stream.is_some()); + + let mut inner_stream = vsock_backend.accept().unwrap(); + let mut read_buffer = vec![0; 12]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + assert_eq!( + read_buffer, + format!("CONNECT {SERVER_PORT}\n",).into_bytes() + ); + + let writer_buffer = String::from("ERR").into_bytes(); + inner_stream.write_all(&writer_buffer).unwrap(); + assert!(info.server_connection_check().is_err()); + + let writer_buffer = String::from("OK 1024\n").into_bytes(); + inner_stream.write_all(&writer_buffer).unwrap(); + assert!(info.server_connection_check().is_ok()); + } + + #[test] + fn test_upcall_client_info_service_connection() { + let (mut vsock_backend, mut info) = get_upcall_client_info(); + 
info.server_connection_start().unwrap(); + + let mut inner_stream = vsock_backend.accept().unwrap(); + let mut read_buffer = vec![0; 12]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + + assert!(info.service_connection_start().is_ok()); + let mut read_buffer = vec![0; 10]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + assert_eq!( + read_buffer, + String::from("CONN START").into_bytes().as_slice() + ); + + let writer_buffer = String::from("CONN CHECK").into_bytes(); + inner_stream.write_all(&writer_buffer).unwrap(); + assert!(info.service_connection_check().is_ok()); + } + + #[test] + fn test_upcall_client_info_request_and_response() { + let (mut vsock_backend, mut info) = get_upcall_client_info(); + info.server_connection_start().unwrap(); + + let mut inner_stream = vsock_backend.accept().unwrap(); + let mut read_buffer = vec![0; 12]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + + assert!(info.send_request(UpcallClientRequest::FakeRequest).is_ok()); + let mut read_buffer = vec![0; 8]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + assert_eq!( + read_buffer, + String::from("TEST REQ").into_bytes().as_slice() + ); + + let writer_buffer = String::from("TEST RESP").into_bytes(); + inner_stream.write_all(&writer_buffer).unwrap(); + assert!(info.handle_response().is_ok()); + } + + #[test] + fn test_upcall_client_info_set_state() { + let (_, mut info) = get_upcall_client_info(); + + info.set_state(UpcallClientState::WaitingServer); + assert_eq!(info.state, UpcallClientState::WaitingServer); + + info.set_state(UpcallClientState::ReconnectError); + assert_eq!(info.state, UpcallClientState::ReconnectError); + } + + #[test] + fn test_upcall_client_info_callback() { + let (_, mut info) = get_upcall_client_info(); + assert!(info.result_callback.is_none()); + + let callbacked = Arc::new(Mutex::new(None)); + let callbacked_ = callbacked.clone(); + info.set_callback(Box::new(move |resp| { + *callbacked_.lock().unwrap() = Some(resp); + })); + assert!(info.result_callback.is_some()); + + info.consume_callback(UpcallClientResponse::FakeResponse); + assert!(info.result_callback.is_none()); + assert_eq!( + *callbacked.lock().unwrap(), + Some(UpcallClientResponse::FakeResponse) + ); + } + + fn get_upcall_client() -> (VsockInnerBackend, UpcallClient) { + let vsock_backend = VsockInnerBackend::new().unwrap(); + let connector = vsock_backend.get_connector(); + let epoll_manager = EpollManager::default(); + let upcall_client = + UpcallClient::new(connector, epoll_manager, FakeService::default()).unwrap(); + + (vsock_backend, upcall_client) + } + + #[test] + fn test_upcall_client_connect() { + let (mut vsock_backend, mut upcall_client) = get_upcall_client(); + + assert!(upcall_client.connect().is_ok()); + + let mut inner_stream = vsock_backend.accept().unwrap(); + let mut read_buffer = vec![0; 12]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + assert_eq!(read_buffer, format!("CONNECT {SERVER_PORT}\n").into_bytes()); + } + + #[allow(clippy::mutex_atomic)] + #[allow(clippy::redundant_clone)] + #[test] + fn test_upcall_client_send_request() { + let (mut vsock_backend, upcall_client) = get_upcall_client(); + let info = upcall_client.info.clone(); + let connector = vsock_backend.get_connector(); + let outer_stream = connector.connect().unwrap(); + info.lock().unwrap().stream = Some(outer_stream); + let mut inner_stream = vsock_backend.accept().unwrap(); + + let got_response = Arc::new(Mutex::new(false)); + // assume service is 
connected + { + let mut i = info.lock().unwrap(); + i.set_state(UpcallClientState::ServiceConnected); + } + + let got_response_ = got_response.clone(); + assert!(upcall_client + .send_request( + UpcallClientRequest::FakeRequest, + Box::new(move |_| { + *got_response_.lock().unwrap() = true; + }), + ) + .is_ok()); + assert!(info.lock().unwrap().result_callback.is_some()); + + let mut read_buffer = vec![0; 8]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + + let writer_buffer = String::from("TEST RESP").into_bytes(); + assert!(inner_stream.write_all(writer_buffer.as_slice()).is_ok()); + let response = info.lock().unwrap().handle_response().unwrap(); + info.lock().unwrap().consume_callback(response); + assert!(info.lock().unwrap().result_callback.is_none()); + + assert!(*got_response.lock().unwrap()); + } + + #[test] + #[allow(clippy::redundant_clone)] + fn test_upcall_client_send_request_without_result() { + let (mut vsock_backend, upcall_client) = get_upcall_client(); + let info = upcall_client.info.clone(); + let connector = vsock_backend.get_connector(); + let outer_stream = connector.connect().unwrap(); + info.lock().unwrap().stream = Some(outer_stream); + let mut inner_stream = vsock_backend.accept().unwrap(); + + // assume service is connected + { + let mut i = info.lock().unwrap(); + i.set_state(UpcallClientState::ServiceConnected); + } + + assert!(upcall_client + .send_request_without_result(UpcallClientRequest::FakeRequest) + .is_ok()); + assert!(info.lock().unwrap().result_callback.is_none()); + + let mut read_buffer = vec![0; 8]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + + let writer_buffer = String::from("TEST RESP").into_bytes(); + assert!(inner_stream.write_all(writer_buffer.as_slice()).is_ok()); + assert!(info.lock().unwrap().handle_response().is_ok()); + } + + #[test] + #[allow(clippy::redundant_clone)] + fn test_upcall_client_send_request_error() { + let (_, upcall_client) = get_upcall_client(); + let info = upcall_client.info.clone(); + + let do_test = || { + assert!(upcall_client + .send_request_inner(UpcallClientRequest::FakeRequest, None) + .is_err()); + }; + + { + let mut i = info.lock().unwrap(); + i.set_state(UpcallClientState::WaitingServer); + } + do_test(); + + { + let mut i = info.lock().unwrap(); + i.set_state(UpcallClientState::WaitingService); + } + do_test(); + + { + let mut i = info.lock().unwrap(); + i.set_state(UpcallClientState::ReconnectError); + } + do_test(); + + { + let mut i = info.lock().unwrap(); + i.set_state(UpcallClientState::ServiceBusy); + } + do_test(); + } + + #[test] + #[allow(clippy::redundant_clone)] + fn test_upcall_client_get_state() { + let (_, upcall_client) = get_upcall_client(); + + assert_eq!(upcall_client.get_state(), UpcallClientState::WaitingServer); + + let info = upcall_client.info.clone(); + info.lock().unwrap().state = UpcallClientState::ServiceBusy; + assert_eq!(upcall_client.get_state(), UpcallClientState::ServiceBusy); + } + + #[test] + #[allow(clippy::redundant_clone)] + fn test_upcall_client_is_ready() { + let (_, upcall_client) = get_upcall_client(); + + assert!(!upcall_client.is_ready()); + + let info = upcall_client.info.clone(); + info.lock().unwrap().state = UpcallClientState::ServiceConnected; + assert!(upcall_client.is_ready()); + } + + fn get_upcall_epoll_handler() -> (VsockInnerBackend, UpcallEpollHandler) { + let (inner_backend, info) = get_upcall_client_info(); + let epoll_handler = UpcallEpollHandler::new(Arc::new(Mutex::new(info))).unwrap(); + + (inner_backend, 
epoll_handler) + } + + #[test] + fn test_upcall_epoll_handler_set_reconnect() { + let (_, mut epoll_handler) = get_upcall_epoll_handler(); + + assert!(epoll_handler.set_reconnect().is_ok()); + assert_eq!(epoll_handler.reconnect_time, 1); + assert!(epoll_handler.in_reconnect); + match epoll_handler.reconnect_timer.get_state() { + TimerState::Oneshot(dur) => { + assert!(dur.as_millis() < 10 && dur.as_millis() > 5); + } + _ => unreachable!(), + } + } + + #[test] + fn test_upcall_epoll_handler_stream_event() { + // Waiting Server state, server connection check error + { + let (_, epoll_handler) = get_upcall_epoll_handler(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(epoll_handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_ops = inner_mgr.event_ops(id).unwrap(); + let (mut vsock_backend, mut epoll_handler) = get_upcall_epoll_handler(); + let info = epoll_handler.info.clone(); + let stream_fd = info.lock().unwrap().stream.as_ref().unwrap().as_raw_fd(); + event_ops + .add(Events::new_raw(stream_fd, EventSet::IN)) + .unwrap(); + + let info = epoll_handler.info.clone(); + info.lock() + .unwrap() + .set_state(UpcallClientState::WaitingServer); + + let mut inner_stream = vsock_backend.accept().unwrap(); + let mut read_buffer = vec![0; 12]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + + epoll_handler.handle_stream_event(&mut event_ops); + assert_eq!(info.lock().unwrap().state, UpcallClientState::WaitingServer); + assert_eq!(epoll_handler.reconnect_time, 1); + assert!(epoll_handler.in_reconnect); + } + + // Waiting Server state, server connection check success, but service + // connection start error + { + let (_, epoll_handler) = get_upcall_epoll_handler(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(epoll_handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_ops = inner_mgr.event_ops(id).unwrap(); + let (mut vsock_backend, mut epoll_handler) = get_upcall_epoll_handler(); + let info = epoll_handler.info.clone(); + let stream_fd = info.lock().unwrap().stream.as_ref().unwrap().as_raw_fd(); + event_ops + .add(Events::new_raw(stream_fd, EventSet::IN)) + .unwrap(); + + let info = epoll_handler.info.clone(); + info.lock() + .unwrap() + .set_state(UpcallClientState::WaitingServer); + info.lock().unwrap().service.connection_start_err = true; + + let mut inner_stream = vsock_backend.accept().unwrap(); + let mut read_buffer = vec![0; 12]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + + let writer_buffer = String::from("OK 1024\n").into_bytes(); + inner_stream.write_all(&writer_buffer).unwrap(); + + epoll_handler.handle_stream_event(&mut event_ops); + assert_eq!(info.lock().unwrap().state, UpcallClientState::WaitingServer); + assert_eq!(epoll_handler.reconnect_time, 1); + assert!(epoll_handler.in_reconnect); + } + + // Waiting Server state, server connection check success, and service + // connection start success, too + { + let (_, epoll_handler) = get_upcall_epoll_handler(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(epoll_handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_ops = inner_mgr.event_ops(id).unwrap(); + let (mut vsock_backend, mut epoll_handler) = get_upcall_epoll_handler(); + let info = epoll_handler.info.clone(); + let stream_fd = info.lock().unwrap().stream.as_ref().unwrap().as_raw_fd(); + event_ops + .add(Events::new_raw(stream_fd, EventSet::IN)) + .unwrap(); + + let info = 
epoll_handler.info.clone(); + info.lock() + .unwrap() + .set_state(UpcallClientState::WaitingServer); + + let mut inner_stream = vsock_backend.accept().unwrap(); + let mut read_buffer = vec![0; 12]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + + let writer_buffer = String::from("OK 1024\n").into_bytes(); + inner_stream.write_all(&writer_buffer).unwrap(); + + epoll_handler.handle_stream_event(&mut event_ops); + assert_eq!( + info.lock().unwrap().state, + UpcallClientState::WaitingService + ); + } + + // Waiting Service state, service connection check error + { + let (_, epoll_handler) = get_upcall_epoll_handler(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(epoll_handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_ops = inner_mgr.event_ops(id).unwrap(); + let (mut vsock_backend, mut epoll_handler) = get_upcall_epoll_handler(); + let info = epoll_handler.info.clone(); + let stream_fd = info.lock().unwrap().stream.as_ref().unwrap().as_raw_fd(); + event_ops + .add(Events::new_raw(stream_fd, EventSet::IN)) + .unwrap(); + + let info = epoll_handler.info.clone(); + info.lock() + .unwrap() + .set_state(UpcallClientState::WaitingService); + info.lock().unwrap().service.connection_check_err = true; + + let mut inner_stream = vsock_backend.accept().unwrap(); + let mut read_buffer = vec![0; 12]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + + epoll_handler.handle_stream_event(&mut event_ops); + assert_eq!(info.lock().unwrap().state, UpcallClientState::WaitingServer); + assert_eq!(epoll_handler.reconnect_time, 1); + assert!(epoll_handler.in_reconnect); + } + + // Waiting Service state, service connection check ok + { + let (_, epoll_handler) = get_upcall_epoll_handler(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(epoll_handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_ops = inner_mgr.event_ops(id).unwrap(); + let (mut vsock_backend, mut epoll_handler) = get_upcall_epoll_handler(); + let info = epoll_handler.info.clone(); + let stream_fd = info.lock().unwrap().stream.as_ref().unwrap().as_raw_fd(); + event_ops + .add(Events::new_raw(stream_fd, EventSet::IN)) + .unwrap(); + + let info = epoll_handler.info.clone(); + info.lock() + .unwrap() + .set_state(UpcallClientState::WaitingService); + + let mut inner_stream = vsock_backend.accept().unwrap(); + let mut read_buffer = vec![0; 12]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + + let writer_buffer = String::from("CONN CHECK").into_bytes(); + inner_stream.write_all(&writer_buffer).unwrap(); + + epoll_handler.handle_stream_event(&mut event_ops); + assert_eq!( + info.lock().unwrap().state, + UpcallClientState::ServiceConnected + ); + } + + // Service Busy state, handle response err + { + let (_, epoll_handler) = get_upcall_epoll_handler(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(epoll_handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_ops = inner_mgr.event_ops(id).unwrap(); + let (mut vsock_backend, mut epoll_handler) = get_upcall_epoll_handler(); + let info = epoll_handler.info.clone(); + let stream_fd = info.lock().unwrap().stream.as_ref().unwrap().as_raw_fd(); + event_ops + .add(Events::new_raw(stream_fd, EventSet::IN)) + .unwrap(); + + let info = epoll_handler.info.clone(); + info.lock() + .unwrap() + .set_state(UpcallClientState::ServiceBusy); + info.lock().unwrap().service.handle_response_err = true; + + let mut inner_stream = 
vsock_backend.accept().unwrap(); + let mut read_buffer = vec![0; 12]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + + epoll_handler.handle_stream_event(&mut event_ops); + assert_eq!(info.lock().unwrap().state, UpcallClientState::WaitingServer); + assert_eq!(epoll_handler.reconnect_time, 1); + assert!(epoll_handler.in_reconnect); + } + + // Service Busy state, handle response ok + { + let (_, epoll_handler) = get_upcall_epoll_handler(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(epoll_handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_ops = inner_mgr.event_ops(id).unwrap(); + let (mut vsock_backend, mut epoll_handler) = get_upcall_epoll_handler(); + let info = epoll_handler.info.clone(); + let stream_fd = info.lock().unwrap().stream.as_ref().unwrap().as_raw_fd(); + event_ops + .add(Events::new_raw(stream_fd, EventSet::IN)) + .unwrap(); + + let info = epoll_handler.info.clone(); + info.lock() + .unwrap() + .set_state(UpcallClientState::ServiceBusy); + + let mut inner_stream = vsock_backend.accept().unwrap(); + let mut read_buffer = vec![0; 12]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + + let writer_buffer = String::from("TEST RESP").into_bytes(); + inner_stream.write_all(&writer_buffer).unwrap(); + + epoll_handler.handle_stream_event(&mut event_ops); + assert_eq!( + info.lock().unwrap().state, + UpcallClientState::ServiceConnected + ); + } + + // Service Connected state + { + let (_, epoll_handler) = get_upcall_epoll_handler(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(epoll_handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_ops = inner_mgr.event_ops(id).unwrap(); + let (mut vsock_backend, mut epoll_handler) = get_upcall_epoll_handler(); + let info = epoll_handler.info.clone(); + let stream_fd = info.lock().unwrap().stream.as_ref().unwrap().as_raw_fd(); + event_ops + .add(Events::new_raw(stream_fd, EventSet::IN)) + .unwrap(); + + let info = epoll_handler.info.clone(); + info.lock() + .unwrap() + .set_state(UpcallClientState::ServiceConnected); + + let mut inner_stream = vsock_backend.accept().unwrap(); + let mut read_buffer = vec![0; 12]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + + epoll_handler.handle_stream_event(&mut event_ops); + assert_eq!( + info.lock().unwrap().state, + UpcallClientState::ServiceConnected + ); + } + + // Reconnect Error state + { + let (_, epoll_handler) = get_upcall_epoll_handler(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(epoll_handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_ops = inner_mgr.event_ops(id).unwrap(); + let (mut vsock_backend, mut epoll_handler) = get_upcall_epoll_handler(); + let info = epoll_handler.info.clone(); + let stream_fd = info.lock().unwrap().stream.as_ref().unwrap().as_raw_fd(); + event_ops + .add(Events::new_raw(stream_fd, EventSet::IN)) + .unwrap(); + + let info = epoll_handler.info.clone(); + info.lock() + .unwrap() + .set_state(UpcallClientState::ReconnectError); + + let mut inner_stream = vsock_backend.accept().unwrap(); + let mut read_buffer = vec![0; 12]; + assert!(inner_stream.read_exact(&mut read_buffer).is_ok()); + + epoll_handler.handle_stream_event(&mut event_ops); + assert_eq!( + info.lock().unwrap().state, + UpcallClientState::ReconnectError + ); + } + } + + #[test] + fn test_upcall_epoll_handler_reconnect_event() { + let (_, epoll_handler) = get_upcall_epoll_handler(); + let mgr = 
EpollManager::default(); + let id = mgr.add_subscriber(Box::new(epoll_handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_ops = inner_mgr.event_ops(id).unwrap(); + let (_, mut epoll_handler) = get_upcall_epoll_handler(); + + epoll_handler.handle_reconnect_event(&mut event_ops); + } + + #[test] + fn test_upcall_epoll_handler_process() { + let (_, epoll_handler) = get_upcall_epoll_handler(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(epoll_handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_ops = inner_mgr.event_ops(id).unwrap(); + let (_, mut epoll_handler) = get_upcall_epoll_handler(); + let info = epoll_handler.info.clone(); + let stream_fd = info.lock().unwrap().stream.as_ref().unwrap().as_raw_fd(); + let reconnect_fd = epoll_handler.reconnect_timer.as_raw_fd(); + let event_set = EventSet::EDGE_TRIGGERED; + event_ops + .add(Events::new_raw(stream_fd, EventSet::IN)) + .unwrap(); + + // test for stream event + let events = Events::new_raw(stream_fd, event_set); + epoll_handler.process(events, &mut event_ops); + + // test for reconnect event + let events = Events::new_raw(reconnect_fd, event_set); + epoll_handler.process(events, &mut event_ops); + } +} diff --git a/src/dragonball/src/dbs_utils/Cargo.toml b/src/dragonball/src/dbs_utils/Cargo.toml new file mode 100644 index 000000000000..ae2267ffc2f9 --- /dev/null +++ b/src/dragonball/src/dbs_utils/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "dbs-utils" +version = "0.2.1" +authors = ["Alibaba Dragonball Team"] +description = "helpers and utilities used by dragonball-sandbox components" +license = "Apache-2.0 AND BSD-3-Clause" +edition = "2018" +homepage = "https://github.com/openanolis/dragonball-sandbox" +repository = "https://github.com/openanolis/dragonball-sandbox" +keywords = ["dragonball", "secure-sandbox", "utils"] +readme = "README.md" + +[dependencies] +anyhow = "1.0" +event-manager = { version = "0.2.1", features = [ "remote_endpoint" ] } +libc = "0.2.39" +log = "0.4.14" +serde = { version = "1.0.27", features = ["derive", "rc"] } +thiserror = "1.0" +timerfd = "1.0" +vmm-sys-util = "0.11.0" + +[dev-dependencies] +serde_json = "1.0.9" diff --git a/src/dragonball/src/dbs_utils/LICENSE b/src/dragonball/src/dbs_utils/LICENSE new file mode 120000 index 000000000000..30cff7403da0 --- /dev/null +++ b/src/dragonball/src/dbs_utils/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/dragonball/src/dbs_utils/README.md b/src/dragonball/src/dbs_utils/README.md new file mode 100644 index 000000000000..fae004e49478 --- /dev/null +++ b/src/dragonball/src/dbs_utils/README.md @@ -0,0 +1,13 @@ +# dbs-utils + +This crate is a collection of modules that provides helpers and utilities used by multiple `dragonball-sandbox` components. + +And also provides some wrappers for [`vmm-sys-util`](https://github.com/rust-vmm/vmm-sys-util). + +## Acknowledgement + +Part of the code is derived from the [Firecracker](https://github.com/firecracker-microvm/firecracker) project. + +## License + +This project is licensed under [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). 
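+
+## Example
+
+A minimal sketch of the `epoll_manager` wrapper (`MySubscriber` and `event_fd`
+are placeholders for your own `MutEventSubscriber` implementation and a file
+descriptor it owns):
+
+```rust,ignore
+use dbs_utils::epoll_manager::{EpollManager, EventSet, Events};
+
+let manager = EpollManager::default();
+// Register a subscriber with the shared event loop.
+let id = manager.add_subscriber(Box::new(MySubscriber::new()));
+// Ask the manager to also watch an fd owned by that subscriber.
+manager.add_event(id, Events::new(&event_fd, EventSet::IN)).unwrap();
+// Drive the event loop, typically from a dedicated worker thread.
+manager.handle_events(-1).unwrap();
+```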
diff --git a/src/dragonball/src/dbs_utils/THIRD-PARTY b/src/dragonball/src/dbs_utils/THIRD-PARTY new file mode 120000 index 000000000000..301d0a498953 --- /dev/null +++ b/src/dragonball/src/dbs_utils/THIRD-PARTY @@ -0,0 +1 @@ +../../THIRD-PARTY \ No newline at end of file diff --git a/src/dragonball/src/dbs_utils/src/epoll_manager.rs b/src/dragonball/src/dbs_utils/src/epoll_manager.rs new file mode 100644 index 000000000000..b27c523afc4b --- /dev/null +++ b/src/dragonball/src/dbs_utils/src/epoll_manager.rs @@ -0,0 +1,174 @@ +// Copyright 2020 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! A simple wrapper over event_manager::EventManager to solve possible deadlock. + +use anyhow::{anyhow, Result}; +use std::sync::{Arc, Mutex}; + +pub use event_manager::{ + Error, EventManager, EventOps, EventSet, Events, MutEventSubscriber, RemoteEndpoint, + SubscriberId, SubscriberOps, +}; + +/// Type of epoll subscriber. +pub type EpollSubscriber = Box; + +type EpollManagerImpl = Arc>>; + +/// A wrapper struct over EventManager to solve possible deadlock. +/// +/// It's a rather tough topic to deal with the epoll event manager in rust way. +/// The event_manager::EventManager is designed for single-threaded environment and it leaves +/// the task for concurrent access to the clients. +/// There are two types of threads involved, epoll worker thread and vCPU threads. +/// To reduce overhead, the epoll worker thread calls epoll::wait() without timeout, so the +/// worker thread will hold the EpollManagerImpl::Mutex for undetermined periods. When the vCPU +/// threads tries to activate virtio devices, they need to acquire the same EpollManagerImpl::Mutex. +/// Thus the vCPU threads may block for an undetermined time. To solve this issue, we perform +/// an kick()/try_lock() loop to wake up the epoll worker thread from sleeping. +#[derive(Clone)] +pub struct EpollManager { + pub mgr: EpollManagerImpl, + endpoint: Arc>>, +} + +impl EpollManager { + /// Add a new epoll event subscriber. + pub fn add_subscriber(&self, handler: EpollSubscriber) -> SubscriberId { + let _ = self.endpoint.lock().unwrap().kick(); + if let Ok(mut mgr) = self.mgr.try_lock() { + mgr.add_subscriber(handler) + } else { + return self + .endpoint + .lock() + .unwrap() + .call_blocking::<_, _, Error>(move |mgr| Ok(mgr.add_subscriber(handler))) + .unwrap(); + } + } + + /// Remove a given epoll event subscriber. + pub fn remove_subscriber(&mut self, subscriber_id: SubscriberId) -> Result { + let mut mgr = self + .mgr + .lock() + .map_err(|e| anyhow!("EventManager lock fail. {:?}", e))?; + mgr.remove_subscriber(subscriber_id) + .map_err(|e| anyhow!("remove subscriber err. {:?}", e)) + } + + /// Add an epoll event to be monitored. + pub fn add_event( + &self, + subscriber_id: SubscriberId, + events: Events, + ) -> std::result::Result<(), Error> { + loop { + let _ = self.endpoint.lock().unwrap().kick(); + if let Ok(mut mgr) = self.mgr.try_lock() { + let mut ops = mgr.event_ops(subscriber_id)?; + return ops.add(events); + } + } + } + + /// Run the epoll polling loop. + pub fn handle_events(&self, timeout: i32) -> std::result::Result { + // Do not expect poisoned lock. + let mut guard = self.mgr.lock().unwrap(); + + guard.run_with_timeout(timeout) + } +} + +impl Default for EpollManager { + /// Create a new epoll manager. 
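+    ///
+    /// # Panics
+    ///
+    /// Panics if the underlying `EventManager` instance cannot be created.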
+ fn default() -> Self { + let mgr = EventManager::new().expect("epoll_manager: failed create new instance"); + let endpoint = Arc::new(Mutex::new(mgr.remote_endpoint())); + + EpollManager { + mgr: Arc::new(Mutex::new(mgr)), + endpoint, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::os::unix::io::AsRawFd; + use vmm_sys_util::{epoll::EventSet, eventfd::EventFd}; + + struct DummySubscriber { + pub event: EventFd, + } + + impl DummySubscriber { + fn new() -> Self { + Self { + event: EventFd::new(0).unwrap(), + } + } + } + + impl MutEventSubscriber for DummySubscriber { + fn process(&mut self, events: Events, _ops: &mut EventOps) { + let source = events.fd(); + let event_set = events.event_set(); + assert_ne!(source, self.event.as_raw_fd()); + match event_set { + EventSet::IN => { + unreachable!() + } + EventSet::OUT => { + self.event.read().unwrap(); + } + _ => { + unreachable!() + } + } + } + + fn init(&mut self, _ops: &mut EventOps) {} + } + + #[test] + fn test_epoll_manager() { + let mut epoll_manager = EpollManager::default(); + let epoll_manager_clone = epoll_manager.clone(); + let thread = std::thread::spawn(move || loop { + let count = epoll_manager_clone.handle_events(-1).unwrap(); + if count == 0 { + continue; + } + assert_eq!(count, 1); + break; + }); + let handler = DummySubscriber::new(); + let event = handler.event.try_clone().unwrap(); + let id = epoll_manager.add_subscriber(Box::new(handler)); + + thread.join().unwrap(); + + epoll_manager + .add_event(id, Events::new(&event, EventSet::OUT)) + .unwrap(); + event.write(1).unwrap(); + + let epoll_manager_clone = epoll_manager.clone(); + let thread = std::thread::spawn(move || loop { + let count = epoll_manager_clone.handle_events(-1).unwrap(); + if count == 0 { + continue; + } + assert_eq!(count, 2); + break; + }); + + thread.join().unwrap(); + epoll_manager.remove_subscriber(id).unwrap(); + } +} diff --git a/src/dragonball/src/dbs_utils/src/lib.rs b/src/dragonball/src/dbs_utils/src/lib.rs new file mode 100644 index 000000000000..a3013e2d263a --- /dev/null +++ b/src/dragonball/src/dbs_utils/src/lib.rs @@ -0,0 +1,9 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +pub mod epoll_manager; +pub mod metric; +pub mod net; +pub mod rate_limiter; +pub mod time; diff --git a/src/dragonball/src/dbs_utils/src/metric.rs b/src/dragonball/src/dbs_utils/src/metric.rs new file mode 100644 index 000000000000..cfef025f0712 --- /dev/null +++ b/src/dragonball/src/dbs_utils/src/metric.rs @@ -0,0 +1,199 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Defines the public components of the metric system. +//! +//! # Design +//! The main design goals of this system are: +//! * Use lockless operations, preferably ones that don't require anything other than +//! simple reads/writes being atomic. +//! * Exploit interior mutability and atomics being Sync to allow all methods (including the ones +//! which are effectively mutable) to be callable on a global non-mut static. +//! * Rely on `serde` to provide the actual serialization for writing the metrics. +//! * Since all metrics start at 0, we implement the `Default` trait via derive for all of them, +//! to avoid having to initialize everything by hand. +//! +//! The system implements 2 types of metrics: +//! 
* Shared Incremental Metrics (SharedIncMetrics) - dedicated for the metrics which need a counter +//! (i.e the number of times an API request failed). These metrics are reset upon flush. +//! * Shared Store Metrics (SharedStoreMetrics) - are targeted at keeping a persistent value, it is not +//! intended to act as a counter (i.e for measure the process start up time for example). +//! +//! The current approach for the `SharedIncMetrics` type is to store two values (current and previous) +//! and compute the delta between them each time we do a flush (i.e by serialization). There are a number of advantages +//! to this approach, including: +//! * We don't have to introduce an additional write (to reset the value) from the thread which +//! does to actual writing, so less synchronization effort is required. +//! * We don't have to worry at all that much about losing some data if writing fails for a while +//! (this could be a concern, I guess). +//! If if turns out this approach is not really what we want, it's pretty easy to resort to +//! something else, while working behind the same interface. + +use std::sync::atomic::{AtomicUsize, Ordering}; + +use serde::{Serialize, Serializer}; + +/// Used for defining new types of metrics that act as a counter (i.e they are continuously updated by +/// incrementing their value). +pub trait IncMetric { + /// Adds `value` to the current counter. + fn add(&self, value: usize); + /// Increments by 1 unit the current counter. + fn inc(&self) { + self.add(1); + } + /// Returns current value of the counter. + fn count(&self) -> usize; +} + +/// Representation of a metric that is expected to be incremented from more than one thread, so more +/// synchronization is necessary. +// It's currently used for vCPU metrics. An alternative here would be +// to have one instance of every metric for each thread, and to +// aggregate them when writing. However this probably overkill unless we have a lot of vCPUs +// incrementing metrics very often. Still, it's there if we ever need it :-s +// We will be keeping two values for each metric for being able to reset +// counters on each metric. +// 1st member - current value being updated +// 2nd member - old value that gets the current value whenever metrics is flushed to disk +#[derive(Default)] +pub struct SharedIncMetric(AtomicUsize, AtomicUsize); + +impl IncMetric for SharedIncMetric { + // While the order specified for this operation is still Relaxed, the actual instruction will + // be an asm "LOCK; something" and thus atomic across multiple threads, simply because of the + // fetch_and_add (as opposed to "store(load() + 1)") implementation for atomics. + // TODO: would a stronger ordering make a difference here? + fn add(&self, value: usize) { + self.0.fetch_add(value, Ordering::Relaxed); + } + + fn count(&self) -> usize { + self.0.load(Ordering::Relaxed) + } +} + +impl Serialize for SharedIncMetric { + /// Reset counters of each metrics. Here we suppose that Serialize's goal is to help with the + /// flushing of metrics. + /// !!! Any print of the metrics will also reset them. Use with caution !!! 
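+    ///
+    /// Only the delta between the current value and the snapshot taken at the
+    /// previous flush is written; the snapshot is then advanced to the current
+    /// value.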
+
+impl Serialize for SharedIncMetric {
+    /// Resets the counter of each metric. Here we suppose that Serialize's goal is to help with the
+    /// flushing of metrics.
+    /// !!! Any print of the metrics will also reset them. Use with caution !!!
+    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+        // There's no serializer.serialize_usize() for some reason :(
+        let snapshot = self.0.load(Ordering::Relaxed);
+        let res = serializer.serialize_u64(snapshot as u64 - self.1.load(Ordering::Relaxed) as u64);
+
+        if res.is_ok() {
+            self.1.store(snapshot, Ordering::Relaxed);
+        }
+        res
+    }
+}
+
+/// Used for defining new types of metrics that do not need a counter and act as a persistent indicator.
+pub trait StoreMetric {
+    /// Returns the current value of the metric.
+    fn fetch(&self) -> usize;
+    /// Stores `value` into the metric.
+    fn store(&self, value: usize);
+}
+
+/// Representation of a metric that is expected to hold a value that can be accessed
+/// from more than one thread, so more synchronization is necessary.
+#[derive(Default)]
+pub struct SharedStoreMetric(AtomicUsize);
+
+impl StoreMetric for SharedStoreMetric {
+    fn fetch(&self) -> usize {
+        self.0.load(Ordering::Relaxed)
+    }
+
+    fn store(&self, value: usize) {
+        self.0.store(value, Ordering::Relaxed);
+    }
+}
+
+impl IncMetric for SharedStoreMetric {
+    fn add(&self, value: usize) {
+        // This operation wraps around on overflow.
+        self.0.fetch_add(value, Ordering::Relaxed);
+    }
+
+    fn count(&self) -> usize {
+        self.0.load(Ordering::Relaxed)
+    }
+}
+
+impl Serialize for SharedStoreMetric {
+    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+        serializer.serialize_u64(self.0.load(Ordering::Relaxed) as u64)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::atomic::fence;
+    use std::sync::Arc;
+    use std::thread;
+
+    use super::*;
+
+    #[test]
+    fn test_shared_inc_metric() {
+        let metric = Arc::new(SharedIncMetric::default());
+
+        // We're going to create a number of threads that will attempt to increase this metric
+        // in parallel. If everything goes fine we still can't be sure the synchronization works,
+        // but if something fails, then we definitely have a problem :-s
+
+        const NUM_THREADS_TO_SPAWN: usize = 4;
+        const NUM_INCREMENTS_PER_THREAD: usize = 10_0000;
+        const M2_INITIAL_COUNT: usize = 123;
+
+        metric.add(M2_INITIAL_COUNT);
+
+        let mut v = Vec::with_capacity(NUM_THREADS_TO_SPAWN);
+
+        for _ in 0..NUM_THREADS_TO_SPAWN {
+            let r = metric.clone();
+            v.push(thread::spawn(move || {
+                for _ in 0..NUM_INCREMENTS_PER_THREAD {
+                    r.inc();
+                }
+            }));
+        }
+
+        for handle in v {
+            handle.join().unwrap();
+        }
+
+        assert_eq!(
+            metric.count(),
+            M2_INITIAL_COUNT + NUM_THREADS_TO_SPAWN * NUM_INCREMENTS_PER_THREAD
+        );
+    }
+
+    #[test]
+    fn test_shared_store_metric() {
+        let m1 = Arc::new(SharedStoreMetric::default());
+        m1.store(1);
+        fence(Ordering::SeqCst);
+        assert_eq!(1, m1.fetch());
+    }
+
+    #[test]
+    fn test_serialize() {
+        let s = serde_json::to_string(&SharedIncMetric(
+            AtomicUsize::new(123),
+            AtomicUsize::new(111),
+        ));
+        assert!(s.is_ok());
+    }
+
+    #[test]
+    fn test_wraps_around() {
+        let m = SharedStoreMetric(AtomicUsize::new(usize::MAX));
+        m.add(1);
+        assert_eq!(m.count(), 0);
+    }
+}
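For contrast with the incremental type, here is a minimal sketch of `SharedStoreMetric`, which keeps the last stored value across flushes instead of reporting deltas. The variable name, the `dbs_utils::metric` import path and the `serde_json` usage are again illustrative assumptions, not part of this patch:

use dbs_utils::metric::{SharedStoreMetric, StoreMetric}; // assumed crate path

fn main() {
    let start_time_us = SharedStoreMetric::default();

    // A persistent indicator: overwrite the value instead of counting events.
    start_time_us.store(1_234_567);
    assert_eq!(start_time_us.fetch(), 1_234_567);

    // Unlike SharedIncMetric, serializing does not reset anything:
    // every flush reports the stored value as-is.
    assert_eq!(serde_json::to_string(&start_time_us).unwrap(), "1234567");
    assert_eq!(serde_json::to_string(&start_time_us).unwrap(), "1234567");
}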
diff --git a/src/dragonball/src/dbs_utils/src/net/mac.rs b/src/dragonball/src/dbs_utils/src/net/mac.rs
new file mode 100644
index 000000000000..1c618694f15f
--- /dev/null
+++ b/src/dragonball/src/dbs_utils/src/net/mac.rs
@@ -0,0 +1,161 @@
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the THIRD-PARTY file.
+
+use std::fmt;
+use std::result::Result;
+
+use serde::de::{Deserialize, Deserializer, Error};
+use serde::ser::{Serialize, Serializer};
+
+/// Number of ":"-separated segments (bytes) in a MAC address.
+pub const MAC_ADDR_LEN: usize = 6;
+
+#[derive(Debug)]
+pub enum MacError {
+    MacLengthError(usize),
+}
+
+/// MAC address for ethernet NIC.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct MacAddr {
+    bytes: [u8; MAC_ADDR_LEN],
+}
+
+impl MacAddr {
+    /// Parse a string into a MacAddr object.
+    /// The error contains the str that failed to be parsed, for nicer error message generation.
+    pub fn parse_str<S>(s: &S) -> Result<MacAddr, &str>
+    where
+        S: AsRef<str> + ?Sized,
+    {
+        let v: Vec<&str> = s.as_ref().split(':').collect();
+        let mut bytes = [0u8; MAC_ADDR_LEN];
+
+        if v.len() != MAC_ADDR_LEN {
+            return Err(s.as_ref());
+        }
+
+        for i in 0..MAC_ADDR_LEN {
+            if v[i].len() != 2 {
+                return Err(s.as_ref());
+            }
+            bytes[i] = u8::from_str_radix(v[i], 16).map_err(|_| s.as_ref())?;
+        }
+
+        Ok(MacAddr { bytes })
+    }
+
+    /// Create a MacAddr object from raw bytes unchecked.
+    ///
+    /// Does not check whether src.len() == MAC_ADDR_LEN.
+    #[inline]
+    pub fn from_bytes_unchecked(src: &[u8]) -> MacAddr {
+        let mut bytes = [0u8; MAC_ADDR_LEN];
+        let _ = &bytes[..].copy_from_slice(src);
+
+        MacAddr { bytes }
+    }
+
+    /// Create a MacAddr object from raw bytes.
+    #[inline]
+    pub fn from_bytes(src: &[u8]) -> Result<MacAddr, MacError> {
+        if src.len() != MAC_ADDR_LEN {
+            return Err(MacError::MacLengthError(src.len()));
+        }
+        Ok(MacAddr::from_bytes_unchecked(src))
+    }
+
+    /// Get raw bytes of the MacAddr object.
+    #[inline]
+    pub fn get_bytes(&self) -> &[u8] {
+        &self.bytes
+    }
+}
+
+impl fmt::Display for MacAddr {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let b = &self.bytes;
+        write!(
+            f,
+            "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}",
+            b[0], b[1], b[2], b[3], b[4], b[5]
+        )
+    }
+}
+
+impl Serialize for MacAddr {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        self.to_string().serialize(serializer)
+    }
+}
+
+impl<'de> Deserialize<'de> for MacAddr {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let s = String::deserialize(deserializer)?;
+        MacAddr::parse_str(&s).map_err(|_| D::Error::custom("The provided MAC address is invalid."))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_mac_addr() {
+        // too long
+        assert!(MacAddr::parse_str("aa:aa:aa:aa:aa:aa:aa").is_err());
+
+        // invalid hex
+        assert!(MacAddr::parse_str("aa:aa:aa:aa:aa:ax").is_err());
+
+        // single digit mac address component should be invalid
+        assert!(MacAddr::parse_str("aa:aa:aa:aa:aa:b").is_err());
+
+        // components with more than two digits should also be invalid
+        assert!(MacAddr::parse_str("aa:aa:aa:aa:aa:bbb").is_err());
+
+        let mac = MacAddr::parse_str("12:34:56:78:9a:BC").unwrap();
+
+        println!("parsed MAC address: {mac}");
+
+        let bytes = mac.get_bytes();
+        assert_eq!(bytes, [0x12u8, 0x34, 0x56, 0x78, 0x9a, 0xbc]);
+    }
+
+    #[test]
+    fn test_from_bytes() {
+        let src1 = [0x01, 0x02, 0x03, 0x04, 0x05];
+        let src2 = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06];
+        let src3 = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07];
+
+        assert!(MacAddr::from_bytes(&src1[..]).is_err());
+
+        let x = MacAddr::from_bytes(&src2[..]).unwrap();
+        assert_eq!(x.to_string(), String::from("01:02:03:04:05:06"));
+
+        assert!(MacAddr::from_bytes(&src3[..]).is_err());
+    }
+
+    #[cfg(feature = "with-serde")]
+    #[test]
+    fn test_mac_addr_serialization_and_deserialization() {
+        let mac: MacAddr =
serde_json::from_str("\"12:34:56:78:9a:bc\"").expect("MacAddr deserialization failed."); + + let bytes = mac.get_bytes(); + assert_eq!(bytes, [0x12u8, 0x34, 0x56, 0x78, 0x9a, 0xbc]); + + let s = serde_json::to_string(&mac).expect("MacAddr serialization failed."); + assert_eq!(s, "\"12:34:56:78:9a:bc\""); + } +} diff --git a/src/dragonball/src/dbs_utils/src/net/mod.rs b/src/dragonball/src/dbs_utils/src/net/mod.rs new file mode 100644 index 000000000000..5260f0e598a0 --- /dev/null +++ b/src/dragonball/src/dbs_utils/src/net/mod.rs @@ -0,0 +1,20 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +#![deny(missing_docs)] +//! # Network-related utilities +//! +//! Provides tools for representing and handling network related concepts like MAC addresses and +//! network interfaces. + +mod mac; +pub use self::mac::{MacAddr, MAC_ADDR_LEN}; + +mod tap; +pub use self::tap::{Error as TapError, Tap}; + +pub mod net_gen; diff --git a/src/dragonball/src/dbs_utils/src/net/net_gen/if_tun.rs b/src/dragonball/src/dbs_utils/src/net/net_gen/if_tun.rs new file mode 100644 index 000000000000..c5ce74e1783b --- /dev/null +++ b/src/dragonball/src/dbs_utils/src/net/net_gen/if_tun.rs @@ -0,0 +1,603 @@ +// Copyright 2023 Alibaba Cloud. All Rights Reserved. +// Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. +// SPDX-License-Identifier: Apache-2.0 + +/* automatically generated by rust-bindgen */ + +#[repr(C)] +#[derive(Default)] +pub struct __IncompleteArrayField(::std::marker::PhantomData); +impl __IncompleteArrayField { + #[inline] + pub fn new() -> Self { + __IncompleteArrayField(::std::marker::PhantomData) + } + #[inline] + pub unsafe fn as_ptr(&self) -> *const T { + ::std::mem::transmute(self) + } + #[inline] + pub unsafe fn as_mut_ptr(&mut self) -> *mut T { + ::std::mem::transmute(self) + } + #[inline] + pub unsafe fn as_slice(&self, len: usize) -> &[T] { + ::std::slice::from_raw_parts(self.as_ptr(), len) + } + #[inline] + pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] { + ::std::slice::from_raw_parts_mut(self.as_mut_ptr(), len) + } +} +impl ::std::fmt::Debug for __IncompleteArrayField { + fn fmt(&self, fmt: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { + fmt.write_str("__IncompleteArrayField") + } +} +impl ::std::clone::Clone for __IncompleteArrayField { + #[inline] + fn clone(&self) -> Self { + Self::new() + } +} +impl ::std::marker::Copy for __IncompleteArrayField {} +pub const __BITS_PER_LONG: ::std::os::raw::c_uint = 64; +pub const __FD_SETSIZE: ::std::os::raw::c_uint = 1024; +pub const ETH_ALEN: ::std::os::raw::c_uint = 6; +pub const ETH_HLEN: ::std::os::raw::c_uint = 14; +pub const ETH_ZLEN: ::std::os::raw::c_uint = 60; +pub const ETH_DATA_LEN: ::std::os::raw::c_uint = 1500; +pub const ETH_FRAME_LEN: ::std::os::raw::c_uint = 1514; +pub const ETH_FCS_LEN: ::std::os::raw::c_uint = 4; +pub const ETH_P_LOOP: ::std::os::raw::c_uint = 96; +pub const ETH_P_PUP: ::std::os::raw::c_uint = 512; +pub const ETH_P_PUPAT: ::std::os::raw::c_uint = 513; +pub const ETH_P_TSN: ::std::os::raw::c_uint = 8944; +pub const ETH_P_IP: ::std::os::raw::c_uint = 2048; +pub const ETH_P_X25: ::std::os::raw::c_uint = 2053; +pub const ETH_P_ARP: 
::std::os::raw::c_uint = 2054; +pub const ETH_P_BPQ: ::std::os::raw::c_uint = 2303; +pub const ETH_P_IEEEPUP: ::std::os::raw::c_uint = 2560; +pub const ETH_P_IEEEPUPAT: ::std::os::raw::c_uint = 2561; +pub const ETH_P_BATMAN: ::std::os::raw::c_uint = 17157; +pub const ETH_P_DEC: ::std::os::raw::c_uint = 24576; +pub const ETH_P_DNA_DL: ::std::os::raw::c_uint = 24577; +pub const ETH_P_DNA_RC: ::std::os::raw::c_uint = 24578; +pub const ETH_P_DNA_RT: ::std::os::raw::c_uint = 24579; +pub const ETH_P_LAT: ::std::os::raw::c_uint = 24580; +pub const ETH_P_DIAG: ::std::os::raw::c_uint = 24581; +pub const ETH_P_CUST: ::std::os::raw::c_uint = 24582; +pub const ETH_P_SCA: ::std::os::raw::c_uint = 24583; +pub const ETH_P_TEB: ::std::os::raw::c_uint = 25944; +pub const ETH_P_RARP: ::std::os::raw::c_uint = 32821; +pub const ETH_P_ATALK: ::std::os::raw::c_uint = 32923; +pub const ETH_P_AARP: ::std::os::raw::c_uint = 33011; +pub const ETH_P_8021Q: ::std::os::raw::c_uint = 33024; +pub const ETH_P_IPX: ::std::os::raw::c_uint = 33079; +pub const ETH_P_IPV6: ::std::os::raw::c_uint = 34525; +pub const ETH_P_PAUSE: ::std::os::raw::c_uint = 34824; +pub const ETH_P_SLOW: ::std::os::raw::c_uint = 34825; +pub const ETH_P_WCCP: ::std::os::raw::c_uint = 34878; +pub const ETH_P_MPLS_UC: ::std::os::raw::c_uint = 34887; +pub const ETH_P_MPLS_MC: ::std::os::raw::c_uint = 34888; +pub const ETH_P_ATMMPOA: ::std::os::raw::c_uint = 34892; +pub const ETH_P_PPP_DISC: ::std::os::raw::c_uint = 34915; +pub const ETH_P_PPP_SES: ::std::os::raw::c_uint = 34916; +pub const ETH_P_LINK_CTL: ::std::os::raw::c_uint = 34924; +pub const ETH_P_ATMFATE: ::std::os::raw::c_uint = 34948; +pub const ETH_P_PAE: ::std::os::raw::c_uint = 34958; +pub const ETH_P_AOE: ::std::os::raw::c_uint = 34978; +pub const ETH_P_8021AD: ::std::os::raw::c_uint = 34984; +pub const ETH_P_802_EX1: ::std::os::raw::c_uint = 34997; +pub const ETH_P_TIPC: ::std::os::raw::c_uint = 35018; +pub const ETH_P_8021AH: ::std::os::raw::c_uint = 35047; +pub const ETH_P_MVRP: ::std::os::raw::c_uint = 35061; +pub const ETH_P_1588: ::std::os::raw::c_uint = 35063; +pub const ETH_P_PRP: ::std::os::raw::c_uint = 35067; +pub const ETH_P_FCOE: ::std::os::raw::c_uint = 35078; +pub const ETH_P_TDLS: ::std::os::raw::c_uint = 35085; +pub const ETH_P_FIP: ::std::os::raw::c_uint = 35092; +pub const ETH_P_80221: ::std::os::raw::c_uint = 35095; +pub const ETH_P_LOOPBACK: ::std::os::raw::c_uint = 36864; +pub const ETH_P_QINQ1: ::std::os::raw::c_uint = 37120; +pub const ETH_P_QINQ2: ::std::os::raw::c_uint = 37376; +pub const ETH_P_QINQ3: ::std::os::raw::c_uint = 37632; +pub const ETH_P_EDSA: ::std::os::raw::c_uint = 56026; +pub const ETH_P_AF_IUCV: ::std::os::raw::c_uint = 64507; +pub const ETH_P_802_3_MIN: ::std::os::raw::c_uint = 1536; +pub const ETH_P_802_3: ::std::os::raw::c_uint = 1; +pub const ETH_P_AX25: ::std::os::raw::c_uint = 2; +pub const ETH_P_ALL: ::std::os::raw::c_uint = 3; +pub const ETH_P_802_2: ::std::os::raw::c_uint = 4; +pub const ETH_P_SNAP: ::std::os::raw::c_uint = 5; +pub const ETH_P_DDCMP: ::std::os::raw::c_uint = 6; +pub const ETH_P_WAN_PPP: ::std::os::raw::c_uint = 7; +pub const ETH_P_PPP_MP: ::std::os::raw::c_uint = 8; +pub const ETH_P_LOCALTALK: ::std::os::raw::c_uint = 9; +pub const ETH_P_CAN: ::std::os::raw::c_uint = 12; +pub const ETH_P_CANFD: ::std::os::raw::c_uint = 13; +pub const ETH_P_PPPTALK: ::std::os::raw::c_uint = 16; +pub const ETH_P_TR_802_2: ::std::os::raw::c_uint = 17; +pub const ETH_P_MOBITEX: ::std::os::raw::c_uint = 21; +pub const ETH_P_CONTROL: 
::std::os::raw::c_uint = 22; +pub const ETH_P_IRDA: ::std::os::raw::c_uint = 23; +pub const ETH_P_ECONET: ::std::os::raw::c_uint = 24; +pub const ETH_P_HDLC: ::std::os::raw::c_uint = 25; +pub const ETH_P_ARCNET: ::std::os::raw::c_uint = 26; +pub const ETH_P_DSA: ::std::os::raw::c_uint = 27; +pub const ETH_P_TRAILER: ::std::os::raw::c_uint = 28; +pub const ETH_P_PHONET: ::std::os::raw::c_uint = 245; +pub const ETH_P_IEEE802154: ::std::os::raw::c_uint = 246; +pub const ETH_P_CAIF: ::std::os::raw::c_uint = 247; +pub const ETH_P_XDSA: ::std::os::raw::c_uint = 248; +pub const BPF_LD: ::std::os::raw::c_uint = 0; +pub const BPF_LDX: ::std::os::raw::c_uint = 1; +pub const BPF_ST: ::std::os::raw::c_uint = 2; +pub const BPF_STX: ::std::os::raw::c_uint = 3; +pub const BPF_ALU: ::std::os::raw::c_uint = 4; +pub const BPF_JMP: ::std::os::raw::c_uint = 5; +pub const BPF_RET: ::std::os::raw::c_uint = 6; +pub const BPF_MISC: ::std::os::raw::c_uint = 7; +pub const BPF_W: ::std::os::raw::c_uint = 0; +pub const BPF_H: ::std::os::raw::c_uint = 8; +pub const BPF_B: ::std::os::raw::c_uint = 16; +pub const BPF_IMM: ::std::os::raw::c_uint = 0; +pub const BPF_ABS: ::std::os::raw::c_uint = 32; +pub const BPF_IND: ::std::os::raw::c_uint = 64; +pub const BPF_MEM: ::std::os::raw::c_uint = 96; +pub const BPF_LEN: ::std::os::raw::c_uint = 128; +pub const BPF_MSH: ::std::os::raw::c_uint = 160; +pub const BPF_ADD: ::std::os::raw::c_uint = 0; +pub const BPF_SUB: ::std::os::raw::c_uint = 16; +pub const BPF_MUL: ::std::os::raw::c_uint = 32; +pub const BPF_DIV: ::std::os::raw::c_uint = 48; +pub const BPF_OR: ::std::os::raw::c_uint = 64; +pub const BPF_AND: ::std::os::raw::c_uint = 80; +pub const BPF_LSH: ::std::os::raw::c_uint = 96; +pub const BPF_RSH: ::std::os::raw::c_uint = 112; +pub const BPF_NEG: ::std::os::raw::c_uint = 128; +pub const BPF_MOD: ::std::os::raw::c_uint = 144; +pub const BPF_XOR: ::std::os::raw::c_uint = 160; +pub const BPF_JA: ::std::os::raw::c_uint = 0; +pub const BPF_JEQ: ::std::os::raw::c_uint = 16; +pub const BPF_JGT: ::std::os::raw::c_uint = 32; +pub const BPF_JGE: ::std::os::raw::c_uint = 48; +pub const BPF_JSET: ::std::os::raw::c_uint = 64; +pub const BPF_K: ::std::os::raw::c_uint = 0; +pub const BPF_X: ::std::os::raw::c_uint = 8; +pub const BPF_MAXINSNS: ::std::os::raw::c_uint = 4096; +pub const BPF_MAJOR_VERSION: ::std::os::raw::c_uint = 1; +pub const BPF_MINOR_VERSION: ::std::os::raw::c_uint = 1; +pub const BPF_A: ::std::os::raw::c_uint = 16; +pub const BPF_TAX: ::std::os::raw::c_uint = 0; +pub const BPF_TXA: ::std::os::raw::c_uint = 128; +pub const BPF_MEMWORDS: ::std::os::raw::c_uint = 16; +pub const SKF_AD_OFF: ::std::os::raw::c_int = -4096; +pub const SKF_AD_PROTOCOL: ::std::os::raw::c_uint = 0; +pub const SKF_AD_PKTTYPE: ::std::os::raw::c_uint = 4; +pub const SKF_AD_IFINDEX: ::std::os::raw::c_uint = 8; +pub const SKF_AD_NLATTR: ::std::os::raw::c_uint = 12; +pub const SKF_AD_NLATTR_NEST: ::std::os::raw::c_uint = 16; +pub const SKF_AD_MARK: ::std::os::raw::c_uint = 20; +pub const SKF_AD_QUEUE: ::std::os::raw::c_uint = 24; +pub const SKF_AD_HATYPE: ::std::os::raw::c_uint = 28; +pub const SKF_AD_RXHASH: ::std::os::raw::c_uint = 32; +pub const SKF_AD_CPU: ::std::os::raw::c_uint = 36; +pub const SKF_AD_ALU_XOR_X: ::std::os::raw::c_uint = 40; +pub const SKF_AD_VLAN_TAG: ::std::os::raw::c_uint = 44; +pub const SKF_AD_VLAN_TAG_PRESENT: ::std::os::raw::c_uint = 48; +pub const SKF_AD_PAY_OFFSET: ::std::os::raw::c_uint = 52; +pub const SKF_AD_RANDOM: ::std::os::raw::c_uint = 56; +pub const 
SKF_AD_VLAN_TPID: ::std::os::raw::c_uint = 60; +pub const SKF_AD_MAX: ::std::os::raw::c_uint = 64; +pub const SKF_NET_OFF: ::std::os::raw::c_int = -1048576; +pub const SKF_LL_OFF: ::std::os::raw::c_int = -2097152; +pub const BPF_NET_OFF: ::std::os::raw::c_int = -1048576; +pub const BPF_LL_OFF: ::std::os::raw::c_int = -2097152; +pub const TUN_READQ_SIZE: ::std::os::raw::c_uint = 500; +pub const TUN_TYPE_MASK: ::std::os::raw::c_uint = 15; +pub const IFF_TUN: ::std::os::raw::c_uint = 1; +pub const IFF_TAP: ::std::os::raw::c_uint = 2; +pub const IFF_NO_PI: ::std::os::raw::c_uint = 4096; +pub const IFF_ONE_QUEUE: ::std::os::raw::c_uint = 8192; +pub const IFF_VNET_HDR: ::std::os::raw::c_uint = 16384; +pub const IFF_TUN_EXCL: ::std::os::raw::c_uint = 32768; +pub const IFF_MULTI_QUEUE: ::std::os::raw::c_uint = 256; +pub const IFF_ATTACH_QUEUE: ::std::os::raw::c_uint = 512; +pub const IFF_DETACH_QUEUE: ::std::os::raw::c_uint = 1024; +pub const IFF_PERSIST: ::std::os::raw::c_uint = 2048; +pub const IFF_NOFILTER: ::std::os::raw::c_uint = 4096; +pub const TUN_TX_TIMESTAMP: ::std::os::raw::c_uint = 1; +pub const TUN_F_CSUM: ::std::os::raw::c_uint = 1; +pub const TUN_F_TSO4: ::std::os::raw::c_uint = 2; +pub const TUN_F_TSO6: ::std::os::raw::c_uint = 4; +pub const TUN_F_TSO_ECN: ::std::os::raw::c_uint = 8; +pub const TUN_F_UFO: ::std::os::raw::c_uint = 16; +pub const TUN_PKT_STRIP: ::std::os::raw::c_uint = 1; +pub const TUN_FLT_ALLMULTI: ::std::os::raw::c_uint = 1; +pub type __s8 = ::std::os::raw::c_schar; +pub type __u8 = ::std::os::raw::c_uchar; +pub type __s16 = ::std::os::raw::c_short; +pub type __u16 = ::std::os::raw::c_ushort; +pub type __s32 = ::std::os::raw::c_int; +pub type __u32 = ::std::os::raw::c_uint; +pub type __s64 = ::std::os::raw::c_longlong; +pub type __u64 = ::std::os::raw::c_ulonglong; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct __kernel_fd_set { + pub fds_bits: [::std::os::raw::c_ulong; 16usize], +} +#[test] +fn bindgen_test_layout___kernel_fd_set() { + assert_eq!( + ::std::mem::size_of::<__kernel_fd_set>(), + 128usize, + concat!("Size of: ", stringify!(__kernel_fd_set)) + ); + assert_eq!( + ::std::mem::align_of::<__kernel_fd_set>(), + 8usize, + concat!("Alignment of ", stringify!(__kernel_fd_set)) + ); + assert_eq!( + unsafe { &(*(0 as *const __kernel_fd_set)).fds_bits as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(__kernel_fd_set), + "::", + stringify!(fds_bits) + ) + ); +} +impl Clone for __kernel_fd_set { + fn clone(&self) -> Self { + *self + } +} +pub type __kernel_sighandler_t = + ::std::option::Option; +pub type __kernel_key_t = ::std::os::raw::c_int; +pub type __kernel_mqd_t = ::std::os::raw::c_int; +pub type __kernel_old_uid_t = ::std::os::raw::c_ushort; +pub type __kernel_old_gid_t = ::std::os::raw::c_ushort; +pub type __kernel_old_dev_t = ::std::os::raw::c_ulong; +pub type __kernel_long_t = ::std::os::raw::c_long; +pub type __kernel_ulong_t = ::std::os::raw::c_ulong; +pub type __kernel_ino_t = __kernel_ulong_t; +pub type __kernel_mode_t = ::std::os::raw::c_uint; +pub type __kernel_pid_t = ::std::os::raw::c_int; +pub type __kernel_ipc_pid_t = ::std::os::raw::c_int; +pub type __kernel_uid_t = ::std::os::raw::c_uint; +pub type __kernel_gid_t = ::std::os::raw::c_uint; +pub type __kernel_suseconds_t = __kernel_long_t; +pub type __kernel_daddr_t = ::std::os::raw::c_int; +pub type __kernel_uid32_t = ::std::os::raw::c_uint; +pub type __kernel_gid32_t = ::std::os::raw::c_uint; +pub type __kernel_size_t = __kernel_ulong_t; +pub type 
__kernel_ssize_t = __kernel_long_t; +pub type __kernel_ptrdiff_t = __kernel_long_t; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct __kernel_fsid_t { + pub val: [::std::os::raw::c_int; 2usize], +} +#[test] +fn bindgen_test_layout___kernel_fsid_t() { + assert_eq!( + ::std::mem::size_of::<__kernel_fsid_t>(), + 8usize, + concat!("Size of: ", stringify!(__kernel_fsid_t)) + ); + assert_eq!( + ::std::mem::align_of::<__kernel_fsid_t>(), + 4usize, + concat!("Alignment of ", stringify!(__kernel_fsid_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const __kernel_fsid_t)).val as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(__kernel_fsid_t), + "::", + stringify!(val) + ) + ); +} +impl Clone for __kernel_fsid_t { + fn clone(&self) -> Self { + *self + } +} +pub type __kernel_off_t = __kernel_long_t; +pub type __kernel_loff_t = ::std::os::raw::c_longlong; +pub type __kernel_time_t = __kernel_long_t; +pub type __kernel_clock_t = __kernel_long_t; +pub type __kernel_timer_t = ::std::os::raw::c_int; +pub type __kernel_clockid_t = ::std::os::raw::c_int; +pub type __kernel_caddr_t = *mut ::std::os::raw::c_char; +pub type __kernel_uid16_t = ::std::os::raw::c_ushort; +pub type __kernel_gid16_t = ::std::os::raw::c_ushort; +pub type __le16 = __u16; +pub type __be16 = __u16; +pub type __le32 = __u32; +pub type __be32 = __u32; +pub type __le64 = __u64; +pub type __be64 = __u64; +pub type __sum16 = __u16; +pub type __wsum = __u32; +#[repr(C, packed)] +#[derive(Debug, Default, Copy)] +pub struct ethhdr { + pub h_dest: [::std::os::raw::c_uchar; 6usize], + pub h_source: [::std::os::raw::c_uchar; 6usize], + pub h_proto: __be16, +} +#[test] +fn bindgen_test_layout_ethhdr() { + assert_eq!( + ::std::mem::size_of::(), + 14usize, + concat!("Size of: ", stringify!(ethhdr)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(ethhdr)) + ); + let ethhdr_test = ethhdr::default(); + let p_ethhdr_test = ðhdr_test as *const ethhdr as usize; + assert_eq!( + std::ptr::addr_of!(ethhdr_test.h_dest) as usize - p_ethhdr_test, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ethhdr), + "::", + stringify!(h_dest) + ) + ); + assert_eq!( + std::ptr::addr_of!(ethhdr_test.h_source) as usize - p_ethhdr_test, + 6usize, + concat!( + "Alignment of field: ", + stringify!(ethhdr), + "::", + stringify!(h_source) + ) + ); + assert_eq!( + std::ptr::addr_of!(ethhdr_test.h_proto) as usize - p_ethhdr_test, + 12usize, + concat!( + "Alignment of field: ", + stringify!(ethhdr), + "::", + stringify!(h_proto) + ) + ); +} +impl Clone for ethhdr { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct sock_filter { + pub code: __u16, + pub jt: __u8, + pub jf: __u8, + pub k: __u32, +} +#[test] +fn bindgen_test_layout_sock_filter() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(sock_filter)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(sock_filter)) + ); + assert_eq!( + unsafe { &(*(0 as *const sock_filter)).code as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(sock_filter), + "::", + stringify!(code) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const sock_filter)).jt as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(sock_filter), + "::", + stringify!(jt) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const sock_filter)).jf as *const _ as usize }, + 3usize, + concat!( + 
"Alignment of field: ", + stringify!(sock_filter), + "::", + stringify!(jf) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const sock_filter)).k as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(sock_filter), + "::", + stringify!(k) + ) + ); +} +impl Clone for sock_filter { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Copy)] +pub struct sock_fprog { + pub len: ::std::os::raw::c_ushort, + pub filter: *mut sock_filter, +} +#[test] +fn bindgen_test_layout_sock_fprog() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(sock_fprog)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(sock_fprog)) + ); + assert_eq!( + unsafe { &(*(0 as *const sock_fprog)).len as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(sock_fprog), + "::", + stringify!(len) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const sock_fprog)).filter as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(sock_fprog), + "::", + stringify!(filter) + ) + ); +} +impl Clone for sock_fprog { + fn clone(&self) -> Self { + *self + } +} +impl Default for sock_fprog { + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct tun_pi { + pub flags: __u16, + pub proto: __be16, +} +#[test] +fn bindgen_test_layout_tun_pi() { + assert_eq!( + ::std::mem::size_of::(), + 4usize, + concat!("Size of: ", stringify!(tun_pi)) + ); + assert_eq!( + ::std::mem::align_of::(), + 2usize, + concat!("Alignment of ", stringify!(tun_pi)) + ); + assert_eq!( + unsafe { &(*(0 as *const tun_pi)).flags as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(tun_pi), + "::", + stringify!(flags) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const tun_pi)).proto as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(tun_pi), + "::", + stringify!(proto) + ) + ); +} +impl Clone for tun_pi { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct tun_filter { + pub flags: __u16, + pub count: __u16, + pub addr: __IncompleteArrayField<[__u8; 6usize]>, +} +#[test] +fn bindgen_test_layout_tun_filter() { + assert_eq!( + ::std::mem::size_of::(), + 4usize, + concat!("Size of: ", stringify!(tun_filter)) + ); + assert_eq!( + ::std::mem::align_of::(), + 2usize, + concat!("Alignment of ", stringify!(tun_filter)) + ); + assert_eq!( + unsafe { &(*(0 as *const tun_filter)).flags as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(tun_filter), + "::", + stringify!(flags) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const tun_filter)).count as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(tun_filter), + "::", + stringify!(count) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const tun_filter)).addr as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(tun_filter), + "::", + stringify!(addr) + ) + ); +} +impl Clone for tun_filter { + fn clone(&self) -> Self { + *self + } +} diff --git a/src/dragonball/src/dbs_utils/src/net/net_gen/iff.rs b/src/dragonball/src/dbs_utils/src/net/net_gen/iff.rs new file mode 100644 index 000000000000..9043cc6cb038 --- /dev/null +++ b/src/dragonball/src/dbs_utils/src/net/net_gen/iff.rs @@ -0,0 +1,3266 @@ +// Copyright 2023 Alibaba Cloud. All Rights Reserved. +// Copyright 2017 The Chromium OS Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. +// SPDX-License-Identifier: Apache-2.0 + +/* automatically generated by rust-bindgen */ + +#[repr(C)] +#[derive(Default)] +pub struct __IncompleteArrayField(::std::marker::PhantomData); +impl __IncompleteArrayField { + #[inline] + pub fn new() -> Self { + __IncompleteArrayField(::std::marker::PhantomData) + } + #[inline] + pub unsafe fn as_ptr(&self) -> *const T { + ::std::mem::transmute(self) + } + #[inline] + pub unsafe fn as_mut_ptr(&mut self) -> *mut T { + ::std::mem::transmute(self) + } + #[inline] + pub unsafe fn as_slice(&self, len: usize) -> &[T] { + ::std::slice::from_raw_parts(self.as_ptr(), len) + } + #[inline] + pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] { + ::std::slice::from_raw_parts_mut(self.as_mut_ptr(), len) + } +} +impl ::std::fmt::Debug for __IncompleteArrayField { + fn fmt(&self, fmt: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { + fmt.write_str("__IncompleteArrayField") + } +} +impl ::std::clone::Clone for __IncompleteArrayField { + #[inline] + fn clone(&self) -> Self { + Self::new() + } +} +impl ::std::marker::Copy for __IncompleteArrayField {} +#[repr(C)] +pub struct __BindgenUnionField(::std::marker::PhantomData); +impl __BindgenUnionField { + #[inline] + pub fn new() -> Self { + __BindgenUnionField(::std::marker::PhantomData) + } + #[inline] + pub unsafe fn as_ref(&self) -> &T { + ::std::mem::transmute(self) + } + #[inline] + pub unsafe fn as_mut(&mut self) -> &mut T { + ::std::mem::transmute(self) + } +} +impl ::std::default::Default for __BindgenUnionField { + #[inline] + fn default() -> Self { + Self::new() + } +} +impl ::std::clone::Clone for __BindgenUnionField { + #[inline] + fn clone(&self) -> Self { + Self::new() + } +} +impl ::std::marker::Copy for __BindgenUnionField {} +impl ::std::fmt::Debug for __BindgenUnionField { + fn fmt(&self, fmt: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { + fmt.write_str("__BindgenUnionField") + } +} +pub const __UAPI_DEF_IN_ADDR: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IN_IPPROTO: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IN_PKTINFO: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IP_MREQ: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_SOCKADDR_IN: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IN_CLASS: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IN6_ADDR: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IN6_ADDR_ALT: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_SOCKADDR_IN6: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IPV6_MREQ: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IPPROTO_V6: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IPV6_OPTIONS: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IN6_PKTINFO: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IP6_MTUINFO: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_XATTR: ::std::os::raw::c_uint = 1; +pub const __BITS_PER_LONG: ::std::os::raw::c_uint = 64; +pub const __FD_SETSIZE: ::std::os::raw::c_uint = 1024; +pub const _K_SS_MAXSIZE: ::std::os::raw::c_uint = 128; +pub const _SYS_SOCKET_H: ::std::os::raw::c_uint = 1; +pub const _FEATURES_H: ::std::os::raw::c_uint = 1; +pub const _DEFAULT_SOURCE: ::std::os::raw::c_uint = 1; +pub const __USE_ISOC11: ::std::os::raw::c_uint = 1; +pub const __USE_ISOC99: ::std::os::raw::c_uint = 1; +pub const __USE_ISOC95: ::std::os::raw::c_uint = 1; +pub const __USE_POSIX_IMPLICITLY: ::std::os::raw::c_uint = 1; +pub const 
_POSIX_SOURCE: ::std::os::raw::c_uint = 1; +pub const _POSIX_C_SOURCE: ::std::os::raw::c_uint = 200809; +pub const __USE_POSIX: ::std::os::raw::c_uint = 1; +pub const __USE_POSIX2: ::std::os::raw::c_uint = 1; +pub const __USE_POSIX199309: ::std::os::raw::c_uint = 1; +pub const __USE_POSIX199506: ::std::os::raw::c_uint = 1; +pub const __USE_XOPEN2K: ::std::os::raw::c_uint = 1; +pub const __USE_XOPEN2K8: ::std::os::raw::c_uint = 1; +pub const _ATFILE_SOURCE: ::std::os::raw::c_uint = 1; +pub const __USE_MISC: ::std::os::raw::c_uint = 1; +pub const __USE_ATFILE: ::std::os::raw::c_uint = 1; +pub const __USE_FORTIFY_LEVEL: ::std::os::raw::c_uint = 0; +pub const _STDC_PREDEF_H: ::std::os::raw::c_uint = 1; +pub const __STDC_IEC_559__: ::std::os::raw::c_uint = 1; +pub const __STDC_IEC_559_COMPLEX__: ::std::os::raw::c_uint = 1; +pub const __STDC_ISO_10646__: ::std::os::raw::c_uint = 201505; +pub const __STDC_NO_THREADS__: ::std::os::raw::c_uint = 1; +pub const __GNU_LIBRARY__: ::std::os::raw::c_uint = 6; +pub const __GLIBC__: ::std::os::raw::c_uint = 2; +pub const __GLIBC_MINOR__: ::std::os::raw::c_uint = 23; +pub const _SYS_CDEFS_H: ::std::os::raw::c_uint = 1; +pub const __WORDSIZE: ::std::os::raw::c_uint = 64; +pub const __WORDSIZE_TIME64_COMPAT32: ::std::os::raw::c_uint = 1; +pub const __SYSCALL_WORDSIZE: ::std::os::raw::c_uint = 64; +pub const _SYS_UIO_H: ::std::os::raw::c_uint = 1; +pub const _SYS_TYPES_H: ::std::os::raw::c_uint = 1; +pub const _BITS_TYPES_H: ::std::os::raw::c_uint = 1; +pub const _BITS_TYPESIZES_H: ::std::os::raw::c_uint = 1; +pub const __OFF_T_MATCHES_OFF64_T: ::std::os::raw::c_uint = 1; +pub const __INO_T_MATCHES_INO64_T: ::std::os::raw::c_uint = 1; +pub const __clock_t_defined: ::std::os::raw::c_uint = 1; +pub const __time_t_defined: ::std::os::raw::c_uint = 1; +pub const __clockid_t_defined: ::std::os::raw::c_uint = 1; +pub const __timer_t_defined: ::std::os::raw::c_uint = 1; +pub const __BIT_TYPES_DEFINED__: ::std::os::raw::c_uint = 1; +pub const _ENDIAN_H: ::std::os::raw::c_uint = 1; +pub const __LITTLE_ENDIAN: ::std::os::raw::c_uint = 1234; +pub const __BIG_ENDIAN: ::std::os::raw::c_uint = 4321; +pub const __PDP_ENDIAN: ::std::os::raw::c_uint = 3412; +pub const __BYTE_ORDER: ::std::os::raw::c_uint = 1234; +pub const __FLOAT_WORD_ORDER: ::std::os::raw::c_uint = 1234; +pub const LITTLE_ENDIAN: ::std::os::raw::c_uint = 1234; +pub const BIG_ENDIAN: ::std::os::raw::c_uint = 4321; +pub const PDP_ENDIAN: ::std::os::raw::c_uint = 3412; +pub const BYTE_ORDER: ::std::os::raw::c_uint = 1234; +pub const _BITS_BYTESWAP_H: ::std::os::raw::c_uint = 1; +pub const _SYS_SELECT_H: ::std::os::raw::c_uint = 1; +pub const __FD_ZERO_STOS: &'static [u8; 6usize] = b"stosq\x00"; +pub const _SIGSET_H_types: ::std::os::raw::c_uint = 1; +pub const __timespec_defined: ::std::os::raw::c_uint = 1; +pub const _STRUCT_TIMEVAL: ::std::os::raw::c_uint = 1; +pub const FD_SETSIZE: ::std::os::raw::c_uint = 1024; +pub const _SYS_SYSMACROS_H: ::std::os::raw::c_uint = 1; +pub const _BITS_PTHREADTYPES_H: ::std::os::raw::c_uint = 1; +pub const __SIZEOF_PTHREAD_ATTR_T: ::std::os::raw::c_uint = 56; +pub const __SIZEOF_PTHREAD_MUTEX_T: ::std::os::raw::c_uint = 40; +pub const __SIZEOF_PTHREAD_MUTEXATTR_T: ::std::os::raw::c_uint = 4; +pub const __SIZEOF_PTHREAD_COND_T: ::std::os::raw::c_uint = 48; +pub const __SIZEOF_PTHREAD_CONDATTR_T: ::std::os::raw::c_uint = 4; +pub const __SIZEOF_PTHREAD_RWLOCK_T: ::std::os::raw::c_uint = 56; +pub const __SIZEOF_PTHREAD_RWLOCKATTR_T: ::std::os::raw::c_uint = 8; +pub const 
__SIZEOF_PTHREAD_BARRIER_T: ::std::os::raw::c_uint = 32; +pub const __SIZEOF_PTHREAD_BARRIERATTR_T: ::std::os::raw::c_uint = 4; +pub const __have_pthread_attr_t: ::std::os::raw::c_uint = 1; +pub const __PTHREAD_MUTEX_HAVE_PREV: ::std::os::raw::c_uint = 1; +pub const __PTHREAD_RWLOCK_INT_FLAGS_SHARED: ::std::os::raw::c_uint = 1; +pub const _BITS_UIO_H: ::std::os::raw::c_uint = 1; +pub const UIO_MAXIOV: ::std::os::raw::c_uint = 1024; +pub const PF_UNSPEC: ::std::os::raw::c_uint = 0; +pub const PF_LOCAL: ::std::os::raw::c_uint = 1; +pub const PF_UNIX: ::std::os::raw::c_uint = 1; +pub const PF_FILE: ::std::os::raw::c_uint = 1; +pub const PF_INET: ::std::os::raw::c_uint = 2; +pub const PF_AX25: ::std::os::raw::c_uint = 3; +pub const PF_IPX: ::std::os::raw::c_uint = 4; +pub const PF_APPLETALK: ::std::os::raw::c_uint = 5; +pub const PF_NETROM: ::std::os::raw::c_uint = 6; +pub const PF_BRIDGE: ::std::os::raw::c_uint = 7; +pub const PF_ATMPVC: ::std::os::raw::c_uint = 8; +pub const PF_X25: ::std::os::raw::c_uint = 9; +pub const PF_INET6: ::std::os::raw::c_uint = 10; +pub const PF_ROSE: ::std::os::raw::c_uint = 11; +pub const PF_DECnet: ::std::os::raw::c_uint = 12; +pub const PF_NETBEUI: ::std::os::raw::c_uint = 13; +pub const PF_SECURITY: ::std::os::raw::c_uint = 14; +pub const PF_KEY: ::std::os::raw::c_uint = 15; +pub const PF_NETLINK: ::std::os::raw::c_uint = 16; +pub const PF_ROUTE: ::std::os::raw::c_uint = 16; +pub const PF_PACKET: ::std::os::raw::c_uint = 17; +pub const PF_ASH: ::std::os::raw::c_uint = 18; +pub const PF_ECONET: ::std::os::raw::c_uint = 19; +pub const PF_ATMSVC: ::std::os::raw::c_uint = 20; +pub const PF_RDS: ::std::os::raw::c_uint = 21; +pub const PF_SNA: ::std::os::raw::c_uint = 22; +pub const PF_IRDA: ::std::os::raw::c_uint = 23; +pub const PF_PPPOX: ::std::os::raw::c_uint = 24; +pub const PF_WANPIPE: ::std::os::raw::c_uint = 25; +pub const PF_LLC: ::std::os::raw::c_uint = 26; +pub const PF_IB: ::std::os::raw::c_uint = 27; +pub const PF_MPLS: ::std::os::raw::c_uint = 28; +pub const PF_CAN: ::std::os::raw::c_uint = 29; +pub const PF_TIPC: ::std::os::raw::c_uint = 30; +pub const PF_BLUETOOTH: ::std::os::raw::c_uint = 31; +pub const PF_IUCV: ::std::os::raw::c_uint = 32; +pub const PF_RXRPC: ::std::os::raw::c_uint = 33; +pub const PF_ISDN: ::std::os::raw::c_uint = 34; +pub const PF_PHONET: ::std::os::raw::c_uint = 35; +pub const PF_IEEE802154: ::std::os::raw::c_uint = 36; +pub const PF_CAIF: ::std::os::raw::c_uint = 37; +pub const PF_ALG: ::std::os::raw::c_uint = 38; +pub const PF_NFC: ::std::os::raw::c_uint = 39; +pub const PF_VSOCK: ::std::os::raw::c_uint = 40; +pub const PF_MAX: ::std::os::raw::c_uint = 41; +pub const AF_UNSPEC: ::std::os::raw::c_uint = 0; +pub const AF_LOCAL: ::std::os::raw::c_uint = 1; +pub const AF_UNIX: ::std::os::raw::c_uint = 1; +pub const AF_FILE: ::std::os::raw::c_uint = 1; +pub const AF_INET: ::std::os::raw::c_uint = 2; +pub const AF_AX25: ::std::os::raw::c_uint = 3; +pub const AF_IPX: ::std::os::raw::c_uint = 4; +pub const AF_APPLETALK: ::std::os::raw::c_uint = 5; +pub const AF_NETROM: ::std::os::raw::c_uint = 6; +pub const AF_BRIDGE: ::std::os::raw::c_uint = 7; +pub const AF_ATMPVC: ::std::os::raw::c_uint = 8; +pub const AF_X25: ::std::os::raw::c_uint = 9; +pub const AF_INET6: ::std::os::raw::c_uint = 10; +pub const AF_ROSE: ::std::os::raw::c_uint = 11; +pub const AF_DECnet: ::std::os::raw::c_uint = 12; +pub const AF_NETBEUI: ::std::os::raw::c_uint = 13; +pub const AF_SECURITY: ::std::os::raw::c_uint = 14; +pub const AF_KEY: ::std::os::raw::c_uint 
= 15; +pub const AF_NETLINK: ::std::os::raw::c_uint = 16; +pub const AF_ROUTE: ::std::os::raw::c_uint = 16; +pub const AF_PACKET: ::std::os::raw::c_uint = 17; +pub const AF_ASH: ::std::os::raw::c_uint = 18; +pub const AF_ECONET: ::std::os::raw::c_uint = 19; +pub const AF_ATMSVC: ::std::os::raw::c_uint = 20; +pub const AF_RDS: ::std::os::raw::c_uint = 21; +pub const AF_SNA: ::std::os::raw::c_uint = 22; +pub const AF_IRDA: ::std::os::raw::c_uint = 23; +pub const AF_PPPOX: ::std::os::raw::c_uint = 24; +pub const AF_WANPIPE: ::std::os::raw::c_uint = 25; +pub const AF_LLC: ::std::os::raw::c_uint = 26; +pub const AF_IB: ::std::os::raw::c_uint = 27; +pub const AF_MPLS: ::std::os::raw::c_uint = 28; +pub const AF_CAN: ::std::os::raw::c_uint = 29; +pub const AF_TIPC: ::std::os::raw::c_uint = 30; +pub const AF_BLUETOOTH: ::std::os::raw::c_uint = 31; +pub const AF_IUCV: ::std::os::raw::c_uint = 32; +pub const AF_RXRPC: ::std::os::raw::c_uint = 33; +pub const AF_ISDN: ::std::os::raw::c_uint = 34; +pub const AF_PHONET: ::std::os::raw::c_uint = 35; +pub const AF_IEEE802154: ::std::os::raw::c_uint = 36; +pub const AF_CAIF: ::std::os::raw::c_uint = 37; +pub const AF_ALG: ::std::os::raw::c_uint = 38; +pub const AF_NFC: ::std::os::raw::c_uint = 39; +pub const AF_VSOCK: ::std::os::raw::c_uint = 40; +pub const AF_MAX: ::std::os::raw::c_uint = 41; +pub const SOL_RAW: ::std::os::raw::c_uint = 255; +pub const SOL_DECNET: ::std::os::raw::c_uint = 261; +pub const SOL_X25: ::std::os::raw::c_uint = 262; +pub const SOL_PACKET: ::std::os::raw::c_uint = 263; +pub const SOL_ATM: ::std::os::raw::c_uint = 264; +pub const SOL_AAL: ::std::os::raw::c_uint = 265; +pub const SOL_IRDA: ::std::os::raw::c_uint = 266; +pub const SOMAXCONN: ::std::os::raw::c_uint = 128; +pub const _BITS_SOCKADDR_H: ::std::os::raw::c_uint = 1; +pub const _SS_SIZE: ::std::os::raw::c_uint = 128; +pub const FIOSETOWN: ::std::os::raw::c_uint = 35073; +pub const SIOCSPGRP: ::std::os::raw::c_uint = 35074; +pub const FIOGETOWN: ::std::os::raw::c_uint = 35075; +pub const SIOCGPGRP: ::std::os::raw::c_uint = 35076; +pub const SIOCATMARK: ::std::os::raw::c_uint = 35077; +pub const SIOCGSTAMP: ::std::os::raw::c_uint = 35078; +pub const SIOCGSTAMPNS: ::std::os::raw::c_uint = 35079; +pub const SOL_SOCKET: ::std::os::raw::c_uint = 1; +pub const SO_DEBUG: ::std::os::raw::c_uint = 1; +pub const SO_REUSEADDR: ::std::os::raw::c_uint = 2; +pub const SO_TYPE: ::std::os::raw::c_uint = 3; +pub const SO_ERROR: ::std::os::raw::c_uint = 4; +pub const SO_DONTROUTE: ::std::os::raw::c_uint = 5; +pub const SO_BROADCAST: ::std::os::raw::c_uint = 6; +pub const SO_SNDBUF: ::std::os::raw::c_uint = 7; +pub const SO_RCVBUF: ::std::os::raw::c_uint = 8; +pub const SO_SNDBUFFORCE: ::std::os::raw::c_uint = 32; +pub const SO_RCVBUFFORCE: ::std::os::raw::c_uint = 33; +pub const SO_KEEPALIVE: ::std::os::raw::c_uint = 9; +pub const SO_OOBINLINE: ::std::os::raw::c_uint = 10; +pub const SO_NO_CHECK: ::std::os::raw::c_uint = 11; +pub const SO_PRIORITY: ::std::os::raw::c_uint = 12; +pub const SO_LINGER: ::std::os::raw::c_uint = 13; +pub const SO_BSDCOMPAT: ::std::os::raw::c_uint = 14; +pub const SO_REUSEPORT: ::std::os::raw::c_uint = 15; +pub const SO_PASSCRED: ::std::os::raw::c_uint = 16; +pub const SO_PEERCRED: ::std::os::raw::c_uint = 17; +pub const SO_RCVLOWAT: ::std::os::raw::c_uint = 18; +pub const SO_SNDLOWAT: ::std::os::raw::c_uint = 19; +pub const SO_RCVTIMEO: ::std::os::raw::c_uint = 20; +pub const SO_SNDTIMEO: ::std::os::raw::c_uint = 21; +pub const SO_SECURITY_AUTHENTICATION: 
::std::os::raw::c_uint = 22; +pub const SO_SECURITY_ENCRYPTION_TRANSPORT: ::std::os::raw::c_uint = 23; +pub const SO_SECURITY_ENCRYPTION_NETWORK: ::std::os::raw::c_uint = 24; +pub const SO_BINDTODEVICE: ::std::os::raw::c_uint = 25; +pub const SO_ATTACH_FILTER: ::std::os::raw::c_uint = 26; +pub const SO_DETACH_FILTER: ::std::os::raw::c_uint = 27; +pub const SO_GET_FILTER: ::std::os::raw::c_uint = 26; +pub const SO_PEERNAME: ::std::os::raw::c_uint = 28; +pub const SO_TIMESTAMP: ::std::os::raw::c_uint = 29; +pub const SCM_TIMESTAMP: ::std::os::raw::c_uint = 29; +pub const SO_ACCEPTCONN: ::std::os::raw::c_uint = 30; +pub const SO_PEERSEC: ::std::os::raw::c_uint = 31; +pub const SO_PASSSEC: ::std::os::raw::c_uint = 34; +pub const SO_TIMESTAMPNS: ::std::os::raw::c_uint = 35; +pub const SCM_TIMESTAMPNS: ::std::os::raw::c_uint = 35; +pub const SO_MARK: ::std::os::raw::c_uint = 36; +pub const SO_TIMESTAMPING: ::std::os::raw::c_uint = 37; +pub const SCM_TIMESTAMPING: ::std::os::raw::c_uint = 37; +pub const SO_PROTOCOL: ::std::os::raw::c_uint = 38; +pub const SO_DOMAIN: ::std::os::raw::c_uint = 39; +pub const SO_RXQ_OVFL: ::std::os::raw::c_uint = 40; +pub const SO_WIFI_STATUS: ::std::os::raw::c_uint = 41; +pub const SCM_WIFI_STATUS: ::std::os::raw::c_uint = 41; +pub const SO_PEEK_OFF: ::std::os::raw::c_uint = 42; +pub const SO_NOFCS: ::std::os::raw::c_uint = 43; +pub const SO_LOCK_FILTER: ::std::os::raw::c_uint = 44; +pub const SO_SELECT_ERR_QUEUE: ::std::os::raw::c_uint = 45; +pub const SO_BUSY_POLL: ::std::os::raw::c_uint = 46; +pub const SO_MAX_PACING_RATE: ::std::os::raw::c_uint = 47; +pub const SO_BPF_EXTENSIONS: ::std::os::raw::c_uint = 48; +pub const SO_INCOMING_CPU: ::std::os::raw::c_uint = 49; +pub const SO_ATTACH_BPF: ::std::os::raw::c_uint = 50; +pub const SO_DETACH_BPF: ::std::os::raw::c_uint = 27; +pub const IFNAMSIZ: ::std::os::raw::c_uint = 16; +pub const IFALIASZ: ::std::os::raw::c_uint = 256; +pub const GENERIC_HDLC_VERSION: ::std::os::raw::c_uint = 4; +pub const CLOCK_DEFAULT: ::std::os::raw::c_uint = 0; +pub const CLOCK_EXT: ::std::os::raw::c_uint = 1; +pub const CLOCK_INT: ::std::os::raw::c_uint = 2; +pub const CLOCK_TXINT: ::std::os::raw::c_uint = 3; +pub const CLOCK_TXFROMRX: ::std::os::raw::c_uint = 4; +pub const ENCODING_DEFAULT: ::std::os::raw::c_uint = 0; +pub const ENCODING_NRZ: ::std::os::raw::c_uint = 1; +pub const ENCODING_NRZI: ::std::os::raw::c_uint = 2; +pub const ENCODING_FM_MARK: ::std::os::raw::c_uint = 3; +pub const ENCODING_FM_SPACE: ::std::os::raw::c_uint = 4; +pub const ENCODING_MANCHESTER: ::std::os::raw::c_uint = 5; +pub const PARITY_DEFAULT: ::std::os::raw::c_uint = 0; +pub const PARITY_NONE: ::std::os::raw::c_uint = 1; +pub const PARITY_CRC16_PR0: ::std::os::raw::c_uint = 2; +pub const PARITY_CRC16_PR1: ::std::os::raw::c_uint = 3; +pub const PARITY_CRC16_PR0_CCITT: ::std::os::raw::c_uint = 4; +pub const PARITY_CRC16_PR1_CCITT: ::std::os::raw::c_uint = 5; +pub const PARITY_CRC32_PR0_CCITT: ::std::os::raw::c_uint = 6; +pub const PARITY_CRC32_PR1_CCITT: ::std::os::raw::c_uint = 7; +pub const LMI_DEFAULT: ::std::os::raw::c_uint = 0; +pub const LMI_NONE: ::std::os::raw::c_uint = 1; +pub const LMI_ANSI: ::std::os::raw::c_uint = 2; +pub const LMI_CCITT: ::std::os::raw::c_uint = 3; +pub const LMI_CISCO: ::std::os::raw::c_uint = 4; +pub const IF_GET_IFACE: ::std::os::raw::c_uint = 1; +pub const IF_GET_PROTO: ::std::os::raw::c_uint = 2; +pub const IF_IFACE_V35: ::std::os::raw::c_uint = 4096; +pub const IF_IFACE_V24: ::std::os::raw::c_uint = 4097; +pub const 
IF_IFACE_X21: ::std::os::raw::c_uint = 4098; +pub const IF_IFACE_T1: ::std::os::raw::c_uint = 4099; +pub const IF_IFACE_E1: ::std::os::raw::c_uint = 4100; +pub const IF_IFACE_SYNC_SERIAL: ::std::os::raw::c_uint = 4101; +pub const IF_IFACE_X21D: ::std::os::raw::c_uint = 4102; +pub const IF_PROTO_HDLC: ::std::os::raw::c_uint = 8192; +pub const IF_PROTO_PPP: ::std::os::raw::c_uint = 8193; +pub const IF_PROTO_CISCO: ::std::os::raw::c_uint = 8194; +pub const IF_PROTO_FR: ::std::os::raw::c_uint = 8195; +pub const IF_PROTO_FR_ADD_PVC: ::std::os::raw::c_uint = 8196; +pub const IF_PROTO_FR_DEL_PVC: ::std::os::raw::c_uint = 8197; +pub const IF_PROTO_X25: ::std::os::raw::c_uint = 8198; +pub const IF_PROTO_HDLC_ETH: ::std::os::raw::c_uint = 8199; +pub const IF_PROTO_FR_ADD_ETH_PVC: ::std::os::raw::c_uint = 8200; +pub const IF_PROTO_FR_DEL_ETH_PVC: ::std::os::raw::c_uint = 8201; +pub const IF_PROTO_FR_PVC: ::std::os::raw::c_uint = 8202; +pub const IF_PROTO_FR_ETH_PVC: ::std::os::raw::c_uint = 8203; +pub const IF_PROTO_RAW: ::std::os::raw::c_uint = 8204; +pub const IFHWADDRLEN: ::std::os::raw::c_uint = 6; +pub type __s8 = ::std::os::raw::c_schar; +pub type __u8 = ::std::os::raw::c_uchar; +pub type __s16 = ::std::os::raw::c_short; +pub type __u16 = ::std::os::raw::c_ushort; +pub type __s32 = ::std::os::raw::c_int; +pub type __u32 = ::std::os::raw::c_uint; +pub type __s64 = ::std::os::raw::c_longlong; +pub type __u64 = ::std::os::raw::c_ulonglong; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct __kernel_fd_set { + pub fds_bits: [::std::os::raw::c_ulong; 16usize], +} +#[test] +fn bindgen_test_layout___kernel_fd_set() { + assert_eq!( + ::std::mem::size_of::<__kernel_fd_set>(), + 128usize, + concat!("Size of: ", stringify!(__kernel_fd_set)) + ); + assert_eq!( + ::std::mem::align_of::<__kernel_fd_set>(), + 8usize, + concat!("Alignment of ", stringify!(__kernel_fd_set)) + ); + assert_eq!( + unsafe { &(*(0 as *const __kernel_fd_set)).fds_bits as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(__kernel_fd_set), + "::", + stringify!(fds_bits) + ) + ); +} +impl Clone for __kernel_fd_set { + fn clone(&self) -> Self { + *self + } +} +pub type __kernel_sighandler_t = + ::std::option::Option; +pub type __kernel_key_t = ::std::os::raw::c_int; +pub type __kernel_mqd_t = ::std::os::raw::c_int; +pub type __kernel_old_uid_t = ::std::os::raw::c_ushort; +pub type __kernel_old_gid_t = ::std::os::raw::c_ushort; +pub type __kernel_old_dev_t = ::std::os::raw::c_ulong; +pub type __kernel_long_t = ::std::os::raw::c_long; +pub type __kernel_ulong_t = ::std::os::raw::c_ulong; +pub type __kernel_ino_t = __kernel_ulong_t; +pub type __kernel_mode_t = ::std::os::raw::c_uint; +pub type __kernel_pid_t = ::std::os::raw::c_int; +pub type __kernel_ipc_pid_t = ::std::os::raw::c_int; +pub type __kernel_uid_t = ::std::os::raw::c_uint; +pub type __kernel_gid_t = ::std::os::raw::c_uint; +pub type __kernel_suseconds_t = __kernel_long_t; +pub type __kernel_daddr_t = ::std::os::raw::c_int; +pub type __kernel_uid32_t = ::std::os::raw::c_uint; +pub type __kernel_gid32_t = ::std::os::raw::c_uint; +pub type __kernel_size_t = __kernel_ulong_t; +pub type __kernel_ssize_t = __kernel_long_t; +pub type __kernel_ptrdiff_t = __kernel_long_t; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct __kernel_fsid_t { + pub val: [::std::os::raw::c_int; 2usize], +} +#[test] +fn bindgen_test_layout___kernel_fsid_t() { + assert_eq!( + ::std::mem::size_of::<__kernel_fsid_t>(), + 8usize, + concat!("Size of: ", 
stringify!(__kernel_fsid_t)) + ); + assert_eq!( + ::std::mem::align_of::<__kernel_fsid_t>(), + 4usize, + concat!("Alignment of ", stringify!(__kernel_fsid_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const __kernel_fsid_t)).val as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(__kernel_fsid_t), + "::", + stringify!(val) + ) + ); +} +impl Clone for __kernel_fsid_t { + fn clone(&self) -> Self { + *self + } +} +pub type __kernel_off_t = __kernel_long_t; +pub type __kernel_loff_t = ::std::os::raw::c_longlong; +pub type __kernel_time_t = __kernel_long_t; +pub type __kernel_clock_t = __kernel_long_t; +pub type __kernel_timer_t = ::std::os::raw::c_int; +pub type __kernel_clockid_t = ::std::os::raw::c_int; +pub type __kernel_caddr_t = *mut ::std::os::raw::c_char; +pub type __kernel_uid16_t = ::std::os::raw::c_ushort; +pub type __kernel_gid16_t = ::std::os::raw::c_ushort; +pub type __le16 = __u16; +pub type __be16 = __u16; +pub type __le32 = __u32; +pub type __be32 = __u32; +pub type __le64 = __u64; +pub type __be64 = __u64; +pub type __sum16 = __u16; +pub type __wsum = __u32; +pub type __kernel_sa_family_t = ::std::os::raw::c_ushort; +#[repr(C)] +pub struct __kernel_sockaddr_storage { + pub ss_family: __kernel_sa_family_t, + pub __data: [::std::os::raw::c_char; 126usize], + pub __bindgen_align: [u64; 0usize], +} +#[test] +fn bindgen_test_layout___kernel_sockaddr_storage() { + assert_eq!( + ::std::mem::size_of::<__kernel_sockaddr_storage>(), + 128usize, + concat!("Size of: ", stringify!(__kernel_sockaddr_storage)) + ); + assert_eq!( + ::std::mem::align_of::<__kernel_sockaddr_storage>(), + 8usize, + concat!("Alignment of ", stringify!(__kernel_sockaddr_storage)) + ); + assert_eq!( + unsafe { &(*(0 as *const __kernel_sockaddr_storage)).ss_family as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(__kernel_sockaddr_storage), + "::", + stringify!(ss_family) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const __kernel_sockaddr_storage)).__data as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(__kernel_sockaddr_storage), + "::", + stringify!(__data) + ) + ); +} +impl Default for __kernel_sockaddr_storage { + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } +} +pub type __u_char = ::std::os::raw::c_uchar; +pub type __u_short = ::std::os::raw::c_ushort; +pub type __u_int = ::std::os::raw::c_uint; +pub type __u_long = ::std::os::raw::c_ulong; +pub type __int8_t = ::std::os::raw::c_schar; +pub type __uint8_t = ::std::os::raw::c_uchar; +pub type __int16_t = ::std::os::raw::c_short; +pub type __uint16_t = ::std::os::raw::c_ushort; +pub type __int32_t = ::std::os::raw::c_int; +pub type __uint32_t = ::std::os::raw::c_uint; +pub type __int64_t = ::std::os::raw::c_long; +pub type __uint64_t = ::std::os::raw::c_ulong; +pub type __quad_t = ::std::os::raw::c_long; +pub type __u_quad_t = ::std::os::raw::c_ulong; +pub type __dev_t = ::std::os::raw::c_ulong; +pub type __uid_t = ::std::os::raw::c_uint; +pub type __gid_t = ::std::os::raw::c_uint; +pub type __ino_t = ::std::os::raw::c_ulong; +pub type __ino64_t = ::std::os::raw::c_ulong; +pub type __mode_t = ::std::os::raw::c_uint; +pub type __nlink_t = ::std::os::raw::c_ulong; +pub type __off_t = ::std::os::raw::c_long; +pub type __off64_t = ::std::os::raw::c_long; +pub type __pid_t = ::std::os::raw::c_int; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct __fsid_t { + pub __val: [::std::os::raw::c_int; 2usize], +} +#[test] +fn bindgen_test_layout___fsid_t() 
{ + assert_eq!( + ::std::mem::size_of::<__fsid_t>(), + 8usize, + concat!("Size of: ", stringify!(__fsid_t)) + ); + assert_eq!( + ::std::mem::align_of::<__fsid_t>(), + 4usize, + concat!("Alignment of ", stringify!(__fsid_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const __fsid_t)).__val as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(__fsid_t), + "::", + stringify!(__val) + ) + ); +} +impl Clone for __fsid_t { + fn clone(&self) -> Self { + *self + } +} +pub type __clock_t = ::std::os::raw::c_long; +pub type __rlim_t = ::std::os::raw::c_ulong; +pub type __rlim64_t = ::std::os::raw::c_ulong; +pub type __id_t = ::std::os::raw::c_uint; +pub type __time_t = ::std::os::raw::c_long; +pub type __useconds_t = ::std::os::raw::c_uint; +pub type __suseconds_t = ::std::os::raw::c_long; +pub type __daddr_t = ::std::os::raw::c_int; +pub type __key_t = ::std::os::raw::c_int; +pub type __clockid_t = ::std::os::raw::c_int; +pub type __timer_t = *mut ::std::os::raw::c_void; +pub type __blksize_t = ::std::os::raw::c_long; +pub type __blkcnt_t = ::std::os::raw::c_long; +pub type __blkcnt64_t = ::std::os::raw::c_long; +pub type __fsblkcnt_t = ::std::os::raw::c_ulong; +pub type __fsblkcnt64_t = ::std::os::raw::c_ulong; +pub type __fsfilcnt_t = ::std::os::raw::c_ulong; +pub type __fsfilcnt64_t = ::std::os::raw::c_ulong; +pub type __fsword_t = ::std::os::raw::c_long; +pub type __ssize_t = ::std::os::raw::c_long; +pub type __syscall_slong_t = ::std::os::raw::c_long; +pub type __syscall_ulong_t = ::std::os::raw::c_ulong; +pub type __loff_t = __off64_t; +pub type __qaddr_t = *mut __quad_t; +pub type __caddr_t = *mut ::std::os::raw::c_char; +pub type __intptr_t = ::std::os::raw::c_long; +pub type __socklen_t = ::std::os::raw::c_uint; +pub type u_char = __u_char; +pub type u_short = __u_short; +pub type u_int = __u_int; +pub type u_long = __u_long; +pub type quad_t = __quad_t; +pub type u_quad_t = __u_quad_t; +pub type fsid_t = __fsid_t; +pub type loff_t = __loff_t; +pub type ino_t = __ino_t; +pub type dev_t = __dev_t; +pub type gid_t = __gid_t; +pub type mode_t = __mode_t; +pub type nlink_t = __nlink_t; +pub type uid_t = __uid_t; +pub type off_t = __off_t; +pub type pid_t = __pid_t; +pub type id_t = __id_t; +pub type daddr_t = __daddr_t; +pub type caddr_t = __caddr_t; +pub type key_t = __key_t; +pub type clock_t = __clock_t; +pub type time_t = __time_t; +pub type clockid_t = __clockid_t; +pub type timer_t = __timer_t; +pub type ulong = ::std::os::raw::c_ulong; +pub type ushort = ::std::os::raw::c_ushort; +pub type uint = ::std::os::raw::c_uint; +pub type u_int8_t = ::std::os::raw::c_uchar; +pub type u_int16_t = ::std::os::raw::c_ushort; +pub type u_int32_t = ::std::os::raw::c_uint; +pub type u_int64_t = ::std::os::raw::c_ulong; +pub type register_t = ::std::os::raw::c_long; +pub type __sig_atomic_t = ::std::os::raw::c_int; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct __sigset_t { + pub __val: [::std::os::raw::c_ulong; 16usize], +} +#[test] +fn bindgen_test_layout___sigset_t() { + assert_eq!( + ::std::mem::size_of::<__sigset_t>(), + 128usize, + concat!("Size of: ", stringify!(__sigset_t)) + ); + assert_eq!( + ::std::mem::align_of::<__sigset_t>(), + 8usize, + concat!("Alignment of ", stringify!(__sigset_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const __sigset_t)).__val as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(__sigset_t), + "::", + stringify!(__val) + ) + ); +} +impl Clone for __sigset_t { + fn clone(&self) -> Self { + *self + } 
+} +pub type sigset_t = __sigset_t; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct timespec { + pub tv_sec: __time_t, + pub tv_nsec: __syscall_slong_t, +} +#[test] +fn bindgen_test_layout_timespec() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(timespec)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(timespec)) + ); + assert_eq!( + unsafe { &(*(0 as *const timespec)).tv_sec as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(timespec), + "::", + stringify!(tv_sec) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const timespec)).tv_nsec as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(timespec), + "::", + stringify!(tv_nsec) + ) + ); +} +impl Clone for timespec { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct timeval { + pub tv_sec: __time_t, + pub tv_usec: __suseconds_t, +} +#[test] +fn bindgen_test_layout_timeval() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(timeval)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(timeval)) + ); + assert_eq!( + unsafe { &(*(0 as *const timeval)).tv_sec as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(timeval), + "::", + stringify!(tv_sec) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const timeval)).tv_usec as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(timeval), + "::", + stringify!(tv_usec) + ) + ); +} +impl Clone for timeval { + fn clone(&self) -> Self { + *self + } +} +pub type suseconds_t = __suseconds_t; +pub type __fd_mask = ::std::os::raw::c_long; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct fd_set { + pub __fds_bits: [__fd_mask; 16usize], +} +#[test] +fn bindgen_test_layout_fd_set() { + assert_eq!( + ::std::mem::size_of::(), + 128usize, + concat!("Size of: ", stringify!(fd_set)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(fd_set)) + ); + assert_eq!( + unsafe { &(*(0 as *const fd_set)).__fds_bits as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(fd_set), + "::", + stringify!(__fds_bits) + ) + ); +} +impl Clone for fd_set { + fn clone(&self) -> Self { + *self + } +} +pub type fd_mask = __fd_mask; +extern "C" { + pub fn select( + __nfds: ::std::os::raw::c_int, + __readfds: *mut fd_set, + __writefds: *mut fd_set, + __exceptfds: *mut fd_set, + __timeout: *mut timeval, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn pselect( + __nfds: ::std::os::raw::c_int, + __readfds: *mut fd_set, + __writefds: *mut fd_set, + __exceptfds: *mut fd_set, + __timeout: *const timespec, + __sigmask: *const __sigset_t, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn gnu_dev_major(__dev: ::std::os::raw::c_ulonglong) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn gnu_dev_minor(__dev: ::std::os::raw::c_ulonglong) -> ::std::os::raw::c_uint; +} +extern "C" { + pub fn gnu_dev_makedev( + __major: ::std::os::raw::c_uint, + __minor: ::std::os::raw::c_uint, + ) -> ::std::os::raw::c_ulonglong; +} +pub type blksize_t = __blksize_t; +pub type blkcnt_t = __blkcnt_t; +pub type fsblkcnt_t = __fsblkcnt_t; +pub type fsfilcnt_t = __fsfilcnt_t; +pub type pthread_t = ::std::os::raw::c_ulong; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct pthread_attr_t { + pub __size: 
__BindgenUnionField<[::std::os::raw::c_char; 56usize]>, + pub __align: __BindgenUnionField<::std::os::raw::c_long>, + pub bindgen_union_field: [u64; 7usize], +} +#[test] +fn bindgen_test_layout_pthread_attr_t() { + assert_eq!( + ::std::mem::size_of::(), + 56usize, + concat!("Size of: ", stringify!(pthread_attr_t)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(pthread_attr_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_attr_t)).__size as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_attr_t), + "::", + stringify!(__size) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_attr_t)).__align as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_attr_t), + "::", + stringify!(__align) + ) + ); +} +impl Clone for pthread_attr_t { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Copy)] +pub struct __pthread_internal_list { + pub __prev: *mut __pthread_internal_list, + pub __next: *mut __pthread_internal_list, +} +#[test] +fn bindgen_test_layout___pthread_internal_list() { + assert_eq!( + ::std::mem::size_of::<__pthread_internal_list>(), + 16usize, + concat!("Size of: ", stringify!(__pthread_internal_list)) + ); + assert_eq!( + ::std::mem::align_of::<__pthread_internal_list>(), + 8usize, + concat!("Alignment of ", stringify!(__pthread_internal_list)) + ); + assert_eq!( + unsafe { &(*(0 as *const __pthread_internal_list)).__prev as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(__pthread_internal_list), + "::", + stringify!(__prev) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const __pthread_internal_list)).__next as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(__pthread_internal_list), + "::", + stringify!(__next) + ) + ); +} +impl Clone for __pthread_internal_list { + fn clone(&self) -> Self { + *self + } +} +impl Default for __pthread_internal_list { + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } +} +pub type __pthread_list_t = __pthread_internal_list; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct pthread_mutex_t { + pub __data: __BindgenUnionField, + pub __size: __BindgenUnionField<[::std::os::raw::c_char; 40usize]>, + pub __align: __BindgenUnionField<::std::os::raw::c_long>, + pub bindgen_union_field: [u64; 5usize], +} +#[repr(C)] +#[derive(Debug, Copy)] +pub struct pthread_mutex_t___pthread_mutex_s { + pub __lock: ::std::os::raw::c_int, + pub __count: ::std::os::raw::c_uint, + pub __owner: ::std::os::raw::c_int, + pub __nusers: ::std::os::raw::c_uint, + pub __kind: ::std::os::raw::c_int, + pub __spins: ::std::os::raw::c_short, + pub __elision: ::std::os::raw::c_short, + pub __list: __pthread_list_t, +} +#[test] +fn bindgen_test_layout_pthread_mutex_t___pthread_mutex_s() { + assert_eq!( + ::std::mem::size_of::(), + 40usize, + concat!("Size of: ", stringify!(pthread_mutex_t___pthread_mutex_s)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!( + "Alignment of ", + stringify!(pthread_mutex_t___pthread_mutex_s) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_mutex_t___pthread_mutex_s)).__lock as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_mutex_t___pthread_mutex_s), + "::", + stringify!(__lock) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_mutex_t___pthread_mutex_s)).__count as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + 
stringify!(pthread_mutex_t___pthread_mutex_s), + "::", + stringify!(__count) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_mutex_t___pthread_mutex_s)).__owner as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(pthread_mutex_t___pthread_mutex_s), + "::", + stringify!(__owner) + ) + ); + assert_eq!( + unsafe { + &(*(0 as *const pthread_mutex_t___pthread_mutex_s)).__nusers as *const _ as usize + }, + 12usize, + concat!( + "Alignment of field: ", + stringify!(pthread_mutex_t___pthread_mutex_s), + "::", + stringify!(__nusers) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_mutex_t___pthread_mutex_s)).__kind as *const _ as usize }, + 16usize, + concat!( + "Alignment of field: ", + stringify!(pthread_mutex_t___pthread_mutex_s), + "::", + stringify!(__kind) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_mutex_t___pthread_mutex_s)).__spins as *const _ as usize }, + 20usize, + concat!( + "Alignment of field: ", + stringify!(pthread_mutex_t___pthread_mutex_s), + "::", + stringify!(__spins) + ) + ); + assert_eq!( + unsafe { + &(*(0 as *const pthread_mutex_t___pthread_mutex_s)).__elision as *const _ as usize + }, + 22usize, + concat!( + "Alignment of field: ", + stringify!(pthread_mutex_t___pthread_mutex_s), + "::", + stringify!(__elision) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_mutex_t___pthread_mutex_s)).__list as *const _ as usize }, + 24usize, + concat!( + "Alignment of field: ", + stringify!(pthread_mutex_t___pthread_mutex_s), + "::", + stringify!(__list) + ) + ); +} +impl Clone for pthread_mutex_t___pthread_mutex_s { + fn clone(&self) -> Self { + *self + } +} +impl Default for pthread_mutex_t___pthread_mutex_s { + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } +} +#[test] +fn bindgen_test_layout_pthread_mutex_t() { + assert_eq!( + ::std::mem::size_of::(), + 40usize, + concat!("Size of: ", stringify!(pthread_mutex_t)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(pthread_mutex_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_mutex_t)).__data as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_mutex_t), + "::", + stringify!(__data) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_mutex_t)).__size as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_mutex_t), + "::", + stringify!(__size) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_mutex_t)).__align as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_mutex_t), + "::", + stringify!(__align) + ) + ); +} +impl Clone for pthread_mutex_t { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct pthread_mutexattr_t { + pub __size: __BindgenUnionField<[::std::os::raw::c_char; 4usize]>, + pub __align: __BindgenUnionField<::std::os::raw::c_int>, + pub bindgen_union_field: u32, +} +#[test] +fn bindgen_test_layout_pthread_mutexattr_t() { + assert_eq!( + ::std::mem::size_of::(), + 4usize, + concat!("Size of: ", stringify!(pthread_mutexattr_t)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(pthread_mutexattr_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_mutexattr_t)).__size as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_mutexattr_t), + "::", + stringify!(__size) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const 
pthread_mutexattr_t)).__align as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_mutexattr_t), + "::", + stringify!(__align) + ) + ); +} +impl Clone for pthread_mutexattr_t { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct pthread_cond_t { + pub __data: __BindgenUnionField, + pub __size: __BindgenUnionField<[::std::os::raw::c_char; 48usize]>, + pub __align: __BindgenUnionField<::std::os::raw::c_longlong>, + pub bindgen_union_field: [u64; 6usize], +} +#[repr(C)] +#[derive(Debug, Copy)] +pub struct pthread_cond_t__bindgen_ty_1 { + pub __lock: ::std::os::raw::c_int, + pub __futex: ::std::os::raw::c_uint, + pub __total_seq: ::std::os::raw::c_ulonglong, + pub __wakeup_seq: ::std::os::raw::c_ulonglong, + pub __woken_seq: ::std::os::raw::c_ulonglong, + pub __mutex: *mut ::std::os::raw::c_void, + pub __nwaiters: ::std::os::raw::c_uint, + pub __broadcast_seq: ::std::os::raw::c_uint, +} +#[test] +fn bindgen_test_layout_pthread_cond_t__bindgen_ty_1() { + assert_eq!( + ::std::mem::size_of::(), + 48usize, + concat!("Size of: ", stringify!(pthread_cond_t__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(pthread_cond_t__bindgen_ty_1)) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_cond_t__bindgen_ty_1)).__lock as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_cond_t__bindgen_ty_1), + "::", + stringify!(__lock) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_cond_t__bindgen_ty_1)).__futex as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(pthread_cond_t__bindgen_ty_1), + "::", + stringify!(__futex) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_cond_t__bindgen_ty_1)).__total_seq as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(pthread_cond_t__bindgen_ty_1), + "::", + stringify!(__total_seq) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_cond_t__bindgen_ty_1)).__wakeup_seq as *const _ as usize }, + 16usize, + concat!( + "Alignment of field: ", + stringify!(pthread_cond_t__bindgen_ty_1), + "::", + stringify!(__wakeup_seq) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_cond_t__bindgen_ty_1)).__woken_seq as *const _ as usize }, + 24usize, + concat!( + "Alignment of field: ", + stringify!(pthread_cond_t__bindgen_ty_1), + "::", + stringify!(__woken_seq) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_cond_t__bindgen_ty_1)).__mutex as *const _ as usize }, + 32usize, + concat!( + "Alignment of field: ", + stringify!(pthread_cond_t__bindgen_ty_1), + "::", + stringify!(__mutex) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_cond_t__bindgen_ty_1)).__nwaiters as *const _ as usize }, + 40usize, + concat!( + "Alignment of field: ", + stringify!(pthread_cond_t__bindgen_ty_1), + "::", + stringify!(__nwaiters) + ) + ); + assert_eq!( + unsafe { + &(*(0 as *const pthread_cond_t__bindgen_ty_1)).__broadcast_seq as *const _ as usize + }, + 44usize, + concat!( + "Alignment of field: ", + stringify!(pthread_cond_t__bindgen_ty_1), + "::", + stringify!(__broadcast_seq) + ) + ); +} +impl Clone for pthread_cond_t__bindgen_ty_1 { + fn clone(&self) -> Self { + *self + } +} +impl Default for pthread_cond_t__bindgen_ty_1 { + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } +} +#[test] +fn bindgen_test_layout_pthread_cond_t() { + assert_eq!( + ::std::mem::size_of::(), + 48usize, + concat!("Size 
of: ", stringify!(pthread_cond_t)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(pthread_cond_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_cond_t)).__data as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_cond_t), + "::", + stringify!(__data) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_cond_t)).__size as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_cond_t), + "::", + stringify!(__size) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_cond_t)).__align as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_cond_t), + "::", + stringify!(__align) + ) + ); +} +impl Clone for pthread_cond_t { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct pthread_condattr_t { + pub __size: __BindgenUnionField<[::std::os::raw::c_char; 4usize]>, + pub __align: __BindgenUnionField<::std::os::raw::c_int>, + pub bindgen_union_field: u32, +} +#[test] +fn bindgen_test_layout_pthread_condattr_t() { + assert_eq!( + ::std::mem::size_of::(), + 4usize, + concat!("Size of: ", stringify!(pthread_condattr_t)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(pthread_condattr_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_condattr_t)).__size as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_condattr_t), + "::", + stringify!(__size) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_condattr_t)).__align as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_condattr_t), + "::", + stringify!(__align) + ) + ); +} +impl Clone for pthread_condattr_t { + fn clone(&self) -> Self { + *self + } +} +pub type pthread_key_t = ::std::os::raw::c_uint; +pub type pthread_once_t = ::std::os::raw::c_int; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct pthread_rwlock_t { + pub __data: __BindgenUnionField, + pub __size: __BindgenUnionField<[::std::os::raw::c_char; 56usize]>, + pub __align: __BindgenUnionField<::std::os::raw::c_long>, + pub bindgen_union_field: [u64; 7usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct pthread_rwlock_t__bindgen_ty_1 { + pub __lock: ::std::os::raw::c_int, + pub __nr_readers: ::std::os::raw::c_uint, + pub __readers_wakeup: ::std::os::raw::c_uint, + pub __writer_wakeup: ::std::os::raw::c_uint, + pub __nr_readers_queued: ::std::os::raw::c_uint, + pub __nr_writers_queued: ::std::os::raw::c_uint, + pub __writer: ::std::os::raw::c_int, + pub __shared: ::std::os::raw::c_int, + pub __rwelision: ::std::os::raw::c_schar, + pub __pad1: [::std::os::raw::c_uchar; 7usize], + pub __pad2: ::std::os::raw::c_ulong, + pub __flags: ::std::os::raw::c_uint, +} +#[test] +fn bindgen_test_layout_pthread_rwlock_t__bindgen_ty_1() { + assert_eq!( + ::std::mem::size_of::(), + 56usize, + concat!("Size of: ", stringify!(pthread_rwlock_t__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(pthread_rwlock_t__bindgen_ty_1)) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_rwlock_t__bindgen_ty_1)).__lock as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t__bindgen_ty_1), + "::", + stringify!(__lock) + ) + ); + assert_eq!( + unsafe { + &(*(0 as *const pthread_rwlock_t__bindgen_ty_1)).__nr_readers as *const _ as usize + }, 
+ 4usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t__bindgen_ty_1), + "::", + stringify!(__nr_readers) + ) + ); + assert_eq!( + unsafe { + &(*(0 as *const pthread_rwlock_t__bindgen_ty_1)).__readers_wakeup as *const _ as usize + }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t__bindgen_ty_1), + "::", + stringify!(__readers_wakeup) + ) + ); + assert_eq!( + unsafe { + &(*(0 as *const pthread_rwlock_t__bindgen_ty_1)).__writer_wakeup as *const _ as usize + }, + 12usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t__bindgen_ty_1), + "::", + stringify!(__writer_wakeup) + ) + ); + assert_eq!( + unsafe { + &(*(0 as *const pthread_rwlock_t__bindgen_ty_1)).__nr_readers_queued as *const _ + as usize + }, + 16usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t__bindgen_ty_1), + "::", + stringify!(__nr_readers_queued) + ) + ); + assert_eq!( + unsafe { + &(*(0 as *const pthread_rwlock_t__bindgen_ty_1)).__nr_writers_queued as *const _ + as usize + }, + 20usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t__bindgen_ty_1), + "::", + stringify!(__nr_writers_queued) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_rwlock_t__bindgen_ty_1)).__writer as *const _ as usize }, + 24usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t__bindgen_ty_1), + "::", + stringify!(__writer) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_rwlock_t__bindgen_ty_1)).__shared as *const _ as usize }, + 28usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t__bindgen_ty_1), + "::", + stringify!(__shared) + ) + ); + assert_eq!( + unsafe { + &(*(0 as *const pthread_rwlock_t__bindgen_ty_1)).__rwelision as *const _ as usize + }, + 32usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t__bindgen_ty_1), + "::", + stringify!(__rwelision) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_rwlock_t__bindgen_ty_1)).__pad1 as *const _ as usize }, + 33usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t__bindgen_ty_1), + "::", + stringify!(__pad1) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_rwlock_t__bindgen_ty_1)).__pad2 as *const _ as usize }, + 40usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t__bindgen_ty_1), + "::", + stringify!(__pad2) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_rwlock_t__bindgen_ty_1)).__flags as *const _ as usize }, + 48usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t__bindgen_ty_1), + "::", + stringify!(__flags) + ) + ); +} +impl Clone for pthread_rwlock_t__bindgen_ty_1 { + fn clone(&self) -> Self { + *self + } +} +#[test] +fn bindgen_test_layout_pthread_rwlock_t() { + assert_eq!( + ::std::mem::size_of::(), + 56usize, + concat!("Size of: ", stringify!(pthread_rwlock_t)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(pthread_rwlock_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_rwlock_t)).__data as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t), + "::", + stringify!(__data) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_rwlock_t)).__size as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlock_t), + "::", + stringify!(__size) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_rwlock_t)).__align as *const _ as usize }, + 0usize, + concat!( + "Alignment 
of field: ", + stringify!(pthread_rwlock_t), + "::", + stringify!(__align) + ) + ); +} +impl Clone for pthread_rwlock_t { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct pthread_rwlockattr_t { + pub __size: __BindgenUnionField<[::std::os::raw::c_char; 8usize]>, + pub __align: __BindgenUnionField<::std::os::raw::c_long>, + pub bindgen_union_field: u64, +} +#[test] +fn bindgen_test_layout_pthread_rwlockattr_t() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(pthread_rwlockattr_t)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(pthread_rwlockattr_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_rwlockattr_t)).__size as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlockattr_t), + "::", + stringify!(__size) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_rwlockattr_t)).__align as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_rwlockattr_t), + "::", + stringify!(__align) + ) + ); +} +impl Clone for pthread_rwlockattr_t { + fn clone(&self) -> Self { + *self + } +} +pub type pthread_spinlock_t = ::std::os::raw::c_int; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct pthread_barrier_t { + pub __size: __BindgenUnionField<[::std::os::raw::c_char; 32usize]>, + pub __align: __BindgenUnionField<::std::os::raw::c_long>, + pub bindgen_union_field: [u64; 4usize], +} +#[test] +fn bindgen_test_layout_pthread_barrier_t() { + assert_eq!( + ::std::mem::size_of::(), + 32usize, + concat!("Size of: ", stringify!(pthread_barrier_t)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(pthread_barrier_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_barrier_t)).__size as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_barrier_t), + "::", + stringify!(__size) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_barrier_t)).__align as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_barrier_t), + "::", + stringify!(__align) + ) + ); +} +impl Clone for pthread_barrier_t { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct pthread_barrierattr_t { + pub __size: __BindgenUnionField<[::std::os::raw::c_char; 4usize]>, + pub __align: __BindgenUnionField<::std::os::raw::c_int>, + pub bindgen_union_field: u32, +} +#[test] +fn bindgen_test_layout_pthread_barrierattr_t() { + assert_eq!( + ::std::mem::size_of::(), + 4usize, + concat!("Size of: ", stringify!(pthread_barrierattr_t)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(pthread_barrierattr_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_barrierattr_t)).__size as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_barrierattr_t), + "::", + stringify!(__size) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const pthread_barrierattr_t)).__align as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(pthread_barrierattr_t), + "::", + stringify!(__align) + ) + ); +} +impl Clone for pthread_barrierattr_t { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Copy)] +pub struct iovec { + pub iov_base: *mut ::std::os::raw::c_void, + pub iov_len: usize, +} +#[test] +fn bindgen_test_layout_iovec() { + assert_eq!( + 
::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(iovec)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(iovec)) + ); + assert_eq!( + unsafe { &(*(0 as *const iovec)).iov_base as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(iovec), + "::", + stringify!(iov_base) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const iovec)).iov_len as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(iovec), + "::", + stringify!(iov_len) + ) + ); +} +impl Clone for iovec { + fn clone(&self) -> Self { + *self + } +} +impl Default for iovec { + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } +} +extern "C" { + pub fn readv( + __fd: ::std::os::raw::c_int, + __iovec: *const iovec, + __count: ::std::os::raw::c_int, + ) -> isize; +} +extern "C" { + pub fn writev( + __fd: ::std::os::raw::c_int, + __iovec: *const iovec, + __count: ::std::os::raw::c_int, + ) -> isize; +} +extern "C" { + pub fn preadv( + __fd: ::std::os::raw::c_int, + __iovec: *const iovec, + __count: ::std::os::raw::c_int, + __offset: __off_t, + ) -> isize; +} +extern "C" { + pub fn pwritev( + __fd: ::std::os::raw::c_int, + __iovec: *const iovec, + __count: ::std::os::raw::c_int, + __offset: __off_t, + ) -> isize; +} +pub type socklen_t = __socklen_t; +pub const __socket_type_SOCK_STREAM: __socket_type = 1; +pub const __socket_type_SOCK_DGRAM: __socket_type = 2; +pub const __socket_type_SOCK_RAW: __socket_type = 3; +pub const __socket_type_SOCK_RDM: __socket_type = 4; +pub const __socket_type_SOCK_SEQPACKET: __socket_type = 5; +pub const __socket_type_SOCK_DCCP: __socket_type = 6; +pub const __socket_type_SOCK_PACKET: __socket_type = 10; +pub const __socket_type_SOCK_CLOEXEC: __socket_type = 524288; +pub const __socket_type_SOCK_NONBLOCK: __socket_type = 2048; +pub type __socket_type = ::std::os::raw::c_uint; +pub type sa_family_t = ::std::os::raw::c_ushort; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct sockaddr { + pub sa_family: sa_family_t, + pub sa_data: [::std::os::raw::c_char; 14usize], +} +#[test] +fn bindgen_test_layout_sockaddr() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(sockaddr)) + ); + assert_eq!( + ::std::mem::align_of::(), + 2usize, + concat!("Alignment of ", stringify!(sockaddr)) + ); + assert_eq!( + unsafe { &(*(0 as *const sockaddr)).sa_family as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(sockaddr), + "::", + stringify!(sa_family) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const sockaddr)).sa_data as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(sockaddr), + "::", + stringify!(sa_data) + ) + ); +} +impl Clone for sockaddr { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +pub struct sockaddr_storage { + pub ss_family: sa_family_t, + pub __ss_padding: [::std::os::raw::c_char; 118usize], + pub __ss_align: ::std::os::raw::c_ulong, +} +#[test] +fn bindgen_test_layout_sockaddr_storage() { + assert_eq!( + ::std::mem::size_of::(), + 128usize, + concat!("Size of: ", stringify!(sockaddr_storage)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(sockaddr_storage)) + ); + assert_eq!( + unsafe { &(*(0 as *const sockaddr_storage)).ss_family as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(sockaddr_storage), + "::", + stringify!(ss_family) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const 
sockaddr_storage)).__ss_padding as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(sockaddr_storage), + "::", + stringify!(__ss_padding) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const sockaddr_storage)).__ss_align as *const _ as usize }, + 120usize, + concat!( + "Alignment of field: ", + stringify!(sockaddr_storage), + "::", + stringify!(__ss_align) + ) + ); +} +impl Default for sockaddr_storage { + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } +} +pub const MSG_OOB: _bindgen_ty_1 = 1; +pub const MSG_PEEK: _bindgen_ty_1 = 2; +pub const MSG_DONTROUTE: _bindgen_ty_1 = 4; +pub const MSG_CTRUNC: _bindgen_ty_1 = 8; +pub const MSG_PROXY: _bindgen_ty_1 = 16; +pub const MSG_TRUNC: _bindgen_ty_1 = 32; +pub const MSG_DONTWAIT: _bindgen_ty_1 = 64; +pub const MSG_EOR: _bindgen_ty_1 = 128; +pub const MSG_WAITALL: _bindgen_ty_1 = 256; +pub const MSG_FIN: _bindgen_ty_1 = 512; +pub const MSG_SYN: _bindgen_ty_1 = 1024; +pub const MSG_CONFIRM: _bindgen_ty_1 = 2048; +pub const MSG_RST: _bindgen_ty_1 = 4096; +pub const MSG_ERRQUEUE: _bindgen_ty_1 = 8192; +pub const MSG_NOSIGNAL: _bindgen_ty_1 = 16384; +pub const MSG_MORE: _bindgen_ty_1 = 32768; +pub const MSG_WAITFORONE: _bindgen_ty_1 = 65536; +pub const MSG_FASTOPEN: _bindgen_ty_1 = 536870912; +pub const MSG_CMSG_CLOEXEC: _bindgen_ty_1 = 1073741824; +pub type _bindgen_ty_1 = ::std::os::raw::c_uint; +#[repr(C)] +#[derive(Debug, Copy)] +pub struct msghdr { + pub msg_name: *mut ::std::os::raw::c_void, + pub msg_namelen: socklen_t, + pub msg_iov: *mut iovec, + pub msg_iovlen: usize, + pub msg_control: *mut ::std::os::raw::c_void, + pub msg_controllen: usize, + pub msg_flags: ::std::os::raw::c_int, +} +#[test] +fn bindgen_test_layout_msghdr() { + assert_eq!( + ::std::mem::size_of::(), + 56usize, + concat!("Size of: ", stringify!(msghdr)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(msghdr)) + ); + assert_eq!( + unsafe { &(*(0 as *const msghdr)).msg_name as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(msghdr), + "::", + stringify!(msg_name) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const msghdr)).msg_namelen as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(msghdr), + "::", + stringify!(msg_namelen) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const msghdr)).msg_iov as *const _ as usize }, + 16usize, + concat!( + "Alignment of field: ", + stringify!(msghdr), + "::", + stringify!(msg_iov) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const msghdr)).msg_iovlen as *const _ as usize }, + 24usize, + concat!( + "Alignment of field: ", + stringify!(msghdr), + "::", + stringify!(msg_iovlen) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const msghdr)).msg_control as *const _ as usize }, + 32usize, + concat!( + "Alignment of field: ", + stringify!(msghdr), + "::", + stringify!(msg_control) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const msghdr)).msg_controllen as *const _ as usize }, + 40usize, + concat!( + "Alignment of field: ", + stringify!(msghdr), + "::", + stringify!(msg_controllen) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const msghdr)).msg_flags as *const _ as usize }, + 48usize, + concat!( + "Alignment of field: ", + stringify!(msghdr), + "::", + stringify!(msg_flags) + ) + ); +} +impl Clone for msghdr { + fn clone(&self) -> Self { + *self + } +} +impl Default for msghdr { + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct 
cmsghdr { + pub cmsg_len: usize, + pub cmsg_level: ::std::os::raw::c_int, + pub cmsg_type: ::std::os::raw::c_int, + pub __cmsg_data: __IncompleteArrayField<::std::os::raw::c_uchar>, +} +#[test] +fn bindgen_test_layout_cmsghdr() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(cmsghdr)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(cmsghdr)) + ); +} +impl Clone for cmsghdr { + fn clone(&self) -> Self { + *self + } +} +extern "C" { + pub fn __cmsg_nxthdr(__mhdr: *mut msghdr, __cmsg: *mut cmsghdr) -> *mut cmsghdr; +} +pub const SCM_RIGHTS: _bindgen_ty_2 = 1; +pub type _bindgen_ty_2 = ::std::os::raw::c_uint; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct linger { + pub l_onoff: ::std::os::raw::c_int, + pub l_linger: ::std::os::raw::c_int, +} +#[test] +fn bindgen_test_layout_linger() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(linger)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(linger)) + ); + assert_eq!( + unsafe { &(*(0 as *const linger)).l_onoff as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(linger), + "::", + stringify!(l_onoff) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const linger)).l_linger as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(linger), + "::", + stringify!(l_linger) + ) + ); +} +impl Clone for linger { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct osockaddr { + pub sa_family: ::std::os::raw::c_ushort, + pub sa_data: [::std::os::raw::c_uchar; 14usize], +} +#[test] +fn bindgen_test_layout_osockaddr() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(osockaddr)) + ); + assert_eq!( + ::std::mem::align_of::(), + 2usize, + concat!("Alignment of ", stringify!(osockaddr)) + ); + assert_eq!( + unsafe { &(*(0 as *const osockaddr)).sa_family as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(osockaddr), + "::", + stringify!(sa_family) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const osockaddr)).sa_data as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(osockaddr), + "::", + stringify!(sa_data) + ) + ); +} +impl Clone for osockaddr { + fn clone(&self) -> Self { + *self + } +} +pub const SHUT_RD: _bindgen_ty_3 = 0; +pub const SHUT_WR: _bindgen_ty_3 = 1; +pub const SHUT_RDWR: _bindgen_ty_3 = 2; +pub type _bindgen_ty_3 = ::std::os::raw::c_uint; +extern "C" { + pub fn socket( + __domain: ::std::os::raw::c_int, + __type: ::std::os::raw::c_int, + __protocol: ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn socketpair( + __domain: ::std::os::raw::c_int, + __type: ::std::os::raw::c_int, + __protocol: ::std::os::raw::c_int, + __fds: *mut ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn bind( + __fd: ::std::os::raw::c_int, + __addr: *const sockaddr, + __len: socklen_t, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn getsockname( + __fd: ::std::os::raw::c_int, + __addr: *mut sockaddr, + __len: *mut socklen_t, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn connect( + __fd: ::std::os::raw::c_int, + __addr: *const sockaddr, + __len: socklen_t, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn getpeername( + __fd: ::std::os::raw::c_int, + __addr: *mut sockaddr, + __len: *mut socklen_t, + ) -> 
::std::os::raw::c_int; +} +extern "C" { + pub fn send( + __fd: ::std::os::raw::c_int, + __buf: *const ::std::os::raw::c_void, + __n: usize, + __flags: ::std::os::raw::c_int, + ) -> isize; +} +extern "C" { + pub fn recv( + __fd: ::std::os::raw::c_int, + __buf: *mut ::std::os::raw::c_void, + __n: usize, + __flags: ::std::os::raw::c_int, + ) -> isize; +} +extern "C" { + pub fn sendto( + __fd: ::std::os::raw::c_int, + __buf: *const ::std::os::raw::c_void, + __n: usize, + __flags: ::std::os::raw::c_int, + __addr: *const sockaddr, + __addr_len: socklen_t, + ) -> isize; +} +extern "C" { + pub fn recvfrom( + __fd: ::std::os::raw::c_int, + __buf: *mut ::std::os::raw::c_void, + __n: usize, + __flags: ::std::os::raw::c_int, + __addr: *mut sockaddr, + __addr_len: *mut socklen_t, + ) -> isize; +} +extern "C" { + pub fn sendmsg( + __fd: ::std::os::raw::c_int, + __message: *const msghdr, + __flags: ::std::os::raw::c_int, + ) -> isize; +} +extern "C" { + pub fn recvmsg( + __fd: ::std::os::raw::c_int, + __message: *mut msghdr, + __flags: ::std::os::raw::c_int, + ) -> isize; +} +extern "C" { + pub fn getsockopt( + __fd: ::std::os::raw::c_int, + __level: ::std::os::raw::c_int, + __optname: ::std::os::raw::c_int, + __optval: *mut ::std::os::raw::c_void, + __optlen: *mut socklen_t, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn setsockopt( + __fd: ::std::os::raw::c_int, + __level: ::std::os::raw::c_int, + __optname: ::std::os::raw::c_int, + __optval: *const ::std::os::raw::c_void, + __optlen: socklen_t, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn listen(__fd: ::std::os::raw::c_int, __n: ::std::os::raw::c_int) + -> ::std::os::raw::c_int; +} +extern "C" { + pub fn accept( + __fd: ::std::os::raw::c_int, + __addr: *mut sockaddr, + __addr_len: *mut socklen_t, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn shutdown( + __fd: ::std::os::raw::c_int, + __how: ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn sockatmark(__fd: ::std::os::raw::c_int) -> ::std::os::raw::c_int; +} +extern "C" { + pub fn isfdtype( + __fd: ::std::os::raw::c_int, + __fdtype: ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct sync_serial_settings { + pub clock_rate: ::std::os::raw::c_uint, + pub clock_type: ::std::os::raw::c_uint, + pub loopback: ::std::os::raw::c_ushort, +} +#[test] +fn bindgen_test_layout_sync_serial_settings() { + assert_eq!( + ::std::mem::size_of::(), + 12usize, + concat!("Size of: ", stringify!(sync_serial_settings)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(sync_serial_settings)) + ); + assert_eq!( + unsafe { &(*(0 as *const sync_serial_settings)).clock_rate as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(sync_serial_settings), + "::", + stringify!(clock_rate) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const sync_serial_settings)).clock_type as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(sync_serial_settings), + "::", + stringify!(clock_type) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const sync_serial_settings)).loopback as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(sync_serial_settings), + "::", + stringify!(loopback) + ) + ); +} +impl Clone for sync_serial_settings { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct te1_settings { + pub clock_rate: ::std::os::raw::c_uint, + pub clock_type: 
::std::os::raw::c_uint, + pub loopback: ::std::os::raw::c_ushort, + pub slot_map: ::std::os::raw::c_uint, +} +#[test] +fn bindgen_test_layout_te1_settings() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(te1_settings)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(te1_settings)) + ); + assert_eq!( + unsafe { &(*(0 as *const te1_settings)).clock_rate as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(te1_settings), + "::", + stringify!(clock_rate) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const te1_settings)).clock_type as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(te1_settings), + "::", + stringify!(clock_type) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const te1_settings)).loopback as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(te1_settings), + "::", + stringify!(loopback) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const te1_settings)).slot_map as *const _ as usize }, + 12usize, + concat!( + "Alignment of field: ", + stringify!(te1_settings), + "::", + stringify!(slot_map) + ) + ); +} +impl Clone for te1_settings { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct raw_hdlc_proto { + pub encoding: ::std::os::raw::c_ushort, + pub parity: ::std::os::raw::c_ushort, +} +#[test] +fn bindgen_test_layout_raw_hdlc_proto() { + assert_eq!( + ::std::mem::size_of::(), + 4usize, + concat!("Size of: ", stringify!(raw_hdlc_proto)) + ); + assert_eq!( + ::std::mem::align_of::(), + 2usize, + concat!("Alignment of ", stringify!(raw_hdlc_proto)) + ); + assert_eq!( + unsafe { &(*(0 as *const raw_hdlc_proto)).encoding as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(raw_hdlc_proto), + "::", + stringify!(encoding) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const raw_hdlc_proto)).parity as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(raw_hdlc_proto), + "::", + stringify!(parity) + ) + ); +} +impl Clone for raw_hdlc_proto { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct fr_proto { + pub t391: ::std::os::raw::c_uint, + pub t392: ::std::os::raw::c_uint, + pub n391: ::std::os::raw::c_uint, + pub n392: ::std::os::raw::c_uint, + pub n393: ::std::os::raw::c_uint, + pub lmi: ::std::os::raw::c_ushort, + pub dce: ::std::os::raw::c_ushort, +} +#[test] +fn bindgen_test_layout_fr_proto() { + assert_eq!( + ::std::mem::size_of::(), + 24usize, + concat!("Size of: ", stringify!(fr_proto)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(fr_proto)) + ); + assert_eq!( + unsafe { &(*(0 as *const fr_proto)).t391 as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(fr_proto), + "::", + stringify!(t391) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const fr_proto)).t392 as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(fr_proto), + "::", + stringify!(t392) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const fr_proto)).n391 as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(fr_proto), + "::", + stringify!(n391) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const fr_proto)).n392 as *const _ as usize }, + 12usize, + concat!( + "Alignment of field: ", + stringify!(fr_proto), + "::", + stringify!(n392) + ) + ); + assert_eq!( + unsafe { 
&(*(0 as *const fr_proto)).n393 as *const _ as usize }, + 16usize, + concat!( + "Alignment of field: ", + stringify!(fr_proto), + "::", + stringify!(n393) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const fr_proto)).lmi as *const _ as usize }, + 20usize, + concat!( + "Alignment of field: ", + stringify!(fr_proto), + "::", + stringify!(lmi) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const fr_proto)).dce as *const _ as usize }, + 22usize, + concat!( + "Alignment of field: ", + stringify!(fr_proto), + "::", + stringify!(dce) + ) + ); +} +impl Clone for fr_proto { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct fr_proto_pvc { + pub dlci: ::std::os::raw::c_uint, +} +#[test] +fn bindgen_test_layout_fr_proto_pvc() { + assert_eq!( + ::std::mem::size_of::(), + 4usize, + concat!("Size of: ", stringify!(fr_proto_pvc)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(fr_proto_pvc)) + ); + assert_eq!( + unsafe { &(*(0 as *const fr_proto_pvc)).dlci as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(fr_proto_pvc), + "::", + stringify!(dlci) + ) + ); +} +impl Clone for fr_proto_pvc { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct fr_proto_pvc_info { + pub dlci: ::std::os::raw::c_uint, + pub master: [::std::os::raw::c_char; 16usize], +} +#[test] +fn bindgen_test_layout_fr_proto_pvc_info() { + assert_eq!( + ::std::mem::size_of::(), + 20usize, + concat!("Size of: ", stringify!(fr_proto_pvc_info)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(fr_proto_pvc_info)) + ); + assert_eq!( + unsafe { &(*(0 as *const fr_proto_pvc_info)).dlci as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(fr_proto_pvc_info), + "::", + stringify!(dlci) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const fr_proto_pvc_info)).master as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(fr_proto_pvc_info), + "::", + stringify!(master) + ) + ); +} +impl Clone for fr_proto_pvc_info { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct cisco_proto { + pub interval: ::std::os::raw::c_uint, + pub timeout: ::std::os::raw::c_uint, +} +#[test] +fn bindgen_test_layout_cisco_proto() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(cisco_proto)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(cisco_proto)) + ); + assert_eq!( + unsafe { &(*(0 as *const cisco_proto)).interval as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(cisco_proto), + "::", + stringify!(interval) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const cisco_proto)).timeout as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(cisco_proto), + "::", + stringify!(timeout) + ) + ); +} +impl Clone for cisco_proto { + fn clone(&self) -> Self { + *self + } +} +pub const net_device_flags_IFF_UP: net_device_flags = 1; +pub const net_device_flags_IFF_BROADCAST: net_device_flags = 2; +pub const net_device_flags_IFF_DEBUG: net_device_flags = 4; +pub const net_device_flags_IFF_LOOPBACK: net_device_flags = 8; +pub const net_device_flags_IFF_POINTOPOINT: net_device_flags = 16; +pub const net_device_flags_IFF_NOTRAILERS: net_device_flags = 32; +pub const net_device_flags_IFF_RUNNING: net_device_flags = 64; +pub const 
net_device_flags_IFF_NOARP: net_device_flags = 128; +pub const net_device_flags_IFF_PROMISC: net_device_flags = 256; +pub const net_device_flags_IFF_ALLMULTI: net_device_flags = 512; +pub const net_device_flags_IFF_MASTER: net_device_flags = 1024; +pub const net_device_flags_IFF_SLAVE: net_device_flags = 2048; +pub const net_device_flags_IFF_MULTICAST: net_device_flags = 4096; +pub const net_device_flags_IFF_PORTSEL: net_device_flags = 8192; +pub const net_device_flags_IFF_AUTOMEDIA: net_device_flags = 16384; +pub const net_device_flags_IFF_DYNAMIC: net_device_flags = 32768; +/** + * enum net_device_flags - &struct net_device flags + * + * These are the &struct net_device flags, they can be set by drivers, the + * kernel and some can be triggered by userspace. Userspace can query and + * set these flags using userspace utilities but there is also a sysfs + * entry available for all dev flags which can be queried and set. These flags + * are shared for all types of net_devices. The sysfs entries are available + * via /sys/class/net/<iface>/flags. Flags which can be toggled through sysfs + * are annotated below, note that only a few flags can be toggled and some + * other flags are always preserved from the original net_device flags + * even if you try to set them via sysfs. Flags which are always preserved + * are kept under the flag grouping @IFF_VOLATILE. Flags which are __volatile__ + * are annotated below as such. + * + * You should have a pretty good reason to be extending these flags. + * + * @IFF_UP: interface is up. Can be toggled through sysfs. + * @IFF_BROADCAST: broadcast address valid. Volatile. + * @IFF_DEBUG: turn on debugging. Can be toggled through sysfs. + * @IFF_LOOPBACK: is a loopback net. Volatile. + * @IFF_POINTOPOINT: interface has a p-p link. Volatile. + * @IFF_NOTRAILERS: avoid use of trailers. Can be toggled through sysfs. + * Volatile. + * @IFF_RUNNING: interface RFC2863 OPER_UP. Volatile. + * @IFF_NOARP: no ARP protocol. Can be toggled through sysfs. Volatile. + * @IFF_PROMISC: receive all packets. Can be toggled through sysfs. + * @IFF_ALLMULTI: receive all multicast packets. Can be toggled through + * sysfs. + * @IFF_MASTER: master of a load balancer. Volatile. + * @IFF_SLAVE: slave of a load balancer. Volatile. + * @IFF_MULTICAST: Supports multicast. Can be toggled through sysfs. + * @IFF_PORTSEL: can set media type. Can be toggled through sysfs. + * @IFF_AUTOMEDIA: auto media select active. Can be toggled through sysfs. + * @IFF_DYNAMIC: dialup device with changing addresses. Can be toggled + * through sysfs. + * @IFF_LOWER_UP: driver signals L1 up. Volatile. + * @IFF_DORMANT: driver signals dormant. Volatile. + * @IFF_ECHO: echo sent packets. Volatile. 
+ */ +pub type net_device_flags = ::std::os::raw::c_uint; +pub const IF_OPER_UNKNOWN: _bindgen_ty_4 = 0; +pub const IF_OPER_NOTPRESENT: _bindgen_ty_4 = 1; +pub const IF_OPER_DOWN: _bindgen_ty_4 = 2; +pub const IF_OPER_LOWERLAYERDOWN: _bindgen_ty_4 = 3; +pub const IF_OPER_TESTING: _bindgen_ty_4 = 4; +pub const IF_OPER_DORMANT: _bindgen_ty_4 = 5; +pub const IF_OPER_UP: _bindgen_ty_4 = 6; +pub type _bindgen_ty_4 = ::std::os::raw::c_uint; +pub const IF_LINK_MODE_DEFAULT: _bindgen_ty_5 = 0; +pub const IF_LINK_MODE_DORMANT: _bindgen_ty_5 = 1; +pub type _bindgen_ty_5 = ::std::os::raw::c_uint; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ifmap { + pub mem_start: ::std::os::raw::c_ulong, + pub mem_end: ::std::os::raw::c_ulong, + pub base_addr: ::std::os::raw::c_ushort, + pub irq: ::std::os::raw::c_uchar, + pub dma: ::std::os::raw::c_uchar, + pub port: ::std::os::raw::c_uchar, +} +#[test] +fn bindgen_test_layout_ifmap() { + assert_eq!( + ::std::mem::size_of::(), + 24usize, + concat!("Size of: ", stringify!(ifmap)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(ifmap)) + ); + assert_eq!( + unsafe { &(*(0 as *const ifmap)).mem_start as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifmap), + "::", + stringify!(mem_start) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifmap)).mem_end as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(ifmap), + "::", + stringify!(mem_end) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifmap)).base_addr as *const _ as usize }, + 16usize, + concat!( + "Alignment of field: ", + stringify!(ifmap), + "::", + stringify!(base_addr) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifmap)).irq as *const _ as usize }, + 18usize, + concat!( + "Alignment of field: ", + stringify!(ifmap), + "::", + stringify!(irq) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifmap)).dma as *const _ as usize }, + 19usize, + concat!( + "Alignment of field: ", + stringify!(ifmap), + "::", + stringify!(dma) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifmap)).port as *const _ as usize }, + 20usize, + concat!( + "Alignment of field: ", + stringify!(ifmap), + "::", + stringify!(port) + ) + ); +} +impl Clone for ifmap { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct if_settings { + pub type_: ::std::os::raw::c_uint, + pub size: ::std::os::raw::c_uint, + pub ifs_ifsu: if_settings__bindgen_ty_1, +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct if_settings__bindgen_ty_1 { + pub raw_hdlc: __BindgenUnionField<*mut raw_hdlc_proto>, + pub cisco: __BindgenUnionField<*mut cisco_proto>, + pub fr: __BindgenUnionField<*mut fr_proto>, + pub fr_pvc: __BindgenUnionField<*mut fr_proto_pvc>, + pub fr_pvc_info: __BindgenUnionField<*mut fr_proto_pvc_info>, + pub sync: __BindgenUnionField<*mut sync_serial_settings>, + pub te1: __BindgenUnionField<*mut te1_settings>, + pub bindgen_union_field: u64, +} +#[test] +fn bindgen_test_layout_if_settings__bindgen_ty_1() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(if_settings__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(if_settings__bindgen_ty_1)) + ); + assert_eq!( + unsafe { &(*(0 as *const if_settings__bindgen_ty_1)).raw_hdlc as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(if_settings__bindgen_ty_1), + "::", + stringify!(raw_hdlc) + ) + 
); + assert_eq!( + unsafe { &(*(0 as *const if_settings__bindgen_ty_1)).cisco as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(if_settings__bindgen_ty_1), + "::", + stringify!(cisco) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const if_settings__bindgen_ty_1)).fr as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(if_settings__bindgen_ty_1), + "::", + stringify!(fr) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const if_settings__bindgen_ty_1)).fr_pvc as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(if_settings__bindgen_ty_1), + "::", + stringify!(fr_pvc) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const if_settings__bindgen_ty_1)).fr_pvc_info as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(if_settings__bindgen_ty_1), + "::", + stringify!(fr_pvc_info) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const if_settings__bindgen_ty_1)).sync as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(if_settings__bindgen_ty_1), + "::", + stringify!(sync) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const if_settings__bindgen_ty_1)).te1 as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(if_settings__bindgen_ty_1), + "::", + stringify!(te1) + ) + ); +} +impl Clone for if_settings__bindgen_ty_1 { + fn clone(&self) -> Self { + *self + } +} +#[test] +fn bindgen_test_layout_if_settings() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(if_settings)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(if_settings)) + ); + assert_eq!( + unsafe { &(*(0 as *const if_settings)).type_ as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(if_settings), + "::", + stringify!(type_) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const if_settings)).size as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(if_settings), + "::", + stringify!(size) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const if_settings)).ifs_ifsu as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(if_settings), + "::", + stringify!(ifs_ifsu) + ) + ); +} +impl Clone for if_settings { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ifreq { + pub ifr_ifrn: ifreq__bindgen_ty_1, + pub ifr_ifru: ifreq__bindgen_ty_2, +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ifreq__bindgen_ty_1 { + pub ifrn_name: __BindgenUnionField<[::std::os::raw::c_uchar; 16usize]>, + pub bindgen_union_field: [u8; 16usize], +} +#[test] +fn bindgen_test_layout_ifreq__bindgen_ty_1() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(ifreq__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::(), + 1usize, + concat!("Alignment of ", stringify!(ifreq__bindgen_ty_1)) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq__bindgen_ty_1)).ifrn_name as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_1), + "::", + stringify!(ifrn_name) + ) + ); +} +impl Clone for ifreq__bindgen_ty_1 { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ifreq__bindgen_ty_2 { + pub ifru_addr: __BindgenUnionField, + pub ifru_dstaddr: __BindgenUnionField, + pub ifru_broadaddr: __BindgenUnionField, + pub ifru_netmask: __BindgenUnionField, + pub 
ifru_hwaddr: __BindgenUnionField, + pub ifru_flags: __BindgenUnionField<::std::os::raw::c_short>, + pub ifru_ivalue: __BindgenUnionField<::std::os::raw::c_int>, + pub ifru_mtu: __BindgenUnionField<::std::os::raw::c_int>, + pub ifru_map: __BindgenUnionField, + pub ifru_slave: __BindgenUnionField<[::std::os::raw::c_char; 16usize]>, + pub ifru_newname: __BindgenUnionField<[::std::os::raw::c_char; 16usize]>, + pub ifru_data: __BindgenUnionField<*mut ::std::os::raw::c_void>, + pub ifru_settings: __BindgenUnionField, + pub bindgen_union_field: [u64; 3usize], +} +#[test] +fn bindgen_test_layout_ifreq__bindgen_ty_2() { + assert_eq!( + ::std::mem::size_of::(), + 24usize, + concat!("Size of: ", stringify!(ifreq__bindgen_ty_2)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(ifreq__bindgen_ty_2)) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq__bindgen_ty_2)).ifru_addr as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_2), + "::", + stringify!(ifru_addr) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq__bindgen_ty_2)).ifru_dstaddr as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_2), + "::", + stringify!(ifru_dstaddr) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq__bindgen_ty_2)).ifru_broadaddr as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_2), + "::", + stringify!(ifru_broadaddr) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq__bindgen_ty_2)).ifru_netmask as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_2), + "::", + stringify!(ifru_netmask) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq__bindgen_ty_2)).ifru_hwaddr as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_2), + "::", + stringify!(ifru_hwaddr) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq__bindgen_ty_2)).ifru_flags as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_2), + "::", + stringify!(ifru_flags) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq__bindgen_ty_2)).ifru_ivalue as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_2), + "::", + stringify!(ifru_ivalue) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq__bindgen_ty_2)).ifru_mtu as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_2), + "::", + stringify!(ifru_mtu) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq__bindgen_ty_2)).ifru_map as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_2), + "::", + stringify!(ifru_map) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq__bindgen_ty_2)).ifru_slave as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_2), + "::", + stringify!(ifru_slave) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq__bindgen_ty_2)).ifru_newname as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_2), + "::", + stringify!(ifru_newname) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq__bindgen_ty_2)).ifru_data as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_2), + "::", + stringify!(ifru_data) + ) + ); + assert_eq!( + unsafe { 
&(*(0 as *const ifreq__bindgen_ty_2)).ifru_settings as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq__bindgen_ty_2), + "::", + stringify!(ifru_settings) + ) + ); +} +impl Clone for ifreq__bindgen_ty_2 { + fn clone(&self) -> Self { + *self + } +} +#[test] +fn bindgen_test_layout_ifreq() { + assert_eq!( + ::std::mem::size_of::(), + 40usize, + concat!("Size of: ", stringify!(ifreq)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(ifreq)) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq)).ifr_ifrn as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ifreq), + "::", + stringify!(ifr_ifrn) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ifreq)).ifr_ifru as *const _ as usize }, + 16usize, + concat!( + "Alignment of field: ", + stringify!(ifreq), + "::", + stringify!(ifr_ifru) + ) + ); +} +impl Clone for ifreq { + fn clone(&self) -> Self { + *self + } +} diff --git a/src/dragonball/src/dbs_utils/src/net/net_gen/inn.rs b/src/dragonball/src/dbs_utils/src/net/net_gen/inn.rs new file mode 100644 index 000000000000..84401eafa1a4 --- /dev/null +++ b/src/dragonball/src/dbs_utils/src/net/net_gen/inn.rs @@ -0,0 +1,845 @@ +// Copyright 2023 Alibaba Cloud. All Rights Reserved. +// Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. +// SPDX-License-Identifier: Apache-2.0 + +/* automatically generated by rust-bindgen */ + +pub const __BITS_PER_LONG: ::std::os::raw::c_uint = 64; +pub const __FD_SETSIZE: ::std::os::raw::c_uint = 1024; +pub const __UAPI_DEF_IN_ADDR: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IN_IPPROTO: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IN_PKTINFO: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IP_MREQ: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_SOCKADDR_IN: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IN_CLASS: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IN6_ADDR: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IN6_ADDR_ALT: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_SOCKADDR_IN6: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IPV6_MREQ: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IPPROTO_V6: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IPV6_OPTIONS: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IN6_PKTINFO: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_IP6_MTUINFO: ::std::os::raw::c_uint = 1; +pub const __UAPI_DEF_XATTR: ::std::os::raw::c_uint = 1; +pub const _K_SS_MAXSIZE: ::std::os::raw::c_uint = 128; +pub const IP_TOS: ::std::os::raw::c_uint = 1; +pub const IP_TTL: ::std::os::raw::c_uint = 2; +pub const IP_HDRINCL: ::std::os::raw::c_uint = 3; +pub const IP_OPTIONS: ::std::os::raw::c_uint = 4; +pub const IP_ROUTER_ALERT: ::std::os::raw::c_uint = 5; +pub const IP_RECVOPTS: ::std::os::raw::c_uint = 6; +pub const IP_RETOPTS: ::std::os::raw::c_uint = 7; +pub const IP_PKTINFO: ::std::os::raw::c_uint = 8; +pub const IP_PKTOPTIONS: ::std::os::raw::c_uint = 9; +pub const IP_MTU_DISCOVER: ::std::os::raw::c_uint = 10; +pub const IP_RECVERR: ::std::os::raw::c_uint = 11; +pub const IP_RECVTTL: ::std::os::raw::c_uint = 12; +pub const IP_RECVTOS: ::std::os::raw::c_uint = 13; +pub const IP_MTU: ::std::os::raw::c_uint = 14; +pub const IP_FREEBIND: ::std::os::raw::c_uint = 15; +pub const IP_IPSEC_POLICY: ::std::os::raw::c_uint = 16; +pub const IP_XFRM_POLICY: ::std::os::raw::c_uint = 17; +pub 
const IP_PASSSEC: ::std::os::raw::c_uint = 18; +pub const IP_TRANSPARENT: ::std::os::raw::c_uint = 19; +pub const IP_RECVRETOPTS: ::std::os::raw::c_uint = 7; +pub const IP_ORIGDSTADDR: ::std::os::raw::c_uint = 20; +pub const IP_RECVORIGDSTADDR: ::std::os::raw::c_uint = 20; +pub const IP_MINTTL: ::std::os::raw::c_uint = 21; +pub const IP_NODEFRAG: ::std::os::raw::c_uint = 22; +pub const IP_CHECKSUM: ::std::os::raw::c_uint = 23; +pub const IP_BIND_ADDRESS_NO_PORT: ::std::os::raw::c_uint = 24; +pub const IP_RECVFRAGSIZE: ::std::os::raw::c_uint = 25; +pub const IP_PMTUDISC_DONT: ::std::os::raw::c_uint = 0; +pub const IP_PMTUDISC_WANT: ::std::os::raw::c_uint = 1; +pub const IP_PMTUDISC_DO: ::std::os::raw::c_uint = 2; +pub const IP_PMTUDISC_PROBE: ::std::os::raw::c_uint = 3; +pub const IP_PMTUDISC_INTERFACE: ::std::os::raw::c_uint = 4; +pub const IP_PMTUDISC_OMIT: ::std::os::raw::c_uint = 5; +pub const IP_MULTICAST_IF: ::std::os::raw::c_uint = 32; +pub const IP_MULTICAST_TTL: ::std::os::raw::c_uint = 33; +pub const IP_MULTICAST_LOOP: ::std::os::raw::c_uint = 34; +pub const IP_ADD_MEMBERSHIP: ::std::os::raw::c_uint = 35; +pub const IP_DROP_MEMBERSHIP: ::std::os::raw::c_uint = 36; +pub const IP_UNBLOCK_SOURCE: ::std::os::raw::c_uint = 37; +pub const IP_BLOCK_SOURCE: ::std::os::raw::c_uint = 38; +pub const IP_ADD_SOURCE_MEMBERSHIP: ::std::os::raw::c_uint = 39; +pub const IP_DROP_SOURCE_MEMBERSHIP: ::std::os::raw::c_uint = 40; +pub const IP_MSFILTER: ::std::os::raw::c_uint = 41; +pub const MCAST_JOIN_GROUP: ::std::os::raw::c_uint = 42; +pub const MCAST_BLOCK_SOURCE: ::std::os::raw::c_uint = 43; +pub const MCAST_UNBLOCK_SOURCE: ::std::os::raw::c_uint = 44; +pub const MCAST_LEAVE_GROUP: ::std::os::raw::c_uint = 45; +pub const MCAST_JOIN_SOURCE_GROUP: ::std::os::raw::c_uint = 46; +pub const MCAST_LEAVE_SOURCE_GROUP: ::std::os::raw::c_uint = 47; +pub const MCAST_MSFILTER: ::std::os::raw::c_uint = 48; +pub const IP_MULTICAST_ALL: ::std::os::raw::c_uint = 49; +pub const IP_UNICAST_IF: ::std::os::raw::c_uint = 50; +pub const MCAST_EXCLUDE: ::std::os::raw::c_uint = 0; +pub const MCAST_INCLUDE: ::std::os::raw::c_uint = 1; +pub const IP_DEFAULT_MULTICAST_TTL: ::std::os::raw::c_uint = 1; +pub const IP_DEFAULT_MULTICAST_LOOP: ::std::os::raw::c_uint = 1; +pub const __SOCK_SIZE__: ::std::os::raw::c_uint = 16; +pub const IN_CLASSA_NET: ::std::os::raw::c_uint = 4278190080; +pub const IN_CLASSA_NSHIFT: ::std::os::raw::c_uint = 24; +pub const IN_CLASSA_HOST: ::std::os::raw::c_uint = 16777215; +pub const IN_CLASSA_MAX: ::std::os::raw::c_uint = 128; +pub const IN_CLASSB_NET: ::std::os::raw::c_uint = 4294901760; +pub const IN_CLASSB_NSHIFT: ::std::os::raw::c_uint = 16; +pub const IN_CLASSB_HOST: ::std::os::raw::c_uint = 65535; +pub const IN_CLASSB_MAX: ::std::os::raw::c_uint = 65536; +pub const IN_CLASSC_NET: ::std::os::raw::c_uint = 4294967040; +pub const IN_CLASSC_NSHIFT: ::std::os::raw::c_uint = 8; +pub const IN_CLASSC_HOST: ::std::os::raw::c_uint = 255; +pub const IN_MULTICAST_NET: ::std::os::raw::c_uint = 4026531840; +pub const IN_LOOPBACKNET: ::std::os::raw::c_uint = 127; +pub const INADDR_LOOPBACK: ::std::os::raw::c_uint = 2130706433; +pub const INADDR_UNSPEC_GROUP: ::std::os::raw::c_uint = 3758096384; +pub const INADDR_ALLHOSTS_GROUP: ::std::os::raw::c_uint = 3758096385; +pub const INADDR_ALLRTRS_GROUP: ::std::os::raw::c_uint = 3758096386; +pub const INADDR_MAX_LOCAL_GROUP: ::std::os::raw::c_uint = 3758096639; +pub const __LITTLE_ENDIAN: ::std::os::raw::c_uint = 1234; +pub type __s8 = ::std::os::raw::c_schar; 
+pub type __u8 = ::std::os::raw::c_uchar; +pub type __s16 = ::std::os::raw::c_short; +pub type __u16 = ::std::os::raw::c_ushort; +pub type __s32 = ::std::os::raw::c_int; +pub type __u32 = ::std::os::raw::c_uint; +pub type __s64 = ::std::os::raw::c_longlong; +pub type __u64 = ::std::os::raw::c_ulonglong; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct __kernel_fd_set { + pub fds_bits: [::std::os::raw::c_ulong; 16usize], +} +#[test] +fn bindgen_test_layout___kernel_fd_set() { + assert_eq!( + ::std::mem::size_of::<__kernel_fd_set>(), + 128usize, + concat!("Size of: ", stringify!(__kernel_fd_set)) + ); + assert_eq!( + ::std::mem::align_of::<__kernel_fd_set>(), + 8usize, + concat!("Alignment of ", stringify!(__kernel_fd_set)) + ); + assert_eq!( + unsafe { &(*(0 as *const __kernel_fd_set)).fds_bits as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(__kernel_fd_set), + "::", + stringify!(fds_bits) + ) + ); +} +impl Clone for __kernel_fd_set { + fn clone(&self) -> Self { + *self + } +} +pub type __kernel_sighandler_t = + ::std::option::Option; +pub type __kernel_key_t = ::std::os::raw::c_int; +pub type __kernel_mqd_t = ::std::os::raw::c_int; +pub type __kernel_old_uid_t = ::std::os::raw::c_ushort; +pub type __kernel_old_gid_t = ::std::os::raw::c_ushort; +pub type __kernel_old_dev_t = ::std::os::raw::c_ulong; +pub type __kernel_long_t = ::std::os::raw::c_long; +pub type __kernel_ulong_t = ::std::os::raw::c_ulong; +pub type __kernel_ino_t = __kernel_ulong_t; +pub type __kernel_mode_t = ::std::os::raw::c_uint; +pub type __kernel_pid_t = ::std::os::raw::c_int; +pub type __kernel_ipc_pid_t = ::std::os::raw::c_int; +pub type __kernel_uid_t = ::std::os::raw::c_uint; +pub type __kernel_gid_t = ::std::os::raw::c_uint; +pub type __kernel_suseconds_t = __kernel_long_t; +pub type __kernel_daddr_t = ::std::os::raw::c_int; +pub type __kernel_uid32_t = ::std::os::raw::c_uint; +pub type __kernel_gid32_t = ::std::os::raw::c_uint; +pub type __kernel_size_t = __kernel_ulong_t; +pub type __kernel_ssize_t = __kernel_long_t; +pub type __kernel_ptrdiff_t = __kernel_long_t; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct __kernel_fsid_t { + pub val: [::std::os::raw::c_int; 2usize], +} +#[test] +fn bindgen_test_layout___kernel_fsid_t() { + assert_eq!( + ::std::mem::size_of::<__kernel_fsid_t>(), + 8usize, + concat!("Size of: ", stringify!(__kernel_fsid_t)) + ); + assert_eq!( + ::std::mem::align_of::<__kernel_fsid_t>(), + 4usize, + concat!("Alignment of ", stringify!(__kernel_fsid_t)) + ); + assert_eq!( + unsafe { &(*(0 as *const __kernel_fsid_t)).val as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(__kernel_fsid_t), + "::", + stringify!(val) + ) + ); +} +impl Clone for __kernel_fsid_t { + fn clone(&self) -> Self { + *self + } +} +pub type __kernel_off_t = __kernel_long_t; +pub type __kernel_loff_t = ::std::os::raw::c_longlong; +pub type __kernel_time_t = __kernel_long_t; +pub type __kernel_clock_t = __kernel_long_t; +pub type __kernel_timer_t = ::std::os::raw::c_int; +pub type __kernel_clockid_t = ::std::os::raw::c_int; +pub type __kernel_caddr_t = *mut ::std::os::raw::c_char; +pub type __kernel_uid16_t = ::std::os::raw::c_ushort; +pub type __kernel_gid16_t = ::std::os::raw::c_ushort; +pub type __le16 = __u16; +pub type __be16 = __u16; +pub type __le32 = __u32; +pub type __be32 = __u32; +pub type __le64 = __u64; +pub type __be64 = __u64; +pub type __sum16 = __u16; +pub type __wsum = __u32; +pub type __kernel_sa_family_t = 
::std::os::raw::c_ushort; +#[repr(C)] +pub struct __kernel_sockaddr_storage { + pub ss_family: __kernel_sa_family_t, + pub __data: [::std::os::raw::c_char; 126usize], + pub __bindgen_align: [u64; 0usize], +} +#[test] +fn bindgen_test_layout___kernel_sockaddr_storage() { + assert_eq!( + ::std::mem::size_of::<__kernel_sockaddr_storage>(), + 128usize, + concat!("Size of: ", stringify!(__kernel_sockaddr_storage)) + ); + assert_eq!( + ::std::mem::align_of::<__kernel_sockaddr_storage>(), + 8usize, + concat!("Alignment of ", stringify!(__kernel_sockaddr_storage)) + ); + assert_eq!( + unsafe { &(*(0 as *const __kernel_sockaddr_storage)).ss_family as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(__kernel_sockaddr_storage), + "::", + stringify!(ss_family) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const __kernel_sockaddr_storage)).__data as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(__kernel_sockaddr_storage), + "::", + stringify!(__data) + ) + ); +} +impl Default for __kernel_sockaddr_storage { + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } +} +pub const IPPROTO_IP: _bindgen_ty_1 = 0; +pub const IPPROTO_ICMP: _bindgen_ty_1 = 1; +pub const IPPROTO_IGMP: _bindgen_ty_1 = 2; +pub const IPPROTO_IPIP: _bindgen_ty_1 = 4; +pub const IPPROTO_TCP: _bindgen_ty_1 = 6; +pub const IPPROTO_EGP: _bindgen_ty_1 = 8; +pub const IPPROTO_PUP: _bindgen_ty_1 = 12; +pub const IPPROTO_UDP: _bindgen_ty_1 = 17; +pub const IPPROTO_IDP: _bindgen_ty_1 = 22; +pub const IPPROTO_TP: _bindgen_ty_1 = 29; +pub const IPPROTO_DCCP: _bindgen_ty_1 = 33; +pub const IPPROTO_IPV6: _bindgen_ty_1 = 41; +pub const IPPROTO_RSVP: _bindgen_ty_1 = 46; +pub const IPPROTO_GRE: _bindgen_ty_1 = 47; +pub const IPPROTO_ESP: _bindgen_ty_1 = 50; +pub const IPPROTO_AH: _bindgen_ty_1 = 51; +pub const IPPROTO_MTP: _bindgen_ty_1 = 92; +pub const IPPROTO_BEETPH: _bindgen_ty_1 = 94; +pub const IPPROTO_ENCAP: _bindgen_ty_1 = 98; +pub const IPPROTO_PIM: _bindgen_ty_1 = 103; +pub const IPPROTO_COMP: _bindgen_ty_1 = 108; +pub const IPPROTO_SCTP: _bindgen_ty_1 = 132; +pub const IPPROTO_UDPLITE: _bindgen_ty_1 = 136; +pub const IPPROTO_MPLS: _bindgen_ty_1 = 137; +pub const IPPROTO_RAW: _bindgen_ty_1 = 255; +pub const IPPROTO_MAX: _bindgen_ty_1 = 256; +pub type _bindgen_ty_1 = ::std::os::raw::c_uint; +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct in_addr { + pub s_addr: __be32, +} +#[test] +fn bindgen_test_layout_in_addr() { + assert_eq!( + ::std::mem::size_of::(), + 4usize, + concat!("Size of: ", stringify!(in_addr)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(in_addr)) + ); + assert_eq!( + unsafe { &(*(0 as *const in_addr)).s_addr as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(in_addr), + "::", + stringify!(s_addr) + ) + ); +} +impl Clone for in_addr { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ip_mreq { + pub imr_multiaddr: in_addr, + pub imr_interface: in_addr, +} +#[test] +fn bindgen_test_layout_ip_mreq() { + assert_eq!( + ::std::mem::size_of::(), + 8usize, + concat!("Size of: ", stringify!(ip_mreq)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(ip_mreq)) + ); + assert_eq!( + unsafe { &(*(0 as *const ip_mreq)).imr_multiaddr as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ip_mreq), + "::", + stringify!(imr_multiaddr) + ) + ); + assert_eq!( + 
unsafe { &(*(0 as *const ip_mreq)).imr_interface as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(ip_mreq), + "::", + stringify!(imr_interface) + ) + ); +} +impl Clone for ip_mreq { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ip_mreqn { + pub imr_multiaddr: in_addr, + pub imr_address: in_addr, + pub imr_ifindex: ::std::os::raw::c_int, +} +#[test] +fn bindgen_test_layout_ip_mreqn() { + assert_eq!( + ::std::mem::size_of::(), + 12usize, + concat!("Size of: ", stringify!(ip_mreqn)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(ip_mreqn)) + ); + assert_eq!( + unsafe { &(*(0 as *const ip_mreqn)).imr_multiaddr as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ip_mreqn), + "::", + stringify!(imr_multiaddr) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ip_mreqn)).imr_address as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(ip_mreqn), + "::", + stringify!(imr_address) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ip_mreqn)).imr_ifindex as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(ip_mreqn), + "::", + stringify!(imr_ifindex) + ) + ); +} +impl Clone for ip_mreqn { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ip_mreq_source { + pub imr_multiaddr: __be32, + pub imr_interface: __be32, + pub imr_sourceaddr: __be32, +} +#[test] +fn bindgen_test_layout_ip_mreq_source() { + assert_eq!( + ::std::mem::size_of::(), + 12usize, + concat!("Size of: ", stringify!(ip_mreq_source)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(ip_mreq_source)) + ); + assert_eq!( + unsafe { &(*(0 as *const ip_mreq_source)).imr_multiaddr as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ip_mreq_source), + "::", + stringify!(imr_multiaddr) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ip_mreq_source)).imr_interface as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(ip_mreq_source), + "::", + stringify!(imr_interface) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ip_mreq_source)).imr_sourceaddr as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(ip_mreq_source), + "::", + stringify!(imr_sourceaddr) + ) + ); +} +impl Clone for ip_mreq_source { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ip_msfilter { + pub imsf_multiaddr: __be32, + pub imsf_interface: __be32, + pub imsf_fmode: __u32, + pub imsf_numsrc: __u32, + pub imsf_slist: [__be32; 1usize], +} +#[test] +fn bindgen_test_layout_ip_msfilter() { + assert_eq!( + ::std::mem::size_of::(), + 20usize, + concat!("Size of: ", stringify!(ip_msfilter)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(ip_msfilter)) + ); + assert_eq!( + unsafe { &(*(0 as *const ip_msfilter)).imsf_multiaddr as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(ip_msfilter), + "::", + stringify!(imsf_multiaddr) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ip_msfilter)).imsf_interface as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(ip_msfilter), + "::", + stringify!(imsf_interface) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ip_msfilter)).imsf_fmode as *const _ as usize }, + 8usize, + 
concat!( + "Alignment of field: ", + stringify!(ip_msfilter), + "::", + stringify!(imsf_fmode) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ip_msfilter)).imsf_numsrc as *const _ as usize }, + 12usize, + concat!( + "Alignment of field: ", + stringify!(ip_msfilter), + "::", + stringify!(imsf_numsrc) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const ip_msfilter)).imsf_slist as *const _ as usize }, + 16usize, + concat!( + "Alignment of field: ", + stringify!(ip_msfilter), + "::", + stringify!(imsf_slist) + ) + ); +} +impl Clone for ip_msfilter { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +pub struct group_req { + pub gr_interface: __u32, + pub gr_group: __kernel_sockaddr_storage, +} +#[test] +fn bindgen_test_layout_group_req() { + assert_eq!( + ::std::mem::size_of::(), + 136usize, + concat!("Size of: ", stringify!(group_req)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(group_req)) + ); + assert_eq!( + unsafe { &(*(0 as *const group_req)).gr_interface as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(group_req), + "::", + stringify!(gr_interface) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const group_req)).gr_group as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(group_req), + "::", + stringify!(gr_group) + ) + ); +} +impl Default for group_req { + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } +} +#[repr(C)] +pub struct group_source_req { + pub gsr_interface: __u32, + pub gsr_group: __kernel_sockaddr_storage, + pub gsr_source: __kernel_sockaddr_storage, +} +#[test] +fn bindgen_test_layout_group_source_req() { + assert_eq!( + ::std::mem::size_of::(), + 264usize, + concat!("Size of: ", stringify!(group_source_req)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(group_source_req)) + ); + assert_eq!( + unsafe { &(*(0 as *const group_source_req)).gsr_interface as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(group_source_req), + "::", + stringify!(gsr_interface) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const group_source_req)).gsr_group as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(group_source_req), + "::", + stringify!(gsr_group) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const group_source_req)).gsr_source as *const _ as usize }, + 136usize, + concat!( + "Alignment of field: ", + stringify!(group_source_req), + "::", + stringify!(gsr_source) + ) + ); +} +impl Default for group_source_req { + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } +} +#[repr(C)] +pub struct group_filter { + pub gf_interface: __u32, + pub gf_group: __kernel_sockaddr_storage, + pub gf_fmode: __u32, + pub gf_numsrc: __u32, + pub gf_slist: [__kernel_sockaddr_storage; 1usize], +} +#[test] +fn bindgen_test_layout_group_filter() { + assert_eq!( + ::std::mem::size_of::(), + 272usize, + concat!("Size of: ", stringify!(group_filter)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(group_filter)) + ); + assert_eq!( + unsafe { &(*(0 as *const group_filter)).gf_interface as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(group_filter), + "::", + stringify!(gf_interface) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const group_filter)).gf_group as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(group_filter), + "::", + stringify!(gf_group) 
+ ) + ); + assert_eq!( + unsafe { &(*(0 as *const group_filter)).gf_fmode as *const _ as usize }, + 136usize, + concat!( + "Alignment of field: ", + stringify!(group_filter), + "::", + stringify!(gf_fmode) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const group_filter)).gf_numsrc as *const _ as usize }, + 140usize, + concat!( + "Alignment of field: ", + stringify!(group_filter), + "::", + stringify!(gf_numsrc) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const group_filter)).gf_slist as *const _ as usize }, + 144usize, + concat!( + "Alignment of field: ", + stringify!(group_filter), + "::", + stringify!(gf_slist) + ) + ); +} +impl Default for group_filter { + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct in_pktinfo { + pub ipi_ifindex: ::std::os::raw::c_int, + pub ipi_spec_dst: in_addr, + pub ipi_addr: in_addr, +} +#[test] +fn bindgen_test_layout_in_pktinfo() { + assert_eq!( + ::std::mem::size_of::(), + 12usize, + concat!("Size of: ", stringify!(in_pktinfo)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(in_pktinfo)) + ); + assert_eq!( + unsafe { &(*(0 as *const in_pktinfo)).ipi_ifindex as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(in_pktinfo), + "::", + stringify!(ipi_ifindex) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const in_pktinfo)).ipi_spec_dst as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(in_pktinfo), + "::", + stringify!(ipi_spec_dst) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const in_pktinfo)).ipi_addr as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(in_pktinfo), + "::", + stringify!(ipi_addr) + ) + ); +} +impl Clone for in_pktinfo { + fn clone(&self) -> Self { + *self + } +} +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct sockaddr_in { + pub sin_family: __kernel_sa_family_t, + pub sin_port: __be16, + pub sin_addr: in_addr, + pub __pad: [::std::os::raw::c_uchar; 8usize], +} +#[test] +fn bindgen_test_layout_sockaddr_in() { + assert_eq!( + ::std::mem::size_of::(), + 16usize, + concat!("Size of: ", stringify!(sockaddr_in)) + ); + assert_eq!( + ::std::mem::align_of::(), + 4usize, + concat!("Alignment of ", stringify!(sockaddr_in)) + ); + assert_eq!( + unsafe { &(*(0 as *const sockaddr_in)).sin_family as *const _ as usize }, + 0usize, + concat!( + "Alignment of field: ", + stringify!(sockaddr_in), + "::", + stringify!(sin_family) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const sockaddr_in)).sin_port as *const _ as usize }, + 2usize, + concat!( + "Alignment of field: ", + stringify!(sockaddr_in), + "::", + stringify!(sin_port) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const sockaddr_in)).sin_addr as *const _ as usize }, + 4usize, + concat!( + "Alignment of field: ", + stringify!(sockaddr_in), + "::", + stringify!(sin_addr) + ) + ); + assert_eq!( + unsafe { &(*(0 as *const sockaddr_in)).__pad as *const _ as usize }, + 8usize, + concat!( + "Alignment of field: ", + stringify!(sockaddr_in), + "::", + stringify!(__pad) + ) + ); +} +impl Clone for sockaddr_in { + fn clone(&self) -> Self { + *self + } +} diff --git a/src/dragonball/src/dbs_utils/src/net/net_gen/mod.rs b/src/dragonball/src/dbs_utils/src/net/net_gen/mod.rs new file mode 100644 index 000000000000..ba0f62c13aec --- /dev/null +++ b/src/dragonball/src/dbs_utils/src/net/net_gen/mod.rs @@ -0,0 +1,36 @@ +// Copyright 2023 Alibaba Cloud. All Rights Reserved. 
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the THIRD-PARTY file.
+// SPDX-License-Identifier: Apache-2.0
+
+#![allow(clippy::all)]
+#![allow(non_upper_case_globals)]
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+#![allow(missing_docs)]
+#![allow(deref_nullptr)]
+#![allow(ambiguous_glob_reexports)]
+
+// generated with bindgen /usr/include/linux/if.h --no-unstable-rust
+// --constified-enum '*' --with-derive-default -- -D __UAPI_DEF_IF_IFNAMSIZ -D
+// __UAPI_DEF_IF_NET_DEVICE_FLAGS -D __UAPI_DEF_IF_IFREQ -D __UAPI_DEF_IF_IFMAP
+// Name is "iff" to avoid conflicting with "if" keyword.
+// Generated against Linux 4.11 to include fix "uapi: fix linux/if.h userspace
+// compilation errors".
+// Manual fixup of ifrn_name to be of type c_uchar instead of c_char.
+pub mod iff;
+// generated with bindgen /usr/include/linux/if_tun.h --no-unstable-rust
+// --constified-enum '*' --with-derive-default
+pub mod if_tun;
+// generated with bindgen /usr/include/linux/in.h --no-unstable-rust
+// --constified-enum '*' --with-derive-default
+// Name is "inn" to avoid conflicting with "in" keyword.
+pub mod inn;
+// generated with bindgen /usr/include/linux/sockios.h --no-unstable-rust
+// --constified-enum '*' --with-derive-default
+pub mod sockios;
+pub use if_tun::*;
+pub use iff::*;
+pub use inn::*;
+pub use sockios::*;
diff --git a/src/dragonball/src/dbs_utils/src/net/net_gen/sockios.rs b/src/dragonball/src/dbs_utils/src/net/net_gen/sockios.rs
new file mode 100644
index 000000000000..18037c3533cf
--- /dev/null
+++ b/src/dragonball/src/dbs_utils/src/net/net_gen/sockios.rs
@@ -0,0 +1,91 @@
+// Copyright 2023 Alibaba Cloud. All Rights Reserved.
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the THIRD-PARTY file.
+// SPDX-License-Identifier: Apache-2.0 + +/* automatically generated by rust-bindgen */ + +pub const FIOSETOWN: ::std::os::raw::c_uint = 35073; +pub const SIOCSPGRP: ::std::os::raw::c_uint = 35074; +pub const FIOGETOWN: ::std::os::raw::c_uint = 35075; +pub const SIOCGPGRP: ::std::os::raw::c_uint = 35076; +pub const SIOCATMARK: ::std::os::raw::c_uint = 35077; +pub const SIOCGSTAMP: ::std::os::raw::c_uint = 35078; +pub const SIOCGSTAMPNS: ::std::os::raw::c_uint = 35079; +pub const SOCK_IOC_TYPE: ::std::os::raw::c_uint = 137; +pub const SIOCADDRT: ::std::os::raw::c_uint = 35083; +pub const SIOCDELRT: ::std::os::raw::c_uint = 35084; +pub const SIOCRTMSG: ::std::os::raw::c_uint = 35085; +pub const SIOCGIFNAME: ::std::os::raw::c_uint = 35088; +pub const SIOCSIFLINK: ::std::os::raw::c_uint = 35089; +pub const SIOCGIFCONF: ::std::os::raw::c_uint = 35090; +pub const SIOCGIFFLAGS: ::std::os::raw::c_uint = 35091; +pub const SIOCSIFFLAGS: ::std::os::raw::c_uint = 35092; +pub const SIOCGIFADDR: ::std::os::raw::c_uint = 35093; +pub const SIOCSIFADDR: ::std::os::raw::c_uint = 35094; +pub const SIOCGIFDSTADDR: ::std::os::raw::c_uint = 35095; +pub const SIOCSIFDSTADDR: ::std::os::raw::c_uint = 35096; +pub const SIOCGIFBRDADDR: ::std::os::raw::c_uint = 35097; +pub const SIOCSIFBRDADDR: ::std::os::raw::c_uint = 35098; +pub const SIOCGIFNETMASK: ::std::os::raw::c_uint = 35099; +pub const SIOCSIFNETMASK: ::std::os::raw::c_uint = 35100; +pub const SIOCGIFMETRIC: ::std::os::raw::c_uint = 35101; +pub const SIOCSIFMETRIC: ::std::os::raw::c_uint = 35102; +pub const SIOCGIFMEM: ::std::os::raw::c_uint = 35103; +pub const SIOCSIFMEM: ::std::os::raw::c_uint = 35104; +pub const SIOCGIFMTU: ::std::os::raw::c_uint = 35105; +pub const SIOCSIFMTU: ::std::os::raw::c_uint = 35106; +pub const SIOCSIFNAME: ::std::os::raw::c_uint = 35107; +pub const SIOCSIFHWADDR: ::std::os::raw::c_uint = 35108; +pub const SIOCGIFENCAP: ::std::os::raw::c_uint = 35109; +pub const SIOCSIFENCAP: ::std::os::raw::c_uint = 35110; +pub const SIOCGIFHWADDR: ::std::os::raw::c_uint = 35111; +pub const SIOCGIFSLAVE: ::std::os::raw::c_uint = 35113; +pub const SIOCSIFSLAVE: ::std::os::raw::c_uint = 35120; +pub const SIOCADDMULTI: ::std::os::raw::c_uint = 35121; +pub const SIOCDELMULTI: ::std::os::raw::c_uint = 35122; +pub const SIOCGIFINDEX: ::std::os::raw::c_uint = 35123; +pub const SIOGIFINDEX: ::std::os::raw::c_uint = 35123; +pub const SIOCSIFPFLAGS: ::std::os::raw::c_uint = 35124; +pub const SIOCGIFPFLAGS: ::std::os::raw::c_uint = 35125; +pub const SIOCDIFADDR: ::std::os::raw::c_uint = 35126; +pub const SIOCSIFHWBROADCAST: ::std::os::raw::c_uint = 35127; +pub const SIOCGIFCOUNT: ::std::os::raw::c_uint = 35128; +pub const SIOCGIFBR: ::std::os::raw::c_uint = 35136; +pub const SIOCSIFBR: ::std::os::raw::c_uint = 35137; +pub const SIOCGIFTXQLEN: ::std::os::raw::c_uint = 35138; +pub const SIOCSIFTXQLEN: ::std::os::raw::c_uint = 35139; +pub const SIOCETHTOOL: ::std::os::raw::c_uint = 35142; +pub const SIOCGMIIPHY: ::std::os::raw::c_uint = 35143; +pub const SIOCGMIIREG: ::std::os::raw::c_uint = 35144; +pub const SIOCSMIIREG: ::std::os::raw::c_uint = 35145; +pub const SIOCWANDEV: ::std::os::raw::c_uint = 35146; +pub const SIOCOUTQNSD: ::std::os::raw::c_uint = 35147; +pub const SIOCGSKNS: ::std::os::raw::c_uint = 35148; +pub const SIOCDARP: ::std::os::raw::c_uint = 35155; +pub const SIOCGARP: ::std::os::raw::c_uint = 35156; +pub const SIOCSARP: ::std::os::raw::c_uint = 35157; +pub const SIOCDRARP: ::std::os::raw::c_uint = 35168; +pub const SIOCGRARP: 
::std::os::raw::c_uint = 35169;
+pub const SIOCSRARP: ::std::os::raw::c_uint = 35170;
+pub const SIOCGIFMAP: ::std::os::raw::c_uint = 35184;
+pub const SIOCSIFMAP: ::std::os::raw::c_uint = 35185;
+pub const SIOCADDDLCI: ::std::os::raw::c_uint = 35200;
+pub const SIOCDELDLCI: ::std::os::raw::c_uint = 35201;
+pub const SIOCGIFVLAN: ::std::os::raw::c_uint = 35202;
+pub const SIOCSIFVLAN: ::std::os::raw::c_uint = 35203;
+pub const SIOCBONDENSLAVE: ::std::os::raw::c_uint = 35216;
+pub const SIOCBONDRELEASE: ::std::os::raw::c_uint = 35217;
+pub const SIOCBONDSETHWADDR: ::std::os::raw::c_uint = 35218;
+pub const SIOCBONDSLAVEINFOQUERY: ::std::os::raw::c_uint = 35219;
+pub const SIOCBONDINFOQUERY: ::std::os::raw::c_uint = 35220;
+pub const SIOCBONDCHANGEACTIVE: ::std::os::raw::c_uint = 35221;
+pub const SIOCBRADDBR: ::std::os::raw::c_uint = 35232;
+pub const SIOCBRDELBR: ::std::os::raw::c_uint = 35233;
+pub const SIOCBRADDIF: ::std::os::raw::c_uint = 35234;
+pub const SIOCBRDELIF: ::std::os::raw::c_uint = 35235;
+pub const SIOCSHWTSTAMP: ::std::os::raw::c_uint = 35248;
+pub const SIOCGHWTSTAMP: ::std::os::raw::c_uint = 35249;
+pub const SIOCDEVPRIVATE: ::std::os::raw::c_uint = 35312;
+pub const SIOCPROTOPRIVATE: ::std::os::raw::c_uint = 35296;
diff --git a/src/dragonball/src/dbs_utils/src/net/tap.rs b/src/dragonball/src/dbs_utils/src/net/tap.rs
new file mode 100644
index 000000000000..012cce494263
--- /dev/null
+++ b/src/dragonball/src/dbs_utils/src/net/tap.rs
@@ -0,0 +1,471 @@
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the THIRD-PARTY file.
+
+use std::fs::File;
+use std::io::{Error as IoError, Read, Result as IoResult, Write};
+use std::net::UdpSocket;
+use std::os::raw::*;
+use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
+
+use vmm_sys_util::ioctl::{ioctl_with_mut_ref, ioctl_with_ref, ioctl_with_val};
+use vmm_sys_util::{ioctl_ioc_nr, ioctl_iow_nr};
+
+use crate::net::net_gen;
+
+// As defined in the Linux UAPI:
+// https://elixir.bootlin.com/linux/v4.17/source/include/uapi/linux/if.h#L33
+pub(crate) const IFACE_NAME_MAX_LEN: usize = 16;
+
+/// List of errors the tap implementation can return.
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    /// Failed to create a socket.
+    #[error("cannot create socket. {0}")]
+    CreateSocket(#[source] IoError),
+
+    /// Unable to create tap interface.
+    #[error("cannot create tap device. {0}")]
+    CreateTap(IoError),
+
+    /// Invalid interface name.
+    #[error("invalid network interface name")]
+    InvalidIfname,
+
+    /// ioctl failed.
+    #[error("failure while issuing Tap ioctl command. {0}")]
+    IoctlError(#[source] IoError),
+
+    /// Couldn't open /dev/net/tun.
+    #[error("cannot open tap device. {0}")]
+    OpenTun(#[source] IoError),
+}
+
+pub type Result<T> = ::std::result::Result<T, Error>;
+
+const TUNTAP: ::std::os::raw::c_uint = 84;
+ioctl_iow_nr!(TUNSETIFF, TUNTAP, 202, ::std::os::raw::c_int);
+ioctl_iow_nr!(TUNSETOFFLOAD, TUNTAP, 208, ::std::os::raw::c_uint);
+ioctl_iow_nr!(TUNSETVNETHDRSZ, TUNTAP, 216, ::std::os::raw::c_int);
+
+/// Handle for a network tap interface.
+///
+/// For now, this simply wraps the file descriptor for the tap device so methods
+/// can run ioctls on the interface.
The tap interface fd will be closed when +/// Tap goes out of scope, and the kernel will clean up the interface automatically. +#[derive(Debug)] +pub struct Tap { + /// tap device file handle + pub tap_file: File, + pub(crate) if_name: [u8; IFACE_NAME_MAX_LEN], + pub(crate) if_flags: std::os::raw::c_short, +} + +impl PartialEq for Tap { + fn eq(&self, other: &Tap) -> bool { + self.if_name == other.if_name + } +} + +fn create_socket() -> Result { + // This is safe since we check the return value. + let sock = unsafe { libc::socket(libc::AF_INET, libc::SOCK_DGRAM, 0) }; + if sock < 0 { + return Err(Error::CreateSocket(IoError::last_os_error())); + } + + // This is safe; nothing else will use or hold onto the raw sock fd. + Ok(unsafe { UdpSocket::from_raw_fd(sock) }) +} + +// Returns a byte vector representing the contents of a null terminated C string which +// contains if_name. +fn build_terminated_if_name(if_name: &str) -> Result<[u8; IFACE_NAME_MAX_LEN]> { + // Convert the string slice to bytes, and shadow the variable, + // since we no longer need the &str version. + let if_name = if_name.as_bytes(); + + if if_name.len() >= IFACE_NAME_MAX_LEN { + return Err(Error::InvalidIfname); + } + + let mut terminated_if_name = [b'\0'; IFACE_NAME_MAX_LEN]; + terminated_if_name[..if_name.len()].copy_from_slice(if_name); + + Ok(terminated_if_name) +} + +impl Tap { + /// Create a TUN/TAP device given the interface name. + /// # Arguments + /// + /// * `if_name` - the name of the interface. + /// # Example + /// + /// ```no_run + /// use dbs_utils::net::Tap; + /// Tap::open_named("doc-test-tap", false).unwrap(); + /// ``` + pub fn open_named(if_name: &str, multi_vq: bool) -> Result { + let terminated_if_name = build_terminated_if_name(if_name)?; + + // This is pretty messy because of the unions used by ifreq. Since we + // don't call as_mut on the same union field more than once, this block + // is safe. + let mut ifreq: net_gen::ifreq = Default::default(); + unsafe { + let ifrn_name = ifreq.ifr_ifrn.ifrn_name.as_mut(); + ifrn_name.copy_from_slice(terminated_if_name.as_ref()); + let ifru_flags = ifreq.ifr_ifru.ifru_flags.as_mut(); + *ifru_flags = (net_gen::IFF_TAP + | net_gen::IFF_NO_PI + | net_gen::IFF_VNET_HDR + | if multi_vq { + net_gen::IFF_MULTI_QUEUE + } else { + 0 + }) as c_short; + } + + Tap::create_tap_with_ifreq(&mut ifreq) + } + + fn create_tap_with_ifreq(ifreq: &mut net_gen::ifreq) -> Result { + let fd = unsafe { + // Open calls are safe because we give a constant null-terminated + // string and verify the result. + libc::open( + b"/dev/net/tun\0".as_ptr() as *const c_char, + libc::O_RDWR | libc::O_NONBLOCK | libc::O_CLOEXEC, + ) + }; + if fd < 0 { + return Err(Error::OpenTun(IoError::last_os_error())); + } + + // We just checked that the fd is valid. + let tuntap = unsafe { File::from_raw_fd(fd) }; + + // ioctl is safe since we call it with a valid tap fd and check the return + // value. + let ret = unsafe { ioctl_with_mut_ref(&tuntap, TUNSETIFF(), ifreq) }; + + if ret < 0 { + return Err(Error::CreateTap(IoError::last_os_error())); + } + + // Safe since only the name is accessed, and it's cloned out. + Ok(Tap { + tap_file: tuntap, + if_name: unsafe { *ifreq.ifr_ifrn.ifrn_name.as_ref() }, + if_flags: unsafe { *ifreq.ifr_ifru.ifru_flags.as_ref() }, + }) + } + + /// Change the origin tap into multiqueue taps. 
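+    ///
+    /// # Example
+    ///
+    /// Illustrative sketch only: the interface name below is made up, and creating
+    /// tap devices needs CAP_NET_ADMIN, hence `no_run`.
+    ///
+    /// ```no_run
+    /// use dbs_utils::net::Tap;
+    /// // Open a multi-queue capable tap and split it into two queue pairs.
+    /// let tap = Tap::open_named("doc-test-mq-tap", true).unwrap();
+    /// let taps = tap.into_mq_taps(2).unwrap();
+    /// assert_eq!(taps.len(), 2);
+    /// ```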
+ pub fn into_mq_taps(self, vq_pairs: usize) -> Result> { + let mut taps = Vec::new(); + + if vq_pairs <= 1 { + taps.push(self); + return Ok(taps); + } + + // Add other socket into the origin tap interface + for _ in 0..vq_pairs - 1 { + let mut ifreq = self.get_ifreq(); + let tap = Tap::create_tap_with_ifreq(&mut ifreq)?; + + tap.enable()?; + + taps.push(tap); + } + + taps.insert(0, self); + Ok(taps) + } + + /// Set the offload flags for the tap interface. + pub fn set_offload(&self, flags: c_uint) -> Result<()> { + // ioctl is safe. Called with a valid tap fd, and we check the return. + let ret = unsafe { ioctl_with_val(&self.tap_file, TUNSETOFFLOAD(), c_ulong::from(flags)) }; + if ret < 0 { + return Err(Error::IoctlError(IoError::last_os_error())); + } + + Ok(()) + } + + /// Enable the tap interface. + pub fn enable(&self) -> Result<()> { + let sock = create_socket()?; + + let mut ifreq = self.get_ifreq(); + + // We only access one field of the ifru union, hence this is safe. + unsafe { + let ifru_flags = ifreq.ifr_ifru.ifru_flags.as_mut(); + *ifru_flags = + (net_gen::net_device_flags_IFF_UP | net_gen::net_device_flags_IFF_RUNNING) as i16; + } + + // ioctl is safe. Called with a valid sock fd, and we check the return. + let ret = + unsafe { ioctl_with_ref(&sock, c_ulong::from(net_gen::sockios::SIOCSIFFLAGS), &ifreq) }; + if ret < 0 { + return Err(Error::IoctlError(IoError::last_os_error())); + } + + Ok(()) + } + + /// Set the size of the vnet hdr. + pub fn set_vnet_hdr_size(&self, size: c_int) -> Result<()> { + // ioctl is safe. Called with a valid tap fd, and we check the return. + let ret = unsafe { ioctl_with_ref(&self.tap_file, TUNSETVNETHDRSZ(), &size) }; + if ret < 0 { + return Err(Error::IoctlError(IoError::last_os_error())); + } + + Ok(()) + } + + fn get_ifreq(&self) -> net_gen::ifreq { + let mut ifreq: net_gen::ifreq = Default::default(); + + // This sets the name of the interface, which is the only entry + // in a single-field union. + unsafe { + let ifrn_name = ifreq.ifr_ifrn.ifrn_name.as_mut(); + ifrn_name.clone_from_slice(&self.if_name); + + let flags = ifreq.ifr_ifru.ifru_flags.as_mut(); + *flags = self.if_flags; + } + + ifreq + } + + /// Get the origin flags when interface was created. + pub fn if_flags(&self) -> u32 { + self.if_flags as u32 + } +} + +impl Read for Tap { + fn read(&mut self, buf: &mut [u8]) -> IoResult { + self.tap_file.read(buf) + } +} + +impl Write for Tap { + fn write(&mut self, buf: &[u8]) -> IoResult { + self.tap_file.write(buf) + } + + fn flush(&mut self) -> IoResult<()> { + Ok(()) + } +} + +impl AsRawFd for Tap { + fn as_raw_fd(&self) -> RawFd { + self.tap_file.as_raw_fd() + } +} + +mod tests { + #![allow(dead_code)] + + use std::mem; + use std::net::Ipv4Addr; + use std::str; + use std::sync::atomic::{AtomicUsize, Ordering}; + + use super::*; + + const SUBNET_MASK: &str = "255.255.255.0"; + const TAP_IP_PREFIX: &str = "192.168.241."; + const FAKE_MAC: &str = "12:34:56:78:9a:bc"; + + // We skip the first 10 bytes because the IFF_VNET_HDR flag is set when the interface + // is created, and the legacy header is 10 bytes long without a certain flag which + // is not set in Tap::new(). + const VETH_OFFSET: usize = 10; + static NEXT_IP: AtomicUsize = AtomicUsize::new(1); + + // Create a sockaddr_in from an IPv4 address, and expose it as + // an opaque sockaddr suitable for usage by socket ioctls. 
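+    // Note: the `mem::transmute` below only compiles because both types have the
+    // same size; on Linux, `sockaddr` and `sockaddr_in` are both 16 bytes (the
+    // generated layout test for `sockaddr_in` asserts the latter).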
+ fn create_sockaddr(ip_addr: Ipv4Addr) -> net_gen::sockaddr { + // IPv4 addresses big-endian (network order), but Ipv4Addr will give us + // a view of those bytes directly so we can avoid any endian trickiness. + let addr_in = net_gen::sockaddr_in { + sin_family: net_gen::AF_INET as u16, + sin_port: 0, + sin_addr: unsafe { mem::transmute(ip_addr.octets()) }, + __pad: [0; 8usize], + }; + + unsafe { mem::transmute(addr_in) } + } + impl Tap { + // We do not run unit tests in parallel so we should have no problem + // assigning the same IP. + + /// Create a new tap interface. + pub fn new() -> Result { + // The name of the tap should be {module_name}{index} so that + // we make sure it stays different when tests are run concurrently. + let next_ip = NEXT_IP.fetch_add(1, Ordering::SeqCst); + Self::open_named(&format!("dbs_tap{next_ip}"), false) + } + + /// Set the host-side IP address for the tap interface. + pub fn set_ip_addr(&self, ip_addr: Ipv4Addr) -> Result<()> { + let sock = create_socket()?; + let addr = create_sockaddr(ip_addr); + + let mut ifreq = self.get_ifreq(); + + // We only access one field of the ifru union, hence this is safe. + unsafe { + let ifru_addr = ifreq.ifr_ifru.ifru_addr.as_mut(); + *ifru_addr = addr; + } + + // ioctl is safe. Called with a valid sock fd, and we check the return. + let ret = unsafe { + ioctl_with_ref(&sock, c_ulong::from(net_gen::sockios::SIOCSIFADDR), &ifreq) + }; + if ret < 0 { + return Err(Error::IoctlError(IoError::last_os_error())); + } + + Ok(()) + } + + /// Set the netmask for the subnet that the tap interface will exist on. + pub fn set_netmask(&self, netmask: Ipv4Addr) -> Result<()> { + let sock = create_socket()?; + let addr = create_sockaddr(netmask); + + let mut ifreq = self.get_ifreq(); + + // We only access one field of the ifru union, hence this is safe. + unsafe { + let ifru_addr = ifreq.ifr_ifru.ifru_addr.as_mut(); + *ifru_addr = addr; + } + + // ioctl is safe. Called with a valid sock fd, and we check the return. + let ret = unsafe { + ioctl_with_ref( + &sock, + c_ulong::from(net_gen::sockios::SIOCSIFNETMASK), + &ifreq, + ) + }; + if ret < 0 { + return Err(Error::IoctlError(IoError::last_os_error())); + } + + Ok(()) + } + } + + fn tap_name_to_string(tap: &Tap) -> String { + let null_pos = tap.if_name.iter().position(|x| *x == 0).unwrap(); + str::from_utf8(&tap.if_name[..null_pos]) + .unwrap() + .to_string() + } + + #[test] + fn test_tap_name() { + // Sanity check that the assumed max iface name length is correct. + assert_eq!( + IFACE_NAME_MAX_LEN, + net_gen::ifreq__bindgen_ty_1::default() + .bindgen_union_field + .len() + ); + + // 16 characters - too long. + let name = "a123456789abcdef"; + match Tap::open_named(name, false) { + Err(Error::InvalidIfname) => (), + _ => panic!("Expected Error::InvalidIfname"), + }; + + // 15 characters - OK. + let name = "a123456789abcde"; + let tap = Tap::open_named(name, false).unwrap(); + assert_eq!( + name, + std::str::from_utf8(&tap.if_name[0..(IFACE_NAME_MAX_LEN - 1)]).unwrap() + ); + } + + #[test] + fn test_tap_partial_eq() { + assert_ne!(Tap::new().unwrap(), Tap::new().unwrap()); + } + + #[test] + fn test_tap_configure() { + // `fetch_add` adds to the current value, returning the previous value. 
+ let next_ip = NEXT_IP.fetch_add(1, Ordering::SeqCst); + + let tap = Tap::new().unwrap(); + let ip_addr: Ipv4Addr = format!("{TAP_IP_PREFIX}{next_ip}").parse().unwrap(); + let netmask: Ipv4Addr = SUBNET_MASK.parse().unwrap(); + + let ret = tap.set_ip_addr(ip_addr); + assert!(ret.is_ok()); + let ret = tap.set_netmask(netmask); + assert!(ret.is_ok()); + } + + #[test] + fn test_set_options() { + // This line will fail to provide an initialized FD if the test is not run as root. + let tap = Tap::new().unwrap(); + tap.set_vnet_hdr_size(16).unwrap(); + tap.set_offload(0).unwrap(); + + let faulty_tap = Tap { + tap_file: unsafe { File::from_raw_fd(i32::MAX) }, + if_name: [0x01; 16], + if_flags: 0, + }; + assert!(faulty_tap.set_vnet_hdr_size(16).is_err()); + assert!(faulty_tap.set_offload(0).is_err()); + } + + #[test] + fn test_tap_enable() { + let tap = Tap::new().unwrap(); + let ret = tap.enable(); + assert!(ret.is_ok()); + } + + #[test] + fn test_tap_get_ifreq() { + let tap = Tap::new().unwrap(); + let ret = tap.get_ifreq(); + assert_eq!( + "__BindgenUnionField", + format!("{:?}", ret.ifr_ifrn.ifrn_name) + ); + } + + #[test] + fn test_raw_fd() { + let tap = Tap::new().unwrap(); + assert_eq!(tap.as_raw_fd(), tap.tap_file.as_raw_fd()); + } +} diff --git a/src/dragonball/src/dbs_utils/src/rate_limiter.rs b/src/dragonball/src/dbs_utils/src/rate_limiter.rs new file mode 100644 index 000000000000..e99e2336cb4e --- /dev/null +++ b/src/dragonball/src/dbs_utils/src/rate_limiter.rs @@ -0,0 +1,908 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![deny(missing_docs)] +//! # Rate Limiter +//! +//! Provides a rate limiter written in Rust useful for IO operations that need to +//! be throttled. +//! +//! ## Behavior +//! +//! The rate limiter starts off as 'unblocked' with two token buckets configured +//! with the values passed in the `RateLimiter::new()` constructor. +//! All subsequent accounting is done independently for each token bucket based +//! on the `TokenType` used. If any of the buckets runs out of budget, the limiter +//! goes in the 'blocked' state. At this point an internal timer is set up which +//! will later 'wake up' the user in order to retry sending data. The 'wake up' +//! notification will be dispatched as an event on the FD provided by the `AsRawFD` +//! trait implementation. +//! +//! The contract is that the user shall also call the `event_handler()` method on +//! receipt of such an event. +//! +//! The token buckets are replenished every time a `consume()` is called, before +//! actually trying to consume the requested amount of tokens. The amount of tokens +//! replenished is automatically calculated to respect the `complete_refill_time` +//! configuration parameter provided by the user. The token buckets will never +//! replenish above their respective `size`. +//! +//! Each token bucket can start off with a `one_time_burst` initial extra capacity +//! on top of their `size`. This initial extra credit does not replenish and +//! can be used for an initial burst of data. +//! +//! The granularity for 'wake up' events when the rate limiter is blocked is +//! currently hardcoded to `10 milliseconds`. +//! +//! ## Limitations +//! +//! This rate limiter implementation relies on the *Linux kernel's timerfd* so its +//! usage is limited to Linux systems. +//! +//! Another particularity of this implementation is that it is not self-driving. +//! 
It is meant to be used in an external event loop and thus implements the `AsRawFd`
+//! trait and provides an *event-handler* as part of its API. This *event-handler*
+//! needs to be called by the user on every event on the rate limiter's `AsRawFd` FD.
+
+use std::os::unix::io::{AsRawFd, RawFd};
+use std::time::{Duration, Instant};
+use std::{fmt, io};
+
+use log::error;
+use timerfd::{ClockId, SetTimeFlags, TimerFd, TimerState};
+
+#[derive(Debug)]
+/// Describes the errors that may occur while handling rate limiter events.
+pub enum Error {
+    /// The event handler was called spuriously.
+    SpuriousRateLimiterEvent(&'static str),
+}
+
+// Interval at which the refill timer will run when limiter is at capacity.
+const REFILL_TIMER_INTERVAL_MS: u64 = 10;
+const TIMER_REFILL_STATE: TimerState =
+    TimerState::Oneshot(Duration::from_millis(REFILL_TIMER_INTERVAL_MS));
+
+const NANOSEC_IN_ONE_MILLISEC: u64 = 1_000_000;
+
+// Euclid's two-thousand-year-old algorithm for finding the greatest common divisor.
+fn gcd(x: u64, y: u64) -> u64 {
+    let mut x = x;
+    let mut y = y;
+    while y != 0 {
+        let t = y;
+        y = x % y;
+        x = t;
+    }
+    x
+}
+
+/// Enum describing the outcomes of a `reduce()` call on a `TokenBucket`.
+#[derive(Clone, Debug, PartialEq)]
+pub enum BucketReduction {
+    /// Not enough tokens to complete the operation.
+    Failure,
+    /// Part of the available tokens have been consumed.
+    Success,
+    /// A number of tokens `inner` times larger than the bucket size have been consumed.
+    OverConsumption(f64),
+}
+
+/// TokenBucket provides a lower level interface to rate limiting with a
+/// configurable capacity, refill-rate and initial burst.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct TokenBucket {
+    // Bucket defining traits.
+    size: u64,
+    // Initial burst size.
+    initial_one_time_burst: u64,
+    // Complete refill time in milliseconds.
+    refill_time: u64,
+
+    // Internal state descriptors.
+
+    // Number of free initial tokens that can be consumed at no cost.
+    one_time_burst: u64,
+    // Current token budget.
+    budget: u64,
+    // Last time this token bucket saw activity.
+    last_update: Instant,
+
+    // Fields used for pre-processing optimizations.
+    processed_capacity: u64,
+    processed_refill_time: u64,
+}
+
+impl TokenBucket {
+    /// Creates a `TokenBucket`.
+    ///
+    /// The created `TokenBucket` has `size` total capacity and takes `complete_refill_time_ms`
+    /// milliseconds to go from zero tokens to total capacity. The `one_time_burst` is initial
+    /// extra credit on top of total capacity, that does not replenish and which can be used
+    /// for an initial burst of data.
+    ///
+    /// Both `size` and `complete_refill_time_ms` must be non-zero (enforced by a debug
+    /// assertion); use `RateLimiter::make_bucket` to obtain an `Option<TokenBucket>` that
+    /// disables limiting when either value is zero.
+    pub fn new(size: u64, one_time_burst: u64, complete_refill_time_ms: u64) -> Self {
+        // A zero capacity or refill time must be handled by the caller (see `RateLimiter::make_bucket`).
+        debug_assert!(size != 0 && complete_refill_time_ms != 0);
+
+        // Formula for computing current refill amount:
+        // refill_token_count = (delta_time * size) / (complete_refill_time_ms * 1_000_000)
+        // In order to avoid overflows, simplify the fraction by computing the greatest common divisor.
+
+        let complete_refill_time_ns = complete_refill_time_ms * NANOSEC_IN_ONE_MILLISEC;
+        // Get the greatest common factor between `size` and `complete_refill_time_ns`.
+        let common_factor = gcd(size, complete_refill_time_ns);
+        // The division will be exact since `common_factor` is a factor of `size`.
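+        // For example, size = 1000 tokens and complete_refill_time_ms = 1000 give
+        // complete_refill_time_ns = 1_000_000_000 and common_factor = 1000, so the two
+        // divisions below yield processed_capacity = 1 and processed_refill_time =
+        // 1_000_000, i.e. a refill rate of one token per millisecond.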
+ let processed_capacity: u64 = size / common_factor; + // The division will be exact since `common_factor` is a factor of `complete_refill_time_ns`. + let processed_refill_time: u64 = complete_refill_time_ns / common_factor; + + TokenBucket { + size, + one_time_burst, + initial_one_time_burst: one_time_burst, + refill_time: complete_refill_time_ms, + // Start off full. + budget: size, + // Last updated is now. + last_update: Instant::now(), + processed_capacity, + processed_refill_time, + } + } + + // Replenishes token bucket based on elapsed time. Should only be called internally by `Self`. + fn auto_replenish(&mut self) { + // Compute time passed since last refill/update. + let time_delta = self.last_update.elapsed().as_nanos() as u64; + self.last_update = Instant::now(); + + // At each 'time_delta' nanoseconds the bucket should refill with: + // refill_amount = (time_delta * size) / (complete_refill_time_ms * 1_000_000) + // `processed_capacity` and `processed_refill_time` are the result of simplifying above + // fraction formula with their greatest-common-factor. + let tokens = (time_delta * self.processed_capacity) / self.processed_refill_time; + self.budget = std::cmp::min(self.budget + tokens, self.size); + } + + /// Attempts to consume `tokens` from the bucket and returns whether the action succeeded. + pub fn reduce(&mut self, mut tokens: u64) -> BucketReduction { + // First things first: consume the one-time-burst budget. + if self.one_time_burst > 0 { + // We still have burst budget for *all* tokens requests. + if self.one_time_burst >= tokens { + self.one_time_burst -= tokens; + self.last_update = Instant::now(); + // No need to continue to the refill process, we still have burst budget to consume from. + return BucketReduction::Success; + } else { + // We still have burst budget for *some* of the tokens requests. + // The tokens left unfulfilled will be consumed from current `self.budget`. + tokens -= self.one_time_burst; + self.one_time_burst = 0; + } + } + + if tokens > self.budget { + // Hit the bucket bottom, let's auto-replenish and try again. + self.auto_replenish(); + + // This operation requests a bandwidth higher than the bucket size + if tokens > self.size { + error!( + "Consumed {} tokens from bucket of size {}", + tokens, self.size + ); + // Empty the bucket and report an overconsumption of + // (remaining tokens / size) times larger than the bucket size + tokens -= self.budget; + self.budget = 0; + return BucketReduction::OverConsumption(tokens as f64 / self.size as f64); + } + + if tokens > self.budget { + // Still not enough tokens, consume() fails, return false. + return BucketReduction::Failure; + } + } + + self.budget -= tokens; + BucketReduction::Success + } + + /// "Manually" adds tokens to bucket. + pub fn force_replenish(&mut self, tokens: u64) { + // This means we are still during the burst interval. + // Of course there is a very small chance that the last reduce() also used up burst + // budget which should now be replenished, but for performance and code-complexity + // reasons we're just gonna let that slide since it's practically inconsequential. + if self.one_time_burst > 0 { + self.one_time_burst += tokens; + return; + } + self.budget = std::cmp::min(self.budget + tokens, self.size); + } + + /// Returns the capacity of the token bucket. + pub fn capacity(&self) -> u64 { + self.size + } + + /// Returns the remaining one time burst budget. 
+ pub fn one_time_burst(&self) -> u64 { + self.one_time_burst + } + + /// Returns the time in milliseconds required to to completely fill the bucket. + pub fn refill_time_ms(&self) -> u64 { + self.refill_time + } + + /// Returns the current budget (one time burst allowance notwithstanding). + pub fn budget(&self) -> u64 { + self.budget + } + + /// Returns the initially configured one time burst budget. + pub fn initial_one_time_burst(&self) -> u64 { + self.initial_one_time_burst + } +} + +/// Enum that describes the type of token used. +pub enum TokenType { + /// Token type used for bandwidth limiting. + Bytes, + /// Token type used for operations/second limiting. + Ops, +} + +/// Enum that describes the type of token bucket update. +#[derive(Clone, Debug)] +pub enum BucketUpdate { + /// No Update - same as before. + None, + /// Rate Limiting is disabled on this bucket. + Disabled, + /// Rate Limiting enabled with updated bucket. + Update(TokenBucket), +} + +/// Rate Limiter that works on both bandwidth and ops/s limiting. +/// +/// Bandwidth (bytes/s) and ops/s limiting can be used at the same time or individually. +/// +/// Implementation uses a single timer through TimerFd to refresh either or +/// both token buckets. +/// +/// Its internal buckets are 'passively' replenished as they're being used (as +/// part of `consume()` operations). +/// A timer is enabled and used to 'actively' replenish the token buckets when +/// limiting is in effect and `consume()` operations are disabled. +/// +/// RateLimiters will generate events on the FDs provided by their `AsRawFd` trait +/// implementation. These events are meant to be consumed by the user of this struct. +/// On each such event, the user must call the `event_handler()` method. +pub struct RateLimiter { + /// Bandwidth limit in bytes/s + bandwidth: Option, + /// Operate limit in ops/s + ops: Option, + /// Timer handle + timer_fd: TimerFd, + /// Internal flag that quickly determines timer state. + timer_active: bool, +} + +impl PartialEq for RateLimiter { + fn eq(&self, other: &RateLimiter) -> bool { + self.bandwidth == other.bandwidth && self.ops == other.ops + } +} + +impl fmt::Debug for RateLimiter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "RateLimiter {{ bandwidth: {:?}, ops: {:?} }}", + self.bandwidth, self.ops + ) + } +} + +impl RateLimiter { + /// This function creates a `TokenBucket` wrapped in an `Option` with a given total capacity, + /// one time burst, and complete refill time (in miliseconds). If the total capacity or the + /// complete refill time are zero, then `None` is returned. + pub fn make_bucket( + total_capacity: u64, + one_time_burst: u64, + complete_refill_time_ms: u64, + ) -> Option { + // If either token bucket capacity or refill time is 0, disable limiting. + if total_capacity != 0 && complete_refill_time_ms != 0 { + Some(TokenBucket::new( + total_capacity, + one_time_burst, + complete_refill_time_ms, + )) + } else { + None + } + } + /// Creates a new Rate Limiter that can limit on both bytes/s and ops/s. + /// + /// # Arguments + /// + /// * `bytes_total_capacity` - the total capacity of the `TokenType::Bytes` token bucket. + /// * `bytes_one_time_burst` - initial extra credit on top of `bytes_total_capacity`, + /// that does not replenish and which can be used for an initial burst of data. + /// * `bytes_complete_refill_time_ms` - number of milliseconds for the `TokenType::Bytes` + /// token bucket to go from zero Bytes to `bytes_total_capacity` Bytes. 
+ /// * `ops_total_capacity` - the total capacity of the `TokenType::Ops` token bucket. + /// * `ops_one_time_burst` - initial extra credit on top of `ops_total_capacity`, + /// that does not replenish and which can be used for an initial burst of data. + /// * `ops_complete_refill_time_ms` - number of milliseconds for the `TokenType::Ops` token + /// bucket to go from zero Ops to `ops_total_capacity` Ops. + /// + /// If either bytes/ops *size* or *refill_time* are **zero**, the limiter + /// is **disabled** for that respective token type. + /// + /// # Errors + /// + /// If the timerfd creation fails, an error is returned. + pub fn new( + bytes_total_capacity: u64, + bytes_one_time_burst: u64, + bytes_complete_refill_time_ms: u64, + ops_total_capacity: u64, + ops_one_time_burst: u64, + ops_complete_refill_time_ms: u64, + ) -> io::Result { + let bytes_token_bucket = Self::make_bucket( + bytes_total_capacity, + bytes_one_time_burst, + bytes_complete_refill_time_ms, + ); + + let ops_token_bucket = Self::make_bucket( + ops_total_capacity, + ops_one_time_burst, + ops_complete_refill_time_ms, + ); + + // We'll need a timer_fd, even if our current config effectively disables rate limiting, + // because `Self::update_buckets()` might re-enable it later, and we might be + // seccomp-blocked from creating the timer_fd at that time. + let timer_fd = TimerFd::new_custom(ClockId::Monotonic, true, true)?; + + Ok(RateLimiter { + bandwidth: bytes_token_bucket, + ops: ops_token_bucket, + timer_fd, + timer_active: false, + }) + } + + // Arm the timer of the rate limiter with the provided `TimerState`. + fn activate_timer(&mut self, timer_state: TimerState) { + // Register the timer; don't care about its previous state + self.timer_fd.set_state(timer_state, SetTimeFlags::Default); + self.timer_active = true; + } + + /// Attempts to consume tokens and returns whether that is possible. + /// + /// If rate limiting is disabled on provided `token_type`, this function will always succeed. + pub fn consume(&mut self, tokens: u64, token_type: TokenType) -> bool { + // If the timer is active, we can't consume tokens from any bucket and the function fails. + if self.timer_active { + return false; + } + + // Identify the required token bucket. + let token_bucket = match token_type { + TokenType::Bytes => self.bandwidth.as_mut(), + TokenType::Ops => self.ops.as_mut(), + }; + // Try to consume from the token bucket. + if let Some(bucket) = token_bucket { + let refill_time = bucket.refill_time_ms(); + match bucket.reduce(tokens) { + // When we report budget is over, there will be no further calls here, + // register a timer to replenish the bucket and resume processing; + // make sure there is only one running timer for this limiter. + BucketReduction::Failure => { + if !self.timer_active { + self.activate_timer(TIMER_REFILL_STATE); + } + false + } + // The operation succeeded and further calls can be made. + BucketReduction::Success => true, + // The operation succeeded as the tokens have been consumed + // but the timer still needs to be armed. + BucketReduction::OverConsumption(ratio) => { + // The operation "borrowed" a number of tokens `ratio` times + // greater than the size of the bucket, and since it takes + // `refill_time` milliseconds to fill an empty bucket, in + // order to enforce the bandwidth limit we need to prevent + // further calls to the rate limiter for + // `ratio * refill_time` milliseconds. 
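+                    // For example, a request for 2500 tokens against a full 1000-token
+                    // bucket with refill_time = 1000 ms empties the budget and yields
+                    // ratio = (2500 - 1000) / 1000 = 1.5, so the timer is armed for
+                    // roughly 1500 ms before consume() can succeed again.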
+ self.activate_timer(TimerState::Oneshot(Duration::from_millis( + (ratio * refill_time as f64) as u64, + ))); + true + } + } + } else { + // If bucket is not present rate limiting is disabled on token type, + // consume() will always succeed. + true + } + } + + /// Adds tokens of `token_type` to their respective bucket. + /// + /// Can be used to *manually* add tokens to a bucket. Useful for reverting a + /// `consume()` if needed. + pub fn manual_replenish(&mut self, tokens: u64, token_type: TokenType) { + // Identify the required token bucket. + let token_bucket = match token_type { + TokenType::Bytes => self.bandwidth.as_mut(), + TokenType::Ops => self.ops.as_mut(), + }; + // Add tokens to the token bucket. + if let Some(bucket) = token_bucket { + bucket.force_replenish(tokens); + } + } + + /// Returns whether this rate limiter is blocked. + /// + /// The limiter 'blocks' when a `consume()` operation fails because there was not enough + /// budget for it. + /// An event will be generated on the exported FD when the limiter 'unblocks'. + pub fn is_blocked(&self) -> bool { + self.timer_active + } + + /// This function needs to be called every time there is an event on the + /// FD provided by this object's `AsRawFd` trait implementation. + /// + /// # Errors + /// + /// If the rate limiter is disabled or is not blocked, an error is returned. + pub fn event_handler(&mut self) -> Result<(), Error> { + match self.timer_fd.read() { + 0 => Err(Error::SpuriousRateLimiterEvent( + "Rate limiter event handler called without a present timer", + )), + _ => { + self.timer_active = false; + Ok(()) + } + } + } + + /// Updates the parameters of the token buckets associated with this RateLimiter. + // TODO: Please note that, right now, the buckets become full after being updated. + pub fn update_buckets(&mut self, bytes: BucketUpdate, ops: BucketUpdate) { + match bytes { + BucketUpdate::Disabled => self.bandwidth = None, + BucketUpdate::Update(tb) => self.bandwidth = Some(tb), + BucketUpdate::None => (), + }; + match ops { + BucketUpdate::Disabled => self.ops = None, + BucketUpdate::Update(tb) => self.ops = Some(tb), + BucketUpdate::None => (), + }; + } + /// Returns an immutable view of the inner bandwidth token bucket. + pub fn bandwidth(&self) -> Option<&TokenBucket> { + self.bandwidth.as_ref() + } + + /// Returns an immutable view of the inner ops token bucket. + pub fn ops(&self) -> Option<&TokenBucket> { + self.ops.as_ref() + } +} + +impl AsRawFd for RateLimiter { + /// Provides a FD which needs to be monitored for POLLIN events. + /// + /// This object's `event_handler()` method must be called on such events. + /// + /// Will return a negative value if rate limiting is disabled on both + /// token types. + fn as_raw_fd(&self) -> RawFd { + self.timer_fd.as_raw_fd() + } +} + +impl Default for RateLimiter { + /// Default RateLimiter is a no-op limiter with infinite budget. + fn default() -> Self { + // Safe to unwrap since this will not attempt to create timer_fd. + RateLimiter::new(0, 0, 0, 0, 0, 0).expect("Failed to build default RateLimiter") + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::thread; + use std::time::Duration; + const TEST_REFILL_TIMER_INTERVAL_MS: u64 = 100; + impl TokenBucket { + // Resets the token bucket: budget set to max capacity and last-updated set to now. 
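Stepping outside the diff for a moment: the following is a minimal usage sketch of how a device backend might drive the `RateLimiter` defined above, using only the API shown in this file (the `dbs_utils::rate_limiter` path matches the import used by the block device later in this patch; the frame/byte accounting is illustrative, not part of the patch).

use dbs_utils::rate_limiter::{RateLimiter, TokenType};

fn try_send(limiter: &mut RateLimiter, frame: &[u8]) -> bool {
    // Charge one operation plus the payload size against the two buckets.
    if !limiter.consume(1, TokenType::Ops) {
        return false;
    }
    if !limiter.consume(frame.len() as u64, TokenType::Bytes) {
        // Give back the op token so the failed attempt is not billed.
        limiter.manual_replenish(1, TokenType::Ops);
        return false;
    }
    true
}

// While `limiter.is_blocked()`, the caller stops submitting work, polls
// `limiter.as_raw_fd()` for POLLIN, and calls `limiter.event_handler()`
// once the refill timer fires before retrying.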
+ fn reset(&mut self) { + self.budget = self.size; + self.last_update = Instant::now(); + } + + fn get_last_update(&self) -> &Instant { + &self.last_update + } + + fn get_processed_capacity(&self) -> u64 { + self.processed_capacity + } + + fn get_processed_refill_time(&self) -> u64 { + self.processed_refill_time + } + + // After a restore, we cannot be certain that the last_update field has the same value. + pub fn partial_eq(&self, other: &TokenBucket) -> bool { + (other.capacity() == self.capacity()) + && (other.one_time_burst() == self.one_time_burst()) + && (other.refill_time_ms() == self.refill_time_ms()) + && (other.budget() == self.budget()) + } + } + + impl RateLimiter { + fn get_token_bucket(&self, token_type: TokenType) -> Option<&TokenBucket> { + match token_type { + TokenType::Bytes => self.bandwidth.as_ref(), + TokenType::Ops => self.ops.as_ref(), + } + } + } + + #[test] + fn test_token_bucket_create() { + let before = Instant::now(); + let tb = TokenBucket::new(1000, 0, 1000); + assert_eq!(tb.capacity(), 1000); + assert_eq!(tb.budget(), 1000); + assert_eq!(tb.initial_one_time_burst(), 0); + assert!(*tb.get_last_update() >= before); + let after = Instant::now(); + assert!(*tb.get_last_update() <= after); + assert_eq!(tb.get_processed_capacity(), 1); + assert_eq!(tb.get_processed_refill_time(), 1_000_000); + } + + #[test] + fn test_token_bucket_preprocess() { + let tb = TokenBucket::new(1000, 0, 1000); + assert_eq!(tb.get_processed_capacity(), 1); + assert_eq!(tb.get_processed_refill_time(), NANOSEC_IN_ONE_MILLISEC); + + let thousand = 1000; + let tb = TokenBucket::new(3 * 7 * 11 * 19 * thousand, 0, 7 * 11 * 13 * 17); + assert_eq!(tb.get_processed_capacity(), 3 * 19); + assert_eq!( + tb.get_processed_refill_time(), + 13 * 17 * (NANOSEC_IN_ONE_MILLISEC / thousand) + ); + } + + #[test] + fn test_token_bucket_reduce() { + // token bucket with capacity 1000 and refill time of 1000 milliseconds + // allowing rate of 1 token/ms. + let capacity = 1000; + let refill_ms = 1000; + let mut tb = TokenBucket::new(capacity, 0, refill_ms as u64); + + assert_eq!(tb.reduce(123), BucketReduction::Success); + assert_eq!(tb.budget(), capacity - 123); + assert_eq!(tb.reduce(capacity), BucketReduction::Failure); + + // Since the CI machine might be slow, we should sleep less milliseconds here than desired 123 ms to avoid errors caused by CI machines. 
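+        // During an 80 ms sleep roughly 80 tokens are replenished (the rate is
+        // 1 token/ms): enough for the reduce(1) and reduce(100) calls below,
+        // but still well short of a full `capacity` reduction, which must fail.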
+ thread::sleep(Duration::from_millis(80)); + assert_eq!(tb.reduce(1), BucketReduction::Success); + assert_eq!(tb.reduce(100), BucketReduction::Success); + assert_eq!(tb.reduce(capacity), BucketReduction::Failure); + + // token bucket with capacity 1000 and refill time of 1000 milliseconds + let mut tb = TokenBucket::new(1000, 1100, 1000); + // safely assuming the thread can run these 3 commands in less than 500ms + assert_eq!(tb.reduce(1000), BucketReduction::Success); + assert_eq!(tb.one_time_burst(), 100); + assert_eq!(tb.reduce(500), BucketReduction::Success); + assert_eq!(tb.one_time_burst(), 0); + assert_eq!(tb.reduce(500), BucketReduction::Success); + assert_eq!(tb.reduce(500), BucketReduction::Failure); + thread::sleep(Duration::from_millis(500)); + assert_eq!(tb.reduce(500), BucketReduction::Success); + thread::sleep(Duration::from_millis(1000)); + assert_eq!(tb.reduce(2500), BucketReduction::OverConsumption(1.5)); + + let before = Instant::now(); + tb.reset(); + assert_eq!(tb.capacity(), 1000); + assert_eq!(tb.budget(), 1000); + assert!(*tb.get_last_update() >= before); + let after = Instant::now(); + assert!(*tb.get_last_update() <= after); + } + + #[test] + fn test_rate_limiter_default() { + let mut l = RateLimiter::default(); + + // limiter should not be blocked + assert!(!l.is_blocked()); + // limiter should be disabled so consume(whatever) should work + assert!(l.consume(u64::max_value(), TokenType::Ops)); + assert!(l.consume(u64::max_value(), TokenType::Bytes)); + // calling the handler without there having been an event should error + assert!(l.event_handler().is_err()); + assert_eq!( + format!("{:?}", l.event_handler().err().unwrap()), + "SpuriousRateLimiterEvent(\ + \"Rate limiter event handler called without a present timer\")" + ); + } + + #[test] + fn test_rate_limiter_new() { + let l = RateLimiter::new(1000, 1001, 1002, 1003, 1004, 1005).unwrap(); + + let bw = l.bandwidth.unwrap(); + assert_eq!(bw.capacity(), 1000); + assert_eq!(bw.one_time_burst(), 1001); + assert_eq!(bw.initial_one_time_burst(), 1001); + assert_eq!(bw.refill_time_ms(), 1002); + assert_eq!(bw.budget(), 1000); + + let ops = l.ops.unwrap(); + assert_eq!(ops.capacity(), 1003); + assert_eq!(ops.one_time_burst(), 1004); + assert_eq!(ops.initial_one_time_burst(), 1004); + assert_eq!(ops.refill_time_ms(), 1005); + assert_eq!(ops.budget(), 1003); + } + + #[test] + fn test_rate_limiter_manual_replenish() { + // rate limiter with limit of 1000 bytes/s and 1000 ops/s + let mut l = RateLimiter::new(1000, 0, 1000, 1000, 0, 1000).unwrap(); + + // consume 123 bytes + assert!(l.consume(123, TokenType::Bytes)); + l.manual_replenish(23, TokenType::Bytes); + { + let bytes_tb = l.get_token_bucket(TokenType::Bytes).unwrap(); + assert_eq!(bytes_tb.budget(), 900); + } + // consume 123 ops + assert!(l.consume(123, TokenType::Ops)); + l.manual_replenish(23, TokenType::Ops); + { + let bytes_tb = l.get_token_bucket(TokenType::Ops).unwrap(); + assert_eq!(bytes_tb.budget(), 900); + } + } + + #[test] + fn test_rate_limiter_bandwidth() { + // rate limiter with limit of 1000 bytes/s + let mut l = RateLimiter::new(1000, 0, 1000, 0, 0, 0).unwrap(); + + // limiter should not be blocked + assert!(!l.is_blocked()); + // raw FD for this disabled should be valid + assert!(l.as_raw_fd() > 0); + + // ops/s limiter should be disabled so consume(whatever) should work + assert!(l.consume(u64::max_value(), TokenType::Ops)); + + // do full 1000 bytes + assert!(l.consume(1000, TokenType::Bytes)); + // try and fail on another 100 + 
assert!(!l.consume(100, TokenType::Bytes)); + // since consume failed, limiter should be blocked now + assert!(l.is_blocked()); + // wait half the timer period + thread::sleep(Duration::from_millis(TEST_REFILL_TIMER_INTERVAL_MS / 2)); + // limiter should still be blocked + assert!(l.is_blocked()); + // wait the other half of the timer period + thread::sleep(Duration::from_millis(TEST_REFILL_TIMER_INTERVAL_MS / 2)); + // the timer_fd should have an event on it by now + assert!(l.event_handler().is_ok()); + // limiter should now be unblocked + assert!(!l.is_blocked()); + // try and succeed on another 100 bytes this time + assert!(l.consume(100, TokenType::Bytes)); + } + + #[test] + fn test_rate_limiter_ops() { + // rate limiter with limit of 1000 ops/s + let mut l = RateLimiter::new(0, 0, 0, 1000, 0, 1000).unwrap(); + + // limiter should not be blocked + assert!(!l.is_blocked()); + // raw FD for this disabled should be valid + assert!(l.as_raw_fd() > 0); + + // bytes/s limiter should be disabled so consume(whatever) should work + assert!(l.consume(u64::max_value(), TokenType::Bytes)); + + // do full 1000 ops + assert!(l.consume(1000, TokenType::Ops)); + // try and fail on another 100 + assert!(!l.consume(100, TokenType::Ops)); + // since consume failed, limiter should be blocked now + assert!(l.is_blocked()); + // wait half the timer period + thread::sleep(Duration::from_millis(TEST_REFILL_TIMER_INTERVAL_MS / 2)); + // limiter should still be blocked + assert!(l.is_blocked()); + // wait the other half of the timer period + thread::sleep(Duration::from_millis(TEST_REFILL_TIMER_INTERVAL_MS / 2)); + // the timer_fd should have an event on it by now + assert!(l.event_handler().is_ok()); + // limiter should now be unblocked + assert!(!l.is_blocked()); + // try and succeed on another 100 ops this time + assert!(l.consume(100, TokenType::Ops)); + } + + #[test] + fn test_rate_limiter_full() { + // rate limiter with limit of 1000 bytes/s and 1000 ops/s + let mut l = RateLimiter::new(1000, 0, 1000, 1000, 0, 1000).unwrap(); + + // limiter should not be blocked + assert!(!l.is_blocked()); + // raw FD for this disabled should be valid + assert!(l.as_raw_fd() > 0); + + // do full 1000 bytes + assert!(l.consume(1000, TokenType::Ops)); + // do full 1000 bytes + assert!(l.consume(1000, TokenType::Bytes)); + // try and fail on another 100 ops + assert!(!l.consume(100, TokenType::Ops)); + // try and fail on another 100 bytes + assert!(!l.consume(100, TokenType::Bytes)); + // since consume failed, limiter should be blocked now + assert!(l.is_blocked()); + // wait half the timer period + thread::sleep(Duration::from_millis(TEST_REFILL_TIMER_INTERVAL_MS / 2)); + // limiter should still be blocked + assert!(l.is_blocked()); + // wait the other half of the timer period + thread::sleep(Duration::from_millis(TEST_REFILL_TIMER_INTERVAL_MS / 2)); + // the timer_fd should have an event on it by now + assert!(l.event_handler().is_ok()); + // limiter should now be unblocked + assert!(!l.is_blocked()); + // try and succeed on another 100 ops this time + assert!(l.consume(100, TokenType::Ops)); + // try and succeed on another 100 bytes this time + assert!(l.consume(100, TokenType::Bytes)); + } + + #[test] + fn test_rate_limiter_overconsumption() { + // initialize the rate limiter + let mut l = RateLimiter::new(1000, 0, 1000, 1000, 0, 1000).unwrap(); + // try to consume 2.5x the bucket size + // we are "borrowing" 1.5x the bucket size in tokens since + // the bucket is full + assert!(l.consume(2500, TokenType::Bytes)); + + 
// check that even after a whole second passes, the rate limiter + // is still blocked + thread::sleep(Duration::from_millis(1000)); + assert!(l.event_handler().is_err()); + assert!(l.is_blocked()); + + // after 1.5x the replenish time has passed, the rate limiter + // is available again + thread::sleep(Duration::from_millis(500)); + assert!(l.event_handler().is_ok()); + assert!(!l.is_blocked()); + + // reset the rate limiter + let mut l = RateLimiter::new(1000, 0, 1000, 1000, 0, 1000).unwrap(); + // try to consume 1.5x the bucket size + // we are "borrowing" 1.5x the bucket size in tokens since + // the bucket is full, should arm the timer to 0.5x replenish + // time, which is 500 ms + assert!(l.consume(1500, TokenType::Bytes)); + + // check that after more than the minimum refill time, + // the rate limiter is still blocked + thread::sleep(Duration::from_millis(200)); + assert!(l.event_handler().is_err()); + assert!(l.is_blocked()); + + // try to consume some tokens, which should fail as the timer + // is still active + assert!(!l.consume(100, TokenType::Bytes)); + assert!(l.event_handler().is_err()); + assert!(l.is_blocked()); + + // check that after the minimum refill time, the timer was not + // overwritten and the rate limiter is still blocked from the + // borrowing we performed earlier + thread::sleep(Duration::from_millis(90)); + assert!(l.event_handler().is_err()); + assert!(l.is_blocked()); + assert!(!l.consume(100, TokenType::Bytes)); + + // after waiting out the full duration, rate limiter should be + // availale again + thread::sleep(Duration::from_millis(210)); + assert!(l.event_handler().is_ok()); + assert!(!l.is_blocked()); + assert!(l.consume(100, TokenType::Bytes)); + } + + #[test] + fn test_update_buckets() { + let mut x = RateLimiter::new(1000, 2000, 1000, 10, 20, 1000).unwrap(); + + let initial_bw = x.bandwidth.clone(); + let initial_ops = x.ops.clone(); + + x.update_buckets(BucketUpdate::None, BucketUpdate::None); + assert_eq!(x.bandwidth, initial_bw); + assert_eq!(x.ops, initial_ops); + + let new_bw = RateLimiter::make_bucket(123, 0, 57).unwrap(); + let new_ops = RateLimiter::make_bucket(321, 12346, 89).unwrap(); + x.update_buckets( + BucketUpdate::Update(new_bw.clone()), + BucketUpdate::Update(new_ops.clone()), + ); + + // We have manually adjust the last_update field, because it changes when update_buckets() + // constructs new buckets (and thus gets a different value for last_update). We do this so + // it makes sense to test the following assertions. + x.bandwidth.as_mut().unwrap().last_update = new_bw.last_update; + x.ops.as_mut().unwrap().last_update = new_ops.last_update; + + assert_eq!(x.bandwidth, Some(new_bw)); + assert_eq!(x.ops, Some(new_ops)); + + x.update_buckets(BucketUpdate::Disabled, BucketUpdate::Disabled); + assert_eq!(x.bandwidth, None); + assert_eq!(x.ops, None); + } + + #[test] + fn test_rate_limiter_debug() { + let l = RateLimiter::new(1, 2, 3, 4, 5, 6).unwrap(); + assert_eq!( + format!("{l:?}"), + format!( + "RateLimiter {{ bandwidth: {:?}, ops: {:?} }}", + l.bandwidth(), + l.ops() + ), + ); + } +} diff --git a/src/dragonball/src/dbs_utils/src/time.rs b/src/dragonball/src/dbs_utils/src/time.rs new file mode 100644 index 000000000000..899007f9d3a1 --- /dev/null +++ b/src/dragonball/src/dbs_utils/src/time.rs @@ -0,0 +1,258 @@ +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::fmt; + +/// Constant to convert seconds to nanoseconds. 
+pub const NANOS_PER_SECOND: u64 = 1_000_000_000; +/// Constant to convert milliseconds to nanoseconds. +pub const NANOS_PER_MILLISECOND: u64 = 1_000_000; + +/// Wrapper over `libc::clockid_t` to specify Linux Kernel clock source. +pub enum ClockType { + /// Equivalent to `libc::CLOCK_MONOTONIC`. + Monotonic, + /// Equivalent to `libc::CLOCK_REALTIME`. + Real, + /// Equivalent to `libc::CLOCK_PROCESS_CPUTIME_ID`. + ProcessCpu, + /// Equivalent to `libc::CLOCK_THREAD_CPUTIME_ID`. + ThreadCpu, +} + +impl From for libc::clockid_t { + fn from(clock_type: ClockType) -> Self { + match clock_type { + ClockType::Monotonic => libc::CLOCK_MONOTONIC, + ClockType::Real => libc::CLOCK_REALTIME, + ClockType::ProcessCpu => libc::CLOCK_PROCESS_CPUTIME_ID, + ClockType::ThreadCpu => libc::CLOCK_THREAD_CPUTIME_ID, + } + } +} + +/// Structure representing the date in local time with nanosecond precision. +pub struct LocalTime { + /// Seconds in current minute. + sec: i32, + /// Minutes in current hour. + min: i32, + /// Hours in current day, 24H format. + hour: i32, + /// Days in current month. + mday: i32, + /// Months in current year. + mon: i32, + /// Years passed since 1900 BC. + year: i32, + /// Nanoseconds in current second. + nsec: i64, +} + +impl LocalTime { + /// Returns the [LocalTime](struct.LocalTime.html) structure for the calling moment. + pub fn now() -> LocalTime { + let mut timespec = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + let mut tm: libc::tm = libc::tm { + tm_sec: 0, + tm_min: 0, + tm_hour: 0, + tm_mday: 0, + tm_mon: 0, + tm_year: 0, + tm_wday: 0, + tm_yday: 0, + tm_isdst: 0, + tm_gmtoff: 0, + tm_zone: std::ptr::null(), + }; + + // Safe because the parameters are valid. + unsafe { + libc::clock_gettime(libc::CLOCK_REALTIME, &mut timespec); + libc::localtime_r(×pec.tv_sec, &mut tm); + } + + LocalTime { + sec: tm.tm_sec, + min: tm.tm_min, + hour: tm.tm_hour, + mday: tm.tm_mday, + mon: tm.tm_mon, + year: tm.tm_year, + nsec: timespec.tv_nsec, + } + } +} + +impl fmt::Display for LocalTime { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}-{:02}-{:02}T{:02}:{:02}:{:02}.{:09}", + self.year + 1900, + self.mon + 1, + self.mday, + self.hour, + self.min, + self.sec, + self.nsec + ) + } +} + +/// Holds a micro-second resolution timestamp with both the real time and cpu time. +#[derive(Clone)] +pub struct TimestampUs { + /// Real time in microseconds. + pub time_us: u64, + /// Cpu time in microseconds. + pub cputime_us: u64, +} + +impl Default for TimestampUs { + fn default() -> TimestampUs { + TimestampUs { + time_us: get_time_us(ClockType::Monotonic), + cputime_us: get_time_us(ClockType::ProcessCpu), + } + } +} + +/// Get process CPU time in us. +pub fn now_cputime_us() -> u64 { + get_time_us(ClockType::ProcessCpu) +} + +/// Returns a timestamp in nanoseconds from a monotonic clock. +/// +/// Uses `_rdstc` on `x86_64` and [`get_time`](fn.get_time.html) on other architectures. +pub fn timestamp_cycles() -> u64 { + #[cfg(target_arch = "x86_64")] + // Safe because there's nothing that can go wrong with this call. + unsafe { + std::arch::x86_64::_rdtsc() + } + #[cfg(not(target_arch = "x86_64"))] + { + get_time_ns(ClockType::Monotonic) + } +} + +/// Returns a timestamp in nanoseconds based on the provided clock type. +/// +/// # Arguments +/// +/// * `clock_type` - Identifier of the Linux Kernel clock on which to act. 
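+///
+/// # Example
+///
+/// An illustrative sketch of measuring elapsed time, assuming this module is
+/// exported as `dbs_utils::time`:
+///
+/// ```ignore
+/// use dbs_utils::time::{get_time_ns, ClockType};
+///
+/// let start = get_time_ns(ClockType::Monotonic);
+/// // ... do some work ...
+/// let elapsed_ns = get_time_ns(ClockType::Monotonic) - start;
+/// ```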
+pub fn get_time_ns(clock_type: ClockType) -> u64 { + let mut time_struct = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + // Safe because the parameters are valid. + unsafe { libc::clock_gettime(clock_type.into(), &mut time_struct) }; + seconds_to_nanoseconds(time_struct.tv_sec).expect("Time conversion overflow") as u64 + + (time_struct.tv_nsec as u64) +} + +/// Returns a timestamp in microseconds based on the provided clock type. +/// +/// # Arguments +/// +/// * `clock_type` - Identifier of the Linux Kernel clock on which to act. +pub fn get_time_us(clock_type: ClockType) -> u64 { + get_time_ns(clock_type) / 1000 +} + +/// Returns a timestamp in milliseconds based on the provided clock type. +/// +/// # Arguments +/// +/// * `clock_type` - Identifier of the Linux Kernel clock on which to act. +pub fn get_time_ms(clock_type: ClockType) -> u64 { + get_time_ns(clock_type) / NANOS_PER_MILLISECOND +} + +/// Converts a timestamp in seconds to an equivalent one in nanoseconds. +/// Returns `None` if the conversion overflows. +/// +/// # Arguments +/// +/// * `value` - Timestamp in seconds. +pub fn seconds_to_nanoseconds(value: i64) -> Option { + value.checked_mul(NANOS_PER_SECOND as i64) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_time() { + for _ in 0..1000 { + assert!(get_time_ns(ClockType::Monotonic) <= get_time_ns(ClockType::Monotonic)); + } + + for _ in 0..1000 { + assert!(get_time_ns(ClockType::ProcessCpu) <= get_time_ns(ClockType::ProcessCpu)); + } + + for _ in 0..1000 { + assert!(get_time_ns(ClockType::ThreadCpu) <= get_time_ns(ClockType::ThreadCpu)); + } + + assert_ne!(get_time_ns(ClockType::Real), 0); + assert_ne!(get_time_us(ClockType::Real), 0); + assert!(get_time_ns(ClockType::Real) / 1000 <= get_time_us(ClockType::Real)); + assert!( + get_time_ns(ClockType::Real) / NANOS_PER_MILLISECOND <= get_time_ms(ClockType::Real) + ); + } + + #[test] + fn test_local_time_display() { + let local_time = LocalTime { + sec: 30, + min: 15, + hour: 10, + mday: 4, + mon: 6, + year: 119, + nsec: 123_456_789, + }; + assert_eq!( + String::from("2019-07-04T10:15:30.123456789"), + local_time.to_string() + ); + + let local_time = LocalTime { + sec: 5, + min: 5, + hour: 5, + mday: 23, + mon: 7, + year: 44, + nsec: 123, + }; + assert_eq!( + String::from("1944-08-23T05:05:05.000000123"), + local_time.to_string() + ); + + let local_time = LocalTime::now(); + assert!(local_time.mon >= 0 && local_time.mon <= 11); + } + + #[test] + fn test_seconds_to_nanoseconds() { + assert_eq!( + seconds_to_nanoseconds(100).unwrap() as u64, + 100 * NANOS_PER_SECOND + ); + + assert!(seconds_to_nanoseconds(9_223_372_037).is_none()); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/Cargo.toml b/src/dragonball/src/dbs_virtio_devices/Cargo.toml new file mode 100644 index 000000000000..b7bd8e60fd24 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/Cargo.toml @@ -0,0 +1,52 @@ +[package] +name = "dbs-virtio-devices" +version = "0.3.1" +authors = ["Alibaba Dragonball Team"] +license = "Apache-2.0 AND BSD-3-Clause" +edition = "2018" +description = "Virtio device backend driver framework and device drivers" +homepage = "https://github.com/openanolis/dragonball-sandbox" +repository = "https://github.com/openanolis/dragonball-sandbox/tree/main/crates/dbs-virtio-devices" +keywords = ["dragonball", "secure-sandbox", "devices", "virtio"] +readme = "README.md" + +[dependencies] +byteorder = "1.4.3" +caps = "0.5.3" +dbs-device = { path = "../dbs_device" } +dbs-interrupt = { path = 
"../dbs_interrupt", features = ["kvm-legacy-irq", "kvm-msi-irq"] } +dbs-utils = { path = "../dbs_utils" } +epoll = ">=4.3.1, <4.3.2" +io-uring = "0.5.2" +fuse-backend-rs = { version = "0.10.5", optional = true } +kvm-bindings = "0.6.0" +kvm-ioctls = "0.12.0" +libc = "0.2.119" +log = "0.4.14" +nix = "0.24.3" +nydus-api = "0.3.1" +nydus-rafs = "0.3.2" +nydus-storage = "0.6.4" +rlimit = "0.7.0" +serde = "1.0.27" +serde_json = "1.0.9" +thiserror = "1" +threadpool = "1" +virtio-bindings = "0.1.0" +virtio-queue = "0.7.0" +vmm-sys-util = "0.11.0" +vm-memory = { version = "0.10.0", features = [ "backend-mmap" ] } +sendfd = "0.4.3" + +[dev-dependencies] +vm-memory = { version = "0.10.0", features = [ "backend-mmap", "backend-atomic" ] } + +[features] +virtio-mmio = [] +virtio-vsock = ["virtio-mmio"] +virtio-net = ["virtio-mmio"] +virtio-blk = ["virtio-mmio"] +virtio-fs = ["virtio-mmio", "fuse-backend-rs/virtiofs", "nydus-rafs/virtio-fs"] +virtio-fs-pro = ["virtio-fs", "nydus-storage/backend-registry", "nydus-storage/backend-oss"] +virtio-mem = ["virtio-mmio"] +virtio-balloon = ["virtio-mmio"] diff --git a/src/dragonball/src/dbs_virtio_devices/LICENSE b/src/dragonball/src/dbs_virtio_devices/LICENSE new file mode 120000 index 000000000000..30cff7403da0 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/dragonball/src/dbs_virtio_devices/README.md b/src/dragonball/src/dbs_virtio_devices/README.md new file mode 100644 index 000000000000..1cc9f320ee3a --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/README.md @@ -0,0 +1,11 @@ +# dbs-virtio-devices + +`dbs-virtio-devices` provides emulation for virtio devices. + +## Acknowledgement + +Part of the code is derived from the [Firecracker](https://github.com/firecracker-microvm/firecracker) project. + +## License + +This project is licensed under [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). diff --git a/src/dragonball/src/dbs_virtio_devices/THIRD-PARTY b/src/dragonball/src/dbs_virtio_devices/THIRD-PARTY new file mode 120000 index 000000000000..301d0a498953 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/THIRD-PARTY @@ -0,0 +1 @@ +../../THIRD-PARTY \ No newline at end of file diff --git a/src/dragonball/src/dbs_virtio_devices/src/balloon.rs b/src/dragonball/src/dbs_virtio_devices/src/balloon.rs new file mode 100644 index 000000000000..8ddad0bf799d --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/balloon.rs @@ -0,0 +1,1005 @@ +// Copyright (C) 2020 Alibaba Cloud Computing. All rights reserved. +// Copyright (c) 2020 Ant Financial +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#![allow(dead_code)] + +use std::any::Any; +use std::cmp; +use std::convert::TryFrom; +use std::io::{self, Write}; +use std::marker::PhantomData; +use std::mem::size_of; +use std::ops::Deref; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::sync::atomic::AtomicBool; +use std::sync::{Arc, Mutex}; + +use dbs_device::resources::ResourceConstraint; +use dbs_interrupt::{InterruptNotifier, NoopNotifier}; +use dbs_utils::epoll_manager::{ + EpollManager, EventOps, EventSet, Events, MutEventSubscriber, SubscriberId, +}; +use log::{debug, error, info, trace}; +use virtio_bindings::bindings::virtio_blk::VIRTIO_F_VERSION_1; +use virtio_queue::{QueueOwnedT, QueueSync, QueueT}; +use vm_memory::{ + ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, + GuestRegionMmap, MemoryRegionAddress, +}; + +use crate::device::{VirtioDevice, VirtioDeviceConfig, VirtioDeviceInfo, VirtioQueueConfig}; +use crate::{ + ActivateResult, ConfigError, ConfigResult, DbsGuestAddressSpace, Error, Result, TYPE_BALLOON, +}; + +const BALLOON_DRIVER_NAME: &str = "virtio-balloon"; + +// Supported fields in the configuration space: +const CONFIG_SPACE_SIZE: usize = 16; + +const QUEUE_SIZE: u16 = 128; +const NUM_QUEUES: usize = 2; +const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE; NUM_QUEUES]; +const PMD_SHIFT: u64 = 21; +const PMD_SIZE: u64 = 1 << PMD_SHIFT; + +// New descriptors are pending on the virtio queue. +const INFLATE_QUEUE_AVAIL_EVENT: u32 = 0; +// New descriptors are pending on the virtio queue. +const DEFLATE_QUEUE_AVAIL_EVENT: u32 = 1; +// New descriptors are pending on the virtio queue. +const REPORTING_QUEUE_AVAIL_EVENT: u32 = 2; +// The device has been dropped. +const KILL_EVENT: u32 = 3; +// The device should be paused. +const PAUSE_EVENT: u32 = 4; +const BALLOON_EVENTS_COUNT: u32 = 5; + +// Page shift in the host. +const PAGE_SHIFT: u32 = 12; +// Huge Page shift in the host. +const HUGE_PAGE_SHIFT: u32 = 21; + +// Size of a PFN in the balloon interface. +const VIRTIO_BALLOON_PFN_SHIFT: u64 = 12; +// feature to deflate balloon on OOM +const VIRTIO_BALLOON_F_DEFLATE_ON_OOM: usize = 2; +// feature to enable free page reporting +const VIRTIO_BALLOON_F_REPORTING: usize = 5; + +// The PAGE_REPORTING_CAPACITY of CLH is set to 32. +// This value is got from patch in https://patchwork.kernel.org/patch/11377073/. +// But dragonball reporting capacity is set to 128 in before. +// So I keep 128. +const PAGE_REPORTING_CAPACITY: u16 = 128; + +#[derive(Debug, thiserror::Error)] +pub enum BalloonError {} + +pub type BalloonResult = std::result::Result; + +// Got from include/uapi/linux/virtio_balloon.h +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default, PartialEq)] +pub struct VirtioBalloonConfig { + // Number of pages host wants Guest to give up. + pub(crate) num_pages: u32, + // Number of pages we've actually got in balloon. + pub(crate) actual: u32, +} + +// Safe because it only has data and has no implicit padding. 
+unsafe impl ByteValued for VirtioBalloonConfig {} + +pub struct BalloonEpollHandler< + AS: GuestAddressSpace, + Q: QueueT + Send = QueueSync, + R: GuestMemoryRegion = GuestRegionMmap, +> { + pub(crate) config: VirtioDeviceConfig, + pub(crate) inflate: VirtioQueueConfig, + pub(crate) deflate: VirtioQueueConfig, + pub(crate) reporting: Option>, + balloon_config: Arc>, +} + +impl + BalloonEpollHandler +{ + fn process_reporting_queue(&mut self) -> bool { + if let Some(queue) = &mut self.reporting { + if let Err(e) = queue.consume_event() { + error!("Failed to get reporting queue event: {:?}", e); + return false; + } + let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize]; + let mut used_count = 0; + let conf = &mut self.config; + let guard = conf.lock_guest_memory(); + let mem = guard.deref().memory(); + + let mut queue_guard = queue.queue_mut().lock(); + + let mut iter = match queue_guard.iter(mem) { + Err(e) => { + error!("virtio-balloon: failed to process reporting queue. {}", e); + return false; + } + Ok(iter) => iter, + }; + + for mut desc_chain in &mut iter { + let mut next_desc = desc_chain.next(); + let mut len = 0; + while let Some(avail_desc) = next_desc { + if avail_desc.len() as usize % size_of::() != 0 { + error!("the request size {} is not right", avail_desc.len()); + break; + } + let size = avail_desc.len(); + let addr = avail_desc.addr(); + len += size; + + if let Some(region) = mem.find_region(addr) { + let host_addr = match mem.get_host_address(addr) { + Ok(v) => v, + Err(e) => { + error!("virtio-balloon get host address failed! addr:{:x} size: {:x} error:{:?}", addr.0, size, e); + break; + } + }; + if region.file_offset().is_some() { + // when guest memory has file backend we use fallocate free memory + let file_offset = region.file_offset().unwrap(); + let file_fd = file_offset.file().as_raw_fd(); + let file_start = file_offset.start(); + let mode = libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE; + let start_addr = + region.get_host_address(MemoryRegionAddress(0)).unwrap(); + let offset = file_start as i64 + host_addr as i64 - start_addr as i64; + if let Err(e) = Self::do_fallocate(file_fd, offset, size as i64, mode) { + info!( + "virtio-balloon reporting failed fallocate guest address: {:x} offset: {:x} size {:x} fd {:?}", + addr.0, + offset, + size, + file_fd + ); + error!("fallocate get error {}", e); + } + } else { + // when guest memory have no file backend or comes from we use madvise free memory + let advise = libc::MADV_DONTNEED; + if let Err(e) = Self::do_madvise( + host_addr as *mut libc::c_void, + size as usize, + advise, + ) { + info!( + "guest address: {:?} host address: {:?} size {:?} advise {:?}", + addr, + host_addr, + 1 << PAGE_SHIFT, + advise + ); + error!("madvise get error {}", e); + } + } + } + next_desc = desc_chain.next(); + } + used_desc_heads[used_count] = (desc_chain.head_index(), len); + used_count += 1; + } + + drop(queue_guard); + + for &(desc_index, len) in &used_desc_heads[..used_count] { + queue.add_used(mem, desc_index, len); + } + if used_count > 0 { + match queue.notify() { + Ok(_v) => true, + Err(e) => { + error!( + "{}: Failed to signal device change event: {}", + BALLOON_DRIVER_NAME, e + ); + false + } + } + } else { + true + } + } else { + error!( + "{}: Invalid event: Free pages reporting was not configured", + BALLOON_DRIVER_NAME + ); + false + } + } + + fn process_queue(&mut self, idx: u32) -> bool { + let conf = &mut self.config; + + let queue = match idx { + INFLATE_QUEUE_AVAIL_EVENT => &mut self.inflate, + 
DEFLATE_QUEUE_AVAIL_EVENT => &mut self.deflate, + _ => { + error!("{}: unsupport idx {}", BALLOON_DRIVER_NAME, idx); + return false; + } + }; + + if let Err(e) = queue.consume_event() { + error!( + "{}: Failed to get idx {} queue event: {:?}", + BALLOON_DRIVER_NAME, idx, e + ); + return false; + } + + let mut advice = match idx { + INFLATE_QUEUE_AVAIL_EVENT => libc::MADV_DONTNEED, + DEFLATE_QUEUE_AVAIL_EVENT => libc::MADV_WILLNEED, + _ => { + error!( + "{}: balloon idx: {:?} is not right", + BALLOON_DRIVER_NAME, idx + ); + return false; + } + }; + + let mut used_desc_heads = [0; QUEUE_SIZE as usize]; + let mut used_count = 0; + let guard = conf.lock_guest_memory(); + let mem = guard.deref().memory(); + + let mut queue_guard = queue.queue_mut().lock(); + + let mut iter = match queue_guard.iter(mem) { + Err(e) => { + error!("virtio-balloon: failed to process queue. {}", e); + return false; + } + Ok(iter) => iter, + }; + + for mut desc_chain in &mut iter { + let avail_desc = match desc_chain.next() { + Some(avail_desc) => avail_desc, + None => { + error!( + "{}: Failed to parse balloon available descriptor chain", + BALLOON_DRIVER_NAME + ); + return false; + } + }; + + if avail_desc.is_write_only() { + error!( + "{}: The head contains the request type is not right", + BALLOON_DRIVER_NAME + ); + continue; + } + let avail_desc_len = avail_desc.len(); + if avail_desc_len as usize % size_of::() != 0 { + error!( + "{}: the request size {} is not right", + BALLOON_DRIVER_NAME, avail_desc_len + ); + continue; + } + + let mut offset = 0u64; + while offset < avail_desc_len as u64 { + // Get pfn + let pfn: u32 = match mem.read_obj(GuestAddress(avail_desc.addr().0 + offset)) { + Ok(ret) => ret, + Err(e) => { + error!( + "{}: Fail to read addr {}: {:?}", + BALLOON_DRIVER_NAME, + avail_desc.addr().0 + offset, + e + ); + break; + } + }; + offset += size_of::() as u64; + + // Get pfn_len + let pfn_len = match idx { + INFLATE_QUEUE_AVAIL_EVENT | DEFLATE_QUEUE_AVAIL_EVENT => 1 << PAGE_SHIFT, + _ => { + error!( + "{}: balloon idx: {:?} is not right", + BALLOON_DRIVER_NAME, idx + ); + return false; + } + }; + + trace!( + "{}: process_queue pfn {} len {}", + BALLOON_DRIVER_NAME, + pfn, + pfn_len + ); + + let guest_addr = (pfn as u64) << VIRTIO_BALLOON_PFN_SHIFT; + + if let Some(region) = mem.find_region(GuestAddress(guest_addr)) { + let host_addr = mem.get_host_address(GuestAddress(guest_addr)).unwrap(); + if advice == libc::MADV_DONTNEED && region.file_offset().is_some() { + advice = libc::MADV_REMOVE; + } + if let Err(e) = Self::do_madvise( + host_addr as *mut libc::c_void, + pfn_len as libc::size_t, + advice, + ) { + info!( + "{}: guest address: {:?} host address: {:?} size {:?} advise {:?}", + BALLOON_DRIVER_NAME, guest_addr, host_addr, pfn_len, advice + ); + error!("{}: madvise get error {}", BALLOON_DRIVER_NAME, e); + } + } else { + error!( + "{}: guest address 0x{:x} size {:?} advise {:?} is not available", + BALLOON_DRIVER_NAME, guest_addr, pfn_len, advice + ); + } + } + + used_desc_heads[used_count] = desc_chain.head_index(); + used_count += 1; + } + + drop(queue_guard); + + for &desc_index in &used_desc_heads[..used_count] { + queue.add_used(mem, desc_index, 0); + } + if used_count > 0 { + match queue.notify() { + Ok(_v) => true, + Err(e) => { + error!( + "{}: Failed to signal device queue event: {}", + BALLOON_DRIVER_NAME, e + ); + false + } + } + } else { + true + } + } + + fn do_madvise( + addr: *mut libc::c_void, + size: libc::size_t, + advise: libc::c_int, + ) -> std::result::Result<(), io::Error> 
{ + let res = unsafe { libc::madvise(addr, size, advise) }; + if res != 0 { + return Err(io::Error::last_os_error()); + } + Ok(()) + } + + fn do_fallocate( + file_fd: RawFd, + offset: libc::off_t, + len: libc::off_t, + mode: libc::c_int, + ) -> std::result::Result<(), io::Error> { + let res = unsafe { libc::fallocate(file_fd, mode, offset, len) }; + if res != 0 { + return Err(io::Error::last_os_error()); + } + Ok(()) + } +} + +impl MutEventSubscriber + for BalloonEpollHandler +where + AS: 'static + GuestAddressSpace + Send + Sync, +{ + fn init(&mut self, ops: &mut EventOps) { + trace!( + target: BALLOON_DRIVER_NAME, + "{}: BalloonEpollHandler::init()", + BALLOON_DRIVER_NAME, + ); + let events = Events::with_data( + self.inflate.eventfd.as_ref(), + INFLATE_QUEUE_AVAIL_EVENT, + EventSet::IN, + ); + if let Err(e) = ops.add(events) { + error!( + "{}: failed to register INFLATE QUEUE event, {:?}", + BALLOON_DRIVER_NAME, e + ); + } + + let events = Events::with_data( + self.deflate.eventfd.as_ref(), + DEFLATE_QUEUE_AVAIL_EVENT, + EventSet::IN, + ); + if let Err(e) = ops.add(events) { + error!( + "{}: failed to register deflate queue event, {:?}", + BALLOON_DRIVER_NAME, e + ); + } + + if let Some(reporting) = &self.reporting { + let events = Events::with_data( + reporting.eventfd.as_ref(), + REPORTING_QUEUE_AVAIL_EVENT, + EventSet::IN, + ); + if let Err(e) = ops.add(events) { + error!( + "{}: failed to register reporting queue event, {:?}", + BALLOON_DRIVER_NAME, e + ); + } + } + } + + fn process(&mut self, events: Events, _ops: &mut EventOps) { + let guard = self.config.lock_guest_memory(); + let _mem = guard.deref(); + let idx = events.data(); + + trace!( + target: BALLOON_DRIVER_NAME, + "{}: BalloonEpollHandler::process() idx {}", + BALLOON_DRIVER_NAME, + idx + ); + match idx { + INFLATE_QUEUE_AVAIL_EVENT | DEFLATE_QUEUE_AVAIL_EVENT => { + if !self.process_queue(idx) { + error!("{}: Failed to handle {} queue", BALLOON_DRIVER_NAME, idx); + } + } + REPORTING_QUEUE_AVAIL_EVENT => { + if !self.process_reporting_queue() { + error!("Failed to handle reporting queue"); + } + } + KILL_EVENT => { + debug!("kill_evt received"); + } + _ => { + error!("{}: unknown idx {}", BALLOON_DRIVER_NAME, idx); + } + } + } +} + +fn page_number_to_mib(number: u64) -> u64 { + number << PAGE_SHIFT >> 10 >> 10 +} + +fn mib_to_page_number(mib: u64) -> u64 { + mib << 10 << 10 >> PAGE_SHIFT +} + +/// Virtio device for exposing entropy to the guest OS through virtio. +pub struct Balloon { + pub(crate) device_info: VirtioDeviceInfo, + pub(crate) config: Arc>, + pub(crate) paused: Arc, + pub(crate) device_change_notifier: Arc, + pub(crate) subscriber_id: Option, + pub(crate) phantom: PhantomData, +} + +#[derive(Copy, Clone, Debug, Default, PartialEq)] +pub struct BalloonConfig { + pub f_deflate_on_oom: bool, + pub f_reporting: bool, +} + +impl Balloon { + // Create a new virtio-balloon. 
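+    // Queue layout: inflate and deflate queues are always present; a third
+    // free-page reporting queue (capacity PAGE_REPORTING_CAPACITY) is appended
+    // only when `cfg.f_reporting` is set, mirroring VIRTIO_BALLOON_F_REPORTING.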
+ pub fn new(epoll_mgr: EpollManager, cfg: BalloonConfig) -> Result { + let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; + + let mut queue_sizes = QUEUE_SIZES.to_vec(); + + if cfg.f_deflate_on_oom { + avail_features |= 1u64 << VIRTIO_BALLOON_F_DEFLATE_ON_OOM; + } + if cfg.f_reporting { + avail_features |= 1u64 << VIRTIO_BALLOON_F_REPORTING; + queue_sizes.push(PAGE_REPORTING_CAPACITY); + } + + let config = VirtioBalloonConfig::default(); + + Ok(Balloon { + device_info: VirtioDeviceInfo::new( + BALLOON_DRIVER_NAME.to_string(), + avail_features, + Arc::new(queue_sizes), + config.as_slice().to_vec(), + epoll_mgr, + ), + config: Arc::new(Mutex::new(config)), + paused: Arc::new(AtomicBool::new(false)), + device_change_notifier: Arc::new(NoopNotifier::new()), + subscriber_id: None, + phantom: PhantomData, + }) + } + + pub fn set_size(&self, size_mb: u64) -> Result<()> { + let num_pages = mib_to_page_number(size_mb); + + let balloon_config = &mut self.config.lock().unwrap(); + balloon_config.num_pages = num_pages as u32; + if let Err(e) = self.device_change_notifier.notify() { + error!( + "{}: failed to signal device change event: {}", + BALLOON_DRIVER_NAME, e + ); + return Err(Error::IOError(e)); + } + + Ok(()) + } +} + +impl VirtioDevice for Balloon +where + AS: DbsGuestAddressSpace, + Q: QueueT + Send + 'static, + R: GuestMemoryRegion + Sync + Send + 'static, +{ + fn device_type(&self) -> u32 { + TYPE_BALLOON + } + + fn queue_max_sizes(&self) -> &[u16] { + &self.device_info.queue_sizes + } + + fn get_avail_features(&self, page: u32) -> u32 { + self.device_info.get_avail_features(page) + } + + fn set_acked_features(&mut self, page: u32, value: u32) { + trace!( + target: BALLOON_DRIVER_NAME, + "{}: VirtioDevice::set_acked_features({}, 0x{:x})", + BALLOON_DRIVER_NAME, + page, + value + ); + self.device_info.set_acked_features(page, value) + } + + fn read_config(&mut self, offset: u64, mut data: &mut [u8]) -> ConfigResult { + trace!( + target: BALLOON_DRIVER_NAME, + "{}: VirtioDevice::read_config(0x{:x}, {:?})", + BALLOON_DRIVER_NAME, + offset, + data + ); + let config = &self.config.lock().unwrap(); + let config_space = config.as_slice().to_vec(); + let config_len = config_space.len() as u64; + if offset >= config_len { + error!( + "{}: config space read request out of range, offset {}", + BALLOON_DRIVER_NAME, offset + ); + return Err(ConfigError::InvalidOffset(offset)); + } + if let Some(end) = offset.checked_add(data.len() as u64) { + // This write can't fail, offset and end are checked against config_len. 
+ data.write_all(&config_space[offset as usize..cmp::min(end, config_len) as usize]) + .unwrap(); + } + Ok(()) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> ConfigResult { + let config = &mut self.config.lock().unwrap(); + let config_slice = config.as_mut_slice(); + let Ok(start) = usize::try_from(offset) else { + error!("Failed to write config space"); + return Err(ConfigError::InvalidOffset(offset)); + }; + let Some(dst) = start.checked_add(data.len()) + .and_then(|end| config_slice.get_mut(start..end)) else + { + error!("Failed to write config space"); + return Err(ConfigError::InvalidOffsetPlusDataLen(offset + data.len() as u64)); + }; + dst.copy_from_slice(data); + Ok(()) + } + + fn activate(&mut self, mut config: VirtioDeviceConfig) -> ActivateResult { + self.device_info.check_queue_sizes(&config.queues)?; + self.device_change_notifier = config.device_change_notifier.clone(); + + trace!( + "{}: activate acked_features 0x{:x}", + BALLOON_DRIVER_NAME, + self.device_info.acked_features + ); + + let inflate = config.queues.remove(0); + let deflate = config.queues.remove(0); + let mut reporting = None; + if (self.device_info.acked_features & (1u64 << VIRTIO_BALLOON_F_REPORTING)) != 0 { + reporting = Some(config.queues.remove(0)); + } + + let handler = Box::new(BalloonEpollHandler { + config, + inflate, + deflate, + reporting, + balloon_config: self.config.clone(), + }); + + self.subscriber_id = Some(self.device_info.register_event_handler(handler)); + + Ok(()) + } + + fn get_resource_requirements( + &self, + requests: &mut Vec, + use_generic_irq: bool, + ) { + requests.push(ResourceConstraint::LegacyIrq { irq: None }); + if use_generic_irq { + requests.push(ResourceConstraint::GenericIrq { + size: (self.device_info.queue_sizes.len() + 1) as u32, + }); + } + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } +} + +#[cfg(test)] +pub(crate) mod tests { + use dbs_device::resources::DeviceResources; + use dbs_utils::epoll_manager::SubscriberOps; + use kvm_ioctls::Kvm; + use vm_memory::GuestMemoryMmap; + use vmm_sys_util::eventfd::EventFd; + + use super::*; + use crate::tests::VirtQueue; + + fn create_balloon_epoll_handler() -> BalloonEpollHandler> { + let mem = Arc::new(GuestMemoryMmap::from_ranges(&[(GuestAddress(0x0), 0x10000)]).unwrap()); + let queues = vec![VirtioQueueConfig::create(128, 0).unwrap()]; + let resources = DeviceResources::new(); + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + + let config = VirtioDeviceConfig::new( + mem, + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + + let inflate = VirtioQueueConfig::create(128, 0).unwrap(); + let deflate = VirtioQueueConfig::create(128, 0).unwrap(); + let reporting = Some(VirtioQueueConfig::create(128, 0).unwrap()); + let balloon_config = Arc::new(Mutex::new(VirtioBalloonConfig::default())); + + BalloonEpollHandler { + config, + inflate, + deflate, + reporting, + balloon_config, + } + } + + #[test] + fn test_balloon_page_number_to_mib() { + assert_eq!(page_number_to_mib(1024), 4); + assert_eq!(page_number_to_mib(1023), 3); + assert_eq!(page_number_to_mib(0), 0); + } + + #[test] + fn test_balloon_mib_to_page_number() { + assert_eq!(mib_to_page_number(4), 1024); + assert_eq!(mib_to_page_number(2), 512); + assert_eq!(mib_to_page_number(0), 0); + } + + #[test] + fn test_balloon_virtio_device_normal() { + let epoll_mgr = EpollManager::default(); + let config = BalloonConfig { + f_deflate_on_oom: true, + 
f_reporting: true, + }; + + let mut dev = Balloon::>::new(epoll_mgr, config).unwrap(); + + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::device_type(&dev), + TYPE_BALLOON + ); + + let queue_size = vec![128, 128, 128]; + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::queue_max_sizes( + &dev + ), + &queue_size[..] + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 0), + dev.device_info.get_avail_features(0) + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 1), + dev.device_info.get_avail_features(1) + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 2), + dev.device_info.get_avail_features(2) + ); + VirtioDevice::>, QueueSync, GuestRegionMmap>::set_acked_features( + &mut dev, 2, 0, + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 2), + 0, + ); + let config: [u8; 8] = [0; 8]; + VirtioDevice::>, QueueSync, GuestRegionMmap>::write_config( + &mut dev, 0, &config, + ) + .unwrap(); + let mut data: [u8; 8] = [1; 8]; + VirtioDevice::>, QueueSync, GuestRegionMmap>::read_config( + &mut dev, 0, &mut data, + ) + .unwrap(); + assert_eq!(config, data); + } + + #[test] + fn test_balloon_virtio_device_active() { + let epoll_mgr = EpollManager::default(); + + // check queue sizes error + { + let config = BalloonConfig { + f_deflate_on_oom: true, + f_reporting: true, + }; + + let mut dev = Balloon::>::new(epoll_mgr.clone(), config).unwrap(); + let queues = vec![ + VirtioQueueConfig::::create(16, 0).unwrap(), + VirtioQueueConfig::::create(16, 0).unwrap(), + ]; + + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::>>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + assert!(dev.activate(config).is_err()); + } + // Success + { + let config = BalloonConfig { + f_deflate_on_oom: true, + f_reporting: true, + }; + + let mut dev = Balloon::>::new(epoll_mgr, config).unwrap(); + + let queues = vec![ + VirtioQueueConfig::::create(128, 0).unwrap(), + VirtioQueueConfig::::create(128, 0).unwrap(), + VirtioQueueConfig::::create(128, 0).unwrap(), + ]; + + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::>>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + assert!(dev.activate(config).is_ok()); + } + } + + #[test] + fn test_balloon_set_size() { + let epoll_mgr = EpollManager::default(); + let config = BalloonConfig { + f_deflate_on_oom: true, + f_reporting: true, + }; + + let dev = Balloon::>::new(epoll_mgr, config).unwrap(); + let size = 1024; + assert!(dev.set_size(size).is_ok()); + } + + #[test] + fn test_balloon_epoll_handler_handle_event() { + let handler = create_balloon_epoll_handler(); + let event_fd = EventFd::new(0).unwrap(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_op = inner_mgr.event_ops(id).unwrap(); + let event_set = EventSet::EDGE_TRIGGERED; + let mut handler = create_balloon_epoll_handler(); + + // test for INFLATE_QUEUE_AVAIL_EVENT + 
let events = Events::with_data(&event_fd, INFLATE_QUEUE_AVAIL_EVENT, event_set); + handler.process(events, &mut event_op); + + // test for DEFLATE_QUEUE_AVAIL_EVENT + let events = Events::with_data(&event_fd, DEFLATE_QUEUE_AVAIL_EVENT, event_set); + handler.process(events, &mut event_op); + + // test for REPORTING_QUEUE_AVAIL_EVENT + let events = Events::with_data(&event_fd, REPORTING_QUEUE_AVAIL_EVENT, event_set); + handler.process(events, &mut event_op); + + // test for KILL_EVENT + let events = Events::with_data(&event_fd, KILL_EVENT, event_set); + handler.process(events, &mut event_op); + + // test for unknown event + let events = Events::with_data(&event_fd, BALLOON_EVENTS_COUNT + 10, event_set); + handler.process(events, &mut event_op); + } + + #[test] + fn test_balloon_epoll_handler_process_report_queue() { + let mut handler = create_balloon_epoll_handler(); + let m = &handler.config.vm_as.clone(); + + // Failed to get reporting queue event + assert!(!handler.process_reporting_queue()); + + // No reporting queue + handler.reporting = None; + assert!(!handler.process_reporting_queue()); + + let vq = VirtQueue::new(GuestAddress(0), m, 16); + let q = vq.create_queue(); + vq.avail.idx().store(1); + vq.avail.ring(0).store(0); + vq.dtable(0).set(0x2000, 0x1000, 0, 0); + let queue_config = VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + ); + assert!(queue_config.generate_event().is_ok()); + handler.reporting = Some(queue_config); + //Success + assert!(handler.process_reporting_queue()); + } + + #[test] + fn test_balloon_epoll_handler_process_queue() { + let mut handler = create_balloon_epoll_handler(); + let m = &handler.config.vm_as.clone(); + // invalid idx + { + let vq = VirtQueue::new(GuestAddress(0), m, 16); + let q = vq.create_queue(); + vq.avail.idx().store(1); + vq.avail.ring(0).store(0); + vq.dtable(0).set(0x2000, 0x1000, 0, 0); + let queue_config = VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + ); + assert!(queue_config.generate_event().is_ok()); + handler.inflate = queue_config; + assert!(!handler.process_queue(10)); + } + // INFLATE_QUEUE_AVAIL_EVENT + { + let vq = VirtQueue::new(GuestAddress(0), m, 16); + let q = vq.create_queue(); + vq.avail.idx().store(1); + vq.avail.ring(0).store(0); + vq.dtable(0).set(0x2000, 0x1000, 0, 0); + vq.dtable(0).set(0x2000, 0x1000, 0, 0); + let queue_config = VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + ); + assert!(queue_config.generate_event().is_ok()); + handler.inflate = queue_config; + assert!(handler.process_queue(INFLATE_QUEUE_AVAIL_EVENT)); + } + // DEFLATE_QUEUE_AVAIL_EVENT + { + let vq = VirtQueue::new(GuestAddress(0), m, 16); + let q = vq.create_queue(); + vq.avail.idx().store(1); + vq.avail.ring(0).store(0); + vq.dtable(0).set(0x2000, 0x1000, 0, 0); + let queue_config = VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + ); + assert!(queue_config.generate_event().is_ok()); + handler.deflate = queue_config; + assert!(handler.process_queue(DEFLATE_QUEUE_AVAIL_EVENT)); + } + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/block/device.rs b/src/dragonball/src/dbs_virtio_devices/src/block/device.rs new file mode 100644 index 000000000000..8caeef3b9921 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/block/device.rs @@ -0,0 +1,1362 @@ +// Copyright 2019-2020 Alibaba Cloud. 
All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::any::Any; +use std::collections::HashMap; +use std::io::{Seek, SeekFrom}; +use std::marker::PhantomData; +use std::sync::{mpsc, Arc}; +use std::thread; + +use dbs_device::resources::ResourceConstraint; +use dbs_utils::{ + epoll_manager::{EpollManager, SubscriberId}, + rate_limiter::{BucketUpdate, RateLimiter}, +}; +use log::{debug, error, info, warn}; +use virtio_bindings::bindings::virtio_blk::*; +use virtio_queue::QueueT; +use vm_memory::GuestMemoryRegion; +use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK}; + +use crate::{ + ActivateError, ActivateResult, ConfigResult, DbsGuestAddressSpace, Error, Result, VirtioDevice, + VirtioDeviceConfig, VirtioDeviceInfo, TYPE_BLOCK, +}; + +use super::{ + BlockEpollHandler, InnerBlockEpollHandler, KillEvent, Ufile, BLK_DRIVER_NAME, SECTOR_SHIFT, + SECTOR_SIZE, +}; + +/// Supported fields in the configuration space: +/// - 64-bit disk size +/// - 32-bit size max +/// - 32-bit seg max +/// - 16-bit num_queues at offset 34 +const CONFIG_SPACE_SIZE: usize = 64; + +/// Max segments in a data request. +const CONFIG_MAX_SEG: u32 = 16; + +fn build_device_id(disk_image: &dyn Ufile) -> Vec { + let mut default_disk_image_id = vec![0; VIRTIO_BLK_ID_BYTES as usize]; + match disk_image.get_device_id() { + Err(_) => warn!("Could not generate device id. We'll use a default."), + Ok(m) => { + // The kernel only knows to read a maximum of VIRTIO_BLK_ID_BYTES. + // This will also zero out any leftover bytes. + let disk_id = m.as_bytes(); + let bytes_to_copy = std::cmp::min(disk_id.len(), VIRTIO_BLK_ID_BYTES as usize); + default_disk_image_id[..bytes_to_copy].clone_from_slice(&disk_id[..bytes_to_copy]) + } + } + default_disk_image_id +} + +/// Virtio device for exposing block level read/write operations on a host file. +pub struct Block { + pub(crate) device_info: VirtioDeviceInfo, + disk_images: Vec>, + rate_limiters: Vec, + queue_sizes: Arc>, + subscriber_id: Option, + kill_evts: Vec, + evt_senders: Vec>, + epoll_threads: Vec>, + phantom: PhantomData, +} + +impl Block { + /// Create a new virtio block device that operates on the given file. + /// + /// The given file must be seekable and sizable. 
+ pub fn new( + mut disk_images: Vec>, + is_disk_read_only: bool, + queue_sizes: Arc>, + epoll_mgr: EpollManager, + rate_limiters: Vec, + ) -> Result { + let num_queues = disk_images.len(); + + if num_queues == 0 { + return Err(Error::InvalidInput); + } + + let disk_image = &mut disk_images[0]; + + let disk_size = disk_image.seek(SeekFrom::End(0)).map_err(Error::IOError)?; + if disk_size % SECTOR_SIZE != 0 { + warn!( + "Disk size {} is not a multiple of sector size {}; \ + the remainder will not be visible to the guest.", + disk_size, SECTOR_SIZE + ); + } + let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; + avail_features |= 1u64 << VIRTIO_BLK_F_SIZE_MAX; + avail_features |= 1u64 << VIRTIO_BLK_F_SEG_MAX; + + if is_disk_read_only { + avail_features |= 1u64 << VIRTIO_BLK_F_RO; + }; + + if num_queues > 1 { + avail_features |= 1u64 << VIRTIO_BLK_F_MQ; + } + + let config_space = + Self::build_config_space(disk_size, disk_image.get_max_size(), num_queues as u16); + + Ok(Block { + device_info: VirtioDeviceInfo::new( + BLK_DRIVER_NAME.to_string(), + avail_features, + queue_sizes.clone(), + config_space, + epoll_mgr, + ), + disk_images, + rate_limiters, + queue_sizes, + subscriber_id: None, + phantom: PhantomData, + evt_senders: Vec::with_capacity(num_queues), + kill_evts: Vec::with_capacity(num_queues), + epoll_threads: Vec::with_capacity(num_queues), + }) + } + + fn build_config_space(disk_size: u64, max_size: u32, num_queues: u16) -> Vec { + // The disk size field of the configuration space, which uses the first two words. + // If the image is not a multiple of the sector size, the tail bits are not exposed. + // The config space is little endian. + let mut config = Vec::with_capacity(CONFIG_SPACE_SIZE); + let num_sectors = disk_size >> SECTOR_SHIFT; + for i in 0..8 { + config.push((num_sectors >> (8 * i)) as u8); + } + + // The max_size field of the configuration space. + for i in 0..4 { + config.push((max_size >> (8 * i)) as u8); + } + + // The max_seg field of the configuration space. 
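+        // Resulting layout (little endian): bytes 0..8 sector count, 8..12
+        // size_max, 12..16 seg_max, 16..34 padding for the unused fields of
+        // virtio_blk_config, and 34..36 num_queues (offset 34, as noted above).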
+ let max_segs = CONFIG_MAX_SEG; + for i in 0..4 { + config.push((max_segs >> (8 * i)) as u8); + } + + for _i in 0..18 { + config.push(0_u8); + } + + for i in 0..2 { + config.push((num_queues >> (8 * i)) as u8); + } + + config + } + + pub fn set_patch_rate_limiters(&self, bytes: BucketUpdate, ops: BucketUpdate) -> Result<()> { + if self.evt_senders.is_empty() + || self.kill_evts.is_empty() + || self.evt_senders.len() != self.kill_evts.len() + { + error!("virtio-blk: failed to establish channel to send rate-limiter patch data"); + return Err(Error::InternalError); + } + + for sender in self.evt_senders.iter() { + if sender + .send(KillEvent::BucketUpdate(bytes.clone(), ops.clone())) + .is_err() + { + error!("virtio-blk: failed to send rate-limiter patch data"); + return Err(Error::InternalError); + } + } + + for kill_evt in self.kill_evts.iter() { + if let Err(e) = kill_evt.write(1) { + error!( + "virtio-blk: failed to write rate-limiter patch event {:?}", + e + ); + return Err(Error::InternalError); + } + } + + Ok(()) + } +} + +impl VirtioDevice for Block +where + AS: DbsGuestAddressSpace, + Q: QueueT + Send + 'static, + R: GuestMemoryRegion + Sync + Send + 'static, +{ + fn device_type(&self) -> u32 { + TYPE_BLOCK + } + + fn queue_max_sizes(&self) -> &[u16] { + &self.queue_sizes + } + + fn get_avail_features(&self, page: u32) -> u32 { + self.device_info.get_avail_features(page) + } + + fn set_acked_features(&mut self, page: u32, value: u32) { + self.device_info.set_acked_features(page, value) + } + + fn read_config(&mut self, offset: u64, data: &mut [u8]) -> ConfigResult { + self.device_info.read_config(offset, data) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> ConfigResult { + self.device_info.write_config(offset, data) + } + + fn activate(&mut self, mut config: VirtioDeviceConfig) -> ActivateResult { + self.device_info.check_queue_sizes(&config.queues[..])?; + + if self.disk_images.len() != config.queues.len() { + error!( + "The disk images number: {} is not equal to queues number: {}", + self.disk_images.len(), + config.queues.len() + ); + return Err(ActivateError::InternalError); + } + let mut kill_evts = Vec::with_capacity(self.queue_sizes.len()); + + let mut i = 0; + // first to reverse the queue's order, thus to make sure the following + // pop queue got the right queue order. 
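+ // Each popped queue is paired with its own disk image, rate limiter, kill
+ // eventfd, mpsc control channel and a dedicated "blk_iothread" worker.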
+ config.queues.reverse(); + while let Some(queue) = config.queues.pop() { + let disk_image = self.disk_images.pop().unwrap(); + let disk_image_id = build_device_id(disk_image.as_ref()); + + let data_desc_vec = + vec![Vec::with_capacity(CONFIG_MAX_SEG as usize); self.queue_sizes[0] as usize]; + let iovecs_vec = + vec![Vec::with_capacity(CONFIG_MAX_SEG as usize); self.queue_sizes[0] as usize]; + + let rate_limiter = self.rate_limiters.pop().unwrap_or_default(); + + let (evt_sender, evt_receiver) = mpsc::channel(); + self.evt_senders.push(evt_sender); + + let kill_evt = EventFd::new(EFD_NONBLOCK)?; + + let mut handler = Box::new(InnerBlockEpollHandler { + rate_limiter, + disk_image, + disk_image_id, + pending_req_map: HashMap::new(), + data_desc_vec, + iovecs_vec, + evt_receiver, + vm_as: config.vm_as.clone(), + queue, + kill_evt: kill_evt.try_clone().unwrap(), + }); + + kill_evts.push(kill_evt.try_clone().unwrap()); + self.kill_evts.push(kill_evt); + + thread::Builder::new() + .name(format!("{}_q{}", "blk_iothread", i)) + .spawn(move || { + if let Err(e) = handler.run() { + error!("Error running worker: {:?}", e); + } + }) + .map(|thread| self.epoll_threads.push(thread)) + .map_err(|e| { + error!("failed to clone the virtio-block epoll thread: {}", e); + ActivateError::InternalError + })?; + + i += 1; + } + let block_handler = Box::new(BlockEpollHandler { + kill_evts, + evt_senders: self.evt_senders.clone(), + config, + }); + + // subscribe this handler for io drain. + self.subscriber_id = Some(self.device_info.register_event_handler(block_handler)); + + Ok(()) + } + + fn reset(&mut self) -> ActivateResult { + Ok(()) + } + + fn remove(&mut self) { + // if the subsriber_id is invalid, it has not been activated yet. + if let Some(subscriber_id) = self.subscriber_id { + // Remove BlockEpollHandler from event manager, so it could be dropped and the resources + // could be freed, e.g. close disk_image, so vmm won't hold the backend file. + match self.device_info.remove_event_handler(subscriber_id) { + Ok(_) => debug!("virtio-blk: removed subscriber_id {:?}", subscriber_id), + Err(e) => { + warn!("virtio-blk: failed to remove event handler: {:?}", e); + } + } + } + + for sender in self.evt_senders.iter() { + if sender.send(KillEvent::Kill).is_err() { + error!("virtio-blk: failed to send kill event to epoller thread"); + } + } + + // notify the io threads handlers to terminate. 
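+ // The KillEvent::Kill messages were queued above; writing the kill eventfds
+ // wakes each worker's epoll loop so it can drain its channel and exit, after
+ // which the worker threads are joined.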
+ for kill_evt in self.kill_evts.iter() { + if let Err(e) = kill_evt.write(1) { + error!("virtio-blk: failed to write kill event {:?}", e); + } + } + + while let Some(thread) = self.epoll_threads.pop() { + if let Err(e) = thread.join() { + error!("virtio-blk: failed to reap the io threads: {:?}", e); + } else { + info!("io thread got reaped."); + } + } + + self.subscriber_id = None; + } + + fn get_resource_requirements( + &self, + requests: &mut Vec, + use_generic_irq: bool, + ) { + requests.push(ResourceConstraint::LegacyIrq { irq: None }); + if use_generic_irq { + requests.push(ResourceConstraint::GenericIrq { + size: (self.queue_sizes.len() + 1) as u32, + }); + } + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } +} + +#[cfg(test)] +mod tests { + use std::io::{self, Read, Seek, SeekFrom, Write}; + use std::os::unix::io::RawFd; + + use dbs_device::resources::DeviceResources; + use dbs_interrupt::NoopNotifier; + use dbs_utils::rate_limiter::{TokenBucket, TokenType}; + use kvm_ioctls::Kvm; + use virtio_queue::QueueSync; + use vm_memory::{Bytes, GuestAddress, GuestMemoryMmap, GuestRegionMmap}; + use vmm_sys_util::eventfd::EventFd; + + use crate::epoll_helper::*; + use crate::tests::{VirtQueue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; + use crate::{Error as VirtIoError, VirtioQueueConfig}; + + use super::*; + use crate::block::*; + + pub(super) struct DummyFile { + pub(super) device_id: Option, + pub(super) capacity: u64, + pub(super) have_complete_io: bool, + pub(super) max_size: u32, + pub(super) flush_error: bool, + } + + impl DummyFile { + pub(super) fn new() -> Self { + DummyFile { + device_id: None, + capacity: 0, + have_complete_io: false, + max_size: 0x100000, + flush_error: false, + } + } + } + + impl Read for DummyFile { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + Ok(buf.len()) + } + } + + impl Write for DummyFile { + fn write(&mut self, buf: &[u8]) -> io::Result { + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + if self.flush_error { + Err(io::Error::new(io::ErrorKind::Other, "test flush error")) + } else { + Ok(()) + } + } + } + impl Seek for DummyFile { + fn seek(&mut self, _pos: SeekFrom) -> io::Result { + Ok(0) + } + } + + impl Ufile for DummyFile { + fn get_capacity(&self) -> u64 { + self.capacity + } + + fn get_max_size(&self) -> u32 { + self.max_size + } + + fn get_device_id(&self) -> io::Result { + match &self.device_id { + Some(id) => Ok(id.to_string()), + None => Err(io::Error::new(io::ErrorKind::Other, "dummy_error")), + } + } + + // std err + fn get_data_evt_fd(&self) -> RawFd { + 2 + } + + fn io_read_submit( + &mut self, + _offset: i64, + _iovecs: &mut Vec, + _aio_data: u16, + ) -> io::Result { + Ok(0) + } + + fn io_write_submit( + &mut self, + _offset: i64, + _iovecs: &mut Vec, + _aio_data: u16, + ) -> io::Result { + Ok(0) + } + + fn io_complete(&mut self) -> io::Result> { + let mut v = Vec::new(); + if self.have_complete_io { + v.push((0, 1)); + } + Ok(v) + } + } + + #[test] + fn test_block_build_device_id() { + let device_id = "dummy_device_id"; + let mut file = DummyFile::new(); + file.device_id = Some(device_id.to_string()); + let disk_image: Box = Box::new(file); + let disk_id = build_device_id(disk_image.as_ref()); + assert_eq!(disk_id.len() as u32, VIRTIO_BLK_ID_BYTES); + let disk_image: Box = Box::new(DummyFile::new()); + let disk_id2 = build_device_id(disk_image.as_ref()); + assert_eq!(disk_id2.len() as u32, VIRTIO_BLK_ID_BYTES); + assert_ne!(disk_id, disk_id2); + } + + 
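+ // Illustrative sketch (the test name below is not from the original code):
+ // the first eight config-space bytes encode the capacity in 512-byte
+ // sectors, little endian, as built by build_config_space().
+ #[test]
+ fn test_block_config_space_capacity_sketch() {
+     let num_sectors = 0x1234u64;
+     let config = Block::<Arc<GuestMemoryMmap>>::build_config_space(
+         num_sectors << SECTOR_SHIFT,
+         0x10000,
+         1,
+     );
+     // Reassemble the little-endian capacity field and compare.
+     let mut capacity = 0u64;
+     for (i, b) in config[..8].iter().enumerate() {
+         capacity |= (*b as u64) << (8 * i);
+     }
+     assert_eq!(capacity, num_sectors);
+ }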
#[test] + fn test_block_request_parse() { + let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = VirtQueue::new(GuestAddress(0), m, 16); + let mut data_descs = Vec::with_capacity(CONFIG_MAX_SEG as usize); + + assert!(vq.end().0 < 0x1000); + + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + + { + let mut q = vq.create_queue(); + data_descs.clear(); + // write only request type descriptor + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_OUT, GuestAddress(0x1000)) + .unwrap(); + m.write_obj::(114, GuestAddress(0x1000 + 8)).unwrap(); + assert!(matches!( + Request::parse(&mut q.pop_descriptor_chain(m).unwrap(), &mut data_descs, 32), + Err(Error::UnexpectedWriteOnlyDescriptor) + )); + } + + { + let mut q = vq.create_queue(); + data_descs.clear(); + // chain too short; no status_desc + vq.dtable(0).flags().store(0); + assert!(matches!( + Request::parse(&mut q.pop_descriptor_chain(m).unwrap(), &mut data_descs, 32), + Err(Error::DescriptorChainTooShort) + )); + } + + { + let mut q = vq.create_queue(); + data_descs.clear(); + // chain too short; no data desc + vq.dtable(0).flags().store(VIRTQ_DESC_F_NEXT); + vq.dtable(1).set(0x2000, 0x1000, 0, 2); + assert!(matches!( + Request::parse(&mut q.pop_descriptor_chain(m).unwrap(), &mut data_descs, 32), + Err(Error::DescriptorChainTooShort) + )); + } + + { + let mut q = vq.create_queue(); + data_descs.clear(); + // write only data for OUT + vq.dtable(1) + .flags() + .store(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); + vq.dtable(2).set(0x3000, 0, 0, 0); + assert!(matches!( + Request::parse(&mut q.pop_descriptor_chain(m).unwrap(), &mut data_descs, 32), + Err(Error::UnexpectedWriteOnlyDescriptor) + )); + } + + { + let mut q = vq.create_queue(); + data_descs.clear(); + // read only data for OUT + m.write_obj::(VIRTIO_BLK_T_OUT, GuestAddress(0x1000)) + .unwrap(); + vq.dtable(1) + .flags() + .store(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); + assert!(matches!( + Request::parse(&mut q.pop_descriptor_chain(m).unwrap(), &mut data_descs, 32), + Err(Error::UnexpectedWriteOnlyDescriptor) + )); + } + + { + let mut q = vq.create_queue(); + data_descs.clear(); + // length too big data for OUT + m.write_obj::(VIRTIO_BLK_T_OUT, GuestAddress(0x1000)) + .unwrap(); + vq.dtable(1).flags().store(VIRTQ_DESC_F_NEXT); + vq.dtable(1).len().store(64); + assert!(matches!( + Request::parse(&mut q.pop_descriptor_chain(m).unwrap(), &mut data_descs, 32), + Err(Error::DescriptorLengthTooBig) + )); + } + + { + let mut q = vq.create_queue(); + data_descs.clear(); + // read only data for IN + m.write_obj::(VIRTIO_BLK_T_IN, GuestAddress(0x1000)) + .unwrap(); + vq.dtable(1).flags().store(VIRTQ_DESC_F_NEXT); + assert!(matches!( + Request::parse(&mut q.pop_descriptor_chain(m).unwrap(), &mut data_descs, 32), + Err(Error::UnexpectedReadOnlyDescriptor) + )); + } + + { + let mut q = vq.create_queue(); + data_descs.clear(); + // length too big data for IN + m.write_obj::(VIRTIO_BLK_T_IN, GuestAddress(0x1000)) + .unwrap(); + vq.dtable(1) + .flags() + .store(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); + vq.dtable(1).len().store(64); + assert!(matches!( + Request::parse(&mut q.pop_descriptor_chain(m).unwrap(), &mut data_descs, 32), + Err(Error::DescriptorLengthTooBig) + )); + } + + { + let mut q = vq.create_queue(); + data_descs.clear(); + // data desc write only and request type is getDeviceId + m.write_obj::(VIRTIO_BLK_T_GET_ID, GuestAddress(0x1000)) + .unwrap(); + vq.dtable(1) + .flags() + .store(VIRTQ_DESC_F_NEXT | 
VIRTQ_DESC_F_WRITE); + assert!(matches!( + Request::parse(&mut q.pop_descriptor_chain(m).unwrap(), &mut data_descs, 32), + Err(Error::UnexpectedReadOnlyDescriptor) + )); + } + + { + let mut q = vq.create_queue(); + data_descs.clear(); + // status desc read only + vq.dtable(2).flags().store(0); + assert!(matches!( + Request::parse(&mut q.pop_descriptor_chain(m).unwrap(), &mut data_descs, 32), + Err(Error::UnexpectedReadOnlyDescriptor) + )); + } + + { + let mut q = vq.create_queue(); + data_descs.clear(); + // status desc too small + vq.dtable(2).flags().store(VIRTQ_DESC_F_WRITE); + vq.dtable(2).len().store(0); + assert!(matches!( + Request::parse(&mut q.pop_descriptor_chain(m).unwrap(), &mut data_descs, 32), + Err(Error::DescriptorLengthTooSmall) + )); + } + + { + let mut q = vq.create_queue(); + data_descs.clear(); + // should be OK now + vq.dtable(2).len().store(0x1000); + let r = Request::parse(&mut q.pop_descriptor_chain(m).unwrap(), &mut data_descs, 32) + .unwrap(); + + assert_eq!(r.request_type, RequestType::GetDeviceID); + assert_eq!(r.sector, 114); + assert_eq!(data_descs[0].data_addr, 0x2000); + assert_eq!(data_descs[0].data_len, 0x40); + assert_eq!(r.status_addr, GuestAddress(0x3000)); + } + } + + #[test] + fn test_block_request_execute() { + let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = VirtQueue::new(GuestAddress(0), m, 16); + let mut data_descs = Vec::with_capacity(CONFIG_MAX_SEG as usize); + assert!(vq.end().0 < 0x1000); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + + let mut file = DummyFile::new(); + file.capacity = 4096; + let mut disk: Box = Box::new(file); + let disk_id = build_device_id(disk.as_ref()); + + { + // RequestType::In + let mut q = vq.create_queue(); + data_descs.clear(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_IN, GuestAddress(0x1000)) + .unwrap(); + let req = Request::parse( + &mut q.pop_descriptor_chain(m).unwrap(), + &mut data_descs, + 0x100000, + ) + .unwrap(); + assert!(req.execute(&mut disk, m, &data_descs, &disk_id).is_ok()); + } + + { + // RequestType::Out + let mut q = vq.create_queue(); + data_descs.clear(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1).set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_OUT, GuestAddress(0x1000)) + .unwrap(); + let req = Request::parse( + &mut q.pop_descriptor_chain(m).unwrap(), + &mut data_descs, + 0x100000, + ) + .unwrap(); + assert!(req.execute(&mut disk, m, &data_descs, &disk_id).is_ok()); + } + + { + // RequestType::Flush + let mut q = vq.create_queue(); + data_descs.clear(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1).set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_FLUSH, GuestAddress(0x1000)) + .unwrap(); + let req = Request::parse( + &mut q.pop_descriptor_chain(m).unwrap(), + &mut data_descs, + 0x100000, + ) + .unwrap(); + assert!(req.execute(&mut disk, m, &data_descs, &disk_id).is_ok()); + } + + { + // RequestType::GetDeviceID + let mut q = vq.create_queue(); + data_descs.clear(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 
1); + m.write_obj::(VIRTIO_BLK_T_GET_ID, GuestAddress(0x1000)) + .unwrap(); + let req = Request::parse( + &mut q.pop_descriptor_chain(m).unwrap(), + &mut data_descs, + 0x100000, + ) + .unwrap(); + assert!(req.execute(&mut disk, m, &data_descs, &disk_id).is_ok()); + } + + { + // RequestType::unsupport + let mut q = vq.create_queue(); + data_descs.clear(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_GET_ID + 10, GuestAddress(0x1000)) + .unwrap(); + let req = Request::parse( + &mut q.pop_descriptor_chain(m).unwrap(), + &mut data_descs, + 0x100000, + ) + .unwrap(); + match req.execute(&mut disk, m, &data_descs, &disk_id) { + Err(ExecuteError::Unsupported(n)) => assert_eq!(n, VIRTIO_BLK_T_GET_ID + 10), + _ => panic!(), + } + } + } + + #[test] + fn test_block_request_update_status() { + let m = Arc::new(GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap()); + let vq = VirtQueue::new(GuestAddress(0), &m, 16); + let mut data_descs = Vec::with_capacity(CONFIG_MAX_SEG as usize); + assert!(vq.end().0 < 0x1000); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + let mut q = vq.create_queue(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_IN, GuestAddress(0x1000)) + .unwrap(); + let req = Request::parse( + &mut q.pop_descriptor_chain(m.as_ref()).unwrap(), + &mut data_descs, + 0x100000, + ) + .unwrap(); + req.update_status(m.as_ref(), 0); + } + + #[test] + fn test_block_request_check_capacity() { + let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = VirtQueue::new(GuestAddress(0), m, 16); + let mut data_descs = Vec::with_capacity(CONFIG_MAX_SEG as usize); + assert!(vq.end().0 < 0x1000); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + + let mut disk: Box = Box::new(DummyFile::new()); + let disk_id = build_device_id(disk.as_ref()); + let mut q = vq.create_queue(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_IN, GuestAddress(0x1000)) + .unwrap(); + let req = Request::parse( + &mut q.pop_descriptor_chain(m).unwrap(), + &mut data_descs, + 0x100000, + ) + .unwrap(); + assert!(matches!( + req.execute(&mut disk, m, &data_descs, &disk_id), + Err(ExecuteError::BadRequest(VirtIoError::InvalidOffset)) + )); + + let mut file = DummyFile::new(); + file.capacity = 4096; + let mut disk: Box = Box::new(file); + let mut q = vq.create_queue(); + data_descs.clear(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_IN, GuestAddress(0x1000)) + .unwrap(); + let req = Request::parse( + &mut q.pop_descriptor_chain(m).unwrap(), + &mut data_descs, + 0x100000, + ) + .unwrap(); + assert!(req.check_capacity(&mut disk, &data_descs).is_ok()); + } + + #[test] + fn test_block_virtio_device_normal() { + let device_id = "dummy_device_id"; + let epoll_mgr = EpollManager::default(); + + let mut file = DummyFile::new(); + println!("max size {}", file.max_size); + 
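+ // The fully qualified VirtioDevice::<...>::method(&dev) calls below pin down
+ // the (AS, Q, R) trait instantiation under test, since Block implements
+ // VirtioDevice for any compatible combination.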
file.device_id = Some(device_id.to_string()); + let disk_image: Box = Box::new(file); + let mut dev = Block::>::new( + vec![disk_image], + true, + Arc::new(vec![128]), + epoll_mgr, + vec![], + ) + .unwrap(); + + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::device_type(&dev), + TYPE_BLOCK + ); + let queue_size = vec![128]; + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::queue_max_sizes( + &dev + ), + &queue_size[..] + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 0), + dev.device_info.get_avail_features(0) + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 1), + dev.device_info.get_avail_features(1) + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 2), + dev.device_info.get_avail_features(2) + ); + let mut config: [u8; 1] = [0]; + VirtioDevice::>, QueueSync, GuestRegionMmap>::read_config( + &mut dev, + 0, + &mut config, + ) + .unwrap(); + let config: [u8; 16] = [0; 16]; + VirtioDevice::>, QueueSync, GuestRegionMmap>::write_config( + &mut dev, 0, &config, + ) + .unwrap(); + } + + #[test] + fn test_block_virtio_device_active() { + let device_id = "dummy_device_id"; + let epoll_mgr = EpollManager::default(); + + { + // check_queue_sizes error + let mut file = DummyFile::new(); + file.device_id = Some(device_id.to_string()); + let disk_image: Box = Box::new(file); + let mut dev = Block::>>::new( + vec![disk_image], + true, + Arc::new(vec![128]), + epoll_mgr.clone(), + vec![], + ) + .unwrap(); + + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues = Vec::new(); + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::>>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + + assert!(matches!( + dev.activate(config), + Err(ActivateError::InvalidParam) + )); + } + + { + // test no disk_image + let mut file = DummyFile::new(); + file.device_id = Some(device_id.to_string()); + let disk_image: Box = Box::new(file); + let mut dev = Block::new( + vec![disk_image], + true, + Arc::new(vec![128]), + epoll_mgr.clone(), + vec![], + ) + .unwrap(); + dev.disk_images = vec![]; + + let mem = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues = vec![VirtioQueueConfig::::create(256, 0).unwrap()]; + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::>>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + + assert!(matches!( + dev.activate(config), + Err(ActivateError::InternalError) + )); + } + + { + // Ok + let mut file = DummyFile::new(); + file.device_id = Some(device_id.to_string()); + let disk_image: Box = Box::new(file); + let mut dev = Block::new( + vec![disk_image], + true, + Arc::new(vec![128]), + epoll_mgr, + vec![], + ) + .unwrap(); + + let mem = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues = vec![VirtioQueueConfig::::create(256, 0).unwrap()]; + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::>>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + + 
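+ // One disk image with one matching queue: activation spawns the worker
+ // thread and is expected to succeed.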
dev.activate(config).unwrap(); + } + } + + #[test] + fn test_block_set_patch_rate_limiters() { + let device_id = "dummy_device_id"; + let epoll_mgr = EpollManager::default(); + let mut file = DummyFile::new(); + file.device_id = Some(device_id.to_string()); + let disk_image: Box = Box::new(file); + let mut dev = Block::>::new( + vec![disk_image], + true, + Arc::new(vec![128]), + epoll_mgr, + vec![], + ) + .unwrap(); + + let (sender, _receiver) = mpsc::channel(); + dev.evt_senders = vec![sender]; + let event = EventFd::new(0).unwrap(); + dev.kill_evts = vec![event]; + + assert!(dev + .set_patch_rate_limiters(BucketUpdate::None, BucketUpdate::None) + .is_ok()); + } + + fn get_block_epoll_handler_with_file( + file: DummyFile, + ) -> InnerBlockEpollHandler, QueueSync> { + let mem = Arc::new(GuestMemoryMmap::from_ranges(&[(GuestAddress(0x0), 0x10000)]).unwrap()); + let queue = VirtioQueueConfig::create(256, 0).unwrap(); + let rate_limiter = RateLimiter::default(); + let disk_image: Box = Box::new(file); + let disk_image_id = build_device_id(disk_image.as_ref()); + + let data_desc_vec = vec![Vec::with_capacity(CONFIG_MAX_SEG as usize); 256]; + let iovecs_vec = vec![Vec::with_capacity(CONFIG_MAX_SEG as usize); 256]; + + let (_, evt_receiver) = mpsc::channel(); + + InnerBlockEpollHandler { + disk_image, + disk_image_id, + rate_limiter, + pending_req_map: HashMap::new(), + data_desc_vec, + iovecs_vec, + + kill_evt: EventFd::new(0).unwrap(), + evt_receiver, + + vm_as: mem, + queue, + } + } + + fn get_block_epoll_handler() -> InnerBlockEpollHandler, QueueSync> { + let mut file = DummyFile::new(); + file.capacity = 0x100000; + get_block_epoll_handler_with_file(file) + } + + #[test] + fn test_block_get_patch_rate_limiters() { + let mut handler = get_block_epoll_handler(); + let tokenbucket = TokenBucket::new(1, 1, 4); + + handler.get_patch_rate_limiters( + BucketUpdate::None, + BucketUpdate::Update(tokenbucket.clone()), + ); + assert_eq!(handler.rate_limiter.ops().unwrap(), &tokenbucket); + } + + #[test] + fn test_block_epoll_handler_handle_event() { + let mut handler = get_block_epoll_handler(); + let mut helper = EpollHelper::new().unwrap(); + + // test for QUEUE_AVAIL_EVENT + let events = epoll::Event::new(epoll::Events::EPOLLIN, QUEUE_AVAIL_EVENT as u64); + handler.handle_event(&mut helper, &events); + handler.queue.generate_event().unwrap(); + handler.handle_event(&mut helper, &events); + + // test for RATE_LIMITER_EVENT + let events = epoll::Event::new(epoll::Events::EPOLLIN, RATE_LIMITER_EVENT as u64); + handler.handle_event(&mut helper, &events); + + // test for END_IO_EVENT + let events = epoll::Event::new(epoll::Events::EPOLLIN, END_IO_EVENT as u64); + handler.handle_event(&mut helper, &events); + } + + #[test] + #[should_panic] + fn test_block_epoll_handler_handle_unknown_event() { + let mut handler = get_block_epoll_handler(); + let mut helper = EpollHelper::new().unwrap(); + + // test for unknown event + let events = epoll::Event::new(epoll::Events::EPOLLIN, KILL_EVENT as u64 + 10); + handler.handle_event(&mut helper, &events); + } + + #[test] + fn test_block_epoll_handler_process_queue() { + { + let mut file = DummyFile::new(); + file.capacity = 0x100000; + // set disk max_size to 0 will cause Request parse error + file.max_size = 0; + let mut handler = get_block_epoll_handler_with_file(file); + + let m = &handler.vm_as.clone(); + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + let q = vq.create_queue(); + vq.dtable(0).set(0x1000, 
0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_IN, GuestAddress(0x1000)) + .unwrap(); + + handler.queue = VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + ); + assert!(handler.process_queue()); + } + + { + // will cause check_capacity error + let file = DummyFile::new(); + let mut handler = get_block_epoll_handler_with_file(file); + let m = &handler.vm_as.clone(); + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + let q = vq.create_queue(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_IN, GuestAddress(0x1000)) + .unwrap(); + + handler.queue = VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + ); + assert!(handler.process_queue()); + let err_info: u32 = handler.vm_as.read_obj(GuestAddress(0x3000)).unwrap(); + assert_eq!(err_info, VIRTIO_BLK_S_IOERR); + } + + { + // test io submit + let mut file = DummyFile::new(); + file.capacity = 0x100000; + let mut handler = get_block_epoll_handler_with_file(file); + let m = &handler.vm_as.clone(); + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + let q = vq.create_queue(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_IN, GuestAddress(0x1000)) + .unwrap(); + + handler.queue = VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + ); + assert!(!handler.process_queue()); + assert_eq!(handler.pending_req_map.len(), 1); + } + + { + // test for other execute type (not IN/OUT) + let mut file = DummyFile::new(); + file.capacity = 0x100000; + let mut handler = get_block_epoll_handler_with_file(file); + let m = &handler.vm_as.clone(); + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + let q = vq.create_queue(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_FLUSH, GuestAddress(0x1000)) + .unwrap(); + + handler.queue = VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + ); + assert!(handler.process_queue()); + let err_info: u32 = handler.vm_as.read_obj(GuestAddress(0x3000)).unwrap(); + assert_eq!(err_info, VIRTIO_BLK_S_OK); + } + + { + // test for other execute type (not IN/OUT) : error + let mut file = DummyFile::new(); + file.capacity = 0x100000; + file.flush_error = true; + let mut handler = get_block_epoll_handler_with_file(file); + let m = &handler.vm_as.clone(); + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + let q = vq.create_queue(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + 
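+ // With flush_error set on the DummyFile, the flush request below should
+ // complete with VIRTIO_BLK_S_IOERR in the status byte.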
m.write_obj::(VIRTIO_BLK_T_FLUSH, GuestAddress(0x1000)) + .unwrap(); + + handler.queue = VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + ); + assert!(handler.process_queue()); + let err_info: u32 = handler.vm_as.read_obj(GuestAddress(0x3000)).unwrap(); + assert_eq!(err_info, VIRTIO_BLK_S_IOERR); + } + + { + // test for other execute type (not IN/OUT) : non_supported + let mut file = DummyFile::new(); + file.capacity = 0x100000; + let mut handler = get_block_epoll_handler_with_file(file); + let m = &handler.vm_as.clone(); + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + let q = vq.create_queue(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_FLUSH + 10, GuestAddress(0x1000)) + .unwrap(); + + handler.queue = VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + ); + assert!(handler.process_queue()); + let err_info: u32 = handler.vm_as.read_obj(GuestAddress(0x3000)).unwrap(); + assert_eq!(err_info, VIRTIO_BLK_S_UNSUPP); + } + + { + // test for rate limiter + let mut file = DummyFile::new(); + file.capacity = 0x100000; + let mut handler = get_block_epoll_handler_with_file(file); + handler.rate_limiter = RateLimiter::new(0, 0, 0, 1, 0, 100).unwrap(); + handler.rate_limiter.consume(1, TokenType::Ops); + let m = &handler.vm_as.clone(); + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + let q = vq.create_queue(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_FLUSH, GuestAddress(0x1000)) + .unwrap(); + + handler.queue = VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + ); + assert!(!handler.process_queue()); + // test if rate limited + assert!(handler.rate_limiter.is_blocked()); + } + } + + #[test] + fn test_block_epoll_handler_io_complete() { + let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + // no data + let mut handler = get_block_epoll_handler(); + let mut data_descs = Vec::with_capacity(CONFIG_MAX_SEG as usize); + assert!(handler.io_complete().is_ok()); + + // have data + let mut file = DummyFile::new(); + file.have_complete_io = true; + let disk_image = Box::new(file); + handler.disk_image = disk_image; + + // no data in pending_req_map + assert!(matches!(handler.io_complete(), Err(Error::InternalError))); + + // data in pending_req_map + let vq = VirtQueue::new(GuestAddress(0), m, 16); + assert!(vq.end().0 < 0x1000); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + let mut q = vq.create_queue(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x0, 1, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(VIRTIO_BLK_T_IN, GuestAddress(0x1000)) + .unwrap(); + let req = Request::parse( + &mut q.pop_descriptor_chain(m).unwrap(), + &mut data_descs, + 0x100000, + ) + .unwrap(); + handler.pending_req_map.insert(0, req); + handler.io_complete().unwrap(); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/block/handler.rs 
b/src/dragonball/src/dbs_virtio_devices/src/block/handler.rs new file mode 100644 index 000000000000..08d4d643267d --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/block/handler.rs @@ -0,0 +1,451 @@ +// Copyright 2019-2020 Alibnc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::collections::HashMap; +use std::ops::Deref; +use std::os::unix::io::AsRawFd; +use std::sync::mpsc::{Receiver, Sender}; + +use dbs_utils::{ + epoll_manager::{EventOps, Events, MutEventSubscriber}, + rate_limiter::{BucketUpdate, RateLimiter, TokenType}, +}; +use log::{debug, error, info, warn}; +use virtio_bindings::bindings::virtio_blk::*; +use virtio_queue::{Queue, QueueOwnedT, QueueT}; +use vm_memory::{Bytes, GuestAddress, GuestMemory, GuestMemoryRegion, GuestRegionMmap}; +use vmm_sys_util::eventfd::EventFd; + +use crate::{ + epoll_helper::{EpollHelper, EpollHelperError, EpollHelperHandler}, + DbsGuestAddressSpace, Error, Result, VirtioDeviceConfig, VirtioQueueConfig, +}; + +use super::{ExecuteError, IoDataDesc, KillEvent, Request, RequestType, Ufile, SECTOR_SHIFT}; + +// New descriptors are pending on the virtio queue. +pub const QUEUE_AVAIL_EVENT: u32 = 0; +// Rate limiter budget is now available. +pub const RATE_LIMITER_EVENT: u32 = 1; +// Some AIO requests have been completed. Used to support Linux AIO/TDC AIO. +pub const END_IO_EVENT: u32 = 2; +// trigger the thread to deal with some specific event +pub const KILL_EVENT: u32 = 4; + +pub(crate) struct InnerBlockEpollHandler { + pub(crate) disk_image: Box, + pub(crate) disk_image_id: Vec, + pub(crate) rate_limiter: RateLimiter, + pub(crate) pending_req_map: HashMap, + pub(crate) data_desc_vec: Vec>, + pub(crate) iovecs_vec: Vec>, + pub(crate) kill_evt: EventFd, + pub(crate) evt_receiver: Receiver, + + pub(crate) vm_as: AS, + pub(crate) queue: VirtioQueueConfig, +} + +impl InnerBlockEpollHandler { + pub(crate) fn process_queue(&mut self) -> bool { + let as_mem = self.vm_as.memory(); + let mem = as_mem.deref(); + let mut queue = self.queue.queue_mut().lock(); + + let mut iter = match queue.iter(mem) { + Err(e) => { + error!("virtio-blk: failed to iterate queue. {}", e); + return false; + } + Ok(iter) => iter, + }; + + // Used to collect used descriptors. (index, size) + let mut used_desc_vec: Vec<(u16, u32)> = Vec::new(); + let mut rate_limited = false; + + 'next_desc: for mut desc_chain in &mut iter { + // Safe to index data_desc_vec with index, as index has been checked in iterator + let index = desc_chain.head_index(); + let data_descs = &mut self.data_desc_vec[index as usize]; + let iovecs = &mut self.iovecs_vec[index as usize]; + data_descs.clear(); + iovecs.clear(); + match Request::parse(&mut desc_chain, data_descs, self.disk_image.get_max_size()) { + Err(e) => { + // It's caused by invalid request from guest, simple... + debug!("Failed to parse available descriptor chain: {:?}", e); + used_desc_vec.push((index, 0)); + } + Ok(req) => { + if Self::trigger_rate_limit(&mut self.rate_limiter, &req, data_descs) { + // stop processing the queue + rate_limited = true; + break 'next_desc; + } + // We try processing READ/WRITE requests using AIO first, and fallback to + // synchronous processing if it fails. 
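+ // Three outcomes: Ok(true) means the request was queued asynchronously and
+ // is tracked in pending_req_map until io_complete(); Ok(false) falls through
+ // to the synchronous path below; Err marks the descriptor used immediately
+ // with status VIRTIO_BLK_S_IOERR.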
+ match Self::process_aio_request( + &req, + data_descs, + iovecs, + &mut self.disk_image, + mem.deref(), + ) { + Ok(submited) => { + if submited { + self.pending_req_map.insert(req.request_index, req.clone()); + continue 'next_desc; + } + // Else not Submited, fallback to synchronous processing + } + Err(_e) => { + req.update_status(mem.deref(), VIRTIO_BLK_S_IOERR); + used_desc_vec.push((index, 0)); + continue 'next_desc; + } + } + // Synchronously execute the request + // Take a new immutable data_descs reference, as previous mutable one may have + // been consumed. + let data_descs = &self.data_desc_vec[req.request_index as usize]; + match Self::process_request( + &req, + &data_descs[..], + &mut self.disk_image, + &self.disk_image_id, + mem.deref(), + ) { + Ok(num_bytes_to_mem) => { + used_desc_vec.push((index, num_bytes_to_mem)); + } + Err(_e) => { + //METRICS.block.execute_fails.inc(); + used_desc_vec.push((index, 0)); + } + } + } + } + } + if rate_limited { + // If rate limiting kicked in, queue had advanced one element that we aborted + // processing; go back one element so it can be processed next time. + // TODO: log rate limit message or METRIC + iter.go_to_previous_position(); + } + drop(queue); + if !used_desc_vec.is_empty() { + for entry in &used_desc_vec { + self.queue.add_used(mem, entry.0, entry.1); + } + true + } else { + false + } + } + + fn trigger_rate_limit( + rate_limiter: &mut RateLimiter, + req: &Request, + data_descs: &[IoDataDesc], + ) -> bool { + // If limiter.consume() fails it means there is no more TokenType::Ops budget + // and rate limiting is in effect. + if !rate_limiter.consume(1, TokenType::Ops) { + // stop processing the queue + return true; + } + // Exercise the rate limiter only if this request is of data transfer type. + if req.request_type == RequestType::In || req.request_type == RequestType::Out { + // If limiter.consume() fails it means there is no more TokenType::Bytes + // budget and rate limiting is in effect. + + if !rate_limiter.consume(u64::from(req.data_len(data_descs)), TokenType::Bytes) { + // Revert the OPS consume(). + rate_limiter.manual_replenish(1, TokenType::Ops); + return true; + } + } + false + } + + fn process_request( + req: &Request, + data_descs: &[IoDataDesc], + disk_image: &mut Box, + disk_image_id: &[u8], + mem: &M, + ) -> std::result::Result { + match req.execute(disk_image, mem.deref(), data_descs, disk_image_id) { + Ok(l) => { + req.update_status(mem.deref(), VIRTIO_BLK_S_OK); + Ok(l) + } + Err(e) => { + let err_code = match &e { + ExecuteError::BadRequest(e) => { + // It's caused by invalid request from guest, simple... + debug!("Failed to execute GetDeviceID request: {:?}", e); + VIRTIO_BLK_S_IOERR + } + ExecuteError::Flush(e) => { + // only temporary errors are possible here + // TODO recovery + debug!("Failed to execute Flush request: {:?}", e); + VIRTIO_BLK_S_IOERR + } + ExecuteError::Read(e) | ExecuteError::Write(e) => { + // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + // The error recovery policy here is a little messy. + // We can't tell the error type from the returned error code + // and no easy way to recover. + // Hopefully AIO are used and read/write requests never ever + // reaches here when TDC live upgrading is enabled. + // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + warn!("virtio-blk: Failed to execute Read/Write request: {:?}", e); + VIRTIO_BLK_S_IOERR + } + ExecuteError::Seek(e) => { + // It's caused by invalid request from guest, simple... 
+ warn!( + "virtio-blk: Failed to execute out-of-boundary request: {:?}", + e + ); + VIRTIO_BLK_S_IOERR + } + ExecuteError::GetDeviceID(e) => { + // It's caused by invalid request from guest, simple... + warn!("virtio-blk: Failed to execute GetDeviceID request: {:?}", e); + VIRTIO_BLK_S_IOERR + } + ExecuteError::Unsupported(e) => { + // It's caused by invalid request from guest, simple... + warn!("virtio-blk: Failed to execute request: {:?}", e); + VIRTIO_BLK_S_UNSUPP + } + }; + + req.update_status(mem.deref(), err_code); + Err(e) + } + } + } + + // TODO: We should hide the logic of this function inside the Ufile implementation, + // instead of appearing here. + fn process_aio_request( + req: &Request, + data_descs: &[IoDataDesc], + iovecs: &mut Vec, + disk_image: &mut Box, + mem: &M, + ) -> std::result::Result { + if req.request_type != RequestType::In && req.request_type != RequestType::Out { + return Ok(false); + } + + req.check_capacity(disk_image, data_descs).map_err(|e| { + // It's caused by invalid request from guest, simple... + debug!("Failed to get buffer address for request"); + e + })?; + + for io in data_descs { + let host_addr = mem + .get_host_address(GuestAddress(io.data_addr)) + .map_err(|e| { + // It's caused by invalid request from guest, simple... + warn!( + "virtio-blk: Failed to get buffer guest address {:?} for request {:?}", + io.data_addr, req + ); + ExecuteError::BadRequest(Error::GuestMemory(e)) + })?; + iovecs.push(IoDataDesc { + data_addr: host_addr as u64, + data_len: io.data_len, + }); + } + + let submiter: fn( + &mut (dyn Ufile + 'static), + i64, + &mut Vec, + u16, + ) -> std::io::Result = match req.request_type { + RequestType::In => Ufile::io_read_submit, + RequestType::Out => Ufile::io_write_submit, + _ => panic!( + "virtio-blk: unexpected request type {:?} in async I/O", + req.request_type + ), + }; + + match submiter( + disk_image.as_mut(), + (req.sector << SECTOR_SHIFT) as i64, + iovecs, + req.request_index, + ) { + Ok(_) => { + // The request has been queued waiting for process + Ok(true) + } + Err(e) => { + warn!("virtio-blk: submit request {:?} error. {}", req, e); + // Failure may be caused by: + // no enough resource to queue the AIO request + // TODO recover + + // Now fallback to synchronous processing + Ok(false) + } + } + } + + pub(crate) fn io_complete(&mut self) -> Result<()> { + let as_mem = self.vm_as.memory(); + let mem: &AS::M = as_mem.deref(); + let iovs = self.disk_image.io_complete()?; + + // No data to handle + if iovs.is_empty() { + return Ok(()); + } + + for (index, res2) in &iovs { + match self.pending_req_map.remove(index) { + Some(req) => { + // Just ignore the result of write_obj(). Though we have validated + // request.status_addr, but we have released and reacquired the + // guest memory object and the guest may have hot-removed the + // memory maliciously. + let _ = mem.write_obj(*res2 as u8, req.status_addr); + let data_descs = &self.data_desc_vec[req.request_index as usize]; + let len = match req.request_type { + RequestType::In => req.data_len(data_descs), + RequestType::Out => 0, + _ => panic!( + "virtio-blk: unexpected request type {:?} in async I/O completion", + req.request_type + ), + }; + self.queue.add_used(mem, req.request_index, len); + } + None => { + error!("virtio-blk: Cant't find request for AIO completion event."); + // We have run into inconsistent state, let the device manager to do recovery. 
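+ // The error propagates out of io_complete() and currently panics the worker
+ // thread via the expect() in the END_IO_EVENT arm of handle_event().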
+ return Err(Error::InternalError); + } + } + } + self.queue.notify() + } + + pub(crate) fn get_patch_rate_limiters(&mut self, bytes: BucketUpdate, ops: BucketUpdate) { + self.rate_limiter.update_buckets(bytes, ops); + info!( + "virtio-blk: Update rate limiter for block device {:?}", + String::from_utf8(self.disk_image_id.clone()) + ); + } + + pub(crate) fn run(&mut self) -> std::result::Result<(), EpollHelperError> { + let mut helper = EpollHelper::new()?; + helper.add_event(self.queue.eventfd.as_raw_fd(), QUEUE_AVAIL_EVENT)?; + helper.add_event_custom( + self.disk_image.get_data_evt_fd(), + END_IO_EVENT, + epoll::Events::EPOLLIN | epoll::Events::EPOLLET, + )?; + + helper.add_event(self.rate_limiter.as_raw_fd(), RATE_LIMITER_EVENT)?; + + helper.add_event(self.kill_evt.as_raw_fd(), KILL_EVENT)?; + + helper.run(self)?; + + Ok(()) + } +} + +impl EpollHelperHandler for InnerBlockEpollHandler { + fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool { + let slot = event.data as u32; + match slot { + QUEUE_AVAIL_EVENT => { + if let Err(e) = self.queue.consume_event() { + error!("virtio-blk: failed to get queue event: {:?}", e); + return true; + } else if self.rate_limiter.is_blocked() { + // While limiter is blocked, don't process any more requests. + } else if self.process_queue() { + self.queue + .notify() + .expect("virtio-blk: failed to notify guest"); + } + } + END_IO_EVENT => { + // NOTE: Here we should drain io event fd, but different Ufile implementations + // may use different Events, and complete may depend on the count of reads from + // within io event. so leave it to IoEngine::complete to drain event fd. + // io_complete() only returns permanent errors. + self.io_complete() + .expect("virtio-blk: failed to complete IO requests"); + } + RATE_LIMITER_EVENT => { + // Upon rate limiter event, call the rate limiter handler + // and restart processing the queue. + if self.rate_limiter.event_handler().is_ok() && self.process_queue() { + self.queue + .notify() + .expect("virtio-blk: failed to notify guest"); + } + } + KILL_EVENT => { + let _ = self.kill_evt.read(); + while let Ok(evt) = self.evt_receiver.try_recv() { + match evt { + KillEvent::Kill => { + info!("virtio-blk: KILL_EVENT received, stopping inner epoll handler loop"); + + return true; + } + KillEvent::BucketUpdate(bytes, ops) => { + info!( + "virtio-blk: patch the io limiter bucket: {:?}, {:?}", + &bytes, &ops + ); + self.get_patch_rate_limiters(bytes, ops); + } + } + } + } + _ => panic!("virtio_blk: unknown event slot {}", slot), + } + false + } +} + +#[allow(dead_code)] +pub(crate) struct BlockEpollHandler< + AS: DbsGuestAddressSpace, + Q: QueueT + Send = Queue, + R: GuestMemoryRegion = GuestRegionMmap, +> { + pub(crate) evt_senders: Vec>, + pub(crate) kill_evts: Vec, + pub(crate) config: VirtioDeviceConfig, +} + +impl MutEventSubscriber + for BlockEpollHandler +{ + // a dumb impl for BlockEpollHandler to registe event manager for io drain. + fn process(&mut self, _events: Events, _ops: &mut EventOps) {} + fn init(&mut self, _ops: &mut EventOps) {} +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/block/mod.rs b/src/dragonball/src/dbs_virtio_devices/src/block/mod.rs new file mode 100644 index 000000000000..a98d159b5233 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/block/mod.rs @@ -0,0 +1,30 @@ +// Copyright 2019-2020 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +mod device; +pub use self::device::*; +mod handler; +pub(crate) use self::handler::*; +mod request; +pub(crate) use self::request::*; +mod ufile; +pub use self::ufile::*; + +use dbs_utils::rate_limiter::BucketUpdate; + +/// Block deriver name. +pub const BLK_DRIVER_NAME: &str = "virtio-blk"; + +pub(crate) const SECTOR_SHIFT: u8 = 9; +/// The size of sector +pub const SECTOR_SIZE: u64 = (0x01u64) << (SECTOR_SHIFT as u64); + +pub(crate) enum KillEvent { + Kill, + BucketUpdate(BucketUpdate, BucketUpdate), +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/block/request.rs b/src/dragonball/src/dbs_virtio_devices/src/block/request.rs new file mode 100644 index 000000000000..6a85fcf81281 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/block/request.rs @@ -0,0 +1,305 @@ +// Copyright 2019-2020 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::io::{self, Seek, SeekFrom, Write}; +use std::ops::Deref; +use std::result; + +use log::error; +use virtio_bindings::bindings::virtio_blk::*; +use virtio_queue::{Descriptor, DescriptorChain}; +use vm_memory::{ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryError}; + +use crate::{ + block::{ufile::Ufile, SECTOR_SHIFT, SECTOR_SIZE}, + Error, Result, +}; + +/// Error executing request. +#[derive(Debug)] +pub(crate) enum ExecuteError { + BadRequest(Error), + Flush(io::Error), + Read(GuestMemoryError), + Seek(io::Error), + Write(GuestMemoryError), + GetDeviceID(GuestMemoryError), + Unsupported(u32), +} + +/// Type of request from driver to device. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) enum RequestType { + /// Read request. + In, + /// Write request. + Out, + /// Flush request. + Flush, + /// Get device ID request. + GetDeviceID, + /// Unsupported request. + Unsupported(u32), +} + +impl From for RequestType { + fn from(value: u32) -> Self { + match value { + VIRTIO_BLK_T_IN => RequestType::In, + VIRTIO_BLK_T_OUT => RequestType::Out, + VIRTIO_BLK_T_FLUSH => RequestType::Flush, + VIRTIO_BLK_T_GET_ID => RequestType::GetDeviceID, + t => RequestType::Unsupported(t), + } + } +} + +/// The request header represents the mandatory fields of each block device request. +/// +/// A request header contains the following fields: +/// * request_type: an u32 value mapping to a read, write or flush operation. +/// * reserved: 32 bits are reserved for future extensions of the Virtio Spec. +/// * sector: an u64 value representing the offset where a read/write is to occur. +/// +/// The header simplifies reading the request from memory as all request follow +/// the same memory layout. +#[derive(Copy, Clone, Default)] +#[repr(C)] +struct RequestHeader { + request_type: u32, + _reserved: u32, + sector: u64, +} + +// Safe because RequestHeader only contains plain data. +unsafe impl ByteValued for RequestHeader {} + +impl RequestHeader { + /// Reads the request header from GuestMemory starting at `addr`. + /// + /// Virtio 1.0 specifies that the data is transmitted by the driver in little-endian + /// format. Firecracker currently runs only on little endian platforms so we don't + /// need to do an explicit little endian read as all reads are little endian by default. 
+ /// When running on a big endian platform, this code should not compile, and support + /// for explicit little endian reads is required. + #[cfg(target_endian = "little")] + fn read_from(memory: &M, addr: GuestAddress) -> Result { + memory.read_obj(addr).map_err(Error::GuestMemory) + } +} + +/// IO Data descriptor. +#[derive(Clone, Debug)] +#[repr(C)] +pub struct IoDataDesc { + pub data_addr: u64, + pub data_len: usize, +} + +/// The block request. +#[derive(Clone, Debug)] +pub struct Request { + /// The type of the request. + pub(crate) request_type: RequestType, + /// The offset of the request. + pub(crate) sector: u64, + pub(crate) status_addr: GuestAddress, + pub(crate) request_index: u16, +} + +impl Request { + /// Parses a `desc_chain` and returns the associated `Request`. + pub(crate) fn parse( + desc_chain: &mut DescriptorChain, + data_descs: &mut Vec, + max_size: u32, + ) -> Result + where + M: Deref, + M::Target: GuestMemory, + { + let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; + // The head contains the request type which MUST be readable. + if desc.is_write_only() { + return Err(Error::UnexpectedWriteOnlyDescriptor); + } + + let request_header = RequestHeader::read_from(desc_chain.memory(), desc.addr())?; + let mut req = Request { + request_type: RequestType::from(request_header.request_type), + sector: request_header.sector, + status_addr: GuestAddress(0), + request_index: desc_chain.head_index(), + }; + let status_desc; + let mut desc = desc_chain + .next() + .ok_or(Error::DescriptorChainTooShort) + .map_err(|e| { + error!("virtio-blk: Request {:?} has only head descriptor", req); + e + })?; + if !desc.has_next() { + status_desc = desc; + // Only flush requests are allowed to skip the data descriptor. + if req.request_type != RequestType::Flush { + error!("virtio-blk: Request {:?} need a data descriptor", req); + return Err(Error::DescriptorChainTooShort); + } + } else { + while desc.has_next() { + req.check_request(desc, max_size)?; + data_descs.push(IoDataDesc { + data_addr: desc.addr().0, + data_len: desc.len() as usize, + }); + desc = desc_chain + .next() + .ok_or(Error::DescriptorChainTooShort) + .map_err(|e| { + error!("virtio-blk: descriptor chain corrupted"); + e + })?; + } + status_desc = desc; + } + + // The status MUST always be writable and the guest address must be accessible. 
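+ // Violations map to UnexpectedReadOnlyDescriptor, DescriptorLengthTooSmall
+ // and InvalidGuestAddress respectively.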
+ if !status_desc.is_write_only() { + return Err(Error::UnexpectedReadOnlyDescriptor); + } + if status_desc.len() < 1 { + return Err(Error::DescriptorLengthTooSmall); + } + if !desc_chain.memory().address_in_range(status_desc.addr()) { + return Err(Error::InvalidGuestAddress(status_desc.addr())); + } + req.status_addr = status_desc.addr(); + + Ok(req) + } + + pub(crate) fn check_request(&self, desc: Descriptor, max_size: u32) -> Result<()> { + match self.request_type { + RequestType::Out => { + if desc.is_write_only() { + error!( + "virtio-blk: Request {:?} sees unexpected write-only descriptor", + self + ); + return Err(Error::UnexpectedWriteOnlyDescriptor); + } else if desc.len() > max_size { + error!( + "virtio-blk: Request {:?} size is greater than disk size ({} > {})", + self, + desc.len(), + max_size + ); + return Err(Error::DescriptorLengthTooBig); + } + } + RequestType::In => { + if !desc.is_write_only() { + error!( + "virtio-blk: Request {:?} sees unexpected read-only descriptor for read", + self + ); + return Err(Error::UnexpectedReadOnlyDescriptor); + } else if desc.len() > max_size { + error!( + "virtio-blk: Request {:?} size is greater than disk size ({} > {})", + self, + desc.len(), + max_size + ); + return Err(Error::DescriptorLengthTooBig); + } + } + RequestType::GetDeviceID if !desc.is_write_only() => { + error!( + "virtio-blk: Request {:?} sees unexpected read-only descriptor for GetDeviceID", + self + ); + return Err(Error::UnexpectedReadOnlyDescriptor); + } + _ => {} + } + Ok(()) + } + + pub(crate) fn execute( + &self, + disk: &mut Box, + mem: &M, + data_descs: &[IoDataDesc], + disk_id: &[u8], + ) -> result::Result { + self.check_capacity(disk, data_descs)?; + disk.seek(SeekFrom::Start(self.sector << SECTOR_SHIFT)) + .map_err(ExecuteError::Seek)?; + let mut len = 0; + for io in data_descs { + match self.request_type { + RequestType::In => { + mem.read_from(GuestAddress(io.data_addr), disk, io.data_len) + .map_err(ExecuteError::Read)?; + len += io.data_len; + } + RequestType::Out => { + mem.write_to(GuestAddress(io.data_addr), disk, io.data_len) + .map_err(ExecuteError::Write)?; + } + RequestType::Flush => match disk.flush() { + Ok(_) => {} + Err(e) => return Err(ExecuteError::Flush(e)), + }, + RequestType::GetDeviceID => { + if io.data_len < disk_id.len() { + return Err(ExecuteError::BadRequest(Error::InvalidOffset)); + } + mem.write_slice(disk_id, GuestAddress(io.data_addr)) + .map_err(ExecuteError::GetDeviceID)?; + // TODO: dragonball returns 0 here, check which value to return? + return Ok(disk_id.len() as u32); + } + RequestType::Unsupported(t) => return Err(ExecuteError::Unsupported(t)), + }; + } + + Ok(len as u32) + } + + pub(crate) fn check_capacity( + &self, + disk: &mut Box, + data_descs: &[IoDataDesc], + ) -> result::Result<(), ExecuteError> { + for d in data_descs { + let mut top = (d.data_len as u64 + SECTOR_SIZE - 1) & !(SECTOR_SIZE - 1u64); + + top = top + .checked_add(self.sector << SECTOR_SHIFT) + .ok_or(ExecuteError::BadRequest(Error::InvalidOffset))?; + if top > disk.get_capacity() { + return Err(ExecuteError::BadRequest(Error::InvalidOffset)); + } + } + + Ok(()) + } + + pub(crate) fn update_status(&self, mem: &M, status: u32) { + // Safe to unwrap because we have validated request.status_addr in parse() + mem.write_obj(status as u8, self.status_addr).unwrap(); + } + + // Return total IO length of all segments. Assume the req has been checked and is valid. 
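+ // Used both for rate-limiter byte accounting and as the used-ring length of
+ // completed IN requests.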
+ pub(crate) fn data_len(&self, data_descs: &[IoDataDesc]) -> u32 { + let mut len = 0; + for d in data_descs { + len += d.data_len; + } + len as u32 + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/block/ufile/aio.rs b/src/dragonball/src/dbs_virtio_devices/src/block/ufile/aio.rs new file mode 100644 index 000000000000..418f29c6f2d5 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/block/ufile/aio.rs @@ -0,0 +1,173 @@ +// Copyright 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::io; +use std::os::unix::io::{AsRawFd, RawFd}; + +use vmm_sys_util::aio::{IoContext, IoControlBlock, IoEvent, IOCB_FLAG_RESFD}; +use vmm_sys_util::aio::{IOCB_CMD_PREADV, IOCB_CMD_PWRITEV}; +use vmm_sys_util::eventfd::EventFd; + +use super::IoEngine; +use crate::block::IoDataDesc; + +/// Use AIO to perform asynchronous IO requests. +pub struct Aio { + fd: RawFd, + aio_evtfd: EventFd, + aio_context: IoContext, +} + +impl Aio { + /// Creates a new Aio instence. + /// + /// # Arguments + /// * `nr_events`: maximum number of concurrently processing IO operations. + pub fn new(fd: RawFd, nr_events: u32) -> io::Result { + let aio_context = IoContext::new(nr_events)?; + Ok(Self { + fd, + aio_evtfd: EventFd::new(0)?, + aio_context, + }) + } +} + +impl IoEngine for Aio { + fn event_fd(&self) -> &EventFd { + &self.aio_evtfd + } + + // NOTE: aio doesn't seem to support negative offsets. + fn readv( + &mut self, + offset: i64, + iovecs: &mut Vec, + user_data: u64, + ) -> io::Result { + let iocbs = [&mut IoControlBlock { + aio_fildes: self.fd as u32, + aio_lio_opcode: IOCB_CMD_PREADV as u16, + aio_resfd: self.aio_evtfd.as_raw_fd() as u32, + aio_flags: IOCB_FLAG_RESFD, + aio_buf: iovecs.as_mut_ptr() as u64, + aio_offset: offset, + aio_nbytes: iovecs.len() as u64, + aio_data: user_data, + ..Default::default() + }]; + + self.aio_context.submit(&iocbs[..]) + } + + fn writev( + &mut self, + offset: i64, + iovecs: &mut Vec, + user_data: u64, + ) -> io::Result { + let iocbs = [&mut IoControlBlock { + aio_fildes: self.fd as u32, + aio_lio_opcode: IOCB_CMD_PWRITEV as u16, + aio_resfd: self.aio_evtfd.as_raw_fd() as u32, + aio_flags: IOCB_FLAG_RESFD, + aio_buf: iovecs.as_mut_ptr() as u64, + aio_offset: offset, + aio_nbytes: iovecs.len() as u64, + aio_data: user_data, + ..Default::default() + }]; + + self.aio_context.submit(&iocbs[..]) + } + + // For currently supported LocalFile and TdcFile backend, it must not return temporary errors + // and may only return permanent errors. So the virtio-blk driver layer will not try to + // recover and only pass errors up onto the device manager. When changing the error handling + // policy, please do help to update BlockEpollHandler::io_complete(). 
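+ // complete() reads the completion counter from the eventfd, then drains that
+ // many IoEvents from the AIO context and returns (user_data, result) pairs
+ // for the epoll handler to match against pending_req_map.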
+ fn complete(&mut self) -> io::Result> { + let count = self.aio_evtfd.read()?; + let mut v = Vec::with_capacity(count as usize); + if count > 0 { + let mut events = + vec![ + unsafe { std::mem::MaybeUninit::::zeroed().assume_init() }; + count as usize + ]; + while v.len() < count as usize { + let r = self.aio_context.get_events(1, &mut events[0..], None)?; + for event in events.iter().take(r) { + let index = event.data; + let res2 = event.res; + v.push((index, res2)); + } + } + } + Ok(v) + } +} + +#[cfg(test)] +mod tests { + use std::io::{Read, Seek, SeekFrom, Write}; + + use vmm_sys_util::tempfile::TempFile; + + use super::*; + + #[test] + fn aio_engine() { + let temp_file = TempFile::new().unwrap(); + let mut aio = Aio::new(temp_file.as_file().as_raw_fd(), 128).unwrap(); + let buf = vec![0xffu8; 0x1000]; + aio.writev( + 0, + &mut vec![IoDataDesc { + data_addr: buf.as_ptr() as u64, + data_len: 0x10, + }], + 0x123, + ) + .unwrap(); + let com_res = aio.complete().unwrap(); + for cr in com_res { + assert_eq!(cr.0, 0x123); + assert_eq!(cr.1, 0x10); + } + let mut rbuf = vec![0u8; 0x100]; + let rn = temp_file.as_file().read(&mut rbuf).unwrap(); + assert_eq!(rn, 0x10); + assert_eq!(&rbuf[..0x10], &vec![0xff; 0x10]); + + //temp_file.as_file().seek(SeekFrom::End(0x20)).unwrap(); + temp_file.as_file().seek(SeekFrom::Start(0x120)).unwrap(); + temp_file.as_file().write_all(&[0xeeu8; 0x20]).unwrap(); + + let rbuf = vec![0u8; 0x100]; + let ret = aio.readv( + -0x20, + &mut vec![IoDataDesc { + data_addr: rbuf.as_ptr() as u64, + data_len: 0x20, + }], + 0x456, + ); + assert_eq!(ret.unwrap_err().kind(), io::ErrorKind::InvalidInput); + aio.readv( + 0x120, + &mut vec![IoDataDesc { + data_addr: rbuf.as_ptr() as u64, + data_len: 0x20, + }], + 0x456, + ) + .unwrap(); + let com_res = aio.complete().unwrap(); + for cr in com_res { + assert_eq!(cr.0, 0x456); + assert_eq!(cr.1, 0x20); + } + assert_eq!(&rbuf[..0x20], &vec![0xee; 0x20]); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/block/ufile/io_uring.rs b/src/dragonball/src/dbs_virtio_devices/src/block/ufile/io_uring.rs new file mode 100644 index 000000000000..d7eb308686d1 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/block/ufile/io_uring.rs @@ -0,0 +1,263 @@ +// Copyright 2022 Alibaba Cloud. All rights reserved. +// Copyright © 2021 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +use std::io; +use std::os::unix::io::{AsRawFd, RawFd}; + +use io_uring::{opcode, squeue, types, Probe}; +use log::info; +use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK}; + +use super::IoEngine; +use crate::block::IoDataDesc; + +/// Use io_uring to perform asynchronous IO requests. +pub struct IoUring { + fd: RawFd, + io_uring: io_uring::IoUring, + evtfd: EventFd, +} + +impl IoUring { + /// Creates a new IoUring instance. + /// + /// # Arguments + /// * `entries`: size of queue, and its value should be the power of two. + pub fn new(fd: RawFd, entries: u32) -> io::Result { + let io_uring = io_uring::IoUring::new(entries)?; + let evtfd = EventFd::new(EFD_NONBLOCK)?; + + // Register the io_uring eventfd that will notify when something in + // the completion queue is ready. + io_uring.submitter().register_eventfd(evtfd.as_raw_fd())?; + + Ok(Self { + fd, + evtfd, + io_uring, + }) + } + + /// Check if io_uring for block device can be used on the current system, as + /// it correctly supports the expected io_uring features. 
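+    ///
+    /// # Example
+    ///
+    /// A minimal selection sketch (not part of the original patch); `fd` is assumed
+    /// to be an already-opened disk file descriptor:
+    ///
+    /// ```ignore
+    /// if IoUring::is_supported() {
+    ///     // Prefer io_uring when the host kernel supports the required opcodes.
+    ///     let engine = IoUring::new(fd, 128)?;
+    /// } else {
+    ///     // Otherwise fall back to the linux-aio based engine.
+    ///     let engine = Aio::new(fd, 128)?;
+    /// }
+    /// ```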
+ pub fn is_supported() -> bool { + let error_msg = "io_uring not supported:"; + + // Check we can create an io_uring instance, which effectively verifies + // that io_uring_setup() syscall is supported. + let io_uring = match io_uring::IoUring::new(1) { + Ok(io_uring) => io_uring, + Err(e) => { + info!("{} failed to create io_uring instance: {}", error_msg, e); + return false; + } + }; + + let submitter = io_uring.submitter(); + + let mut probe = Probe::new(); + + // Check we can register a probe to validate supported operations. + match submitter.register_probe(&mut probe) { + Ok(_) => {} + Err(e) => { + info!("{} failed to register a probe: {}", error_msg, e); + return false; + } + } + + // Check IORING_OP_READ is supported + if !probe.is_supported(opcode::Read::CODE) { + info!("{} IORING_OP_READ operation not supported", error_msg); + return false; + } + + // Check IORING_OP_WRITE is supported + if !probe.is_supported(opcode::Write::CODE) { + info!("{} IORING_OP_WRITE operation not supported", error_msg); + return false; + } + + true + } +} + +impl IoEngine for IoUring { + fn event_fd(&self) -> &EventFd { + &self.evtfd + } + + fn readv( + &mut self, + offset: i64, + iovecs: &mut Vec, + user_data: u64, + ) -> io::Result { + let (submit, mut sq, _cq) = self.io_uring.split(); + + // Safe because we know the file descriptor is valid and we + // relied on vm-memory to provide the buffer address. + let _ = unsafe { + sq.push( + &opcode::Readv::new( + types::Fd(self.fd), + iovecs.as_ptr() as *const libc::iovec, + iovecs.len() as u32, + ) + .offset(offset) + .build() + .flags(squeue::Flags::ASYNC) + .user_data(user_data), + ) + }; + + // Update the submission queue and submit new operations to the + // io_uring instance. + sq.sync(); + submit.submit() + } + + fn writev( + &mut self, + offset: i64, + iovecs: &mut Vec, + user_data: u64, + ) -> io::Result { + let (submit, mut sq, _cq) = self.io_uring.split(); + + // Safe because we know the file descriptor is valid and we + // relied on vm-memory to provide the buffer address. + let _ = unsafe { + sq.push( + &opcode::Writev::new( + types::Fd(self.fd), + iovecs.as_ptr() as *const libc::iovec, + iovecs.len() as u32, + ) + .offset(offset) + .build() + .flags(squeue::Flags::ASYNC) + .user_data(user_data), + ) + }; + + // Update the submission queue and submit new operations to the + // io_uring instance. 
+ sq.sync(); + submit.submit() + } + + fn complete(&mut self) -> io::Result> { + let _ = self.evtfd.read()?; + let mut completion_list = Vec::new(); + + let cq = self.io_uring.completion(); + for cq_entry in cq { + completion_list.push((cq_entry.user_data(), cq_entry.result() as i64)); + } + + Ok(completion_list) + } +} + +#[cfg(test)] +mod tests { + use std::io::{Read, Seek, SeekFrom, Write}; + + use vmm_sys_util::tempfile::TempFile; + + use super::*; + use crate::epoll_helper::*; + + struct TestHandler; + + impl EpollHelperHandler for TestHandler { + fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool { + let slot = event.data as u32; + slot == 0xfeed + } + } + + #[test] + fn iouring_engine() { + if !IoUring::is_supported() { + return; + } + let temp_file = TempFile::new().unwrap(); + let mut uring = IoUring::new(temp_file.as_file().as_raw_fd(), 128).unwrap(); + + let mut helper = EpollHelper::new().unwrap(); + helper + .add_event(uring.event_fd().as_raw_fd(), 0xfeed) + .unwrap(); + + let mut handler = TestHandler; + + let buf = vec![0xffu8; 0x1000]; + uring + .writev( + 0, + &mut vec![IoDataDesc { + data_addr: buf.as_ptr() as u64, + data_len: 0x10, + }], + 0x123, + ) + .unwrap(); + + helper.run(&mut handler).unwrap(); + + let com_res = uring.complete().unwrap(); + for cr in com_res { + assert_eq!(cr.0, 0x123); + assert_eq!(cr.1, 0x10); + } + let mut rbuf = vec![0u8; 0x100]; + let rn = temp_file.as_file().read(&mut rbuf).unwrap(); + assert_eq!(rn, 0x10); + assert_eq!(&rbuf[..0x10], &vec![0xff; 0x10]); + + //temp_file.as_file().seek(SeekFrom::End(0x20)).unwrap(); + temp_file.as_file().seek(SeekFrom::Start(0x120)).unwrap(); + temp_file.as_file().write_all(&[0xeeu8; 0x20]).unwrap(); + + let rbuf = vec![0u8; 0x100]; + let ret = uring.readv( + -0x120, + &mut vec![IoDataDesc { + data_addr: rbuf.as_ptr() as u64, + data_len: 0x20, + }], + 0x456, + ); + assert_eq!(ret.unwrap(), 1); + helper.run(&mut handler).unwrap(); + let com_res = uring.complete().unwrap(); + for cr in com_res { + assert_eq!(cr.0, 0x456); + assert_eq!(cr.1, -22); + } + + uring + .readv( + 0x120, + &mut vec![IoDataDesc { + data_addr: rbuf.as_ptr() as u64, + data_len: 0x20, + }], + 0x456, + ) + .unwrap(); + + helper.run(&mut handler).unwrap(); + + let com_res = uring.complete().unwrap(); + for cr in com_res { + assert_eq!(cr.0, 0x456); + assert_eq!(cr.1, 0x20); + } + assert_eq!(&rbuf[..0x20], &vec![0xee; 0x20]); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/block/ufile/localfile.rs b/src/dragonball/src/dbs_virtio_devices/src/block/ufile/localfile.rs new file mode 100644 index 000000000000..b45d87e2a1a9 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/block/ufile/localfile.rs @@ -0,0 +1,480 @@ +// Copyright 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::fs::File; +use std::io::{self, Read, Seek, SeekFrom, Write}; +use std::mem::ManuallyDrop; +use std::os::linux::fs::MetadataExt; +use std::os::unix::io::{AsRawFd, RawFd}; + +use log::{info, warn}; +use virtio_bindings::bindings::virtio_blk::{VIRTIO_BLK_S_IOERR, VIRTIO_BLK_S_OK}; + +use super::{IoDataDesc, IoEngine, Ufile}; + +pub struct LocalFile { + pub(crate) file: ManuallyDrop, + no_drop: bool, + capacity: u64, + io_engine: E, +} + +impl LocalFile { + /// Creates a LocalFile instance. 
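+    ///
+    /// # Arguments
+    /// * `file`: the opened disk image file.
+    /// * `no_drop`: when true, do not close the underlying file when the `LocalFile` is dropped.
+    /// * `io_engine`: the asynchronous IO engine (e.g. `Aio` or `IoUring`) bound to the same file.
+    ///
+    /// # Example
+    ///
+    /// A usage sketch (illustrative only; the path is hypothetical and error handling is elided):
+    ///
+    /// ```ignore
+    /// let file = std::fs::File::open("/path/to/disk.img")?;
+    /// let engine = Aio::new(file.as_raw_fd(), 128)?;
+    /// let disk = LocalFile::new(file, false, engine)?;
+    /// assert_eq!(disk.get_max_size(), 0x100000);
+    /// ```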
+ pub fn new(mut file: File, no_drop: bool, io_engine: E) -> io::Result { + let capacity = file.seek(SeekFrom::End(0))?; + + Ok(Self { + file: ManuallyDrop::new(file), + no_drop, + capacity, + io_engine, + }) + } +} + +// Implement our own Drop for LocalFile, as we don't want to close LocalFile.file if no_drop is +// enabled. +impl Drop for LocalFile { + fn drop(&mut self) { + if self.no_drop { + info!("LocalFile: no_drop is enabled, don't close file on drop"); + } else { + // Close the raw fd directly. + let fd = self.file.as_raw_fd(); + if let Err(e) = nix::unistd::close(fd) { + warn!("LocalFile: failed to close disk file: {:?}", e); + } + } + } +} + +impl Read for LocalFile { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.file.read(buf) + } +} + +impl Write for LocalFile { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.file.write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.file.flush() + } +} + +impl Seek for LocalFile { + fn seek(&mut self, pos: SeekFrom) -> io::Result { + self.file.seek(pos) + } +} + +impl Ufile for LocalFile { + fn get_capacity(&self) -> u64 { + self.capacity + } + + fn get_max_size(&self) -> u32 { + // Set max size to 1M to avoid interferes with rate limiter. + 0x100000 + } + + fn get_device_id(&self) -> io::Result { + let blk_metadata = self.file.metadata()?; + // This is how kvmtool does it. + Ok(format!( + "{}{}{}", + blk_metadata.st_dev(), + blk_metadata.st_rdev(), + blk_metadata.st_ino() + )) + } + + fn get_data_evt_fd(&self) -> RawFd { + self.io_engine.event_fd().as_raw_fd() + } + + fn io_read_submit( + &mut self, + offset: i64, + iovecs: &mut Vec, + user_data: u16, + ) -> io::Result { + self.io_engine.readv(offset, iovecs, user_data as u64) + } + + fn io_write_submit( + &mut self, + offset: i64, + iovecs: &mut Vec, + user_data: u16, + ) -> io::Result { + self.io_engine.writev(offset, iovecs, user_data as u64) + } + + fn io_complete(&mut self) -> io::Result> { + Ok(self + .io_engine + .complete()? + .iter() + .map(|(user_data, res)| { + ( + *user_data as u16, + if *res >= 0 { + VIRTIO_BLK_S_OK + } else { + VIRTIO_BLK_S_IOERR + }, + ) + }) + .collect()) + } +} + +#[cfg(test)] +mod tests { + use std::ffi::OsStr; + use std::io::SeekFrom; + + use vmm_sys_util::tempfile::TempFile; + + use super::*; + use crate::block::aio::Aio; + use crate::block::io_uring::IoUring; + use crate::epoll_helper::*; + + const STOP_EVENT: u32 = 0xfeed; + + struct TestHandler; + + impl EpollHelperHandler for TestHandler { + fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool { + let slot = event.data as u32; + slot == STOP_EVENT + } + } + + fn new_aio_engine() -> Aio { + let temp_file = TempFile::new().unwrap(); + let aio = Aio::new(temp_file.as_file().as_raw_fd(), 128).unwrap(); + aio + } + + fn new_iouring_engine() -> IoUring { + let temp_file = TempFile::new().unwrap(); + let iouring = IoUring::new(temp_file.as_file().as_raw_fd(), 128).unwrap(); + iouring + } + + #[test] + fn test_new() { + // Create with AIO. + let file = TempFile::new().unwrap().into_file(); + let file_with_aio = LocalFile::new(file, false, new_aio_engine()); + assert!(file_with_aio.is_ok()); + + // Create with IO_Uring. 
+ let file = TempFile::new().unwrap().into_file(); + let file_with_iouring = LocalFile::new(file, false, new_iouring_engine()); + assert!(file_with_iouring.is_ok()); + } + + fn have_target_fd(fd: i32, filename: &OsStr) -> bool { + let mut path = std::path::PathBuf::from("/proc/self/fd"); + path.push(fd.to_string()); + if path.exists() { + let entry = path.read_link().unwrap(); + if entry + .file_name() + .unwrap() + .to_str() + .unwrap() + .contains(filename.to_str().unwrap()) + { + return true; + } + } + false + } + + #[test] + fn test_drop() { + // Droped case. + let tempfile = TempFile::new().unwrap(); + let filename = tempfile.as_path().file_name().unwrap().to_owned(); + let file = tempfile.into_file(); + let fd_of_file = file.as_raw_fd(); + let file_with_aio = LocalFile::new(file, false, new_aio_engine()).unwrap(); + + assert!(have_target_fd(fd_of_file, &filename)); + drop(file_with_aio); + assert!(!have_target_fd(fd_of_file, &filename)); + + let tempfile = TempFile::new().unwrap(); + let filename = tempfile.as_path().file_name().unwrap().to_owned(); + let file = tempfile.into_file(); + let fd_of_file = file.as_raw_fd(); + let file_with_iouring = LocalFile::new(file, false, new_iouring_engine()).unwrap(); + + assert!(have_target_fd(fd_of_file, &filename)); + drop(file_with_iouring); + assert!(!have_target_fd(fd_of_file, &filename)); + + // No-drop case. + let tempfile = TempFile::new().unwrap(); + let filename = tempfile.as_path().file_name().unwrap().to_owned(); + let file = tempfile.into_file(); + let fd_of_file = file.as_raw_fd(); + let file_with_aio = LocalFile::new(file, true, new_aio_engine()).unwrap(); + + assert!(have_target_fd(fd_of_file, &filename)); + drop(file_with_aio); + assert!(have_target_fd(fd_of_file, &filename)); + + let tempfile = TempFile::new().unwrap(); + let filename = tempfile.as_path().file_name().unwrap().to_owned(); + let file = tempfile.into_file(); + let fd_of_file = file.as_raw_fd(); + let file_with_iouring = LocalFile::new(file, true, new_iouring_engine()).unwrap(); + + assert!(have_target_fd(fd_of_file, &filename)); + drop(file_with_iouring); + assert!(have_target_fd(fd_of_file, &filename)); + } + + #[test] + fn test_read_write_flush_seek() { + let original_content = b"hello world"; + let size_of_content = original_content.len(); + let file = TempFile::new().unwrap().into_file(); + let mut file_with_aio = LocalFile::new(file, false, new_aio_engine()).unwrap(); + let bytes_write = file_with_aio.write(original_content).unwrap(); + assert_eq!(bytes_write, size_of_content); + file_with_aio.flush().unwrap(); + file_with_aio.rewind().unwrap(); + let mut content = vec![0; 11]; + let bytes_read = file_with_aio.read(&mut content).unwrap(); + assert_eq!(bytes_read, size_of_content); + assert_eq!(content, original_content); + + let original_content = b"hello world"; + let file = TempFile::new().unwrap().into_file(); + let mut file_with_iouring = LocalFile::new(file, false, new_iouring_engine()).unwrap(); + let bytes_write = file_with_iouring.write(original_content).unwrap(); + assert_eq!(bytes_write, size_of_content); + file_with_iouring.flush().unwrap(); + let start: usize = 6; + file_with_iouring + .seek(SeekFrom::Start(start as u64)) + .unwrap(); + let mut content = vec![0; size_of_content - start]; + let bytes_read = file_with_iouring.read(&mut content).unwrap(); + assert_eq!(bytes_read, size_of_content - start); + assert_eq!(content, original_content[start..]); + } + + #[test] + fn test_get_capacity() { + let mut file = TempFile::new().unwrap().into_file(); 
+ let original_content = b"hello world"; + let size_of_content = original_content.len(); + let bytes_write = file.write(original_content).unwrap(); + assert_eq!(bytes_write, size_of_content); + file.rewind().unwrap(); + let file_with_aio = LocalFile::new(file, false, new_aio_engine()).unwrap(); + assert_eq!(file_with_aio.get_capacity(), size_of_content as u64); + + let mut file = TempFile::new().unwrap().into_file(); + let original_content = b"hello world"; + let size_of_content = original_content.len(); + let bytes_write = file.write(original_content).unwrap(); + assert_eq!(bytes_write, size_of_content); + file.rewind().unwrap(); + let file_with_iouring = LocalFile::new(file, false, new_iouring_engine()).unwrap(); + assert_eq!(file_with_iouring.get_capacity(), size_of_content as u64); + } + + #[test] + fn test_get_max_capacity() { + let file = TempFile::new().unwrap().into_file(); + let file_with_aio = LocalFile::new(file, false, new_aio_engine()).unwrap(); + assert_eq!(file_with_aio.get_max_size(), 0x100000); + + let file = TempFile::new().unwrap().into_file(); + let file_with_iouring = LocalFile::new(file, false, new_iouring_engine()).unwrap(); + assert_eq!(file_with_iouring.get_max_size(), 0x100000); + } + + #[test] + fn test_get_device_id() { + let file = TempFile::new().unwrap().into_file(); + let file_with_aio = LocalFile::new(file, false, new_aio_engine()).unwrap(); + assert!(file_with_aio.get_device_id().is_ok()); + let metadata = file_with_aio.file.metadata().unwrap(); + assert_eq!( + file_with_aio.get_device_id().unwrap(), + format!( + "{}{}{}", + metadata.st_dev(), + metadata.st_rdev(), + metadata.st_ino() + ) + ); + + let file = TempFile::new().unwrap().into_file(); + let file_with_iouring = LocalFile::new(file, false, new_iouring_engine()).unwrap(); + assert!(file_with_iouring.get_device_id().is_ok()); + let metadata = file_with_iouring.file.metadata().unwrap(); + assert_eq!( + file_with_iouring.get_device_id().unwrap(), + format!( + "{}{}{}", + metadata.st_dev(), + metadata.st_rdev(), + metadata.st_ino() + ) + ); + } + + #[test] + fn test_get_data_evt_fd() { + let file = TempFile::new().unwrap(); + let aio = Aio::new(file.as_file().as_raw_fd(), 128).unwrap(); + let file_with_aio = LocalFile::new(file.into_file(), false, aio).unwrap(); + assert_eq!( + file_with_aio.get_data_evt_fd(), + file_with_aio.io_engine.event_fd().as_raw_fd() + ); + + let file = TempFile::new().unwrap(); + let iouring = IoUring::new(file.as_file().as_raw_fd(), 128).unwrap(); + let file_with_iouring = LocalFile::new(file.into_file(), false, iouring).unwrap(); + assert_eq!( + file_with_iouring.get_data_evt_fd(), + file_with_iouring.io_engine.event_fd().as_raw_fd() + ); + } + + #[test] + fn test_io_write_submit() { + // Test with Aio. + let file = TempFile::new().unwrap(); + let aio = Aio::new(file.as_file().as_raw_fd(), 128).unwrap(); + let mut file_with_aio = LocalFile::new(file.into_file(), false, aio).unwrap(); + let buf = vec![0xffu8; 0xff]; + file_with_aio + .io_write_submit( + 8, + &mut vec![IoDataDesc { + data_addr: buf.as_ptr() as u64, + data_len: 0x8_usize, + }], + 0x12, + ) + .unwrap(); + let res = file_with_aio.io_complete().unwrap(); + + for element in res { + assert_eq!(element.0, 0x12); + assert_eq!(element.1, VIRTIO_BLK_S_OK); + } + + // Test with IoUring. 
+ let file = TempFile::new().unwrap(); + let iouring = IoUring::new(file.as_file().as_raw_fd(), 128).unwrap(); + let mut helper = EpollHelper::new().unwrap(); + helper + .add_event(iouring.event_fd().as_raw_fd(), 0xfeed) + .unwrap(); + let mut file_with_iouring = LocalFile::new(file.into_file(), false, iouring).unwrap(); + let mut handler = TestHandler; + let buf = vec![0xffu8; 0xff]; + file_with_iouring + .io_write_submit( + 8, + &mut vec![IoDataDesc { + data_addr: buf.as_ptr() as u64, + data_len: 0x8_usize, + }], + 0x12, + ) + .unwrap(); + helper.run(&mut handler).unwrap(); + let res = file_with_iouring.io_complete().unwrap(); + + for element in res { + assert_eq!(element.0, 0x12); + assert_eq!(element.1, VIRTIO_BLK_S_OK); + } + } + + #[test] + fn test_io_read_submit() { + // Test with Aio. + let file = TempFile::new().unwrap(); + file.as_file().seek(SeekFrom::Start(0x120)).unwrap(); + file.as_file().write_all(&[0xeeu8; 0x20]).unwrap(); + let aio = Aio::new(file.as_file().as_raw_fd(), 128).unwrap(); + let mut file_with_aio = LocalFile::new(file.into_file(), false, aio).unwrap(); + let rbuf = vec![0u8; 0x100]; + let ret = file_with_aio.io_read_submit( + -0x20, + &mut vec![IoDataDesc { + data_addr: rbuf.as_ptr() as u64, + data_len: 0x20, + }], + 0x456, + ); + assert_eq!(ret.unwrap_err().kind(), io::ErrorKind::InvalidInput); + + file_with_aio + .io_read_submit( + 0x120, + &mut vec![IoDataDesc { + data_addr: rbuf.as_ptr() as u64, + data_len: 0x20, + }], + 0x456, + ) + .unwrap(); + let com_res = file_with_aio.io_complete().unwrap(); + for element in com_res { + assert_eq!(element.0, 0x456); + assert_eq!(element.1, VIRTIO_BLK_S_OK); + } + assert_eq!(&rbuf[..0x20], &vec![0xee; 0x20]); + + // Test with IoUring. + let file = TempFile::new().unwrap(); + file.as_file().seek(SeekFrom::Start(0x120)).unwrap(); + file.as_file().write_all(&[0xeeu8; 0x20]).unwrap(); + let iouring = IoUring::new(file.as_file().as_raw_fd(), 128).unwrap(); + let mut helper = EpollHelper::new().unwrap(); + helper + .add_event(iouring.event_fd().as_raw_fd(), 0xfeed) + .unwrap(); + let mut file_with_iouring = LocalFile::new(file.into_file(), false, iouring).unwrap(); + let mut handler = TestHandler; + let rbuf = vec![0u8; 0x100]; + + file_with_iouring + .io_read_submit( + 0x120, + &mut vec![IoDataDesc { + data_addr: rbuf.as_ptr() as u64, + data_len: 0x20, + }], + 0x456, + ) + .unwrap(); + helper.run(&mut handler).unwrap(); + let com_res = file_with_iouring.io_complete().unwrap(); + for element in com_res { + assert_eq!(element.0, 0x456); + assert_eq!(element.1, VIRTIO_BLK_S_OK); + } + assert_eq!(&rbuf[..0x20], &vec![0xee; 0x20]); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/block/ufile/mod.rs b/src/dragonball/src/dbs_virtio_devices/src/block/ufile/mod.rs new file mode 100644 index 000000000000..a9f9cf9fbd14 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/block/ufile/mod.rs @@ -0,0 +1,82 @@ +// Copyright 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +mod localfile; +pub use self::localfile::LocalFile; + +pub mod aio; +pub mod io_uring; + +use std::io::{self, Read, Seek, Write}; +use std::os::unix::io::RawFd; + +use vmm_sys_util::eventfd::EventFd; + +use super::request::IoDataDesc; + +/// Traits for the virtio-blk driver to access backend storage devices, such as localfile. +pub trait Ufile: Read + Write + Seek + Send { + /// Get disk capacity in bytes. + fn get_capacity(&self) -> u64; + + /// Get max size in a segment. 
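+    ///
+    /// This bounds the size of a single IO segment the backend accepts; `LocalFile`,
+    /// for instance, reports 1 MiB so large requests do not interfere with the rate limiter.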
+ fn get_max_size(&self) -> u32; + + /// Generate a unique device id for the virtio-blk device. + fn get_device_id(&self) -> io::Result; + + /// Get the raw event fd for data plane. + fn get_data_evt_fd(&self) -> RawFd; + + /// Submit asynchronous Read IO requests. + fn io_read_submit( + &mut self, + offset: i64, + iovecs: &mut Vec, + user_data: u16, + ) -> io::Result; + + /// Submit asynchronous Write IO requests. + fn io_write_submit( + &mut self, + offset: i64, + iovecs: &mut Vec, + user_data: u16, + ) -> io::Result; + + /// Poll for completed asynchronous IO requests. + /// + /// For currently supported LocalFile backend, it must not return temporary errors + /// and may only return permanent errors. So the virtio-blk driver layer will not try to + /// recover and only pass errors up onto the device manager. When changing the error handling + /// policy, please do help to update BlockEpollHandler::io_complete(). + fn io_complete(&mut self) -> io::Result>; +} + +/// Traits for the backend IO engine, such as aio or io-uring. +pub trait IoEngine { + /// Returns the EventFd that will notify when something is ready. + fn event_fd(&self) -> &EventFd; + + /// Submit asynchronous Read requests. + fn readv( + &mut self, + offset: i64, + iovecs: &mut Vec, + user_data: u64, + ) -> io::Result; + + /// Submit asynchronous Write requests. + fn writev( + &mut self, + offset: i64, + iovecs: &mut Vec, + user_data: u64, + ) -> io::Result; + + /// Poll for completed asynchronous IO requests. + /// + /// Return the vector of (user data, result code). + /// NOTE: complete need to drain the io event fd. + fn complete(&mut self) -> io::Result>; +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/device.rs b/src/dragonball/src/dbs_virtio_devices/src/device.rs new file mode 100644 index 000000000000..8ba641df445a --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/device.rs @@ -0,0 +1,884 @@ +// Copyright 2019-2022 Alibaba Cloud. All rights reserved. +// +// Portions Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Vritio Device Model. +//! +//! The Virtio specification defines a group of Virtio devices and transport layers. +//! The Virtio device model defines traits and structs for Virtio transport layers to +//! manage Virtio device backend drivers. + +use std::any::Any; +use std::cmp; +use std::io::Write; +use std::ops::Deref; +use std::sync::Arc; + +use dbs_device::resources::{DeviceResources, ResourceConstraint}; +use dbs_interrupt::{InterruptNotifier, NoopNotifier}; +use dbs_utils::epoll_manager::{EpollManager, EpollSubscriber, SubscriberId}; +use kvm_ioctls::VmFd; +use log::{error, warn}; +use virtio_queue::{DescriptorChain, QueueOwnedT, QueueSync, QueueT}; +use vm_memory::{ + Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap, + GuestUsize, +}; +use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK}; + +use crate::{ActivateError, ActivateResult, ConfigError, ConfigResult, Error, Result}; + +/// Virtio queue configuration information. +/// +/// The `VirtioQueueConfig` maintains configuration information for a Virtio queue. +/// It also provides methods to access the queue and associated interrupt/event notifiers. 
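+///
+/// A typical event-handling flow looks roughly like the sketch below (illustrative only,
+/// not part of the original patch; `cfg` is a `VirtioQueueConfig` owned by a device and
+/// `mem` a guest memory handle):
+///
+/// ```ignore
+/// // Drain the guest's queue notification first.
+/// let _ = cfg.consume_event()?;
+/// // Walk the available descriptors and publish the used entries.
+/// while let Some(chain) = cfg.get_next_descriptor(mem.clone())? {
+///     cfg.add_used(&*mem, chain.head_index(), 0);
+/// }
+/// // Finally, inject an interrupt so the guest notices the used entries.
+/// cfg.notify()?;
+/// ```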
+pub struct VirtioQueueConfig { + /// Virtio queue object to access the associated queue. + pub queue: Q, + /// EventFd to receive queue notification from guest. + pub eventfd: Arc, + /// Notifier to inject interrupt to guest. + notifier: Arc, + /// Queue index into the queue array. + index: u16, +} + +impl VirtioQueueConfig { + /// Create a `VirtioQueueConfig` object. + pub fn new( + queue: Q, + eventfd: Arc, + notifier: Arc, + index: u16, + ) -> Self { + VirtioQueueConfig { + queue, + eventfd, + notifier, + index, + } + } + + /// Create a `VirtioQueueConfig` object with the specified queue size and index. + pub fn create(queue_size: u16, index: u16) -> Result { + let eventfd = EventFd::new(EFD_NONBLOCK).map_err(Error::IOError)?; + + let queue = Q::new(queue_size)?; + Ok(VirtioQueueConfig { + queue, + eventfd: Arc::new(eventfd), + notifier: Arc::new(NoopNotifier::new()), + index, + }) + } + + /// Get queue index. + #[inline] + pub fn index(&self) -> u16 { + self.index + } + + /// Get immutable reference to the associated Virtio queue. + pub fn queue(&self) -> &Q { + &self.queue + } + + /// Get mutable reference to the associated Virtio queue. + pub fn queue_mut(&mut self) -> &mut Q { + &mut self.queue + } + + /// Get the maximum queue size. + #[inline] + pub fn max_size(&self) -> u16 { + self.queue.max_size() + } + + /// Get the next available descriptor. + pub fn get_next_descriptor(&mut self, mem: M) -> Result>> + where + M: Deref + Clone, + M::Target: GuestMemory + Sized, + { + let mut guard = self.queue.lock(); + let mut iter = guard.iter(mem)?; + Ok(iter.next()) + } + + /// Put a used descriptor into the used ring. + #[inline] + pub fn add_used(&mut self, mem: &M, desc_index: u16, len: u32) { + self.queue + .add_used(mem, desc_index, len) + .unwrap_or_else(|_| panic!("Failed to add used. index: {}", desc_index)) + } + + /// Consume a queue notification event. + #[inline] + pub fn consume_event(&self) -> Result { + self.eventfd.read().map_err(Error::IOError) + } + + /// Produce a queue notification event. + #[inline] + pub fn generate_event(&self) -> Result<()> { + self.eventfd.write(1).map_err(Error::IOError) + } + + /// Inject an interrupt to the guest for queue change events. + #[inline] + pub fn notify(&self) -> Result<()> { + self.notifier.notify().map_err(Error::IOError) + } + + /// Set interrupt notifier to inject interrupts to the guest. + #[inline] + pub fn set_interrupt_notifier(&mut self, notifier: Arc) { + self.notifier = notifier; + } + + /// Return the actual size of the queue, as the driver may not set up a + /// queue as big as the device allows. + #[inline] + pub fn actual_size(&self) -> u16 { + // TODO: rework once https://github.com/rust-vmm/vm-virtio/pull/153 get merged. + //self.queue.size() + std::cmp::min(self.queue.size(), self.queue.max_size()) + } +} + +impl Clone for VirtioQueueConfig { + fn clone(&self) -> Self { + VirtioQueueConfig { + queue: self.queue.clone(), + eventfd: self.eventfd.clone(), + notifier: self.notifier.clone(), + index: self.index, + } + } +} + +/// Virtio device configuration information. +/// +/// This structure maintains all configuration information for a Virtio device. It will be passed +/// to VirtioDevice::activate() and the Virtio device will take ownership of the configuration +/// object. On VirtioDevice::reset(), the configuration object should be returned to the caller. 
+pub struct VirtioDeviceConfig< + AS: GuestAddressSpace, + Q: QueueT = QueueSync, + R: GuestMemoryRegion = GuestRegionMmap, +> { + /// `GustMemoryAddress` object to access the guest memory. + pub vm_as: AS, + /// `VmFd` object for the device to access the hypervisor, such as KVM/HyperV etc. + pub vm_fd: Arc, + /// Resources assigned to the Virtio device. + pub resources: DeviceResources, + /// Virtio queues for normal data stream. + pub queues: Vec>, + /// Virtio queue for device control requests. + pub ctrl_queue: Option>, + /// Interrupt notifier to inject Virtio device change interrupt to the guest. + pub device_change_notifier: Arc, + /// Shared memory region for Virtio-fs etc. + pub shm_regions: Option>, +} + +impl VirtioDeviceConfig +where + AS: GuestAddressSpace, + Q: QueueT, + R: GuestMemoryRegion, +{ + /// Creates a new `VirtioDeviceConfig` object. + pub fn new( + vm_as: AS, + vm_fd: Arc, + resources: DeviceResources, + queues: Vec>, + ctrl_queue: Option>, + device_change_notifier: Arc, + ) -> Self { + VirtioDeviceConfig { + vm_as, + vm_fd, + resources, + queues, + ctrl_queue, + device_change_notifier, + shm_regions: None, + } + } + + /// Inject a Virtio device change notification to the guest. + pub fn notify_device_changes(&self) -> Result<()> { + self.device_change_notifier.notify().map_err(Error::IOError) + } + + /// Get interrupt eventfds for normal Vritio queues. + pub fn get_queue_interrupt_eventfds(&self) -> Vec<&EventFd> { + self.queues + .iter() + .map(|x| x.notifier.notifier().unwrap()) + .collect() + } + + /// Set shared memory region for Virtio-fs. + pub fn set_shm_regions(&mut self, shm_regions: VirtioSharedMemoryList) { + self.shm_regions = Some(shm_regions); + } + + /// Get host address and guest address of the shared memory region. + pub fn get_shm_region_addr(&self) -> Option<(u64, u64)> { + self.shm_regions + .as_ref() + .map(|shms| (shms.host_addr, shms.guest_addr.raw_value())) + } + + /// Gets a shared reference to the guest memory object. + pub fn lock_guest_memory(&self) -> AS::T { + self.vm_as.memory() + } +} + +/// Device memory shared between guest and the device backend driver, defined by the Virtio +/// specification for Virtio-fs devices. +#[derive(Clone, Eq, PartialEq, Debug)] +pub struct VirtioSharedMemory { + /// offset from the bar base + pub offset: u64, + /// len of this shared memory region + pub len: u64, +} + +/// A list of Shared Memory regions +#[derive(Debug)] +pub struct VirtioSharedMemoryList { + /// Host address + pub host_addr: u64, + /// Guest address + pub guest_addr: GuestAddress, + /// Length + pub len: GuestUsize, + /// kvm_userspace_memory_region flags + pub kvm_userspace_memory_region_flags: u32, + /// kvm_userspace_memory_region slot + pub kvm_userspace_memory_region_slot: u32, + /// List of shared regions. + pub region_list: Vec, + + /// List of mmap()ed regions managed through GuestRegionMmap instances. Using + /// GuestRegionMmap will perform the unmapping automatically when the instance + /// is dropped, which happens when the VirtioDevice gets dropped. + /// + /// GuestRegionMmap is used instead of MmapRegion. Because We need to insert + /// this region into vm_as,but vm_as uses GuestRegionMmap to manage regions. + /// If MmapRegion is used in here, the MmapRegion needs to be clone() to create + /// new GuestRegionMmap for vm_as. MmapRegion clone() will cause the problem of + /// duplicate unmap during automatic drop, so we should try to avoid the clone + /// of MmapRegion. 
This problem does not exist with GuestRegionMmap because + /// vm_as and VirtioSharedMemoryList can share GuestRegionMmap through Arc. + pub mmap_region: Arc, +} + +impl Clone for VirtioSharedMemoryList { + fn clone(&self) -> Self { + Self { + host_addr: self.host_addr, + guest_addr: self.guest_addr, + len: self.len, + kvm_userspace_memory_region_slot: self.kvm_userspace_memory_region_slot, + kvm_userspace_memory_region_flags: self.kvm_userspace_memory_region_flags, + region_list: self.region_list.clone(), + mmap_region: self.mmap_region.clone(), + } + } +} + +/// A callback for the VMM to insert memory region for virtio devices that +/// has device memory, such as DAX of virtiofs, pmem. +/// +/// insert_region function is used to solve the problem that the virtio device cannot +/// find the host address corresponding to the guest address when reading the +/// guest device memory. +/// +/// For example, the guest application executes the following code: +/// { +/// // "dax_fd" is virtio-fs file that support dax +/// // "no_dax_fd" is virtio-fs file that do not support dax +/// void *dax_ptr = (void*)mmap(NUMM, 4096, PORT, MAP_SHARED, dax_fd, 0); +/// write(no_dax_fd, dax_ptr, 4096); +/// } +/// dragonball will coredump. +/// +/// This is because the virtiofs device cannot resolve the dax_ptr address +/// when calling vm_as.get_slice(). There is no DAX region in vm_as. This +/// trait inserts the virtio device memory region, such as DAX region, into +/// vm_as. This trait should be implemented in VMM when creating virtio +/// devices with device memory, because the virtio device does not have +/// permission to change vm_as. +pub trait VirtioRegionHandler: Send { + /// Insert GuestRegionMmap to vm_as & address_space. + fn insert_region(&mut self, region: Arc) -> Result<()>; +} + +/// Trait for Virtio transport layer to manage virtio devices. +/// +/// The virtio transport driver takes the responsibility to manage lifecycle of virtio devices. +/// The device manager registers virtio devices to the transport driver, which will then manage +/// the device by: +/// - query device's resource requirement and allocate resources for it. +/// - handle guest register access by forwarding requests to the device. +/// - call activate()/reset() when the device is activated/reset by the guest. +/// The lifecycle of a virtio device is to be moved to a virtio transport, which will then query the +/// device. Once the guest driver has configured the device, `VirtioDevice::activate` will be called +/// and all the events, memory, and queues for device operation will be moved into the device. +/// Optionally, a virtio device can implement device reset in which it returns said resources and +/// resets its internal. +pub trait VirtioDevice: Send { + /// The virtio device type. + fn device_type(&self) -> u32; + + /// The maximum size of each queue that this device supports. + fn queue_max_sizes(&self) -> &[u16]; + + /// The maxinum size of control queue + fn ctrl_queue_max_sizes(&self) -> u16 { + 0 + } + + /// The set of feature bits shifted by `page * 32`. + fn get_avail_features(&self, page: u32) -> u32 { + let _ = page; + 0 + } + + /// Acknowledges that this set of features should be enabled. + fn set_acked_features(&mut self, page: u32, value: u32); + + /// Reads this device configuration space at `offset`. + fn read_config(&mut self, offset: u64, data: &mut [u8]) -> ConfigResult; + + /// Writes to this device configuration space at `offset`. 
+ fn write_config(&mut self, offset: u64, data: &[u8]) -> ConfigResult; + + /// Activates this device for real usage. + fn activate(&mut self, config: VirtioDeviceConfig) -> ActivateResult; + + /// Deactivates this device. + fn reset(&mut self) -> ActivateResult { + Err(ActivateError::InternalError) + } + + /// Removes this devices. + fn remove(&mut self) {} + + /// every new device object has its resource requirements + fn get_resource_requirements( + &self, + requests: &mut Vec, + use_generic_irq: bool, + ); + + /// Assigns requested resources back to virtio device + fn set_resource( + &mut self, + _vm_fd: Arc, + _resource: DeviceResources, + ) -> Result>> { + Ok(None) + } + + /// Used to downcast to the specific type. + fn as_any(&self) -> &dyn Any; + fn as_any_mut(&mut self) -> &mut dyn Any; +} + +/// A helper struct to support basic operations for emulated VirtioDevice backend devices. +pub struct VirtioDeviceInfo { + /// Name of the virtio backend device. + pub driver_name: String, + /// Available features of the virtio backend device. + pub avail_features: u64, + /// Acknowledged features of the virtio backend device. + pub acked_features: u64, + /// Array of queue sizes. + pub queue_sizes: Arc>, + /// Space to store device specific configuration data. + pub config_space: Vec, + /// EventManager SubscriberOps to register/unregister epoll events. + pub epoll_manager: EpollManager, +} + +/// A helper struct to support basic operations for emulated VirtioDevice backend devices. +impl VirtioDeviceInfo { + /// Creates a VirtioDeviceInfo instance. + pub fn new( + driver_name: String, + avail_features: u64, + queue_sizes: Arc>, + config_space: Vec, + epoll_manager: EpollManager, + ) -> Self { + VirtioDeviceInfo { + driver_name, + avail_features, + acked_features: 0u64, + queue_sizes, + config_space, + epoll_manager, + } + } + + /// Gets available features of virtio backend device. + #[inline] + pub fn avail_features(&self) -> u64 { + self.avail_features + } + + /// Gets available features of virtio backend device. + pub fn get_avail_features(&self, page: u32) -> u32 { + match page { + // Get the lower 32-bits of the features bitfield. + 0 => self.avail_features as u32, + // Get the upper 32-bits of the features bitfield. + 1 => (self.avail_features >> 32) as u32, + _ => { + warn!("{}: query features page: {}", self.driver_name, page); + 0u32 + } + } + } + + /// Gets acknowledged features of virtio backend device. + #[inline] + pub fn acked_features(&self) -> u64 { + self.acked_features + } + + /// Sets acknowledged features of virtio backend device. + pub fn set_acked_features(&mut self, page: u32, value: u32) { + let mut v = match page { + 0 => value as u64, + 1 => (value as u64) << 32, + _ => { + warn!("{}: ack unknown feature page: {}", self.driver_name, page); + 0u64 + } + }; + + // Check if the guest is ACK'ing a feature that we didn't claim to have. + let unrequested_features = v & !self.avail_features; + if unrequested_features != 0 { + warn!("{}: ackknowlege unknown feature: {:x}", self.driver_name, v); + // Don't count these features as acked. + v &= !unrequested_features; + } + self.acked_features |= v; + } + + /// Reads device specific configuration data of virtio backend device. + /// + /// The `offset` is based of 0x100 from the MMIO configuration address space. 
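+    ///
+    /// Reads are clamped to the end of the config space; a request that starts beyond it
+    /// fails with `ConfigError::InvalidOffset`. A small sketch (illustrative only; `info`
+    /// is a hypothetical `VirtioDeviceInfo` with a 16-byte config space):
+    ///
+    /// ```ignore
+    /// let mut buf = [0u8; 8];
+    /// info.read_config(12, &mut buf)?;          // copies only bytes 12..16 into buf[..4]
+    /// assert!(info.read_config(16, &mut buf).is_err());
+    /// ```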
+ pub fn read_config(&self, offset: u64, mut data: &mut [u8]) -> ConfigResult { + let config_len = self.config_space.len() as u64; + if offset >= config_len { + error!( + "{}: config space read request out of range, offset {}", + self.driver_name, offset + ); + return Err(ConfigError::InvalidOffset(offset)); + } + if let Some(end) = offset.checked_add(data.len() as u64) { + // This write can't fail, offset and end are checked against config_len. + data.write_all(&self.config_space[offset as usize..cmp::min(end, config_len) as usize]) + .unwrap(); + } + Ok(()) + } + + /// Writes device specific configuration data of virtio backend device. + /// + /// The `offset` is based of 0x100 from the MMIO configuration address space. + pub fn write_config(&mut self, offset: u64, data: &[u8]) -> ConfigResult { + let data_len = data.len() as u64; + let config_len = self.config_space.len() as u64; + if offset >= config_len { + error!( + "{}: config space write request out of range, offset {}", + self.driver_name, offset + ); + return Err(ConfigError::InvalidOffset(offset)); + } + if offset.checked_add(data_len).is_none() { + error!( + "{}: config space write request out of range, offset {}, data length {}", + self.driver_name, offset, data_len + ); + return Err(ConfigError::PlusOverflow(offset, data_len)); + } + if offset + data_len > config_len { + error!( + "{}: config space write request out of range, offset {}, data length {}", + self.driver_name, offset, data_len + ); + return Err(ConfigError::InvalidOffsetPlusDataLen(offset + data_len)); + } + + let dst = &mut self.config_space[offset as usize..(offset + data_len) as usize]; + dst.copy_from_slice(data); + Ok(()) + } + + /// Validate size of queues and queue eventfds. + pub fn check_queue_sizes(&self, queues: &[VirtioQueueConfig]) -> ActivateResult { + if queues.is_empty() || queues.len() != self.queue_sizes.len() { + error!( + "{}: invalid configuration: maximum {} queue(s), got {} queues", + self.driver_name, + self.queue_sizes.len(), + queues.len(), + ); + return Err(ActivateError::InvalidParam); + } + Ok(()) + } + + /// Register event handler for the device. + pub fn register_event_handler(&self, handler: EpollSubscriber) -> SubscriberId { + self.epoll_manager.add_subscriber(handler) + } + + /// Unregister event handler for the device. 
+ pub fn remove_event_handler(&mut self, id: SubscriberId) -> Result { + self.epoll_manager.remove_subscriber(id).map_err(|e| { + Error::IOError(std::io::Error::new( + std::io::ErrorKind::Other, + format!("remove_event_handler failed: {e:?}"), + )) + }) + } +} + +#[cfg(test)] +pub(crate) mod tests { + use dbs_interrupt::{ + InterruptManager, InterruptSourceType, InterruptStatusRegister32, LegacyNotifier, + }; + use dbs_utils::epoll_manager::{EventOps, Events, MutEventSubscriber}; + use kvm_ioctls::Kvm; + use virtio_queue::QueueSync; + use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap, GuestMemoryRegion, MmapRegion}; + + use super::*; + use crate::{VIRTIO_INTR_CONFIG, VIRTIO_INTR_VRING}; + + pub fn create_virtio_device_config() -> VirtioDeviceConfig> { + let (vmfd, irq_manager) = crate::tests::create_vm_and_irq_manager(); + let group = irq_manager + .create_group(InterruptSourceType::LegacyIrq, 0, 1) + .unwrap(); + let status = Arc::new(InterruptStatusRegister32::new()); + let device_change_notifier = Arc::new(LegacyNotifier::new( + group.clone(), + status.clone(), + VIRTIO_INTR_CONFIG, + )); + + let mem = Arc::new(GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap()); + + let mut queues = Vec::new(); + for idx in 0..8 { + queues.push(VirtioQueueConfig::new( + QueueSync::new(512).unwrap(), + Arc::new(EventFd::new(0).unwrap()), + Arc::new(LegacyNotifier::new( + group.clone(), + status.clone(), + VIRTIO_INTR_VRING, + )), + idx, + )); + } + + VirtioDeviceConfig::new( + mem, + vmfd, + DeviceResources::new(), + queues, + None, + device_change_notifier, + ) + } + + #[test] + fn test_create_virtio_queue_config() { + let (_vmfd, irq_manager) = crate::tests::create_vm_and_irq_manager(); + let group = irq_manager + .create_group(InterruptSourceType::LegacyIrq, 0, 1) + .unwrap(); + let status = Arc::new(InterruptStatusRegister32::new()); + let notifier = Arc::new(LegacyNotifier::new(group, status, VIRTIO_INTR_VRING)); + + let mut cfg = VirtioQueueConfig::::create(1024, 1).unwrap(); + cfg.set_interrupt_notifier(notifier); + + let mem = + Arc::new(GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap()); + let desc = cfg.get_next_descriptor(mem.memory()).unwrap(); + assert!(matches!(desc, None)); + + cfg.notify().unwrap(); + assert_eq!(cfg.index(), 1); + assert_eq!(cfg.max_size(), 1024); + assert_eq!(cfg.actual_size(), 1024); + cfg.generate_event().unwrap(); + assert_eq!(cfg.consume_event().unwrap(), 1); + } + + #[test] + fn test_clone_virtio_queue_config() { + let (_vmfd, irq_manager) = crate::tests::create_vm_and_irq_manager(); + let group = irq_manager + .create_group(InterruptSourceType::LegacyIrq, 0, 1) + .unwrap(); + let status = Arc::new(InterruptStatusRegister32::new()); + let notifier = Arc::new(LegacyNotifier::new(group, status, VIRTIO_INTR_VRING)); + + let mut cfg = VirtioQueueConfig::::create(1024, 1).unwrap(); + cfg.set_interrupt_notifier(notifier); + let mut cfg = cfg.clone(); + + let mem = + Arc::new(GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap()); + let desc = cfg.get_next_descriptor(mem.memory()).unwrap(); + assert!(matches!(desc, None)); + + { + let mut guard = cfg.queue_mut().lock(); + let mut iter = guard.iter(mem.memory()).unwrap(); + assert!(matches!(iter.next(), None)); + } + + cfg.notify().unwrap(); + assert_eq!(cfg.index(), 1); + assert_eq!(cfg.max_size(), 1024); + assert_eq!(cfg.actual_size(), 1024); + assert_eq!(cfg.queue.max_size(), 1024); + cfg.generate_event().unwrap(); + 
assert_eq!(cfg.consume_event().unwrap(), 1); + } + + #[test] + fn test_create_virtio_device_config() { + let mut device_config = create_virtio_device_config(); + + device_config.notify_device_changes().unwrap(); + assert_eq!(device_config.get_queue_interrupt_eventfds().len(), 8); + + let shared_mem = + GuestRegionMmap::new(MmapRegion::new(4096).unwrap(), GuestAddress(0)).unwrap(); + + let list = VirtioSharedMemoryList { + host_addr: 0x1234, + guest_addr: GuestAddress(0x5678), + len: shared_mem.len(), + kvm_userspace_memory_region_flags: 0, + kvm_userspace_memory_region_slot: 1, + region_list: vec![VirtioSharedMemory { + offset: 0, + len: 4096, + }], + mmap_region: Arc::new(shared_mem), + }; + + device_config.set_shm_regions(list); + let (host_addr, guest_addr) = device_config.get_shm_region_addr().unwrap(); + assert_eq!(host_addr, 0x1234); + assert_eq!(guest_addr, 0x5678); + let list = device_config.shm_regions.unwrap(); + assert_eq!(list.kvm_userspace_memory_region_slot, 1); + assert_eq!(list.kvm_userspace_memory_region_flags, 0); + assert_eq!(list.region_list.len(), 1); + } + + struct DummyDevice { + queue_size: Arc>, + device_info: VirtioDeviceInfo, + } + + impl VirtioDevice, QueueSync, GuestRegionMmap> for DummyDevice { + fn device_type(&self) -> u32 { + 0xffff + } + fn queue_max_sizes(&self) -> &[u16] { + &self.queue_size + } + + fn get_avail_features(&self, page: u32) -> u32 { + self.device_info.get_avail_features(page) + } + fn set_acked_features(&mut self, page: u32, value: u32) { + self.device_info.set_acked_features(page, value) + } + + fn read_config(&mut self, offset: u64, data: &mut [u8]) -> ConfigResult { + self.device_info.read_config(offset, data) + } + fn write_config(&mut self, offset: u64, data: &[u8]) -> ConfigResult { + self.device_info.write_config(offset, data) + } + fn activate( + &mut self, + _config: VirtioDeviceConfig>, + ) -> ActivateResult { + Ok(()) + } + fn get_resource_requirements( + &self, + _requests: &mut Vec, + _use_generic_irq: bool, + ) { + } + fn as_any(&self) -> &dyn Any { + self + } + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + } + + struct DummyHandler; + impl MutEventSubscriber for DummyHandler { + fn process(&mut self, _events: Events, _ops: &mut EventOps) {} + fn init(&mut self, _ops: &mut EventOps) {} + } + + #[test] + fn test_virtio_device() { + let epoll_mgr = EpollManager::default(); + + let avail_features = 0x1234 << 32 | 0x4567; + let config_space = vec![1; 16]; + let queue_size = Arc::new(vec![256; 1]); + let device_info = VirtioDeviceInfo::new( + String::from("dummy-device"), + avail_features, + queue_size.clone(), + config_space, + epoll_mgr, + ); + + let mut device = DummyDevice { + queue_size, + device_info, + }; + assert_eq!(device.device_type(), 0xffff); + assert_eq!(device.queue_max_sizes(), &[256]); + assert_eq!(device.ctrl_queue_max_sizes(), 0); + + device.get_resource_requirements(&mut Vec::new(), true); + + // tests avail features + assert_eq!(device.get_avail_features(0), 0x4567); + assert_eq!( + device.get_avail_features(1), + (device.device_info.avail_features() >> 32) as u32 + ); + assert_eq!(device.get_avail_features(2), 0); + + // tests acked features + assert_eq!(device.device_info.acked_features(), 0); + device.set_acked_features(2, 0x0004 | 0x0002); + assert_eq!(device.device_info.acked_features(), 0); + device.set_acked_features(1, 0x0004 | 0x0002); + assert_eq!(device.device_info.acked_features(), 0x0004 << 32); + device.set_acked_features(0, 0x4567 | 0x0008); + 
assert_eq!(device.device_info.acked_features(), 0x4567 | 0x0004 << 32); + + // test config space invalid read + let mut data = vec![0u8; 16]; + assert_eq!( + device.read_config(16, data.as_mut_slice()).unwrap_err(), + ConfigError::InvalidOffset(16) + ); + assert_eq!(data, vec![0; 16]); + // test read config + device.read_config(4, &mut data[..14]).unwrap(); + assert_eq!(data, vec![1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]); + device.read_config(0, data.as_mut_slice()).unwrap(); + assert_eq!(data, vec![1; 16]); + + // test config space invalid write + let write_data = vec![0xffu8; 16]; + let mut read_data = vec![0x0; 16]; + assert_eq!( + device.write_config(4, &write_data[..13]).unwrap_err(), + ConfigError::InvalidOffsetPlusDataLen(17) + ); + assert_eq!( + device.write_config(16, &write_data[..4]).unwrap_err(), + ConfigError::InvalidOffset(16) + ); + device.read_config(0, read_data.as_mut_slice()).unwrap(); + assert_eq!(read_data, vec![0x1; 16]); + + // test config space write + device.write_config(4, &write_data[6..10]).unwrap(); + assert_eq!( + device.device_info.config_space, + vec![1, 1, 1, 1, 0xff, 0xff, 0xff, 0xff, 1, 1, 1, 1, 1, 1, 1, 1] + ); + + // test device info check_queue_sizes + let queue_size = Vec::new(); + assert!(matches!( + device + .device_info + .check_queue_sizes::(&queue_size), + Err(ActivateError::InvalidParam) + )); + + assert!(matches!(device.reset(), Err(ActivateError::InternalError))); + + // test event handler + let handler = DummyHandler; + let id = device.device_info.register_event_handler(Box::new(handler)); + device.device_info.remove_event_handler(id).unwrap(); + assert!(matches!( + device.device_info.remove_event_handler(id), + Err(Error::IOError(_)) + )); + + // test device activate + let region_size = 0x400; + let regions = vec![ + (GuestAddress(0x0), region_size), + (GuestAddress(0x1000), region_size), + ]; + let gmm = GuestMemoryMmap::from_ranges(®ions).unwrap(); + let gm = GuestMemoryAtomic::::new(gmm); + + let queues = vec![ + VirtioQueueConfig::create(2, 0).unwrap(), + VirtioQueueConfig::create(2, 0).unwrap(), + ]; + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let device_config = VirtioDeviceConfig::new( + gm, + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + device.activate(device_config).unwrap(); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/epoll_helper.rs b/src/dragonball/src/dbs_virtio_devices/src/epoll_helper.rs new file mode 100644 index 000000000000..42732d31cc97 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/epoll_helper.rs @@ -0,0 +1,157 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. 
+// +// Copyright © 2020 Intel Corporation +// +// Copyright © 2021 Ant Group Corporation + +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +use std::fs::File; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; + +use log::error; + +pub struct EpollHelper { + epoll_file: File, +} + +#[derive(Debug)] +pub enum EpollHelperError { + CreateFd(std::io::Error), + Ctl(std::io::Error), + IoError(std::io::Error), + Wait(std::io::Error), +} + +pub trait EpollHelperHandler { + // Return true if execution of the loop should be stopped + fn handle_event(&mut self, helper: &mut EpollHelper, event: &epoll::Event) -> bool; +} + +impl EpollHelper { + pub fn new() -> std::result::Result { + // Create the epoll file descriptor + let epoll_fd = epoll::create(true).map_err(EpollHelperError::CreateFd)?; + // Use 'File' to enforce closing on 'epoll_fd' + let epoll_file = unsafe { File::from_raw_fd(epoll_fd) }; + + Ok(Self { epoll_file }) + } + + pub fn add_event(&mut self, fd: RawFd, id: u32) -> std::result::Result<(), EpollHelperError> { + self.add_event_custom(fd, id, epoll::Events::EPOLLIN) + } + + pub fn add_event_custom( + &mut self, + fd: RawFd, + id: u32, + evts: epoll::Events, + ) -> std::result::Result<(), EpollHelperError> { + epoll::ctl( + self.epoll_file.as_raw_fd(), + epoll::ControlOptions::EPOLL_CTL_ADD, + fd, + epoll::Event::new(evts, id.into()), + ) + .map_err(EpollHelperError::Ctl) + } + + pub fn del_event_custom( + &mut self, + fd: RawFd, + id: u32, + evts: epoll::Events, + ) -> std::result::Result<(), EpollHelperError> { + epoll::ctl( + self.epoll_file.as_raw_fd(), + epoll::ControlOptions::EPOLL_CTL_DEL, + fd, + epoll::Event::new(evts, id.into()), + ) + .map_err(EpollHelperError::Ctl) + } + + pub fn run( + &mut self, + handler: &mut dyn EpollHelperHandler, + ) -> std::result::Result<(), EpollHelperError> { + const EPOLL_EVENTS_LEN: usize = 100; + let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN]; + + loop { + let num_events = match epoll::wait(self.epoll_file.as_raw_fd(), -1, &mut events[..]) { + Ok(res) => res, + Err(e) => { + if e.kind() == std::io::ErrorKind::Interrupted { + // It's well defined from the epoll_wait() syscall + // documentation that the epoll loop can be interrupted + // before any of the requested events occurred or the + // timeout expired. In both those cases, epoll_wait() + // returns an error of type EINTR, but this should not + // be considered as a regular error. Instead it is more + // appropriate to retry, by calling into epoll_wait(). 
+ continue; + } + error!("io thread epoll wait failed: {:?}", e); + return Err(EpollHelperError::Wait(e)); + } + }; + + for event in events.iter().take(num_events) { + if handler.handle_event(self, event) { + return Ok(()); + } + } + } + } +} + +impl AsRawFd for EpollHelper { + fn as_raw_fd(&self) -> RawFd { + self.epoll_file.as_raw_fd() + } +} + +#[cfg(test)] +mod tests { + use std::os::unix::io::AsRawFd; + use vmm_sys_util::eventfd::EventFd; + + use super::EpollHelper; + + #[test] + fn test_new_epoller() { + let helper = EpollHelper::new(); + assert!(helper.is_ok()); + } + + #[test] + fn test_add_event() { + let helper = EpollHelper::new(); + assert!(helper.is_ok()); + + let eventfd = EventFd::new(0).unwrap(); + + let res = helper.unwrap().add_event(eventfd.as_raw_fd(), 0); + assert!(res.is_ok()) + } + + #[test] + fn test_delete_event() { + let helper = EpollHelper::new(); + assert!(helper.is_ok()); + + let eventfd = EventFd::new(0).unwrap(); + let mut helper = helper.unwrap(); + let res = helper.add_event(eventfd.as_raw_fd(), 0); + assert!(res.is_ok()); + + let res = helper.del_event_custom(eventfd.as_raw_fd(), 0, epoll::Events::EPOLLIN); + assert!(res.is_ok()); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/fs/device.rs b/src/dragonball/src/dbs_virtio_devices/src/fs/device.rs new file mode 100644 index 000000000000..2f9c2c28372d --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/fs/device.rs @@ -0,0 +1,1797 @@ +// Copyright 2020 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +use std::any::Any; +use std::collections::HashMap; +use std::ffi::CString; +use std::fs::File; +use std::io::{BufRead, BufReader, Read}; +use std::marker::PhantomData; +use std::ops::Deref; +use std::os::unix::io::FromRawFd; +use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::sync::{mpsc, Arc}; +use std::time::Duration; + +use caps::{CapSet, Capability}; +use dbs_device::resources::{DeviceResources, ResourceConstraint}; +use dbs_utils::epoll_manager::{EpollManager, SubscriberId}; +use dbs_utils::rate_limiter::{BucketUpdate, RateLimiter}; +use fuse_backend_rs::api::{Vfs, VfsIndex, VfsOptions}; +use fuse_backend_rs::passthrough::{CachePolicy, Config as PassthroughConfig, PassthroughFs}; +use kvm_bindings::kvm_userspace_memory_region; +use kvm_ioctls::VmFd; +use log::{debug, error, info, trace, warn}; +use nix::sys::memfd; +use nydus_api::ConfigV2; +use nydus_rafs::blobfs::{BlobFs, Config as BlobfsConfig}; +use nydus_rafs::{fs::Rafs, RafsIoRead}; +use rlimit::Resource; +use virtio_bindings::bindings::virtio_blk::VIRTIO_F_VERSION_1; +use virtio_queue::QueueT; +use vm_memory::{ + FileOffset, GuestAddress, GuestAddressSpace, GuestRegionMmap, GuestUsize, MmapRegion, +}; +use vmm_sys_util::eventfd::EventFd; + +use crate::{ + ActivateError, ActivateResult, ConfigResult, Error, Result, VirtioDevice, VirtioDeviceConfig, + VirtioDeviceInfo, VirtioRegionHandler, VirtioSharedMemory, VirtioSharedMemoryList, + TYPE_VIRTIO_FS, +}; + +use super::{ + CacheHandler, Error as FsError, Result as FsResult, VirtioFsEpollHandler, VIRTIO_FS_NAME, +}; + +const CONFIG_SPACE_TAG_SIZE: usize = 36; +const CONFIG_SPACE_NUM_QUEUES_SIZE: usize = 4; +const CONFIG_SPACE_SIZE: usize = CONFIG_SPACE_TAG_SIZE + CONFIG_SPACE_NUM_QUEUES_SIZE; +const NUM_QUEUE_OFFSET: usize = 1; + +// Attr and entry timeout values +const CACHE_ALWAYS_TIMEOUT: u64 = 86_400; // 1 day +const CACHE_AUTO_TIMEOUT: u64 = 1; +const CACHE_NONE_TIMEOUT: u64 = 0; + +// VirtioFs backend fs type 
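+// (As suggested by the imports above: passthroughfs maps to the fuse-backend-rs
+// `PassthroughFs` host-directory passthrough, rafs to the nydus `Rafs` read-only
+// image filesystem, and blobfs to the nydus `BlobFs` blob backend.)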
+pub(crate) const PASSTHROUGHFS: &str = "passthroughfs"; +pub(crate) const BLOBFS: &str = "blobfs"; +pub(crate) const RAFS: &str = "rafs"; + +/// Info of backend filesystems of VirtioFs +#[allow(dead_code)] +pub struct BackendFsInfo { + pub(crate) index: VfsIndex, + pub(crate) fstype: String, + // (source, config), only suitable for Rafs + pub(crate) src_cfg: Option<(String, String)>, +} + +/// Virtio device for virtiofs +pub struct VirtioFs { + pub(crate) device_info: VirtioDeviceInfo, + pub(crate) cache_size: u64, + pub(crate) queue_sizes: Arc>, + pub(crate) thread_pool_size: u16, + pub(crate) cache_policy: CachePolicy, + pub(crate) writeback_cache: bool, + pub(crate) no_open: bool, + pub(crate) killpriv_v2: bool, + pub(crate) no_readdir: bool, + pub(crate) xattr: bool, + pub(crate) handler: Box, + pub(crate) fs: Arc, + pub(crate) backend_fs: HashMap, + pub(crate) subscriber_id: Option, + pub(crate) id: String, + pub(crate) rate_limiter: Option, + pub(crate) patch_rate_limiter_fd: EventFd, + pub(crate) sender: Option>, + phantom: PhantomData, +} + +impl VirtioFs +where + AS: GuestAddressSpace + 'static, +{ + pub fn set_patch_rate_limiters(&self, bytes: BucketUpdate, ops: BucketUpdate) -> Result<()> { + match &self.sender { + Some(sender) => { + sender.send((bytes, ops)).map_err(|e| { + error!( + "{}: failed to send rate-limiter patch data {:?}", + VIRTIO_FS_NAME, e + ); + Error::InternalError + })?; + self.patch_rate_limiter_fd.write(1).map_err(|e| { + error!( + "{}: failed to write rate-limiter patch event {:?}", + VIRTIO_FS_NAME, e + ); + Error::InternalError + })?; + Ok(()) + } + None => { + error!( + "{}: failed to establish channel to send rate-limiter patch data", + VIRTIO_FS_NAME + ); + Err(Error::InternalError) + } + } + } +} + +#[allow(clippy::too_many_arguments)] +impl VirtioFs { + /// Create a new virtiofs device. + pub fn new( + tag: &str, + req_num_queues: usize, + queue_size: u16, + cache_size: u64, + cache_policy: &str, + thread_pool_size: u16, + writeback_cache: bool, + no_open: bool, + killpriv_v2: bool, + xattr: bool, + drop_sys_resource: bool, + no_readdir: bool, + handler: Box, + epoll_mgr: EpollManager, + rate_limiter: Option, + ) -> Result { + info!( + "{}: tag {} req_num_queues {} queue_size {} cache_size {} cache_policy {} thread_pool_size {} writeback_cache {} no_open {} killpriv_v2 {} xattr {} drop_sys_resource {} no_readdir {}", + VIRTIO_FS_NAME, tag, req_num_queues, queue_size, cache_size, cache_policy, thread_pool_size, writeback_cache, no_open, killpriv_v2, xattr, drop_sys_resource, no_readdir + ); + + let num_queues = NUM_QUEUE_OFFSET + req_num_queues; + + // Create virtio device config space. + // First by adding the tag. + let mut config_space = tag.to_string().into_bytes(); + config_space.resize(CONFIG_SPACE_SIZE, 0); + + // And then by copying the number of queues. + let mut num_queues_slice: [u8; 4] = (req_num_queues as u32).to_be_bytes(); + num_queues_slice.reverse(); + config_space[CONFIG_SPACE_TAG_SIZE..CONFIG_SPACE_SIZE].copy_from_slice(&num_queues_slice); + + let cache = match CachePolicy::from_str(cache_policy) { + Ok(c) => c, + Err(e) => { + error!( + "{}: Parse cache_policy \"{}\" failed: {:?}", + VIRTIO_FS_NAME, cache_policy, e + ); + return Err(Error::InvalidInput); + } + }; + + // Set rlimit first, in case we dropped CAP_SYS_RESOURCE later and hit EPERM. 
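+        // Raising the hard RLIMIT_NOFILE limit generally requires
+        // CAP_SYS_RESOURCE, so the limit is adjusted here, before the
+        // optional capability drop below.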
+ if let Err(e) = set_default_rlimit_nofile() { + warn!("{}: failed to set rlimit: {:?}", VIRTIO_FS_NAME, e); + } + + if drop_sys_resource && writeback_cache { + error!( + "{}: writeback_cache is not compatible with drop_sys_resource", + VIRTIO_FS_NAME + ); + return Err(Error::InvalidInput); + } + + // Drop CAP_SYS_RESOURCE when creating VirtioFs device, not in activate(), as it's vcpu + // thread that calls activate(), but we do I/O in vmm epoll thread, so drop cap here. + if drop_sys_resource { + info!( + "{}: Dropping CAP_SYS_RESOURCE, tid {:?}", + VIRTIO_FS_NAME, + nix::unistd::gettid() + ); + if let Err(e) = caps::drop(None, CapSet::Effective, Capability::CAP_SYS_RESOURCE) { + warn!( + "{}: failed to drop CAP_SYS_RESOURCE: {:?}", + VIRTIO_FS_NAME, e + ); + } + } + + let vfs_opts = VfsOptions { + no_writeback: !writeback_cache, + no_open, + killpriv_v2, + no_readdir, + ..VfsOptions::default() + }; + + Ok(VirtioFs { + device_info: VirtioDeviceInfo::new( + VIRTIO_FS_NAME.to_string(), + 1u64 << VIRTIO_F_VERSION_1, + Arc::new(vec![queue_size; num_queues]), + config_space, + epoll_mgr, + ), + cache_size, + queue_sizes: Arc::new(vec![queue_size; num_queues]), + thread_pool_size, + cache_policy: cache, + writeback_cache, + no_open, + no_readdir, + killpriv_v2, + xattr, + handler, + fs: Arc::new(Vfs::new(vfs_opts)), + backend_fs: HashMap::new(), + subscriber_id: None, + id: tag.to_string(), + rate_limiter, + patch_rate_limiter_fd: EventFd::new(0).unwrap(), + sender: None, + phantom: PhantomData, + }) + } + + fn is_dax_on(&self) -> bool { + self.cache_size > 0 + } + + fn get_timeout(&self) -> Duration { + match self.cache_policy { + CachePolicy::Always => Duration::from_secs(CACHE_ALWAYS_TIMEOUT), + CachePolicy::Never => Duration::from_secs(CACHE_NONE_TIMEOUT), + CachePolicy::Auto => Duration::from_secs(CACHE_AUTO_TIMEOUT), + } + } + + fn parse_blobfs_cfg( + &self, + source: &str, + config: Option, + dax_threshold_size_kb: Option, + ) -> FsResult<(String, String, Option)> { + let (blob_cache_dir, blob_ondemand_cfg) = match config.as_ref() { + Some(cfg) => { + let conf = ConfigV2::from_str(cfg).map_err(|e| { + error!("failed to load rafs config {} error: {:?}", &cfg, e); + FsError::InvalidData + })?; + + // v6 doesn't support digest validation yet. + if conf.rafs.ok_or(FsError::InvalidData)?.validate { + error!("config.digest_validate needs to be false"); + return Err(FsError::InvalidData); + } + + let work_dir = conf + .cache + .ok_or(FsError::InvalidData)? + .file_cache + .ok_or(FsError::InvalidData)? 
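+                    // The rafs config must carry cache.file_cache.work_dir;
+                    // it is reused as the blobfs blob cache directory below.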
+ .work_dir; + + let blob_ondemand_cfg = format!( + r#" + {{ + "rafs_conf": {}, + "bootstrap_path": "{}", + "blob_cache_dir": "{}" + }}"#, + cfg, source, &work_dir + ); + + (work_dir, blob_ondemand_cfg) + } + None => return Err(FsError::BackendFs("no rafs config file".to_string())), + }; + + let dax_file_size = match dax_threshold_size_kb { + Some(size) => Some(kb_to_bytes(size)?), + None => None, + }; + + Ok((blob_cache_dir, blob_ondemand_cfg, dax_file_size)) + } + + pub fn manipulate_backend_fs( + &mut self, + source: Option, + fstype: Option, + mountpoint: &str, + config: Option, + ops: &str, + prefetch_list_path: Option, + dax_threshold_size_kb: Option, + ) -> FsResult<()> { + debug!( + "source {:?}, fstype {:?}, mountpoint {:?}, config {:?}, ops {:?}, prefetch_list_path {:?}, dax_threshold_size_kb 0x{:x?}", + source, fstype, mountpoint, config, ops, prefetch_list_path, dax_threshold_size_kb + ); + match ops { + "mount" => { + if source.is_none() { + error!("{}: source is required for mount.", VIRTIO_FS_NAME); + return Err(FsError::InvalidData); + } + // safe because is not None + let source = source.unwrap(); + match fstype.as_deref() { + Some("Blobfs") | Some(BLOBFS) => { + self.mount_blobfs(source, mountpoint, config, dax_threshold_size_kb) + } + Some("PassthroughFs") | Some(PASSTHROUGHFS) => { + self.mount_passthroughfs(source, mountpoint, dax_threshold_size_kb) + } + Some("Rafs") | Some(RAFS) => { + self.mount_rafs(source, mountpoint, config, prefetch_list_path) + } + _ => { + error!("http_server: type is not invalid."); + Err(FsError::InvalidData) + } + } + } + "umount" => { + self.fs.umount(mountpoint).map_err(|e| { + error!("umount {:?}", e); + FsError::InvalidData + })?; + self.backend_fs.remove(mountpoint); + Ok(()) + } + "update" => { + info!("switch backend"); + self.update_rafs(source, mountpoint, config) + } + _ => { + error!("invalid ops, mount failed."); + Err(FsError::InvalidData) + } + } + } + + fn mount_blobfs( + &mut self, + source: String, + mountpoint: &str, + config: Option, + dax_threshold_size_kb: Option, + ) -> FsResult<()> { + debug!("http_server blobfs"); + let timeout = self.get_timeout(); + let (blob_cache_dir, blob_ondemand_cfg, dax_file_size) = + self.parse_blobfs_cfg(&source, config, dax_threshold_size_kb)?; + + let fs_cfg = BlobfsConfig { + ps_config: PassthroughConfig { + root_dir: blob_cache_dir, + do_import: true, + writeback: self.writeback_cache, + no_open: self.no_open, + xattr: self.xattr, + cache_policy: self.cache_policy.clone(), + entry_timeout: timeout, + attr_timeout: timeout, + dax_file_size, + ..Default::default() + }, + blob_ondemand_cfg, + }; + let blob_fs = BlobFs::new(fs_cfg).map_err(FsError::IOError)?; + blob_fs.import().map_err(FsError::IOError)?; + debug!("blobfs mounted"); + + let fs = Box::new(blob_fs); + match self.fs.mount(fs, mountpoint) { + Ok(idx) => { + self.backend_fs.insert( + mountpoint.to_string(), + BackendFsInfo { + index: idx, + fstype: BLOBFS.to_string(), + src_cfg: None, + }, + ); + Ok(()) + } + Err(e) => { + error!("blobfs mount {:?}", e); + Err(FsError::InvalidData) + } + } + } + + fn mount_passthroughfs( + &mut self, + source: String, + mountpoint: &str, + dax_threshold_size_kb: Option, + ) -> FsResult<()> { + debug!("http_server passthrough"); + let timeout = self.get_timeout(); + + let dax_threshold_size = match dax_threshold_size_kb { + Some(size) => Some(kb_to_bytes(size)?), + None => None, + }; + + let fs_cfg = PassthroughConfig { + root_dir: source, + do_import: false, + writeback: self.writeback_cache, + 
no_open: self.no_open, + no_readdir: self.no_readdir, + killpriv_v2: self.killpriv_v2, + xattr: self.xattr, + cache_policy: self.cache_policy.clone(), + entry_timeout: timeout, + attr_timeout: timeout, + dax_file_size: dax_threshold_size, + ..Default::default() + }; + + let passthrough_fs = PassthroughFs::<()>::new(fs_cfg).map_err(FsError::IOError)?; + passthrough_fs.import().map_err(FsError::IOError)?; + debug!("passthroughfs mounted"); + + let fs = Box::new(passthrough_fs); + match self.fs.mount(fs, mountpoint) { + Ok(idx) => { + self.backend_fs.insert( + mountpoint.to_string(), + BackendFsInfo { + index: idx, + fstype: PASSTHROUGHFS.to_string(), + src_cfg: None, + }, + ); + Ok(()) + } + Err(e) => { + error!("passthroughfs mount {:?}", e); + Err(FsError::InvalidData) + } + } + } + + fn mount_rafs( + &mut self, + source: String, + mountpoint: &str, + config: Option, + prefetch_list_path: Option, + ) -> FsResult<()> { + debug!("http_server rafs"); + let file = Path::new(&source); + let (mut rafs, rafs_cfg) = match config.as_ref() { + Some(cfg) => { + let rafs_conf: Arc = Arc::new( + serde_json::from_str(cfg).map_err(|e| FsError::BackendFs(e.to_string()))?, + ); + + ( + Rafs::new(&rafs_conf, mountpoint, file) + .map_err(|e| FsError::BackendFs(format!("Rafs::new() failed: {e:?}")))?, + cfg.clone(), + ) + } + None => return Err(FsError::BackendFs("no rafs config file".to_string())), + }; + let prefetch_files = parse_prefetch_files(prefetch_list_path.clone()); + debug!( + "{}: Import rafs with prefetch_files {:?}", + VIRTIO_FS_NAME, prefetch_files + ); + rafs.0 + .import(rafs.1, prefetch_files) + .map_err(|e| FsError::BackendFs(format!("Import rafs failed: {e:?}")))?; + info!( + "{}: Rafs imported with prefetch_list_path {:?}", + VIRTIO_FS_NAME, prefetch_list_path + ); + let fs = Box::new(rafs.0); + match self.fs.mount(fs, mountpoint) { + Ok(idx) => { + self.backend_fs.insert( + mountpoint.to_string(), + BackendFsInfo { + index: idx, + fstype: RAFS.to_string(), + src_cfg: Some((source, rafs_cfg)), + }, + ); + Ok(()) + } + Err(e) => { + error!("Rafs mount failed: {:?}", e); + Err(FsError::InvalidData) + } + } + } + + fn update_rafs( + &mut self, + source: Option, + mountpoint: &str, + config: Option, + ) -> FsResult<()> { + if config.is_none() { + return Err(FsError::BackendFs("no rafs config file".to_string())); + } + if source.is_none() { + return Err(FsError::BackendFs(format!( + "rafs mounted at {mountpoint} doesn't have source configured" + ))); + } + // safe because config is not None. + let config = config.unwrap(); + let source = source.unwrap(); + let rafs_conf: Arc = + Arc::new(serde_json::from_str(&config).map_err(|e| FsError::BackendFs(e.to_string()))?); + // Update rafs config, update BackendFsInfo as well. 
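+        // Only an already-mounted rafs instance can be updated: look up the
+        // existing BackendFsInfo, swap the bootstrap via Rafs::update(), then
+        // record the new (source, config) pair.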
+ let new_info = match self.backend_fs.get(mountpoint) { + Some(orig_info) => BackendFsInfo { + index: orig_info.index, + fstype: orig_info.fstype.clone(), + src_cfg: Some((source.to_string(), config)), + }, + None => { + return Err(FsError::BackendFs(format!( + "rafs mount point {mountpoint} is not mounted" + ))); + } + }; + let rootfs = match self.fs.get_rootfs(mountpoint) { + Ok(fs) => match fs { + Some(f) => f, + None => { + return Err(FsError::BackendFs(format!( + "rafs get_rootfs() failed: mountpoint {mountpoint} not mounted" + ))); + } + }, + Err(e) => { + return Err(FsError::BackendFs(format!( + "rafs get_rootfs() failed: {e:?}" + ))); + } + }; + let any_fs = rootfs.deref().as_any(); + if let Some(fs_swap) = any_fs.downcast_ref::() { + let mut file = ::from_file(&source) + .map_err(|e| FsError::BackendFs(format!("RafsIoRead failed: {e:?}")))?; + + fs_swap + .update(&mut file, &rafs_conf) + .map_err(|e| FsError::BackendFs(format!("Update rafs failed: {e:?}")))?; + self.backend_fs.insert(mountpoint.to_string(), new_info); + Ok(()) + } else { + Err(FsError::BackendFs("no rafs is found".to_string())) + } + } + + fn register_mmap_region( + &mut self, + vm_fd: Arc, + guest_addr: u64, + len: u64, + slot_res: &[u32], + ) -> Result> { + // Create file backend for virtiofs's mmap region to let goku and + // vhost-user slave can remap memory by memfd. However, this is not a + // complete solution, because when dax is actually on, they need to be + // notified of the change in the dax memory mapping relationship. + let file_offset = { + let fd = memfd::memfd_create( + // safe to unwrap, no nul byte in file name + &CString::new("virtio_fs_mem").unwrap(), + memfd::MemFdCreateFlag::empty(), + ) + .map_err(|e| Error::VirtioFs(FsError::MemFdCreate(e)))?; + let file: File = unsafe { File::from_raw_fd(fd) }; + file.set_len(len) + .map_err(|e| Error::VirtioFs(FsError::SetFileSize(e)))?; + Some(FileOffset::new(file, 0)) + }; + + // unmap will be handled on MmapRegion'd Drop. + let mmap_region = MmapRegion::build( + file_offset, + len as usize, + libc::PROT_NONE, + libc::MAP_ANONYMOUS | libc::MAP_NORESERVE | libc::MAP_PRIVATE, + ) + .map_err(Error::NewMmapRegion)?; + + let host_addr: u64 = mmap_region.as_ptr() as u64; + let kvm_mem_region = kvm_userspace_memory_region { + slot: slot_res[0], + flags: 0, + guest_phys_addr: guest_addr, + memory_size: len, + userspace_addr: host_addr, + }; + debug!( + "{}: mmio shared memory kvm_region: {:?}", + self.id, kvm_mem_region, + ); + + // Safe because the user mem region is just created, and kvm slot is allocated + // by resource allocator. + unsafe { + vm_fd + .set_user_memory_region(kvm_mem_region) + .map_err(Error::SetUserMemoryRegion)? 
+ }; + + let region = Arc::new( + GuestRegionMmap::new(mmap_region, GuestAddress(guest_addr)) + .map_err(Error::InsertMmap)?, + ); + self.handler.insert_region(region.clone())?; + + Ok(region) + } +} + +fn parse_prefetch_files(prefetch_list_path: Option) -> Option> { + let prefetch_files: Option> = match prefetch_list_path { + Some(p) => { + match File::open(p.as_str()) { + Ok(f) => { + let r = BufReader::new(f); + // All prefetch files should be absolute path + let v: Vec = r + .lines() + .filter(|l| { + let lref = l.as_ref(); + lref.is_ok() && lref.unwrap().starts_with('/') + }) + .map(|l| PathBuf::from(l.unwrap().as_str())) + .collect(); + if v.is_empty() { + None + } else { + Some(v) + } + } + Err(e) => { + // We could contineu without prefetch files, just print warning and return + warn!( + "{}: Open prefetch_file_path {} failed: {:?}", + VIRTIO_FS_NAME, + p.as_str(), + e + ); + None + } + } + } + None => None, + }; + prefetch_files +} + +fn kb_to_bytes(kb: u64) -> FsResult { + if (kb & 0xffc0_0000_0000_0000) != 0 { + error!( + "dax_threshold_size_kb * 1024 overflow. dax_threshold_size_kb is 0x{:x}.", + kb + ); + return Err(FsError::InvalidData); + } + + let bytes = kb << 10; + Ok(bytes) +} + +fn set_default_rlimit_nofile() -> Result<()> { + // Our default RLIMIT_NOFILE target. + let mut max_fds: u64 = 300_000; + // leave at least this many fds free + let reserved_fds: u64 = 16_384; + + // Reduce max_fds below the system-wide maximum, if necessary. + // This ensures there are fds available for other processes so we + // don't cause resource exhaustion. + let mut file_max = String::new(); + let mut f = File::open("/proc/sys/fs/file-max").map_err(|e| { + error!( + "{}: failed to read /proc/sys/fs/file-max {:?}", + VIRTIO_FS_NAME, e + ); + Error::IOError(e) + })?; + f.read_to_string(&mut file_max)?; + let file_max = file_max.trim().parse::().map_err(|e| { + error!("{}: read fs.file-max sysctl wrong {:?}", VIRTIO_FS_NAME, e); + Error::InvalidInput + })?; + if file_max < 2 * reserved_fds { + error!( + "{}: The fs.file-max sysctl ({}) is too low to allow a reasonable number of open files ({}).", + VIRTIO_FS_NAME, file_max, 2 * reserved_fds + ); + return Err(Error::InvalidInput); + } + + max_fds = std::cmp::min(file_max - reserved_fds, max_fds); + let rlimit_nofile = Resource::NOFILE + .get() + .map(|(curr, _)| if curr >= max_fds { 0 } else { max_fds }) + .map_err(|e| { + error!("{}: failed to get rlimit {:?}", VIRTIO_FS_NAME, e); + Error::IOError(e) + })?; + + if rlimit_nofile == 0 { + info!( + "{}: original rlimit nofile is greater than max_fds({}), keep rlimit nofile setting", + VIRTIO_FS_NAME, max_fds + ); + Ok(()) + } else { + info!( + "{}: set rlimit {} (max_fds {})", + VIRTIO_FS_NAME, rlimit_nofile, max_fds + ); + + Resource::NOFILE + .set(rlimit_nofile, rlimit_nofile) + .map_err(|e| { + error!("{}: failed to set rlimit {:?}", VIRTIO_FS_NAME, e); + Error::IOError(e) + }) + } +} + +impl VirtioDevice for VirtioFs +where + AS: 'static + GuestAddressSpace + Clone + Send + Sync, + AS::T: Send, + AS::M: Sync + Send, + Q: QueueT + Send + 'static, +{ + fn device_type(&self) -> u32 { + TYPE_VIRTIO_FS + } + + fn queue_max_sizes(&self) -> &[u16] { + &self.queue_sizes + } + + fn get_avail_features(&self, page: u32) -> u32 { + self.device_info.get_avail_features(page) + } + + fn set_acked_features(&mut self, page: u32, value: u32) { + trace!( + target: VIRTIO_FS_NAME, + "{}: VirtioDevice::set_acked_features({}, 0x{:x})", + self.id, + page, + value + ); + self.device_info.set_acked_features(page, 
value) + } + + fn read_config(&mut self, offset: u64, data: &mut [u8]) -> ConfigResult { + trace!( + target: VIRTIO_FS_NAME, + "{}: VirtioDevice::read_config(0x{:x}, {:?})", + self.id, + offset, + data + ); + self.device_info.read_config(offset, data) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> ConfigResult { + trace!( + target: VIRTIO_FS_NAME, + "{}: VirtioDevice::write_config(0x{:x}, {:?})", + self.id, + offset, + data + ); + self.device_info.write_config(offset, data) + } + + fn activate(&mut self, config: VirtioDeviceConfig) -> ActivateResult { + trace!( + target: VIRTIO_FS_NAME, + "{}: VirtioDevice::activate()", + self.id + ); + + self.device_info.check_queue_sizes(&config.queues)?; + + let (sender, receiver) = mpsc::channel(); + self.sender = Some(sender); + let rate_limiter = self.rate_limiter.take().unwrap_or_default(); + let patch_rate_limiter_fd = self.patch_rate_limiter_fd.try_clone().map_err(|e| { + error!( + "{}: failed to clone patch rate limiter eventfd {:?}", + VIRTIO_FS_NAME, e + ); + ActivateError::InternalError + })?; + + let cache_handler = if let Some((addr, _guest_addr)) = config.get_shm_region_addr() { + let handler = CacheHandler { + cache_size: self.cache_size, + mmap_cache_addr: addr, + id: self.id.clone(), + }; + + Some(handler) + } else { + None + }; + + let handler = VirtioFsEpollHandler::new( + config, + self.fs.clone(), + cache_handler, + self.thread_pool_size, + self.id.clone(), + rate_limiter, + patch_rate_limiter_fd, + Some(receiver), + ); + + self.subscriber_id = Some(self.device_info.register_event_handler(Box::new(handler))); + + Ok(()) + } + + // Please keep in synchronization with vhost/fs.rs + fn get_resource_requirements( + &self, + requests: &mut Vec, + use_generic_irq: bool, + ) { + trace!( + target: VIRTIO_FS_NAME, + "{}: VirtioDevice::get_resource_requirements()", + self.id + ); + requests.push(ResourceConstraint::LegacyIrq { irq: None }); + if use_generic_irq { + // Allocate one irq for device configuration change events, and one irq for each queue. + requests.push(ResourceConstraint::GenericIrq { + size: (self.queue_sizes.len() + 1) as u32, + }); + } + + // Check if we have dax enabled or not, just return if no dax window requested. + if !self.is_dax_on() { + info!("{}: DAX window is disabled.", self.id); + return; + } + + // Request for DAX window. The memory needs to be 2MiB aligned in order to support + // hugepages, and needs to be above 4G to avoid confliction with lapic/ioapic devices. + requests.push(ResourceConstraint::MmioAddress { + range: Some((0x1_0000_0000, std::u64::MAX)), + align: 0x0020_0000, + size: self.cache_size, + }); + + // Request for new kvm memory slot for DAX window. + requests.push(ResourceConstraint::KvmMemSlot { + slot: None, + size: 1, + }); + } + + // Please keep in synchronization with vhost/fs.rs + fn set_resource( + &mut self, + vm_fd: Arc, + resource: DeviceResources, + ) -> Result>> { + trace!( + target: VIRTIO_FS_NAME, + "{}: VirtioDevice::set_resource()", + self.id + ); + + let mmio_res = resource.get_mmio_address_ranges(); + let slot_res = resource.get_kvm_mem_slots(); + + // Do nothing if there's no dax window requested. + if mmio_res.is_empty() { + return Ok(None); + } + + // Make sure we have the correct resource as requested, and currently we only support one + // shm region for DAX window (version table and journal are not supported yet). 
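+        // i.e. exactly one MMIO address range paired with exactly one KVM
+        // memory slot.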
+ if mmio_res.len() != slot_res.len() || mmio_res.len() != 1 { + error!( + "{}: wrong number of mmio or kvm slot resource ({}, {})", + self.id, + mmio_res.len(), + slot_res.len() + ); + return Err(Error::InvalidResource); + } + + let guest_addr = mmio_res[0].0; + let cache_len = mmio_res[0].1; + + let mmap_region = self.register_mmap_region(vm_fd, guest_addr, cache_len, &slot_res)?; + + Ok(Some(VirtioSharedMemoryList { + host_addr: mmap_region.deref().deref().as_ptr() as u64, + guest_addr: GuestAddress(guest_addr), + len: cache_len as GuestUsize, + kvm_userspace_memory_region_flags: 0, + kvm_userspace_memory_region_slot: slot_res[0], + region_list: vec![VirtioSharedMemory { + offset: 0, + len: cache_len, + }], + mmap_region, + })) + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } +} + +#[cfg(test)] +pub mod tests { + #[cfg(feature = "test-resources")] + use std::env::temp_dir; + use std::io::Write; + use std::path::PathBuf; + use std::sync::Arc; + + use dbs_device::resources::DeviceResources; + use dbs_interrupt::NoopNotifier; + use kvm_ioctls::Kvm; + use virtio_queue::QueueSync; + use vm_memory::GuestMemoryRegion; + use vm_memory::{GuestAddress, GuestMemoryMmap, GuestRegionMmap}; + use vmm_sys_util::tempfile::TempFile; + use Error as VirtIoError; + + use super::*; + use crate::device::VirtioRegionHandler; + use crate::{ActivateError, VirtioQueueConfig, TYPE_VIRTIO_FS}; + + pub(crate) const TAG: &str = "test"; + pub(crate) const NUM_QUEUES: usize = 1; + pub(crate) const QUEUE_SIZE: u16 = 1024; + pub(crate) const CACHE_SIZE: u64 = 0; + pub(crate) const THREAD_NUM: u16 = 10; + pub(crate) const CACHE_POLICY: &str = "auto"; + pub(crate) const WB_CACHE: bool = true; + pub(crate) const NO_OPEN: bool = true; + pub(crate) const NO_READDIR: bool = false; + pub(crate) const KILLPRIV_V2: bool = false; + pub(crate) const XATTR: bool = false; + pub(crate) const DROP_SYS_RSC: bool = false; + pub(crate) const FS_EVENTS_COUNT: u32 = 4; + + pub struct DummyVirtioRegionHandler {} + + impl VirtioRegionHandler for DummyVirtioRegionHandler { + fn insert_region( + &mut self, + _region: Arc, + ) -> std::result::Result<(), VirtIoError> { + Ok(()) + } + } + + pub fn new_dummy_handler_helper() -> Box { + Box::new(DummyVirtioRegionHandler {}) + } + + #[cfg(feature = "test-resources")] + fn create_fs_device_default() -> VirtioFs> { + let epoll_manager = EpollManager::default(); + let rate_limiter = RateLimiter::new(100, 0, 300, 10, 0, 300).unwrap(); + let fs: VirtioFs> = VirtioFs::new( + TAG, + NUM_QUEUES, + QUEUE_SIZE, + CACHE_SIZE, + CACHE_POLICY, + THREAD_NUM, + WB_CACHE, + NO_OPEN, + KILLPRIV_V2, + XATTR, + DROP_SYS_RSC, + NO_READDIR, + new_dummy_handler_helper(), + epoll_manager, + Some(rate_limiter), + ) + .unwrap(); + + fs + } + + pub(crate) fn create_fs_epoll_handler( + id: String, + ) -> VirtioFsEpollHandler, QueueSync, GuestRegionMmap> { + let vfs = Arc::new(Vfs::new(VfsOptions::default())); + let mem = Arc::new(GuestMemoryMmap::from_ranges(&[(GuestAddress(0x0), 0x10000)]).unwrap()); + let queues = vec![ + VirtioQueueConfig::create(256, 0).unwrap(), + VirtioQueueConfig::create(256, 0).unwrap(), + ]; + let rate_limiter = RateLimiter::default(); + + // Call for kvm too frequently would cause error in some host kernel. 
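+        // A short sleep here spaces out the Kvm::new()/create_vm() calls made
+        // by the test helpers.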
+ std::thread::sleep(std::time::Duration::from_millis(5)); + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::new( + mem, + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + VirtioFsEpollHandler::new( + config, + vfs, + None, + 2, + id, + rate_limiter, + EventFd::new(0).unwrap(), + None, + ) + } + + #[test] + fn test_virtio_fs_device_create_error() { + let epoll_manager = EpollManager::default(); + let rate_limiter = RateLimiter::new(100, 0, 300, 10, 0, 300).unwrap(); + + // invalid cache policy + let res: Result>> = VirtioFs::new( + TAG, + NUM_QUEUES, + QUEUE_SIZE, + CACHE_SIZE, + "dummy_policy", + THREAD_NUM, + WB_CACHE, + NO_OPEN, + KILLPRIV_V2, + XATTR, + DROP_SYS_RSC, + NO_READDIR, + new_dummy_handler_helper(), + epoll_manager.clone(), + Some(rate_limiter), + ); + assert!(res.is_err()); + + // drop_sys_resource with write_back_cache + let rate_limiter = RateLimiter::new(100, 0, 300, 10, 0, 300).unwrap(); + let res: Result>> = VirtioFs::new( + TAG, + NUM_QUEUES, + QUEUE_SIZE, + CACHE_SIZE, + CACHE_POLICY, + THREAD_NUM, + true, + NO_OPEN, + KILLPRIV_V2, + XATTR, + true, + NO_READDIR, + new_dummy_handler_helper(), + epoll_manager, + Some(rate_limiter), + ); + assert!(res.is_err()); + } + + #[test] + fn test_virtio_fs_device_normal() { + let epoll_manager = EpollManager::default(); + let rate_limiter = RateLimiter::new(100, 0, 300, 10, 0, 300).unwrap(); + let mut fs: VirtioFs> = VirtioFs::new( + TAG, + NUM_QUEUES, + QUEUE_SIZE, + CACHE_SIZE, + CACHE_POLICY, + THREAD_NUM, + WB_CACHE, + NO_OPEN, + KILLPRIV_V2, + XATTR, + DROP_SYS_RSC, + NO_READDIR, + new_dummy_handler_helper(), + epoll_manager, + Some(rate_limiter), + ) + .unwrap(); + + assert!(!fs.is_dax_on()); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::device_type(&fs), + TYPE_VIRTIO_FS + ); + let queue_size = vec![QUEUE_SIZE; NUM_QUEUE_OFFSET + NUM_QUEUES]; + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::queue_max_sizes( + &fs + ), + &queue_size[..] 
+ ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&fs, 0), + fs.device_info.get_avail_features(0) + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&fs, 1), + fs.device_info.get_avail_features(1) + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&fs, 2), + fs.device_info.get_avail_features(2) + ); + VirtioDevice::>, QueueSync, GuestRegionMmap>::set_acked_features( + &mut fs, 2, 0, + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&fs, 2), + 0); + let mut config: [u8; 1] = [0]; + VirtioDevice::>, QueueSync, GuestRegionMmap>::read_config( + &mut fs, + 0, + &mut config, + ) + .unwrap(); + let config: [u8; 16] = [0; 16]; + VirtioDevice::>, QueueSync, GuestRegionMmap>::write_config( + &mut fs, 0, &config, + ) + .unwrap(); + } + + #[test] + fn test_virtio_fs_device_active() { + let epoll_manager = EpollManager::default(); + { + // config queue size is not 2 + let rate_limiter = RateLimiter::new(100, 0, 300, 10, 0, 300).unwrap(); + let mut fs: VirtioFs> = VirtioFs::new( + TAG, + NUM_QUEUES, + QUEUE_SIZE, + CACHE_SIZE, + CACHE_POLICY, + THREAD_NUM, + WB_CACHE, + NO_OPEN, + KILLPRIV_V2, + XATTR, + DROP_SYS_RSC, + NO_READDIR, + new_dummy_handler_helper(), + epoll_manager.clone(), + Some(rate_limiter), + ) + .unwrap(); + + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues: Vec> = Vec::new(); + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + assert!(matches!( + fs.activate(config), + Err(ActivateError::InvalidParam) + )); + } + + { + // Ok + let rate_limiter = RateLimiter::new(100, 0, 300, 10, 0, 300).unwrap(); + let mut fs: VirtioFs> = VirtioFs::new( + TAG, + NUM_QUEUES, + QUEUE_SIZE, + CACHE_SIZE, + CACHE_POLICY, + THREAD_NUM, + WB_CACHE, + NO_OPEN, + KILLPRIV_V2, + XATTR, + DROP_SYS_RSC, + NO_READDIR, + new_dummy_handler_helper(), + epoll_manager, + Some(rate_limiter), + ) + .unwrap(); + + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues = vec![ + VirtioQueueConfig::::create(1024, 0).unwrap(), + VirtioQueueConfig::::create(2, 0).unwrap(), + ]; + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + + let result = fs.activate(config); + assert!(result.is_ok()); + } + } + + // this test case need specific resources and is recommended to run + // via dbuvm docker image + #[test] + #[cfg(feature = "test-resources")] + fn test_fs_manipulate_backend_fs() { + let source = "/test_resources/nydus-rs/bootstrap/image_v2.boot"; + let source_path = PathBuf::from(source); + let bootstrapfile = source_path.to_str().unwrap().to_string(); + if !source_path.exists() { + panic!("Test resource file not found: {}", bootstrapfile); + } + // mount + { + // invalid fs type + { + let mut fs = create_fs_device_default(); + let res = fs.manipulate_backend_fs( + None, + Some(String::from("dummyFs")), + "/mountpoint", + None, + "mount", + None, + None, + ); + assert!(matches!(res, Err(FsError::BackendFs(_)))); + } + // passthroughFs + { + let mut fs = create_fs_device_default(); + + // 
no mount source + let res = fs.manipulate_backend_fs( + None, + Some(String::from("PassthroughFs")), + "/mountpoint", + None, + "mount", + None, + None, + ); + assert!(matches!(res, Err(FsError::BackendFs(_)))); + + // invalid mount source + let res = fs.manipulate_backend_fs( + Some(String::from("dummy_source_path")), + Some(String::from("PassthroughFs")), + "/mountpoint", + None, + "mount", + None, + None, + ); + assert!(matches!(res, Err(FsError::BackendFs(_)))); + + // success + let mount_dir = temp_dir(); + let mount_path = mount_dir.into_os_string().into_string().unwrap(); + fs.manipulate_backend_fs( + Some(mount_path), + Some(String::from("PassthroughFs")), + "/mountpoint", + None, + "mount", + None, + None, + ) + .unwrap(); + } + // Rafs + { + let mut fs = create_fs_device_default(); + + // no mount source + let res = fs.manipulate_backend_fs( + None, + Some(String::from("Rafs")), + "/mountpoint", + None, + "mount", + None, + None, + ); + assert!(matches!(res, Err(FsError::BackendFs(_)))); + + // invalid mount source + let res = fs.manipulate_backend_fs( + Some(String::from("dummy_source_path")), + Some(String::from("Rafs")), + "/mountpoint", + None, + "mount", + None, + None, + ); + assert!(matches!(res, Err(FsError::BackendFs(_)))); + + // invalid rafs cfg format + let dummy_rafs_cfg = r#" + { + "device": { + "backend": { + "type": "oss", + "config": { + "endpoint": "test" + } + } + } + }"#; + let res = fs.manipulate_backend_fs( + Some(bootstrapfile.clone()), + Some(String::from("Rafs")), + "/mountpoint", + Some(String::from(dummy_rafs_cfg)), + "mount", + None, + None, + ); + assert!(matches!(res, Err(FsError::BackendFs(_)))); + + // success + let rafs_cfg = r#" + { + "device": { + "backend": { + "type": "oss", + "config": { + "endpoint": "test", + "access_key_id": "test", + "access_key_secret": "test", + "bucket_name": "antsys-nydus", + "object_prefix":"nydus_v2/", + "scheme": "http" + } + } + }, + "mode": "direct", + "digest_validate": false, + "enable_xattr": true, + "fs_prefetch": { + "enable": true, + "threads_count": 10, + "merging_size": 131072, + "bandwidth_rate": 10485760 + } + }"#; + fs.manipulate_backend_fs( + Some(bootstrapfile.clone()), + Some(String::from("Rafs")), + "/mountpoint", + Some(String::from(rafs_cfg)), + "mount", + None, + None, + ) + .unwrap(); + } + } + // umount + { + let mut fs = create_fs_device_default(); + + // invalid mountpoint + let res = fs.manipulate_backend_fs( + None, + None, + "/dummy_mountpoint", + None, + "umount", + None, + None, + ); + assert!(matches!(res, Err(FsError::BackendFs(_)))); + + // success + let mut fs = create_fs_device_default(); + let dummy_dir = temp_dir(); + let dummy_path = dummy_dir.into_os_string().into_string().unwrap(); + fs.manipulate_backend_fs( + Some(dummy_path), + Some(String::from("PassthroughFs")), + "/mountpoint", + None, + "mount", + None, + None, + ) + .unwrap(); + fs.manipulate_backend_fs(None, None, "/mountpoint", None, "umount", None, None) + .unwrap(); + } + + // update + { + let mut fs = create_fs_device_default(); + let rafs_cfg = r#" + { + "device": { + "backend": { + "type": "oss", + "config": { + "endpoint": "test", + "access_key_id": "test", + "access_key_secret": "test", + "bucket_name": "antsys-nydus", + "object_prefix":"nydus_v2/", + "scheme": "http" + } + } + }, + "mode": "direct", + "digest_validate": false, + "enable_xattr": true, + "fs_prefetch": { + "enable": true, + "threads_count": 10, + "merging_size": 131072, + "bandwidth_rate": 10485760 + } + }"#; + // no config + let res = 
fs.manipulate_backend_fs( + Some(bootstrapfile.clone()), + Some(String::from("Rafs")), + "/mountpoint", + None, + "update", + None, + None, + ); + assert!(matches!(res, Err(FsError::BackendFs(_)))); + + // no source configured + let res = fs.manipulate_backend_fs( + Some(bootstrapfile.clone()), + Some(String::from("Rafs")), + "/mountpoint", + Some(String::from(rafs_cfg)), + "update", + None, + None, + ); + assert!(matches!(res, Err(FsError::BackendFs(_)))); + + // invalid mountpoint + fs.manipulate_backend_fs( + Some(bootstrapfile.clone()), + Some(String::from("Rafs")), + "/mountpoint", + Some(String::from(rafs_cfg)), + "mount", + None, + None, + ) + .unwrap(); + + let res = fs.manipulate_backend_fs( + Some(bootstrapfile.clone()), + Some(String::from("Rafs")), + "/dummy_mountpoint", + Some(String::from(rafs_cfg)), + "update", + None, + None, + ); + assert!(matches!(res, Err(FsError::BackendFs(_)))); + + // success + fs.manipulate_backend_fs( + Some(bootstrapfile.clone()), + Some(String::from("Rafs")), + "/mountpoint", + Some(String::from(rafs_cfg)), + "mount", + None, + None, + ) + .unwrap(); + + let res = fs.manipulate_backend_fs( + Some(bootstrapfile), + Some(String::from("Rafs")), + "/mountpoint", + Some(String::from(rafs_cfg)), + "update", + None, + None, + ); + assert!(res.is_ok()); + } + + // invalid operation + { + let mut fs = create_fs_device_default(); + let res = fs.manipulate_backend_fs( + None, + None, + "/mountpoint", + None, + "dummy_ops", + None, + Some(1024 * 1024 * 1024), + ); + assert!(matches!(res, Err(FsError::BackendFs(_)))); + } + } + + #[test] + fn test_parse_prefetch_files() { + // Non-empty prefetch list + let tmp_file = TempFile::new().unwrap(); + writeln!(tmp_file.as_file(), "/hello.txt").unwrap(); + writeln!(tmp_file.as_file()).unwrap(); + writeln!(tmp_file.as_file(), " ").unwrap(); + writeln!(tmp_file.as_file(), "\t").unwrap(); + writeln!(tmp_file.as_file(), "/").unwrap(); + writeln!(tmp_file.as_file(), "\n").unwrap(); + writeln!(tmp_file.as_file(), "test").unwrap(); + + let files = parse_prefetch_files(Some(tmp_file.as_path().to_str().unwrap().to_string())); + assert_eq!( + files, + Some(vec![PathBuf::from("/hello.txt"), PathBuf::from("/")]) + ); + + // Empty prefetch list + let tmp_file = TempFile::new().unwrap(); + let files = parse_prefetch_files(Some(tmp_file.as_path().to_str().unwrap().to_string())); + assert_eq!(files, None); + + // None prefetch list + let files = parse_prefetch_files(None); + assert_eq!(files, None); + + // Not exist prefetch list + let files = parse_prefetch_files(Some("no_such_file".to_string())); + assert_eq!(files, None); + } + + #[test] + #[allow(clippy::unusual_byte_groupings)] + fn test_kb_to_bytes() { + let kb = 0x1000; + assert_eq!(kb_to_bytes(kb).unwrap(), 0x400_000); + + let kb = 0x100_0000; + assert_eq!(kb_to_bytes(kb).unwrap(), 0x400_00_0000); + + let kb = 0x20_0000_0000_0000; + assert_eq!(kb_to_bytes(kb).unwrap(), 0x8000_0000_0000_0000); + + let kb = 0x100_0000_0000_0000; + assert!(kb_to_bytes(kb).is_err()); + + let kb = 0x1000_0000_0000_0000; + assert!(kb_to_bytes(kb).is_err()); + + let kb = 0x1100_0000_0000_0000; + assert!(kb_to_bytes(kb).is_err()); + } + + #[test] + fn test_get_timeout() { + fn create_fs_device_with_cache_policy(policy: &str) -> VirtioFs> { + let epoll_manager = EpollManager::default(); + let rate_limiter = RateLimiter::new(100, 0, 300, 10, 0, 300).unwrap(); + let fs: VirtioFs> = VirtioFs::new( + TAG, + NUM_QUEUES, + QUEUE_SIZE, + CACHE_SIZE, + policy, + THREAD_NUM, + WB_CACHE, + NO_OPEN, + 
KILLPRIV_V2, + XATTR, + DROP_SYS_RSC, + NO_READDIR, + new_dummy_handler_helper(), + epoll_manager, + Some(rate_limiter), + ) + .unwrap(); + fs + } + let fs = create_fs_device_with_cache_policy("auto"); + assert_eq!(fs.get_timeout(), Duration::from_secs(CACHE_AUTO_TIMEOUT)); + let fs = create_fs_device_with_cache_policy("always"); + assert_eq!(fs.get_timeout(), Duration::from_secs(CACHE_ALWAYS_TIMEOUT)); + let fs = create_fs_device_with_cache_policy("never"); + assert_eq!(fs.get_timeout(), Duration::from_secs(CACHE_NONE_TIMEOUT)); + } + + #[test] + fn test_register_mmap_region() { + let epoll_manager = EpollManager::default(); + let rate_limiter = RateLimiter::new(100, 0, 300, 10, 0, 300).unwrap(); + let mut fs: VirtioFs> = VirtioFs::new( + TAG, + NUM_QUEUES, + QUEUE_SIZE, + CACHE_SIZE, + CACHE_POLICY, + THREAD_NUM, + WB_CACHE, + NO_OPEN, + KILLPRIV_V2, + XATTR, + DROP_SYS_RSC, + NO_READDIR, + new_dummy_handler_helper(), + epoll_manager, + Some(rate_limiter), + ) + .unwrap(); + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let mut resources = DeviceResources::new(); + let entry = dbs_device::resources::Resource::MmioAddressRange { + base: 0x1000, + size: 0x1000, + }; + resources.append(entry); + let entry = dbs_device::resources::Resource::KvmMemSlot(0); + resources.append(entry); + + let mmio_res = resources.get_mmio_address_ranges(); + let slot_res = resources.get_kvm_mem_slots(); + let start = mmio_res[0].0; + let len = mmio_res[0].1; + let res = fs.register_mmap_region(vm_fd, start, len, &slot_res); + assert!(res.is_ok()); + assert_eq!(res.unwrap().start_addr(), GuestAddress(0x1000)); + } + + #[test] + fn test_get_resource_requirements() { + let epoll_manager = EpollManager::default(); + let rate_limiter = RateLimiter::new(100, 0, 300, 10, 0, 300).unwrap(); + let dax_on = 0x4000; + let fs: VirtioFs> = VirtioFs::new( + TAG, + NUM_QUEUES, + QUEUE_SIZE, + dax_on, + CACHE_POLICY, + THREAD_NUM, + WB_CACHE, + NO_OPEN, + KILLPRIV_V2, + XATTR, + DROP_SYS_RSC, + NO_READDIR, + new_dummy_handler_helper(), + epoll_manager, + Some(rate_limiter), + ) + .unwrap(); + let mut requirements = vec![ + ResourceConstraint::new_mmio(0x1), + ResourceConstraint::new_mmio(0x2), + ]; + VirtioDevice::, QueueSync, GuestRegionMmap>::get_resource_requirements( + &fs, + &mut requirements, + true, + ); + + assert_eq!(requirements[2], ResourceConstraint::LegacyIrq { irq: None }); + assert_eq!(requirements[3], ResourceConstraint::GenericIrq { size: 3 }); + assert_eq!( + requirements[5], + ResourceConstraint::KvmMemSlot { + slot: None, + size: 1 + } + ); + } + + #[test] + fn test_set_resource() { + let epoll_manager = EpollManager::default(); + let rate_limiter = RateLimiter::new(100, 0, 300, 10, 0, 300).unwrap(); + let mut fs: VirtioFs> = VirtioFs::new( + TAG, + NUM_QUEUES, + QUEUE_SIZE, + CACHE_SIZE, + CACHE_POLICY, + THREAD_NUM, + WB_CACHE, + NO_OPEN, + KILLPRIV_V2, + XATTR, + DROP_SYS_RSC, + NO_READDIR, + new_dummy_handler_helper(), + epoll_manager, + Some(rate_limiter), + ) + .unwrap(); + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let mut resources = DeviceResources::new(); + let entry = dbs_device::resources::Resource::MmioAddressRange { + base: 0x1000, + size: 0x1000, + }; + resources.append(entry); + let entry = dbs_device::resources::Resource::KvmMemSlot(0); + resources.append(entry); + + let res = VirtioDevice::, QueueSync, GuestRegionMmap>::set_resource( + &mut fs, vm_fd, resources, + ); + assert!(res.is_ok()); + let content = 
res.unwrap().unwrap(); + assert_eq!(content.kvm_userspace_memory_region_slot, 0); + assert_eq!(content.region_list[0].offset, 0); + assert_eq!(content.region_list[0].len, 0x1000); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/fs/handler.rs b/src/dragonball/src/dbs_virtio_devices/src/fs/handler.rs new file mode 100644 index 000000000000..b976c89a138c --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/fs/handler.rs @@ -0,0 +1,781 @@ +// Copyright 2020 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +use std::io::Error as IOError; +use std::ops::Deref; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::sync::{mpsc, Arc, Mutex}; + +use dbs_utils::epoll_manager::{EventOps, EventSet, Events, MutEventSubscriber}; +use dbs_utils::rate_limiter::{BucketUpdate, RateLimiter, TokenType}; +use fuse_backend_rs::abi::virtio_fs::RemovemappingOne; +use fuse_backend_rs::api::server::Server; +use fuse_backend_rs::api::Vfs; +use fuse_backend_rs::transport::{FsCacheReqHandler, Reader, VirtioFsWriter, Writer}; +use log::{debug, error, info, trace}; +use threadpool::ThreadPool; +use virtio_queue::{QueueOwnedT, QueueT}; +use vm_memory::{GuestAddressSpace, GuestMemoryRegion}; +use vmm_sys_util::eventfd::EventFd; + +use crate::{Error, Result, VirtioDeviceConfig}; + +use super::{Error as FsError, VIRTIO_FS_NAME}; + +// New descriptors are pending on the virtio queue. +const QUEUE_AVAIL_EVENT: u32 = 0; + +// two rate limiter events +const RATE_LIMITER_EVENT_COUNT: u32 = 2; + +/// CacheHandler handles DAX window mmap/unmap operations +#[derive(Clone)] +pub struct CacheHandler { + /// the size of memory region allocated for virtiofs + pub(crate) cache_size: u64, + + /// the address of mmap region corresponding to the memory region + pub(crate) mmap_cache_addr: u64, + + /// the device ID + pub(crate) id: String, +} + +impl CacheHandler { + /// Make sure request is within cache range + fn is_req_valid(&self, offset: u64, len: u64) -> bool { + // TODO: do we need to validate alignment here? + match offset.checked_add(len) { + Some(n) => n <= self.cache_size, + None => false, + } + } +} + +impl FsCacheReqHandler for CacheHandler { + // Do not close fd in here. The fd is automatically closed in the setupmapping + // of passthrough_fs when destructing. + fn map( + &mut self, + foffset: u64, + moffset: u64, + len: u64, + flags: u64, + fd: RawFd, + ) -> std::result::Result<(), IOError> { + let addr = self.mmap_cache_addr + moffset; + trace!( + target: VIRTIO_FS_NAME, + "{}: CacheHandler::map(): fd={}, foffset=0x{:x}, moffset=0x{:x}(host addr: 0x{:x}), len=0x{:x}, flags=0x{:x}", + self.id, + fd, + foffset, + moffset, + addr, + len, + flags + ); + + if !self.is_req_valid(moffset, len) { + error!( + "{}: CacheHandler::map(): Wrong offset or length, offset=0x{:x} len=0x{:x} cache_size=0x{:x}", + self.id, moffset, len, self.cache_size + ); + return Err(IOError::from_raw_os_error(libc::EINVAL)); + } + + // TODO: + // In terms of security, DAX does not easily handle all kinds of write + // scenarios, especially append write. Therefore, to prevent guest users + // from using the DAX to write files maliciously, we do not support guest + // write permission configuration. If DAX needs to support write, we can + // add write permissions by Control path. 
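+        // Hence the window is always mapped read-only: PROT_READ with
+        // MAP_SHARED | MAP_FIXED into the pre-reserved cache region.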
+ let ret = unsafe { + libc::mmap( + addr as *mut libc::c_void, + len as usize, + libc::PROT_READ, + libc::MAP_SHARED | libc::MAP_FIXED, + fd, + foffset as libc::off_t, + ) + }; + if ret == libc::MAP_FAILED { + let e = IOError::last_os_error(); + error!("{}: CacheHandler::map() failed: {}", VIRTIO_FS_NAME, e); + return Err(e); + } + + Ok(()) + } + + fn unmap(&mut self, requests: Vec) -> std::result::Result<(), IOError> { + trace!(target: VIRTIO_FS_NAME, "{}: CacheHandler::unmap()", self.id,); + + for req in requests { + let mut offset = req.moffset; + let mut len = req.len; + + // Ignore if the length is 0. + if len == 0 { + continue; + } + + debug!( + "{}: do unmap(): offset=0x{:x} len=0x{:x} cache_size=0x{:x}", + self.id, offset, len, self.cache_size + ); + + // Need to handle a special case where the slave ask for the unmapping + // of the entire mapping. + if len == 0xffff_ffff_ffff_ffff { + len = self.cache_size; + offset = 0; + } + + if !self.is_req_valid(offset, len) { + error!( + "{}: CacheHandler::unmap(): Wrong offset or length, offset=0x{:x} len=0x{:x} cache_size=0x{:x}", + self.id, offset, len, self.cache_size + ); + return Err(IOError::from_raw_os_error(libc::EINVAL)); + } + + let addr = self.mmap_cache_addr + offset; + // Use mmap + PROT_NONE can reserve host userspace address while unmap memory. + // In this way, guest will not be able to access the memory, and dragonball + // also can reserve the HVA. + let ret = unsafe { + libc::mmap( + addr as *mut libc::c_void, + len as usize, + libc::PROT_NONE, + libc::MAP_ANONYMOUS | libc::MAP_PRIVATE | libc::MAP_FIXED, + -1, + 0_i64, + ) + }; + if ret == libc::MAP_FAILED { + let e = IOError::last_os_error(); + error!("{}: CacheHandler::unmap() failed, {}", self.id, e); + return Err(e); + } + } + + Ok(()) + } +} + +pub(crate) struct VirtioFsEpollHandler< + AS: 'static + GuestAddressSpace, + Q: QueueT, + R: GuestMemoryRegion, +> { + pub(crate) config: Arc>>, + server: Arc>>, + cache_handler: Option, + thread_pool: Option, + id: String, + rate_limiter: RateLimiter, + patch_rate_limiter_fd: EventFd, + receiver: Option>, +} + +impl VirtioFsEpollHandler +where + AS: GuestAddressSpace + Clone + Send, + AS::T: Send, + AS::M: Sync + Send, + Q: QueueT + Send + 'static, + R: GuestMemoryRegion + Send + Sync + 'static, +{ + #[allow(clippy::too_many_arguments)] + pub(crate) fn new( + config: VirtioDeviceConfig, + fs: Arc, + cache_handler: Option, + thread_pool_size: u16, + id: String, + rate_limiter: RateLimiter, + patch_rate_limiter_fd: EventFd, + receiver: Option>, + ) -> Self { + let thread_pool = if thread_pool_size > 0 { + Some(ThreadPool::with_name( + "virtiofs-thread".to_string(), + thread_pool_size as usize, + )) + } else { + None + }; + Self { + config: Arc::new(Mutex::new(config)), + server: Arc::new(Server::new(fs)), + cache_handler, + thread_pool, + id, + rate_limiter, + patch_rate_limiter_fd, + receiver, + } + } + + fn process_queue(&mut self, queue_index: usize) -> Result<()> { + let mut config_guard = self.config.lock().unwrap(); + let mem = config_guard.lock_guest_memory(); + let vm_as = config_guard.vm_as.clone(); + let queue = &mut config_guard.queues[queue_index]; + let (tx, rx) = mpsc::channel::<(u16, u32)>(); + let mut used_count = 0; + let mut rate_limited = false; + // TODO: use multiqueue to process new entries. 
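+        // Each available descriptor chain becomes one FUSE request. With a
+        // thread pool the worker adds the used descriptor and notifies the
+        // guest itself; without one the result comes back over the mpsc
+        // channel and the used ring is filled in after the loop.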
+ + let mut queue_guard = queue.queue_mut().lock(); + let mut iter = queue_guard + .iter(mem.clone()) + .map_err(Error::VirtioQueueError)?; + + for desc_chain in &mut iter { + // Prepare a set of objects that can be moved to the worker thread. + if !self.rate_limiter.consume(1, TokenType::Ops) { + rate_limited = true; + break; + } + + let head_index = desc_chain.head_index(); + let server = self.server.clone(); + let vm_as = vm_as.clone(); + let config = self.config.clone(); + let pooled = self.is_multi_thread(); + let tx = tx.clone(); + used_count += 1; + let mut cache_handler = self.cache_handler.clone(); + + let work_func = move || { + let guard = vm_as.memory(); + let mem = guard.deref(); + let reader = Reader::from_descriptor_chain(mem, desc_chain.clone()) + .map_err(FsError::InvalidDescriptorChain) + .unwrap(); + let writer = Writer::VirtioFs( + VirtioFsWriter::new(mem, desc_chain) + .map_err(FsError::InvalidDescriptorChain) + .unwrap(), + ); + let total = server + .handle_message( + reader, + writer, + cache_handler + .as_mut() + .map(|x| x as &mut dyn FsCacheReqHandler), + None, + ) + .map_err(FsError::ProcessQueue) + .unwrap(); + + if pooled { + let queue = &mut config.lock().unwrap().queues[queue_index]; + queue.add_used(mem, head_index, total as u32); + if let Err(e) = queue.notify() { + error!("failed to signal used queue: {:?}", e); + } + } else { + tx.send((head_index, total as u32)) + .expect("virtiofs: failed to send fuse result"); + } + }; + + if let Some(pool) = &self.thread_pool { + trace!("{}: poping new fuse req to thread pool.", VIRTIO_FS_NAME,); + pool.execute(work_func); + } else { + work_func(); + } + } + if rate_limited { + iter.go_to_previous_position(); + } + + let notify = !self.is_multi_thread() && used_count > 0; + // unlock QueueT + drop(queue_guard); + while !self.is_multi_thread() && used_count > 0 { + used_count -= 1; + let (idx, ret) = rx + .recv() + .expect("virtiofs: failed to recv result from thread pool"); + queue.add_used(mem.deref(), idx, ret); + } + + if notify { + if let Err(e) = queue.notify() { + error!("failed to signal used queue: {:?}", e); + } + } + + Ok(()) + } + + pub fn get_patch_rate_limiters(&mut self, bytes: BucketUpdate, ops: BucketUpdate) { + info!("{}: Update rate limiter for fs device", VIRTIO_FS_NAME); + match &bytes { + BucketUpdate::Update(tb) => { + info!( + "{}: update bandwidth, \"size\": {}, \"one_time_burst\": {}, \"refill_time\": {}", + VIRTIO_FS_NAME, + tb.capacity(), + tb.one_time_burst(), + tb.refill_time_ms() + ); + } + BucketUpdate::None => { + info!("{}: no update for bandwidth", VIRTIO_FS_NAME); + } + _ => { + info!("{}: bandwidth limiting is disabled", VIRTIO_FS_NAME); + } + } + match &ops { + BucketUpdate::Update(tb) => { + info!( + "{}: update ops, \"size\": {}, \"one_time_burst\": {}, \"refill_time\": {}", + VIRTIO_FS_NAME, + tb.capacity(), + tb.one_time_burst(), + tb.refill_time_ms() + ); + } + BucketUpdate::None => { + info!("{}: no update for ops", VIRTIO_FS_NAME); + } + _ => { + info!("{}: ops limiting is disabled", VIRTIO_FS_NAME); + } + } + self.rate_limiter.update_buckets(bytes, ops); + } + + // True if thread pool is enabled. 
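+    // When enabled, FUSE requests run on pool workers rather than on the
+    // epoll thread, which changes how process_queue() returns used
+    // descriptors (see above).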
+ fn is_multi_thread(&self) -> bool { + self.thread_pool.is_some() + } +} + +impl MutEventSubscriber for VirtioFsEpollHandler +where + AS: GuestAddressSpace + Send + Sync + 'static + Clone, + AS::T: Send, + AS::M: Sync + Send, + Q: QueueT + Send + 'static, + R: GuestMemoryRegion + Send + Sync + 'static, +{ + fn process(&mut self, events: Events, _ops: &mut EventOps) { + trace!( + target: VIRTIO_FS_NAME, + "{}: VirtioFsHandler::process({})", + self.id, + events.data() + ); + + let slot = events.data(); + let config = &self.config.clone(); + let guard = config.lock().unwrap(); + let queues = &guard.queues; + + let queues_len = queues.len() as u32; + // Rate limiter budget is now available. + let rate_limiter_event = QUEUE_AVAIL_EVENT + queues_len; + // patch request of rate limiter has arrived + let patch_rate_limiter_event = rate_limiter_event + 1; + + match slot { + s if s >= RATE_LIMITER_EVENT_COUNT + QUEUE_AVAIL_EVENT + queues_len => { + error!("{}: unknown epoll event slot {}", VIRTIO_FS_NAME, slot); + } + + s if s == rate_limiter_event => match self.rate_limiter.event_handler() { + Ok(()) => { + drop(guard); + for idx in QUEUE_AVAIL_EVENT as usize..(QUEUE_AVAIL_EVENT + queues_len) as usize + { + if let Err(e) = self.process_queue(idx) { + error!("{}: error in queue {}, {:?}", VIRTIO_FS_NAME, idx, e); + } + } + } + Err(e) => { + error!( + "{}: the rate limiter is disabled or is not blocked, {:?}", + VIRTIO_FS_NAME, e + ); + } + }, + + s if s == patch_rate_limiter_event => { + if let Err(e) = self.patch_rate_limiter_fd.read() { + error!("{}: failed to get patch event, {:?}", VIRTIO_FS_NAME, e); + } + if let Some(receiver) = &self.receiver { + if let Ok((bytes, ops)) = receiver.try_recv() { + self.get_patch_rate_limiters(bytes, ops); + } + } + } + + // QUEUE_AVAIL_EVENT + _ => { + let idx = (slot - QUEUE_AVAIL_EVENT) as usize; + if let Err(e) = queues[idx].consume_event() { + error!("{}: failed to read queue event, {:?}", VIRTIO_FS_NAME, e); + return; + } + drop(guard); + + if let Err(e) = self.process_queue(idx) { + error!( + "{}: process_queue failed due to error {:?}", + VIRTIO_FS_NAME, e + ); + } + } + } + } + + fn init(&mut self, ops: &mut EventOps) { + trace!( + target: VIRTIO_FS_NAME, + "{}: VirtioFsHandler::init()", + self.id + ); + + let queues = &self.config.lock().unwrap().queues; + + for (idx, queue) in queues.iter().enumerate() { + let events = Events::with_data( + queue.eventfd.as_ref(), + QUEUE_AVAIL_EVENT + idx as u32, + EventSet::IN, + ); + if let Err(e) = ops.add(events) { + error!( + "{}: failed to register epoll event for event queue {}, {:?}", + VIRTIO_FS_NAME, idx, e + ); + } + } + + let rate_limiter_fd = self.rate_limiter.as_raw_fd(); + if rate_limiter_fd != -1 { + if let Err(e) = ops.add(Events::with_data_raw( + rate_limiter_fd, + QUEUE_AVAIL_EVENT + queues.len() as u32, + EventSet::IN, + )) { + error!( + "{}: failed to register rate limiter event, {:?}", + VIRTIO_FS_NAME, e + ); + } + } + + if let Err(e) = ops.add(Events::with_data( + &self.patch_rate_limiter_fd, + 1 + QUEUE_AVAIL_EVENT + queues.len() as u32, + EventSet::IN, + )) { + error!( + "{}: failed to register rate limiter patch event {:?}", + VIRTIO_FS_NAME, e + ); + } + } +} + +#[cfg(test)] +pub mod tests { + use std::io::Seek; + use std::io::Write; + use std::sync::Arc; + + use dbs_interrupt::NoopNotifier; + use dbs_utils::epoll_manager::EpollManager; + use dbs_utils::epoll_manager::SubscriberOps; + use dbs_utils::rate_limiter::TokenBucket; + use vm_memory::{GuestAddress, GuestMemoryMmap}; + use 
vmm_sys_util::tempfile::TempFile; + + use super::*; + use crate::fs::device::tests::*; + use crate::fs::*; + use crate::tests::{VirtQueue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; + use crate::VirtioQueueConfig; + + #[test] + fn test_is_req_valid() { + let handler = CacheHandler { + cache_size: 0x1000, + mmap_cache_addr: 0x1000, + id: "test".to_string(), + }; + + // Normal case. + assert!(handler.is_req_valid(0x0, 0x500)); + + // Invalid case. + assert!(!handler.is_req_valid(0x500, 0x1000)); + } + + #[test] + fn test_map() { + let mmap_addr = 0x10000; + let moffset = 0x5000; + let mut handler = CacheHandler { + cache_size: 0x10000, + mmap_cache_addr: mmap_addr, + id: "test".to_string(), + }; + + // Normal case. + let original_content = b"hello world"; + let mut file = TempFile::new().unwrap().into_file(); + file.set_len(0x1000).unwrap(); + file.write_all(original_content).unwrap(); + file.rewind().unwrap(); + let fd = file.as_raw_fd(); + handler.map(0x0, moffset, 0x5000, 0, fd).unwrap(); + let mapped_addr = (mmap_addr + moffset) as *const [u8; 11]; + unsafe { + let content = mapped_addr.read(); + assert_eq!(&content, original_content); + } + + // Invalid argument case. + assert!(matches!( + handler + .map(0x0, 0x5000, 0xc000, 0, fd) + .err() + .unwrap() + .kind(), + std::io::ErrorKind::InvalidInput + )); + + // Bad file descriptor case. + let fd = TempFile::new().unwrap().as_file().as_raw_fd(); + assert!(format!( + "{:?}", + handler.map(0x0, 0x5000, 0x5000, 0, fd).err().unwrap() + ) + .contains("Bad file descriptor")); + } + + #[test] + fn test_unmap() { + let mmap_addr = 0x10000; + let moffset = 0x5000; + let mut handler = CacheHandler { + cache_size: 0x10000, + mmap_cache_addr: mmap_addr, + id: "test".to_string(), + }; + + // Normal case after map. + let original_content = b"hello world"; + let mut file = TempFile::new().unwrap().into_file(); + file.set_len(0x1000).unwrap(); + file.write_all(original_content).unwrap(); + file.rewind().unwrap(); + let fd = file.as_raw_fd(); + handler.map(0x0, moffset, 0x5000, 0, fd).unwrap(); + let mapped_addr = (mmap_addr + moffset) as *const [u8; 11]; + unsafe { + let content = mapped_addr.read(); + assert_eq!(&content, original_content); + } + let requests = vec![ + RemovemappingOne { + moffset: 0x5000, + len: 0x1000, + }, + RemovemappingOne { + moffset: 0x6000, + len: 0x2500, + }, + ]; + assert!(handler.unmap(requests).is_ok()); + + // Normal case. + let mut handler = CacheHandler { + cache_size: 0x10000, + mmap_cache_addr: mmap_addr, + id: "test".to_string(), + }; + let requests = vec![ + RemovemappingOne { + moffset: 0x5000, + len: 0x1000, + }, + RemovemappingOne { + moffset: 0x6000, + len: 0x2500, + }, + ]; + assert!(handler.unmap(requests).is_ok()); + + // Invalid argument case. 
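+        // A request that reaches past cache_size must be rejected with EINVAL.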
+ let requests = vec![RemovemappingOne { + moffset: 0x5000, + len: 0x10000, + }]; + assert!(matches!( + handler.unmap(requests).err().unwrap().kind(), + std::io::ErrorKind::InvalidInput + )); + } + + #[test] + fn test_fs_get_patch_rate_limiters() { + let mut handler = create_fs_epoll_handler(String::from("1")); + let tokenbucket = TokenBucket::new(1, 1, 4); + + handler.get_patch_rate_limiters( + BucketUpdate::None, + BucketUpdate::Update(tokenbucket.clone()), + ); + assert_eq!(handler.rate_limiter.ops().unwrap(), &tokenbucket); + + handler.get_patch_rate_limiters( + BucketUpdate::Update(tokenbucket.clone()), + BucketUpdate::None, + ); + assert_eq!(handler.rate_limiter.bandwidth().unwrap(), &tokenbucket); + + handler.get_patch_rate_limiters(BucketUpdate::None, BucketUpdate::None); + assert_eq!(handler.rate_limiter.ops().unwrap(), &tokenbucket); + + handler.get_patch_rate_limiters(BucketUpdate::None, BucketUpdate::Disabled); + assert_eq!(handler.rate_limiter.ops(), None); + + handler.get_patch_rate_limiters(BucketUpdate::Disabled, BucketUpdate::None); + assert_eq!(handler.rate_limiter.bandwidth(), None); + } + + #[test] + fn test_fs_set_patch_rate_limiters() { + let epoll_manager = EpollManager::default(); + let rate_limiter = RateLimiter::new(100, 0, 300, 10, 0, 300).unwrap(); + let mut fs: VirtioFs> = VirtioFs::new( + TAG, + NUM_QUEUES, + QUEUE_SIZE, + CACHE_SIZE, + CACHE_POLICY, + THREAD_NUM, + WB_CACHE, + NO_OPEN, + KILLPRIV_V2, + XATTR, + DROP_SYS_RSC, + NO_READDIR, + new_dummy_handler_helper(), + epoll_manager, + Some(rate_limiter), + ) + .unwrap(); + + // No sender + assert!(fs + .set_patch_rate_limiters(BucketUpdate::None, BucketUpdate::None) + .is_err()); + + // Success + let (sender, receiver) = mpsc::channel(); + fs.sender = Some(sender); + assert!(fs + .set_patch_rate_limiters(BucketUpdate::None, BucketUpdate::None) + .is_ok()); + + // Send error + drop(receiver); + assert!(fs + .set_patch_rate_limiters(BucketUpdate::None, BucketUpdate::None) + .is_err()); + } + + #[test] + fn test_fs_epoll_handler_handle_event() { + let handler = create_fs_epoll_handler("test_1".to_string()); + let event_fd = EventFd::new(0).unwrap(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_op = inner_mgr.event_ops(id).unwrap(); + let event_set = EventSet::EDGE_TRIGGERED; + let mut handler = create_fs_epoll_handler("test_2".to_string()); + + // test for QUEUE_AVAIL_EVENT + let events = Events::with_data(&event_fd, QUEUE_AVAIL_EVENT, event_set); + handler.process(events, &mut event_op); + handler.config.lock().unwrap().queues[0] + .generate_event() + .unwrap(); + handler.process(events, &mut event_op); + + // test for RATE_LIMITER_EVENT + let queues_len = handler.config.lock().unwrap().queues.len() as u32; + let events = Events::with_data(&event_fd, QUEUE_AVAIL_EVENT + queues_len, event_set); + handler.process(events, &mut event_op); + + // test for PATCH_RATE_LIMITER_EVENT + if let Err(e) = handler.patch_rate_limiter_fd.write(1) { + error!( + "{} test: failed to write patch_rate_limiter_fd, {:?}", + VIRTIO_FS_NAME, e + ); + } + let events = Events::with_data(&event_fd, 1 + QUEUE_AVAIL_EVENT + queues_len, event_set); + handler.process(events, &mut event_op); + } + + #[test] + fn test_fs_epoll_handler_handle_unknown_event() { + let handler = create_fs_epoll_handler("test_1".to_string()); + let event_fd = EventFd::new(0).unwrap(); + let mgr = EpollManager::default(); + let id = 
mgr.add_subscriber(Box::new(handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_op = inner_mgr.event_ops(id).unwrap(); + let event_set = EventSet::EDGE_TRIGGERED; + let mut handler = create_fs_epoll_handler("test_2".to_string()); + + // test for unknown event + let events = Events::with_data(&event_fd, FS_EVENTS_COUNT + 10, event_set); + handler.process(events, &mut event_op); + } + + #[test] + fn test_fs_epoll_handler_process_queue() { + { + let mut handler = create_fs_epoll_handler("test_1".to_string()); + + let m = &handler.config.lock().unwrap().vm_as.clone(); + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + let q = vq.create_queue(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + + handler.config.lock().unwrap().queues = vec![VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + )]; + assert!(handler.process_queue(0).is_ok()); + } + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/fs/mod.rs b/src/dragonball/src/dbs_virtio_devices/src/fs/mod.rs new file mode 100644 index 000000000000..a505bb3068dd --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/fs/mod.rs @@ -0,0 +1,44 @@ +// Copyright 2020 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +mod handler; +pub(crate) use self::handler::*; +mod device; +pub use self::device::*; + +use std::io::Error as IOError; + +use fuse_backend_rs::transport::Error as FuseTransportError; +use fuse_backend_rs::Error as FuseServerError; +use nix::Error as NixError; + +pub const VIRTIO_FS_NAME: &str = "virtio-fs"; + +/// Error for virtio fs device. +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// Invalid Virtio descriptor chain. + #[error("invalid descriptorchain: {0}")] + InvalidDescriptorChain(FuseTransportError), + /// Processing queue failed. + #[error("process queue failed: {0}")] + ProcessQueue(FuseServerError), + #[error("invalid data.")] + InvalidData, + /// Failed to attach/detach a backend fs. + #[error("attach/detach a backend filesystem failed:: {0}")] + BackendFs(String), + /// Error from IO error. + #[error("io error: {0}")] + IOError(#[from] IOError), + /// Failed to create memfd + #[error("failed to create memfd: {0}")] + MemFdCreate(NixError), + /// Failed to set file size + #[error("failed to set file size: {0}")] + SetFileSize(IOError), +} + +/// Specialized std::result::Result for Virtio fs device operations. +pub type Result = std::result::Result; diff --git a/src/dragonball/src/dbs_virtio_devices/src/lib.rs b/src/dragonball/src/dbs_virtio_devices/src/lib.rs new file mode 100644 index 000000000000..ec5fcdc143e1 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/lib.rs @@ -0,0 +1,498 @@ +// Copyright 2019-2020 Alibaba Cloud. All rights reserved. +// Portions Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Interfaces and implementations of virtio devices. +//! +//! Please refer to [Virtio Specification] +//! (http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-1090002) +//! 
for more information. + +mod device; +pub use self::device::*; + +mod notifier; +pub use self::notifier::*; + +pub mod epoll_helper; + +#[cfg(feature = "virtio-mmio")] +pub mod mmio; + +#[cfg(feature = "virtio-vsock")] +pub mod vsock; + +#[cfg(feature = "virtio-net")] +pub mod net; + +#[cfg(feature = "virtio-blk")] +pub mod block; + +#[cfg(feature = "virtio-fs")] +pub mod fs; + +#[cfg(feature = "virtio-mem")] +pub mod mem; + +#[cfg(feature = "virtio-balloon")] +pub mod balloon; + +use std::io::Error as IOError; + +use virtio_queue::Error as VqError; +use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemoryError}; + +pub trait DbsGuestAddressSpace: GuestAddressSpace + 'static + Clone + Send + Sync {} + +impl DbsGuestAddressSpace for T where T: GuestAddressSpace + 'static + Clone + Send + Sync {} + +/// Version of virtio specifications supported by PCI virtio devices. +#[allow(non_camel_case_types)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum VirtioVersion { + /// Unknown/non-virtio VFIO device. + VIRTIO_VERSION_UNKNOWN, + /// Virtio specification 0.95(Legacy). + VIRTIO_VERSION_0_95, + /// Virtio specification 1.0/1.1. + VIRTIO_VERSION_1_X, +} + +/// Page size for legacy PCI virtio devices. Assume it's 4K. +pub const VIRTIO_LEGACY_PAGE_SIZE: u32 = 0x1000; + +/// Initial state after device initialization/reset. +pub const DEVICE_INIT: u32 = 0x0; +/// Indicates that the guest OS has found the device and recognized it as a valid virtio device. +pub const DEVICE_ACKNOWLEDGE: u32 = 0x01; +/// Indicates that the guest OS knows how to drive the device. +pub const DEVICE_DRIVER: u32 = 0x02; +/// Indicates that the driver is set up and ready to drive the device. +pub const DEVICE_DRIVER_OK: u32 = 0x04; +/// Indicates that the driver has acknowledged all the features it understands, and feature +/// negotiation is complete. +pub const DEVICE_FEATURES_OK: u32 = 0x08; +/// Indicates that the device has experienced an error from which it can’t recover. +pub const DEVICE_NEEDS_RESET: u32 = 0x40; +/// Indicates that something went wrong in the guest, and it has given up on the device. +/// This could be an internal error, or the driver didn’t like the device for some reason, or even +/// a fatal error during device operation. +pub const DEVICE_FAILED: u32 = 0x80; + +/// Virtio network card device. +pub const TYPE_NET: u32 = 1; +/// Virtio block device. +pub const TYPE_BLOCK: u32 = 2; +/// Virtio-rng device. +pub const TYPE_RNG: u32 = 4; +/// Virtio balloon device. +pub const TYPE_BALLOON: u32 = 5; +/// Virtio vsock device. +pub const TYPE_VSOCK: u32 = 19; +/// Virtio mem device. +pub const TYPE_MEM: u32 = 24; +/// Virtio-fs virtual device. +pub const TYPE_VIRTIO_FS: u32 = 26; +/// Virtio-pmem device. +pub const TYPE_PMEM: u32 = 27; + +// Interrupt status flags for legacy interrupts. It happens to be the same for both PCI and MMIO +// virtio devices. +/// Data available in used queue. +pub const VIRTIO_INTR_VRING: u32 = 0x01; +/// Device configuration changed. +pub const VIRTIO_INTR_CONFIG: u32 = 0x02; + +/// Error code for VirtioDevice::activate(). +#[derive(Debug, thiserror::Error)] +pub enum ActivateError { + #[error("Invalid param.")] + InvalidParam, + #[error("Internal error.")] + InternalError, + #[error("Invalid queue config.")] + InvalidQueueConfig, + #[error("IO: {0}.")] + IOError(#[from] IOError), +} + +/// Error code for VirtioDevice::read_config()/write_config(). 
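+/// The variants report an offset that falls outside the configuration space,
+/// an `offset + len` sum that overflows, or a range that runs past its end.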
+#[derive(Debug, thiserror::Error, Eq, PartialEq)] +pub enum ConfigError { + #[error("Invalid offset: {0}.")] + InvalidOffset(u64), + #[error("Offset({0}) plus data length ({0}) overflow.")] + PlusOverflow(u64, u64), + #[error("Invalid offset plus data length: {0}.")] + InvalidOffsetPlusDataLen(u64), +} + +/// Specialized std::result::Result for VirtioDevice::activate(). +pub type ActivateResult = std::result::Result<(), ActivateError>; +/// Specialized std::result::Result for VirtioDevice::read_config()/write_config(). +pub type ConfigResult = std::result::Result<(), ConfigError>; + +/// Error for virtio devices to handle requests from guests. +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// Guest gave us too few descriptors in a descriptor chain. + #[error("not enough descriptors for request.")] + DescriptorChainTooShort, + /// Guest gave us a descriptor that was too short to use. + #[error("descriptor length too small.")] + DescriptorLengthTooSmall, + /// Guest gave us a descriptor that was too big to use. + #[error("descriptor length too big.")] + DescriptorLengthTooBig, + /// Guest gave us a write only descriptor that protocol says to read from. + #[error("unexpected write only descriptor.")] + UnexpectedWriteOnlyDescriptor, + /// Guest gave us a read only descriptor that protocol says to write to. + #[error("unexpected read only descriptor.")] + UnexpectedReadOnlyDescriptor, + /// Invalid input parameter or status. + #[error("invalid input parameter or status.")] + InvalidInput, + /// The requested operation would cause a seek beyond disk end. + #[error("invalid offset.")] + InvalidOffset, + /// Internal unspecific error + #[error("internal unspecific error.")] + InternalError, + /// Device resource doesn't match what requested + #[error("invalid resource.")] + InvalidResource, + /// Generic IO error + #[error("IO: {0}.")] + IOError(#[from] IOError), + /// Error from virtio_queue + #[error("virtio queue error: {0}")] + VirtioQueueError(#[from] VqError), + /// Error from Device activate. + #[error("Device activate error: {0}")] + ActivateError(#[from] ActivateError), + /// Error from Interrupt. + #[error("Interrupt error: {0}")] + InterruptError(IOError), + /// Guest gave us bad memory addresses. + #[error("failed to access guest memory. {0}")] + GuestMemory(GuestMemoryError), + /// Guest gave us an invalid guest memory address. + #[error("invalid guest memory address. {0:?}")] + InvalidGuestAddress(GuestAddress), + /// Failed creating a new MmapRegion instance. + #[error("new mmap region failed: {0}")] + NewMmapRegion(vm_memory::mmap::MmapRegionError), + /// Failed setting kvm user memory region. + #[error("set user memory region failed: {0}")] + SetUserMemoryRegion(kvm_ioctls::Error), + /// Inserting mmap region failed. + #[error("inserting mmap region failed: {0}")] + InsertMmap(vm_memory::mmap::Error), + /// Failed to set madvise on guest memory region. + #[error("failed to set madvice() on guest memory region")] + Madvise(#[source] nix::Error), + + #[cfg(feature = "virtio-vsock")] + #[error("virtio-vsock error: {0}")] + VirtioVsockError(#[from] self::vsock::VsockError), + + #[cfg(feature = "virtio-net")] + #[error("Virtio-net error: {0}")] + VirtioNetError(#[from] crate::net::NetError), + + #[cfg(feature = "virtio-fs")] + /// Error from Virtio fs. 
+ #[error("virtio-fs error: {0}")] + VirtioFs(fs::Error), + + #[cfg(feature = "virtio-mem")] + #[error("Virtio-mem error: {0}")] + VirtioMemError(#[from] mem::MemError), + + #[cfg(feature = "virtio-balloon")] + #[error("Virtio-balloon error: {0}")] + VirtioBalloonError(#[from] balloon::BalloonError), +} + +/// Specialized std::result::Result for Virtio device operations. +pub type Result = std::result::Result; + +#[allow(unused_macros)] +macro_rules! warn_or_panic { + ($($arg:tt)*) => { + if cfg!(test) { + panic!($($arg)*) + } else { + log::warn!($($arg)*) + } + } +} +#[allow(unused_imports)] +pub(crate) use warn_or_panic; + +#[cfg(test)] +pub mod tests { + use std::marker::PhantomData; + use std::mem; + use std::sync::Arc; + + use dbs_interrupt::KvmIrqManager; + use kvm_ioctls::{Kvm, VmFd}; + use virtio_queue::{QueueSync, QueueT}; + use vm_memory::{ + Address, GuestAddress, GuestMemory, GuestMemoryMmap, GuestUsize, VolatileMemory, + VolatileRef, VolatileSlice, + }; + + pub const VIRTQ_DESC_F_NEXT: u16 = 0x1; + pub const VIRTQ_DESC_F_WRITE: u16 = 0x2; + + pub fn create_vm_and_irq_manager() -> (Arc, Arc) { + let kvm = Kvm::new().unwrap(); + let vmfd = Arc::new(kvm.create_vm().unwrap()); + assert!(vmfd.create_irq_chip().is_ok()); + let irq_manager = Arc::new(KvmIrqManager::new(vmfd.clone())); + assert!(irq_manager.initialize().is_ok()); + + (vmfd, irq_manager) + } + + // Represents a virtio descriptor in guest memory. + pub struct VirtqDesc<'a> { + pub desc: VolatileSlice<'a>, + } + + #[repr(C)] + // Used to calculate field offset + pub struct DescriptorTmp { + addr: vm_memory::Le64, + len: vm_memory::Le32, + flags: vm_memory::Le16, + next: vm_memory::Le16, + } + + macro_rules! offset_of { + ($ty:ty, $field:ident) => { + unsafe { + let base = std::mem::MaybeUninit::<$ty>::uninit(); + let base_ptr = base.as_ptr(); + let c = std::ptr::addr_of!((*base_ptr).$field); + (c as usize) - (base_ptr as usize) + } + }; + } + + impl<'a> VirtqDesc<'a> { + fn new(dtable: &'a VolatileSlice<'a>, i: u16) -> Self { + let desc = dtable + .get_slice((i as usize) * Self::dtable_len(1), Self::dtable_len(1)) + .unwrap(); + VirtqDesc { desc } + } + + pub fn addr(&self) -> VolatileRef { + self.desc.get_ref(offset_of!(DescriptorTmp, addr)).unwrap() + } + + pub fn len(&self) -> VolatileRef { + self.desc.get_ref(offset_of!(DescriptorTmp, len)).unwrap() + } + + pub fn flags(&self) -> VolatileRef { + self.desc.get_ref(offset_of!(DescriptorTmp, flags)).unwrap() + } + + pub fn next(&self) -> VolatileRef { + self.desc.get_ref(offset_of!(DescriptorTmp, next)).unwrap() + } + + pub fn set(&self, addr: u64, len: u32, flags: u16, next: u16) { + self.addr().store(addr); + self.len().store(len); + self.flags().store(flags); + self.next().store(next); + } + + fn dtable_len(nelem: u16) -> usize { + 16 * nelem as usize + } + } + + // Represents a virtio queue ring. The only difference between the used and available rings, + // is the ring element type. 
+ pub struct VirtqRing<'a, T> { + pub ring: VolatileSlice<'a>, + pub start: GuestAddress, + pub qsize: u16, + _marker: PhantomData<*const T>, + } + + impl<'a, T> VirtqRing<'a, T> + where + T: vm_memory::ByteValued, + { + fn new( + start: GuestAddress, + mem: &'a GuestMemoryMmap, + qsize: u16, + alignment: GuestUsize, + ) -> Self { + assert_eq!(start.0 & (alignment - 1), 0); + + let (region, addr) = mem.to_region_addr(start).unwrap(); + let size = Self::ring_len(qsize); + let ring = region.get_slice(addr.0 as usize, size).unwrap(); + + let result = VirtqRing { + ring, + start, + qsize, + _marker: PhantomData, + }; + + result.flags().store(0); + result.idx().store(0); + result.event().store(0); + result + } + + pub fn start(&self) -> GuestAddress { + self.start + } + + pub fn end(&self) -> GuestAddress { + self.start.unchecked_add(self.ring.len() as GuestUsize) + } + + pub fn flags(&self) -> VolatileRef { + self.ring.get_ref(0).unwrap() + } + + pub fn idx(&self) -> VolatileRef { + self.ring.get_ref(2).unwrap() + } + + fn ring_offset(i: u16) -> usize { + 4 + mem::size_of::() * (i as usize) + } + + pub fn ring(&self, i: u16) -> VolatileRef { + assert!(i < self.qsize); + self.ring.get_ref(Self::ring_offset(i)).unwrap() + } + + pub fn event(&self) -> VolatileRef { + self.ring.get_ref(Self::ring_offset(self.qsize)).unwrap() + } + + fn ring_len(qsize: u16) -> usize { + Self::ring_offset(qsize) + 2 + } + } + + #[repr(C)] + #[derive(Clone, Copy, Default)] + pub struct VirtqUsedElem { + pub id: u32, + pub len: u32, + } + + unsafe impl vm_memory::ByteValued for VirtqUsedElem {} + + pub type VirtqAvail<'a> = VirtqRing<'a, u16>; + pub type VirtqUsed<'a> = VirtqRing<'a, VirtqUsedElem>; + + trait GuestAddressExt { + fn align_up(&self, x: GuestUsize) -> GuestAddress; + } + impl GuestAddressExt for GuestAddress { + fn align_up(&self, x: GuestUsize) -> GuestAddress { + Self((self.0 + (x - 1)) & !(x - 1)) + } + } + + pub struct VirtQueue<'a> { + pub start: GuestAddress, + pub dtable: VolatileSlice<'a>, + pub avail: VirtqAvail<'a>, + pub used: VirtqUsed<'a>, + } + + impl<'a> VirtQueue<'a> { + // We try to make sure things are aligned properly :-s + pub fn new(start: GuestAddress, mem: &'a GuestMemoryMmap, qsize: u16) -> Self { + // power of 2? + assert!(qsize > 0 && qsize & (qsize - 1) == 0); + + let (region, addr) = mem.to_region_addr(start).unwrap(); + let dtable = region + .get_slice(addr.0 as usize, VirtqDesc::dtable_len(qsize)) + .unwrap(); + + const AVAIL_ALIGN: GuestUsize = 2; + + let avail_addr = start + .unchecked_add(VirtqDesc::dtable_len(qsize) as GuestUsize) + .align_up(AVAIL_ALIGN); + let avail = VirtqAvail::new(avail_addr, mem, qsize, AVAIL_ALIGN); + + const USED_ALIGN: GuestUsize = 4; + + let used_addr = avail.end().align_up(USED_ALIGN); + let used = VirtqUsed::new(used_addr, mem, qsize, USED_ALIGN); + + VirtQueue { + start, + dtable, + avail, + used, + } + } + + fn size(&self) -> u16 { + (self.dtable.len() / VirtqDesc::dtable_len(1)) as u16 + } + + pub fn dtable(&self, i: u16) -> VirtqDesc { + VirtqDesc::new(&self.dtable, i) + } + + fn dtable_start(&self) -> GuestAddress { + self.start + } + + fn avail_start(&self) -> GuestAddress { + self.avail.start() + } + + fn used_start(&self) -> GuestAddress { + self.used.start() + } + + // Creates a new QueueSync, using the underlying memory regions represented by the VirtQueue. 
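+ // The descriptor table, available ring and used ring addresses come from
+ // the layout computed in `VirtQueue::new`, and the queue is sized and
+ // marked ready so tests can use it directly.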
+ pub fn create_queue(&self) -> QueueSync { + let mut q = QueueSync::new(self.size()).unwrap(); + + q.set_size(self.size()); + q.set_ready(true); + let _ = q.lock().try_set_desc_table_address(self.dtable_start()); + let _ = q.lock().try_set_avail_ring_address(self.avail_start()); + let _ = q.lock().try_set_used_ring_address(self.used_start()); + + q + } + + pub fn start(&self) -> GuestAddress { + self.dtable_start() + } + + pub fn end(&self) -> GuestAddress { + self.used.end() + } + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/mem.rs b/src/dragonball/src/dbs_virtio_devices/src/mem.rs new file mode 100644 index 000000000000..d71aa0c40d8c --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/mem.rs @@ -0,0 +1,2061 @@ +// Copyright (C) 2020 Alibaba Cloud Computing. All rights reserved. +// Copyright (c) 2020 Ant Financial +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::any::Any; +use std::cmp; +use std::io::{self, Write}; +use std::marker::PhantomData; +use std::mem::size_of; +use std::ops::Deref; +use std::os::unix::io::RawFd; +use std::sync::{Arc, Mutex}; + +use dbs_device::resources::{DeviceResources, ResourceConstraint}; +use dbs_interrupt::{InterruptNotifier, NoopNotifier}; +use dbs_utils::epoll_manager::{ + EpollManager, EventOps, EventSet, Events, MutEventSubscriber, SubscriberId, +}; +use kvm_ioctls::VmFd; +use log::{debug, error, info, trace, warn}; +use virtio_bindings::bindings::virtio_blk::VIRTIO_F_VERSION_1; +use virtio_queue::{DescriptorChain, QueueOwnedT, QueueSync, QueueT}; +use vm_memory::{ + ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryError, + GuestMemoryRegion, GuestRegionMmap, GuestUsize, MemoryRegionAddress, +}; + +use crate::device::{VirtioDevice, VirtioDeviceConfig, VirtioDeviceInfo}; +use crate::{ + ActivateError, ActivateResult, ConfigResult, DbsGuestAddressSpace, Error, Result, + VirtioSharedMemoryList, TYPE_MEM, +}; + +/// Use 4 MiB alignment because current kernel use it as the subblock_size. +pub const VIRTIO_MEM_DEFAULT_BLOCK_SIZE: u64 = 4 << 20; + +/// The memory block size of guest when initial memory is less than 64GiB. +/// When initial memory is more than 64GiB, the memory block size maybe 1GiB or +/// 2GiB, and the specific algorithm is in +/// `arch/x86/mm/int_64.c:memory_block_size_bytes()`. So if we want to use +/// virtio-mem when initial memory is larger than 64GiB, we should use the +/// algorithm in kernel to get the actual memory block size. +pub const VIRTIO_MEM_DEFAULT_BLOCK_ALIGNMENT: u64 = 128 * 1024 * 1024; + +const VIRTIO_MEM_MAP_REGION_SHIFT: u64 = 31; +const VIRTIO_MEM_MAP_REGION_SIZE: u64 = 1 << VIRTIO_MEM_MAP_REGION_SHIFT; +const VIRTIO_MEM_MAP_REGION_MASK: u64 = !(std::u64::MAX << VIRTIO_MEM_MAP_REGION_SHIFT); + +/// Max memory block size used in guest kernel. +const MAX_MEMORY_BLOCK_SIZE: u64 = 2 << 30; +/// Amount of boot ram to judge whether to use large memory blocks. 
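+/// For example, a guest booted with 4 GiB keeps the default 128 MiB alignment,
+/// while a 96 GiB guest probes down from 2 GiB and ends up with 2 GiB blocks.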
+const BOOT_MEM_SIZE_FOR_LARGE_BLOCK: u64 = 64 << 30; + +const MEM_DRIVER_NAME: &str = "virtio-mem"; + +const QUEUE_SIZE: u16 = 128; +const NUM_QUEUES: usize = 1; +const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE]; + +// Request processed successfully, applicable for +// - VIRTIO_MEM_REQ_PLUG +// - VIRTIO_MEM_REQ_UNPLUG +// - VIRTIO_MEM_REQ_UNPLUG_ALL +// - VIRTIO_MEM_REQ_STATE +const VIRTIO_MEM_RESP_ACK: u16 = 0; + +// Request denied - e.g. trying to plug more than requested, applicable for +// - VIRTIO_MEM_REQ_PLUG +const VIRTIO_MEM_RESP_NACK: u16 = 1; + +// Request cannot be processed right now, try again later, applicable for +// - VIRTIO_MEM_REQ_PLUG +// - VIRTIO_MEM_REQ_UNPLUG +// - VIRTIO_MEM_REQ_UNPLUG_ALL +// VIRTIO_MEM_RESP_BUSY: u16 = 2; + +// Error in request (e.g. addresses/alignment), applicable for +// - VIRTIO_MEM_REQ_PLUG +// - VIRTIO_MEM_REQ_UNPLUG +// - VIRTIO_MEM_REQ_STATE +const VIRTIO_MEM_RESP_ERROR: u16 = 3; + +// State of memory blocks is "plugged" +const VIRTIO_MEM_STATE_PLUGGED: u16 = 0; +// State of memory blocks is "unplugged" +const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1; +// State of memory blocks is "mixed" +const VIRTIO_MEM_STATE_MIXED: u16 = 2; + +// request to plug memory blocks +const VIRTIO_MEM_REQ_PLUG: u16 = 0; +// request to unplug memory blocks +const VIRTIO_MEM_REQ_UNPLUG: u16 = 1; +// request to unplug all blocks and shrink the usable size +const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2; +// request information about the plugged state of memory blocks +const VIRTIO_MEM_REQ_STATE: u16 = 3; + +// Virtio features +const VIRTIO_MEM_F_ACPI_PXM: u8 = 0; + +type MapRegions = Arc)>>>; + +type MultiRegions = Option<(MapRegions, Arc>)>; + +#[derive(Debug, thiserror::Error)] +pub enum MemError { + /// Guest gave us bad memory addresses. + #[error("failed to access guest memory. {0}")] + GuestMemory(GuestMemoryError), + /// Guest gave us a write only descriptor that protocol says to read from. + #[error("unexpected write only descriptor.")] + UnexpectedWriteOnlyDescriptor, + /// Guest gave us a read only descriptor that protocol says to write to. + #[error("unexpected read only descriptor.")] + UnexpectedReadOnlyDescriptor, + #[error("not enough descriptors for request.")] + /// Guest gave us too few descriptors in a descriptor chain. + DescriptorChainTooShort, + /// Guest gave us a descriptor that was too short to use. + #[error("descriptor length too small.")] + DescriptorLengthTooSmall, + /// Guest sent us invalid request. + #[error("Guest sent us invalid request.")] + InvalidRequest, + /// virtio-mem resize usable region fail + #[error("resize usable region fail: {0}")] + RsizeUsabeRegionFail(String), +} + +/// Specialied std::result::Result for virtio-mem related operations. +pub type MemResult = std::result::Result; + +// Got from qemu/include/standard-headers/linux/virtio_mem.h +// rust union doesn't support std::default::Default that +// need by mem.read_obj. +// Then move virtio_mem_req_plug, virtio_mem_req_unplug and +// virtio_mem_req_state to virtio_mem_req. +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioMemReq { + req_type: u16, + padding: [u16; 3], + addr: u64, + nb_blocks: u16, +} + +// Safe because it only has data and has no implicit padding. 
+unsafe impl ByteValued for VirtioMemReq {} + +// Got from qemu/include/standard-headers/linux/virtio_mem.h +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioMemRespState { + state: u16, +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioMemResp { + resp_type: u16, + padding: [u16; 3], + state: VirtioMemRespState, +} + +// Safe because it only has data and has no implicit padding. +unsafe impl ByteValued for VirtioMemResp {} + +// Got from qemu/include/standard-headers/linux/virtio_mem.h +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default, PartialEq)] +pub(crate) struct VirtioMemConfig { + /// Block size and alignment. Cannot change. + pub(crate) block_size: u64, + /// Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. + pub(crate) node_id: u16, + pub(crate) padding: [u8; 6], + /// Start address of the memory region. Cannot change. + pub(crate) addr: u64, + /// Region size (maximum). Cannot change. + pub(crate) region_size: u64, + /// Currently usable region size. Can grow up to region_size. Can + /// shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config + /// update will be sent). + pub(crate) usable_region_size: u64, + /// Currently used size. Changes due to plug/unplug requests, but no + /// config updates will be sent. + pub(crate) plugged_size: u64, + /// Requested size. New plug requests cannot exceed it. Can change. + pub(crate) requested_size: u64, +} + +// Safe because it only has data and has no implicit padding. +unsafe impl ByteValued for VirtioMemConfig {} + +struct Request { + req: VirtioMemReq, + status_addr: GuestAddress, +} + +impl Request { + fn parse(desc_chain: &mut DescriptorChain<&M>, mem: &M) -> MemResult { + let avail_desc = desc_chain.next().ok_or(MemError::DescriptorChainTooShort)?; + // The head contains the request type which MUST be readable. + if avail_desc.is_write_only() { + return Err(MemError::UnexpectedWriteOnlyDescriptor); + } + if avail_desc.len() as usize != size_of::() { + return Err(MemError::InvalidRequest); + } + let req: VirtioMemReq = mem + .read_obj(avail_desc.addr()) + .map_err(MemError::GuestMemory)?; + + let status_desc = desc_chain.next().ok_or(MemError::DescriptorChainTooShort)?; + + // The status MUST always be writable + if !status_desc.is_write_only() { + return Err(MemError::UnexpectedReadOnlyDescriptor); + } + + if (status_desc.len() as usize) < size_of::() { + return Err(MemError::DescriptorLengthTooSmall); + } + + Ok(Request { + req, + status_addr: status_desc.addr(), + }) + } +} + +struct StateChangeRequest<'a> { + id: &'a str, + config: &'a VirtioMemConfig, + mem_state: &'a mut Vec, + addr: u64, + size: u64, + nb_blocks: u16, + multi_region: bool, + map_regions: MapRegions, + host_fd: Option, + plug: bool, +} + +impl<'a> StateChangeRequest<'a> { + #[allow(clippy::too_many_arguments)] + fn new( + r: &Request, + id: &'a str, + config: &'a VirtioMemConfig, + mem_state: &'a mut Vec, + multi_region: bool, + map_regions: MapRegions, + host_fd: Option, + plug: bool, + ) -> StateChangeRequest<'a> { + let size: u64 = r.req.nb_blocks as u64 * config.block_size; + + StateChangeRequest { + id, + config, + mem_state, + addr: r.req.addr, + size, + nb_blocks: r.req.nb_blocks, + multi_region, + map_regions, + host_fd, + plug, + } + } +} + +/// A hook for the VMM to create memory region for virtio-mem devices. 
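+/// Implementations create the guest memory region that backs a given KVM slot
+/// when the device grows its usable region (see `virtio_mem_resize_usable_region`
+/// and `Mem::set_resource`); the unit tests use a memfd-backed dummy factory.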
+pub trait MemRegionFactory: Send { + fn create_region( + &mut self, + guest_addr: GuestAddress, + region_len: GuestUsize, + kvm_slot: u32, + ) -> std::result::Result, Error>; + + fn restore_region_addr(&self, guest_addr: GuestAddress) -> std::result::Result<*mut u8, Error>; + + fn get_host_numa_node_id(&self) -> Option; + + fn set_host_numa_node_id(&mut self, host_numa_node_id: Option); +} + +struct MemTool {} + +impl MemTool { + fn virtio_mem_valid_range(config: &VirtioMemConfig, addr: u64, size: u64) -> bool { + // address properly aligned? + if addr % config.block_size != 0 || size % config.block_size != 0 { + return false; + } + + // reasonable size + if addr.checked_add(size).is_none() || size == 0 { + return false; + } + + // start address in usable range? + if addr < config.addr || addr >= config.addr + config.usable_region_size { + return false; + } + + // end address in usable range? + if addr + size > config.addr + config.usable_region_size { + return false; + } + + true + } + + fn virtio_mem_check_bitmap( + bit_index: usize, + nb_blocks: u16, + mem_state: &[bool], + plug: bool, + ) -> bool { + for state in mem_state.iter().skip(bit_index).take(nb_blocks as usize) { + if *state != plug { + return false; + } + } + true + } + + fn virtio_mem_set_bitmap(bit_index: usize, nb_blocks: u16, mem_state: &mut [bool], plug: bool) { + for state in mem_state + .iter_mut() + .skip(bit_index) + .take(nb_blocks as usize) + { + *state = plug; + } + } + + fn virtio_mem_state_change_request(r: &mut StateChangeRequest) -> u16 { + if r.plug && (r.config.plugged_size + r.size > r.config.requested_size) { + return VIRTIO_MEM_RESP_NACK; + } + if !MemTool::virtio_mem_valid_range(r.config, r.addr, r.size) { + return VIRTIO_MEM_RESP_ERROR; + } + + let offset = r.addr - r.config.addr; + let bit_index = (offset / r.config.block_size) as usize; + if !MemTool::virtio_mem_check_bitmap(bit_index, r.nb_blocks, r.mem_state, !r.plug) { + return VIRTIO_MEM_RESP_ERROR; + } + + let host_addr = if r.multi_region { + // Handle map_region + let map_regions = r.map_regions.lock().unwrap(); + let map_region_index = (offset >> VIRTIO_MEM_MAP_REGION_SHIFT) as usize; + if (offset + r.size - 1) >> VIRTIO_MEM_MAP_REGION_SHIFT != map_region_index as u64 { + error!( + target: MEM_DRIVER_NAME, + "{}: {}: try to change more than one map_region", MEM_DRIVER_NAME, r.id, + ); + return VIRTIO_MEM_RESP_ERROR; + } + if map_region_index >= map_regions.len() { + error!( + target: MEM_DRIVER_NAME, + "{}: {}: map_region index {} is not right {:?}", + MEM_DRIVER_NAME, + r.id, + map_region_index, + map_regions, + ); + return VIRTIO_MEM_RESP_ERROR; + } + + let region_host_addr = if let Some(addr_tuple) = map_regions[map_region_index].1 { + addr_tuple.0 + } else { + error!( + "{}: try to access unmap region offset {} size {}", + MEM_DRIVER_NAME, offset, r.size + ); + return VIRTIO_MEM_RESP_ERROR; + }; + (offset & VIRTIO_MEM_MAP_REGION_MASK) + region_host_addr + } else { + let map_regions = r.map_regions.lock().unwrap(); + if let Some(addr_tuple) = map_regions[0].1 { + addr_tuple.0 + offset + } else { + error!( + target: MEM_DRIVER_NAME, + "{}: {}: try to unplug unmap region", MEM_DRIVER_NAME, r.id + ); + return VIRTIO_MEM_RESP_ERROR; + } + }; + + if !r.plug { + if let Some(fd) = r.host_fd { + let res = unsafe { + libc::fallocate64( + fd, + libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE, + offset as libc::off64_t, + r.size as libc::off64_t, + ) + }; + if res != 0 { + error!( + target: MEM_DRIVER_NAME, + "{}: {}: fallocate64 get error 
{}", + MEM_DRIVER_NAME, + r.id, + io::Error::last_os_error() + ); + return VIRTIO_MEM_RESP_ERROR; + } + } + let res = unsafe { + libc::madvise( + host_addr as *mut libc::c_void, + r.size as libc::size_t, + libc::MADV_REMOVE, + ) + }; + if res != 0 { + error!( + target: MEM_DRIVER_NAME, + "{}: {}: madvise get error {}", + MEM_DRIVER_NAME, + r.id, + io::Error::last_os_error() + ); + return VIRTIO_MEM_RESP_ERROR; + } + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: unplug host_addr {} size {}", + MEM_DRIVER_NAME, + r.id, + host_addr, + r.size, + ); + } else { + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: plug host_addr {} size {}", + MEM_DRIVER_NAME, + r.id, + host_addr, + r.size, + ); + } + + MemTool::virtio_mem_set_bitmap(bit_index, r.nb_blocks, r.mem_state, r.plug); + + VIRTIO_MEM_RESP_ACK + } + + #[allow(clippy::too_many_arguments)] + fn virtio_mem_unplug_all( + id: &str, + config: &VirtioMemConfig, + mem_state: &mut Vec, + multi_region: bool, + map_regions: MapRegions, + host_fd: Option, + ) -> u16 { + for x in 0..(config.region_size / config.block_size) as usize { + if mem_state[x] { + let mut request = StateChangeRequest { + id, + config, + addr: config.addr + x as u64 * config.block_size, + size: config.block_size, + nb_blocks: 1, + mem_state, + multi_region, + map_regions: map_regions.clone(), + host_fd, + plug: false, + }; + let resp_type = MemTool::virtio_mem_state_change_request(&mut request); + if resp_type != VIRTIO_MEM_RESP_ACK { + return resp_type; + } + mem_state[x] = false; + } + } + + VIRTIO_MEM_RESP_ACK + } + + fn virtio_mem_state_request( + config: &VirtioMemConfig, + addr: u64, + nb_blocks: u16, + mem_state: &mut [bool], + ) -> (u16, u16) { + let size: u64 = nb_blocks as u64 * config.block_size; + let resp_type = if MemTool::virtio_mem_valid_range(config, addr, size) { + VIRTIO_MEM_RESP_ACK + } else { + VIRTIO_MEM_RESP_ERROR + }; + + let offset = addr - config.addr; + let bit_index = (offset / config.block_size) as usize; + let resp_state = if MemTool::virtio_mem_check_bitmap(bit_index, nb_blocks, mem_state, true) + { + VIRTIO_MEM_STATE_PLUGGED + } else if MemTool::virtio_mem_check_bitmap(bit_index, nb_blocks, mem_state, false) { + VIRTIO_MEM_STATE_UNPLUGGED + } else { + VIRTIO_MEM_STATE_MIXED + }; + + (resp_type, resp_state) + } + + /// The idea of virtio_mem_resize_usable_region is get from QEMU virtio_mem_resize_usable_region + /// use alignment to calculate usable extent. + fn virtio_mem_resize_usable_region( + id: &str, + config: &mut VirtioMemConfig, + can_shrink: bool, + alignment: u64, + // map_regions, factory + multi_regions: MultiRegions, + ) -> Result<()> { + let mut newsize = cmp::min(config.region_size, config.requested_size + 2 * alignment); + + /* The usable region size always has to be multiples of the block size. 
*/ + newsize &= !(config.block_size - 1); + + if config.requested_size == 0 { + newsize = 0; + } + + if newsize > config.usable_region_size { + if let Some((map_regions, factory)) = multi_regions { + let mut map_regions = map_regions.lock().unwrap(); + let mut first_index = + (config.usable_region_size >> VIRTIO_MEM_MAP_REGION_SHIFT) as usize; + let mut last_index = (newsize >> VIRTIO_MEM_MAP_REGION_SHIFT) as usize; + if first_index >= map_regions.len() { + first_index = map_regions.len() - 1; + } + if last_index >= map_regions.len() { + last_index = map_regions.len() - 1; + } + // Find the first unmap index + let mut first_unmap_index = None; + for index in first_index..last_index + 1 { + if map_regions[index].1.is_none() { + first_unmap_index = Some(index); + break; + } + } + if let Some(first_index) = first_unmap_index { + let regions_num = (last_index - first_index + 1) as u64; + // Setup a new map region + let mut guest_addr = + config.addr + ((first_index as u64) << VIRTIO_MEM_MAP_REGION_SHIFT); + let region_len = ((regions_num - 1) << VIRTIO_MEM_MAP_REGION_SHIFT) + + if last_index + 1 == map_regions.len() { + config.region_size + - ((last_index as u64) << VIRTIO_MEM_MAP_REGION_SHIFT) + } else { + VIRTIO_MEM_MAP_REGION_SIZE + }; + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: try to get new map_region index {}-{} guest_addr 0x{:x} len 0x{:x} slot {}", + MEM_DRIVER_NAME, + id, + first_index, + last_index, + guest_addr, + region_len, + map_regions[first_index].0, + ); + let region = factory.lock().unwrap().create_region( + GuestAddress(guest_addr), + region_len, + map_regions[first_index].0, + )?; + let mut host_addr = region + .get_host_address(MemoryRegionAddress(0)) + .map_err(|e| MemError::RsizeUsabeRegionFail(format!("{:?}", e)))? + as u64; + info!(target: MEM_DRIVER_NAME, + "{}: {}: new map_region index {}-{} new region guest_addr 0x{:x}-0x{:x} host_addr 0x{:x} len 0x{:x}", + MEM_DRIVER_NAME, id, first_index, last_index, guest_addr, guest_addr + region_len, host_addr, region_len); + for index in first_index..last_index + 1 { + map_regions[index].1 = Some((host_addr, guest_addr)); + host_addr += VIRTIO_MEM_MAP_REGION_SIZE; + guest_addr += VIRTIO_MEM_MAP_REGION_SIZE; + } + } + } + } + if newsize < config.usable_region_size && !can_shrink { + return Ok(()); + } + + let oldsize = config.usable_region_size; + info!( + target: MEM_DRIVER_NAME, + "{}: {}: virtio_mem_resize_usable_region {:?} {:?}", + MEM_DRIVER_NAME, + id, + oldsize, + newsize + ); + config.usable_region_size = newsize; + + Ok(()) + } +} + +pub(crate) struct MemEpollHandler< + AS: GuestAddressSpace, + Q: QueueT + Send = QueueSync, + R: GuestMemoryRegion = GuestRegionMmap, +> { + pub(crate) config: VirtioDeviceConfig, + mem_config: Arc>, + pub(crate) multi_region: bool, + // kvm_slot, Option(host_addr, guest_addr) + pub(crate) map_regions: MapRegions, + host_fd: Option, + pub(crate) mem_state: Vec, + id: String, +} + +impl MemEpollHandler { + fn process_queue(&mut self, queue_index: usize) -> bool { + // Do not expect poisoned lock. + let config = &mut self.mem_config.lock().unwrap(); + let conf = &mut self.config; + let guard = conf.lock_guest_memory(); + let mem = guard.deref(); + let queue = &mut conf.queues[queue_index]; + let mut guard = queue.queue_mut().lock(); + let mut used_desc_heads = Vec::with_capacity(QUEUE_SIZE as usize); + + let mut iter = match guard.iter(mem) { + Err(e) => { + error!( + "{}: {}: failed to process queue. 
{}", + MEM_DRIVER_NAME, self.id, e + ); + return false; + } + Ok(iter) => iter, + }; + + for mut avail_desc in &mut iter { + let len = match Request::parse(&mut avail_desc, mem) { + Err(e) => { + debug!( + target: MEM_DRIVER_NAME, + "{}: {}: failed parse VirtioMemReq, {:?}", MEM_DRIVER_NAME, self.id, e + ); + 0 + } + Ok(r) => match r.req.req_type { + VIRTIO_MEM_REQ_PLUG => { + let mut request = StateChangeRequest::new( + &r, + &self.id, + config, + &mut self.mem_state, + self.multi_region, + self.map_regions.clone(), + self.host_fd, + true, + ); + let resp_type = MemTool::virtio_mem_state_change_request(&mut request); + let size = request.size; + drop(request); + if resp_type == VIRTIO_MEM_RESP_ACK { + config.plugged_size += size; + let new_plugged_size = config.plugged_size; + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: process_queue VIRTIO_MEM_REQ_PLUG {:?} plugged_size {:?}", + MEM_DRIVER_NAME, + self.id, + size, + new_plugged_size + ); + } + Self::send_response(&self.id, mem, r.status_addr, resp_type, 0) + } + VIRTIO_MEM_REQ_UNPLUG => { + let mut request = StateChangeRequest::new( + &r, + &self.id, + config, + &mut self.mem_state, + self.multi_region, + self.map_regions.clone(), + self.host_fd, + false, + ); + let resp_type = MemTool::virtio_mem_state_change_request(&mut request); + let size = request.size; + drop(request); + if resp_type == VIRTIO_MEM_RESP_ACK { + config.plugged_size -= size; + let new_plugged_size = config.plugged_size; + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: process_queue VIRTIO_MEM_REQ_UNPLUG {:?} plugged_size {:?}", + MEM_DRIVER_NAME, self.id, size, new_plugged_size + ); + } + Self::send_response(&self.id, mem, r.status_addr, resp_type, 0) + } + VIRTIO_MEM_REQ_UNPLUG_ALL => { + let resp_type = MemTool::virtio_mem_unplug_all( + &self.id, + config, + &mut self.mem_state, + self.multi_region, + self.map_regions.clone(), + self.host_fd, + ); + if resp_type == VIRTIO_MEM_RESP_ACK { + config.plugged_size = 0; + /* Does not call MemTool::virtio_mem_resize_usable_region because current doesn't support unmap region. 
*/ + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: process_queue VIRTIO_MEM_REQ_UNPLUG_ALL", + MEM_DRIVER_NAME, + self.id, + ); + } + Self::send_response(&self.id, mem, r.status_addr, resp_type, 0) + } + VIRTIO_MEM_REQ_STATE => { + let (resp_type, resp_state) = MemTool::virtio_mem_state_request( + config, + r.req.addr, + r.req.nb_blocks, + &mut self.mem_state, + ); + Self::send_response(&self.id, mem, r.status_addr, resp_type, resp_state) + } + _ => { + debug!( + target: MEM_DRIVER_NAME, + "{}: {}: VirtioMemReq unknown request type {:?}", + MEM_DRIVER_NAME, + self.id, + r.req.req_type + ); + 0 + } + }, + }; + + used_desc_heads.push((avail_desc.head_index(), len)); + } + + drop(guard); + + for &(desc_index, len) in &used_desc_heads { + queue.add_used(mem, desc_index, len); + } + + !used_desc_heads.is_empty() + } + + fn send_response( + id: &str, + mem: &AS::M, + status_addr: GuestAddress, + resp_type: u16, + state: u16, + ) -> u32 { + let mut resp = VirtioMemResp { + resp_type, + ..VirtioMemResp::default() + }; + resp.state.state = state; + match mem.write_obj(resp, status_addr) { + Ok(_) => size_of::() as u32, + Err(e) => { + debug!( + target: MEM_DRIVER_NAME, + "{}: {}: bad guest memory address, {}", MEM_DRIVER_NAME, id, e + ); + 0 + } + } + } +} + +impl MutEventSubscriber + for MemEpollHandler +{ + fn process(&mut self, events: Events, _ops: &mut EventOps) { + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: MemEpollHandler::process()", + MEM_DRIVER_NAME, + self.id + ); + + let idx = events.data() as usize; + if idx >= self.config.queues.len() { + error!( + target: MEM_DRIVER_NAME, + "{}: {}: invalid queue index {}", MEM_DRIVER_NAME, self.id, idx + ); + return; + } + + if let Err(e) = self.config.queues[idx].consume_event() { + error!( + target: MEM_DRIVER_NAME, + "{}: {}: failed to get queue event, {:?}", MEM_DRIVER_NAME, self.id, e + ); + } else if self.process_queue(idx) { + if let Err(e) = self.config.queues[idx].notify() { + error!( + target: MEM_DRIVER_NAME, + "{}: {}: failed to signal used queue, {}", MEM_DRIVER_NAME, self.id, e + ); + } + } + } + + fn init(&mut self, ops: &mut EventOps) { + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: MemEpollHandler::init()", + MEM_DRIVER_NAME, + self.id + ); + + for (idx, queue) in self.config.queues.iter().enumerate() { + ops.add(Events::with_data( + queue.eventfd.as_ref(), + idx as u32, + EventSet::IN, + )) + .unwrap_or_else(|_| { + panic!( + "{}: {}: failed to register queue event handler", + MEM_DRIVER_NAME, self.id + ) + }); + } + } +} + +fn get_map_regions_num(region_size: u64) -> usize { + ((region_size >> VIRTIO_MEM_MAP_REGION_SHIFT) + + u64::from(region_size & VIRTIO_MEM_MAP_REGION_MASK > 0)) as usize +} + +/// Virtio device for exposing memory hotplug to the guest OS through virtio. +pub struct Mem { + pub(crate) device_info: VirtioDeviceInfo, + config: Arc>, + capacity: u64, + factory: Arc>, + host_fd: Option, + device_change_notifier: Arc, + subscriber_id: Option, + id: String, + phantom: PhantomData, + alignment: u64, + // used for liveupgrade to record the memory state map in epoll handler + mem_state_map: Option>, + multi_region: bool, + // kvm_slot, Option(host_addr, guest_addr) + map_regions: MapRegions, +} + +impl Mem { + /// Create a new virtio-mem device. 
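+ ///
+ /// `capacity` and `requested_size_mib` are given in MiB while `boot_mem_byte`
+ /// is in bytes; `capacity` is rounded up to a multiple of twice the computed
+ /// block alignment, and the requested size must be a multiple of the 4 MiB
+ /// default block size and must not exceed the capacity.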
+ #[allow(clippy::too_many_arguments)] + pub fn new( + id: String, + mut capacity: u64, + requested_size_mib: u64, + mut multi_region: bool, + numa_node_id: Option, + epoll_mgr: EpollManager, + factory: Arc>, + boot_mem_byte: u64, + ) -> Result { + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: Mem::new()", + MEM_DRIVER_NAME, + id + ); + + let mut avail_features = 1u64 << VIRTIO_F_VERSION_1 as u64; + + // calculate alignment depending on boot memory size + // algorithm is from kernel (arch/x86/mm/init_64.c: probe_memory_block_size()) + let alignment = { + if boot_mem_byte < BOOT_MEM_SIZE_FOR_LARGE_BLOCK { + VIRTIO_MEM_DEFAULT_BLOCK_ALIGNMENT + } else { + let mut bz = MAX_MEMORY_BLOCK_SIZE; + while bz > VIRTIO_MEM_DEFAULT_BLOCK_ALIGNMENT { + if boot_mem_byte & (bz - 1) == 0 { + break; + } + bz >>= 1 + } + bz + } + }; + + // Align to 2 * alignment (256MB when boot mem size < 64G). + capacity = capacity * 1024 * 1024; + let usable_extent = 2 * alignment; + capacity = (capacity + usable_extent - 1) & !(usable_extent - 1); + let requested_size = requested_size_mib * 1024 * 1024; + if capacity == 0 + || requested_size > capacity + || requested_size % VIRTIO_MEM_DEFAULT_BLOCK_SIZE != 0 + { + return Err(Error::InvalidInput); + } + + let mut config = VirtioMemConfig::default(); + if let Some(node_id) = numa_node_id { + avail_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM; + config.node_id = node_id; + } + config.block_size = VIRTIO_MEM_DEFAULT_BLOCK_SIZE; + config.region_size = capacity; + config.requested_size = requested_size; + //config.usable_region_size will be setup in set_resource through virtio_mem_resize_usable_region + + if config.region_size <= VIRTIO_MEM_MAP_REGION_SIZE { + multi_region = false; + } + + // For warning unaligned_references + // adding curly braces means that a copy of the field is made, stored + // in a (properly aligned) temporary, and a reference to that temporary + // is being formatted. + info!(target: MEM_DRIVER_NAME, "{}: {}: new block_size: 0x{:x} region_size: 0x{:x} requested_size: 0x{:x} usable_region_size: 0x{:x} multi_region: {} numa_node_id: {:?}", + MEM_DRIVER_NAME, id, {config.block_size}, {config.region_size}, {config.requested_size}, {config.usable_region_size}, multi_region, numa_node_id); + + let device_info = VirtioDeviceInfo::new( + MEM_DRIVER_NAME.to_string(), + avail_features, + Arc::new(vec![QUEUE_SIZE; NUM_QUEUES]), + config.as_slice().to_vec(), + epoll_mgr, + ); + + Ok(Mem { + device_info, + config: Arc::new(Mutex::new(config)), + capacity, + factory, + device_change_notifier: Arc::new(NoopNotifier::new()), + host_fd: None, + subscriber_id: None, + id, + phantom: PhantomData, + alignment, + mem_state_map: None, + multi_region, + map_regions: Arc::new(Mutex::new(Vec::new())), + }) + } + + /// Set requested size of the memory device. + pub fn set_requested_size(&self, requested_size_mb: u64) -> Result<()> { + // Align to 4MB. + let requested_size = requested_size_mb * 1024 * 1024; + if requested_size > self.capacity || requested_size % VIRTIO_MEM_DEFAULT_BLOCK_SIZE != 0 { + return Err(Error::InvalidInput); + } + + let mem_config = &mut self.config.lock().unwrap(); + /* + * QEMU set config.requested_size after call + * virtio_mem_resize_usable_region. + * But virtio_mem_resize_usable_region of QEMU use new size as + * the requested_size. + * So this part should set requested_size before call + * MemTool::virtio_mem_resize_usable_region. + * Then MemTool::virtio_mem_resize_usable_region will get the new size + * from mem_config.requested_size. 
+ */ + info!( + target: MEM_DRIVER_NAME, + "{}: {}: set_requested_size {} Mib", MEM_DRIVER_NAME, self.id, requested_size_mb + ); + mem_config.requested_size = requested_size; + MemTool::virtio_mem_resize_usable_region( + &self.id, + mem_config, + false, + self.alignment, + if self.multi_region { + Some((self.map_regions.clone(), self.factory.clone())) + } else { + None + }, + )?; + if let Err(e) = self.device_change_notifier.notify() { + error!( + target: MEM_DRIVER_NAME, + "{}: {}: failed to signal device change event: {}", MEM_DRIVER_NAME, self.id, e + ); + return Err(Error::IOError(e)); + } + + Ok(()) + } +} + +impl VirtioDevice for Mem +where + AS: DbsGuestAddressSpace, + Q: QueueT + Send + 'static, + R: GuestMemoryRegion + Sync + Send + 'static, +{ + fn device_type(&self) -> u32 { + TYPE_MEM + } + + fn queue_max_sizes(&self) -> &[u16] { + QUEUE_SIZES + } + + fn get_avail_features(&self, page: u32) -> u32 { + self.device_info.get_avail_features(page) + } + + fn set_acked_features(&mut self, page: u32, value: u32) { + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: VirtioDevice::set_acked_features({}, 0x{:x})", + MEM_DRIVER_NAME, + self.id, + page, + value + ); + + self.device_info.set_acked_features(page, value) + } + + fn read_config(&mut self, offset: u64, mut data: &mut [u8]) -> ConfigResult { + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: VirtioDevice::read_config(0x{:x}, {:?})", + MEM_DRIVER_NAME, + self.id, + offset, + data + ); + + // Do not expect poisoned lock. + let mem_config = self.config.lock().unwrap(); + let config_space = mem_config.as_slice().to_vec(); + let config_len = config_space.len() as u64; + + if offset >= config_len { + debug!( + target: MEM_DRIVER_NAME, + "{}: {}: config space read request out of range, offset {}", + MEM_DRIVER_NAME, + self.id, + offset + ); + } else if let Some(end) = offset.checked_add(data.len() as u64) { + let end = cmp::min(end, config_len) as usize; + // This write can't fail, offset and end are checked against config_len. + let _ = data.write(&config_space[offset as usize..end]).unwrap(); + } + Ok(()) + } + + fn write_config(&mut self, _offset: u64, _data: &[u8]) -> ConfigResult { + debug!( + target: MEM_DRIVER_NAME, + "{}: {}: device configuration is read-only", MEM_DRIVER_NAME, self.id + ); + Ok(()) + } + + fn activate(&mut self, config: VirtioDeviceConfig) -> ActivateResult { + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: VirtioDevice::activate()", + MEM_DRIVER_NAME, + self.id + ); + + // Do not support control queue and multi queue. 
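+ // virtio-mem uses a single request virtqueue (NUM_QUEUES is 1), so any
+ // other queue count indicates a misconfigured transport.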
+ if config.queues.len() != 1 { + error!( + target: MEM_DRIVER_NAME, + "{}: {}: failed to activate, invalid queue_num {}.", + MEM_DRIVER_NAME, + self.id, + config.queues.len() + ); + return Err(ActivateError::InvalidParam); + } + self.device_info.check_queue_sizes(&config.queues)?; + + self.device_change_notifier = config.device_change_notifier.clone(); + + // Do not expect poisoned lock + let mem_config = self.config.lock().unwrap(); + + let slot_num = if self.multi_region { + get_map_regions_num(mem_config.region_size) + } else { + 1 + }; + + let map_regions_len = self.map_regions.lock().unwrap().len(); + if map_regions_len != slot_num { + error!( + target: MEM_DRIVER_NAME, + "{}: {}: map_region.len {}, slot_num {}", + MEM_DRIVER_NAME, + self.id, + map_regions_len, + slot_num + ); + return Err(ActivateError::InternalError); + } + + let mem_state = self.mem_state_map.take().unwrap_or_else(|| { + vec![false; mem_config.region_size as usize / mem_config.block_size as usize] + }); + + let handler = Box::new(MemEpollHandler { + config, + mem_config: self.config.clone(), + multi_region: self.multi_region, + map_regions: self.map_regions.clone(), + host_fd: self.host_fd, + mem_state, + id: self.id.clone(), + }); + + self.subscriber_id = Some(self.device_info.register_event_handler(handler)); + + Ok(()) + } + + fn remove(&mut self) { + if let Some(subscriber_id) = self.subscriber_id { + // Remove MemEpollHandler from event manager, so it could be dropped and the resources + // could be freed. + match self.device_info.remove_event_handler(subscriber_id) { + Ok(_) => debug!("virtio-mem: removed subscriber_id {:?}", subscriber_id), + Err(e) => { + warn!("virtio-mem: failed to remove event handler: {:?}", e); + } + } + } + self.subscriber_id = None; + } + + fn get_resource_requirements( + &self, + requests: &mut Vec, + use_generic_irq: bool, + ) { + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: VirtioDevice::get_resource_requirements()", + MEM_DRIVER_NAME, + self.id + ); + + requests.push(ResourceConstraint::LegacyIrq { irq: None }); + if use_generic_irq { + // Allocate one irq for device configuration change events, and one irq for each queue. + requests.push(ResourceConstraint::GenericIrq { + size: (self.device_info.queue_sizes.len() + 1) as u32, + }); + } + + // Do not expect poisoned lock. + let config = self.config.lock().unwrap(); + + // The memory needs to be 2MiB aligned in order to support huge pages. + // And we also need to align the memory's start address to guest's + // memory block size (usually 128MB), or the virtio-mem driver in guest + // kernel would cause some memory unusable which outside the alignment. + // Then, the memory needs to be above 4G to avoid conflicts with + // lapic/ioapic devices. + requests.push(ResourceConstraint::MemAddress { + range: None, + align: self.alignment, + size: config.region_size, + }); + + // Request for new kvm memory slot. + let slot_num = if self.multi_region { + get_map_regions_num(config.region_size) + } else { + 1 + }; + for _ in 0..slot_num { + requests.push(ResourceConstraint::KvmMemSlot { + slot: None, + size: 1, + }); + } + } + + fn set_resource( + &mut self, + _vm_fd: Arc, + resource: DeviceResources, + ) -> Result>> { + trace!( + target: MEM_DRIVER_NAME, + "{}: {}: VirtioDevice::set_resource()", + MEM_DRIVER_NAME, + self.id + ); + + let mem_res = resource.get_mem_address_ranges(); + let slot_res = resource.get_kvm_mem_slots(); + + // Check if we get memory resource. 
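+ // set_resource expects exactly what get_resource_requirements asked for:
+ // one guest-physical range of region_size bytes and one KVM slot per map
+ // region (a single slot when multi_region is off).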
+ if mem_res.is_empty() { + return Err(Error::InvalidResource); + } + + let mut mem_config = self.config.lock().unwrap(); + + let slot_num = if self.multi_region { + get_map_regions_num(mem_config.region_size) + } else { + 1 + }; + + // Make sure we have the correct resource as requested. + if slot_res.len() != slot_num + || mem_res.len() != 1 + || mem_res[0].1 != mem_config.region_size + { + error!( + target: MEM_DRIVER_NAME, + "{}: {}: wrong mem or kvm slot resource ({:?}, {:?})", + MEM_DRIVER_NAME, + self.id, + mem_res.len(), + slot_res.len() + ); + return Err(Error::InvalidResource); + } + + // update mem config's addr + mem_config.addr = mem_res[0].0; + + // Setup map_regions + let mut map_regions = self.map_regions.lock().unwrap(); + if map_regions.is_empty() { + if self.multi_region { + for slot in slot_res { + map_regions.push((slot, None)); + } + } else { + let region = self.factory.lock().unwrap().create_region( + GuestAddress(mem_config.addr), + mem_config.region_size, + slot_res[0], + )?; + let addr = region.get_host_address(MemoryRegionAddress(0)).unwrap() as u64; + map_regions.push((slot_res[0], Some((addr, mem_config.addr)))); + let guest_addr = mem_config.addr; + let size = mem_config.region_size; + info!( + "{}: {}: set_resource new region guest addr 0x{:x}-0x{:x} host addr 0x{:x} size {}", + MEM_DRIVER_NAME, + self.id, + guest_addr, + guest_addr + size, + addr, + size, + ); + } + } + drop(map_regions); + + MemTool::virtio_mem_resize_usable_region( + &self.id, + &mut mem_config, + false, + self.alignment, + if self.multi_region { + Some((self.map_regions.clone(), self.factory.clone())) + } else { + None + }, + )?; + + Ok(None) + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } +} + +#[cfg(test)] +pub(crate) mod tests { + use std::ffi::CString; + use std::fs::File; + use std::os::unix::io::FromRawFd; + + use dbs_device::resources::DeviceResources; + use dbs_interrupt::NoopNotifier; + use dbs_utils::epoll_manager::SubscriberOps; + use kvm_ioctls::Kvm; + use nix::sys::memfd; + use virtio_queue::QueueSync; + use vm_memory::{ + FileOffset, GuestAddress, GuestMemoryMmap, GuestRegionMmap, GuestUsize, MmapRegion, + }; + use vmm_sys_util::eventfd::EventFd; + + use super::*; + use crate::tests::{VirtQueue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; + use crate::VirtioQueueConfig; + + struct DummyMemRegionFactory {} + + impl MemRegionFactory for DummyMemRegionFactory { + fn create_region( + &mut self, + guest_addr: GuestAddress, + region_len: GuestUsize, + _kvm_slot: u32, + ) -> std::result::Result, Error> { + let file_offset = { + let fd = memfd::memfd_create( + // safe to unwrap, no nul byte in file name + &CString::new("virtio_fs_mem").unwrap(), + memfd::MemFdCreateFlag::empty(), + ) + .map_err(|_| Error::InvalidInput)?; + let file: File = unsafe { File::from_raw_fd(fd) }; + file.set_len(region_len).map_err(|_| Error::InvalidInput)?; + Some(FileOffset::new(file, 0)) + }; + + // unmap will be handled on MmapRegion'd Drop. 
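+ // The dummy factory maps the memfd with PROT_NONE and MAP_NORESERVE, so the
+ // mapping grants no access and reserves no backing store up front.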
+ let mmap_region = MmapRegion::build( + file_offset, + region_len as usize, + libc::PROT_NONE, + libc::MAP_ANONYMOUS | libc::MAP_NORESERVE | libc::MAP_PRIVATE, + ) + .map_err(Error::NewMmapRegion)?; + + let region = + Arc::new(GuestRegionMmap::new(mmap_region, guest_addr).map_err(Error::InsertMmap)?); + + Ok(region) + } + + fn restore_region_addr( + &self, + _guest_addr: GuestAddress, + ) -> std::result::Result<*mut u8, Error> { + Err(Error::InvalidInput) + } + + fn get_host_numa_node_id(&self) -> Option { + None + } + + fn set_host_numa_node_id(&mut self, _host_numa_node_id: Option) {} + } + + fn create_mem_epoll_handler(id: String) -> MemEpollHandler> { + let mem = Arc::new(GuestMemoryMmap::from_ranges(&[(GuestAddress(0x0), 0x10000)]).unwrap()); + let queues = vec![VirtioQueueConfig::create(256, 0).unwrap()]; + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::new( + mem, + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + let mem_config = Arc::new(Mutex::new(VirtioMemConfig::default())); + let map_regions = vec![(0, Some((0, 0)))]; + MemEpollHandler { + config, + mem_config, + multi_region: false, + map_regions: Arc::new(Mutex::new(map_regions)), + host_fd: None, + mem_state: Vec::new(), + id, + } + } + + #[test] + fn test_mem_request_parse() { + let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = VirtQueue::new(GuestAddress(0), m, 16); + + assert!(vq.end().0 < 0x1000); + + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + // write only request type descriptor + { + let mut queue = vq.create_queue(); + let mut q = queue.lock(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_WRITE, 1); + m.write_obj::(114, GuestAddress(0x1000 + 8)).unwrap(); + assert!(matches!( + Request::parse(&mut q.iter(m).unwrap().next().unwrap(), m), + Err(MemError::UnexpectedWriteOnlyDescriptor) + )); + } + // desc len error + { + let mut queue = vq.create_queue(); + let mut q = queue.lock(); + vq.dtable(0).flags().store(0); + m.write_obj::(114, GuestAddress(0x1000 + 8)).unwrap(); + assert!(matches!( + Request::parse(&mut q.iter(m).unwrap().next().unwrap(), m), + Err(MemError::InvalidRequest) + )); + } + // desc chain too short + { + let mut queue = vq.create_queue(); + let mut q = queue.lock(); + vq.dtable(0).flags().store(0); + vq.dtable(0).set(0x1000, 0x18, 0, 1); + assert!(matches!( + Request::parse(&mut q.iter(m).unwrap().next().unwrap(), m), + Err(MemError::DescriptorChainTooShort) + )); + } + // unexpected read only descriptor + { + let mut queue = vq.create_queue(); + let mut q = queue.lock(); + vq.dtable(0).set(0x1000, 0x18, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1).set(0x2000, 0x18, VIRTQ_DESC_F_NEXT, 2); + assert!(matches!( + Request::parse(&mut q.iter(m).unwrap().next().unwrap(), m), + Err(MemError::UnexpectedReadOnlyDescriptor) + )); + } + // desc len too short + { + let mut queue = vq.create_queue(); + let mut q = queue.lock(); + vq.dtable(0).set(0x1000, 0x18, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1).set(0x2000, 0x9, VIRTQ_DESC_F_WRITE, 2); + assert!(matches!( + Request::parse(&mut q.iter(m).unwrap().next().unwrap(), m), + Err(MemError::DescriptorLengthTooSmall) + )); + } + // success + { + let mut queue = vq.create_queue(); + let mut q = queue.lock(); + vq.dtable(0).set(0x1000, 0x18, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1).set(0x2000, 0x18, VIRTQ_DESC_F_WRITE, 2); + assert!(Request::parse(&mut q.iter(m).unwrap().next().unwrap(), 
m).is_ok()); + } + } + + #[test] + fn test_mem_tool_valid_range() { + let config = VirtioMemConfig { + block_size: 0x100, + addr: 0x1000, + usable_region_size: 0x1000, + ..Default::default() + }; + + // address not properly aligned. + assert!(!MemTool::virtio_mem_valid_range(&config, 0x14, 0x100)); + assert!(!MemTool::virtio_mem_valid_range(&config, 0x100, 5)); + + // unreasonable size. + assert!(!MemTool::virtio_mem_valid_range( + &config, + 0x1000, + i32::MAX as u64 + )); + assert!(!MemTool::virtio_mem_valid_range(&config, 0x1000, 0)); + + // start address not in usable range. + assert!(!MemTool::virtio_mem_valid_range(&config, 0x200, 0x200)); + assert!(!MemTool::virtio_mem_valid_range(&config, 0x3000, 0x200),); + + // end address not in usable range. + assert!(!MemTool::virtio_mem_valid_range(&config, 0x1000, 0x2000),); + + // success + assert!(MemTool::virtio_mem_valid_range(&config, 0x1000, 0x500),); + } + + #[test] + fn test_mem_tool_check_bitmap() { + let bit_index = 2; + let nb_blocks = 2; + let mut mem_state = [false, false, false, false]; + let plug = false; + + // true + assert!(MemTool::virtio_mem_check_bitmap( + bit_index, nb_blocks, &mem_state, plug + ),); + + mem_state[2] = true; + // false + assert!(!MemTool::virtio_mem_check_bitmap( + bit_index, nb_blocks, &mem_state, plug + ),); + } + + #[test] + fn test_mem_tool_set_bitmap() { + let bit_index = 2; + let nb_blocks = 2; + let mut mem_state = vec![false, false, false, false]; + let plug = true; + + MemTool::virtio_mem_set_bitmap(bit_index, nb_blocks, &mut mem_state, plug); + assert!(mem_state[2]); + assert!(mem_state[3]); + } + + #[test] + fn test_mem_tool_state_request() { + let config = VirtioMemConfig { + block_size: 0x100, + addr: 0x1000, + usable_region_size: 0x1000, + ..Default::default() + }; + let mut mem_state = vec![false, false, false, false]; + + // invalid range. + let (resp_type, resp_state) = + MemTool::virtio_mem_state_request(&config, 0x2000, 0, &mut mem_state); + assert_eq!(resp_type, VIRTIO_MEM_RESP_ERROR); + assert_eq!(resp_state, VIRTIO_MEM_STATE_PLUGGED); + + // valid range & unplugged. + let (resp_type, resp_state) = + MemTool::virtio_mem_state_request(&config, 0x1200, 2, &mut mem_state); + assert_eq!(resp_type, VIRTIO_MEM_RESP_ACK); + assert_eq!(resp_state, VIRTIO_MEM_STATE_UNPLUGGED); + + // mixed mem state. + mem_state = vec![false, false, true, false]; + let (resp_type, resp_state) = + MemTool::virtio_mem_state_request(&config, 0x1200, 2, &mut mem_state); + assert_eq!(resp_type, VIRTIO_MEM_RESP_ACK); + assert_eq!(resp_state, VIRTIO_MEM_STATE_MIXED); + + // plugged. + mem_state = vec![true, true, true, true]; + let (resp_type, resp_state) = + MemTool::virtio_mem_state_request(&config, 0x1200, 2, &mut mem_state); + assert_eq!(resp_type, VIRTIO_MEM_RESP_ACK); + assert_eq!(resp_state, VIRTIO_MEM_STATE_PLUGGED); + } + + #[test] + fn test_mem_tool_resize_usable_region() { + use std::ptr::{addr_of, read_unaligned}; + + let mut config = VirtioMemConfig { + region_size: 0x200, + block_size: 0x100, + usable_region_size: 0x1000, + requested_size: 0, + ..Default::default() + }; + + let id = "mem0".to_string(); + + // unshrink. + MemTool::virtio_mem_resize_usable_region( + &id, + &mut config, + false, + VIRTIO_MEM_DEFAULT_BLOCK_ALIGNMENT, + None, + ) + .unwrap(); + assert_eq!( + unsafe { read_unaligned(addr_of!(config.usable_region_size)) }, + 0x1000 + ); + + // request size is 0. 
+ MemTool::virtio_mem_resize_usable_region( + &id, + &mut config, + true, + VIRTIO_MEM_DEFAULT_BLOCK_ALIGNMENT, + None, + ) + .unwrap(); + assert_eq!( + unsafe { read_unaligned(addr_of!(config.usable_region_size)) }, + 0 + ); + + // shrink. + config.requested_size = 0x5; + MemTool::virtio_mem_resize_usable_region( + &id, + &mut config, + true, + VIRTIO_MEM_DEFAULT_BLOCK_ALIGNMENT, + None, + ) + .unwrap(); + assert_eq!( + unsafe { read_unaligned(addr_of!(config.usable_region_size)) }, + 0x200 + ); + + // test alignment + config.region_size = 2 << 30; + config.requested_size = 1 << 30; + // alignment unchanged. + MemTool::virtio_mem_resize_usable_region( + &id, + &mut config, + true, + VIRTIO_MEM_DEFAULT_BLOCK_ALIGNMENT, + None, + ) + .unwrap(); + assert_eq!( + unsafe { read_unaligned(addr_of!(config.usable_region_size)) }, + (1 << 30) + 2 * VIRTIO_MEM_DEFAULT_BLOCK_ALIGNMENT + ); + // alignemnt changed. + MemTool::virtio_mem_resize_usable_region( + &id, + &mut config, + true, + MAX_MEMORY_BLOCK_SIZE, + None, + ) + .unwrap(); + assert_eq!( + unsafe { read_unaligned(addr_of!(config.usable_region_size)) }, + 2 << 30 + ); + } + + #[test] + fn test_mem_virtio_device_normal() { + let epoll_mgr = EpollManager::default(); + let id = "mem0".to_string(); + let factory = Arc::new(Mutex::new(DummyMemRegionFactory {})); + let mut dev = + Mem::>::new(id, 200, 200, false, None, epoll_mgr, factory, 200) + .unwrap(); + + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::device_type(&dev), + TYPE_MEM + ); + let queue_size = vec![128]; + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::queue_max_sizes( + &dev + ), + &queue_size[..] + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 0), + dev.device_info.get_avail_features(0) + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 1), + dev.device_info.get_avail_features(1) + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 2), + dev.device_info.get_avail_features(2) + ); + VirtioDevice::>, QueueSync, GuestRegionMmap>::set_acked_features( + &mut dev, 2, 0, + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 2), + 0, + ); + + let mut data: [u8; 8] = [1; 8]; + VirtioDevice::>, QueueSync, GuestRegionMmap>::read_config( + &mut dev, 0, &mut data, + ) + .unwrap(); + let config: [u8; 8] = [0; 8]; + VirtioDevice::>, QueueSync, GuestRegionMmap>::write_config( + &mut dev, 0, &config, + ) + .unwrap(); + let mut data2: [u8; 8] = [1; 8]; + VirtioDevice::>, QueueSync, GuestRegionMmap>::read_config( + &mut dev, 0, &mut data2, + ) + .unwrap(); + assert_eq!(data, data2); + } + + #[test] + fn test_mem_virtio_device_get_resource_requirements() { + let epoll_mgr = EpollManager::default(); + let id = "mem0".to_string(); + let factory = Arc::new(Mutex::new(DummyMemRegionFactory {})); + let dev = Mem::>::new( + id, 0x100, 0x100, false, None, epoll_mgr, factory, 0xc0000000, + ) + .unwrap(); + let mut requirements = vec![ + ResourceConstraint::new_mmio(0x1000), + ResourceConstraint::new_mmio(0x1000), + ]; + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_resource_requirements( + &dev, &mut requirements, true, + ); + assert_eq!(requirements[2], ResourceConstraint::LegacyIrq { irq: None }); + assert_eq!(requirements[3], ResourceConstraint::GenericIrq { size: 2 }); + assert_eq!( + requirements[4], + ResourceConstraint::MemAddress { + range: None, + align: VIRTIO_MEM_DEFAULT_BLOCK_ALIGNMENT, + size: 0x100 << 
20, + } + ); + assert_eq!( + requirements[5], + ResourceConstraint::KvmMemSlot { + slot: None, + size: 1 + } + ); + } + + #[test] + fn test_mem_virtio_device_set_resource() { + let epoll_mgr = EpollManager::default(); + let id = "mem0".to_string(); + let factory = Arc::new(Mutex::new(DummyMemRegionFactory {})); + + // enable multi-region in virtio-mem + { + let mut dev = Mem::>::new( + id.clone(), + 0xc00, + 0xc00, + true, + None, + epoll_mgr.clone(), + factory.clone(), + 0xc0000000, + ) + .unwrap(); + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let mut resources = DeviceResources::new(); + let entry = dbs_device::resources::Resource::MemAddressRange { + base: 0x100000000, + size: 0xc00 << 20, + }; + resources.append(entry); + let entry = dbs_device::resources::Resource::KvmMemSlot(0); + resources.append(entry); + let entry = dbs_device::resources::Resource::KvmMemSlot(1); + resources.append(entry); + let content = + VirtioDevice::>, QueueSync, GuestRegionMmap>::set_resource( + &mut dev, vm_fd, resources, + ) + .unwrap(); + assert!(content.is_none()); + } + + // disable multi-region in virtio-mem + { + let mut dev = Mem::>::new( + id, 0xc00, 0xc00, false, None, epoll_mgr, factory, 0xc0000000, + ) + .unwrap(); + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let mut resources = DeviceResources::new(); + let entry = dbs_device::resources::Resource::MemAddressRange { + base: 0x100000000, + size: 0xc00 << 20, + }; + resources.append(entry); + let entry = dbs_device::resources::Resource::KvmMemSlot(0); + resources.append(entry); + let content = + VirtioDevice::>, QueueSync, GuestRegionMmap>::set_resource( + &mut dev, vm_fd, resources, + ) + .unwrap(); + assert!(content.is_none()); + } + } + + #[test] + fn test_mem_virtio_device_spec() { + let epoll_mgr = EpollManager::default(); + let id = "mem0".to_string(); + let factory = Arc::new(Mutex::new(DummyMemRegionFactory {})); + let dev = + Mem::>::new(id, 200, 200, false, None, epoll_mgr, factory, 200) + .unwrap(); + assert!(dev.set_requested_size(200).is_ok()); + } + + #[test] + fn test_mem_virtio_device_activate() { + let epoll_mgr = EpollManager::default(); + let id = "mem0".to_string(); + let factory = Arc::new(Mutex::new(DummyMemRegionFactory {})); + // queue length error + { + let mut dev = Mem::>::new( + id.clone(), + 200, + 200, + false, + None, + epoll_mgr.clone(), + factory.clone(), + 200, + ) + .unwrap(); + + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues = vec![ + VirtioQueueConfig::::create(16, 0).unwrap(), + VirtioQueueConfig::::create(16, 0).unwrap(), + ]; + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::>>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + let result = dev.activate(config); + assert!(matches!(result, Err(ActivateError::InvalidParam))); + } + // fail because map_regions should not be empty + { + let mut dev = Mem::>::new( + id.clone(), + 200, + 200, + false, + None, + epoll_mgr.clone(), + factory.clone(), + 200, + ) + .unwrap(); + + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues = vec![VirtioQueueConfig::::create(128, 0).unwrap()]; + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = 
VirtioDeviceConfig::>>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + let result = dev.activate(config); + assert!(matches!(result, Err(ActivateError::InternalError))); + } + // test activate mem device is correct + { + let mut dev = Mem::>::new( + id, 200, 200, false, None, epoll_mgr, factory, 200, + ) + .unwrap(); + + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues = vec![VirtioQueueConfig::::create(128, 0).unwrap()]; + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::>>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + dev.map_regions.lock().unwrap().push((0, None)); + assert!(dev.activate(config).is_ok()); + } + } + + #[test] + fn test_mem_virtio_device_remove() { + let epoll_mgr = EpollManager::default(); + let id = "mem0".to_string(); + let factory = Arc::new(Mutex::new(DummyMemRegionFactory {})); + let mut dev = + Mem::>::new(id, 200, 200, false, None, epoll_mgr, factory, 200) + .unwrap(); + + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues = vec![VirtioQueueConfig::::create(128, 0).unwrap()]; + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::>>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + dev.map_regions.lock().unwrap().push((0, None)); + + // test activate mem device is correct + assert!(dev.activate(config).is_ok()); + assert!(dev.subscriber_id.is_some()); + // test remove mem device is correct + VirtioDevice::>, QueueSync, GuestRegionMmap>::remove(&mut dev); + assert!(dev.subscriber_id.is_none()); + } + + #[test] + fn test_mem_epoll_handler_handle_event() { + let handler = create_mem_epoll_handler("test_1".to_string()); + let event_fd = EventFd::new(0).unwrap(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_op = inner_mgr.event_ops(id).unwrap(); + let event_set = EventSet::EDGE_TRIGGERED; + let mut handler = create_mem_epoll_handler("test_2".to_string()); + + //invalid queue index + let events = Events::with_data(&event_fd, 1024, event_set); + handler.config.queues[0].generate_event().unwrap(); + handler.process(events, &mut event_op); + //valid + let events = Events::with_data(&event_fd, 0, event_set); + handler.config.queues[0].generate_event().unwrap(); + handler.process(events, &mut event_op); + } + + #[test] + fn test_mem_epoll_handler_process_queue() { + let mut handler = create_mem_epoll_handler("test_1".to_string()); + let m = &handler.config.vm_as.clone(); + // fail to parse available descriptor chain + { + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + let q = vq.create_queue(); + vq.dtable(0).set(0x1000, 0x400, VIRTQ_DESC_F_NEXT, 1); + handler.config.queues = vec![VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + )]; + handler.config.queues[0].generate_event().unwrap(); + assert!(handler.process_queue(0)); + } + // success + { + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + let q = vq.create_queue(); + 
vq.dtable(0).set(0x1000, 0x4, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1).set(0x2000, 0x4, VIRTQ_DESC_F_WRITE, 2); + handler.config.queues = vec![VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + )]; + handler.config.queues[0].generate_event().unwrap(); + assert!(handler.process_queue(0)); + } + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/mmio/dragonball.rs b/src/dragonball/src/dbs_virtio_devices/src/mmio/dragonball.rs new file mode 100644 index 000000000000..7cceb2094f41 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/mmio/dragonball.rs @@ -0,0 +1,203 @@ +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +//! Related to Dragonball MMIO extension. + +/// Device Vendor ID for virtio devices emulated by Dragonball. +/// The upper 24 bits are used as vendor id, and the lower 8 bits are used as features. +pub const MMIO_VENDOR_ID_DRAGONBALL: u32 = 0xdbfcdb00; + +/// Mask for feature flags in the vendor id field +pub const DRAGONBALL_FEATURE_MASK: u32 = 0xff; + +/// Assume `MMIO_INT_VRING` is always set in the interrupt status register when handling interrupts. +/// With this feature available, the device driver may optimize the way to handle interrupts. +pub const DRAGONBALL_FEATURE_INTR_USED: u32 = 0x1; + +/// The device supports Message Signaled Interrupt. +pub const DRAGONBALL_FEATURE_MSI_INTR: u32 = 0x2; + +/// The device implements per-queue notification register. +/// If this feature bit is set, the VIRTIO_MMIO_QUEUE_NOTIFY register becomes read-write. +/// On reading, the lower 16-bit contains doorbell base offset starting from the MMIO window base, +/// and the upper 16-bit contains scale for the offset. The notification register address for +/// virtque is: +/// offset = base + doorbell_base + doorbell_scale * queue_idx +pub const DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY: u32 = 0x4; + +/// PVDMA feature enabled +pub const DRAGONBALL_FEATURE_PVDMA: u32 = 0x08; + +/// Default size resrved for virtio-mmio doorbell address space. +/// +/// This represents the size of the mmio device reserved for doorbell which used to per queue notify, +/// we need to request resource with the `MMIO_DEFAULT_CFG_SIZE + DRAGONBALL_MMIO_DOORBELL_SIZE` +pub const DRAGONBALL_MMIO_DOORBELL_SIZE: u64 = 0x1000; + +/// Default offset of the mmio doorbell +pub const DRAGONBALL_MMIO_DOORBELL_OFFSET: u64 = 0x1000; + +/// Max queue num when the `fast-mmio` enabled, because we only reserved 0x200 memory region for +/// per queue notify +pub const DRAGONBALL_MMIO_MAX_QUEUE_NUM: u64 = 255; + +/// Scale of the doorbell for per queue notify +pub const DRAGONBALL_MMIO_DOORBELL_SCALE: u64 = 0x04; + +/// This represents the offset at which the device should call DeviceIo::write in order to write +/// to its configuration space. +pub const MMIO_CFG_SPACE_OFF: u64 = 0x100; + +// The format of the 16-bit MSI Control and Status register. +// On read: +// - bit 15: 1 if MSI is supported, 0 if MSI is not supported. +// - bit 0-14: reserved, read as zero. +// On write: +// - bit 15: 1 to enable MSI, 0 to disable MSI. +// - bit 0-14: ignored. + +/// Message Signaled Interrupt is supported when reading from the CSR. +pub const MMIO_MSI_CSR_SUPPORTED: u16 = 0x8000; + +/// Enable MSI if this bit is set when writing to the CSR, otherwise disable MSI. +pub const MMIO_MSI_CSR_ENABLED: u16 = 0x8000; + +// The format of the 16-bit write-only MSI Command register. 
+// - bit 12-15: command code +// - bit 0-11: command parameter + +/// Mask for the command code in the MSI command register. +pub const MMIO_MSI_CMD_CODE_MASK: u16 = 0xf000; + +/// Mask for the command argument in the MSI command register. +pub const MMIO_MSI_CMD_ARG_MASK: u16 = 0x0fff; + +/// Command code to update MSI entry configuration. +/// The argument is the MSI vector number to update. +pub const MMIO_MSI_CMD_CODE_UPDATE: u16 = 0x1000; +/// Comamnd to mask and unmask msi interrupt +pub const MMIO_MSI_CMD_CODE_INT_MASK: u16 = 0x2000; +pub const MMIO_MSI_CMD_CODE_INT_UNMASK: u16 = 0x3000; + +// Define a 16-byte area to control MMIO MSI + +// MSI control/status register offset +pub const REG_MMIO_MSI_CSR: u64 = 0x0c0; +// MSI command register offset +pub const REG_MMIO_MSI_COMMAND: u64 = 0x0c2; +// MSI address_lo register offset +pub const REG_MMIO_MSI_ADDRESS_L: u64 = 0x0c4; +// MSI address_hi register offset +pub const REG_MMIO_MSI_ADDRESS_H: u64 = 0x0c8; +// MSI data register offset +pub const REG_MMIO_MSI_DATA: u64 = 0x0cc; + +// RW: MSI feature enabled +pub const REG_MMIO_MSI_CSR_ENABLE: u64 = 0x8000; +// RO: Maximum queue size available +pub const REG_MMIO_MSI_CSR_QMASK: u64 = 0x07ff; +// Reserved +pub const REG_MMIO_MSI_CSR_RESERVED: u64 = 0x7800; + +pub const REG_MMIO_MSI_CMD_UPDATE: u64 = 0x1; + +/// Defines the offset and scale of the mmio doorbell. +/// +/// Support per-virtque doorbell, so the guest kernel may directly write to the doorbells provided +/// by hardware virtio devices. +#[derive(Default, Debug, PartialEq, Eq)] +pub struct DoorBell { + offset: u32, + scale: u32, +} + +impl DoorBell { + /// Creates a Doorbell. + pub fn new(offset: u32, scale: u32) -> Self { + Self { offset, scale } + } + + /// Returns the offset. + pub fn offset(&self) -> u32 { + self.offset + } + + /// Returns the scale. + pub fn scale(&self) -> u32 { + self.scale + } + + /// Returns the offset with the specified index of virtio queue. + pub fn queue_offset(&self, queue_index: usize) -> u64 { + (self.offset as u64) + (self.scale as u64) * (queue_index as u64) + } + + /// Returns the register data. + pub fn register_data(&self) -> u32 { + self.offset | (self.scale << 16) + } +} + +/// MSI interrupts. +#[derive(Default, Debug, PartialEq, Eq)] +pub struct Msi { + pub index_select: u32, + pub address_low: u32, + pub address_high: u32, + pub data: u32, +} + +impl Msi { + /// Sets index select. + pub fn set_index_select(&mut self, v: u32) { + self.index_select = v; + } + /// Sets address low. + pub fn set_address_low(&mut self, v: u32) { + self.address_low = v; + } + /// Sets address high. + pub fn set_address_high(&mut self, v: u32) { + self.address_high = v; + } + /// Sets msi data. 
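The register layouts above are easiest to read as code. The sketch below, with illustrative helper names that are not part of this patch, decodes a 16-bit MSI command word into its code and argument and computes a per-queue doorbell address from the constants defined in this file:

```rust
// Illustrative helpers (not part of this patch); they only restate the register
// layouts defined above in executable form.
fn decode_msi_command(v: u16) -> (u16, u16) {
    // Upper 4 bits carry the command code, lower 12 bits the argument.
    (v & MMIO_MSI_CMD_CODE_MASK, v & MMIO_MSI_CMD_ARG_MASK)
}

fn doorbell_address(mmio_base: u64, queue_index: u64) -> u64 {
    // offset = base + doorbell_base + doorbell_scale * queue_idx
    mmio_base + DRAGONBALL_MMIO_DOORBELL_OFFSET + DRAGONBALL_MMIO_DOORBELL_SCALE * queue_index
}

#[test]
fn decode_msi_command_and_doorbell() {
    // 0x1002 is the "update MSI entry" command targeting vector 2.
    assert_eq!(decode_msi_command(0x1002), (MMIO_MSI_CMD_CODE_UPDATE, 2));
    // Queue 4 notifies at 0x1000 + 4 * 0x4 inside the MMIO window.
    assert_eq!(doorbell_address(0, 4), 0x1010);
}
```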
+ pub fn set_data(&mut self, v: u32) { + self.data = v; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_doorbell() { + let door = DoorBell::new( + DRAGONBALL_MMIO_DOORBELL_OFFSET as u32, + DRAGONBALL_MMIO_DOORBELL_SCALE as u32, + ); + assert_eq!(door.offset(), DRAGONBALL_MMIO_DOORBELL_OFFSET as u32); + assert_eq!(door.scale(), DRAGONBALL_MMIO_DOORBELL_SCALE as u32); + assert_eq!(door.queue_offset(0), DRAGONBALL_MMIO_DOORBELL_OFFSET); + assert_eq!(door.queue_offset(4), 0x1010); + assert_eq!(door.register_data(), 0x1000 | 0x40000); + } + + #[test] + fn test_msi() { + let mut msi = Msi::default(); + msi.set_index_select(1); + msi.set_address_low(2); + msi.set_address_high(3); + msi.set_data(4); + assert_eq!( + msi, + Msi { + index_select: 1, + address_low: 2, + address_high: 3, + data: 4 + } + ); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_state.rs b/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_state.rs new file mode 100644 index 000000000000..434be51a915b --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_state.rs @@ -0,0 +1,665 @@ +// Copyright (C) 2019 Alibaba Cloud Computing. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +/////////////////////////////////////////////////////////////// +// TODO: we really need better support of device reset, error recovery, exceptions handling. +/////////////////////////////////////////////////////////////// + +use std::ops::Deref; +use std::sync::Arc; + +use dbs_device::resources::DeviceResources; +use dbs_interrupt::{DeviceInterruptManager, DeviceInterruptMode, InterruptIndex, KvmIrqManager}; +use kvm_bindings::kvm_userspace_memory_region; +use kvm_ioctls::{IoEventAddress, NoDatamatch, VmFd}; +use log::{debug, error, info, warn}; +use virtio_queue::QueueT; +use vm_memory::{GuestAddressSpace, GuestMemoryRegion}; + +use crate::{ + mmio::*, warn_or_panic, ActivateError, Error, Result, VirtioDevice, VirtioDeviceConfig, + VirtioQueueConfig, VirtioSharedMemory, VirtioSharedMemoryList, DEVICE_DRIVER_OK, DEVICE_FAILED, +}; + +/// The state of Virtio Mmio device. +pub struct MmioV2DeviceState { + device: Box>, + vm_fd: Arc, + vm_as: AS, + intr_mgr: DeviceInterruptManager>, + device_resources: DeviceResources, + queues: Vec>, + + mmio_base: u64, + has_ctrl_queue: bool, + device_activated: bool, + ioevent_registered: bool, + + features_select: u32, + acked_features_select: u32, + queue_select: u32, + + msi: Option, + doorbell: Option, + + shm_region_id: u32, + shm_regions: Option>, +} + +impl MmioV2DeviceState +where + AS: GuestAddressSpace + Clone, + Q: QueueT + Clone, + R: GuestMemoryRegion, +{ + /// Returns a reference to the internal device object. + pub fn get_inner_device(&self) -> &dyn VirtioDevice { + self.device.as_ref() + } + + /// Returns a mutable reference to the internal device object. + pub fn get_inner_device_mut(&mut self) -> &mut dyn VirtioDevice { + self.device.as_mut() + } + + pub(crate) fn new( + mut device: Box>, + vm_fd: Arc, + vm_as: AS, + irq_manager: Arc, + device_resources: DeviceResources, + mmio_base: u64, + doorbell_enabled: bool, + ) -> Result { + let intr_mgr = + DeviceInterruptManager::new(irq_manager, &device_resources).map_err(Error::IOError)?; + + let (queues, has_ctrl_queue) = Self::create_queues(device.as_ref())?; + + // Assign requested device resources back to virtio device and let it do necessary setups, + // as only virtio device knows how to use such resources. 
And if there's + // VirtioSharedMemoryList returned, assigned it to MmioV2DeviceState + let shm_regions = device + .set_resource(vm_fd.clone(), device_resources.clone()) + .map_err(|e| { + error!("Failed to assign device resource to virtio device: {}", e); + e + })?; + + let doorbell = if doorbell_enabled { + Some(DoorBell::new( + DRAGONBALL_MMIO_DOORBELL_OFFSET as u32, + DRAGONBALL_MMIO_DOORBELL_SCALE as u32, + )) + } else { + None + }; + + Ok(MmioV2DeviceState { + device, + vm_fd, + vm_as, + intr_mgr, + device_resources, + queues, + mmio_base, + has_ctrl_queue, + ioevent_registered: false, + device_activated: false, + features_select: 0, + acked_features_select: 0, + queue_select: 0, + doorbell, + msi: None, + shm_region_id: 0, + shm_regions, + }) + } + + pub(crate) fn activate(&mut self, device: &MmioV2Device) -> Result<()> { + if self.device_activated { + return Ok(()); + } + + // If the driver incorrectly sets up the queues, the following check will fail and take + // the device into an unusable state. + if !self.check_queues_valid() { + return Err(Error::ActivateError(ActivateError::InvalidQueueConfig)); + } + + self.register_ioevent()?; + + self.intr_mgr.enable()?; + + let config = self.create_device_config(device)?; + + self.device + .activate(config) + .map(|_| self.device_activated = true) + .map_err(|e| { + error!("device activate error: {:?}", e); + Error::ActivateError(e) + }) + } + + fn create_queues( + device: &dyn VirtioDevice, + ) -> Result<(Vec>, bool)> { + let mut queues = Vec::new(); + for (idx, size) in device.queue_max_sizes().iter().enumerate() { + queues.push(VirtioQueueConfig::create(*size, idx as u16)?); + } + + // The ctrl queue must be append to Queue Vec, because the guest will + // configure it which is same with other queues. + let has_ctrl_queue = device.ctrl_queue_max_sizes() > 0; + if has_ctrl_queue { + queues.push(VirtioQueueConfig::create( + device.ctrl_queue_max_sizes(), + queues.len() as u16, + )?); + } + + Ok((queues, has_ctrl_queue)) + } + + fn create_queue_config( + &mut self, + device: &MmioV2Device, + ) -> Result>> { + // Safe because we have just called self.intr_mgr.enable(). + let group = self.intr_mgr.get_group().unwrap(); + let mut queues = Vec::new(); + for queue in self.queues.iter() { + //The first interrupt index is device config change. + let queue_notifier = crate::notifier::create_queue_notifier( + group.clone(), + device.interrupt_status(), + queue.index() as InterruptIndex + 1, + ); + queues.push(VirtioQueueConfig::new( + queue.queue.clone(), + queue.eventfd.clone(), + queue_notifier, + queue.index(), + )); + } + Ok(queues) + } + + fn create_device_config( + &mut self, + device: &MmioV2Device, + ) -> Result> { + let mut queues = self.create_queue_config(device)?; + let ctrl_queue = if self.has_ctrl_queue { + queues.pop() + } else { + None + }; + + // Safe because we have just called self.intr_mgr.enable(). + let group = self.intr_mgr.get_group().unwrap(); + //The first interrupt index is device config change. 
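The interrupt-vector convention used by `create_queue_config()` is easy to misread: vector 0 is reserved for configuration-change events and queue `i` signals on vector `i + 1`, with the control queue (appended after the data queues) taking the next index in line. A minimal sketch with a hypothetical helper name, not part of this patch:

```rust
/// Illustrative helper (not part of this patch): the vector assignment used by
/// `create_queue_config()` above. Vector 0 is reserved for configuration-change
/// notifications, so queue `i` signals on vector `i + 1`.
fn queue_interrupt_index(queue_index: u16) -> u32 {
    queue_index as u32 + 1
}

#[test]
fn queue_interrupt_index_skips_config_vector() {
    assert_eq!(queue_interrupt_index(0), 1);
    assert_eq!(queue_interrupt_index(3), 4);
}
```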
+ let notifier = crate::notifier::create_device_notifier(group, device.interrupt_status(), 0); + + let mut config = VirtioDeviceConfig::new( + self.vm_as.clone(), + self.vm_fd.clone(), + self.device_resources.clone(), + queues, + ctrl_queue, + notifier, + ); + if let Some(shm_regions) = self.shm_regions.as_ref() { + config.set_shm_regions((*shm_regions).clone()); + } + Ok(config) + } + + fn register_ioevent(&mut self) -> Result<()> { + for (i, queue) in self.queues.iter().enumerate() { + if let Some(doorbell) = self.doorbell.as_ref() { + let io_addr = IoEventAddress::Mmio(self.mmio_base + doorbell.queue_offset(i)); + if let Err(e) = self + .vm_fd + .register_ioevent(&queue.eventfd, &io_addr, NoDatamatch) + { + self.revert_ioevent(i, &io_addr, true); + return Err(Error::IOError(std::io::Error::from_raw_os_error(e.errno()))); + } + } + // always register ioeventfd in MMIO_NOTIFY_REG_OFFSET to avoid guest kernel which not support doorbell + let io_addr = IoEventAddress::Mmio(self.mmio_base + MMIO_NOTIFY_REG_OFFSET as u64); + if let Err(e) = self + .vm_fd + .register_ioevent(&queue.eventfd, &io_addr, i as u32) + { + self.unregister_ioevent_doorbell(); + self.revert_ioevent(i, &io_addr, false); + return Err(Error::IOError(std::io::Error::from_raw_os_error(e.errno()))); + } + } + self.ioevent_registered = true; + + Ok(()) + } + + #[inline] + #[allow(dead_code)] + pub(crate) fn queues(&self) -> &Vec> { + &self.queues + } + + #[inline] + #[allow(dead_code)] + pub(crate) fn queues_mut(&mut self) -> &mut Vec> { + &mut self.queues + } + + #[inline] + pub(crate) fn features_select(&self) -> u32 { + self.features_select + } + + #[inline] + pub(crate) fn set_features_select(&mut self, v: u32) { + self.features_select = v; + } + + #[inline] + #[allow(dead_code)] + pub(crate) fn acked_features_select(&mut self) -> u32 { + self.acked_features_select + } + + #[inline] + pub(crate) fn set_acked_features_select(&mut self, v: u32) { + self.acked_features_select = v; + } + + #[inline] + #[allow(dead_code)] + pub(crate) fn queue_select(&mut self) -> u32 { + self.queue_select + } + + #[inline] + pub(crate) fn set_queue_select(&mut self, v: u32) { + self.queue_select = v; + } + + #[inline] + pub(crate) fn set_acked_features(&mut self, v: u32) { + self.device + .set_acked_features(self.acked_features_select, v) + } + + #[inline] + pub(crate) fn set_shm_region_id(&mut self, v: u32) { + self.shm_region_id = v; + } + + #[inline] + pub(crate) fn set_msi_address_low(&mut self, v: u32) { + if let Some(m) = self.msi.as_mut() { + m.set_address_low(v) + } + } + + #[inline] + pub(crate) fn set_msi_address_high(&mut self, v: u32) { + if let Some(m) = self.msi.as_mut() { + m.set_address_high(v) + } + } + + #[inline] + pub(crate) fn set_msi_data(&mut self, v: u32) { + if let Some(m) = self.msi.as_mut() { + m.set_data(v) + } + } + + #[inline] + pub(crate) fn shm_regions(&self) -> Option<&VirtioSharedMemoryList> { + self.shm_regions.as_ref() + } + + #[inline] + pub(crate) fn device_activated(&self) -> bool { + self.device_activated + } + + #[inline] + pub(crate) fn doorbell(&self) -> Option<&DoorBell> { + self.doorbell.as_ref() + } + + pub(crate) fn deactivate(&mut self) { + if self.device_activated { + self.device_activated = false; + } + } + + pub(crate) fn reset(&mut self) -> Result<()> { + if self.device_activated { + warn!("reset device while it's still in active state"); + Ok(()) + } else { + // . Keep interrupt_evt and queue_evts as is. 
There may be pending + // notifications in those eventfds, but nothing will happen other + // than supurious wakeups. + // . Do not reset config_generation and keep it monotonically increasing + for queue in self.queues.iter_mut() { + let new_queue = Q::new(queue.queue.max_size()); + if let Err(e) = new_queue { + warn!("reset device failed because new virtio-queue could not be created due to {:?}", e); + return Err(Error::VirtioQueueError(e)); + } else { + // unwrap is safe here since we have checked new_queue result above. + queue.queue = new_queue.unwrap(); + } + } + + let _ = self.intr_mgr.reset(); + self.unregister_ioevent(); + self.features_select = 0; + self.acked_features_select = 0; + self.queue_select = 0; + self.msi = None; + self.doorbell = None; + Ok(()) + } + } + + fn unregister_ioevent(&mut self) { + if self.ioevent_registered { + let io_addr = IoEventAddress::Mmio(self.mmio_base + MMIO_NOTIFY_REG_OFFSET as u64); + for (i, queue) in self.queues.iter().enumerate() { + let _ = self + .vm_fd + .unregister_ioevent(&queue.eventfd, &io_addr, i as u32); + self.ioevent_registered = false; + } + } + } + + fn revert_ioevent(&mut self, num: usize, io_addr: &IoEventAddress, wildcard: bool) { + assert!(num < self.queues.len()); + let mut idx = num; + while idx > 0 { + let datamatch = if wildcard { + NoDatamatch.into() + } else { + idx as u64 + }; + idx -= 1; + let _ = self + .vm_fd + .unregister_ioevent(&self.queues[idx].eventfd, io_addr, datamatch); + } + } + + fn unregister_ioevent_doorbell(&mut self) { + if let Some(doorbell) = self.doorbell.as_ref() { + for (i, queue) in self.queues.iter().enumerate() { + let io_addr = IoEventAddress::Mmio(self.mmio_base + doorbell.queue_offset(i)); + let _ = self + .vm_fd + .unregister_ioevent(&queue.eventfd, &io_addr, NoDatamatch); + } + } + } + + pub(crate) fn check_queues_valid(&self) -> bool { + let mem = self.vm_as.memory(); + // All queues must have been enabled, we doesn't allow disabled queues. + self.queues.iter().all(|c| c.queue.is_valid(mem.deref())) + } + + pub(crate) fn with_queue(&self, d: U, f: F) -> U + where + F: FnOnce(&Q) -> U, + { + match self.queues.get(self.queue_select as usize) { + Some(config) => f(&config.queue), + None => d, + } + } + + pub(crate) fn with_queue_mut(&mut self, f: F) -> bool { + if let Some(config) = self.queues.get_mut(self.queue_select as usize) { + f(&mut config.queue); + true + } else { + false + } + } + + pub(crate) fn get_shm_field(&mut self, d: U, f: F) -> U + where + F: FnOnce(&VirtioSharedMemory) -> U, + { + if let Some(regions) = self.shm_regions.as_ref() { + match regions.region_list.get(self.shm_region_id as usize) { + Some(region) => f(region), + None => d, + } + } else { + d + } + } + + pub(crate) fn update_msi_enable(&mut self, v: u16, device: &MmioV2Device) { + // Can't switch interrupt mode once the device has been activated. 
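For each queue, `register_ioevent()` installs up to two kvm ioeventfds: a wildcard one at the queue's doorbell address when fast-mmio is enabled, and a datamatch one at the shared `MMIO_NOTIFY_REG_OFFSET` register so guests without doorbell support keep working. A sketch of the address computation, assuming the constants from the `mmio` module and using illustrative names:

```rust
/// Illustrative sketch (not part of this patch) of the two ioeventfd addresses
/// registered per queue by `register_ioevent()`:
/// - a wildcard match at the queue's doorbell slot (only when fast-mmio is on),
/// - a datamatch on the queue index at the shared MMIO_NOTIFY_REG_OFFSET register.
fn ioevent_addresses(mmio_base: u64, queue_index: u64) -> (u64, u64) {
    let doorbell = mmio_base
        + DRAGONBALL_MMIO_DOORBELL_OFFSET
        + DRAGONBALL_MMIO_DOORBELL_SCALE * queue_index;
    let legacy = mmio_base + MMIO_NOTIFY_REG_OFFSET as u64;
    (doorbell, legacy)
}

#[test]
fn doorbell_ioevent_address_matches_scale() {
    assert_eq!(ioevent_addresses(0, 4).0, 0x1010);
}
```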
+ if device.driver_status() & DEVICE_DRIVER_OK != 0 { + if device.driver_status() & DEVICE_FAILED == 0 { + debug!("mmio_v2: can not switch interrupt mode for active device"); + device.set_driver_failed(); + } + return; + } + + if v & MMIO_MSI_CSR_ENABLED != 0 { + // Guest enable msi interrupt + if self.msi.is_none() { + debug!("mmio_v2: switch to MSI interrupt mode"); + match self + .intr_mgr + .set_working_mode(DeviceInterruptMode::GenericMsiIrq) + { + Ok(_) => self.msi = Some(Msi::default()), + Err(e) => { + warn!("mmio_v2: failed to switch to MSI interrupt mode: {:?}", e); + device.set_driver_failed(); + } + } + } + } else if self.msi.is_some() { + // Guest disable msi interrupt + match self + .intr_mgr + .set_working_mode(DeviceInterruptMode::LegacyIrq) + { + Ok(_) => self.msi = None, + Err(e) => { + warn!( + "mmio_v2: failed to switch to legacy interrupt mode: {:?}", + e + ); + device.set_driver_failed(); + } + } + } + } + + fn update_msi_cfg(&mut self, v: u16) -> Result<()> { + if let Some(msi) = self.msi.as_mut() { + msi.index_select = v as u32; + self.intr_mgr + .set_msi_low_address(msi.index_select, msi.address_low) + .map_err(Error::InterruptError)?; + self.intr_mgr + .set_msi_high_address(msi.index_select, msi.address_high) + .map_err(Error::InterruptError)?; + self.intr_mgr + .set_msi_data(msi.index_select, msi.data) + .map_err(Error::InterruptError)?; + if self.intr_mgr.is_enabled() { + self.intr_mgr + .update(msi.index_select) + .map_err(Error::InterruptError)?; + } + } + + Ok(()) + } + + fn mask_msi_int(&mut self, index: u32, mask: bool) -> Result<()> { + if self.intr_mgr.is_enabled() { + if let Some(group) = self.intr_mgr.get_group() { + let old_mask = self + .intr_mgr + .get_msi_mask(index) + .map_err(Error::InterruptError)?; + debug!("mmio_v2 old mask {}, mask {}", old_mask, mask); + + if !old_mask && mask { + group.mask(index)?; + self.intr_mgr + .set_msi_mask(index, true) + .map_err(Error::InterruptError)?; + } else if old_mask && !mask { + group.unmask(index)?; + self.intr_mgr + .set_msi_mask(index, false) + .map_err(Error::InterruptError)?; + } + } + } + + Ok(()) + } + + pub(crate) fn handle_msi_cmd(&mut self, v: u16, device: &MmioV2Device) { + let arg = v & MMIO_MSI_CMD_ARG_MASK; + match v & MMIO_MSI_CMD_CODE_MASK { + MMIO_MSI_CMD_CODE_UPDATE => { + if arg > self.device.queue_max_sizes().len() as u16 { + info!("mmio_v2: configure interrupt for invalid vector {}", v,); + } else if let Err(e) = self.update_msi_cfg(arg) { + warn_or_panic!("mmio_v2: failed to configure vector {}, {:?}", v, e); + } + } + MMIO_MSI_CMD_CODE_INT_MASK => { + if let Err(e) = self.mask_msi_int(arg as u32, true) { + warn_or_panic!("mmio_v2: failed to mask {}, {:?}", v, e); + } + } + MMIO_MSI_CMD_CODE_INT_UNMASK => { + if let Err(e) = self.mask_msi_int(arg as u32, false) { + warn_or_panic!("mmio_v2: failed to unmask {}, {:?}", v, e); + } + } + _ => { + warn!("mmio_v2: unknown msi command: 0x{:x}", v); + device.set_driver_failed(); + } + } + } +} + +impl Drop for MmioV2DeviceState +where + AS: GuestAddressSpace + Clone, + Q: QueueT, + R: GuestMemoryRegion, +{ + fn drop(&mut self) { + if let Some(memlist) = &self.shm_regions { + let mmio_res = self.device_resources.get_mmio_address_ranges(); + let slots_res = self.device_resources.get_kvm_mem_slots(); + let shm_regions_num = mmio_res.len(); + let slots_num = slots_res.len(); + assert_eq!((shm_regions_num, slots_num), (1, 1)); + let kvm_mem_region = kvm_userspace_memory_region { + slot: slots_res[0], + flags: 0, + guest_phys_addr: 
memlist.guest_addr.0, + memory_size: 0, + userspace_addr: memlist.host_addr, + }; + unsafe { + self.vm_fd.set_user_memory_region(kvm_mem_region).unwrap(); + } + } + } +} + +#[cfg(test)] +pub(crate) mod tests { + use kvm_ioctls::Kvm; + use virtio_queue::QueueSync; + use vm_memory::{GuestAddress, GuestMemoryMmap, GuestRegionMmap}; + + use super::*; + use crate::mmio::mmio_v2::tests::*; + + pub fn get_mmio_state( + have_msi: bool, + doorbell: bool, + ctrl_queue_size: u16, + ) -> MmioV2DeviceState, QueueSync, GuestRegionMmap> { + let mem = Arc::new(GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap()); + + let mmio_base = 0; + let device_resources = get_device_resource(have_msi, false); + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + vm_fd.create_irq_chip().unwrap(); + + let irq_manager = Arc::new(KvmIrqManager::new(vm_fd.clone())); + irq_manager.initialize().unwrap(); + + let device = MmioDevice::new(ctrl_queue_size); + + MmioV2DeviceState::new( + Box::new(device), + vm_fd, + mem, + irq_manager, + device_resources, + mmio_base, + doorbell, + ) + .unwrap() + } + + #[test] + fn test_virtio_mmio_state_new() { + let mut state = get_mmio_state(false, false, 1); + + assert_eq!(state.queues.len(), 3); + assert!(!state.check_queues_valid()); + + state.queue_select = 0; + assert_eq!(state.with_queue(0, |q| q.max_size()), 16); + assert!(state.with_queue_mut(|q| q.set_size(16))); + assert_eq!(state.queues[state.queue_select as usize].queue.size(), 16); + + state.queue_select = 1; + assert_eq!(state.with_queue(0, |q| q.max_size()), 32); + assert!(state.with_queue_mut(|q| q.set_size(8))); + assert_eq!(state.queues[state.queue_select as usize].queue.size(), 8); + + state.queue_select = 3; + assert_eq!(state.with_queue(0xff, |q| q.max_size()), 0xff); + assert!(!state.with_queue_mut(|q| q.set_size(16))); + + assert!(!state.check_queues_valid()); + + drop(state); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_v2.rs b/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_v2.rs new file mode 100644 index 000000000000..2b6df1b21369 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_v2.rs @@ -0,0 +1,1237 @@ +// Copyright (C) 2019 Alibaba Cloud Computing. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::{Arc, Mutex, MutexGuard}; + +use byteorder::{ByteOrder, LittleEndian}; +use dbs_device::resources::{DeviceResources, Resource}; +use dbs_device::{DeviceIo, IoAddress}; +use dbs_interrupt::{InterruptStatusRegister32, KvmIrqManager}; +use kvm_ioctls::VmFd; +use log::{debug, info, warn}; +use virtio_queue::QueueT; +use vm_memory::{GuestAddressSpace, GuestMemoryRegion}; + +use crate::{ + mmio::*, Error, Result, VirtioDevice, DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, DEVICE_DRIVER_OK, + DEVICE_FAILED, DEVICE_FEATURES_OK, DEVICE_INIT, VIRTIO_INTR_VRING, +}; + +const DEVICE_STATUS_INIT: u32 = DEVICE_INIT; +const DEVICE_STATUS_ACKNOWLEDE: u32 = DEVICE_STATUS_INIT | DEVICE_ACKNOWLEDGE; +const DEVICE_STATUS_DRIVER: u32 = DEVICE_STATUS_ACKNOWLEDE | DEVICE_DRIVER; +const DEVICE_STATUS_FEATURE_OK: u32 = DEVICE_STATUS_DRIVER | DEVICE_FEATURES_OK; +const DEVICE_STATUS_DRIVER_OK: u32 = DEVICE_STATUS_FEATURE_OK | DEVICE_DRIVER_OK; + +/// Implements the +/// [MMIO](http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-1090002) +/// transport for virtio devices. 
+/// +/// This requires 3 points of installation to work with a VM: +/// +/// 1. Mmio reads and writes must be sent to this device at what is referred to here as MMIO base. +/// 1. `Mmio::queue_evts` must be installed at `MMIO_NOTIFY_REG_OFFSET` offset from the MMIO +/// base. Each event in the array must be signaled if the index is written at that offset. +/// 1. `Mmio::interrupt_evt` must signal an interrupt that the guest driver is listening to when it +/// is written to. +/// +/// Typically one page (4096 bytes) of MMIO address space is sufficient to handle this transport +/// and inner virtio device. +pub struct MmioV2Device { + state: Mutex>, + assigned_resources: DeviceResources, + mmio_cfg_res: Resource, + device_vendor: u32, + driver_status: AtomicU32, + config_generation: AtomicU32, + interrupt_status: Arc, +} + +impl MmioV2Device +where + AS: GuestAddressSpace + Clone, + Q: QueueT + Clone, + R: GuestMemoryRegion, +{ + /// Constructs a new MMIO transport for the given virtio device. + pub fn new( + vm_fd: Arc, + vm_as: AS, + irq_manager: Arc, + device: Box>, + resources: DeviceResources, + mut features: Option, + ) -> Result { + let mut device_resources = DeviceResources::new(); + let mut mmio_cfg_resource = None; + let mut mmio_base = 0; + let mut doorbell_enabled = false; + + for res in resources.iter() { + if let Resource::MmioAddressRange { base, size } = res { + if mmio_cfg_resource.is_none() + && *size == MMIO_DEFAULT_CFG_SIZE + DRAGONBALL_MMIO_DOORBELL_SIZE + { + mmio_base = *base; + mmio_cfg_resource = Some(res.clone()); + continue; + } + } + device_resources.append(res.clone()); + } + let mmio_cfg_res = match mmio_cfg_resource { + Some(v) => v, + None => return Err(Error::InvalidInput), + }; + + let msi_feature = if resources.get_generic_msi_irqs().is_some() { + DRAGONBALL_FEATURE_MSI_INTR + } else { + 0 + }; + + if let Some(ref mut ft) = features { + if (*ft & DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY != 0) + && vm_fd.check_extension(kvm_ioctls::Cap::IoeventfdNoLength) + { + doorbell_enabled = true; + } else { + *ft &= !DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY; + } + } + + debug!("mmiov2: fast-mmio enabled: {}", doorbell_enabled); + + let state = MmioV2DeviceState::new( + device, + vm_fd, + vm_as, + irq_manager, + device_resources, + mmio_base, + doorbell_enabled, + )?; + + let mut device_vendor = MMIO_VENDOR_ID_DRAGONBALL | msi_feature; + if let Some(ft) = features { + debug!("mmiov2: feature bit is 0x{:0X}", ft); + device_vendor |= ft & DRAGONBALL_FEATURE_MASK; + } + + Ok(MmioV2Device { + state: Mutex::new(state), + assigned_resources: resources, + mmio_cfg_res, + device_vendor, + driver_status: AtomicU32::new(DEVICE_INIT), + config_generation: AtomicU32::new(0), + interrupt_status: Arc::new(InterruptStatusRegister32::new()), + }) + } + + /// Acquires the state while holding the lock. + pub fn state(&self) -> MutexGuard> { + // Safe to unwrap() because we don't expect poisoned lock here. + self.state.lock().unwrap() + } + + /// Removes device. + pub fn remove(&self) { + self.state().get_inner_device_mut().remove(); + } + + /// Returns the Resource. + pub fn get_mmio_cfg_res(&self) -> Resource { + self.mmio_cfg_res.clone() + } + + /// Returns the type of device. + pub fn get_device_type(&self) -> u32 { + self.state().get_inner_device().device_type() + } + + pub(crate) fn interrupt_status(&self) -> Arc { + self.interrupt_status.clone() + } + + #[inline] + /// Atomic sets the drive state to fail. 
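Because `new()` folds the transport feature bits into the vendor-ID register, a guest can recover both pieces from a single read of `REG_MMIO_VENDOR_ID`. A guest-side decode might look like the sketch below (illustrative helper, not part of this patch):

```rust
/// Illustrative helper (not part of this patch): split the value a guest reads
/// from REG_MMIO_VENDOR_ID into the Dragonball vendor id (upper 24 bits) and the
/// transport feature bits (lower 8 bits).
fn decode_vendor_id(v: u32) -> (u32, u32) {
    (v & !DRAGONBALL_FEATURE_MASK, v & DRAGONBALL_FEATURE_MASK)
}

#[test]
fn decode_vendor_id_splits_features() {
    let v = MMIO_VENDOR_ID_DRAGONBALL | DRAGONBALL_FEATURE_MSI_INTR | DRAGONBALL_FEATURE_INTR_USED;
    let (vendor, features) = decode_vendor_id(v);
    assert_eq!(vendor, MMIO_VENDOR_ID_DRAGONBALL);
    assert_ne!(features & DRAGONBALL_FEATURE_MSI_INTR, 0);
    assert_ne!(features & DRAGONBALL_FEATURE_INTR_USED, 0);
}
```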
+ pub(crate) fn set_driver_failed(&self) { + self.driver_status.fetch_or(DEVICE_FAILED, Ordering::SeqCst); + } + + #[inline] + pub(crate) fn driver_status(&self) -> u32 { + self.driver_status.load(Ordering::SeqCst) + } + + #[inline] + fn check_driver_status(&self, set: u32, clr: u32) -> bool { + self.driver_status() & (set | clr) == set + } + + #[inline] + fn exchange_driver_status(&self, old: u32, new: u32) -> std::result::Result { + self.driver_status + .compare_exchange(old, new, Ordering::SeqCst, Ordering::SeqCst) + } + + /// Update driver status according to the state machine defined by VirtIO Spec 1.0. + /// Please refer to VirtIO Spec 1.0, section 2.1.1 and 3.1.1. + /// + /// The driver MUST update device status, setting bits to indicate the completed steps + /// of the driver initialization sequence specified in 3.1. The driver MUST NOT clear + /// a device status bit. If the driver sets the FAILED bit, the driver MUST later reset + /// the device before attempting to re-initialize. + fn update_driver_status(&self, v: u32) { + // Serialize to update device state. + let mut state = self.state(); + let mut result = Err(DEVICE_FAILED); + if v == DEVICE_STATUS_ACKNOWLEDE { + result = self.exchange_driver_status(DEVICE_STATUS_INIT, DEVICE_STATUS_ACKNOWLEDE); + } else if v == DEVICE_STATUS_DRIVER { + result = self.exchange_driver_status(DEVICE_STATUS_ACKNOWLEDE, DEVICE_STATUS_DRIVER); + } else if v == DEVICE_STATUS_FEATURE_OK { + result = self.exchange_driver_status(DEVICE_STATUS_DRIVER, DEVICE_STATUS_FEATURE_OK); + } else if v == DEVICE_STATUS_DRIVER_OK { + result = self.exchange_driver_status(DEVICE_STATUS_FEATURE_OK, DEVICE_STATUS_DRIVER_OK); + if result.is_ok() { + if let Err(e) = state.activate(self) { + // Reset internal status to initial state on failure. + // Error is ignored since the device will go to DEVICE_FAILED status. + let _ = state.reset(); + warn!("failed to activate MMIO Virtio device: {:?}", e); + result = Err(DEVICE_FAILED); + } + } + } else if v == 0 { + if self.driver_status() == DEVICE_INIT { + result = Ok(0); + } else if state.device_activated() { + let ret = state.get_inner_device_mut().reset(); + if ret.is_err() { + warn!("failed to reset MMIO Virtio device: {:?}.", ret); + } else { + state.deactivate(); + // it should reset the device's status to init, otherwise, the guest would + // get the wrong device's status. + if let Err(e) = state.reset() { + warn!("failed to reset device state due to {:?}", e); + result = Err(DEVICE_FAILED); + } else { + result = self + .exchange_driver_status(DEVICE_STATUS_DRIVER_OK, DEVICE_STATUS_INIT); + } + } + } + } else if v == self.driver_status() { + // No real state change, nothing to do. + result = Ok(0); + } else if v & DEVICE_FAILED != 0 { + // Guest driver marks device as failed. + self.set_driver_failed(); + result = Ok(0); + } + + if result.is_err() { + warn!( + "invalid virtio driver status transition: 0x{:x} -> 0x{:x}", + self.driver_status(), + v + ); + // TODO: notify backend driver to stop the device + self.set_driver_failed(); + } + } + + fn update_queue_field(&self, f: F) { + // Use mutex for state to protect device.write_config() + let mut state = self.state(); + if self.check_driver_status(DEVICE_FEATURES_OK, DEVICE_DRIVER_OK | DEVICE_FAILED) { + state.with_queue_mut(f); + } else { + info!( + "update virtio queue in invalid state 0x{:x}", + self.driver_status() + ); + } + } + + fn tweak_intr_flags(&self, flags: u32) -> u32 { + // The MMIO virtio transport layer only supports legacy IRQs. 
And the typical way to + // inject interrupt into the guest is: + // 1) the vhost-user-net slave sends notifcaticaiton to dragonball by writing to eventfd. + // 2) dragonball consumes the notification by read the eventfd. + // 3) dragonball updates interrupt status register. + // 4) dragonball injects interrupt to the guest by writing to an irqfd. + // + // We play a trick here to always report "descriptor ready in the used virtque". + // This trick doesn't break the virtio spec because it allow virtio devices to inject + // supurous interrupts. By applying this trick, the way to inject interrupts gets + // simplified as: + // 1) the vhost-user-net slave sends interrupt to the guest by writing to the irqfd. + if self.device_vendor & DRAGONBALL_FEATURE_INTR_USED != 0 { + flags | VIRTIO_INTR_VRING + } else { + flags + } + } + + fn device_features(&self) -> u32 { + let state = self.state(); + let features_select = state.features_select(); + let mut features = state.get_inner_device().get_avail_features(features_select); + if features_select == 1 { + features |= 0x1; // enable support of VirtIO Version 1 + } + features + } + + fn set_acked_features(&self, v: u32) { + // Use mutex for state to protect device.ack_features() + let mut state = self.state(); + if self.check_driver_status(DEVICE_DRIVER, DEVICE_FEATURES_OK | DEVICE_FAILED) { + state.set_acked_features(v); + } else { + info!( + "ack virtio features in invalid state 0x{:x}", + self.driver_status() + ); + } + } + + fn get_device_config(&self, offset: u64, data: &mut [u8]) { + // Use mutex for state to protect device.write_config() + let mut state = self.state(); + if self.check_driver_status(DEVICE_DRIVER, DEVICE_FAILED) { + if let Err(e) = state.get_inner_device_mut().read_config(offset, data) { + warn!("device read config err: {}", e); + } + } else { + info!("can not read from device config data area before driver is ready"); + } + } + + fn set_device_config(&self, offset: u64, data: &[u8]) { + // Use mutex for state to protect device.write_config() + let mut state = self.state(); + if self.check_driver_status(DEVICE_DRIVER, DEVICE_FAILED) { + if let Err(e) = state.get_inner_device_mut().write_config(offset, data) { + warn!("device write config err: {}", e); + } + } else { + info!("can not write to device config data area before driver is ready"); + } + } + + fn get_shm_base_low(&self) -> u32 { + let mut state = self.state(); + let guest_addr: u64 = match state.shm_regions() { + Some(regions) => regions.guest_addr.0, + None => 0, + }; + state.get_shm_field(0xffff_ffff, |s| (s.offset + guest_addr) as u32) + } + + fn get_shm_base_high(&self) -> u32 { + let mut state = self.state(); + let guest_addr: u64 = match state.shm_regions() { + Some(regions) => regions.guest_addr.0, + None => 0, + }; + state.get_shm_field(0xffff_ffff, |s| ((s.offset + guest_addr) >> 32) as u32) + } +} + +impl DeviceIo for MmioV2Device +where + AS: 'static + GuestAddressSpace + Send + Sync + Clone, + Q: 'static + QueueT + Send + Clone, + R: 'static + GuestMemoryRegion + Send + Sync, +{ + fn read(&self, _base: IoAddress, offset: IoAddress, data: &mut [u8]) { + let offset = offset.raw_value(); + + if offset >= MMIO_CFG_SPACE_OFF { + self.get_device_config(offset - MMIO_CFG_SPACE_OFF, data); + } else if data.len() == 4 { + let v = match offset { + REG_MMIO_MAGIC_VALUE => MMIO_MAGIC_VALUE, + REG_MMIO_VERSION => MMIO_VERSION_2, + REG_MMIO_DEVICE_ID => self.state().get_inner_device().device_type(), + REG_MMIO_VENDOR_ID => self.device_vendor, + REG_MMIO_DEVICE_FEATURE => 
self.device_features(), + REG_MMIO_QUEUE_NUM_MA => self.state().with_queue(0, |q| q.max_size() as u32), + REG_MMIO_QUEUE_READY => self.state().with_queue(0, |q| q.ready() as u32), + REG_MMIO_QUEUE_NOTIF if self.state().doorbell().is_some() => { + // Safe to unwrap() because we have determined the option is a Some value. + self.state() + .doorbell() + .map(|doorbell| doorbell.register_data()) + .unwrap() + } + REG_MMIO_INTERRUPT_STAT => self.tweak_intr_flags(self.interrupt_status.read()), + REG_MMIO_STATUS => self.driver_status(), + REG_MMIO_SHM_LEN_LOW => self.state().get_shm_field(0xffff_ffff, |s| s.len as u32), + REG_MMIO_SHM_LEN_HIGH => self + .state() + .get_shm_field(0xffff_ffff, |s| (s.len >> 32) as u32), + REG_MMIO_SHM_BASE_LOW => self.get_shm_base_low(), + REG_MMIO_SHM_BASE_HIGH => self.get_shm_base_high(), + REG_MMIO_CONFIG_GENERATI => self.config_generation.load(Ordering::SeqCst), + _ => { + info!("unknown virtio mmio readl at 0x{:x}", offset); + return; + } + }; + LittleEndian::write_u32(data, v); + } else if data.len() == 2 { + let v = match offset { + REG_MMIO_MSI_CSR => { + if (self.device_vendor & DRAGONBALL_FEATURE_MSI_INTR) != 0 { + MMIO_MSI_CSR_SUPPORTED + } else { + 0 + } + } + _ => { + info!("unknown virtio mmio readw from 0x{:x}", offset); + return; + } + }; + LittleEndian::write_u16(data, v); + } else { + info!( + "unknown virtio mmio register read: 0x{:x}/0x{:x}", + offset, + data.len() + ); + } + } + + fn write(&self, _base: IoAddress, offset: IoAddress, data: &[u8]) { + let offset = offset.raw_value(); + // Write to the device configuration area. + if (MMIO_CFG_SPACE_OFF..DRAGONBALL_MMIO_DOORBELL_OFFSET).contains(&offset) { + self.set_device_config(offset - MMIO_CFG_SPACE_OFF, data); + } else if data.len() == 4 { + let v = LittleEndian::read_u32(data); + match offset { + REG_MMIO_DEVICE_FEATURES_S => self.state().set_features_select(v), + REG_MMIO_DRIVER_FEATURE => self.set_acked_features(v), + REG_MMIO_DRIVER_FEATURES_S => self.state().set_acked_features_select(v), + REG_MMIO_QUEUE_SEL => self.state().set_queue_select(v), + REG_MMIO_QUEUE_NUM => self.update_queue_field(|q| q.set_size(v as u16)), + REG_MMIO_QUEUE_READY => self.update_queue_field(|q| q.set_ready(v == 1)), + REG_MMIO_INTERRUPT_AC => self.interrupt_status.clear_bits(v), + REG_MMIO_STATUS => self.update_driver_status(v), + REG_MMIO_QUEUE_DESC_LOW => { + self.update_queue_field(|q| q.set_desc_table_address(Some(v), None)) + } + REG_MMIO_QUEUE_DESC_HIGH => { + self.update_queue_field(|q| q.set_desc_table_address(None, Some(v))) + } + REG_MMIO_QUEUE_AVAIL_LOW => { + self.update_queue_field(|q| q.set_avail_ring_address(Some(v), None)) + } + REG_MMIO_QUEUE_AVAIL_HIGH => { + self.update_queue_field(|q| q.set_avail_ring_address(None, Some(v))) + } + REG_MMIO_QUEUE_USED_LOW => { + self.update_queue_field(|q| q.set_used_ring_address(Some(v), None)) + } + REG_MMIO_QUEUE_USED_HIGH => { + self.update_queue_field(|q| q.set_used_ring_address(None, Some(v))) + } + REG_MMIO_SHM_SEL => self.state().set_shm_region_id(v), + REG_MMIO_MSI_ADDRESS_L => self.state().set_msi_address_low(v), + REG_MMIO_MSI_ADDRESS_H => self.state().set_msi_address_high(v), + REG_MMIO_MSI_DATA => self.state().set_msi_data(v), + _ => info!("unknown virtio mmio writel to 0x{:x}", offset), + } + } else if data.len() == 2 { + let v = LittleEndian::read_u16(data); + match offset { + REG_MMIO_MSI_CSR => self.state().update_msi_enable(v, self), + REG_MMIO_MSI_COMMAND => self.state().handle_msi_cmd(v, self), + _ => { + info!("unknown virtio mmio writew 
to 0x{:x}", offset); + } + } + } else { + info!( + "unknown virtio mmio register write: 0x{:x}/0x{:x}", + offset, + data.len() + ); + } + } + + fn get_assigned_resources(&self) -> DeviceResources { + self.assigned_resources.clone() + } + + fn get_trapped_io_resources(&self) -> DeviceResources { + let mut resources = DeviceResources::new(); + + resources.append(self.mmio_cfg_res.clone()); + + resources + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } +} + +#[cfg(test)] +pub(crate) mod tests { + use std::any::Any; + use std::sync::Mutex; + + use byteorder::{ByteOrder, LittleEndian}; + use dbs_device::resources::{MsiIrqType, Resource, ResourceConstraint}; + use dbs_device::{DeviceIo, IoAddress}; + use dbs_utils::epoll_manager::EpollManager; + use kvm_bindings::kvm_userspace_memory_region; + use kvm_ioctls::Kvm; + use virtio_queue::QueueSync; + use vm_memory::{ + GuestAddress, GuestMemoryMmap, GuestMemoryRegion, GuestRegionMmap, MemoryRegionAddress, + MmapRegion, + }; + + use super::*; + use crate::{ + ActivateResult, ConfigResult, Error, VirtioDeviceConfig, VirtioDeviceInfo, + VirtioSharedMemory, VirtioSharedMemoryList, DEVICE_ACKNOWLEDGE, DEVICE_DRIVER, + DEVICE_FEATURES_OK, + }; + + pub struct MmioDevice { + state: Mutex, + config: Mutex>>>, + ctrl_queue_size: u16, + } + + impl MmioDevice { + pub fn new(ctrl_queue_size: u16) -> Self { + let epoll_mgr = EpollManager::default(); + let state = VirtioDeviceInfo::new( + "dummy".to_string(), + 0xf, + Arc::new(vec![16u16, 32u16]), + vec![0xffu8; 256], + epoll_mgr, + ); + MmioDevice { + state: Mutex::new(state), + config: Mutex::new(None), + ctrl_queue_size, + } + } + } + + impl VirtioDevice, QueueSync, GuestRegionMmap> for MmioDevice { + fn device_type(&self) -> u32 { + 123 + } + + fn queue_max_sizes(&self) -> &[u16] { + &[16, 32] + } + + fn ctrl_queue_max_sizes(&self) -> u16 { + self.ctrl_queue_size + } + + fn set_acked_features(&mut self, page: u32, value: u32) { + self.state.lock().unwrap().set_acked_features(page, value); + } + + fn read_config(&mut self, offset: u64, data: &mut [u8]) -> ConfigResult { + self.state.lock().unwrap().read_config(offset, data) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> ConfigResult { + self.state.lock().unwrap().write_config(offset, data) + } + + fn activate(&mut self, config: VirtioDeviceConfig>) -> ActivateResult { + self.config.lock().unwrap().replace(config); + Ok(()) + } + + fn reset(&mut self) -> ActivateResult { + Ok(()) + } + + fn set_resource( + &mut self, + vm_fd: Arc, + resource: DeviceResources, + ) -> Result>> { + let mmio_res = resource.get_mmio_address_ranges(); + let slot_res = resource.get_kvm_mem_slots(); + + if mmio_res.is_empty() || slot_res.is_empty() { + return Ok(None); + } + + let guest_addr = mmio_res[0].0; + let len = mmio_res[0].1; + + let mmap_region = GuestRegionMmap::new( + MmapRegion::new(len as usize).unwrap(), + GuestAddress(guest_addr), + ) + .unwrap(); + let host_addr: u64 = mmap_region + .get_host_address(MemoryRegionAddress(0)) + .unwrap() as u64; + let kvm_mem_region = kvm_userspace_memory_region { + slot: slot_res[0], + flags: 0, + guest_phys_addr: guest_addr, + memory_size: len, + userspace_addr: host_addr, + }; + unsafe { vm_fd.set_user_memory_region(kvm_mem_region).unwrap() }; + Ok(Some(VirtioSharedMemoryList { + host_addr, + guest_addr: GuestAddress(guest_addr), + len, + kvm_userspace_memory_region_flags: 0, + kvm_userspace_memory_region_slot: slot_res[0], + region_list: vec![VirtioSharedMemory { + offset: 0x40_0000, + len, + }], + 
mmap_region: Arc::new(mmap_region), + })) + } + + fn get_resource_requirements( + &self, + _requests: &mut Vec, + _use_generic_irq: bool, + ) { + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + } + + pub fn set_driver_status( + d: &mut MmioV2Device, QueueSync, GuestRegionMmap>, + status: u32, + ) { + let mut buf = vec![0; 4]; + LittleEndian::write_u32(&mut buf[..], status); + d.write(IoAddress(0), IoAddress(REG_MMIO_STATUS), &buf[..]); + } + + pub fn get_device_resource(have_msi_feature: bool, shared_memory: bool) -> DeviceResources { + let mut resources = DeviceResources::new(); + resources.append(Resource::MmioAddressRange { + base: 0, + size: MMIO_DEFAULT_CFG_SIZE + DRAGONBALL_MMIO_DOORBELL_SIZE, + }); + resources.append(Resource::LegacyIrq(5)); + if have_msi_feature { + resources.append(Resource::MsiIrq { + ty: MsiIrqType::GenericMsi, + base: 24, + size: 1, + }); + } + if shared_memory { + resources.append(Resource::MmioAddressRange { + base: 0x1_0000_0000, + size: 0x1000, + }); + + resources.append(Resource::KvmMemSlot(1)); + } + resources + } + + pub fn get_mmio_device_inner( + doorbell: bool, + ctrl_queue_size: u16, + resources: DeviceResources, + ) -> MmioV2Device, QueueSync, GuestRegionMmap> { + let device = MmioDevice::new(ctrl_queue_size); + let mem = Arc::new(GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap()); + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + vm_fd.create_irq_chip().unwrap(); + let irq_manager = Arc::new(KvmIrqManager::new(vm_fd.clone())); + irq_manager.initialize().unwrap(); + + let features = if doorbell { + Some(DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY) + } else { + None + }; + + MmioV2Device::new( + vm_fd, + mem, + irq_manager, + Box::new(device), + resources, + features, + ) + .unwrap() + } + + pub fn get_mmio_device() -> MmioV2Device, QueueSync, GuestRegionMmap> { + let resources = get_device_resource(false, false); + get_mmio_device_inner(false, 0, resources) + } + + #[test] + fn test_virtio_mmio_v2_device_new() { + // test create error. 
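+ // Creating the device with an empty DeviceResources set is expected to fail with
+ // Error::InvalidInput, since no MMIO register range or interrupt resource has been assigned.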
+ let resources = DeviceResources::new(); + let mem = Arc::new(GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap()); + let device = MmioDevice::new(0); + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + vm_fd.create_irq_chip().unwrap(); + let irq_manager = Arc::new(KvmIrqManager::new(vm_fd.clone())); + irq_manager.initialize().unwrap(); + let ret = MmioV2Device::new(vm_fd, mem, irq_manager, Box::new(device), resources, None); + assert!(matches!(ret, Err(Error::InvalidInput))); + + // test create without msi + let mut d = get_mmio_device(); + + set_driver_status(&mut d, DEVICE_ACKNOWLEDGE); + assert_eq!(d.driver_status(), DEVICE_STATUS_ACKNOWLEDE); + set_driver_status(&mut d, DEVICE_ACKNOWLEDGE | DEVICE_DRIVER); + assert_eq!(d.driver_status(), DEVICE_STATUS_DRIVER); + set_driver_status( + &mut d, + DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_FEATURES_OK, + ); + assert_eq!(d.driver_status(), DEVICE_STATUS_FEATURE_OK); + + set_driver_status( + &mut d, + DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_FEATURES_OK | DEVICE_STATUS_DRIVER_OK, + ); + assert_ne!(d.driver_status() & DEVICE_FAILED, 0); + + // test create with msi + let d_mmio_feature = get_mmio_device_inner(false, 0, get_device_resource(true, false)); + assert_ne!( + d_mmio_feature.device_vendor & DRAGONBALL_FEATURE_MSI_INTR, + 0 + ); + + // test create with doorbell features + let d_doorbell = get_mmio_device_inner(true, 0, get_device_resource(false, false)); + assert_ne!( + d_doorbell.device_vendor & DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY, + 0 + ); + + // test ctrl queue + let d_ctrl = get_mmio_device_inner(true, 1, get_device_resource(false, false)); + assert_eq!(d_ctrl.state().queues().len(), 3); + } + + #[test] + fn test_bus_device_read() { + let mut d = get_mmio_device(); + + let mut buf = vec![0xff, 0, 0xfe, 0]; + let buf_copy = buf.to_vec(); + + // The following read shouldn't be valid, because the length of the buf is not 4. + buf.push(0); + d.read(IoAddress(0), IoAddress(0), &mut buf[..]); + assert_eq!(buf[..4], buf_copy[..]); + + // the length is ok again + buf.pop(); + + let mut dev_cfg = vec![0; 4]; + d.read( + IoAddress(0), + IoAddress(MMIO_CFG_SPACE_OFF), + &mut dev_cfg[..], + ); + assert_eq!(LittleEndian::read_u32(&dev_cfg[..]), 0x0); + + // Now we test that reading at various predefined offsets works as intended. 
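+ // The reads below walk the generic register block: magic value, version, device and
+ // vendor IDs, device features, queue limits, interrupt/status registers and the
+ // config generation counter.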
+ d.read(IoAddress(0), IoAddress(REG_MMIO_MAGIC_VALUE), &mut buf[..]); + assert_eq!(LittleEndian::read_u32(&buf[..]), MMIO_MAGIC_VALUE); + + d.read(IoAddress(0), IoAddress(REG_MMIO_VERSION), &mut buf[..]); + assert_eq!(LittleEndian::read_u32(&buf[..]), MMIO_VERSION_2); + + d.read(IoAddress(0), IoAddress(REG_MMIO_DEVICE_ID), &mut buf[..]); + assert_eq!( + LittleEndian::read_u32(&buf[..]), + d.state().get_inner_device().device_type() + ); + + d.read(IoAddress(0), IoAddress(REG_MMIO_VENDOR_ID), &mut buf[..]); + assert_eq!(LittleEndian::read_u32(&buf[..]), MMIO_VENDOR_ID_DRAGONBALL); + + d.state().set_features_select(0); + d.read( + IoAddress(0), + IoAddress(REG_MMIO_DEVICE_FEATURE), + &mut buf[..], + ); + assert_eq!( + LittleEndian::read_u32(&buf[..]), + d.state().get_inner_device().get_avail_features(0) + ); + + d.state().set_features_select(1); + d.read( + IoAddress(0), + IoAddress(REG_MMIO_DEVICE_FEATURE), + &mut buf[..], + ); + assert_eq!( + LittleEndian::read_u32(&buf[..]), + d.state().get_inner_device().get_avail_features(0) | 0x1 + ); + + d.read(IoAddress(0), IoAddress(REG_MMIO_QUEUE_NUM_MA), &mut buf[..]); + assert_eq!(LittleEndian::read_u32(&buf[..]), 16); + + d.read(IoAddress(0), IoAddress(REG_MMIO_QUEUE_READY), &mut buf[..]); + assert_eq!(LittleEndian::read_u32(&buf[..]), false as u32); + + d.read( + IoAddress(0), + IoAddress(REG_MMIO_INTERRUPT_STAT), + &mut buf[..], + ); + assert_eq!(LittleEndian::read_u32(&buf[..]), 0); + + d.read(IoAddress(0), IoAddress(REG_MMIO_STATUS), &mut buf[..]); + assert_eq!(LittleEndian::read_u32(&buf[..]), 0); + + d.config_generation.store(5, Ordering::SeqCst); + d.read( + IoAddress(0), + IoAddress(REG_MMIO_CONFIG_GENERATI), + &mut buf[..], + ); + assert_eq!(LittleEndian::read_u32(&buf[..]), 5); + + // This read shouldn't do anything, as it's past the readable generic registers, and + // before the device specific configuration space. Btw, reads from the device specific + // conf space are going to be tested a bit later, alongside writes. + buf = buf_copy.to_vec(); + d.read(IoAddress(0), IoAddress(0xfd), &mut buf[..]); + assert_eq!(buf[..], buf_copy[..]); + + // Read from an invalid address in generic register range. + d.read(IoAddress(0), IoAddress(0xfb), &mut buf[..]); + assert_eq!(buf[..], buf_copy[..]); + + // Read from an invalid length in generic register range. + d.read(IoAddress(0), IoAddress(0xfc), &mut buf[..3]); + assert_eq!(buf[..], buf_copy[..]); + + // test for no msi_feature + let mut buf = vec![0; 2]; + d.read(IoAddress(0), IoAddress(REG_MMIO_MSI_CSR), &mut buf[..]); + assert_eq!(LittleEndian::read_u16(&buf[..]), 0); + + // test for msi_feature + d.device_vendor |= DRAGONBALL_FEATURE_MSI_INTR; + let mut buf = vec![0; 2]; + d.read(IoAddress(0), IoAddress(REG_MMIO_MSI_CSR), &mut buf[..]); + assert_eq!(LittleEndian::read_u16(&buf[..]), MMIO_MSI_CSR_SUPPORTED); + + let mut dev_cfg = vec![0; 4]; + assert_eq!( + d.exchange_driver_status(0, DEVICE_DRIVER | DEVICE_INIT) + .unwrap(), + 0 + ); + d.read( + IoAddress(0), + IoAddress(MMIO_CFG_SPACE_OFF), + &mut dev_cfg[..], + ); + assert_eq!(LittleEndian::read_u32(&dev_cfg[..]), 0xffffffff); + } + + #[test] + fn test_bus_device_write() { + let mut d = get_mmio_device(); + + let mut buf = vec![0; 5]; + LittleEndian::write_u32(&mut buf[..4], 1); + + // Nothing should happen, because the slice len > 4. 
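+ // Register writes must be exactly 4 bytes (or 2 bytes for the MSI registers), so this
+ // 5-byte write is ignored and features_select stays 0.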
+ d.state().set_features_select(0); + d.write( + IoAddress(0), + IoAddress(REG_MMIO_DEVICE_FEATURES_S), + &buf[..], + ); + assert_eq!(d.state().features_select(), 0); + + set_driver_status(&mut d, DEVICE_ACKNOWLEDGE); + assert_eq!(d.driver_status(), DEVICE_STATUS_ACKNOWLEDE); + set_driver_status(&mut d, DEVICE_STATUS_DRIVER); + assert_eq!(d.driver_status(), DEVICE_STATUS_DRIVER); + + let mut buf = vec![0; 4]; + buf[0] = 0xa5; + d.write(IoAddress(0), IoAddress(MMIO_CFG_SPACE_OFF), &buf[..]); + buf[0] = 0; + d.read(IoAddress(0), IoAddress(MMIO_CFG_SPACE_OFF), &mut buf[..]); + assert_eq!(buf[0], 0xa5); + assert_eq!(buf[1], 0); + + // Acking features in invalid state shouldn't take effect. + d.state().set_acked_features_select(0x0); + LittleEndian::write_u32(&mut buf[..], 1); + d.write(IoAddress(0), IoAddress(REG_MMIO_DRIVER_FEATURE), &buf[..]); + // TODO: find a way to check acked features + + // now writes should work + d.state().set_features_select(0); + LittleEndian::write_u32(&mut buf[..], 1); + d.write( + IoAddress(0), + IoAddress(REG_MMIO_DEVICE_FEATURES_S), + &buf[..], + ); + assert_eq!(d.state().features_select(), 1); + + d.state().set_acked_features_select(0x123); + LittleEndian::write_u32(&mut buf[..], 1); + d.write(IoAddress(0), IoAddress(REG_MMIO_DRIVER_FEATURE), &buf[..]); + // TODO: find a way to check acked features + + d.state().set_acked_features_select(0); + LittleEndian::write_u32(&mut buf[..], 2); + d.write( + IoAddress(0), + IoAddress(REG_MMIO_DRIVER_FEATURES_S), + &buf[..], + ); + assert_eq!(d.state().acked_features_select(), 2); + + set_driver_status(&mut d, DEVICE_STATUS_FEATURE_OK); + assert_eq!(d.driver_status(), DEVICE_STATUS_FEATURE_OK); + + // Setup queues + d.state().set_queue_select(0); + LittleEndian::write_u32(&mut buf[..], 3); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_SEL), &buf[..]); + assert_eq!(d.state().queue_select(), 3); + + d.state().set_queue_select(0); + assert_eq!(d.state().queues()[0].queue.size(), 16); + LittleEndian::write_u32(&mut buf[..], 8); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_NUM), &buf[..]); + assert_eq!(d.state().queues()[0].queue.size(), 8); + + assert!(!d.state().queues()[0].queue.ready()); + LittleEndian::write_u32(&mut buf[..], 1); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_READY), &buf[..]); + assert!(d.state().queues()[0].queue.ready()); + + LittleEndian::write_u32(&mut buf[..], 0b111); + d.write(IoAddress(0), IoAddress(REG_MMIO_INTERRUPT_AC), &buf[..]); + + assert_eq!(d.state().queues_mut()[0].queue.lock().desc_table(), 0); + + // When write descriptor, descriptor table will judge like this: + // if desc_table.mask(0xf) != 0 { + // virtio queue descriptor table breaks alignment constraints + // return + // desc_table is the data that will be written. 
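+ // 0x120 is 16-byte aligned (0x120 & 0xf == 0), so the descriptor table writes below
+ // pass the alignment check and take effect.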
+ LittleEndian::write_u32(&mut buf[..], 0x120); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_DESC_LOW), &buf[..]); + assert_eq!(d.state().queues_mut()[0].queue.lock().desc_table(), 0x120); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_DESC_HIGH), &buf[..]); + assert_eq!( + d.state().queues_mut()[0].queue.lock().desc_table(), + 0x120 + (0x120 << 32) + ); + + assert_eq!(d.state().queues_mut()[0].queue.lock().avail_ring(), 0); + LittleEndian::write_u32(&mut buf[..], 124); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_AVAIL_LOW), &buf[..]); + assert_eq!(d.state().queues_mut()[0].queue.lock().avail_ring(), 124); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_AVAIL_HIGH), &buf[..]); + assert_eq!( + d.state().queues_mut()[0].queue.lock().avail_ring(), + 124 + (124 << 32) + ); + + assert_eq!(d.state().queues_mut()[0].queue.lock().used_ring(), 0); + LittleEndian::write_u32(&mut buf[..], 128); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_USED_LOW), &buf[..]); + assert_eq!(d.state().queues_mut()[0].queue.lock().used_ring(), 128); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_USED_HIGH), &buf[..]); + assert_eq!( + d.state().queues_mut()[0].queue.lock().used_ring(), + 128 + (128 << 32) + ); + + // Write to an invalid address in generic register range. + LittleEndian::write_u32(&mut buf[..], 0xf); + d.config_generation.store(0, Ordering::SeqCst); + d.write(IoAddress(0), IoAddress(0xfb), &buf[..]); + assert_eq!(d.config_generation.load(Ordering::SeqCst), 0); + + // Write to an invalid length in generic register range. + d.write(IoAddress(0), IoAddress(REG_MMIO_CONFIG_GENERATI), &buf[..2]); + assert_eq!(d.config_generation.load(Ordering::SeqCst), 0); + } + + #[test] + fn test_bus_device_activate() { + // invalid state transition should failed + let mut d = get_mmio_device(); + + assert!(!d.state().check_queues_valid()); + assert!(!d.state().device_activated()); + assert_eq!(d.driver_status(), DEVICE_INIT); + set_driver_status(&mut d, DEVICE_ACKNOWLEDGE); + assert_eq!(d.driver_status(), DEVICE_ACKNOWLEDGE); + set_driver_status(&mut d, DEVICE_ACKNOWLEDGE | DEVICE_DRIVER); + assert_eq!(d.driver_status(), DEVICE_ACKNOWLEDGE | DEVICE_DRIVER); + // Invalid state set + set_driver_status( + &mut d, + DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_DRIVER_OK, + ); + assert_eq!( + d.driver_status(), + DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_FAILED + ); + + // valid state transition + let mut d = get_mmio_device(); + + assert!(!d.state().check_queues_valid()); + assert!(!d.state().device_activated()); + assert_eq!(d.driver_status(), DEVICE_INIT); + + set_driver_status(&mut d, DEVICE_ACKNOWLEDGE); + assert_eq!(d.driver_status(), DEVICE_ACKNOWLEDGE); + set_driver_status(&mut d, DEVICE_ACKNOWLEDGE | DEVICE_DRIVER); + assert_eq!(d.driver_status(), DEVICE_ACKNOWLEDGE | DEVICE_DRIVER); + + set_driver_status( + &mut d, + DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_FEATURES_OK, + ); + assert_eq!( + d.driver_status(), + DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_FEATURES_OK + ); + + let mut buf = vec![0; 4]; + let size = d.state().queues().len(); + for q in 0..size { + d.state().set_queue_select(q as u32); + LittleEndian::write_u32(&mut buf[..], 16); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_NUM), &buf[..]); + LittleEndian::write_u32(&mut buf[..], 1); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_READY), &buf[..]); + } + assert!(d.state().check_queues_valid()); + assert!(!d.state().device_activated()); + + // Device should be ready for activation now. 
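+ // Only the DRIVER_OK status write further down may trigger activation; the stray
+ // write to an unknown offset right below must leave the device inactive.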
+ + // A couple of invalid writes; will trigger warnings; shouldn't activate the device. + d.write(IoAddress(0), IoAddress(0xa8), &buf[..]); + assert!(!d.state().device_activated()); + + set_driver_status( + &mut d, + DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_FEATURES_OK | DEVICE_DRIVER_OK, + ); + assert_eq!( + d.driver_status(), + DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_FEATURES_OK | DEVICE_DRIVER_OK + ); + assert!(d.state().device_activated()); + + // activate again + set_driver_status( + &mut d, + DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_FEATURES_OK | DEVICE_DRIVER_OK, + ); + assert!(d.state().device_activated()); + + // A write which changes the size of a queue after activation; currently only triggers + // a warning path and have no effect on queue state. + LittleEndian::write_u32(&mut buf[..], 0); + d.state().set_queue_select(0); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_READY), &buf[..]); + d.read(IoAddress(0), IoAddress(REG_MMIO_QUEUE_READY), &mut buf[..]); + assert_eq!(LittleEndian::read_u32(&buf[..]), 1); + } + + fn activate_device(d: &mut MmioV2Device, QueueSync, GuestRegionMmap>) { + set_driver_status(d, DEVICE_ACKNOWLEDGE); + set_driver_status(d, DEVICE_ACKNOWLEDGE | DEVICE_DRIVER); + set_driver_status(d, DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_FEATURES_OK); + + // Setup queue data structures + let mut buf = vec![0; 4]; + let size = d.state().queues().len(); + for q in 0..size { + d.state().set_queue_select(q as u32); + LittleEndian::write_u32(&mut buf[..], 16); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_NUM), &buf[..]); + LittleEndian::write_u32(&mut buf[..], 1); + d.write(IoAddress(0), IoAddress(REG_MMIO_QUEUE_READY), &buf[..]); + } + assert!(d.state().check_queues_valid()); + assert!(!d.state().device_activated()); + + // Device should be ready for activation now. 
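+ // Writing DRIVER_OK on top of ACKNOWLEDGE | DRIVER | FEATURES_OK transitions the
+ // device into the activated state asserted below.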
+ set_driver_status( + d, + DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_FEATURES_OK | DEVICE_DRIVER_OK, + ); + assert_eq!( + d.driver_status(), + DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_FEATURES_OK | DEVICE_DRIVER_OK + ); + assert!(d.state().device_activated()); + } + + #[test] + fn test_bus_device_reset() { + let resources = get_device_resource(false, false); + let mut d = get_mmio_device_inner(true, 0, resources); + let mut buf = vec![0; 4]; + + assert!(!d.state().check_queues_valid()); + assert!(!d.state().device_activated()); + assert_eq!(d.driver_status(), 0); + activate_device(&mut d); + + // Marking device as FAILED should not affect device_activated state + LittleEndian::write_u32(&mut buf[..], 0x8f); + d.write(IoAddress(0), IoAddress(REG_MMIO_STATUS), &buf[..]); + assert_eq!(d.driver_status(), 0x8f); + assert!(d.state().device_activated()); + + // Nothing happens when backend driver doesn't support reset + LittleEndian::write_u32(&mut buf[..], 0x0); + d.write(IoAddress(0), IoAddress(REG_MMIO_STATUS), &buf[..]); + assert_eq!(d.driver_status(), 0x8f); + assert!(!d.state().device_activated()); + + // test for reactivate device + // but device don't support reactivate now + d.state().deactivate(); + assert!(!d.state().device_activated()); + } + + #[test] + fn test_mmiov2_device_resources() { + let d = get_mmio_device(); + + let resources = d.get_assigned_resources(); + assert_eq!(resources.len(), 2); + let resources = d.get_trapped_io_resources(); + assert_eq!(resources.len(), 1); + let mmio_cfg_res = resources.get_mmio_address_ranges(); + assert_eq!(mmio_cfg_res.len(), 1); + assert_eq!( + mmio_cfg_res[0].1, + MMIO_DEFAULT_CFG_SIZE + DRAGONBALL_MMIO_DOORBELL_SIZE + ); + } + + #[test] + fn test_mmio_v2_device_msi() { + let resources = get_device_resource(true, false); + let mut d = get_mmio_device_inner(true, 0, resources); + + let mut buf = vec![0; 4]; + LittleEndian::write_u32(&mut buf[..], 0x1234); + d.write(IoAddress(0), IoAddress(REG_MMIO_MSI_ADDRESS_L), &buf[..]); + LittleEndian::write_u32(&mut buf[..], 0x5678); + d.write(IoAddress(0), IoAddress(REG_MMIO_MSI_ADDRESS_H), &buf[..]); + LittleEndian::write_u32(&mut buf[..], 0x11111111); + d.write(IoAddress(0), IoAddress(REG_MMIO_MSI_DATA), &buf[..]); + + // Enable msi + LittleEndian::write_u16(&mut buf[..], MMIO_MSI_CSR_ENABLED); + d.write(IoAddress(0), IoAddress(REG_MMIO_MSI_CSR), &buf[..2]); + + // Activate the device, it will enable interrupts. 
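+ // With MSI enabled, exercise the MSI command register: update, mask and unmask are
+ // accepted, while an unknown command code marks the device as FAILED.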
+ activate_device(&mut d); + + // update msi index + LittleEndian::write_u16(&mut buf[..], MMIO_MSI_CMD_CODE_UPDATE); + d.write(IoAddress(0), IoAddress(REG_MMIO_MSI_COMMAND), &buf[..2]); + + // update msi int mask + LittleEndian::write_u16(&mut buf[..], MMIO_MSI_CMD_CODE_INT_MASK); + d.write(IoAddress(0), IoAddress(REG_MMIO_MSI_COMMAND), &buf[..2]); + + // update msi int unmask + LittleEndian::write_u16(&mut buf[..], MMIO_MSI_CMD_CODE_INT_UNMASK); + d.write(IoAddress(0), IoAddress(REG_MMIO_MSI_COMMAND), &buf[..2]); + + // unknown msi command + LittleEndian::write_u16(&mut buf[..], 0x4000); + d.write(IoAddress(0), IoAddress(REG_MMIO_MSI_COMMAND), &buf[..2]); + assert_ne!(d.driver_status() & DEVICE_FAILED, 0); + + // Disable msi + LittleEndian::write_u16(&mut buf[..], 0); + d.write(IoAddress(0), IoAddress(REG_MMIO_MSI_CSR), &buf[..2]); + } + + #[test] + fn test_mmio_shared_memory() { + let resources = get_device_resource(true, true); + let d = get_mmio_device_inner(true, 0, resources); + + let mut buf = vec![0; 4]; + + // shm select 0 + d.write(IoAddress(0), IoAddress(REG_MMIO_SHM_SEL), &buf[..]); + + d.read(IoAddress(0), IoAddress(REG_MMIO_SHM_LEN_LOW), &mut buf[..]); + assert_eq!(LittleEndian::read_u32(&buf[..]), 0x1000); + + d.read(IoAddress(0), IoAddress(REG_MMIO_SHM_LEN_HIGH), &mut buf[..]); + assert_eq!(LittleEndian::read_u32(&buf[..]), 0x0); + + d.read(IoAddress(0), IoAddress(REG_MMIO_SHM_BASE_LOW), &mut buf[..]); + assert_eq!(LittleEndian::read_u32(&buf[..]), 0x40_0000); + + d.read( + IoAddress(0), + IoAddress(REG_MMIO_SHM_BASE_HIGH), + &mut buf[..], + ); + assert_eq!(LittleEndian::read_u32(&buf[..]), 0x1); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/mmio/mod.rs b/src/dragonball/src/dbs_virtio_devices/src/mmio/mod.rs new file mode 100644 index 000000000000..d2208241124f --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/mmio/mod.rs @@ -0,0 +1,137 @@ +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause +// +// Portions Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Implementations of the Virtio MMIO Transport Layer. +//! +//! The Virtio specifications have defined two versions for the Virtio MMIO transport layer. The +//! version 1 is called legacy mode, and the version 2 is preferred currently. The common parts +//! of both versions are defined here. + +mod mmio_state; +pub use self::mmio_state::*; + +mod mmio_v2; +pub use self::mmio_v2::*; + +mod dragonball; +pub use self::dragonball::*; + +/// Magic number for MMIO virtio devices. +/// Required by the virtio mmio device register layout at offset 0 from base +pub const MMIO_MAGIC_VALUE: u32 = 0x74726976; + +/// Version number for legacy MMIO virito devices. +pub const MMIO_VERSION_1: u32 = 1; + +/// Current version specified by the mmio standard. +pub const MMIO_VERSION_2: u32 = 2; + +/// Offset from the base MMIO address of a virtio device used by the guest to notify the device of +/// queue events. +pub const MMIO_NOTIFY_REG_OFFSET: u32 = 0x50; + +/// Default size for MMIO device configuration address space. 
+/// +/// This represents the size of the mmio device specified to the kernel as a cmdline option +/// It has to be larger than 0x100 (the offset where the configuration space starts from +/// the beginning of the memory mapped device registers) + the size of the configuration space +/// Currently hardcoded to 4K +pub const MMIO_DEFAULT_CFG_SIZE: u64 = 0x1000; + +/// +/// Control registers + +// Magic value ("virt" string) - Read Only +pub const REG_MMIO_MAGIC_VALUE: u64 = 0x000; + +// Virtio device version - Read Only +pub const REG_MMIO_VERSION: u64 = 0x004; + +// Virtio device ID - Read Only +pub const REG_MMIO_DEVICE_ID: u64 = 0x008; + +// Virtio vendor ID - Read Only +pub const REG_MMIO_VENDOR_ID: u64 = 0x00c; + +// Bitmask of the features supported by the device (host) +// (32 bits per set) - Read Only +pub const REG_MMIO_DEVICE_FEATURE: u64 = 0x010; + +// Device (host) features set selector - Write Only +pub const REG_MMIO_DEVICE_FEATURES_S: u64 = 0x014; + +// Bitmask of features activated by the driver (guest) +// (32 bits per set) - Write Only +pub const REG_MMIO_DRIVER_FEATURE: u64 = 0x020; + +// Activated features set selector - Write Only */ +pub const REG_MMIO_DRIVER_FEATURES_S: u64 = 0x024; + +// Guest's memory page size in bytes - Write Only +pub const REG_MMIO_GUEST_PAGE_SIZ: u64 = 0x028; + +// Queue selector - Write Only +pub const REG_MMIO_QUEUE_SEL: u64 = 0x030; + +// Maximum size of the currently selected queue - Read Only +pub const REG_MMIO_QUEUE_NUM_MA: u64 = 0x034; + +// Queue size for the currently selected queue - Write Only +pub const REG_MMIO_QUEUE_NUM: u64 = 0x038; + +// Used Ring alignment for the currently selected queue - Write Only +pub const REG_MMIO_QUEUE_ALIGN: u64 = 0x03c; + +// Guest's PFN for the currently selected queue - Read Write +pub const REG_MMIO_QUEUE_PFN: u64 = 0x040; + +// Ready bit for the currently selected queue - Read Write +pub const REG_MMIO_QUEUE_READY: u64 = 0x044; + +// Queue notifier - Write Only +pub const REG_MMIO_QUEUE_NOTIF: u64 = 0x050; + +// Interrupt status - Read Only +pub const REG_MMIO_INTERRUPT_STAT: u64 = 0x060; + +// Interrupt acknowledge - Write Only +pub const REG_MMIO_INTERRUPT_AC: u64 = 0x064; + +// Device status register - Read Write +pub const REG_MMIO_STATUS: u64 = 0x070; + +// Selected queue's Descriptor Table address, 64 bits in two halves +pub const REG_MMIO_QUEUE_DESC_LOW: u64 = 0x080; +pub const REG_MMIO_QUEUE_DESC_HIGH: u64 = 0x084; + +// Selected queue's Available Ring address, 64 bits in two halves +pub const REG_MMIO_QUEUE_AVAIL_LOW: u64 = 0x090; +pub const REG_MMIO_QUEUE_AVAIL_HIGH: u64 = 0x094; + +// Selected queue's Used Ring address, 64 bits in two halves +pub const REG_MMIO_QUEUE_USED_LOW: u64 = 0x0a0; +pub const REG_MMIO_QUEUE_USED_HIGH: u64 = 0x0a4; + +// Shared memory region id +pub const REG_MMIO_SHM_SEL: u64 = 0x0ac; + +// Shared memory region length, 64 bits in two halves +pub const REG_MMIO_SHM_LEN_LOW: u64 = 0x0b0; +pub const REG_MMIO_SHM_LEN_HIGH: u64 = 0x0b4; + +// Shared memory region base address, 64 bits in two halves +pub const REG_MMIO_SHM_BASE_LOW: u64 = 0x0b8; +pub const REG_MMIO_SHM_BASE_HIGH: u64 = 0x0bc; + +// Configuration atomicity value +pub const REG_MMIO_CONFIG_GENERATI: u64 = 0x0fc; + +// The config space is defined by each driver +// the per-driver configuration space - Read Write +pub const REG_MMIO_CONFIG: u64 = 0x100; diff --git a/src/dragonball/src/dbs_virtio_devices/src/net.rs b/src/dragonball/src/dbs_virtio_devices/src/net.rs new file mode 100644 index 
000000000000..bbae070c310e --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/net.rs @@ -0,0 +1,1448 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::any::Any; +use std::cmp; +use std::io::{self, Read, Write}; +use std::marker::PhantomData; +use std::mem; +use std::ops::Deref; +use std::os::unix::io::AsRawFd; +use std::sync::{mpsc, Arc}; + +use dbs_device::resources::ResourceConstraint; +use dbs_utils::epoll_manager::{ + EpollManager, EventOps, EventSet, Events, MutEventSubscriber, SubscriberId, +}; +use dbs_utils::metric::{IncMetric, SharedIncMetric}; +use dbs_utils::net::{net_gen, MacAddr, Tap, MAC_ADDR_LEN}; +use dbs_utils::rate_limiter::{BucketUpdate, RateLimiter, TokenType}; +use libc; +use log::{debug, error, info, trace, warn}; +use serde::Serialize; +use virtio_bindings::bindings::virtio_net::*; +use virtio_queue::{QueueOwnedT, QueueSync, QueueT}; +use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, GuestMemoryRegion, GuestRegionMmap}; +use vmm_sys_util::eventfd::EventFd; + +use crate::device::{VirtioDeviceConfig, VirtioDeviceInfo}; +use crate::{ + ActivateError, ActivateResult, ConfigResult, DbsGuestAddressSpace, Error, Result, VirtioDevice, + VirtioQueueConfig, TYPE_NET, +}; + +const NET_DRIVER_NAME: &str = "virtio-net"; + +/// The maximum buffer size when segmentation offload is enabled. This +/// includes the 12-byte virtio net header. +/// http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html#x1-1740003 +const MAX_BUFFER_SIZE: usize = 65562; + +// A frame is available for reading from the tap device to receive in the guest. +const RX_TAP_EVENT: u32 = 0; +// The guest has made a buffer available to receive a frame into. +const RX_QUEUE_EVENT: u32 = 1; +// The transmit queue has a frame that is ready to send from the guest. +const TX_QUEUE_EVENT: u32 = 2; +// rx rate limiter budget is now available. +const RX_RATE_LIMITER_EVENT: u32 = 3; +// tx rate limiter budget is now available. +const TX_RATE_LIMITER_EVENT: u32 = 4; +// patch request of rate limiters has arrived +const PATCH_RATE_LIMITER_EVENT: u32 = 5; +// Number of DeviceEventT events supported by this implementation. +pub const NET_EVENTS_COUNT: u32 = 6; + +/// Error for virtio-net devices to handle requests from guests. +#[derive(Debug, thiserror::Error)] +pub enum NetError { + /// Open tap device failed. + #[error("open tap device failed: {0}")] + TapOpen(#[source] dbs_utils::net::TapError), + /// Setting tap interface offload flags failed. + #[error("set tap device vnet header size failed: {0}")] + TapSetOffload(#[source] dbs_utils::net::TapError), + /// Setting vnet header size failed. + #[error("set tap device vnet header size failed: {0}")] + TapSetVnetHdrSize(#[source] dbs_utils::net::TapError), +} + +/// Metrics specific to the net device. +#[derive(Default, Serialize)] +pub struct NetDeviceMetrics { + /// Number of times when handling events on a network device. + pub event_count: SharedIncMetric, + /// Number of times when activate failed on a network device. + pub activate_fails: SharedIncMetric, + /// Number of times when interacting with the space config of a network device failed. + pub cfg_fails: SharedIncMetric, + /// Number of times when handling events on a network device failed. 
+ pub event_fails: SharedIncMetric, + /// Number of events associated with the receiving queue. + pub rx_queue_event_count: SharedIncMetric, + /// Number of events associated with the rate limiter installed on the receiving path. + pub rx_event_rate_limiter_count: SharedIncMetric, + /// Number of events received on the associated tap. + pub rx_tap_event_count: SharedIncMetric, + /// Number of bytes received. + pub rx_bytes_count: SharedIncMetric, + /// Number of packets received. + pub rx_packets_count: SharedIncMetric, + /// Number of errors while receiving data. + pub rx_fails: SharedIncMetric, + /// Number of transmitted bytes. + pub tx_bytes_count: SharedIncMetric, + /// Number of errors while transmitting data. + pub tx_fails: SharedIncMetric, + /// Number of transmitted packets. + pub tx_packets_count: SharedIncMetric, + /// Number of events associated with the transmitting queue. + pub tx_queue_event_count: SharedIncMetric, + /// Number of events associated with the rate limiter installed on the transmitting path. + pub tx_rate_limiter_event_count: SharedIncMetric, +} + +struct TxVirtio { + queue: VirtioQueueConfig, + rate_limiter: RateLimiter, + iovec: Vec<(GuestAddress, usize)>, + used_desc_heads: Vec, + frame_buf: [u8; MAX_BUFFER_SIZE], +} + +impl TxVirtio { + fn new(queue: VirtioQueueConfig, rate_limiter: RateLimiter) -> Self { + let tx_queue_max_size = queue.max_size() as usize; + + TxVirtio { + queue, + rate_limiter, + iovec: Vec::with_capacity(tx_queue_max_size), + used_desc_heads: vec![0u16; tx_queue_max_size], + frame_buf: [0u8; MAX_BUFFER_SIZE], + } + } +} + +struct RxVirtio { + queue: VirtioQueueConfig, + rate_limiter: RateLimiter, + deferred_frame: bool, + deferred_irqs: bool, + bytes_read: usize, + frame_buf: [u8; MAX_BUFFER_SIZE], +} + +impl RxVirtio { + fn new(queue: VirtioQueueConfig, rate_limiter: RateLimiter) -> Self { + RxVirtio { + queue, + rate_limiter, + deferred_frame: false, + deferred_irqs: false, + bytes_read: 0, + frame_buf: [0u8; MAX_BUFFER_SIZE], + } + } +} + +fn vnet_hdr_len() -> usize { + mem::size_of::() +} + +#[allow(dead_code)] +pub(crate) struct NetEpollHandler< + AS: GuestAddressSpace, + Q: QueueT + Send = QueueSync, + R: GuestMemoryRegion = GuestRegionMmap, +> { + tap: Tap, + rx: RxVirtio, + tx: TxVirtio, + config: VirtioDeviceConfig, + id: String, + patch_rate_limiter_fd: EventFd, + receiver: Option>, + metrics: Arc, +} + +impl NetEpollHandler { + // Attempts to copy a single frame into the guest if there is enough rate limiting budget. + // Returns true on successful frame delivery. + fn rate_limited_rx_single_frame(&mut self, mem: &AS::M) -> bool { + // If limiter.consume() fails it means there is no more TokenType::Ops + // budget and rate limiting is in effect. + if !self.rx.rate_limiter.consume(1, TokenType::Ops) { + return false; + } + // If limiter.consume() fails it means there is no more TokenType::Bytes + // budget and rate limiting is in effect. + if !self + .rx + .rate_limiter + .consume(self.rx.bytes_read as u64, TokenType::Bytes) + { + // revert the OPS consume() + self.rx.rate_limiter.manual_replenish(1, TokenType::Ops); + return false; + } + + // Attempt frame delivery. + let success = self.rx_single_frame(mem); + + // Undo the tokens consumption if guest delivery failed. + if !success { + self.rx.rate_limiter.manual_replenish(1, TokenType::Ops); + self.rx + .rate_limiter + .manual_replenish(self.rx.bytes_read as u64, TokenType::Bytes); + } + + success + } + + // Copies a single frame from `self.rx.frame_buf` into the guest. 
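+ // The copy may span several write-only descriptors of the selected RX descriptor chain.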
+ // + // Returns true if a buffer was used, and false if the frame must be deferred until a buffer + // is made available by the driver. + fn rx_single_frame(&mut self, mem: &AS::M) -> bool { + let mut next_desc; + let mut desc_chain; + let mut write_count = 0; + + { + let queue = &mut self.rx.queue.queue_mut().lock(); + let mut iter = match queue.iter(mem) { + Err(e) => { + error!("{}: failed to process queue. {}", self.id, e); + return false; + } + Ok(iter) => iter, + }; + desc_chain = match iter.next() { + Some(v) => v, + None => return false, + }; + next_desc = desc_chain.next(); + + // Copy from frame into buffer, which may span multiple descriptors. + loop { + match next_desc { + Some(desc) => { + if !desc.is_write_only() { + self.metrics.rx_fails.inc(); + debug!("{}: receiving buffer is not write-only", self.id); + break; + } + + let limit = cmp::min(write_count + desc.len() as usize, self.rx.bytes_read); + let source_slice = &self.rx.frame_buf[write_count..limit]; + match mem.write(source_slice, desc.addr()) { + Ok(sz) => write_count += sz, + Err(e) => { + self.metrics.rx_fails.inc(); + debug!("{}: failed to write guest memory slice, {:?}", self.id, e); + break; + } + }; + + if write_count >= self.rx.bytes_read { + break; + } + next_desc = desc_chain.next(); + } + None => { + self.metrics.rx_fails.inc(); + debug!("{}: receiving buffer is too small", self.id); + break; + } + } + } + } + self.rx + .queue + .add_used(mem, desc_chain.head_index(), write_count as u32); + + // Mark that we have at least one pending packet and we need to interrupt the guest. + self.rx.deferred_irqs = true; + + // Current descriptor chain is too small, need a bigger one. + if write_count < self.rx.bytes_read { + return false; + } + + self.metrics.rx_bytes_count.add(write_count); + self.metrics.rx_packets_count.inc(); + true + } + + // Sends frame to the host TAP. + // + // `frame_buf` should contain the frame bytes in a slice of exact length. + // Returns whether MMDS consumed the frame. + fn write_to_tap(frame_buf: &[u8], tap: &mut Tap, metrics: &Arc) { + match tap.write(frame_buf) { + Ok(_) => { + metrics.tx_bytes_count.add(frame_buf.len()); + metrics.tx_packets_count.inc(); + } + Err(e) => { + metrics.tx_fails.inc(); + error!("{}: failed to write to tap, {:?}", NET_DRIVER_NAME, e); + } + } + } + + // Read from regular network packets. + fn read_from_tap(&mut self) -> io::Result { + self.tap.read(&mut self.rx.frame_buf) + } + + fn process_rx(&mut self, mem: &AS::M) -> Result<()> { + // Read as many frames as possible. + loop { + match self.read_from_tap() { + Ok(count) => { + self.rx.bytes_read = count; + if !self.rate_limited_rx_single_frame(mem) { + self.rx.deferred_frame = true; + break; + } + } + Err(e) => { + // The tap device is non-blocking, so any error aside from EAGAIN is unexpected. + match e.raw_os_error() { + Some(err) if err == libc::EAGAIN => (), + _ => { + self.metrics.rx_fails.inc(); + error!("{}: failed to read tap: {:?}", self.id, e); + return Err(e.into()); + } + }; + break; + } + } + } + + if self.rx.deferred_irqs { + self.rx.deferred_irqs = false; + self.rx.queue.notify() + } else { + Ok(()) + } + } + + fn resume_rx(&mut self, mem: &AS::M) -> Result<()> { + if self.rx.deferred_frame { + if self.rate_limited_rx_single_frame(mem) { + self.rx.deferred_frame = false; + // process_rx() was interrupted possibly before consuming all + // packets in the tap; try continuing now. 
+ self.process_rx(mem) + } else if self.rx.deferred_irqs { + self.rx.deferred_irqs = false; + self.rx.queue.notify() + } else { + Ok(()) + } + } else { + Ok(()) + } + } + + fn process_tx(&mut self, mem: &AS::M) -> Result<()> { + let mut rate_limited = false; + let mut used_count = 0; + { + let queue = &mut self.tx.queue.queue_mut().lock(); + + let mut iter = match queue.iter(mem) { + Err(e) => { + return Err(Error::VirtioQueueError(e)); + } + Ok(iter) => iter, + }; + + for desc_chain in &mut iter { + // If limiter.consume() fails it means there is no more TokenType::Ops + // budget and rate limiting is in effect. + if !self.tx.rate_limiter.consume(1, TokenType::Ops) { + rate_limited = true; + // Stop processing the queue. + + break; + } + + let mut read_count = 0; + let header_index = desc_chain.head_index(); + self.tx.iovec.clear(); + + for desc in desc_chain { + if desc.is_write_only() { + break; + } + self.tx.iovec.push((desc.addr(), desc.len() as usize)); + read_count += desc.len() as usize; + } + + // If limiter.consume() fails it means there is no more TokenType::Bytes + // budget and rate limiting is in effect. + if !self + .tx + .rate_limiter + .consume(read_count as u64, TokenType::Bytes) + { + rate_limited = true; + // revert the OPS consume() + self.tx.rate_limiter.manual_replenish(1, TokenType::Ops); + // stop processing the queue + break; + } + + read_count = 0; + // Copy buffer from across multiple descriptors. + // TODO(performance - Issue #420): change this to use `writev()` instead of `write()` + // and get rid of the intermediate buffer. + for (desc_addr, desc_len) in self.tx.iovec.drain(..) { + let limit = cmp::min(read_count + desc_len, self.tx.frame_buf.len()); + + let read_result = + mem.read(&mut self.tx.frame_buf[read_count..limit], desc_addr); + match read_result { + Ok(sz) => read_count += sz, + Err(e) => { + self.metrics.tx_fails.inc(); + error!("{}: failed to read slice: {:?}", self.id, e); + break; + } + } + } + + Self::write_to_tap( + &self.tx.frame_buf[..read_count], + &mut self.tap, + &self.metrics, + ); + + self.tx.used_desc_heads[used_count] = header_index; + used_count += 1; + } + if rate_limited { + // If rate limiting kicked in, queue had advanced one element that we aborted + // processing; go back one element so it can be processed next time. + iter.go_to_previous_position(); + } + } + if used_count != 0 { + // TODO(performance - Issue #425): find a way around RUST mutability enforcements to + // allow calling queue.add_used() inside the loop. This would lead to better distribution + // of descriptor usage between the dragonball thread and the guest tx thread. + // One option to do this is to call queue.add_used() from a static function. 
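+ // TX buffers are read-only from the device side, so each completed chain is returned
+ // to the guest with a used length of 0 before the TX interrupt is raised.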
+ for &desc_index in &self.tx.used_desc_heads[..used_count] { + self.tx.queue.add_used(mem, desc_index, 0); + } + + if let Err(e) = self.tx.queue.notify() { + error!("{}: failed to send tx interrupt to guest, {:?}", self.id, e); + } + } + Ok(()) + } + + pub fn get_patch_rate_limiters( + &mut self, + rx_bytes: BucketUpdate, + rx_ops: BucketUpdate, + tx_bytes: BucketUpdate, + tx_ops: BucketUpdate, + ) { + self.rx.rate_limiter.update_buckets(rx_bytes, rx_ops); + self.tx.rate_limiter.update_buckets(tx_bytes, tx_ops); + info!("{}: Update rate limiters", self.id); + } +} + +impl MutEventSubscriber + for NetEpollHandler +{ + fn process(&mut self, events: Events, _ops: &mut EventOps) { + let guard = self.config.lock_guest_memory(); + let mem = guard.deref(); + self.metrics.event_count.inc(); + match events.data() { + RX_QUEUE_EVENT => { + self.metrics.rx_queue_event_count.inc(); + if let Err(e) = self.rx.queue.consume_event() { + self.metrics.event_fails.inc(); + error!("{}: failed to get rx queue event, {:?}", self.id, e); + } else if !self.rx.rate_limiter.is_blocked() { + // If the limiter is not blocked, resume the receiving of bytes. + // There should be a buffer available now to receive the frame into. + if let Err(e) = self.resume_rx(mem) { + self.metrics.event_fails.inc(); + error!("{}: failed to resume rx_queue event, {:?}", self.id, e); + } + } + } + RX_TAP_EVENT => { + self.metrics.rx_tap_event_count.inc(); + + // While limiter is blocked, don't process any more incoming. + if self.rx.rate_limiter.is_blocked() { + // TODO: this may cause busy loop when rate limiting. + // Process a deferred frame first if available. Don't read from tap again + // until we manage to receive this deferred frame. + } else if self.rx.deferred_frame { + if self.rate_limited_rx_single_frame(mem) { + self.rx.deferred_frame = false; + // Process more packats from the tap device. + if let Err(e) = self.process_rx(mem) { + self.metrics.event_fails.inc(); + error!("{}: failed to process rx queue, {:?}", self.id, e); + } + } else if self.rx.deferred_irqs { + self.rx.deferred_irqs = false; + if let Err(e) = self.rx.queue.notify() { + error!("{}: failed to send rx interrupt to guest, {:?}", self.id, e); + } + } + } else if let Err(e) = self.process_rx(mem) { + error!("{}: failed to process rx queue, {:?}", self.id, e); + } + } + TX_QUEUE_EVENT => { + self.metrics.tx_queue_event_count.inc(); + if let Err(e) = self.tx.queue.consume_event() { + self.metrics.event_fails.inc(); + error!("{}: failed to get tx queue event: {:?}", self.id, e); + // If the limiter is not blocked, continue transmitting bytes. + } else if !self.tx.rate_limiter.is_blocked() { + if let Err(e) = self.process_tx(mem) { + self.metrics.event_fails.inc(); + error!("{}: failed to process tx queue, {:?}", self.id, e); + } + } + } + RX_RATE_LIMITER_EVENT => { + // Upon rate limiter event, call the rate limiter handler and restart processing + // the rx queue. + self.metrics.rx_event_rate_limiter_count.inc(); + match self.rx.rate_limiter.event_handler() { + // There might be enough budget now to receive the frame. + Ok(_) => { + if let Err(e) = self.resume_rx(mem) { + self.metrics.event_fails.inc(); + error!("{}: failed to resume rx, {:?}", self.id, e); + } + } + Err(e) => { + self.metrics.event_fails.inc(); + error!("{}: failed to get rx rate-limiter event: {:?}", self.id, e); + } + } + } + TX_RATE_LIMITER_EVENT => { + // Upon rate limiter event, call the rate limiter handler and restart processing + // the tx queue. 
+ self.metrics.tx_rate_limiter_event_count.inc(); + match self.tx.rate_limiter.event_handler() { + // There might be enough budget now to send the frame. + Ok(_) => { + if let Err(e) = self.process_tx(mem) { + self.metrics.event_fails.inc(); + error!("{}: failed to resume tx, {:?}", self.id, e); + } + } + Err(e) => { + self.metrics.event_fails.inc(); + error!("{}: failed to get tx rate-limiter event, {:?}", self.id, e); + } + } + } + PATCH_RATE_LIMITER_EVENT => { + if let Some(receiver) = &self.receiver { + if let Ok((rx_bytes, rx_ops, tx_bytes, tx_ops)) = receiver.try_recv() { + self.get_patch_rate_limiters(rx_bytes, rx_ops, tx_bytes, tx_ops); + if let Err(e) = self.patch_rate_limiter_fd.read() { + error!("{}: failed to get patch event, {:?}", self.id, e); + } + } + } + } + _ => error!("{}: unknown epoll event slot {}", self.id, events.data()), + } + } + + fn init(&mut self, ops: &mut EventOps) { + trace!(target: "virtio-net", "{}: NetEpollHandler::init()", self.id); + + let events = Events::with_data(&self.tap, RX_TAP_EVENT, EventSet::IN); + if let Err(e) = ops.add(events) { + error!("{}: failed to register TAP RX event, {:?}", self.id, e); + } + + let events = + Events::with_data(self.rx.queue.eventfd.as_ref(), RX_QUEUE_EVENT, EventSet::IN); + if let Err(e) = ops.add(events) { + error!("{}: failed to register RX queue event, {:?}", self.id, e); + } + + let events = + Events::with_data(self.tx.queue.eventfd.as_ref(), TX_QUEUE_EVENT, EventSet::IN); + if let Err(e) = ops.add(events) { + error!("{}: failed to register TX queue event, {:?}", self.id, e); + } + + let rx_rate_limiter_fd = self.rx.rate_limiter.as_raw_fd(); + if rx_rate_limiter_fd >= 0 { + let events = + Events::with_data_raw(rx_rate_limiter_fd, RX_RATE_LIMITER_EVENT, EventSet::IN); + if let Err(e) = ops.add(events) { + error!( + "{}: failed to register RX rate limit event, {:?}", + self.id, e + ); + } + } + + let tx_rate_limiter_fd = self.tx.rate_limiter.as_raw_fd(); + if tx_rate_limiter_fd >= 0 { + let events = + Events::with_data_raw(tx_rate_limiter_fd, TX_RATE_LIMITER_EVENT, EventSet::IN); + if let Err(e) = ops.add(events) { + error!( + "{}: failed to register TX rate limit event, {:?}", + self.id, e + ); + } + } + + let events = Events::with_data( + &self.patch_rate_limiter_fd, + PATCH_RATE_LIMITER_EVENT, + EventSet::IN, + ); + if let Err(e) = ops.add(events) { + error!( + "{}: failed to register rate limiter patch event, {:?}", + self.id, e + ); + } + } +} + +pub struct Net { + pub(crate) device_info: VirtioDeviceInfo, + pub tap: Option, + pub queue_sizes: Arc>, + pub rx_rate_limiter: Option, + pub tx_rate_limiter: Option, + pub subscriber_id: Option, + id: String, + phantom: PhantomData, + patch_rate_limiter_fd: EventFd, + sender: Option>, + metrics: Arc, +} + +impl Net { + /// Create a new virtio network device with the given TAP interface. + pub fn new_with_tap( + tap: Tap, + guest_mac: Option<&MacAddr>, + queue_sizes: Arc>, + event_mgr: EpollManager, + rx_rate_limiter: Option, + tx_rate_limiter: Option, + ) -> Result { + trace!(target: "virtio-net", "{}: Net::new_with_tap()", NET_DRIVER_NAME); + + // Set offload flags to match the virtio features below. 
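+ // Checksum, TSO4/TSO6 and UFO offloads are enabled on the tap; they pair with the
+ // virtio-net feature bits assembled into avail_features just below.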
+ tap.set_offload( + net_gen::TUN_F_CSUM | net_gen::TUN_F_UFO | net_gen::TUN_F_TSO4 | net_gen::TUN_F_TSO6, + ) + .map_err(NetError::TapSetOffload)?; + + let vnet_hdr_size = vnet_hdr_len() as i32; + tap.set_vnet_hdr_size(vnet_hdr_size) + .map_err(NetError::TapSetVnetHdrSize)?; + info!("net tap set finished"); + + let mut avail_features = 1u64 << VIRTIO_NET_F_GUEST_CSUM + | 1u64 << VIRTIO_NET_F_CSUM + | 1u64 << VIRTIO_NET_F_GUEST_TSO4 + | 1u64 << VIRTIO_NET_F_GUEST_UFO + | 1u64 << VIRTIO_NET_F_HOST_TSO4 + | 1u64 << VIRTIO_NET_F_HOST_UFO + | 1u64 << VIRTIO_F_VERSION_1; + + let mut config_space = Vec::new(); + if let Some(mac) = guest_mac { + config_space.resize(MAC_ADDR_LEN, 0); + config_space[..].copy_from_slice(mac.get_bytes()); + // When this feature isn't available, the driver generates a random MAC address. + // Otherwise, it should attempt to read the device MAC address from the config space. + avail_features |= 1u64 << VIRTIO_NET_F_MAC; + } + + let device_info = VirtioDeviceInfo::new( + NET_DRIVER_NAME.to_string(), + avail_features, + queue_sizes.clone(), + config_space, + event_mgr, + ); + let id = device_info.driver_name.clone(); + Ok(Net { + tap: Some(tap), + device_info, + queue_sizes, + rx_rate_limiter, + tx_rate_limiter, + subscriber_id: None, + id, + phantom: PhantomData, + patch_rate_limiter_fd: EventFd::new(0).unwrap(), + sender: None, + metrics: Arc::new(NetDeviceMetrics::default()), + }) + } + + /// Create a new virtio network device with the given Host Device Name + pub fn new( + host_dev_name: String, + guest_mac: Option<&MacAddr>, + queue_sizes: Arc>, + epoll_mgr: EpollManager, + rx_rate_limiter: Option, + tx_rate_limiter: Option, + ) -> Result { + info!("open net tap {}", host_dev_name); + let tap = Tap::open_named(host_dev_name.as_str(), false).map_err(NetError::TapOpen)?; + info!("net tap opened"); + + Self::new_with_tap( + tap, + guest_mac, + queue_sizes, + epoll_mgr, + rx_rate_limiter, + tx_rate_limiter, + ) + } + + pub fn metrics(&self) -> Arc { + self.metrics.clone() + } +} + +impl Net { + pub fn set_patch_rate_limiters( + &self, + rx_bytes: BucketUpdate, + rx_ops: BucketUpdate, + tx_bytes: BucketUpdate, + tx_ops: BucketUpdate, + ) -> Result<()> { + if let Some(sender) = &self.sender { + if sender.send((rx_bytes, rx_ops, tx_bytes, tx_ops)).is_ok() { + if let Err(e) = self.patch_rate_limiter_fd.write(1) { + error!( + "virtio-net: failed to write rate-limiter patch event {:?}", + e + ); + Err(Error::InternalError) + } else { + Ok(()) + } + } else { + error!("virtio-net: failed to send rate-limiter patch data"); + Err(Error::InternalError) + } + } else { + error!("virtio-net: failed to establish channel to send rate-limiter patch data"); + Err(Error::InternalError) + } + } +} + +impl VirtioDevice for Net +where + AS: DbsGuestAddressSpace, + Q: QueueT + Send + 'static, + R: GuestMemoryRegion + Sync + Send + 'static, +{ + fn device_type(&self) -> u32 { + TYPE_NET + } + + fn queue_max_sizes(&self) -> &[u16] { + &self.queue_sizes + } + + fn get_avail_features(&self, page: u32) -> u32 { + self.device_info.get_avail_features(page) + } + + fn set_acked_features(&mut self, page: u32, value: u32) { + trace!(target: "virtio-net", "{}: VirtioDevice::set_acked_features({}, 0x{:x})", + self.id, page, value); + self.device_info.set_acked_features(page, value) + } + + fn read_config(&mut self, offset: u64, data: &mut [u8]) -> ConfigResult { + trace!(target: "virtio-net", "{}: VirtioDevice::read_config(0x{:x}, {:?})", + self.id, offset, data); + self.device_info.read_config(offset, 
data).map_err(|e| { + self.metrics.cfg_fails.inc(); + e + }) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> ConfigResult { + trace!(target: "virtio-net", "{}: VirtioDevice::write_config(0x{:x}, {:?})", + self.id, offset, data); + self.device_info.write_config(offset, data).map_err(|e| { + self.metrics.cfg_fails.inc(); + e + }) + } + + fn activate(&mut self, mut config: VirtioDeviceConfig) -> ActivateResult { + trace!(target: "virtio-net", "{}: VirtioDevice::activate()", self.id); + // Do not support control queue and multi queue. + if config.queues.len() != 2 { + self.metrics.activate_fails.inc(); + return Err(ActivateError::InvalidParam); + } + + self.device_info + .check_queue_sizes(&config.queues[..]) + .map_err(|e| { + self.metrics.activate_fails.inc(); + e + })?; + let tap = self.tap.take().ok_or_else(|| { + self.metrics.activate_fails.inc(); + ActivateError::InvalidParam + })?; + let (sender, receiver) = mpsc::channel(); + self.sender = Some(sender); + let rx_queue = config.queues.remove(0); + let tx_queue = config.queues.remove(0); + let rx = RxVirtio::::new(rx_queue, self.rx_rate_limiter.take().unwrap_or_default()); + let tx = TxVirtio::::new(tx_queue, self.tx_rate_limiter.take().unwrap_or_default()); + let patch_rate_limiter_fd = self.patch_rate_limiter_fd.try_clone().unwrap(); + + let handler = Box::new(NetEpollHandler { + tap, + rx, + tx, + config, + id: self.id.clone(), + patch_rate_limiter_fd, + receiver: Some(receiver), + metrics: self.metrics.clone(), + }); + + self.subscriber_id = Some(self.device_info.register_event_handler(handler)); + Ok(()) + } + + fn get_resource_requirements( + &self, + requests: &mut Vec, + use_generic_irq: bool, + ) { + trace!(target: "virtio-net", "{}: VirtioDevice::get_resource_requirements()", self.id); + requests.push(ResourceConstraint::LegacyIrq { irq: None }); + if use_generic_irq { + requests.push(ResourceConstraint::GenericIrq { + size: (self.queue_sizes.len() + 1) as u32, + }); + } + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + fn remove(&mut self) { + let subscriber_id = self.subscriber_id.take(); + if let Some(subscriber_id) = subscriber_id { + match self.device_info.remove_event_handler(subscriber_id) { + Ok(_) => debug!("virtio-net: removed subscriber_id {:?}", subscriber_id), + Err(err) => warn!("virtio-net: failed to remove event handler: {:?}", err), + }; + } else { + self.tap.take(); + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::thread; + use std::time::Duration; + + use dbs_device::resources::DeviceResources; + use dbs_interrupt::NoopNotifier; + use dbs_utils::epoll_manager::SubscriberOps; + use dbs_utils::rate_limiter::TokenBucket; + use kvm_ioctls::Kvm; + use vm_memory::{GuestAddress, GuestMemoryMmap}; + + use super::*; + use crate::tests::{VirtQueue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; + use crate::ConfigError; + + static NEXT_IP: AtomicUsize = AtomicUsize::new(1); + + #[allow(dead_code)] + const MAX_REQ_SIZE: u32 = 0x10000; + + fn create_net_epoll_handler(id: String) -> NetEpollHandler> { + let next_ip = NEXT_IP.fetch_add(1, Ordering::SeqCst); + let tap = Tap::open_named(&format!("tap{next_ip}"), false).unwrap(); + let rx = RxVirtio::new( + VirtioQueueConfig::create(256, 0).unwrap(), + RateLimiter::default(), + ); + let tx = TxVirtio::new( + VirtioQueueConfig::create(256, 0).unwrap(), + RateLimiter::default(), + ); + let mem = Arc::new(GuestMemoryMmap::from_ranges(&[(GuestAddress(0x0), 
0x10000)]).unwrap()); + let queues = vec![VirtioQueueConfig::create(256, 0).unwrap()]; + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::new( + mem, + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + NetEpollHandler { + tap, + rx, + tx, + config, + id, + patch_rate_limiter_fd: EventFd::new(0).unwrap(), + receiver: None, + metrics: Arc::new(NetDeviceMetrics::default()), + } + } + + #[test] + fn test_net_virtio_device_normal() { + let next_ip = NEXT_IP.fetch_add(1, Ordering::SeqCst); + let tap = Tap::open_named(&format!("tap{next_ip}"), false).unwrap(); + let epoll_mgr = EpollManager::default(); + + let mut dev = Net::>::new_with_tap( + tap, + None, + Arc::new(vec![128]), + epoll_mgr, + None, + None, + ) + .unwrap(); + + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::device_type(&dev), + TYPE_NET + ); + let queue_size = vec![128]; + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::queue_max_sizes( + &dev + ), + &queue_size[..] + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 0), + dev.device_info.get_avail_features(0) + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 1), + dev.device_info.get_avail_features(1) + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 2), + dev.device_info.get_avail_features(2) + ); + VirtioDevice::>, QueueSync, GuestRegionMmap>::set_acked_features( + &mut dev, 2, 0, + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features(&dev, 2), + 0 + ); + // device config length is 0 because guest_mac is None + let mut config: [u8; 1] = [0]; + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::read_config( + &mut dev, + 0, + &mut config, + ) + .unwrap_err(), + ConfigError::InvalidOffset(0) + ); + let config: [u8; 16] = [0; 16]; + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::write_config( + &mut dev, 0, &config, + ) + .unwrap_err(), + ConfigError::InvalidOffset(0) + ); + } + + #[test] + fn test_net_virtio_device_active() { + let epoll_mgr = EpollManager::default(); + { + // config queue size is not 2 + let next_ip = NEXT_IP.fetch_add(1, Ordering::SeqCst); + let tap = Tap::open_named(&format!("tap{next_ip}"), false).unwrap(); + let mut dev = Net::>::new_with_tap( + tap, + None, + Arc::new(vec![128]), + epoll_mgr.clone(), + None, + None, + ) + .unwrap(); + + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues = Vec::new(); + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = + VirtioDeviceConfig::>, QueueSync, GuestRegionMmap>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + + matches!(dev.activate(config), Err(ActivateError::InvalidParam)); + } + { + // check queue sizes error + let next_ip = NEXT_IP.fetch_add(1, Ordering::SeqCst); + let tap = Tap::open_named(&format!("tap{next_ip}"), false).unwrap(); + let mut dev = Net::>::new_with_tap( + tap, + None, + Arc::new(vec![128]), + epoll_mgr.clone(), + None, + None, + ) + .unwrap(); + + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues = vec![ + VirtioQueueConfig::create(2, 0).unwrap(), + VirtioQueueConfig::create(2, 0).unwrap(), + ]; + + let kvm = Kvm::new().unwrap(); + 
let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = + VirtioDeviceConfig::>, QueueSync, GuestRegionMmap>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + + matches!(dev.activate(config), Err(ActivateError::InvalidParam)); + } + { + // test no tap + let next_ip = NEXT_IP.fetch_add(1, Ordering::SeqCst); + let tap = Tap::open_named(&format!("tap{next_ip}"), false).unwrap(); + let mut dev = Net::>::new_with_tap( + tap, + None, + Arc::new(vec![128, 128]), + epoll_mgr.clone(), + None, + None, + ) + .unwrap(); + dev.tap = None; + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues = vec![ + VirtioQueueConfig::create(128, 0).unwrap(), + VirtioQueueConfig::create(128, 0).unwrap(), + ]; + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = + VirtioDeviceConfig::>, QueueSync, GuestRegionMmap>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + + matches!(dev.activate(config), Err(ActivateError::InvalidParam)); + } + { + // Ok + let next_ip = NEXT_IP.fetch_add(1, Ordering::SeqCst); + let tap = Tap::open_named(&format!("tap{next_ip}"), false).unwrap(); + let mut dev = Net::>::new_with_tap( + tap, + None, + Arc::new(vec![128, 128]), + epoll_mgr, + None, + None, + ) + .unwrap(); + + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues = vec![ + VirtioQueueConfig::create(128, 0).unwrap(), + VirtioQueueConfig::create(128, 0).unwrap(), + ]; + + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = + VirtioDeviceConfig::>, QueueSync, GuestRegionMmap>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + + assert!(dev.activate(config).is_ok()); + } + } + + #[test] + fn test_net_set_patch_rate_limiters() { + let next_ip = NEXT_IP.fetch_add(1, Ordering::SeqCst); + let tap = Tap::open_named(&format!("tap{next_ip}"), false).unwrap(); + let epoll_mgr = EpollManager::default(); + + let mut dev = Net::>::new_with_tap( + tap, + None, + Arc::new(vec![128]), + epoll_mgr, + None, + None, + ) + .unwrap(); + + //No sender + assert!(dev + .set_patch_rate_limiters( + BucketUpdate::None, + BucketUpdate::None, + BucketUpdate::None, + BucketUpdate::None + ) + .is_err()); + + let (sender, _receiver) = mpsc::channel(); + dev.sender = Some(sender); + assert!(dev + .set_patch_rate_limiters( + BucketUpdate::None, + BucketUpdate::None, + BucketUpdate::None, + BucketUpdate::None + ) + .is_ok()); + } + + #[test] + fn test_net_get_patch_rate_limiters() { + let mut handler = create_net_epoll_handler("test_1".to_string()); + let tokenbucket = TokenBucket::new(1, 1, 4); + + //update rx + handler.get_patch_rate_limiters( + BucketUpdate::None, + BucketUpdate::Update(tokenbucket.clone()), + BucketUpdate::None, + BucketUpdate::None, + ); + assert_eq!(handler.rx.rate_limiter.ops().unwrap(), &tokenbucket); + + //update tx + handler.get_patch_rate_limiters( + BucketUpdate::None, + BucketUpdate::None, + BucketUpdate::None, + BucketUpdate::Update(tokenbucket.clone()), + ); + assert_eq!(handler.tx.rate_limiter.ops().unwrap(), &tokenbucket); + } + + #[test] + fn test_net_epoll_handler_handle_event() { + let handler = create_net_epoll_handler("test_1".to_string()); + let event_fd = 
EventFd::new(0).unwrap(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_op = inner_mgr.event_ops(id).unwrap(); + let event_set = EventSet::EDGE_TRIGGERED; + let mut handler = create_net_epoll_handler("test_2".to_string()); + + // test for RX_QUEUE_EVENT + let events = Events::with_data(&event_fd, RX_QUEUE_EVENT, event_set); + handler.process(events, &mut event_op); + handler.config.queues[0].generate_event().unwrap(); + handler.process(events, &mut event_op); + + // test for TX_QUEUE_EVENT + let events = Events::with_data(&event_fd, TX_QUEUE_EVENT, event_set); + handler.process(events, &mut event_op); + handler.config.queues[0].generate_event().unwrap(); + handler.process(events, &mut event_op); + + // test for RX_TAP_EVENT + let events = Events::with_data(&event_fd, RX_TAP_EVENT, event_set); + handler.process(events, &mut event_op); + + // test for RX&TX RATE_LIMITER_EVENT + let events = Events::with_data(&event_fd, RX_RATE_LIMITER_EVENT, event_set); + handler.process(events, &mut event_op); + let events = Events::with_data(&event_fd, TX_RATE_LIMITER_EVENT, event_set); + handler.process(events, &mut event_op); + + // test for PATCH_RATE_LIMITER_EVENT + let events = Events::with_data(&event_fd, PATCH_RATE_LIMITER_EVENT, event_set); + handler.process(events, &mut event_op); + } + + #[test] + fn test_net_epoll_handler_handle_unknown_event() { + let handler = create_net_epoll_handler("test_1".to_string()); + let event_fd = EventFd::new(0).unwrap(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_op = inner_mgr.event_ops(id).unwrap(); + let event_set = EventSet::EDGE_TRIGGERED; + let mut handler = create_net_epoll_handler("test_2".to_string()); + + // test for unknown event + let events = Events::with_data(&event_fd, NET_EVENTS_COUNT + 10, event_set); + handler.process(events, &mut event_op); + } + + #[test] + fn test_net_epoll_handler_process_queue() { + { + let mut handler = create_net_epoll_handler("test_1".to_string()); + + let m = &handler.config.vm_as.clone(); + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + let q = vq.create_queue(); + vq.dtable(0).set(0x1000, 0x1000, VIRTQ_DESC_F_NEXT, 1); + vq.dtable(1) + .set(0x2000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); + vq.dtable(2).set(0x3000, 1, VIRTQ_DESC_F_WRITE, 1); + + handler.config.queues = vec![VirtioQueueConfig::new( + q, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + 0, + )]; + assert!(handler.process_rx(m).is_ok()); + } + } + + #[test] + fn test_net_bandwidth_rate_limiter() { + let handler = create_net_epoll_handler("test_1".to_string()); + + let event_fd = EventFd::new(0).unwrap(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_op = inner_mgr.event_ops(id).unwrap(); + let event_set = EventSet::EDGE_TRIGGERED; + let mut handler = create_net_epoll_handler("test_2".to_string()); + let m = &handler.config.vm_as.clone(); + + // Test TX bandwidth rate limiting + { + // create bandwidth rate limiter + let mut rl = RateLimiter::new(0x1000, 0, 100, 0, 0, 0).unwrap(); + // use up the budget + assert!(rl.consume(0x1000, TokenType::Bytes)); + + // set this tx rate limiter to be used + handler.tx.rate_limiter = rl; + // try doing TX + let vq = 
VirtQueue::new(GuestAddress(0), m, 16); + + let q = vq.create_queue(); + + vq.avail.idx().store(1); + vq.avail.ring(0).store(0); + vq.dtable(0).set(0x2000, 0x1000, 0, 0); + handler.tx.queue.queue = q; + + let events = Events::with_data(&event_fd, TX_QUEUE_EVENT, event_set); + assert!(handler.tx.queue.generate_event().is_ok()); + handler.process(events, &mut event_op); + assert!(handler.tx.rate_limiter.is_blocked()); + + thread::sleep(Duration::from_millis(200)); + + let events = Events::with_data(&event_fd, TX_RATE_LIMITER_EVENT, event_set); + handler.process(events, &mut event_op); + assert!(!handler.tx.rate_limiter.is_blocked()); + } + // Test RX bandwidth rate limiting + { + // create bandwidth rate limiter + let mut rl = RateLimiter::new(0x1000, 0, 100, 0, 0, 0).unwrap(); + // use up the budget + assert!(rl.consume(0x1000, TokenType::Bytes)); + + // set this rx rate limiter to be used + handler.rx.rate_limiter = rl; + // try doing RX + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + vq.dtable(0).set(0x2000, 0x1000, VIRTQ_DESC_F_WRITE, 0); + + let q = vq.create_queue(); + handler.rx.queue.queue = q; + + handler.rx.deferred_frame = true; + handler.rx.bytes_read = 0x1000; + + let events = Events::with_data(&event_fd, RX_QUEUE_EVENT, event_set); + assert!(handler.rx.queue.generate_event().is_ok()); + handler.process(events, &mut event_op); + assert!(handler.rx.rate_limiter.is_blocked()); + + thread::sleep(Duration::from_millis(200)); + + let events = Events::with_data(&event_fd, RX_RATE_LIMITER_EVENT, event_set); + handler.process(events, &mut event_op); + assert!(!handler.rx.rate_limiter.is_blocked()); + } + } + + #[test] + fn test_net_ops_rate_limiter() { + let handler = create_net_epoll_handler("test_1".to_string()); + + let event_fd = EventFd::new(0).unwrap(); + let mgr = EpollManager::default(); + let id = mgr.add_subscriber(Box::new(handler)); + let mut inner_mgr = mgr.mgr.lock().unwrap(); + let mut event_op = inner_mgr.event_ops(id).unwrap(); + let event_set = EventSet::EDGE_TRIGGERED; + let mut handler = create_net_epoll_handler("test_2".to_string()); + let m = &handler.config.vm_as.clone(); + + // Test TX ops rate limiting + { + // create ops rate limiter + let mut rl = RateLimiter::new(0, 0, 0, 2, 0, 100).unwrap(); + // use up the budget + assert!(rl.consume(2, TokenType::Ops)); + + // set this tx rate limiter to be used + handler.tx.rate_limiter = rl; + // try doing TX + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + + let q = vq.create_queue(); + handler.tx.queue.queue = q; + + let events = Events::with_data(&event_fd, TX_QUEUE_EVENT, event_set); + assert!(handler.tx.queue.generate_event().is_ok()); + handler.process(events, &mut event_op); + assert!(handler.tx.rate_limiter.is_blocked()); + + thread::sleep(Duration::from_millis(100)); + + let events = Events::with_data(&event_fd, TX_RATE_LIMITER_EVENT, event_set); + handler.process(events, &mut event_op); + assert!(!handler.tx.rate_limiter.is_blocked()); + } + // Test RX ops rate limiting + { + // create ops rate limiter + let mut rl = RateLimiter::new(0, 0, 0, 2, 0, 100).unwrap(); + // use up the budget + assert!(rl.consume(2, TokenType::Ops)); + + // set this rx rate limiter to be used + handler.rx.rate_limiter = rl; + // try doing RX + let vq = VirtQueue::new(GuestAddress(0), m, 16); + vq.avail.ring(0).store(0); + vq.avail.idx().store(1); + + let q = vq.create_queue(); + handler.rx.queue.queue = q; + + 
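            // Pretend a frame is already pending delivery (deferred) so the RX
            // queue event below has work to do; with the ops budget already used
            // up, the rate limiter ends up blocked, as asserted afterwards.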
handler.rx.deferred_frame = true; + + let events = Events::with_data(&event_fd, RX_QUEUE_EVENT, event_set); + assert!(handler.rx.queue.generate_event().is_ok()); + handler.process(events, &mut event_op); + assert!(handler.rx.rate_limiter.is_blocked()); + + thread::sleep(Duration::from_millis(100)); + + let events = Events::with_data(&event_fd, RX_RATE_LIMITER_EVENT, event_set); + handler.process(events, &mut event_op); + assert!(!handler.rx.rate_limiter.is_blocked()); + } + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/notifier.rs b/src/dragonball/src/dbs_virtio_devices/src/notifier.rs new file mode 100644 index 000000000000..4688a395ad07 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/notifier.rs @@ -0,0 +1,89 @@ +// Copyright 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +//! Wrappers over `InterruptNotifier` to support virtio device interrupt management. + +use std::sync::Arc; + +use dbs_interrupt::{ + InterruptIndex, InterruptNotifier, InterruptSourceGroup, InterruptSourceType, + InterruptStatusRegister32, LegacyNotifier, MsiNotifier, +}; + +use crate::{VIRTIO_INTR_CONFIG, VIRTIO_INTR_VRING}; + +/// Create an interrupt notifier for virtio device change events. +pub fn create_device_notifier( + group: Arc>, + intr_status: Arc, + intr_index: InterruptIndex, +) -> Arc { + match group.interrupt_type() { + InterruptSourceType::LegacyIrq => { + Arc::new(LegacyNotifier::new(group, intr_status, VIRTIO_INTR_CONFIG)) + } + InterruptSourceType::MsiIrq => Arc::new(MsiNotifier::new(group, intr_index)), + } +} + +/// Create an interrupt notifier for virtio queue notification events. +pub fn create_queue_notifier( + group: Arc>, + intr_status: Arc, + intr_index: InterruptIndex, +) -> Arc { + match group.interrupt_type() { + InterruptSourceType::LegacyIrq => { + Arc::new(LegacyNotifier::new(group, intr_status, VIRTIO_INTR_VRING)) + } + InterruptSourceType::MsiIrq => Arc::new(MsiNotifier::new(group, intr_index)), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use dbs_interrupt::InterruptManager; + + #[test] + fn test_create_virtio_legacy_notifier() { + let (_vmfd, irq_manager) = crate::tests::create_vm_and_irq_manager(); + let group = irq_manager + .create_group(InterruptSourceType::LegacyIrq, 0, 1) + .unwrap(); + let status = Arc::new(InterruptStatusRegister32::new()); + assert_eq!(status.read(), 0); + + let notifer = create_queue_notifier(group.clone(), status.clone(), 0); + notifer.notify().unwrap(); + assert!(notifer.notifier().is_some()); + + assert_eq!(status.read(), VIRTIO_INTR_VRING); + status.clear_bits(VIRTIO_INTR_VRING); + assert_eq!(status.read(), 0); + let eventfd = notifer.notifier().unwrap(); + eventfd.write(2).unwrap(); + assert_eq!(eventfd.read().unwrap(), 3); + } + + #[test] + fn test_create_virtio_msi_notifier() { + let (_vmfd, irq_manager) = crate::tests::create_vm_and_irq_manager(); + let group = irq_manager + .create_group(InterruptSourceType::MsiIrq, 0, 3) + .unwrap(); + let status = Arc::new(InterruptStatusRegister32::new()); + + let notifier1 = create_device_notifier(group.clone(), status.clone(), 1); + let notifier2 = create_queue_notifier(group.clone(), status.clone(), 2); + let notifier3 = create_queue_notifier(group.clone(), status, 3); + assert!(notifier1.notifier().is_some()); + assert!(notifier2.notifier().is_some()); + assert!(notifier3.notifier().is_none()); + notifier1.notify().unwrap(); + notifier1.notify().unwrap(); + notifier2.notify().unwrap(); + 
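        // notifier1 was signalled twice and notifier2 once, so reading their
        // backing eventfds returns the accumulated counts 2 and 1 respectively.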
assert_eq!(notifier1.notifier().unwrap().read().unwrap(), 2); + assert_eq!(notifier2.notifier().unwrap().read().unwrap(), 1); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/hybrid_stream.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/hybrid_stream.rs new file mode 100644 index 000000000000..f566e0d69437 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/hybrid_stream.rs @@ -0,0 +1,94 @@ +// Copyright 2023 Ant Group. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::any::Any; +use std::io::{Error, Read, Write}; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::time::Duration; + +use log::error; +use nix::errno::Errno; + +use super::{VsockBackendType, VsockStream}; + +pub struct HybridStream { + pub hybrid_stream: std::fs::File, + pub slave_stream: Option>, +} + +impl AsRawFd for HybridStream { + fn as_raw_fd(&self) -> RawFd { + self.hybrid_stream.as_raw_fd() + } +} + +impl Read for HybridStream { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + self.hybrid_stream.read(buf) + } +} + +impl Write for HybridStream { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + // The slave stream was only used to reply the connect result "ok ", + // thus it was only used once here, and the data would be replied by the + // main stream. + if let Some(mut stream) = self.slave_stream.take() { + stream.write(buf) + } else { + self.hybrid_stream.write(buf) + } + } + + fn flush(&mut self) -> std::io::Result<()> { + self.hybrid_stream.flush() + } +} + +impl VsockStream for HybridStream { + fn backend_type(&self) -> VsockBackendType { + VsockBackendType::HybridStream + } + + fn set_nonblocking(&mut self, nonblocking: bool) -> std::io::Result<()> { + let fd = self.hybrid_stream.as_raw_fd(); + let mut flag = unsafe { libc::fcntl(fd, libc::F_GETFL) }; + + if nonblocking { + flag = flag | libc::O_NONBLOCK; + } else { + flag = flag & !libc::O_NONBLOCK; + } + + let ret = unsafe { libc::fcntl(fd, libc::F_SETFL, flag) }; + + if ret < 0 { + error!("failed to set fcntl for fd {} with ret {}", fd, ret); + return Err(Error::last_os_error()); + } + + Ok(()) + } + + fn set_read_timeout(&mut self, _dur: Option) -> std::io::Result<()> { + error!("unsupported!"); + Err(Errno::ENOPROTOOPT.into()) + } + + fn set_write_timeout(&mut self, _dur: Option) -> std::io::Result<()> { + error!("unsupported!"); + Err(Errno::ENOPROTOOPT.into()) + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn recv_data_fd( + &self, + _bytes: &mut [u8], + _fds: &mut [RawFd], + ) -> std::io::Result<(usize, usize)> { + Err(Errno::ENOPROTOOPT.into()) + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/inner.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/inner.rs new file mode 100644 index 000000000000..1cecc0fa467c --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/inner.rs @@ -0,0 +1,923 @@ +// Copyright 2022 Alibaba Cloud. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::any::Any; +use std::io::{Error, ErrorKind, Read, Result, Write}; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::mpsc::{channel, Receiver, RecvTimeoutError, Sender, TryRecvError}; +use std::sync::Arc; +use std::time::Duration; + +use log::error; +use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK, EFD_SEMAPHORE}; + +use super::{VsockBackend, VsockBackendType, VsockStream}; + +#[derive(Debug)] +enum InnerStreamRole { + Internal, + External, +} + +/// The stream implementation of vsock inner backend. It can be used like a +/// normal unix stream. +/// +/// When working with epoll, VsockInnerStream only can be used with +/// `level-trigged` mode. +pub struct VsockInnerStream { + stream_event: Arc, + peer_event: Arc, + writer: Sender>, + reader: Receiver>, + read_buf: Option<(Vec, usize)>, + stream_nonblocking: Arc, + peer_nonblocking: Arc, + read_timeout: Option, + role: InnerStreamRole, +} + +impl VsockInnerStream { + fn new( + stream_event: Arc, + peer_event: Arc, + writer: Sender>, + reader: Receiver>, + stream_nonblocking: Arc, + peer_nonblocking: Arc, + role: InnerStreamRole, + ) -> Self { + VsockInnerStream { + stream_event, + peer_event, + writer, + reader, + read_buf: None, + stream_nonblocking, + peer_nonblocking, + read_timeout: None, + role, + } + } + + fn recv_msg_from_channel( + &mut self, + buf: &mut [u8], + msg: Vec, + total_read_len: &mut usize, + ) -> Result { + let read_len = Self::read_msg_from_vec(buf, &msg, *total_read_len, 0); + let mut read_finish = false; + *total_read_len += read_len; + + if read_len < msg.len() { + // buf is full, but msg is not fully read, save it in read_buf (the + // previous read_buf should have been read through before) + self.read_buf = Some((msg, read_len)); + read_finish = true; + } else { + // if msg is fully read, consume one event, and go + // on read next message + self.consume_event()?; + } + + Ok(read_finish) + } + + fn trigger_peer_event(&self) -> Result<()> { + self.peer_event.write(1).map_err(|e| { + error!( + "vsock inner stream {:?}: trigger peer event failed: {:?}", + self.role, e + ); + e + })?; + + Ok(()) + } + + fn consume_event(&self) -> Result<()> { + self.stream_event.read().map_err(|e| { + error!( + "vsock inner stream {:?}: consume event failed: {:?}", + self.role, e + ); + e + })?; + + Ok(()) + } + + fn read_msg_from_vec(buf: &mut [u8], msg: &[u8], buf_start: usize, msg_start: usize) -> usize { + let min_len = std::cmp::min(buf.len() - buf_start, msg.len() - msg_start); + buf[buf_start..buf_start + min_len].copy_from_slice(&msg[msg_start..msg_start + min_len]); + min_len + } +} + +impl AsRawFd for VsockInnerStream { + fn as_raw_fd(&self) -> RawFd { + self.stream_event.as_raw_fd() + } +} + +impl Read for VsockInnerStream { + fn read(&mut self, buf: &mut [u8]) -> Result { + let mut total_read_len = 0; + // if read_buf is not empty, get data from read_buf first + if let Some((read_buf, buf_read_len)) = self.read_buf.as_mut() { + let read_len = Self::read_msg_from_vec(buf, read_buf, total_read_len, *buf_read_len); + total_read_len += read_len; + *buf_read_len += read_len; + + // if read_buf is all read, consume one event + if *buf_read_len == read_buf.len() { + self.consume_event()?; + self.read_buf.take(); + } + } + + // if buf is full, just return + if total_read_len == buf.len() { + return Ok(total_read_len); + } + + // continously fetch data from channel to fill the buf, until the buf is + // full + loop { + // 
fetch data from channel + match self.reader.try_recv() { + Ok(msg) => { + if self.recv_msg_from_channel(buf, msg, &mut total_read_len)? { + return Ok(total_read_len); + } + } + // this arm indicates there's no more data can fetch from + // channel + Err(TryRecvError::Empty) => { + if total_read_len > 0 { + return Ok(total_read_len); + } else { + // - non-blocking mode: return `WouldBlock` directly + // - blocking mode: use channel's `recv`/`recv_timeout` + // function to block until channel have new data again + if self.stream_nonblocking.load(Ordering::SeqCst) { + return Err(Error::from(ErrorKind::WouldBlock)); + } else { + // - no read timeout: use channel's `recv` function + // to block until a message comes + // - have read timeout: use channel's `recv_timeout` + // to block until a message comes or reach the + // timeout time + if let Some(dur) = self.read_timeout { + match self.reader.recv_timeout(dur) { + Ok(msg) => { + if self.recv_msg_from_channel( + buf, + msg, + &mut total_read_len, + )? { + return Ok(total_read_len); + } + } + Err(RecvTimeoutError::Timeout) => { + return Err(Error::from(ErrorKind::TimedOut)) + } + Err(RecvTimeoutError::Disconnected) => { + return Err(Error::from(ErrorKind::ConnectionReset)) + } + } + } else { + match self.reader.recv() { + Ok(msg) => { + if self.recv_msg_from_channel( + buf, + msg, + &mut total_read_len, + )? { + return Ok(total_read_len); + } + } + Err(_) => return Err(Error::from(ErrorKind::ConnectionReset)), + } + } + } + } + } + Err(TryRecvError::Disconnected) => { + return Err(Error::from(ErrorKind::ConnectionReset)); + } + } + } + } +} + +impl Write for VsockInnerStream { + fn write(&mut self, buf: &[u8]) -> Result { + // We need to carefully distinguish between the timing of the trigger + // eventfd and the writing of data to the channel, because the streams + // on both ends may be working in different threads, and these two + // operations are not atomic! + let peer_nonblocking = self.peer_nonblocking.load(Ordering::SeqCst); + + // In blocking mode, the other end will simulate blocking io by blocking + // on the recv() method of the channel, at which point, if data is + // written to the channel, the other end will immediately return and + // perform the operation of fetching data, during this, one important + // things is to confirm that all the data sent has been read in this + // time, which is done by reading eventfd. + // + // However, if the other side executes faster and we haven't finished + // the trigger eventfd by the time it reads the eventfd, then it will + // return a failure. Therefore, in blocking mode, the eventfd should be + // triggered before writing data to the channel. + if !peer_nonblocking { + self.trigger_peer_event()?; + } + + if let Err(_e) = self.writer.send(buf.to_vec()) { + return Err(Error::from(ErrorKind::ConnectionReset)); + } + + // On the contrary, in nonblocking mode, the peer does not block in the + // recv() method of the channel, but generally adds eventfd to the epoll + // event loop, at this point, if we trigger eventfd, the peer will + // return immediately and perform the fetch operation, but if we do not + // send the data to the channel, then the fetching may fail. Therefore, + // in nonblocking mode, we need to trigger eventfd after writing data + // to the channel. 
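        // Illustrative aside (not part of the original patch): the pairing the
        // ordering above relies on. With EFD_SEMAPHORE, every `write(1)` queues
        // exactly one token and every `read()` consumes exactly one, so the
        // reading side can match one eventfd token to one channel message:
        //
        //     let ev = EventFd::new(EFD_NONBLOCK | EFD_SEMAPHORE)?;
        //     ev.write(1)?;              // one token per message sent
        //     ev.write(1)?;
        //     assert_eq!(ev.read()?, 1); // consumes a single token
        //     assert_eq!(ev.read()?, 1); // the second token is still pending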
+ if peer_nonblocking { + self.trigger_peer_event()?; + } + Ok(buf.len()) + } + + fn flush(&mut self) -> Result<()> { + Ok(()) + } +} + +impl Drop for VsockInnerStream { + fn drop(&mut self) { + // we need to notify peer stream when dropping, peer stream will sense + // that this side of read channel has been disconnected and return an + // error for the upper layer to drop it + if let Err(e) = self.trigger_peer_event() { + error!( + "VsockInnerStream {:?}: can't notify peer inner stream that should be drop: {}", + self.role, e + ); + } + } +} + +impl VsockStream for VsockInnerStream { + fn backend_type(&self) -> VsockBackendType { + VsockBackendType::Inner + } + + fn set_nonblocking(&mut self, nonblocking: bool) -> Result<()> { + self.stream_nonblocking.store(nonblocking, Ordering::SeqCst); + Ok(()) + } + + fn set_read_timeout(&mut self, dur: Option) -> Result<()> { + self.read_timeout = dur; + Ok(()) + } + + fn set_write_timeout(&mut self, _dur: Option) -> Result<()> { + // here's a infinite channel for write, no need to consider about write + // timeout. + Ok(()) + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +/// Vsock inner connector is used to connect to vsock inner backend. +#[derive(Clone)] +pub struct VsockInnerConnector { + backend_event: Arc, + conn_sender: Sender, +} + +impl std::fmt::Debug for VsockInnerConnector { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("VsockInnerConnector") + } +} + +impl VsockInnerConnector { + /// Connect to vsock inner backend and get a new inner stream. + pub fn connect(&self) -> Result> { + self.connect_() + .map(|stream| Box::new(stream) as Box) + } + + fn connect_(&self) -> Result { + let (internal_sender, external_receiver) = channel(); + let (external_sender, internal_receiver) = channel(); + // use `EFD_SEMAPHORE` mode to make EventFd as a write counter for + // channel. + let internal_event = Arc::new(EventFd::new(EFD_NONBLOCK | EFD_SEMAPHORE)?); + let external_event = Arc::new(EventFd::new(EFD_NONBLOCK | EFD_SEMAPHORE)?); + let internal_nonblocking = Arc::new(AtomicBool::new(false)); + let external_nonblocking = Arc::new(AtomicBool::new(false)); + + let mut internal_stream = VsockInnerStream::new( + internal_event.clone(), + external_event.clone(), + internal_sender, + internal_receiver, + internal_nonblocking.clone(), + external_nonblocking.clone(), + InnerStreamRole::Internal, + ); + // internal stream is vsock internal used, we need non-blocking mode + internal_stream.set_nonblocking(true)?; + + // external stream is used for others, the mode can be set by them. + let external_stream = VsockInnerStream::new( + external_event, + internal_event, + external_sender, + external_receiver, + external_nonblocking, + internal_nonblocking, + InnerStreamRole::External, + ); + + // send the inner stream to connection pending list for later accept. + self.conn_sender.send(internal_stream).map_err(|e| { + Error::new( + ErrorKind::ConnectionRefused, + format!("vsock inner stream sender err: {e}"), + ) + })?; + self.backend_event.write(1)?; + + Ok(external_stream) + } +} + +/// The backend implemenation that can be used in-process, no need to forward +/// data by the OS. +pub struct VsockInnerBackend { + /// The eventfd used for notify the connection requests. + backend_event: Arc, + /// The pending connections waiting to be accepted. + pending_conns: Receiver, + /// A sender can Send pending connections to inner backend. 
+ conn_sender: Sender, +} + +impl VsockInnerBackend { + pub fn new() -> Result { + let (conn_sender, pending_conns) = channel(); + // use `EFD_SEMAPHORE` mode to make EventFd as a write counter for + // pending_conns channel. + let backend_event = Arc::new(EventFd::new(EFD_NONBLOCK | EFD_SEMAPHORE)?); + + Ok(VsockInnerBackend { + backend_event, + pending_conns, + conn_sender, + }) + } + + /// Create a inner connector instance. + pub fn get_connector(&self) -> VsockInnerConnector { + VsockInnerConnector { + backend_event: self.backend_event.clone(), + conn_sender: self.conn_sender.clone(), + } + } + + fn accept_(&self) -> Result { + self.backend_event.read()?; + match self.pending_conns.try_recv() { + Ok(stream) => Ok(stream), + Err(_) => Err(Error::from(ErrorKind::ConnectionAborted)), + } + } +} + +impl AsRawFd for VsockInnerBackend { + /// Don't read/write this fd, just use it to get signal. + fn as_raw_fd(&self) -> RawFd { + self.backend_event.as_raw_fd() + } +} + +impl VsockBackend for VsockInnerBackend { + fn accept(&mut self) -> Result> { + self.accept_() + .map(|stream| Box::new(stream) as Box) + } + + fn connect(&self, _dst_port: u32) -> Result> { + Err(Error::new( + ErrorKind::ConnectionRefused, + "vsock inner backend doesn't support incoming connection request", + )) + } + + fn r#type(&self) -> VsockBackendType { + VsockBackendType::Inner + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +#[cfg(test)] +mod tests { + use std::sync::{Condvar, Mutex}; + use std::thread; + use std::time::{Duration, Instant}; + + use super::*; + + #[test] + fn test_inner_backend_create() { + assert!(VsockInnerBackend::new().is_ok()); + } + + #[test] + fn test_inner_backend_accept() { + let mut vsock_backend = VsockInnerBackend::new().unwrap(); + let connector = vsock_backend.get_connector(); + + // no connect request send, accept would return error + assert!(vsock_backend.accept().is_err()); + + // connect once, can accept once + connector.connect().unwrap(); + assert!(vsock_backend.accept().is_ok()); + assert!(vsock_backend.accept().is_err()); + + // connect twice, can accept twice + connector.connect().unwrap(); + connector.connect().unwrap(); + assert!(vsock_backend.accept().is_ok()); + assert!(vsock_backend.accept().is_ok()); + assert!(vsock_backend.accept().is_err()); + } + + #[test] + fn test_inner_backend_communication() { + let test_string = String::from("TEST"); + let mut buffer = [0; 10]; + + let mut vsock_backend = VsockInnerBackend::new().unwrap(); + let connector = vsock_backend.get_connector(); + let mut stream_connect = connector.connect().unwrap(); + stream_connect.set_nonblocking(true).unwrap(); + let mut stream_backend = vsock_backend.accept().unwrap(); + + assert!(stream_connect + .write(&test_string.clone().into_bytes()) + .is_ok()); + assert!(stream_backend.read(&mut buffer).is_ok()); + assert_eq!(&buffer[0..test_string.len()], test_string.as_bytes()); + + assert!(stream_backend + .write(&test_string.clone().into_bytes()) + .is_ok()); + assert!(stream_connect.read(&mut buffer).is_ok()); + assert_eq!(&buffer[0..test_string.len()], test_string.as_bytes()); + } + + #[test] + fn test_inner_backend_connect() { + let vsock_backend = VsockInnerBackend::new().unwrap(); + // inner backend don't support peer connection now + assert!(vsock_backend.connect(0).is_err()); + } + + #[test] + fn test_inner_backend_type() { + let vsock_backend = VsockInnerBackend::new().unwrap(); + assert_eq!(vsock_backend.r#type(), VsockBackendType::Inner); + } + + #[test] + fn 
test_inner_backend_vsock_stream() { + let vsock_backend = VsockInnerBackend::new().unwrap(); + let connector = vsock_backend.get_connector(); + let mut vsock_stream = connector.connect().unwrap(); + + assert!(vsock_stream.set_nonblocking(true).is_ok()); + assert!(vsock_stream + .set_read_timeout(Some(Duration::from_secs(1))) + .is_ok()); + assert!(vsock_stream.set_read_timeout(None).is_ok()); + assert!(vsock_stream + .set_write_timeout(Some(Duration::from_secs(2))) + .is_ok()); + } + + fn get_inner_backend_stream_pair() -> (VsockInnerStream, VsockInnerStream) { + let vsock_backend = VsockInnerBackend::new().unwrap(); + let connector = vsock_backend.get_connector(); + let outer_stream = connector.connect_().unwrap(); + let inner_stream = vsock_backend.accept_().unwrap(); + + (inner_stream, outer_stream) + } + + #[test] + #[allow(clippy::unused_io_amount)] + fn test_inner_stream_nonblocking() { + // write once, read multi times + { + let (mut inner_stream, mut outer_stream) = get_inner_backend_stream_pair(); + outer_stream.set_nonblocking(true).unwrap(); + + // write data into inner stream with length of 10 + let wirter_buf = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + inner_stream.write_all(&wirter_buf).unwrap(); + + // first, read data from outer stream with length of 5 + let mut reader_buf1 = [0; 5]; + outer_stream.read(&mut reader_buf1).unwrap(); + assert_eq!(reader_buf1, [0, 1, 2, 3, 4]); + // test the unread data in outer stream + assert_eq!(outer_stream.read_buf, Some((Vec::from(&wirter_buf[..]), 5))); + + // second, read more data in outer stream + let mut reader_buf2 = [0; 3]; + outer_stream.read(&mut reader_buf2).unwrap(); + assert_eq!(reader_buf2, [5, 6, 7]); + // test the unread data in outer stream + assert_eq!(outer_stream.read_buf, Some((Vec::from(&wirter_buf[..]), 8))); + + // then, read the last data in outer stream + let mut reader_buf3 = [0; 2]; + outer_stream.read(&mut reader_buf3).unwrap(); + assert_eq!(reader_buf3, [8, 9]); + // there's no unread data in outer stream + assert_eq!(outer_stream.read_buf, None); + + // last, try to read again, it would return error + let mut reader_buf3 = [0; 1]; + assert_eq!( + outer_stream.read(&mut reader_buf3).unwrap_err().kind(), + ErrorKind::WouldBlock + ); + } + + // write multi times, read all + { + let (mut inner_stream, mut outer_stream) = get_inner_backend_stream_pair(); + outer_stream.set_nonblocking(true).unwrap(); + + // first, write some data into inner stream + let writer_buf1 = [0, 1, 2, 3]; + inner_stream.write_all(&writer_buf1).unwrap(); + + // second, write more data into inner stream + let writer_buf2 = [4, 5, 6]; + inner_stream.write_all(&writer_buf2).unwrap(); + + // then, read all data from outer stream + let mut reader_buf1 = [0; 7]; + outer_stream.read(&mut reader_buf1).unwrap(); + assert_eq!(reader_buf1, [0, 1, 2, 3, 4, 5, 6]); + // there's no unread data in outer stream + assert_eq!(outer_stream.read_buf, None); + + // last, try to read again, it would return error + let mut reader_buf2 = [0; 1]; + assert_eq!( + outer_stream.read(&mut reader_buf2).unwrap_err().kind(), + ErrorKind::WouldBlock + ); + } + + // write multi times, then read multi times + { + let (mut inner_stream, mut outer_stream) = get_inner_backend_stream_pair(); + outer_stream.set_nonblocking(true).unwrap(); + + // first, write some data into inner stream + let writer_buf1 = [0, 1, 2, 3]; + inner_stream.write_all(&writer_buf1).unwrap(); + + // second, write more data into inner stream + let writer_buf2 = [4, 5]; + 
inner_stream.write_all(&writer_buf2).unwrap(); + + // third, write more data into inner stream + let writer_buf3 = [6, 7, 8]; + inner_stream.write_all(&writer_buf3).unwrap(); + + // forth, write more data into inner stream + let writer_buf4 = [9, 10]; + inner_stream.write_all(&writer_buf4).unwrap(); + + // fifth, read some data from outer stream + let mut reader_buf1 = [0; 2]; + outer_stream.read(&mut reader_buf1).unwrap(); + assert_eq!(reader_buf1, [0, 1]); + // now, the content in read buf is writer buf1 + assert_eq!( + outer_stream.read_buf, + Some((Vec::from(&writer_buf1[..]), 2)) + ); + + // sixth, continue read some data from outer steam + let mut reader_buf2 = [0; 3]; + outer_stream.read(&mut reader_buf2).unwrap(); + assert_eq!(reader_buf2, [2, 3, 4]); + // now, the content in read buf is writer buf2 + assert_eq!( + outer_stream.read_buf, + Some((Vec::from(&writer_buf2[..]), 1)) + ); + + // seventh, continue read some data from outer steam + let mut reader_buf3 = [0; 5]; + outer_stream.read(&mut reader_buf3).unwrap(); + assert_eq!(reader_buf3, [5, 6, 7, 8, 9]); + // now, the content in read buf is writer buf4 + assert_eq!( + outer_stream.read_buf, + Some((Vec::from(&writer_buf4[..]), 1)) + ); + + // then, read the rest data from outer stream + let mut reader_buf4 = [0; 3]; + outer_stream.read(&mut reader_buf4).unwrap(); + assert_eq!(reader_buf4, [10, 0, 0]); + // now, there's no unread data in outer stream + assert_eq!(outer_stream.read_buf, None); + + // last, try to read again, it would return error + let mut reader_buf5 = [0; 5]; + assert_eq!( + outer_stream.read(&mut reader_buf5).unwrap_err().kind(), + ErrorKind::WouldBlock + ); + } + + // write and read multi times + { + let (mut inner_stream, mut outer_stream) = get_inner_backend_stream_pair(); + outer_stream.set_nonblocking(true).unwrap(); + + // first, try to read data, it would return error + let mut reader_buf1 = [0; 5]; + assert_eq!( + outer_stream.read(&mut reader_buf1).unwrap_err().kind(), + ErrorKind::WouldBlock + ); + + // second, write some data into inner stream + let writer_buf1 = [0, 1, 2, 3]; + inner_stream.write_all(&writer_buf1).unwrap(); + + // third, read some data from outer stream + let mut reader_buf2 = [0; 2]; + outer_stream.read(&mut reader_buf2).unwrap(); + assert_eq!(reader_buf2, [0, 1]); + // the content in read buf is writer buf1 + assert_eq!( + outer_stream.read_buf, + Some((Vec::from(&writer_buf1[..]), 2)) + ); + + // forth, write some data into inner stream + let writer_buf2 = [4, 5]; + inner_stream.write_all(&writer_buf2).unwrap(); + + // fifth, read some data from outer stream + let mut reader_buf3 = [0; 3]; + outer_stream.read(&mut reader_buf3).unwrap(); + assert_eq!(reader_buf3, [2, 3, 4]); + // the content in read buf is writer buf2 + assert_eq!( + outer_stream.read_buf, + Some((Vec::from(&writer_buf2[..]), 1)) + ); + + // sixth, write some data twice into inner steam + let writer_buf3 = [6]; + inner_stream.write_all(&writer_buf3).unwrap(); + let writer_buf4 = [7, 8, 9]; + inner_stream.write_all(&writer_buf4).unwrap(); + + // seventh, read all data from outer stream + let mut reader_buf4 = [0; 10]; + outer_stream.read(&mut reader_buf4).unwrap(); + assert_eq!(reader_buf4, [5, 6, 7, 8, 9, 0, 0, 0, 0, 0]); + // there's no unread data in outer stream + assert_eq!(outer_stream.read_buf, None); + + // eighth, write some data again into inner stream + let writer_buf5 = [10, 11, 12]; + inner_stream.write_all(&writer_buf5).unwrap(); + + // ninth, read some data from outer stream + let mut reader_buf5 
= [0; 1]; + outer_stream.read(&mut reader_buf5).unwrap(); + assert_eq!(reader_buf5, [10]); + // the content in read buf is writer buf5 + assert_eq!( + outer_stream.read_buf, + Some((Vec::from(&writer_buf5[..]), 1)) + ); + + // then, read all data from outer stream + let mut reader_buf6 = [0; 4]; + outer_stream.read(&mut reader_buf6).unwrap(); + assert_eq!(reader_buf6, [11, 12, 0, 0]); + // there's no unread data in outer stream + assert_eq!(outer_stream.read_buf, None); + + // last, try to read again, it would return error + let mut reader_buf7 = [0; 1]; + assert_eq!( + outer_stream.read(&mut reader_buf7).unwrap_err().kind(), + ErrorKind::WouldBlock + ); + } + + // write and read duplex multi times + { + let (mut inner_stream, mut outer_stream) = get_inner_backend_stream_pair(); + outer_stream.set_nonblocking(true).unwrap(); + + // first, try to read data from outer and inner stream, they would + // return error + let mut reader_buf1 = [0; 1]; + assert_eq!( + outer_stream.read(&mut reader_buf1).unwrap_err().kind(), + ErrorKind::WouldBlock + ); + let mut reader_buf2 = [0; 1]; + assert_eq!( + inner_stream.read(&mut reader_buf2).unwrap_err().kind(), + ErrorKind::WouldBlock + ); + + // second, write some data into inner and outer stream + let writer_buf1 = [0, 1, 2]; + inner_stream.write_all(&writer_buf1).unwrap(); + let writer_buf2 = [0, 1]; + outer_stream.write_all(&writer_buf2).unwrap(); + + // third, read all data from outer and inner stream + let mut reader_buf3 = [0; 5]; + outer_stream.read(&mut reader_buf3).unwrap(); + assert_eq!(reader_buf3, [0, 1, 2, 0, 0]); + assert_eq!(outer_stream.read_buf, None); + let mut reader_buf4 = [0; 5]; + inner_stream.read(&mut reader_buf4).unwrap(); + assert_eq!(reader_buf4, [0, 1, 0, 0, 0]); + assert_eq!(inner_stream.read_buf, None); + + // forth, write data twicd into inner and outer stream + let writer_buf3 = [3, 4, 5, 6]; + inner_stream.write_all(&writer_buf3).unwrap(); + let writer_buf4 = [2, 3, 4]; + outer_stream.write_all(&writer_buf4).unwrap(); + let writer_buf5 = [7, 8]; + inner_stream.write_all(&writer_buf5).unwrap(); + let writer_buf6 = [5, 6, 7]; + outer_stream.write_all(&writer_buf6).unwrap(); + + // fifth, read some data from outer and inner stream + let mut reader_buf5 = [0; 5]; + outer_stream.read(&mut reader_buf5).unwrap(); + assert_eq!(reader_buf5, [3, 4, 5, 6, 7]); + assert_eq!( + outer_stream.read_buf, + Some((Vec::from(&writer_buf5[..]), 1)) + ); + let mut reader_buf6 = [0; 5]; + inner_stream.read(&mut reader_buf6).unwrap(); + assert_eq!(reader_buf6, [2, 3, 4, 5, 6]); + assert_eq!( + inner_stream.read_buf, + Some((Vec::from(&writer_buf6[..]), 2)) + ); + + // then, read all data from inner and outer stream + let mut reader_buf7 = [0; 5]; + inner_stream.read(&mut reader_buf7).unwrap(); + assert_eq!(reader_buf7, [7, 0, 0, 0, 0]); + assert_eq!(inner_stream.read_buf, None); + let mut reader_buf8 = [0; 5]; + outer_stream.read(&mut reader_buf8).unwrap(); + assert_eq!(reader_buf8, [8, 0, 0, 0, 0]); + assert_eq!(outer_stream.read_buf, None); + + // last, read data from outer and inner stream again, they would + // return error + let mut reader_buf9 = [0; 1]; + assert_eq!( + outer_stream.read(&mut reader_buf9).unwrap_err().kind(), + ErrorKind::WouldBlock + ); + let mut reader_buf10 = [0; 1]; + assert_eq!( + inner_stream.read(&mut reader_buf10).unwrap_err().kind(), + ErrorKind::WouldBlock + ); + } + } + + #[test] + fn test_inner_stream_block() { + // outer stream is in block mode + let (mut inner_stream, mut outer_stream) = 
get_inner_backend_stream_pair(); + + let start_time = Instant::now(); + let handler = thread::spawn(move || { + let mut reader_buf = [0; 5]; + assert!(outer_stream.read_exact(&mut reader_buf).is_ok()); + assert_eq!(reader_buf, [1, 2, 3, 4, 5]); + assert!(Instant::now().duration_since(start_time).as_millis() >= 500); + }); + + // sleep 500ms + thread::sleep(Duration::from_millis(500)); + let writer_buf = [1, 2, 3, 4, 5]; + inner_stream.write_all(&writer_buf).unwrap(); + + handler.join().unwrap(); + } + + #[test] + #[allow(clippy::mutex_atomic)] + fn test_inner_stream_timeout() { + // outer stream is in block mode + let (mut inner_stream, mut outer_stream) = get_inner_backend_stream_pair(); + // set write timeout always return Ok, and no effect + assert!(outer_stream + .set_write_timeout(Some(Duration::from_secs(10))) + .is_ok()); + // set read timeout always return ok, can take effect + assert!(outer_stream + .set_read_timeout(Some(Duration::from_millis(150))) + .is_ok()); + + let cond_pair = Arc::new((Mutex::new(false), Condvar::new())); + let cond_pair_2 = Arc::clone(&cond_pair); + let handler = thread::Builder::new() + .spawn(move || { + // notify handler thread start + let (lock, cvar) = &*cond_pair_2; + let mut started = lock.lock().unwrap(); + *started = true; + cvar.notify_one(); + drop(started); + + let start_time1 = Instant::now(); + let mut reader_buf = [0; 5]; + // first read would timed out + assert_eq!( + outer_stream.read_exact(&mut reader_buf).unwrap_err().kind(), + ErrorKind::TimedOut + ); + let end_time1 = Instant::now().duration_since(start_time1).as_millis(); + assert!((150..250).contains(&end_time1)); + + // second read would ok + assert!(outer_stream.read_exact(&mut reader_buf).is_ok()); + assert_eq!(reader_buf, [1, 2, 3, 4, 5]); + + // cancel the read timeout + let start_time2 = Instant::now(); + outer_stream.set_read_timeout(None).unwrap(); + assert!(outer_stream.read_exact(&mut reader_buf).is_ok()); + let end_time2 = Instant::now().duration_since(start_time2).as_millis(); + assert!(end_time2 >= 500); + }) + .unwrap(); + + // wait handler thread started + let (lock, cvar) = &*cond_pair; + let mut started = lock.lock().unwrap(); + while !*started { + started = cvar.wait(started).unwrap(); + } + + // sleep 300ms, test timeout + thread::sleep(Duration::from_millis(300)); + let writer_buf = [1, 2, 3, 4, 5]; + inner_stream.write_all(&writer_buf).unwrap(); + + // sleep 500ms again, test cancel timeout + thread::sleep(Duration::from_millis(500)); + let writer_buf = [1, 2, 3, 4, 5]; + inner_stream.write_all(&writer_buf).unwrap(); + + handler.join().unwrap(); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/mod.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/mod.rs new file mode 100644 index 000000000000..bc916b33c508 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/mod.rs @@ -0,0 +1,76 @@ +// Copyright 2022 Alibaba Cloud. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +/// This module implements backends for vsock - the host side vsock endpoint, +/// which can translate vsock stream into host's protocol, eg. AF_UNIX, AF_INET +/// or even the protocol created by us. 
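The traits declared in this module are easiest to read with a concrete caller in mind. The following is a minimal sketch, not part of the original patch, of how a connection muxer might drive any `VsockBackend`; `serve_one` is a hypothetical helper, and a real caller would register the returned stream with epoll instead of reading it inline.

    use std::io::Read;

    // Hypothetical helper: accept one host-initiated connection and poll it once.
    fn serve_one(backend: &mut dyn VsockBackend) -> std::io::Result<()> {
        // Accept the pending connection and switch it to nonblocking mode,
        // as the epoll-based connection muxer would.
        let mut stream: Box<dyn VsockStream> = backend.accept()?;
        stream.set_nonblocking(true)?;

        let mut buf = [0u8; 4096];
        match stream.read(&mut buf) {
            Ok(n) => println!("read {n} bytes from the host-side endpoint"),
            // A nonblocking read with no data ready is not an error here.
            Err(e) if e.kind() == std::io::ErrorKind::WouldBlock => {}
            Err(e) => return Err(e),
        }
        Ok(())
    }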
+use std::any::Any;
+use std::io::{Read, Write};
+use std::os::unix::io::{AsRawFd, RawFd};
+use std::time::Duration;
+
+mod hybrid_stream;
+mod inner;
+mod tcp;
+mod unix_stream;
+
+pub use self::hybrid_stream::HybridStream;
+pub use self::inner::{VsockInnerBackend, VsockInnerConnector, VsockInnerStream};
+pub use self::tcp::VsockTcpBackend;
+pub use self::unix_stream::VsockUnixStreamBackend;
+
+/// The type of vsock backend.
+#[derive(PartialEq, Eq, Hash, Debug, Clone)]
+pub enum VsockBackendType {
+    /// Unix stream
+    UnixStream,
+    /// TCP socket
+    Tcp,
+    /// Inner backend
+    Inner,
+    /// Fd-passed hybrid stream backend
+    HybridStream,
+    /// For test purposes
+    #[cfg(test)]
+    Test,
+}
+
+/// The generic abstraction of a vsock backend; its API resembles a socket's.
+pub trait VsockBackend: AsRawFd + Send {
+    /// Accept a host-initiated connection.
+    fn accept(&mut self) -> std::io::Result<Box<dyn VsockStream>>;
+    /// Connect for a guest-initiated connection.
+    fn connect(&self, dst_port: u32) -> std::io::Result<Box<dyn VsockStream>>;
+    /// The type of backend.
+    fn r#type(&self) -> VsockBackendType;
+    /// Used to downcast to the specific type.
+    fn as_any(&self) -> &dyn Any;
+}
+
+/// The generic abstraction of a vsock stream.
+pub trait VsockStream: Read + Write + AsRawFd + Send {
+    /// The type of backend which created the stream.
+    fn backend_type(&self) -> VsockBackendType;
+    /// Moves the VsockStream into or out of nonblocking mode.
+    fn set_nonblocking(&mut self, _nonblocking: bool) -> std::io::Result<()> {
+        Err(std::io::Error::from(std::io::ErrorKind::WouldBlock))
+    }
+    /// Set the read timeout to the time duration specified.
+    fn set_read_timeout(&mut self, _dur: Option<Duration>) -> std::io::Result<()> {
+        Err(std::io::Error::from(std::io::ErrorKind::InvalidInput))
+    }
+    /// Set the write timeout to the time duration specified.
+    fn set_write_timeout(&mut self, _dur: Option<Duration>) -> std::io::Result<()> {
+        Err(std::io::Error::from(std::io::ErrorKind::InvalidInput))
+    }
+    /// Receive the port and fd from the peer.
+    fn recv_data_fd(
+        &self,
+        _bytes: &mut [u8],
+        _fds: &mut [RawFd],
+    ) -> std::io::Result<(usize, usize)> {
+        Err(std::io::Error::from(std::io::ErrorKind::InvalidInput))
+    }
+    /// Used to downcast to the specific type.
+    fn as_any(&self) -> &dyn Any;
+}
diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/tcp.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/tcp.rs
new file mode 100644
index 000000000000..f3593148302d
--- /dev/null
+++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/tcp.rs
@@ -0,0 +1,170 @@
+// Copyright 2022 Alibaba Cloud. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use std::any::Any;
+use std::net::{TcpListener, TcpStream};
+use std::os::unix::io::{AsRawFd, RawFd};
+use std::time::Duration;
+
+use log::info;
+
+use super::super::{Result, VsockError};
+use super::{VsockBackend, VsockBackendType, VsockStream};
+
+impl VsockStream for TcpStream {
+    fn backend_type(&self) -> VsockBackendType {
+        VsockBackendType::Tcp
+    }
+
+    fn set_nonblocking(&mut self, nonblocking: bool) -> std::io::Result<()> {
+        TcpStream::set_nonblocking(self, nonblocking)
+    }
+
+    fn set_read_timeout(&mut self, dur: Option<Duration>) -> std::io::Result<()> {
+        TcpStream::set_read_timeout(self, dur)
+    }
+
+    fn set_write_timeout(&mut self, dur: Option<Duration>) -> std::io::Result<()> {
+        TcpStream::set_write_timeout(self, dur)
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+}
+
+/// The backend implementation that uses a TCP socket.
+#[allow(dead_code)] +pub struct VsockTcpBackend { + /// The TCP socket, through which host-initiated connections are accepted. + tcp_sock: TcpListener, + /// The address of TCP socket. + tcp_sock_addr: String, +} + +impl VsockTcpBackend { + pub fn new(tcp_sock_addr: String) -> Result { + info!("open vsock tcp: {}", tcp_sock_addr); + // Open/bind/listen on the host Unix socket, so we can accept + // host-initiated connections. + let tcp_sock = TcpListener::bind(&tcp_sock_addr) + .and_then(|sock| sock.set_nonblocking(true).map(|_| sock)) + .map_err(VsockError::Backend)?; + info!("vsock tcp opened"); + + Ok(VsockTcpBackend { + tcp_sock, + tcp_sock_addr, + }) + } +} + +impl AsRawFd for VsockTcpBackend { + fn as_raw_fd(&self) -> RawFd { + self.tcp_sock.as_raw_fd() + } +} + +impl VsockBackend for VsockTcpBackend { + fn accept(&mut self) -> std::io::Result> { + let (stream, _) = self.tcp_sock.accept()?; + stream.set_nonblocking(true)?; + + Ok(Box::new(stream)) + } + + // Peer connection doesn't supported by tcp backend yet. + fn connect(&self, _dst_port: u32) -> std::io::Result> { + Err(std::io::Error::new( + std::io::ErrorKind::ConnectionRefused, + "vsock net backend doesn't support incoming connection request", + )) + } + + fn r#type(&self) -> VsockBackendType { + VsockBackendType::Tcp + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +#[cfg(test)] +mod tests { + use std::io::{Read, Write}; + use std::net::TcpStream; + + use super::*; + + #[test] + fn test_tcp_backend_bind() { + let tcp_sock_addr = String::from("127.0.0.2:9000"); + assert!(VsockTcpBackend::new(tcp_sock_addr).is_ok()); + } + + #[test] + fn test_tcp_backend_accept() { + let tcp_sock_addr = String::from("127.0.0.2:9001"); + + let mut vsock_backend = VsockTcpBackend::new(tcp_sock_addr.clone()).unwrap(); + let _stream = TcpStream::connect(&tcp_sock_addr).unwrap(); + + assert!(vsock_backend.accept().is_ok()); + } + + #[test] + fn test_tcp_backend_communication() { + let tcp_sock_addr = String::from("127.0.0.2:9002"); + let test_string = String::from("TEST"); + let mut buffer = [0; 10]; + + let mut vsock_backend = VsockTcpBackend::new(tcp_sock_addr.clone()).unwrap(); + let mut stream_connect = TcpStream::connect(&tcp_sock_addr).unwrap(); + stream_connect.set_nonblocking(true).unwrap(); + let mut stream_backend = vsock_backend.accept().unwrap(); + + assert!(stream_connect + .write(&test_string.clone().into_bytes()) + .is_ok()); + assert!(stream_backend.read(&mut buffer).is_ok()); + assert_eq!(&buffer[0..test_string.len()], test_string.as_bytes()); + + assert!(stream_backend + .write(&test_string.clone().into_bytes()) + .is_ok()); + assert!(stream_connect.read(&mut buffer).is_ok()); + assert_eq!(&buffer[0..test_string.len()], test_string.as_bytes()); + } + + #[test] + fn test_tcp_backend_connect() { + let tcp_sock_addr = String::from("127.0.0.2:9003"); + let vsock_backend = VsockTcpBackend::new(tcp_sock_addr).unwrap(); + // tcp backend don't support peer connection + assert!(vsock_backend.connect(0).is_err()); + } + + #[test] + fn test_tcp_backend_type() { + let tcp_sock_addr = String::from("127.0.0.2:9004"); + let vsock_backend = VsockTcpBackend::new(tcp_sock_addr).unwrap(); + assert_eq!(vsock_backend.r#type(), VsockBackendType::Tcp); + } + + #[test] + fn test_tcp_backend_vsock_stream() { + let tcp_sock_addr = String::from("127.0.0.2:9005"); + let _vsock_backend = VsockTcpBackend::new(tcp_sock_addr.clone()).unwrap(); + let vsock_stream = TcpStream::connect(&tcp_sock_addr).unwrap(); + + 
assert!(vsock_stream.set_nonblocking(true).is_ok()); + assert!(vsock_stream + .set_read_timeout(Some(Duration::from_secs(1))) + .is_ok()); + assert!(vsock_stream.set_read_timeout(None).is_ok()); + assert!(vsock_stream + .set_write_timeout(Some(Duration::from_secs(2))) + .is_ok()); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/unix_stream.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/unix_stream.rs new file mode 100644 index 000000000000..8f03a836dfdc --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/backend/unix_stream.rs @@ -0,0 +1,206 @@ +// Copyright 2022 Alibaba Cloud. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::any::Any; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::os::unix::net::{UnixListener, UnixStream}; +use std::time::Duration; + +use log::info; +use sendfd::RecvWithFd; + +use super::super::{Result, VsockError}; +use super::{VsockBackend, VsockBackendType, VsockStream}; + +impl VsockStream for UnixStream { + fn backend_type(&self) -> VsockBackendType { + VsockBackendType::UnixStream + } + + fn set_nonblocking(&mut self, nonblocking: bool) -> std::io::Result<()> { + UnixStream::set_nonblocking(self, nonblocking) + } + + fn set_read_timeout(&mut self, dur: Option) -> std::io::Result<()> { + UnixStream::set_read_timeout(self, dur) + } + + fn set_write_timeout(&mut self, dur: Option) -> std::io::Result<()> { + UnixStream::set_write_timeout(self, dur) + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn recv_data_fd(&self, bytes: &mut [u8], fds: &mut [RawFd]) -> std::io::Result<(usize, usize)> { + self.recv_with_fd(bytes, fds) + } +} + +/// The backend implementation that using Unix Stream. +pub struct VsockUnixStreamBackend { + /// The Unix socket, through which host-initiated connections are accepted. + pub(crate) host_sock: UnixListener, + /// The file system path of the host-side Unix socket. + pub(crate) host_sock_path: String, +} + +impl VsockUnixStreamBackend { + pub fn new(host_sock_path: String) -> Result { + info!("Open vsock uds: {}", host_sock_path); + // Open/bind/listen on the host Unix socket, so we can accept + // host-initiated connections. + let host_sock = UnixListener::bind(&host_sock_path) + .and_then(|sock| sock.set_nonblocking(true).map(|_| sock)) + .map_err(VsockError::Backend)?; + info!("vsock uds opened"); + + Ok(VsockUnixStreamBackend { + host_sock, + host_sock_path, + }) + } +} + +impl AsRawFd for VsockUnixStreamBackend { + fn as_raw_fd(&self) -> RawFd { + self.host_sock.as_raw_fd() + } +} + +impl VsockBackend for VsockUnixStreamBackend { + fn accept(&mut self) -> std::io::Result> { + let (stream, _) = self.host_sock.accept()?; + stream.set_nonblocking(true)?; + + Ok(Box::new(stream)) + } + + fn connect(&self, dst_port: u32) -> std::io::Result> { + // We can figure out the path to Unix sockets listening on specific + // ports using `host_sock_path` field. I.e. "_". 
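        // For example, a backend created with a (hypothetical) host_sock_path of
        // "/tmp/kata.hvsock" serving dst_port 1024 connects to "/tmp/kata.hvsock_1024".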
+ let port_path = format!("{}_{}", self.host_sock_path, dst_port); + let stream = UnixStream::connect(port_path)?; + stream.set_nonblocking(true)?; + + Ok(Box::new(stream)) + } + + fn r#type(&self) -> VsockBackendType { + VsockBackendType::UnixStream + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +impl Drop for VsockUnixStreamBackend { + fn drop(&mut self) { + std::fs::remove_file(&self.host_sock_path).ok(); + } +} + +#[cfg(test)] +mod tests { + use std::fs; + use std::io::{Read, Write}; + use std::os::unix::net::UnixStream; + use std::path::Path; + + use super::*; + + #[test] + fn test_unix_backend_bind() { + let host_sock_path = String::from("/tmp/host_sock_path_1"); + fs::remove_file(Path::new(&host_sock_path)).unwrap_or_default(); + + assert!(VsockUnixStreamBackend::new(host_sock_path.clone()).is_ok()); + + fs::remove_file(Path::new(&host_sock_path)).unwrap_or_default(); + } + + #[test] + fn test_unix_backend_accept() { + let host_sock_path = String::from("/tmp/host_sock_path_2"); + fs::remove_file(Path::new(&host_sock_path)).unwrap_or_default(); + + let mut vsock_backend = VsockUnixStreamBackend::new(host_sock_path.clone()).unwrap(); + let _stream = UnixStream::connect(&host_sock_path).unwrap(); + + assert!(vsock_backend.accept().is_ok()); + + fs::remove_file(Path::new(&host_sock_path)).unwrap_or_default(); + } + + #[test] + fn test_unix_backend_communication() { + let host_sock_path = String::from("/tmp/host_sock_path_3"); + fs::remove_file(Path::new(&host_sock_path)).unwrap_or_default(); + let test_string = String::from("TEST"); + let mut buffer = [0; 10]; + + let mut vsock_backend = VsockUnixStreamBackend::new(host_sock_path.clone()).unwrap(); + let mut stream_connect = UnixStream::connect(&host_sock_path).unwrap(); + stream_connect.set_nonblocking(true).unwrap(); + let mut stream_backend = vsock_backend.accept().unwrap(); + + assert!(stream_connect + .write(&test_string.clone().into_bytes()) + .is_ok()); + assert!(stream_backend.read(&mut buffer).is_ok()); + assert_eq!(&buffer[0..test_string.len()], test_string.as_bytes()); + + assert!(stream_backend + .write(&test_string.clone().into_bytes()) + .is_ok()); + assert!(stream_connect.read(&mut buffer).is_ok()); + assert_eq!(&buffer[0..test_string.len()], test_string.as_bytes()); + + fs::remove_file(Path::new(&host_sock_path)).unwrap_or_default(); + } + + #[test] + fn test_unix_backend_connect() { + let host_sock_path = String::from("/tmp/host_sock_path_4"); + let local_server_port = 1; + let local_server_path = format!("{host_sock_path}_{local_server_port}"); + fs::remove_file(Path::new(&host_sock_path)).unwrap_or_default(); + fs::remove_file(Path::new(&local_server_path)).unwrap_or_default(); + + let _local_listener = UnixListener::bind(&local_server_path).unwrap(); + let vsock_backend = VsockUnixStreamBackend::new(host_sock_path.clone()).unwrap(); + + assert!(vsock_backend.connect(local_server_port).is_ok()); + + fs::remove_file(Path::new(&host_sock_path)).unwrap_or_default(); + fs::remove_file(Path::new(&local_server_path)).unwrap_or_default(); + } + + #[test] + fn test_unix_backend_type() { + let host_sock_path = String::from("/tmp/host_sock_path_5"); + fs::remove_file(Path::new(&host_sock_path)).unwrap_or_default(); + + let vsock_backend = VsockUnixStreamBackend::new(host_sock_path.clone()).unwrap(); + assert_eq!(vsock_backend.r#type(), VsockBackendType::UnixStream); + + fs::remove_file(Path::new(&host_sock_path)).unwrap_or_default(); + } + + #[test] + fn test_unix_backend_vsock_stream() { + let (sock1, _sock2) = 
UnixStream::pair().unwrap(); + let mut vsock_stream: Box = Box::new(sock1); + + assert!(vsock_stream.set_nonblocking(true).is_ok()); + assert!(vsock_stream + .set_read_timeout(Some(Duration::from_secs(1))) + .is_ok()); + assert!(vsock_stream.set_read_timeout(None).is_ok()); + assert!(vsock_stream + .set_write_timeout(Some(Duration::from_secs(2))) + .is_ok()); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/csm/connection.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/csm/connection.rs new file mode 100644 index 000000000000..e2ca7e3339af --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/csm/connection.rs @@ -0,0 +1,1282 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +/// The main job of `VsockConnection` is to forward data traffic, back and +/// forth, between a guest-side AF_VSOCK socket and a host-side generic `Read + +/// Write + AsRawFd` stream, while also managing its internal state. To that +/// end, `VsockConnection` implements: +/// - `VsockChannel` for: +/// - moving data from the host stream to a guest-provided RX buffer, via +/// `recv_pkt()`; and +/// - moving data from a guest-provided TX buffer to the host stream, via +/// `send_pkt()`; and +/// - updating its internal state, by absorbing control packets (anything +/// other than VSOCK_OP_RW). +/// - `VsockEpollListener` for getting notified about the availability of data +/// or free buffer space at the host stream. +/// +/// Note: there is a certain asymmetry to the RX and TX data flows: +/// - RX transfers do not need any data buffering, since data is read straight +/// from the host stream and into the guest-provided RX buffer; +/// - TX transfers may require some data to be buffered by `VsockConnection`, if +/// the host peer can't keep up with reading the data that we're writing. +/// This is because, once the guest driver provides some data in a virtio +/// TX buffer, the vsock device must consume it. If that data can't be +/// forwarded straight to the host stream, we'll have to store it in a +/// buffer (and flush it at a later time). Vsock flow control ensures that +/// our TX buffer doesn't overflow. +// The code in this file is best read with a fresh memory of the vsock protocol +// inner-workings. To help with that, here is a +// +// Short primer on the vsock protocol +// ---------------------------------- +// +// 1. Establishing a connection A vsock connection is considered established +// after a two-way handshake: +// - the initiating peer sends a connection request packet (`hdr.op` == +// VSOCK_OP_REQUEST); then +// - the listening peer sends back a connection response packet (`hdr.op` == +// VSOCK_OP_RESPONSE). +// +// 2. Terminating a connection When a peer wants to shut down an established +// connection, it sends a VSOCK_OP_SHUTDOWN packet. Two header flags are used +// with VSOCK_OP_SHUTDOWN, indicating the sender's intention: +// - VSOCK_FLAGS_SHUTDOWN_RCV: the sender will receive no more data for this +// connection; and +// - VSOCK_FLAGS_SHUTDOWN_SEND: the sender will send no more data for this +// connection. After a shutdown packet, the receiving peer will have some +// protocol-undefined time to flush its buffers, and then forcefully +// terminate the connection by sending back an RST packet. If the +// shutdown-initiating peer doesn't receive this RST packet during a +// timeout period, it will send one itself, thus terminating the +// connection. 
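// As a concrete illustration (ops only; every packet also carries the credit
// fields described under "3. Flow control" below), a guest-initiated session
// typically looks like:
//
//   guest -> host   VSOCK_OP_REQUEST
//   guest <- host   VSOCK_OP_RESPONSE              connection established
//   guest <-> host  VSOCK_OP_RW ...                data, limited by credit
//   guest -> host   VSOCK_OP_SHUTDOWN (RCV|SEND)
//   guest <- host   VSOCK_OP_RST                   forceful termination
//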
Note: a peer can send more than one VSOCK_OP_SHUTDOWN +// packets. However, read/write indications cannot be undone. E.g. once a +// "no-more-sending" promise was made, it cannot be taken back. That is, +// `hdr.flags` will be ORed between subsequent VSOCK_OP_SHUTDOWN packets. +// +// 3. Flow control Before sending a data packet (VSOCK_OP_RW), the sender must +// make sure that the receiver has enough free buffer space to store that +// data. If this condition is not respected, the receiving peer's behaviour +// is undefined. In this implementation, we forcefully terminate the +// connection by sending back a VSOCK_OP_RST packet. Note: all buffer space +// information is computed and stored on a per-connection basis. Peers keep +// each other informed about the free buffer space they have by filling in +// two packet header members with each packet they send: +// - `hdr.buf_alloc`: the total buffer space the peer has allocated for +// receiving data; and +// - `hdr.fwd_cnt`: the total number of bytes the peer has successfully +// flushed out of its buffer. One can figure out how much space its peer +// has available in its buffer by inspecting the difference between how +// much it has sent to the peer and how much the peer has flushed out +// (i.e. "forwarded", in the vsock spec terminology): `peer_free = +// peer_buf_alloc - (total_bytes_sent_to_peer - peer_fwd_cnt)`. +// +// Note: the above requires that peers constantly keep each other informed on +// their buffer space situation. However, since there are no receipt +// acknowledgement packets defined for the vsock protocol, packet flow can +// often be unidirectional (just one peer sending data to another), so the +// sender's information about the receiver's buffer space can get quickly +// outdated. The vsock protocol defines two solutions to this problem: 1. +// The sender can explicitly ask for a buffer space (i.e. "credit") update +// from its peer, via a VSOCK_OP_CREDIT_REQUEST packet, to which it will +// get a VSOCK_OP_CREDIT_UPDATE response (or any response will do, really, +// since credit information must be included in any packet); 2. The +// receiver can be proactive, and send VSOCK_OP_CREDIT_UPDATE packet, +// whenever it thinks its peer's information is out of date. Our +// implementation uses the proactive approach. +use std::io::{ErrorKind, Read, Write}; +use std::num::Wrapping; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::time::{Duration, Instant}; + +use log::{debug, error, info, warn}; + +use super::super::backend::VsockStream; +use super::super::defs::uapi; +use super::super::packet::VsockPacket; +use super::super::{Result as VsockResult, VsockChannel, VsockEpollListener, VsockError}; +use super::defs; +use super::txbuf::TxBuf; +use super::{ConnState, Error, PendingRx, PendingRxSet, Result}; + +/// A self-managing connection object, that handles communication between a +/// guest-side AF_VSOCK socket and a host-side `Read + Write + AsRawFd` stream. +pub struct VsockConnection { + /// The current connection state. + state: ConnState, + /// The local CID. Most of the time this will be the constant `2` (the vsock + /// host CID). + pub(crate) local_cid: u64, + /// The peer (guest) CID. + pub(crate) peer_cid: u64, + /// The local (host) port. + pub(crate) local_port: u32, + /// The peer (guest) port. + pub(crate) peer_port: u32, + /// The (connected) host-side stream. + pub(crate) stream: Box, + /// The TX buffer for this connection. 
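/// Data only lands here when writing straight to the host stream would
/// block; see `send_bytes()` for how data spills over into it and
/// `notify()` for how it is flushed once EPOLLOUT arrives.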
+ tx_buf: TxBuf, + /// Total number of bytes that have been successfully written to + /// `self.stream`, either directly, or flushed from `self.tx_buf`. + fwd_cnt: Wrapping, + /// The amount of buffer space that the peer (guest) has allocated for this + /// connection. + peer_buf_alloc: u32, + /// The total number of bytes that the peer has forwarded away. + peer_fwd_cnt: Wrapping, + /// The total number of bytes sent to the peer (guest vsock driver) + rx_cnt: Wrapping, + /// Our `self.fwd_cnt`, as last sent to the peer. This is used to provide + /// proactive credit updates, and let the peer know it's OK to send more + /// data. + last_fwd_cnt_to_peer: Wrapping, + /// The set of pending RX packet indications that `recv_pkt()` will use to + /// fill in a packet for the peer (guest). + pending_rx: PendingRxSet, + /// Instant when this connection should be scheduled for immediate + /// termination, due to some timeout condition having been fulfilled. + expiry: Option, +} + +impl VsockChannel for VsockConnection { + /// Fill in a vsock packet, to be delivered to our peer (the guest driver). + /// + /// As per the `VsockChannel` trait, this should only be called when there + /// is data to be fetched from the channel (i.e. `has_pending_rx()` is + /// true). Otherwise, it will error out with `VsockError::NoData`. Pending + /// RX indications are set by other mutable actions performed on the + /// channel. For instance, `send_pkt()` could set an Rst indication, if + /// called with a VSOCK_OP_SHUTDOWN packet, or `notify()` could set a Rw + /// indication (a data packet can be fetched from the channel), if data was + /// ready to be read from the host stream. + /// + /// Returns: + /// - `Ok(())`: the packet has been successfully filled in and is ready for + /// delivery; + /// - `Err(VsockError::NoData)`: there was no data available with which to + /// fill in the packet; + /// - `Err(VsockError::PktBufMissing)`: the packet would've been filled in + /// with data, but it is missing the data buffer. + fn recv_pkt(&mut self, pkt: &mut VsockPacket) -> VsockResult<()> { + // Perform some generic initialization that is the same for any packet + // operation (e.g. source, destination, credit, etc). + self.init_pkt(pkt); + + // If forceful termination is pending, there's no point in checking for + // anything else. It's dead, Jim. + if self.pending_rx.remove(PendingRx::Rst) { + pkt.set_op(uapi::VSOCK_OP_RST); + return Ok(()); + } + + // Next up: if we're due a connection confirmation, that's all we need + // to know to fill in this packet. + if self.pending_rx.remove(PendingRx::Response) { + self.state = ConnState::Established; + pkt.set_op(uapi::VSOCK_OP_RESPONSE); + return Ok(()); + } + + // Same thing goes for locally-initiated connections that need to yield + // a connection request. + if self.pending_rx.remove(PendingRx::Request) { + self.expiry = + Some(Instant::now() + Duration::from_millis(defs::CONN_REQUEST_TIMEOUT_MS)); + pkt.set_op(uapi::VSOCK_OP_REQUEST); + return Ok(()); + } + + if self.pending_rx.remove(PendingRx::Rw) { + // We're due to produce a data packet, by reading the data from the + // host-side backend. + + match self.state { + // A data packet is only valid for established connections, and + // connections for which our peer has initiated a graceful + // shutdown, but can still receive data. + ConnState::Established | ConnState::PeerClosed(false, _) => (), + _ => { + // Any other connection state is invalid at this point, and + // we need to kill it with fire. 
+ pkt.set_op(uapi::VSOCK_OP_RST); + return Ok(()); + } + } + + // Oh wait, before we start bringing in the big data, can our peer + // handle receiving so much bytey goodness? + if self.need_credit_update_from_peer() { + self.last_fwd_cnt_to_peer = self.fwd_cnt; + pkt.set_op(uapi::VSOCK_OP_CREDIT_REQUEST); + return Ok(()); + } + + let buf = pkt.buf_mut().ok_or(VsockError::PktBufMissing)?; + + // The maximum amount of data we can read in is limited by both the + // RX buffer size and the peer available buffer space. + let max_len = std::cmp::min(buf.len(), self.peer_avail_credit()); + + // Read data from the stream straight to the RX buffer, for maximum throughput. + match self.stream.read(&mut buf[..max_len]) { + Ok(read_cnt) => { + if read_cnt == 0 { + // A 0-length read means the host stream was closed + // down. In that case, we'll ask our peer to shut down + // the connection. We can neither send nor receive any + // more data. + self.state = ConnState::LocalClosed; + self.expiry = Some( + Instant::now() + Duration::from_millis(defs::CONN_SHUTDOWN_TIMEOUT_MS), + ); + pkt.set_op(uapi::VSOCK_OP_SHUTDOWN) + .set_flag(uapi::VSOCK_FLAGS_SHUTDOWN_RCV) + .set_flag(uapi::VSOCK_FLAGS_SHUTDOWN_SEND); + } else { + // On a successful data read, we fill in the packet with + // the RW op, and length of the read data. + pkt.set_op(uapi::VSOCK_OP_RW).set_len(read_cnt as u32); + } + self.rx_cnt += Wrapping(pkt.len()); + self.last_fwd_cnt_to_peer = self.fwd_cnt; + return Ok(()); + } + Err(err) if err.kind() == ErrorKind::WouldBlock => { + // This shouldn't actually happen (receiving EWOULDBLOCK + // after EPOLLIN), but apparently it does, so we need to + // handle it greacefully. + warn!( + "vsock: unexpected EWOULDBLOCK while reading from backing stream: \ + lp={}, pp={}, err={:?}", + self.local_port, self.peer_port, err + ); + } + Err(err) => { + // We are not expecting any other errors when reading from + // the underlying stream. If any show up, we'll immediately + // kill this connection. + error!( + "vsock: error reading from backing stream: lp={}, pp={}, err={:?}", + self.local_port, self.peer_port, err + ); + pkt.set_op(uapi::VSOCK_OP_RST); + self.last_fwd_cnt_to_peer = self.fwd_cnt; + return Ok(()); + } + }; + } + + // A credit update is basically a no-op, so we should only waste a + // perfectly fine RX buffer on it if we really have nothing else to say, + // hence we check for this RX indication last. + if self.pending_rx.remove(PendingRx::CreditUpdate) && !self.has_pending_rx() { + pkt.set_op(uapi::VSOCK_OP_CREDIT_UPDATE); + self.last_fwd_cnt_to_peer = self.fwd_cnt; + return Ok(()); + } + + // We've already checked for all conditions that would have produced a + // packet, so if we got to here, we don't know how to yield one. + Err(VsockError::NoData) + } + + /// Deliver a guest-generated packet to this connection. + /// + /// This forwards the data in RW packets to the host stream, and absorbs + /// control packets, using them to manage the internal connection state. + /// + /// Returns: always `Ok(())`: the packet has been consumed; + fn send_pkt(&mut self, pkt: &VsockPacket) -> VsockResult<()> { + // Update the peer credit information. + self.peer_buf_alloc = pkt.buf_alloc(); + self.peer_fwd_cnt = Wrapping(pkt.fwd_cnt()); + + match self.state { + // Most frequent case: this is an established connection that needs + // to forward some data to the host stream. Also works for a + // connection that has begun shutting down, but the peer still has + // some data to send. 
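// (In `PeerClosed(_, false)` the second flag is the peer's "no more
// sending" indication, so `false` means the peer may legitimately still
// send RW packets here.)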
+ ConnState::Established | ConnState::PeerClosed(_, false) + if pkt.op() == uapi::VSOCK_OP_RW => + { + if pkt.buf().is_none() { + info!( + "vsock: dropping empty data packet from guest (lp={}, pp={}", + self.local_port, self.peer_port + ); + return Ok(()); + } + + // Unwrapping here is safe, since we just checked `pkt.buf()` + // above. + let buf_slice = &pkt.buf().unwrap()[..(pkt.len() as usize)]; + if let Err(err) = self.send_bytes(buf_slice) { + // If we can't write to the host stream, that's an + // unrecoverable error, so we'll terminate this connection. + warn!( + "vsock: error writing to local stream (lp={}, pp={}): {:?}", + self.local_port, self.peer_port, err + ); + self.kill(); + return Ok(()); + } + + // We might've just consumed some data. If that's the case, we + // might need to update the peer on our buffer space situation, + // so that it can keep sending data packets our way. + if self.peer_needs_credit_update() { + self.pending_rx.insert(PendingRx::CreditUpdate); + } + } + + // Next up: receiving a response / confirmation for a host-initiated + // connection. We'll move to an Established state, and pass on the + // good news through the host stream. + ConnState::LocalInit if pkt.op() == uapi::VSOCK_OP_RESPONSE => { + self.expiry = None; + self.state = ConnState::Established; + } + + // The peer wants to shut down an established connection. If they + // have nothing more to send nor receive, and we don't have to wait + // to drain our TX buffer, we can schedule an RST packet (to + // terminate the connection on the next recv call). Otherwise, we'll + // arm the kill timer. + ConnState::Established if pkt.op() == uapi::VSOCK_OP_SHUTDOWN => { + let recv_off = pkt.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_RCV != 0; + let send_off = pkt.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_SEND != 0; + self.state = ConnState::PeerClosed(recv_off, send_off); + if recv_off && send_off { + if self.tx_buf.is_empty() { + self.pending_rx.insert(PendingRx::Rst); + } else { + self.expiry = Some( + Instant::now() + Duration::from_millis(defs::CONN_SHUTDOWN_TIMEOUT_MS), + ); + } + } + } + + // The peer wants to update a shutdown request, with more + // receive/send indications. The same logic as above applies. + ConnState::PeerClosed(ref mut recv_off, ref mut send_off) + if pkt.op() == uapi::VSOCK_OP_SHUTDOWN => + { + *recv_off = *recv_off || (pkt.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_RCV != 0); + *send_off = *send_off || (pkt.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_SEND != 0); + if *recv_off && *send_off && self.tx_buf.is_empty() { + self.pending_rx.insert(PendingRx::Rst); + } + } + + // A credit update from our peer is valid only in a state which + // allows data transfer towards the peer. + ConnState::Established | ConnState::PeerInit | ConnState::PeerClosed(false, _) + if pkt.op() == uapi::VSOCK_OP_CREDIT_UPDATE => + { + // Nothing to do here; we've already updated peer credit. + } + + // A credit request from our peer is valid only in a state which + // allows data transfer from the peer. We'll respond with a credit + // update packet. + ConnState::Established | ConnState::PeerInit | ConnState::PeerClosed(_, false) + if pkt.op() == uapi::VSOCK_OP_CREDIT_REQUEST => + { + self.pending_rx.insert(PendingRx::CreditUpdate); + } + + _ => { + debug!( + "vsock: dropping invalid TX pkt for connection: state={:?}, pkt.hdr={:?}", + self.state, + pkt.hdr() + ); + } + }; + + Ok(()) + } + + /// Check if the connection has any pending packet addressed to the peer. 
+ fn has_pending_rx(&self) -> bool { + !self.pending_rx.is_empty() + } +} + +impl AsRawFd for VsockConnection { + /// Get the file descriptor that this connection wants polled. + /// + /// The connection is interested in being notified about EPOLLIN / EPOLLOUT + /// events on the host stream. + fn as_raw_fd(&self) -> RawFd { + self.stream.as_raw_fd() + } +} + +impl VsockEpollListener for VsockConnection { + /// Get the event set that this connection is interested in. + /// + /// A connection will want to be notified when: + /// - data is available to be read from the host stream, so that it can + /// store an RW pending RX indication; and + /// - data can be written to the host stream, and the TX buffer needs to be + /// flushed. + fn get_polled_evset(&self) -> epoll::Events { + let mut evset = epoll::Events::empty(); + if !self.tx_buf.is_empty() { + // There's data waiting in the TX buffer, so we are interested in + // being notified when writing to the host stream wouldn't block. + evset.insert(epoll::Events::EPOLLOUT); + } + // We're generally interested in being notified when data can be read + // from the host stream, unless we're in a state which doesn't allow + // moving data from host to guest. + match self.state { + ConnState::Killed | ConnState::LocalClosed | ConnState::PeerClosed(true, _) => (), + _ if self.need_credit_update_from_peer() => (), + _ => evset.insert(epoll::Events::EPOLLIN), + } + evset + } + + /// Notify the connection about an event (or set of events) that it was + /// interested in. + fn notify(&mut self, evset: epoll::Events) { + if evset.contains(epoll::Events::EPOLLIN) { + // Data can be read from the host stream. Setting a Rw pending + // indication, so that the muxer will know to call `recv_pkt()` + // later. + self.pending_rx.insert(PendingRx::Rw); + } + + if evset.contains(epoll::Events::EPOLLOUT) { + // Data can be written to the host stream. Time to flush out the TX + // buffer. + if self.tx_buf.is_empty() { + info!("vsock: connection received unexpected EPOLLOUT event"); + return; + } + let flushed = self + .tx_buf + .flush_to(&mut self.stream) + .unwrap_or_else(|err| { + warn!( + "vsock: error flushing TX buf for (lp={}, pp={}): {:?}", + self.local_port, self.peer_port, err + ); + match err { + Error::TxBufFlush(inner) if inner.kind() == ErrorKind::WouldBlock => { + // This should never happen (EWOULDBLOCK after + // EPOLLOUT), but it does, so let's absorb it. + } + _ => self.kill(), + }; + 0 + }); + self.fwd_cnt += Wrapping(flushed as u32); + + // If this connection was shutting down, but is waiting to drain the + // TX buffer before forceful termination, the wait might be over. + if self.state == ConnState::PeerClosed(true, true) && self.tx_buf.is_empty() { + self.pending_rx.insert(PendingRx::Rst); + } else if self.peer_needs_credit_update() { + // If we've freed up some more buffer space, we may need to let + // the peer know it can safely send more data our way. + self.pending_rx.insert(PendingRx::CreditUpdate); + } + } + } +} + +impl VsockConnection { + /// Create a new guest-initiated connection object. 
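/// Roughly, for an incoming guest request packet `pkt` (a sketch only; the
/// `stream` argument would come from the configured `VsockBackend`):
///
///     VsockConnection::new_peer_init(
///         stream,            // host-side stream for this session
///         pkt.dst_cid(),     // local (host) CID, normally 2
///         pkt.src_cid(),     // peer (guest) CID
///         pkt.dst_port(),    // local (host) port
///         pkt.src_port(),    // peer (guest) port
///         pkt.buf_alloc(),   // peer's advertised RX buffer space
///     );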
+ pub fn new_peer_init( + stream: Box, + local_cid: u64, + peer_cid: u64, + local_port: u32, + peer_port: u32, + peer_buf_alloc: u32, + ) -> Self { + Self { + local_cid, + peer_cid, + local_port, + peer_port, + stream, + state: ConnState::PeerInit, + tx_buf: TxBuf::default(), + fwd_cnt: Wrapping(0), + peer_buf_alloc, + peer_fwd_cnt: Wrapping(0), + rx_cnt: Wrapping(0), + last_fwd_cnt_to_peer: Wrapping(0), + pending_rx: PendingRxSet::from(PendingRx::Response), + expiry: None, + } + } + + /// Create a new host-initiated connection object. + pub fn new_local_init( + stream: Box, + local_cid: u64, + peer_cid: u64, + local_port: u32, + peer_port: u32, + ) -> Self { + Self { + local_cid, + peer_cid, + local_port, + peer_port, + stream, + state: ConnState::LocalInit, + tx_buf: TxBuf::default(), + fwd_cnt: Wrapping(0), + peer_buf_alloc: 0, + peer_fwd_cnt: Wrapping(0), + rx_cnt: Wrapping(0), + last_fwd_cnt_to_peer: Wrapping(0), + pending_rx: PendingRxSet::from(PendingRx::Request), + expiry: None, + } + } + + /// Check if there is an expiry (kill) timer set for this connection, + /// sometime in the future. + pub fn will_expire(&self) -> bool { + match self.expiry { + None => false, + Some(t) => t > Instant::now(), + } + } + + /// Check if this connection needs to be scheduled for forceful termination, + /// due to its kill timer having expired. + pub fn has_expired(&self) -> bool { + match self.expiry { + None => false, + Some(t) => t <= Instant::now(), + } + } + + /// Get the kill timer value, if one is set. + pub fn expiry(&self) -> Option { + self.expiry + } + + /// Schedule the connection to be forcefully terminated ASAP (i.e. the next + /// time the connection is asked to yield a packet, via `recv_pkt()`). + pub fn kill(&mut self) { + self.state = ConnState::Killed; + self.pending_rx.insert(PendingRx::Rst); + } + + /// Return the connections state. + pub fn state(&self) -> ConnState { + self.state + } + + /// Send some raw, untracked, data straight to the underlying connected + /// stream. Returns: number of bytes written, or the error describing the + /// write failure. + /// + /// Warning: this will bypass the connection state machine and write + /// directly to the underlying stream. No account of this write is kept, + /// which includes bypassing vsock flow control. + pub fn send_bytes_raw(&mut self, buf: &[u8]) -> Result { + self.stream.write(buf).map_err(Error::StreamWrite) + } + + /// Send some raw data (a byte-slice) to the host stream. + /// + /// Raw data can either be sent straight to the host stream, or to our TX + /// buffer, if the former fails. + fn send_bytes(&mut self, buf: &[u8]) -> Result<()> { + // If there is data in the TX buffer, that means we're already + // registered for EPOLLOUT events on the underlying stream. Therefore, + // there's no point in attempting a write at this point. `self.notify()` + // will get called when EPOLLOUT arrives, and it will attempt to drain + // the TX buffer then. + if !self.tx_buf.is_empty() { + return self.tx_buf.push(buf); + } + + // The TX buffer is empty, so we can try to write straight to the host + // stream. + let written = match self.stream.write(buf) { + Ok(cnt) => cnt, + Err(e) => { + // Absorb any would-block errors, since we can always try again + // later. + if e.kind() == ErrorKind::WouldBlock { + 0 + } else { + // We don't know how to handle any other write error, so + // we'll send it up the call chain. 
+ return Err(Error::StreamWrite(e)); + } + } + }; + // Move the "forwarded bytes" counter ahead by how much we were able to + // send out. + self.fwd_cnt += Wrapping(written as u32); + + // If we couldn't write the whole slice, we'll need to push the + // remaining data to our buffer. + if written < buf.len() { + self.tx_buf.push(&buf[written..])?; + } + + Ok(()) + } + + /// Check if the credit information the peer has last received from us is + /// outdated. + fn peer_needs_credit_update(&self) -> bool { + let peer_seen_free_buf = + Wrapping(defs::CONN_TX_BUF_SIZE) - (self.fwd_cnt - self.last_fwd_cnt_to_peer); + peer_seen_free_buf < Wrapping(defs::CONN_CREDIT_UPDATE_THRESHOLD) + } + + /// Check if we need to ask the peer for a credit update before sending any + /// more data its way. + fn need_credit_update_from_peer(&self) -> bool { + self.peer_avail_credit() == 0 + } + + /// Get the maximum number of bytes that we can send to our peer, without + /// overflowing its buffer. + fn peer_avail_credit(&self) -> usize { + (Wrapping(self.peer_buf_alloc) - (self.rx_cnt - self.peer_fwd_cnt)).0 as usize + } + + /// Prepare a packet header for transmission to our peer. + fn init_pkt<'a>(&self, pkt: &'a mut VsockPacket) -> &'a mut VsockPacket { + // Make sure the header is zeroed-out first. This looks sub-optimal, but + // it is actually optimized-out in the compiled code to be faster than a + // memset(). + for b in pkt.hdr_mut() { + *b = 0; + } + + pkt.set_src_cid(self.local_cid) + .set_dst_cid(self.peer_cid) + .set_src_port(self.local_port) + .set_dst_port(self.peer_port) + .set_type(uapi::VSOCK_TYPE_STREAM) + .set_buf_alloc(defs::CONN_TX_BUF_SIZE) + .set_fwd_cnt(self.fwd_cnt.0) + } +} + +#[cfg(test)] +pub(crate) mod tests { + use std::any::Any; + use std::io::{Error as IoError, ErrorKind, Read, Result as IoResult, Write}; + use std::os::unix::io::RawFd; + use std::time::{Duration, Instant}; + + use virtio_queue::QueueT; + use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK}; + + use super::super::super::backend::VsockBackendType; + use super::super::super::defs::{uapi, RXQ_EVENT}; + use super::super::super::tests::TestContext; + use super::super::defs as csm_defs; + use super::*; + + const LOCAL_CID: u64 = 2; + const PEER_CID: u64 = 3; + const LOCAL_PORT: u32 = 1002; + const PEER_PORT: u32 = 1003; + const PEER_BUF_ALLOC: u32 = 64 * 1024; + + enum StreamState { + Closed, + Error(ErrorKind), + Ready, + WouldBlock, + } + + pub struct TestStream { + fd: EventFd, + read_buf: Vec, + read_state: StreamState, + write_buf: Vec, + write_state: StreamState, + } + impl TestStream { + pub fn new() -> Self { + Self { + fd: EventFd::new(EFD_NONBLOCK).unwrap(), + read_state: StreamState::Ready, + write_state: StreamState::Ready, + read_buf: Vec::new(), + write_buf: Vec::new(), + } + } + fn new_with_read_buf(buf: &[u8]) -> Self { + let mut stream = Self::new(); + stream.read_buf = buf.to_vec(); + stream + } + } + + impl AsRawFd for TestStream { + fn as_raw_fd(&self) -> RawFd { + self.fd.as_raw_fd() + } + } + + impl Read for TestStream { + fn read(&mut self, data: &mut [u8]) -> IoResult { + match self.read_state { + StreamState::Closed => Ok(0), + StreamState::Error(kind) => Err(IoError::new(kind, "whatevs")), + StreamState::Ready => { + if self.read_buf.is_empty() { + return Err(IoError::new(ErrorKind::WouldBlock, "EAGAIN")); + } + let len = std::cmp::min(data.len(), self.read_buf.len()); + assert_ne!(len, 0); + data[..len].copy_from_slice(&self.read_buf[..len]); + self.read_buf = self.read_buf.split_off(len); + 
Ok(len) + } + StreamState::WouldBlock => Err(IoError::new(ErrorKind::WouldBlock, "EAGAIN")), + } + } + } + + impl Write for TestStream { + fn write(&mut self, data: &[u8]) -> IoResult { + match self.write_state { + StreamState::Closed => Err(IoError::new(ErrorKind::BrokenPipe, "EPIPE")), + StreamState::Error(kind) => Err(IoError::new(kind, "whatevs")), + StreamState::Ready => { + self.write_buf.extend_from_slice(data); + Ok(data.len()) + } + StreamState::WouldBlock => Err(IoError::new(ErrorKind::WouldBlock, "EAGAIN")), + } + } + fn flush(&mut self) -> IoResult<()> { + Ok(()) + } + } + + impl VsockStream for TestStream { + fn backend_type(&self) -> VsockBackendType { + VsockBackendType::Test + } + + fn as_any(&self) -> &dyn Any { + self + } + } + + impl VsockConnection { + /// Get the fwd_cnt value from the connection. + pub(crate) fn fwd_cnt(&self) -> Wrapping { + self.fwd_cnt + } + + /// Forcefully insert a credit update flag. + pub(crate) fn insert_credit_update(&mut self) { + self.pending_rx.insert(PendingRx::CreditUpdate); + } + } + + fn init_pkt(pkt: &mut VsockPacket, op: u16, len: u32) -> &mut VsockPacket { + for b in pkt.hdr_mut() { + *b = 0; + } + pkt.set_src_cid(PEER_CID) + .set_dst_cid(LOCAL_CID) + .set_src_port(PEER_PORT) + .set_dst_port(LOCAL_PORT) + .set_type(uapi::VSOCK_TYPE_STREAM) + .set_buf_alloc(PEER_BUF_ALLOC) + .set_op(op) + .set_len(len) + } + + // This is the connection state machine test context: a helper struct to provide CSM testing + // primitives. A single `VsockPacket` object will be enough for our testing needs. We'll be + // using it for simulating both packet sends and packet receives. We need to keep the vsock + // testing context alive, since `VsockPacket` is just a pointer-wrapper over some data that + // resides in guest memory. The vsock test context owns the `GuestMemoryMmap` object, so we'll make + // it a member here, in order to make sure that guest memory outlives our testing packet. A + // single `VsockConnection` object will also suffice for our testing needs. We'll be using a + // specially crafted `Read + Write + AsRawFd` object as a backing stream, so that we can + // control the various error conditions that might arise. 
+ struct CsmTestContext { + _vsock_test_ctx: TestContext, + pkt: VsockPacket, + conn: VsockConnection, + } + + impl CsmTestContext { + fn new_established() -> Self { + Self::new(ConnState::Established) + } + + fn new(conn_state: ConnState) -> Self { + let vsock_test_ctx = TestContext::new(); + let mut handler_ctx = vsock_test_ctx.create_event_handler_context(); + let stream = TestStream::new(); + let mut pkt = VsockPacket::from_rx_virtq_head( + &mut handler_ctx.queues[RXQ_EVENT as usize] + .queue_mut() + .pop_descriptor_chain(&vsock_test_ctx.mem) + .unwrap(), + ) + .unwrap(); + let conn = match conn_state { + ConnState::PeerInit => VsockConnection::new_peer_init( + Box::new(stream), + LOCAL_CID, + PEER_CID, + LOCAL_PORT, + PEER_PORT, + PEER_BUF_ALLOC, + ), + ConnState::LocalInit => VsockConnection::new_local_init( + Box::new(stream), + LOCAL_CID, + PEER_CID, + LOCAL_PORT, + PEER_PORT, + ), + ConnState::Established => { + let mut conn = VsockConnection::new_peer_init( + Box::new(stream), + LOCAL_CID, + PEER_CID, + LOCAL_PORT, + PEER_PORT, + PEER_BUF_ALLOC, + ); + assert!(conn.has_pending_rx()); + conn.recv_pkt(&mut pkt).unwrap(); + assert_eq!(pkt.op(), uapi::VSOCK_OP_RESPONSE); + conn + } + other => panic!("invalid ctx state: {:?}", other), + }; + assert_eq!(conn.state, conn_state); + Self { + _vsock_test_ctx: vsock_test_ctx, + pkt, + conn, + } + } + + fn set_stream(&mut self, stream: TestStream) { + self.conn.stream = Box::new(stream); + } + + fn set_peer_credit(&mut self, credit: u32) { + assert!(credit < self.conn.peer_buf_alloc); + self.conn.peer_fwd_cnt = Wrapping(0); + self.conn.rx_cnt = Wrapping(self.conn.peer_buf_alloc - credit); + assert_eq!(self.conn.peer_avail_credit(), credit as usize); + } + + fn send(&mut self) { + self.conn.send_pkt(&self.pkt).unwrap(); + } + + fn recv(&mut self) { + self.conn.recv_pkt(&mut self.pkt).unwrap(); + } + + fn notify_epollin(&mut self) { + self.conn.notify(epoll::Events::EPOLLIN); + assert!(self.conn.has_pending_rx()); + } + + fn notify_epollout(&mut self) { + self.conn.notify(epoll::Events::EPOLLOUT); + } + + fn init_pkt(&mut self, op: u16, len: u32) -> &mut VsockPacket { + init_pkt(&mut self.pkt, op, len) + } + + fn init_data_pkt(&mut self, data: &[u8]) -> &VsockPacket { + assert!(data.len() <= self.pkt.buf().unwrap().len()); + self.init_pkt(uapi::VSOCK_OP_RW, data.len() as u32); + self.pkt.buf_mut().unwrap()[..data.len()].copy_from_slice(data); + &self.pkt + } + } + + #[test] + fn test_peer_request() { + let mut ctx = CsmTestContext::new(ConnState::PeerInit); + assert!(ctx.conn.has_pending_rx()); + ctx.recv(); + // For peer-initiated requests, our connection should always yield a vsock reponse packet, + // in order to establish the connection. + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RESPONSE); + assert_eq!(ctx.pkt.src_cid(), LOCAL_CID); + assert_eq!(ctx.pkt.dst_cid(), PEER_CID); + assert_eq!(ctx.pkt.src_port(), LOCAL_PORT); + assert_eq!(ctx.pkt.dst_port(), PEER_PORT); + assert_eq!(ctx.pkt.type_(), uapi::VSOCK_TYPE_STREAM); + assert_eq!(ctx.pkt.len(), 0); + // After yielding the response packet, the connection should have transitioned to the + // established state. + assert_eq!(ctx.conn.state, ConnState::Established); + } + + #[test] + fn test_local_request() { + let mut ctx = CsmTestContext::new(ConnState::LocalInit); + // Host-initiated connections should first yield a connection request packet. + assert!(ctx.conn.has_pending_rx()); + // Before yielding the connection request packet, the timeout kill timer shouldn't be + // armed. 
+ assert!(!ctx.conn.will_expire()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_REQUEST); + // Since the request might time-out, the kill timer should now be armed. + assert!(ctx.conn.will_expire()); + assert!(!ctx.conn.has_expired()); + ctx.init_pkt(uapi::VSOCK_OP_RESPONSE, 0); + ctx.send(); + // Upon receiving a connection response, the connection should have transitioned to the + // established state, and the kill timer should've been disarmed. + assert_eq!(ctx.conn.state, ConnState::Established); + assert!(!ctx.conn.will_expire()); + } + + #[test] + fn test_local_request_timeout() { + let mut ctx = CsmTestContext::new(ConnState::LocalInit); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_REQUEST); + assert!(ctx.conn.will_expire()); + assert!(!ctx.conn.has_expired()); + std::thread::sleep(std::time::Duration::from_millis( + defs::CONN_REQUEST_TIMEOUT_MS, + )); + assert!(ctx.conn.has_expired()); + } + + #[test] + fn test_rx_data() { + let mut ctx = CsmTestContext::new_established(); + let data = &[1, 2, 3, 4]; + ctx.set_stream(TestStream::new_with_read_buf(data)); + assert_eq!(ctx.conn.as_raw_fd(), ctx.conn.stream.as_raw_fd()); + ctx.notify_epollin(); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RW); + assert_eq!(ctx.pkt.len() as usize, data.len()); + assert_eq!(ctx.pkt.buf().unwrap()[..ctx.pkt.len() as usize], *data); + + // There's no more data in the stream, so `recv_pkt` should yield `VsockError::NoData`. + match ctx.conn.recv_pkt(&mut ctx.pkt) { + Err(VsockError::NoData) => (), + other => panic!("{:?}", other), + } + + // A recv attempt in an invalid state should yield an instant reset packet. + ctx.conn.state = ConnState::LocalClosed; + ctx.notify_epollin(); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RST); + } + + #[test] + fn test_local_close() { + let mut ctx = CsmTestContext::new_established(); + let mut stream = TestStream::new(); + stream.read_state = StreamState::Closed; + ctx.set_stream(stream); + ctx.notify_epollin(); + ctx.recv(); + // When the host-side stream is closed, we can neither send not receive any more data. + // Therefore, the vsock shutdown packet that we'll deliver to the guest must contain both + // the no-more-send and the no-more-recv indications. + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_SHUTDOWN); + assert_ne!(ctx.pkt.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_SEND, 0); + assert_ne!(ctx.pkt.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_RCV, 0); + + // The kill timer should now be armed. + assert!(ctx.conn.will_expire()); + assert!( + ctx.conn.expiry().unwrap() + < Instant::now() + Duration::from_millis(defs::CONN_SHUTDOWN_TIMEOUT_MS) + ); + } + + #[test] + fn test_peer_close() { + // Test that send/recv shutdown indications are handled correctly. + // I.e. once set, an indication cannot be reset. + { + let mut ctx = CsmTestContext::new_established(); + + ctx.init_pkt(uapi::VSOCK_OP_SHUTDOWN, 0) + .set_flags(uapi::VSOCK_FLAGS_SHUTDOWN_RCV); + ctx.send(); + assert_eq!(ctx.conn.state, ConnState::PeerClosed(true, false)); + + // Attempting to reset the no-more-recv indication should not work + // (we are only setting the no-more-send indication here). + ctx.pkt.set_flags(uapi::VSOCK_FLAGS_SHUTDOWN_SEND); + ctx.send(); + assert_eq!(ctx.conn.state, ConnState::PeerClosed(true, true)); + } + + // Test case: + // - reading data from a no-more-send connection should work; and + // - writing data should have no effect. 
+ { + let data = &[1, 2, 3, 4]; + let mut ctx = CsmTestContext::new_established(); + ctx.set_stream(TestStream::new_with_read_buf(data)); + ctx.init_pkt(uapi::VSOCK_OP_SHUTDOWN, 0) + .set_flags(uapi::VSOCK_FLAGS_SHUTDOWN_SEND); + ctx.send(); + ctx.notify_epollin(); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RW); + assert_eq!(&ctx.pkt.buf().unwrap()[..ctx.pkt.len() as usize], data); + + ctx.init_data_pkt(data); + ctx.send(); + let test_stream = ctx + .conn + .stream + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(test_stream.write_buf.len(), 0); + assert!(ctx.conn.tx_buf.is_empty()); + } + + // Test case: + // - writing data to a no-more-recv connection should work; and + // - attempting to read data from it should yield an RST packet. + { + let mut ctx = CsmTestContext::new_established(); + ctx.init_pkt(uapi::VSOCK_OP_SHUTDOWN, 0) + .set_flags(uapi::VSOCK_FLAGS_SHUTDOWN_RCV); + ctx.send(); + let data = &[1, 2, 3, 4]; + ctx.init_data_pkt(data); + ctx.send(); + let test_stream = ctx + .conn + .stream + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(test_stream.write_buf, data.to_vec()); + + ctx.notify_epollin(); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RST); + } + + // Test case: setting both no-more-send and no-more-recv indications should have the + // connection confirm termination (i.e. yield an RST). + { + let mut ctx = CsmTestContext::new_established(); + ctx.init_pkt(uapi::VSOCK_OP_SHUTDOWN, 0) + .set_flags(uapi::VSOCK_FLAGS_SHUTDOWN_RCV | uapi::VSOCK_FLAGS_SHUTDOWN_SEND); + ctx.send(); + assert!(ctx.conn.has_pending_rx()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RST); + } + } + + #[test] + fn test_local_read_error() { + let mut ctx = CsmTestContext::new_established(); + let mut stream = TestStream::new(); + stream.read_state = StreamState::Error(ErrorKind::PermissionDenied); + ctx.set_stream(stream); + ctx.notify_epollin(); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RST); + } + + #[test] + fn test_credit_request_to_peer() { + let mut ctx = CsmTestContext::new_established(); + ctx.set_peer_credit(0); + ctx.notify_epollin(); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_CREDIT_REQUEST); + } + + #[test] + fn test_credit_request_from_peer() { + let mut ctx = CsmTestContext::new_established(); + ctx.init_pkt(uapi::VSOCK_OP_CREDIT_REQUEST, 0); + ctx.send(); + assert!(ctx.conn.has_pending_rx()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_CREDIT_UPDATE); + assert_eq!(ctx.pkt.buf_alloc(), csm_defs::CONN_TX_BUF_SIZE); + assert_eq!(ctx.pkt.fwd_cnt(), ctx.conn.fwd_cnt.0); + } + + #[test] + fn test_credit_update_to_peer() { + let mut ctx = CsmTestContext::new_established(); + + // Force a stale state, where the peer hasn't been updated on our credit situation. + ctx.conn.last_fwd_cnt_to_peer = Wrapping(0); + + // Since a credit update token is sent when the fwd_cnt value exceeds + // CONN_TX_BUF_SIZE - CONN_CREDIT_UPDATE_THRESHOLD, we initialize + // fwd_cnt at 6 bytes below the threshold. + let initial_fwd_cnt = + csm_defs::CONN_TX_BUF_SIZE - csm_defs::CONN_CREDIT_UPDATE_THRESHOLD - 6; + ctx.conn.fwd_cnt = Wrapping(initial_fwd_cnt); + + // Use a 4-byte packet for triggering the credit update threshold. + let data = &[1, 2, 3, 4]; + + // Check that there is no pending RX. + ctx.init_data_pkt(data); + ctx.send(); + assert!(!ctx.conn.has_pending_rx()); + + // Send a packet again. + ctx.init_data_pkt(data); + ctx.send(); + + // The CSM should now have a credit update available for the peer. 
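// Concretely, with CONN_TX_BUF_SIZE = 64 KiB and
// CONN_CREDIT_UPDATE_THRESHOLD = 4 KiB: fwd_cnt started at 61434, the first
// 4-byte packet left the peer-visible free space at 65536 - 61438 = 4098
// (still at or above the threshold), and the second dropped it to 4094,
// which is what arms the pending credit update checked below.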
+ assert!(ctx.conn.has_pending_rx()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_CREDIT_UPDATE); + assert_eq!(ctx.pkt.fwd_cnt(), initial_fwd_cnt + data.len() as u32 * 2); + assert_eq!(ctx.conn.fwd_cnt, ctx.conn.last_fwd_cnt_to_peer); + } + + #[test] + fn test_tx_buffering() { + // Test case: + // - when writing to the backing stream would block, TX data should end up in the TX buf + // - when the CSM is notified that it can write to the backing stream, it should flush + // the TX buf. + { + let mut ctx = CsmTestContext::new_established(); + + let mut stream = TestStream::new(); + stream.write_state = StreamState::WouldBlock; + ctx.set_stream(stream); + + // Send some data through the connection. The backing stream is set to reject writes, + // so the data should end up in the TX buffer. + let data = &[1, 2, 3, 4]; + ctx.init_data_pkt(data); + ctx.send(); + + // When there's data in the TX buffer, the connection should ask to be notified when it + // can write to its backing stream. + assert!(ctx + .conn + .get_polled_evset() + .contains(epoll::Events::EPOLLOUT)); + assert_eq!(ctx.conn.tx_buf.len(), data.len()); + + // Unlock the write stream and notify the connection it can now write its bufferred + // data. + ctx.set_stream(TestStream::new()); + ctx.conn.notify(epoll::Events::EPOLLOUT); + assert!(ctx.conn.tx_buf.is_empty()); + let test_stream = ctx + .conn + .stream + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(test_stream.write_buf, data); + } + } + + #[test] + fn test_stream_write_error() { + // Test case: sending a data packet to a broken / closed backing stream should kill it. + { + let mut ctx = CsmTestContext::new_established(); + let mut stream = TestStream::new(); + stream.write_state = StreamState::Closed; + ctx.set_stream(stream); + + let data = &[1, 2, 3, 4]; + ctx.init_data_pkt(data); + ctx.send(); + + assert_eq!(ctx.conn.state, ConnState::Killed); + assert!(ctx.conn.has_pending_rx()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RST); + } + + // Test case: notifying a connection that it can flush its TX buffer to a broken stream + // should kill the connection. + { + let mut ctx = CsmTestContext::new_established(); + + let mut stream = TestStream::new(); + stream.write_state = StreamState::WouldBlock; + ctx.set_stream(stream); + + // Send some data through the connection. The backing stream is set to reject writes, + // so the data should end up in the TX buffer. + let data = &[1, 2, 3, 4]; + ctx.init_data_pkt(data); + ctx.send(); + + // Set the backing stream to error out on write. + let mut stream = TestStream::new(); + stream.write_state = StreamState::Closed; + ctx.set_stream(stream); + + assert!(ctx + .conn + .get_polled_evset() + .contains(epoll::Events::EPOLLOUT)); + ctx.notify_epollout(); + assert_eq!(ctx.conn.state, ConnState::Killed); + } + } + + #[test] + fn test_peer_credit_misbehavior() { + let mut ctx = CsmTestContext::new_established(); + + let mut stream = TestStream::new(); + stream.write_state = StreamState::WouldBlock; + ctx.set_stream(stream); + + // Fill up the TX buffer. + let data = vec![0u8; ctx.pkt.buf().unwrap().len()]; + ctx.init_data_pkt(data.as_slice()); + for _i in 0..(csm_defs::CONN_TX_BUF_SIZE / data.len() as u32) { + ctx.send(); + } + + // Then try to send more data. + ctx.send(); + + // The connection should've committed suicide. 
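// (The last send overflowed the 64 KiB TX buffer: `send_bytes()` returned
// `Error::TxBufFull`, and `send_pkt()` reacts to any stream-write failure
// by calling `kill()`, which queues the RST verified below.)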
+ assert_eq!(ctx.conn.state, ConnState::Killed); + assert!(ctx.conn.has_pending_rx()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RST); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/csm/mod.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/csm/mod.rs new file mode 100644 index 000000000000..964f4ea9ff99 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/csm/mod.rs @@ -0,0 +1,131 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +/// This module implements our vsock connection state machine. The heavy lifting +/// is done by `connection::VsockConnection`, while this file only defines some +/// constants and helper structs. +pub(crate) mod connection; +pub mod txbuf; + +pub use self::connection::VsockConnection; + +pub mod defs { + /// Vsock connection TX buffer capacity. + pub const CONN_TX_BUF_SIZE: u32 = 64 * 1024; + + /// When the guest thinks we have less than this amount of free buffer + /// space, we will send them a credit update packet. + pub const CONN_CREDIT_UPDATE_THRESHOLD: u32 = 4 * 1024; + + /// Connection request timeout, in millis. + pub const CONN_REQUEST_TIMEOUT_MS: u64 = 2000; + + /// Connection graceful shutdown timeout, in millis. + pub const CONN_SHUTDOWN_TIMEOUT_MS: u64 = 2000; +} + +#[derive(Debug)] +pub enum Error { + /// Attempted to push data to a full TX buffer. + TxBufFull, + /// An I/O error occurred, when attempting to flush the connection TX + /// buffer. + TxBufFlush(std::io::Error), + /// An I/O error occurred, when attempting to write data to the host-side + /// stream. + StreamWrite(std::io::Error), +} + +type Result = std::result::Result; + +/// A vsock connection state. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum ConnState { + /// The connection has been initiated by the host end, but is yet to be + /// confirmed by the guest. + LocalInit, + /// The connection has been initiated by the guest, but we are yet to + /// confirm it, by sending a response packet (VSOCK_OP_RESPONSE). + PeerInit, + /// The connection handshake has been performed successfully, and data can + /// now be exchanged. + Established, + /// The host (AF_UNIX) socket was closed. + LocalClosed, + /// A VSOCK_OP_SHUTDOWN packet was received from the guest. The tuple + /// represents the guest R/W indication: (will_not_recv_anymore_data, + /// will_not_send_anymore_data). + PeerClosed(bool, bool), + /// The connection is scheduled to be forcefully terminated as soon as + /// possible. + Killed, +} + +/// An RX indication, used by `VsockConnection` to schedule future `recv_pkt()` +/// responses. +/// +/// For instance, after being notified that there is available data to be read +/// from the host stream (via `notify()`), the connection will store a +/// `PendingRx::Rw` to be later inspected by `recv_pkt()`. +#[derive(Clone, Copy, PartialEq, Eq)] +enum PendingRx { + /// We need to yield a connection request packet (VSOCK_OP_REQUEST). + Request = 0, + /// We need to yield a connection response packet (VSOCK_OP_RESPONSE). + Response = 1, + /// We need to yield a forceful connection termination packet (VSOCK_OP_RST). + Rst = 2, + /// We need to yield a data packet (VSOCK_OP_RW), by reading from the + /// AF_UNIX socket. + Rw = 3, + /// We need to yield a credit update packet (VSOCK_OP_CREDIT_UPDATE). + CreditUpdate = 4, +} +impl PendingRx { + /// Transform the enum value into a bitmask, that can be used for set + /// operations. 
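/// For example, `PendingRx::Request` (discriminant 0) maps to 0b0000_0001
/// and `PendingRx::Rst` (discriminant 2) maps to 0b0000_0100, so a set
/// holding both has `data == 0b0000_0101`.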
+ fn into_mask(self) -> u16 { + 1u16 << (self as u16) + } +} + +/// A set of RX indications (`PendingRx` items). +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct PendingRxSet { + data: u16, +} + +impl PendingRxSet { + /// Insert an item into the set. + fn insert(&mut self, it: PendingRx) { + self.data |= it.into_mask(); + } + + /// Remove an item from the set and return: + /// - true, if the item was in the set; or + /// - false, if the item wasn't in the set. + fn remove(&mut self, it: PendingRx) -> bool { + let ret = self.contains(it); + self.data &= !it.into_mask(); + ret + } + + /// Check if an item is present in this set. + fn contains(&self, it: PendingRx) -> bool { + self.data & it.into_mask() != 0 + } + + /// Check if the set is empty. + fn is_empty(&self) -> bool { + self.data == 0 + } +} + +/// Create a set containing only one item. +impl From for PendingRxSet { + fn from(it: PendingRx) -> Self { + Self { + data: it.into_mask(), + } + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/csm/txbuf.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/csm/txbuf.rs new file mode 100644 index 000000000000..ab12416490c3 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/csm/txbuf.rs @@ -0,0 +1,274 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::io::Write; +use std::num::Wrapping; + +use super::defs; +use super::{Error, Result}; + +/// A simple ring-buffer implementation, used by vsock connections to buffer TX +/// (guest -> host) data. Memory for this buffer is allocated lazily, since +/// buffering will only be needed when the host can't read fast enough. +#[derive(Eq, PartialEq)] +pub struct TxBuf { + /// The actual u8 buffer - only allocated after the first push. + pub data: Option>, + /// Ring-buffer head offset - where new data is pushed to. + pub head: Wrapping, + /// Ring-buffer tail offset - where data is flushed from. + pub tail: Wrapping, +} + +impl TxBuf { + /// Total buffer size, in bytes. + const SIZE: usize = defs::CONN_TX_BUF_SIZE as usize; + + /// Get the used length of this buffer - number of bytes that have been + /// pushed in, but not yet flushed out. + pub fn len(&self) -> usize { + (self.head - self.tail).0 as usize + } + + /// Push a byte slice onto the ring-buffer. + /// + /// Either the entire source slice will be pushed to the ring-buffer, or + /// none of it, if there isn't enough room, in which case + /// `Err(Error::TxBufFull)` is returned. + pub fn push(&mut self, src: &[u8]) -> Result<()> { + // Error out if there's no room to push the entire slice. + if self.len() + src.len() > Self::SIZE { + return Err(Error::TxBufFull); + } + + let data = self + .data + .get_or_insert_with(|| vec![0u8; Self::SIZE].into_boxed_slice()); + + // Buffer head, as an offset into the data slice. + let head_ofs = self.head.0 as usize % Self::SIZE; + + // Pushing a slice to this buffer can take either one or two slice + // copies: - one copy, if the slice fits between `head_ofs` and + // `Self::SIZE`; or - two copies, if the ring-buffer head wraps around. + + // First copy length: we can only go from the head offset up to the + // total buffer size. + let len = std::cmp::min(Self::SIZE - head_ofs, src.len()); + data[head_ofs..(head_ofs + len)].copy_from_slice(&src[..len]); + + // If the slice didn't fit, the buffer head will wrap around, and + // pushing continues from the start of the buffer (`&self.data[0]`). 
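// Worked example with made-up numbers: for an 8-byte ring with
// head_ofs == 6, pushing a 4-byte slice copies src[0..2] into data[6..8]
// above, and src[2..4] into data[0..2] in the wrap-around branch below.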
+ if len < src.len() { + data[..(src.len() - len)].copy_from_slice(&src[len..]); + } + + // Either way, we've just pushed exactly `src.len()` bytes, so that's + // the amount by which the (wrapping) buffer head needs to move forward. + self.head += Wrapping(src.len() as u32); + + Ok(()) + } + + /// Flush the contents of the ring-buffer to a writable stream. + /// + /// Return the number of bytes that have been transferred out of the + /// ring-buffer and into the writable stream. + pub fn flush_to(&mut self, sink: &mut W) -> Result + where + W: Write, + { + // Nothing to do, if this buffer holds no data. + if self.is_empty() { + return Ok(0); + } + + // Buffer tail, as an offset into the buffer data slice. + let tail_ofs = self.tail.0 as usize % Self::SIZE; + + // Flushing the buffer can take either one or two writes: + // - one write, if the tail doesn't need to wrap around to reach the + // head; or + // - two writes, if the tail would wrap around: tail to slice end, then + // slice end to head. + + // First write length: the lesser of tail to slice end, or tail to head. + let len_to_write = std::cmp::min(Self::SIZE - tail_ofs, self.len()); + + // It's safe to unwrap here, since we've already checked if the buffer + // was empty. + let data = self.data.as_ref().unwrap(); + + // Issue the first write and absorb any `WouldBlock` error (we can just + // try again later). + let written = sink + .write(&data[tail_ofs..(tail_ofs + len_to_write)]) + .map_err(Error::TxBufFlush)?; + + // Move the buffer tail ahead by the amount (of bytes) we were able to + // flush out. + self.tail += Wrapping(written as u32); + + // If we weren't able to flush out as much as we tried, there's no point + // in attempting our second write. + if written < len_to_write { + return Ok(written); + } + + // Attempt our second write. This will return immediately if a second + // write isn't needed, since checking for an empty buffer is the first + // thing we do in this function. + // + // Interesting corner case: if we've already written some data in the + // first pass, and then the second write fails, we will consider the + // flush action a success and return the number of bytes written in the + // first pass. + Ok(written + self.flush_to(sink).unwrap_or(0)) + } + + /// Check if the buffer holds any data that hasn't yet been flushed out. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl Default for TxBuf { + /// Ring-buffer constructor. 
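/// `data` starts out as `None`; the 64 KiB backing allocation is only made
/// on the first `push()`, so connections that never need to buffer TX data
/// pay nothing for it.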
+ fn default() -> Self { + Self { + data: None, + head: Wrapping(0), + tail: Wrapping(0), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Error as IoError; + use std::io::Result as IoResult; + use std::io::{ErrorKind, Write}; + + struct TestSink { + data: Vec, + err: Option, + capacity: usize, + } + + impl TestSink { + const DEFAULT_CAPACITY: usize = 2 * TxBuf::SIZE; + fn new() -> Self { + Self { + data: Vec::with_capacity(Self::DEFAULT_CAPACITY), + err: None, + capacity: Self::DEFAULT_CAPACITY, + } + } + } + + impl Write for TestSink { + fn write(&mut self, src: &[u8]) -> IoResult { + if self.err.is_some() { + return Err(self.err.take().unwrap()); + } + let len_to_push = std::cmp::min(self.capacity - self.data.len(), src.len()); + self.data.extend_from_slice(&src[..len_to_push]); + Ok(len_to_push) + } + fn flush(&mut self) -> IoResult<()> { + Ok(()) + } + } + + impl TestSink { + fn clear(&mut self) { + self.data = Vec::with_capacity(self.capacity); + self.err = None; + } + fn set_err(&mut self, err: IoError) { + self.err = Some(err); + } + fn set_capacity(&mut self, capacity: usize) { + self.capacity = capacity; + if self.data.len() > self.capacity { + self.data.resize(self.capacity, 0); + } + } + } + + #[test] + fn test_push_nowrap() { + let mut txbuf = TxBuf::default(); + let mut sink = TestSink::new(); + assert!(txbuf.is_empty()); + + assert!(txbuf.data.is_none()); + txbuf.push(&[1, 2, 3, 4]).unwrap(); + txbuf.push(&[5, 6, 7, 8]).unwrap(); + txbuf.flush_to(&mut sink).unwrap(); + assert_eq!(sink.data, [1, 2, 3, 4, 5, 6, 7, 8]); + } + + #[test] + fn test_push_wrap() { + let mut txbuf = TxBuf::default(); + let mut sink = TestSink::new(); + let mut tmp: Vec = Vec::new(); + + tmp.resize(TxBuf::SIZE - 2, 0); + txbuf.push(tmp.as_slice()).unwrap(); + txbuf.flush_to(&mut sink).unwrap(); + sink.clear(); + + txbuf.push(&[1, 2, 3, 4]).unwrap(); + assert_eq!(txbuf.flush_to(&mut sink).unwrap(), 4); + assert_eq!(sink.data, [1, 2, 3, 4]); + } + + #[test] + fn test_push_error() { + let mut txbuf = TxBuf::default(); + let mut tmp = Vec::with_capacity(TxBuf::SIZE); + + tmp.resize(TxBuf::SIZE - 1, 0); + txbuf.push(tmp.as_slice()).unwrap(); + match txbuf.push(&[1, 2]) { + Err(Error::TxBufFull) => (), + other => panic!("Unexpected result: {:?}", other), + } + } + + #[test] + fn test_incomplete_flush() { + let mut txbuf = TxBuf::default(); + let mut sink = TestSink::new(); + + sink.set_capacity(2); + txbuf.push(&[1, 2, 3, 4]).unwrap(); + assert_eq!(txbuf.flush_to(&mut sink).unwrap(), 2); + assert_eq!(txbuf.len(), 2); + assert_eq!(sink.data, [1, 2]); + + sink.set_capacity(4); + assert_eq!(txbuf.flush_to(&mut sink).unwrap(), 2); + assert!(txbuf.is_empty()); + assert_eq!(sink.data, [1, 2, 3, 4]); + } + + #[test] + fn test_flush_error() { + const EACCESS: i32 = 13; + + let mut txbuf = TxBuf::default(); + let mut sink = TestSink::new(); + + txbuf.push(&[1, 2, 3, 4]).unwrap(); + let io_err = IoError::from_raw_os_error(EACCESS); + sink.set_err(io_err); + match txbuf.flush_to(&mut sink) { + Err(Error::TxBufFlush(ref err)) if err.kind() == ErrorKind::PermissionDenied => (), + other => panic!("Unexpected result: {:?}", other), + } + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/device.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/device.rs new file mode 100644 index 000000000000..66f5b8bf5c26 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/device.rs @@ -0,0 +1,369 @@ +// Copyright 2022 Alibaba Cloud. All Rights Reserved. 
+// +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. +use std::any::Any; +use std::marker::PhantomData; +use std::sync::Arc; + +use dbs_device::resources::ResourceConstraint; +use dbs_utils::epoll_manager::{EpollManager, SubscriberId}; +use log::debug; +use log::trace; +use log::warn; +use virtio_queue::QueueT; +use vm_memory::GuestAddressSpace; +use vm_memory::GuestMemoryRegion; + +use super::backend::VsockBackend; +use super::defs::uapi; +use super::epoll_handler::VsockEpollHandler; +use super::muxer::{Error as MuxerError, VsockGenericMuxer, VsockMuxer}; +use super::{Result, VsockError}; +use crate::device::{VirtioDeviceConfig, VirtioDeviceInfo}; +use crate::{ActivateResult, ConfigResult, DbsGuestAddressSpace, VirtioDevice}; + +const VSOCK_DRIVER_NAME: &str = "virtio-vsock"; +const VSOCK_CONFIG_SPACE_SIZE: usize = 8; +const VSOCK_AVAIL_FEATURES: u64 = + 1u64 << uapi::VIRTIO_F_VERSION_1 | 1u64 << uapi::VIRTIO_F_IN_ORDER; + +/// This is the `VirtioDevice` implementation for our vsock device. It handles +/// the virtio-level device logic: feature negociation, device configuration, +/// and device activation. The run-time device logic (i.e. event-driven data +/// handling) is implemented by `super::epoll_handler::EpollHandler`. +/// +/// The vsock device has two input parameters: a CID to identify the device, and +/// a `VsockBackend` to use for offloading vsock traffic. +/// +/// Upon its activation, the vsock device creates its `EpollHandler`, passes it +/// the event-interested file descriptors, and registers these descriptors with +/// the VMM `EpollContext`. Going forward, the `EpollHandler` will get notified +/// whenever an event occurs on the just-registered FDs: +/// - an RX queue FD; +/// - a TX queue FD; +/// - an event queue FD; and +/// - a backend FD. +pub struct Vsock { + cid: u64, + queue_sizes: Arc>, + device_info: VirtioDeviceInfo, + subscriber_id: Option, + muxer: Option, + phantom: PhantomData, +} + +// Default muxer implementation of Vsock +impl Vsock { + /// Create a new virtio-vsock device with the given VM CID and vsock + /// backend. + pub fn new(cid: u64, queue_sizes: Arc>, epoll_mgr: EpollManager) -> Result { + let muxer = VsockMuxer::new(cid).map_err(VsockError::Muxer)?; + Self::new_with_muxer(cid, queue_sizes, epoll_mgr, muxer) + } +} + +impl Vsock { + pub(crate) fn new_with_muxer( + cid: u64, + queue_sizes: Arc>, + epoll_mgr: EpollManager, + muxer: M, + ) -> Result { + let mut config_space = Vec::with_capacity(VSOCK_CONFIG_SPACE_SIZE); + for i in 0..VSOCK_CONFIG_SPACE_SIZE { + config_space.push((cid >> (8 * i as u64)) as u8); + } + + Ok(Vsock { + cid, + queue_sizes: queue_sizes.clone(), + device_info: VirtioDeviceInfo::new( + VSOCK_DRIVER_NAME.to_string(), + VSOCK_AVAIL_FEATURES, + queue_sizes, + config_space, + epoll_mgr, + ), + subscriber_id: None, + muxer: Some(muxer), + phantom: PhantomData, + }) + } + + fn id(&self) -> &str { + &self.device_info.driver_name + } + + /// add backend for vsock muxer + // NOTE: Backend is not allowed to add when vsock device is activated. 
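// A rough usage sketch (the VMM-side wiring shown here is an assumption,
// not part of this patch): backends are registered before activation, e.g.
//
//     let mut vsock = Vsock::new(cid, queue_sizes, epoll_mgr)?;
//     vsock.add_backend(Box::new(VsockUnixStreamBackend::new(uds_path)?), true)?;
//
// Once `activate()` has handed the muxer to the epoll handler, this method
// returns `VsockError::Muxer(MuxerError::BackendAddAfterActivated)`.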
+ pub fn add_backend(&mut self, backend: Box, is_default: bool) -> Result<()> { + if let Some(muxer) = self.muxer.as_mut() { + muxer + .add_backend(backend, is_default) + .map_err(VsockError::Muxer) + } else { + Err(VsockError::Muxer(MuxerError::BackendAddAfterActivated)) + } + } +} + +impl VirtioDevice for Vsock +where + AS: DbsGuestAddressSpace, + Q: QueueT + Send + 'static, + R: GuestMemoryRegion + Sync + Send + 'static, + M: VsockGenericMuxer + 'static, +{ + fn device_type(&self) -> u32 { + uapi::VIRTIO_ID_VSOCK + } + + fn queue_max_sizes(&self) -> &[u16] { + &self.queue_sizes + } + + fn get_avail_features(&self, page: u32) -> u32 { + self.device_info.get_avail_features(page) + } + + fn set_acked_features(&mut self, page: u32, value: u32) { + trace!(target: "virtio-vsock", "{}: VirtioDevice::set_acked_features({}, 0x{:x})", + self.id(), page, value + ); + self.device_info.set_acked_features(page, value) + } + + fn read_config(&mut self, offset: u64, data: &mut [u8]) -> ConfigResult { + trace!(target: "virtio-vsock", "{}: VirtioDevice::read_config(0x{:x}, {:?})", + self.id(), offset, data); + self.device_info.read_config(offset, data) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> ConfigResult { + trace!(target: "virtio-vsock", "{}: VirtioDevice::write_config(0x{:x}, {:?})", + self.id(), offset, data); + self.device_info.write_config(offset, data) + } + + fn activate(&mut self, config: VirtioDeviceConfig) -> ActivateResult { + trace!(target: "virtio-vsock", "{}: VirtioDevice::activate()", self.id()); + + self.device_info.check_queue_sizes(&config.queues[..])?; + let handler: VsockEpollHandler = VsockEpollHandler::new( + config, + self.id().to_owned(), + self.cid, + // safe to unwrap, because we create muxer using New() + self.muxer.take().unwrap(), + ); + + self.subscriber_id = Some(self.device_info.register_event_handler(Box::new(handler))); + + Ok(()) + } + + fn get_resource_requirements( + &self, + requests: &mut Vec, + use_generic_irq: bool, + ) { + trace!(target: "virtio-vsock", "{}: VirtioDevice::get_resource_requirements()", self.id()); + + requests.push(ResourceConstraint::LegacyIrq { irq: None }); + if use_generic_irq { + requests.push(ResourceConstraint::GenericIrq { + size: (self.queue_sizes.len() + 1) as u32, + }); + } + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + fn remove(&mut self) { + let subscriber_id = self.subscriber_id.take(); + if let Some(subscriber_id) = subscriber_id { + match self.device_info.remove_event_handler(subscriber_id) { + Ok(_) => debug!("virtio-vsock: removed subscriber_id {:?}", subscriber_id), + Err(err) => warn!("virtio-vsock: failed to remove event handler: {:?}", err), + }; + } else { + self.muxer.take(); + } + } +} + +#[cfg(test)] +mod tests { + use dbs_device::resources::DeviceResources; + use dbs_interrupt::NoopNotifier; + use kvm_ioctls::Kvm; + use virtio_queue::QueueSync; + use vm_memory::{GuestAddress, GuestMemoryMmap, GuestRegionMmap}; + + use super::super::defs::uapi; + use super::super::tests::{test_bytes, TestContext}; + use super::*; + use crate::device::VirtioDeviceConfig; + use crate::VirtioQueueConfig; + + impl Vsock { + pub fn mock_activate( + &mut self, + config: VirtioDeviceConfig, + ) -> Result> { + trace!(target: "virtio-vsock", "{}: VirtioDevice::activate_re()", self.id()); + + self.device_info + .check_queue_sizes(&config.queues[..]) + .unwrap(); + let handler: VsockEpollHandler = + VsockEpollHandler::new( + config, + self.id().to_owned(), + 
self.cid, + // safe to unwrap, because we create muxer using New() + self.muxer.take().unwrap(), + ); + + Ok(handler) + } + } + + #[test] + fn test_virtio_device() { + let mut ctx = TestContext::new(); + let device_features = VSOCK_AVAIL_FEATURES; + let driver_features: u64 = VSOCK_AVAIL_FEATURES | 1 | (1 << 32); + let device_pages = [ + (device_features & 0xffff_ffff) as u32, + (device_features >> 32) as u32, + ]; + let driver_pages = [ + (driver_features & 0xffff_ffff) as u32, + (driver_features >> 32) as u32, + ]; + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::device_type( + &ctx.device + ), + uapi::VIRTIO_ID_VSOCK + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features( + &ctx.device, 0 + ), + device_pages[0] + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features( + &ctx.device, 1 + ), + device_pages[1] + ); + assert_eq!( + VirtioDevice::>, QueueSync, GuestRegionMmap>::get_avail_features( + &ctx.device, 2 + ), + 0 + ); + + // Ack device features, page 0. + ctx.device + .device_info + .set_acked_features(0, driver_pages[0]); + // Ack device features, page 1. + ctx.device + .device_info + .set_acked_features(1, driver_pages[1]); + // Ack some bogus page (i.e. 2). This should have no side effect. + ctx.device.device_info.set_acked_features(2, 0); + // Attempt to un-ack the first feature page. This should have no side effect. + ctx.device + .device_info + .set_acked_features(0, !driver_pages[0]); + // Check that no side effect are present, and that the acked features are exactly the same + // as the device features. + assert_eq!( + ctx.device.device_info.acked_features(), + device_features & driver_features + ); + + // Test reading 32-bit chunks. + let mut data = [0u8; 8]; + VirtioDevice::>, QueueSync, GuestRegionMmap>::read_config( + &mut ctx.device, + 0, + &mut data[..4], + ) + .unwrap(); + test_bytes(&data[..], &(ctx.cid & 0xffff_ffff).to_le_bytes()); + VirtioDevice::>, QueueSync, GuestRegionMmap>::read_config( + &mut ctx.device, + 4, + &mut data[4..], + ) + .unwrap(); + test_bytes(&data[4..], &((ctx.cid >> 32) & 0xffff_ffff).to_le_bytes()); + + // Test reading 64-bit. + let mut data = [0u8; 8]; + VirtioDevice::>, QueueSync, GuestRegionMmap>::read_config( + &mut ctx.device, + 0, + &mut data, + ) + .unwrap(); + test_bytes(&data, &ctx.cid.to_le_bytes()); + + // Check out-of-bounds reading. + let mut data = [0u8, 1, 2, 3, 4, 5, 6, 7]; + VirtioDevice::>, QueueSync, GuestRegionMmap>::read_config( + &mut ctx.device, + 2, + &mut data, + ) + .unwrap(); + assert_eq!(data, [0u8, 0, 0, 0, 0, 0, 6, 7]); + + // Just covering lines here, since the vsock device has no writable config. + // A warning is, however, logged, if the guest driver attempts to write any config data. + VirtioDevice::>, QueueSync, GuestRegionMmap>::write_config( + &mut ctx.device, + 0, + &data[..4], + ) + .unwrap(); + + let mem = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let queues = vec![ + VirtioQueueConfig::::create(2, 0).unwrap(), + VirtioQueueConfig::::create(2, 0).unwrap(), + VirtioQueueConfig::::create(2, 0).unwrap(), + ]; + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::>>::new( + Arc::new(mem), + vm_fd, + resources, + queues, + None, + Arc::new(NoopNotifier::new()), + ); + + // Test activation. 
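Before the activation step that follows, it is worth spelling out the feature-page arithmetic this test exercises: `get_avail_features(page)` and `set_acked_features(page, ..)` view the 64-bit feature word as two 32-bit halves, any other page reads as zero, and only the intersection of offered and acked bits is kept. A standalone sketch (bit positions restated from the `uapi` module):

```rust
// Feature word -> 32-bit page mapping used by get_avail_features()/set_acked_features().
fn feature_page(features: u64, page: u32) -> u32 {
    match page {
        0 => (features & 0xffff_ffff) as u32, // low half
        1 => (features >> 32) as u32,         // high half
        _ => 0,                               // bogus pages read as zero
    }
}

fn main() {
    // Bit positions as in the uapi module: VIRTIO_F_VERSION_1 = 32, VIRTIO_F_IN_ORDER = 35.
    let avail: u64 = (1 << 32) | (1 << 35);
    assert_eq!(feature_page(avail, 0), 0);
    assert_eq!(feature_page(avail, 1), 0b1001);
    assert_eq!(feature_page(avail, 2), 0);

    // The driver may try to ack bits the device never offered (bit 0 here);
    // only the intersection survives, which is what the test above asserts.
    let driver = avail | 1;
    assert_eq!(avail & driver, avail);
}
```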
+ ctx.device.activate(config).unwrap(); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/epoll_handler.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/epoll_handler.rs new file mode 100644 index 000000000000..5ffcd23c5595 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/epoll_handler.rs @@ -0,0 +1,629 @@ +// Copyright 2022 Alibaba Cloud. All Rights Reserved. +// +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::ops::Deref; + +use dbs_utils::epoll_manager::{EventOps, EventSet, Events, MutEventSubscriber}; +use log::{error, trace, warn}; +use virtio_queue::{QueueOwnedT, QueueSync, QueueT}; +use vm_memory::{GuestMemoryRegion, GuestRegionMmap}; + +use super::defs; +use super::muxer::{VsockGenericMuxer, VsockMuxer}; +use super::packet::VsockPacket; +use crate::device::VirtioDeviceConfig; +use crate::{DbsGuestAddressSpace, Result as VirtIoResult}; + +const QUEUE_RX: usize = 0; +const QUEUE_TX: usize = 1; +const QUEUE_CFG: usize = 2; + +// TODO: Detect / handle queue deadlock: +// 1. If `self.backend.send_pkt()` errors out, TX queue processing will halt. +// Try to process any pending backend RX, then try TX again. If it fails +// again, we have a deadlock. +// 2. If the driver halts RX queue processing, we'll need to notify +// `self.backend`, so that it can unregister any EPOLLIN listeners, since +// otherwise it will keep spinning, unable to consume its EPOLLIN events. + +/// The vsock `EpollHandler` implements the runtime logic of our vsock device: +/// 1. Respond to TX queue events by wrapping virtio buffers into +/// `VsockPacket`s, then sending those packets to the `VsockBackend`; +/// 2. Forward backend FD event notifications to the `VsockBackend`; +/// 3. Fetch incoming packets from the `VsockBackend` and place them into the +/// virtio RX queue; +/// 4. Whenever we have processed some virtio buffers (either TX or RX), let the +/// driver know by raising our assigned IRQ. +/// +/// In a nutshell, the `EpollHandler` logic looks like this: +/// - on TX queue event: +/// - fetch all packets from the TX queue and send them to the backend; then +/// - if the backend has queued up any incoming packets, fetch them into any +/// available RX buffers. +/// - on RX queue event: +/// - fetch any incoming packets, queued up by the backend, into newly +/// available RX buffers. +/// - on backend event: +/// - forward the event to the backend; then +/// - again, attempt to fetch any incoming packets queued by the backend into +/// virtio RX buffers. +pub struct VsockEpollHandler< + AS: DbsGuestAddressSpace, + Q: QueueT + Send = QueueSync, + R: GuestMemoryRegion = GuestRegionMmap, + M: VsockGenericMuxer = VsockMuxer, +> { + pub(crate) config: VirtioDeviceConfig, + id: String, + pub(crate) muxer: M, + _cid: u64, +} + +impl VsockEpollHandler +where + AS: DbsGuestAddressSpace, + Q: QueueT + Send, + R: GuestMemoryRegion, + M: VsockGenericMuxer, +{ + pub fn new(config: VirtioDeviceConfig, id: String, cid: u64, muxer: M) -> Self { + VsockEpollHandler { + config, + id, + _cid: cid, + muxer, + } + } + + /// Signal the guest driver that we've used some virtio buffers that it had + /// previously made available. 
+ pub(crate) fn signal_used_queue(&self, idx: usize) -> VirtIoResult<()> { + trace!("{}: raising IRQ", self.id); + self.config.queues[idx].notify().map_err(|e| { + error!("{}: failed to signal used queue {}, {:?}", self.id, idx, e); + e + }) + } + + /// Walk the driver-provided RX queue buffers and attempt to fill them up + /// with any data that we have pending. + fn process_rx(&mut self, mem: &AS::M) { + trace!("{}: epoll_handler::process_rx()", self.id); + let mut raise_irq = false; + { + let rxvq = &mut self.config.queues[QUEUE_RX].queue_mut().lock(); + loop { + let mut iter = match rxvq.iter(mem) { + Err(e) => { + error!("{}: failed to process rx queue. {}", self.id, e); + return; + } + Ok(iter) => iter, + }; + + if let Some(mut desc_chain) = iter.next() { + let used_len = match VsockPacket::from_rx_virtq_head(&mut desc_chain) { + Ok(mut pkt) => { + if self.muxer.recv_pkt(&mut pkt).is_ok() { + pkt.hdr().len() as u32 + pkt.len() + } else { + // We are using a consuming iterator over the virtio buffers, so, if we + // can't fill in this buffer, we'll need to undo the last iterator step. + iter.go_to_previous_position(); + break; + } + } + Err(e) => { + warn!("{}: RX queue error: {:?}", self.id, e); + 0 + } + }; + + raise_irq = true; + let _ = rxvq.add_used(mem, desc_chain.head_index(), used_len); + } else { + break; + } + } + } + if raise_irq { + if let Err(e) = self.signal_used_queue(QUEUE_RX) { + error!("{}: failed to notify guest for RX queue, {:?}", self.id, e); + } + } + } + + /// Walk the dirver-provided TX queue buffers, package them up as vsock + /// packets, and send them to the backend for processing. + fn process_tx(&mut self, mem: &AS::M) { + trace!("{}: epoll_handler::process_tx()", self.id); + let mut have_used = false; + + { + let txvq = &mut self.config.queues[QUEUE_TX].queue_mut().lock(); + + loop { + let mut iter = match txvq.iter(mem) { + Err(e) => { + error!("{}: failed to process tx queue. {}", self.id, e); + return; + } + Ok(iter) => iter, + }; + + if let Some(mut desc_chain) = iter.next() { + let pkt = match VsockPacket::from_tx_virtq_head(&mut desc_chain) { + Ok(pkt) => pkt, + Err(e) => { + error!("{}: error reading TX packet: {:?}", self.id, e); + have_used = true; + let _ = txvq.add_used(mem, desc_chain.head_index(), 0); + continue; + } + }; + + if self.muxer.send_pkt(&pkt).is_err() { + iter.go_to_previous_position(); + break; + } + + have_used = true; + let _ = txvq.add_used(mem, desc_chain.head_index(), 0); + } else { + break; + } + } + } + if have_used { + if let Err(e) = self.signal_used_queue(QUEUE_TX) { + error!("{}: failed to notify guest for TX queue, {:?}", self.id, e); + } + } + } + + pub(crate) fn handle_rxq_event(&mut self, mem: &AS::M) { + trace!("{}: handle RX queue event", self.id); + if let Err(e) = self.config.queues[QUEUE_RX].consume_event() { + error!("{}: failed to consume rx queue event, {:?}", self.id, e); + } else if self.muxer.has_pending_rx() { + self.process_rx(mem); + } + } + + pub(crate) fn handle_txq_event(&mut self, mem: &AS::M) { + trace!("{}: handle TX queue event", self.id); + if let Err(e) = self.config.queues[QUEUE_TX].consume_event() { + error!("{}: failed to consume tx queue event, {:?}", self.id, e); + } else { + self.process_tx(mem); + // The backend may have queued up responses to the packets + // we sent during TX queue processing. If that happened, we + // need to fetch those responses and place them into RX + // buffers. 
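That "drain TX first, then pick up whatever RX the backend queued in response" ordering, together with the way `process_tx` above un-pops a descriptor chain when `send_pkt()` fails, is the whole queue discipline. A toy model of the back-pressure part, with a plain `Vec` standing in for the virtio queue iterator (illustrative only; the real code pops a chain and calls `iter.go_to_previous_position()` to put it back):

```rust
// Toy model of process_tx() back-pressure: send chains until the backend
// refuses one; refused chains stay on the queue so the next TX-queue event
// (or backend kick) retries them.
fn drain_tx(queue: &mut Vec<&'static str>, backend_capacity: usize) -> Vec<&'static str> {
    let mut sent = Vec::new();
    while !queue.is_empty() {
        if sent.len() == backend_capacity {
            // Backend said "not now": leave the remaining chains queued.
            break;
        }
        sent.push(queue.remove(0));
    }
    sent
}

fn main() {
    let mut txq = vec!["chain0", "chain1", "chain2"];
    assert_eq!(drain_tx(&mut txq, 2), ["chain0", "chain1"]);
    // "chain2" stays queued and is retried on the next TX event or backend kick.
    assert_eq!(txq, ["chain2"]);
}
```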
+ if self.muxer.has_pending_rx() { + self.process_rx(mem); + } + } + } + + fn handle_evq_event(&mut self, _mem: &AS::M) { + trace!("{}: handle event queue event", self.id); + if let Err(e) = self.config.queues[QUEUE_CFG].consume_event() { + error!("{}: failed to consume config queue event, {:?}", self.id, e); + } + } + + pub(crate) fn notify_backend_event(&mut self, events: &Events, mem: &AS::M) { + trace!("{}: backend event", self.id); + let events = epoll::Events::from_bits(events.event_set().bits()).unwrap(); + self.muxer.notify(events); + // After the backend has been kicked, it might've freed up some + // resources, so we can attempt to send it more data to process. In + // particular, if `self.backend.send_pkt()` halted the TX queue + // processing (by reurning an error) at some point in the past, now is + // the time to try walking the TX queue again. + self.process_tx(mem); + // This event may have caused some packets to be queued up by the + // backend. Make sure they are processed. + if self.muxer.has_pending_rx() { + self.process_rx(mem); + } + } +} + +impl MutEventSubscriber for VsockEpollHandler +where + AS: DbsGuestAddressSpace, + Q: QueueT + Send, + R: GuestMemoryRegion, + M: VsockGenericMuxer + 'static, +{ + fn process(&mut self, events: Events, _ops: &mut EventOps) { + let guard = self.config.lock_guest_memory(); + let mem = guard.deref(); + + match events.data() { + defs::RXQ_EVENT => self.handle_rxq_event(mem), + defs::TXQ_EVENT => self.handle_txq_event(mem), + defs::EVQ_EVENT => self.handle_evq_event(mem), + defs::BACKEND_EVENT => self.notify_backend_event(&events, mem), + _ => error!("{}: unknown epoll event slot {}", self.id, events.data()), + } + } + + fn init(&mut self, ops: &mut EventOps) { + trace!("{}: VsockEpollHandler::init()", self.id); + + let events = Events::with_data( + self.config.queues[QUEUE_RX].eventfd.as_ref(), + defs::RXQ_EVENT, + EventSet::IN, + ); + if let Err(e) = ops.add(events) { + error!( + "{}: failed to register epoll event for RX queue, {:?}.", + self.id, e + ); + } + + let events = Events::with_data( + self.config.queues[QUEUE_TX].eventfd.as_ref(), + defs::TXQ_EVENT, + EventSet::IN, + ); + if let Err(e) = ops.add(events) { + error!( + "{}: failed to register epoll event for TX queue, {:?}.", + self.id, e + ); + } + + let events = Events::with_data( + self.config.queues[QUEUE_CFG].eventfd.as_ref(), + defs::EVQ_EVENT, + EventSet::IN, + ); + if let Err(e) = ops.add(events) { + error!( + "{}: failed to register epoll event for config queue, {:?}.", + self.id, e + ); + } + + let be_fd = self.muxer.as_raw_fd(); + let be_evset = EventSet::from_bits(self.muxer.get_polled_evset().bits()).unwrap(); + let events = Events::with_data_raw(be_fd, defs::BACKEND_EVENT, be_evset); + if let Err(e) = ops.add(events) { + error!( + "{}: failed to register epoll event for backend fd: {:?}, {:?}.", + self.id, be_fd, e + ); + } + } +} + +#[cfg(test)] +mod tests { + use vm_memory::{Bytes, GuestAddress, GuestMemoryMmap}; + use vmm_sys_util::epoll::EventSet; + + use super::super::packet::VSOCK_PKT_HDR_SIZE; + use super::super::tests::TestContext; + use super::super::VsockError; + use super::*; + + #[test] + fn test_irq() { + let test_ctx = TestContext::new(); + let mut ctx = test_ctx.create_event_handler_context(); + ctx.arti_activate(&test_ctx.mem); + + assert!(ctx.signal_used_queue(0).is_ok()); + } + + #[test] + fn test_txq_event() { + // Test case: + // - the driver has something to send (there's data in the TX queue); + // and + // - the backend has no pending RX 
data. + { + let test_ctx = TestContext::new(); + let mut ctx = test_ctx.create_event_handler_context(); + ctx.arti_activate(&test_ctx.mem); + + if let Some(epoll_handler) = &mut ctx.epoll_handler { + epoll_handler.muxer.set_pending_rx(false); + } + ctx.signal_txq_event(); + + // The available TX descriptor should have been used. + assert_eq!(ctx.guest_txvq.used.idx().load(), 1); + // The available RX descriptor should be untouched. + assert_eq!(ctx.guest_rxvq.used.idx().load(), 0); + } + + // Test case: + // - the driver has something to send (there's data in the TX queue); + // and + // - the backend also has some pending RX data. + { + let test_ctx = TestContext::new(); + let mut ctx = test_ctx.create_event_handler_context(); + ctx.arti_activate(&test_ctx.mem); + + if let Some(epoll_handler) = &mut ctx.epoll_handler { + epoll_handler.muxer.set_pending_rx(true); + } + ctx.signal_txq_event(); + + // Both available RX and TX descriptors should have been used. + assert_eq!(ctx.guest_txvq.used.idx().load(), 1); + assert_eq!(ctx.guest_rxvq.used.idx().load(), 1); + } + + // Test case: + // - the driver has something to send (there's data in the TX queue); + // and + // - the backend errors out and cannot process the TX queue. + { + let test_ctx = TestContext::new(); + let mut ctx = test_ctx.create_event_handler_context(); + ctx.arti_activate(&test_ctx.mem); + + if let Some(epoll_handler) = &mut ctx.epoll_handler { + epoll_handler.muxer.set_pending_rx(false); + epoll_handler.muxer.set_tx_err(Some(VsockError::NoData)); + } + ctx.signal_txq_event(); + + // Both RX and TX queues should be untouched. + assert_eq!(ctx.guest_txvq.used.idx().load(), 0); + assert_eq!(ctx.guest_rxvq.used.idx().load(), 0); + } + + // Test case: + // - the driver supplied a malformed TX buffer. + { + let test_ctx = TestContext::new(); + let mut ctx = test_ctx.create_event_handler_context(); + ctx.arti_activate(&test_ctx.mem); + + // Invalidate the packet header descriptor, by setting its length to + // 0. + ctx.guest_txvq.dtable(0).len().store(0); + ctx.signal_txq_event(); + + // The available descriptor should have been consumed, but no packet + // should have reached the backend. + assert_eq!(ctx.guest_txvq.used.idx().load(), 1); + if let Some(epoll_handler) = &mut ctx.epoll_handler { + assert_eq!(epoll_handler.muxer.tx_ok_cnt, 0); + } + } + } + + #[test] + fn test_rxq_event() { + // Test case: + // - there is pending RX data in the backend; and + // - the driver makes RX buffers available; and + // - the backend successfully places its RX data into the queue. + { + let test_ctx = TestContext::new(); + let mut ctx = test_ctx.create_event_handler_context(); + ctx.arti_activate(&test_ctx.mem); + + if let Some(epoll_handler) = &mut ctx.epoll_handler { + epoll_handler.muxer.set_pending_rx(true); + epoll_handler.muxer.set_rx_err(Some(VsockError::NoData)); + } + ctx.signal_rxq_event(); + + // The available RX buffer should've been left untouched. + assert_eq!(ctx.guest_rxvq.used.idx().load(), 0); + } + + // Test case: + // - there is pending RX data in the backend; and + // - the driver makes RX buffers available; and + // - the backend errors out, when attempting to receive data. + { + let test_ctx = TestContext::new(); + let mut ctx = test_ctx.create_event_handler_context(); + ctx.arti_activate(&test_ctx.mem); + + if let Some(epoll_handler) = &mut ctx.epoll_handler { + epoll_handler.muxer.set_pending_rx(true); + } + ctx.signal_rxq_event(); + + // The available RX buffer should have been used. 
+ assert_eq!(ctx.guest_rxvq.used.idx().load(), 1); + } + + // Test case: the driver provided a malformed RX descriptor chain. + { + let test_ctx = TestContext::new(); + let mut ctx = test_ctx.create_event_handler_context(); + ctx.arti_activate(&test_ctx.mem); + + // Invalidate the packet header descriptor, by setting its length to 0. + ctx.guest_rxvq.dtable(0).len().store(0); + + // The chain should've been processed, without employing the backend. + if let Some(epoll_handler) = &mut ctx.epoll_handler { + epoll_handler.process_rx(&test_ctx.mem); + assert_eq!(ctx.guest_rxvq.used.idx().load(), 1); + assert_eq!(epoll_handler.muxer.rx_ok_cnt, 0); + } + } + } + + #[test] + fn test_backend_event() { + // Test case: + // - a backend event is received; and + // - the backend has pending RX data. + { + let test_ctx = TestContext::new(); + let mut ctx = test_ctx.create_event_handler_context(); + ctx.arti_activate(&test_ctx.mem); + + if let Some(epoll_handler) = &mut ctx.epoll_handler { + epoll_handler.muxer.set_pending_rx(true); + epoll_handler + .notify_backend_event(&Events::new_raw(0, EventSet::IN), &test_ctx.mem); + + // The backend should've received this event + assert_eq!(epoll_handler.muxer.evset, Some(epoll::Events::EPOLLIN)); + } + + // TX queue processing should've been triggered. + assert_eq!(ctx.guest_txvq.used.idx().load(), 1); + // RX queue processing should've been triggered. + assert_eq!(ctx.guest_rxvq.used.idx().load(), 1); + } + + // Test case: + // - a backend event is received; and + // - the backend doesn't have any pending RX data. + { + let test_ctx = TestContext::new(); + let mut ctx = test_ctx.create_event_handler_context(); + ctx.arti_activate(&test_ctx.mem); + + if let Some(epoll_handler) = &mut ctx.epoll_handler { + epoll_handler.muxer.set_pending_rx(false); + epoll_handler + .notify_backend_event(&Events::new_raw(0, EventSet::IN), &test_ctx.mem); + + // The backend should've received this event. + assert_eq!(epoll_handler.muxer.evset, Some(epoll::Events::EPOLLIN)); + } + // TX queue processing should've been triggered. + assert_eq!(ctx.guest_txvq.used.idx().load(), 1); + // The RX queue should've been left untouched. + assert_eq!(ctx.guest_rxvq.used.idx().load(), 0); + } + } + + // Creates an epoll handler context and attempts to assemble a VsockPkt from + // the descriptor chains available on the rx and tx virtqueues, but first it + // will set the addr and len of the descriptor specified by desc_idx to the + // provided values. We are only using this function for testing error cases, + // so the asserts always expect is_err() to be true. When desc_idx = 0 we + // are altering the header (first descriptor in the chain), and when + // desc_idx = 1 we are altering the packet buffer. + fn vsock_bof_helper(test_ctx: &mut TestContext, desc_idx: usize, addr: u64, len: u32) { + assert!(desc_idx <= 1); + + { + // should error here, but it works + // let mut ctx = test_ctx.create_event_handler_context(); + // ctx.guest_rxvq.dtable(desc_idx as u16).addr().store(addr); + // ctx.guest_rxvq.dtable(desc_idx as u16).len().store(len); + // // If the descriptor chain is already declared invalid, there's no + // // reason to assemble a packet. 
+ // if let Some(mut rx_desc) = ctx.queues[defs::RXQ_EVENT as usize] + // .iter(&mut test_ctx.mem) + // .next() + // { + // assert!(VsockPacket::from_rx_virtq_head(&mut rx_desc).is_err()); + // } + } + + { + let mut ctx = test_ctx.create_event_handler_context(); + + // When modifiyng the buffer descriptor, make sure the len field is altered in the + // vsock packet header descriptor as well. + if desc_idx == 1 { + // The vsock packet len field has offset 24 in the header. + let hdr_len_addr = GuestAddress(ctx.guest_txvq.dtable(0).addr().load() + 24); + test_ctx + .mem + .write_obj(len.to_le_bytes(), hdr_len_addr) + .unwrap(); + } + + ctx.guest_txvq.dtable(desc_idx as u16).addr().store(addr); + ctx.guest_txvq.dtable(desc_idx as u16).len().store(len); + + if let Some(mut tx_desc) = ctx.queues[defs::TXQ_EVENT as usize] + .queue_mut() + .pop_descriptor_chain(&test_ctx.mem) + { + assert!(VsockPacket::from_tx_virtq_head(&mut tx_desc).is_err()); + } + } + } + + #[test] + fn test_vsock_bof() { + const GAP_SIZE: usize = 768 << 20; + const FIRST_AFTER_GAP: usize = 1 << 32; + const GAP_START_ADDR: usize = FIRST_AFTER_GAP - GAP_SIZE; + const MIB: usize = 1 << 20; + + let mut test_ctx = TestContext::new(); + test_ctx.mem = GuestMemoryMmap::from_ranges(&[ + (GuestAddress(0), 8 * MIB), + (GuestAddress((GAP_START_ADDR - MIB) as u64), MIB), + (GuestAddress(FIRST_AFTER_GAP as u64), MIB), + ]) + .unwrap(); + + // The default configured descriptor chains are valid. + { + let mut ctx = test_ctx.create_event_handler_context(); + let mut rx_desc = ctx.queues[defs::RXQ_EVENT as usize] + .queue_mut() + .pop_descriptor_chain(&test_ctx.mem) + .unwrap(); + assert!(VsockPacket::from_rx_virtq_head(&mut rx_desc).is_ok()); + } + + { + let mut ctx = test_ctx.create_event_handler_context(); + let mut tx_desc = ctx.queues[defs::TXQ_EVENT as usize] + .queue_mut() + .pop_descriptor_chain(&test_ctx.mem) + .unwrap(); + assert!(VsockPacket::from_tx_virtq_head(&mut tx_desc).is_ok()); + } + + // Let's check what happens when the header descriptor is right before + // the gap. + vsock_bof_helper( + &mut test_ctx, + 0, + GAP_START_ADDR as u64 - 1, + VSOCK_PKT_HDR_SIZE as u32, + ); + + // Let's check what happens when the buffer descriptor crosses into the + // gap, but does not go past its right edge. + vsock_bof_helper( + &mut test_ctx, + 1, + GAP_START_ADDR as u64 - 4, + GAP_SIZE as u32 + 4, + ); + + // Let's modify the buffer descriptor addr and len such that it crosses + // over the MMIO gap, and check we cannot assemble the VsockPkts. + vsock_bof_helper( + &mut test_ctx, + 1, + GAP_START_ADDR as u64 - 4, + GAP_SIZE as u32 + 100, + ); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/mod.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/mod.rs new file mode 100644 index 000000000000..de99cc6be7aa --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/mod.rs @@ -0,0 +1,494 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. 
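The `test_vsock_bof` cases that close `epoll_handler.rs` above all come down to one invariant: a descriptor's `[addr, addr + len)` range has to fall inside a single contiguous guest-memory region, otherwise the packet cannot be assembled. A standalone approximation of that check, reusing the memory layout from the test (a simplification of what the `vm-memory` accessors enforce):

```rust
// A buffer is usable only if it fits entirely inside one contiguous region;
// anything straddling the MMIO gap must be rejected.
fn buf_in_one_region(regions: &[(u64, u64)], addr: u64, len: u64) -> bool {
    regions.iter().any(|&(start, size)| {
        addr >= start
            && addr
                .checked_add(len)
                .map_or(false, |end| end <= start.saturating_add(size))
    })
}

fn main() {
    const MIB: u64 = 1 << 20;
    let gap_start: u64 = (1u64 << 32) - (768 * MIB);
    let regions = [
        (0u64, 8 * MIB),        // low RAM
        (gap_start - MIB, MIB), // last MiB before the gap
        (1u64 << 32, MIB),      // first MiB after the gap
    ];
    assert!(buf_in_one_region(&regions, 0x10_0000, 4096));
    // A buffer that starts just before the gap and crosses into it is rejected.
    assert!(!buf_in_one_region(&regions, gap_start - 4, 100));
}
```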
+ +pub mod backend; +pub mod csm; +mod device; +mod epoll_handler; +pub mod muxer; +mod packet; + +use std::os::unix::io::AsRawFd; + +use vm_memory::GuestMemoryError; + +pub use self::defs::{NUM_QUEUES, QUEUE_SIZES}; +pub use self::device::Vsock; +use self::muxer::Error as MuxerError; +pub use self::muxer::VsockMuxer; +use self::packet::VsockPacket; + +mod defs { + /// RX queue event: the driver added available buffers to the RX queue. + pub const RXQ_EVENT: u32 = 0; + /// TX queue event: the driver added available buffers to the RX queue. + pub const TXQ_EVENT: u32 = 1; + /// Event queue event: the driver added available buffers to the event + /// queue. + pub const EVQ_EVENT: u32 = 2; + /// Backend event: the backend needs a kick. + pub const BACKEND_EVENT: u32 = 3; + + /// Number of virtio queues. + pub const NUM_QUEUES: usize = 3; + /// Virtio queue sizes, in number of descriptor chain heads. + /// + /// There are 3 queues for a virtio device (in this order): RX, TX, Event + pub const QUEUE_SIZES: &[u16] = &[256; NUM_QUEUES]; + + /// Max vsock packet data/buffer size. + pub const MAX_PKT_BUF_SIZE: usize = 64 * 1024; + + pub mod uapi { + /// Virtio feature flags. + /// + /// Defined in `/include/uapi/linux/virtio_config.h`. + /// + /// The device processes available buffers in the same order in which + /// the device offers them. + pub const VIRTIO_F_IN_ORDER: usize = 35; + /// The device conforms to the virtio spec version 1.0. + pub const VIRTIO_F_VERSION_1: u32 = 32; + + /// Virtio vsock device ID. + /// + /// Defined in `include/uapi/linux/virtio_ids.h`. + pub const VIRTIO_ID_VSOCK: u32 = 19; + + /// Vsock packet operation IDs. + /// + /// Defined in `/include/uapi/linux/virtio_vsock.h`. + /// + /// Connection request. + pub const VSOCK_OP_REQUEST: u16 = 1; + /// Connection response. + pub const VSOCK_OP_RESPONSE: u16 = 2; + /// Connection reset. + pub const VSOCK_OP_RST: u16 = 3; + /// Connection clean shutdown. + pub const VSOCK_OP_SHUTDOWN: u16 = 4; + /// Connection data (read/write). + pub const VSOCK_OP_RW: u16 = 5; + /// Flow control credit update. + pub const VSOCK_OP_CREDIT_UPDATE: u16 = 6; + /// Flow control credit update request. + pub const VSOCK_OP_CREDIT_REQUEST: u16 = 7; + + /// Vsock packet flags. Defined in `/include/uapi/linux/virtio_vsock.h`. + /// + /// Valid with a VSOCK_OP_SHUTDOWN packet: the packet sender will + /// receive no more data. + pub const VSOCK_FLAGS_SHUTDOWN_RCV: u32 = 1; + /// Valid with a VSOCK_OP_SHUTDOWN packet: the packet sender will send + /// no more data. + pub const VSOCK_FLAGS_SHUTDOWN_SEND: u32 = 2; + + /// Vsock packet type. + /// Defined in `/include/uapi/linux/virtio_vsock.h`. + /// + /// Stream / connection-oriented packet (the only currently valid type). + pub const VSOCK_TYPE_STREAM: u16 = 1; + + /// Well known vsock CID for host system. + pub const VSOCK_HOST_CID: u64 = 2; + } +} + +#[derive(Debug, thiserror::Error)] +pub enum VsockError { + /// vsock backend error + #[error("Vsock backend error: {0}")] + Backend(#[source] std::io::Error), + /// The vsock data/buffer virtio descriptor is expected, but missing. + #[error("The vsock data/buffer virtio descriptor is expected, but missing")] + BufDescMissing, + /// The vsock data/buffer virtio descriptor length is smaller than expected. + #[error("The vsock data/buffer virtio descriptor length is smaller than expected")] + BufDescTooSmall, + /// Chained GuestMemory error. 
+ #[error("Chained GuestMemory error: {0}")] + GuestMemory(#[source] GuestMemoryError), + /// Bounds check failed on guest memory pointer. + #[error("Bounds check failed on guest memory pointer, addr: {0}, size: {1}")] + GuestMemoryBounds(u64, usize), + /// The vsock header descriptor length is too small. + #[error("The vsock header descriptor length {0} is too small")] + HdrDescTooSmall(u32), + /// The vsock header `len` field holds an invalid value. + #[error("The vsock header `len` field holds an invalid value {0}")] + InvalidPktLen(u32), + /// vsock muxer error + #[error("Vsock muxer error: {0}")] + Muxer(#[source] MuxerError), + /// A data fetch was attempted when no data was available. + #[error("A data fetch was attempted when no data was available")] + NoData, + /// A data buffer was expected for the provided packet, but it is missing. + #[error("A data buffer was expected for the provided packet, but it is missing")] + PktBufMissing, + /// Encountered an unexpected write-only virtio descriptor. + #[error("Encountered an unexpected write-only virtio descriptor")] + UnreadableDescriptor, + /// Encountered an unexpected read-only virtio descriptor. + #[error("Encountered an unexpected read-only virtio descriptor")] + UnwritableDescriptor, +} + +type Result = std::result::Result; + +/// A passive, event-driven object, that needs to be notified whenever an +/// epoll-able event occurs. An event-polling control loop will use +/// `get_polled_fd()` and `get_polled_evset()` to query the listener for the +/// file descriptor and the set of events it's interested in. When such an event +/// occurs, the control loop will route the event to the listener via +/// `notify()`. +pub trait VsockEpollListener: AsRawFd { + /// Get the set of events for which the listener wants to be notified. + fn get_polled_evset(&self) -> epoll::Events; + + /// Notify the listener that one ore more events have occured. + fn notify(&mut self, evset: epoll::Events); +} + +/// Any channel that handles vsock packet traffic: sending and receiving +/// packets. Since we're implementing the device model here, our responsibility +/// is to always process the sending of packets (i.e. the TX queue). So, any +/// locally generated data, addressed to the driver (e.g. a connection response +/// or RST), will have to be queued, until we get to processing the RX queue. +/// +/// Note: `recv_pkt()` and `send_pkt()` are named analogous to `Read::read()` +/// and `Write::write()`, respectively. I.e. - `recv_pkt()` will read data +/// from the channel, and place it into a packet; and - `send_pkt()` will +/// fetch data from a packet, and place it into the channel. +pub trait VsockChannel { + /// Read/receive an incoming packet from the channel. + fn recv_pkt(&mut self, pkt: &mut VsockPacket) -> Result<()>; + + /// Write/send a packet through the channel. + fn send_pkt(&mut self, pkt: &VsockPacket) -> Result<()>; + + /// Checks weather there is pending incoming data inside the channel, + /// meaning that a subsequent call to `recv_pkt()` won't fail. 
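The contract above is easiest to see with a trivial channel: `send_pkt()` may generate a local reply (an RST, a connection response) that has to sit in a queue until the device drains it through `recv_pkt()`, and `has_pending_rx()` merely reports whether such a drain would succeed. A minimal sketch, independent of `VsockPacket` (toy types, not the crate's):

```rust
use std::collections::VecDeque;

// Toy model of the VsockChannel contract: replies queued by send_pkt() are
// only delivered once the device gets around to RX processing.
struct ToyChannel {
    rx: VecDeque<String>,
}

impl ToyChannel {
    fn send_pkt(&mut self, pkt: &str) {
        // Pretend every TX packet provokes an immediate local reply.
        self.rx.push_back(format!("reply-to-{pkt}"));
    }
    fn recv_pkt(&mut self) -> Option<String> {
        self.rx.pop_front()
    }
    fn has_pending_rx(&self) -> bool {
        !self.rx.is_empty()
    }
}

fn main() {
    let mut ch = ToyChannel { rx: VecDeque::new() };
    ch.send_pkt("request");
    assert!(ch.has_pending_rx());
    assert_eq!(ch.recv_pkt().as_deref(), Some("reply-to-request"));
    assert!(!ch.has_pending_rx());
}
```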
+ fn has_pending_rx(&self) -> bool; +} + +#[cfg(test)] +mod tests { + use std::ops::Deref; + use std::os::unix::io::{AsRawFd, RawFd}; + use std::sync::Arc; + + use dbs_device::resources::DeviceResources; + use dbs_interrupt::NoopNotifier; + use dbs_utils::epoll_manager::EpollManager; + use kvm_ioctls::Kvm; + use virtio_queue::QueueSync; + use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemoryMmap, GuestRegionMmap}; + use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK}; + + use super::backend::VsockBackend; + use super::defs::{EVQ_EVENT, RXQ_EVENT, TXQ_EVENT}; + use super::epoll_handler::VsockEpollHandler; + use super::muxer::{Result as MuxerResult, VsockGenericMuxer}; + use super::packet::{VsockPacket, VSOCK_PKT_HDR_SIZE}; + use super::*; + use crate::device::VirtioDeviceConfig; + use crate::tests::{VirtQueue as GuestQ, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; + use crate::Result as VirtioResult; + use crate::VirtioQueueConfig; + + pub fn test_bytes(src: &[u8], dst: &[u8]) { + let min_len = std::cmp::min(src.len(), dst.len()); + assert_eq!(src[0..min_len], dst[0..min_len]) + } + + type Result = std::result::Result; + + pub struct TestMuxer { + pub evfd: EventFd, + pub rx_err: Option, + pub tx_err: Option, + pub pending_rx: bool, + pub rx_ok_cnt: usize, + pub tx_ok_cnt: usize, + pub evset: Option, + } + + impl TestMuxer { + pub fn new() -> Self { + Self { + evfd: EventFd::new(EFD_NONBLOCK).unwrap(), + rx_err: None, + tx_err: None, + pending_rx: false, + rx_ok_cnt: 0, + tx_ok_cnt: 0, + evset: None, + } + } + + pub fn set_rx_err(&mut self, err: Option) { + self.rx_err = err; + } + pub fn set_tx_err(&mut self, err: Option) { + self.tx_err = err; + } + pub fn set_pending_rx(&mut self, prx: bool) { + self.pending_rx = prx; + } + } + + impl Default for TestMuxer { + fn default() -> Self { + Self::new() + } + } + + impl VsockChannel for TestMuxer { + fn recv_pkt(&mut self, _pkt: &mut VsockPacket) -> Result<()> { + let cool_buf = [0xDu8, 0xE, 0xA, 0xD, 0xB, 0xE, 0xE, 0xF]; + match self.rx_err.take() { + None => { + if let Some(buf) = _pkt.buf_mut() { + for i in 0..buf.len() { + buf[i] = cool_buf[i % cool_buf.len()]; + } + } + self.rx_ok_cnt += 1; + Ok(()) + } + Some(e) => Err(e), + } + } + + fn send_pkt(&mut self, _pkt: &VsockPacket) -> Result<()> { + match self.tx_err.take() { + None => { + self.tx_ok_cnt += 1; + Ok(()) + } + Some(e) => Err(e), + } + } + + fn has_pending_rx(&self) -> bool { + self.pending_rx + } + } + + impl AsRawFd for TestMuxer { + fn as_raw_fd(&self) -> RawFd { + self.evfd.as_raw_fd() + } + } + + impl VsockEpollListener for TestMuxer { + fn get_polled_evset(&self) -> epoll::Events { + epoll::Events::EPOLLIN + } + fn notify(&mut self, evset: epoll::Events) { + self.evset = Some(evset); + } + } + + impl VsockGenericMuxer for TestMuxer { + fn add_backend( + &mut self, + _backend: Box, + _is_peer_backend: bool, + ) -> MuxerResult<()> { + Ok(()) + } + } + + pub struct TestContext { + pub cid: u64, + pub mem: GuestMemoryMmap, + pub mem_size: usize, + pub epoll_manager: EpollManager, + pub device: Vsock, TestMuxer>, + } + + impl TestContext { + pub fn new() -> Self { + const CID: u64 = 52; + const MEM_SIZE: usize = 1024 * 1024 * 128; + let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), MEM_SIZE)]).unwrap(); + let epoll_manager = EpollManager::default(); + Self { + cid: CID, + mem, + mem_size: MEM_SIZE, + epoll_manager: epoll_manager.clone(), + device: Vsock::new_with_muxer( + CID, + Arc::new(defs::QUEUE_SIZES.to_vec()), + epoll_manager, + TestMuxer::new(), + ) + .unwrap(), 
+ } + } + + pub fn create_event_handler_context(&self) -> EventHandlerContext { + const QSIZE: u16 = 256; + + let guest_rxvq = GuestQ::new(GuestAddress(0x0010_0000), &self.mem, QSIZE); + let guest_txvq = GuestQ::new(GuestAddress(0x0020_0000), &self.mem, QSIZE); + let guest_evvq = GuestQ::new(GuestAddress(0x0030_0000), &self.mem, QSIZE); + let rxvq = guest_rxvq.create_queue(); + let txvq = guest_txvq.create_queue(); + let evvq = guest_evvq.create_queue(); + + let rxvq_config = VirtioQueueConfig::new( + rxvq, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + RXQ_EVENT as u16, + ); + let txvq_config = VirtioQueueConfig::new( + txvq, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + TXQ_EVENT as u16, + ); + let evvq_config = VirtioQueueConfig::new( + evvq, + Arc::new(EventFd::new(0).unwrap()), + Arc::new(NoopNotifier::new()), + EVQ_EVENT as u16, + ); + + // Set up one available descriptor in the RX queue. + guest_rxvq.dtable(0).set( + 0x0040_0000, + VSOCK_PKT_HDR_SIZE as u32, + VIRTQ_DESC_F_WRITE | VIRTQ_DESC_F_NEXT, + 1, + ); + guest_rxvq + .dtable(1) + .set(0x0040_1000, 4096, VIRTQ_DESC_F_WRITE, 0); + + guest_rxvq.avail.ring(0).store(0); + guest_rxvq.avail.idx().store(1); + + // Set up one available descriptor in the TX queue. + guest_txvq + .dtable(0) + .set(0x0050_0000, VSOCK_PKT_HDR_SIZE as u32, VIRTQ_DESC_F_NEXT, 1); + guest_txvq.dtable(1).set(0x0050_1000, 4096, 0, 0); + guest_txvq.avail.ring(0).store(0); + guest_txvq.avail.idx().store(1); + + let queues = vec![rxvq_config, txvq_config, evvq_config]; + EventHandlerContext { + guest_rxvq, + guest_txvq, + guest_evvq, + queues, + epoll_handler: None, + device: Vsock::new_with_muxer( + self.cid, + Arc::new(defs::QUEUE_SIZES.to_vec()), + EpollManager::default(), + TestMuxer::new(), + ) + .unwrap(), + mem: Arc::new(self.mem.clone()), + } + } + } + + impl Default for TestContext { + fn default() -> Self { + Self::new() + } + } + + pub struct EventHandlerContext<'a> { + pub device: Vsock, TestMuxer>, + pub epoll_handler: + Option, QueueSync, GuestRegionMmap, TestMuxer>>, + pub queues: Vec>, + pub guest_rxvq: GuestQ<'a>, + pub guest_txvq: GuestQ<'a>, + pub guest_evvq: GuestQ<'a>, + pub mem: Arc, + } + + impl<'a> EventHandlerContext<'a> { + // Artificially activate the device. 
+ pub fn arti_activate(&mut self, mem: &GuestMemoryMmap) { + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let resources = DeviceResources::new(); + let config = VirtioDeviceConfig::>, QueueSync>::new( + Arc::new(mem.clone()), + vm_fd, + resources, + self.queues.drain(..).collect(), + None, + Arc::new(NoopNotifier::new()), + ); + + let epoll_handler = self.device.mock_activate(config).unwrap(); + self.epoll_handler = Some(epoll_handler); + } + + pub fn handle_txq_event(&mut self, mem: &GuestMemoryMmap) { + if let Some(epoll_handler) = &mut self.epoll_handler { + epoll_handler.config.queues[TXQ_EVENT as usize] + .generate_event() + .unwrap(); + epoll_handler.handle_txq_event(mem); + } + } + + pub fn handle_rxq_event(&mut self, mem: &GuestMemoryMmap) { + if let Some(epoll_handler) = &mut self.epoll_handler { + epoll_handler.config.queues[TXQ_EVENT as usize] + .generate_event() + .unwrap(); + epoll_handler.handle_rxq_event(mem); + } + } + + pub fn signal_txq_event(&mut self) { + if let Some(epoll_handler) = &mut self.epoll_handler { + epoll_handler.config.queues[TXQ_EVENT as usize] + .generate_event() + .unwrap(); + } + let mem_guard = self.mem.memory(); + let mem = mem_guard.deref(); + self.handle_txq_event(mem); + } + + pub fn signal_rxq_event(&mut self) { + if let Some(epoll_handler) = &mut self.epoll_handler { + epoll_handler.config.queues[RXQ_EVENT as usize] + .generate_event() + .unwrap(); + } + let mem_guard = self.mem.memory(); + let mem = mem_guard.deref(); + self.handle_rxq_event(mem); + } + + pub fn signal_used_queue(&mut self, idx: usize) -> VirtioResult<()> { + if let Some(epoll_handler) = &mut self.epoll_handler { + epoll_handler.config.queues[RXQ_EVENT as usize] + .generate_event() + .unwrap(); + epoll_handler.signal_used_queue(idx).unwrap(); + } + + Ok(()) + } + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/muxer/mod.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/muxer/mod.rs new file mode 100644 index 000000000000..b6b7db1cb465 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/muxer/mod.rs @@ -0,0 +1,86 @@ +// Copyright 2022 Alibaba Cloud. All Rights Reserved. +// +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +/// This module implements a muxer for vsock - a mediator between guest-side +/// AF_VSOCK sockets and host-side backends. The heavy lifting is performed by +/// `muxer::VsockMuxer`, a connection multiplexer that uses +/// `super::csm::VsockConnection` for handling vsock connection states. Check +/// out `muxer.rs` for a more detailed explanation of the inner workings of this +/// backend. +pub mod muxer_impl; +pub mod muxer_killq; +pub mod muxer_rxq; + +use super::backend::{VsockBackend, VsockBackendType}; +use super::{VsockChannel, VsockEpollListener}; +pub use muxer_impl::VsockMuxer; + +mod defs { + /// Maximum number of established connections that we can handle. + pub const MAX_CONNECTIONS: usize = 1023; + + /// Size of the muxer RX packet queue. + pub const MUXER_RXQ_SIZE: usize = 256; + + /// Size of the muxer connection kill queue. + pub const MUXER_KILLQ_SIZE: usize = 128; +} + +pub type Result = std::result::Result; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// Error registering a new epoll-listening FD. + #[error("error when registering a new epoll-listening FD: {0}")] + EpollAdd(#[source] std::io::Error), + + /// Error creating an epoll FD. 
+ #[error("error when creating an epoll: {0}")] + EpollFdCreate(#[source] std::io::Error), + + /// The host made an invalid vsock port connection request. + #[error("invalid vsock prot connection request")] + InvalidPortRequest, + + /// Cannot add muxer backend when vsock device is activated. + #[error("cannot add muxer backend when vsock device is activated")] + BackendAddAfterActivated, + + /// Error accepting a new connection from backend. + #[error("error accepting a new connection from backend: {0}")] + BackendAccept(#[source] std::io::Error), + + /// Error binding to the backend. + #[error("error binding to the backend: {0}")] + BackendBind(#[source] std::io::Error), + + /// Error connecting to a backend. + #[error("error connecting to a backend: {0}")] + BackendConnect(#[source] std::io::Error), + + /// Error set nonblock to a backend stream. + #[error("error set nonblocking to a backend: {0}")] + BackendSetNonBlock(#[source] std::io::Error), + + /// Error reading from backend. + #[error("error reading from backend: {0}")] + BackendRead(#[source] std::io::Error), + + /// Muxer connection limit reached. + #[error("muxer reaches connection limit")] + TooManyConnections, + + /// Backend type has been registered. + #[error("backend type has been registered: {0:?}")] + BackendRegistered(VsockBackendType), +} + +/// The vsock generic muxer, which is basically an epoll-event-driven vsock +/// channel. Currently, the only implementation we have is +/// `vsock::muxer::muxer::VsockMuxer`, which translates guest-side vsock +/// connections to host-side connections with different backends. +pub trait VsockGenericMuxer: VsockChannel + VsockEpollListener + Send { + fn add_backend(&mut self, backend: Box, is_peer_backend: bool) -> Result<()>; +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/muxer/muxer_impl.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/muxer/muxer_impl.rs new file mode 100644 index 000000000000..bfcc5fa1c8f5 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/muxer/muxer_impl.rs @@ -0,0 +1,1605 @@ +// Copyright 2022 Alibaba Cloud. All Rights Reserved. +// +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +/// `VsockMuxer` is the device-facing component of multiple vsock backends. You +/// can add various of backends to VsockMuxer which implements the +/// `VsockBackend` trait. VsockMuxer can abstracts away the gory details of +/// translating between AF_VSOCK and the protocol of backends which you added. +/// It can also presents a clean interface to the rest of the vsock device +/// model. +/// +/// The vsock muxer has two main roles: +/// 1. Vsock connection multiplexer: It's the muxer's job to create, manage, and +/// terminate `VsockConnection` objects. The muxer also routes packets to +/// their owning connections. It does so via a connection `HashMap`, keyed by +/// what is basically a (host_port, guest_port) tuple. Vsock packet traffic +/// needs to be inspected, in order to detect connection request packets +/// (leading to the creation of a new connection), and connection reset +/// packets (leading to the termination of an existing connection). All other +/// packets, though, must belong to an existing connection and, as such, the +/// muxer simply forwards them. +/// 2. Event dispatcher There are three event categories that the vsock backend +/// is interested it: +/// 1. 
A new host-initiated connection is ready to be accepted from the +/// backends added to muxer; +/// 2. Data is available for reading from a newly-accepted host-initiated +/// connection (i.e. the host is ready to issue a vsock connection +/// request, informing us of the destination port to which it wants to +/// connect); +/// 3. Some event was triggered for a connected backend connection, that +/// belongs to a `VsockConnection`. The muxer gets notified about all of +/// these events, because, as a `VsockEpollListener` implementor, it gets +/// to register a nested epoll FD into the main VMM epolling loop. All +/// other pollable FDs are then registered under this nested epoll FD. To +/// route all these events to their handlers, the muxer uses another +/// `HashMap` object, mapping `RawFd`s to `EpollListener`s. +use std::collections::{HashMap, HashSet}; +use std::fs::File; +use std::io::Read; +use std::os::fd::FromRawFd; +use std::os::unix::io::{AsRawFd, RawFd}; + +use log::{debug, error, info, trace, warn}; + +use super::super::backend::{HybridStream, VsockBackend, VsockBackendType, VsockStream}; + +use super::super::csm::{ConnState, VsockConnection}; +use super::super::defs::uapi; +use super::super::packet::VsockPacket; +use super::super::{Result as VsockResult, VsockChannel, VsockEpollListener, VsockError}; +use super::muxer_killq::MuxerKillQ; +use super::muxer_rxq::MuxerRxQ; +use super::{defs, Error, Result, VsockGenericMuxer}; + +/// A unique identifier of a `VsockConnection` object. Connections are stored in +/// a hash map, keyed by a `ConnMapKey` object. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct ConnMapKey { + local_port: u32, + pub(crate) peer_port: u32, +} + +/// A muxer RX queue item. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum MuxerRx { + /// The packet must be fetched from the connection identified by + /// `ConnMapKey`. + ConnRx(ConnMapKey), + /// The muxer must produce an RST packet. + RstPkt { local_port: u32, peer_port: u32 }, +} + +enum ReadPortResult { + PassFd, + Connect(u32), +} + +/// An epoll listener, registered under the muxer's nested epoll FD. +pub enum EpollListener { + /// The listener is a `VsockConnection`, identified by `key`, and interested + /// in the events in `evset`. Since `VsockConnection` implements + /// `VsockEpollListener`, notifications will be forwarded to the listener + /// via `VsockEpollListener::notify()`. + Connection { + key: ConnMapKey, + evset: epoll::Events, + backend: VsockBackendType, + }, + /// A listener interested in new host-initiated connections. + Backend(VsockBackendType), + /// A listener interested in reading host "connect " commands from a + /// freshly connected host socket. + LocalStream(Box), + /// A listener interested in recvmsg from host to get the and a + /// socket/pipe fd. + PassFdStream(Box), +} + +/// The vsock connection multiplexer. +pub struct VsockMuxer { + /// Guest CID. + cid: u64, + /// A hash map used to store the active connections. + conn_map: HashMap, + /// A hash map used to store epoll event listeners / handlers. + listener_map: HashMap, + /// The RX queue. Items in this queue are consumed by + /// `VsockMuxer::recv_pkt()`, and produced + /// - by `VsockMuxer::send_pkt()` (e.g. RST in response to a connection + /// request packet); and + /// - in response to EPOLLIN events (e.g. data available to be read from an + /// AF_UNIX socket). + rxq: MuxerRxQ, + /// A queue used for terminating connections that are taking too long to + /// shut down. 
+ killq: MuxerKillQ, + /// The nested epoll FD, used to register epoll listeners. + epoll_fd: RawFd, + /// A hash set used to keep track of used host-side (local) ports, in order + /// to assign local ports to host-initiated connections. + local_port_set: HashSet, + /// The last used host-side port. + local_port_last: u32, + /// backend implementations supported in muxer. + backend_map: HashMap>, + /// the backend which can accept peer-initiated connection. + peer_backend: Option, +} + +impl VsockChannel for VsockMuxer { + /// Deliver a vsock packet to the guest vsock driver. + /// + /// Retuns: + /// - `Ok(())`: `pkt` has been successfully filled in; or + /// - `Err(VsockError::NoData)`: there was no available data with which to fill in the packet. + fn recv_pkt(&mut self, pkt: &mut VsockPacket) -> VsockResult<()> { + // We'll look for instructions on how to build the RX packet in the RX + // queue. If the queue is empty, that doesn't necessarily mean we don't + // have any pending RX, since the queue might be out-of-sync. If that's + // the case, we'll attempt to sync it first, and then try to pop + // something out again. + if self.rxq.is_empty() && !self.rxq.is_synced() { + self.rxq = MuxerRxQ::from_conn_map(&self.conn_map); + } + + while let Some(rx) = self.rxq.peek() { + let res = match rx { + // We need to build an RST packet, going from `local_port` to + // `peer_port`. + MuxerRx::RstPkt { + local_port, + peer_port, + } => { + pkt.set_op(uapi::VSOCK_OP_RST) + .set_src_cid(uapi::VSOCK_HOST_CID) + .set_dst_cid(self.cid) + .set_src_port(local_port) + .set_dst_port(peer_port) + .set_len(0) + .set_type(uapi::VSOCK_TYPE_STREAM) + .set_flags(0) + .set_buf_alloc(0) + .set_fwd_cnt(0); + self.rxq.pop().unwrap(); + trace!( + "vsock: muxer.recv[rxq.len={}, type={}, op={}, sp={}, sc={}, dp={}, dc={}]: {:?}", + self.rxq.len(), + pkt.type_(), + pkt.op(), + pkt.src_port(), + pkt.src_cid(), + pkt.dst_port(), + pkt.dst_cid(), + pkt.hdr() + ); + return Ok(()); + } + + // We'll defer building the packet to this connection, that has + // something to say. + MuxerRx::ConnRx(key) => { + let mut conn_res = Err(VsockError::NoData); + let mut do_pop = true; + self.apply_conn_mutation(key, |conn| { + conn_res = conn.recv_pkt(pkt); + do_pop = !conn.has_pending_rx(); + }); + if do_pop { + self.rxq.pop().unwrap(); + } + conn_res + } + }; + + if res.is_ok() { + // Inspect traffic, looking for RST packets, since that means we + // have to terminate and remove this connection from the active + // connection pool. + if pkt.op() == uapi::VSOCK_OP_RST { + self.remove_connection(ConnMapKey { + local_port: pkt.src_port(), + peer_port: pkt.dst_port(), + }); + } + + trace!( + "vsock: muxer.recv[rxq.len={}, type={}, op={}, sp={}, sc={}, dp={}, dc={}]: {:?}", + self.rxq.len(), + pkt.type_(), + pkt.op(), + pkt.src_port(), + pkt.src_cid(), + pkt.dst_port(), + pkt.dst_cid(), + pkt.hdr() + ); + return Ok(()); + } + } + + Err(VsockError::NoData) + } + + /// Deliver a guest-generated packet to its destination in the vsock + /// backend. + /// + /// This absorbs unexpected packets, handles RSTs (by dropping connections), + /// and forwards all the rest to their owning `VsockConnection`. + /// + /// Returns: always `Ok(())` - the packet has been consumed, and its virtio + /// TX buffers can be returned to the guest vsock driver. 
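Concretely, the routing that `send_pkt` performs below reduces to a small decision table: non-stream packets and unroutable non-request packets are answered with an RST, packets addressed to a CID other than the host's are dropped, an unroutable `VSOCK_OP_REQUEST` opens a new connection, an RST for a known connection tears it down, and everything else is forwarded to its owning `VsockConnection`. A compressed, standalone sketch of that table:

```rust
// Condensed decision table for guest -> host packets, mirroring send_pkt().
#[derive(Debug, PartialEq)]
enum Route {
    ReplyRst,      // enqueue an RST back to the guest
    Drop,          // silently ignore (foreign destination CID)
    NewConnection, // VSOCK_OP_REQUEST with no matching connection
    TearDown,      // VSOCK_OP_RST for an existing connection
    Forward,       // hand the packet to its VsockConnection
}

fn route(is_stream: bool, dst_is_host_cid: bool, has_conn: bool, is_request: bool, is_rst: bool) -> Route {
    if !is_stream {
        Route::ReplyRst
    } else if !dst_is_host_cid {
        Route::Drop
    } else if !has_conn {
        if is_request { Route::NewConnection } else { Route::ReplyRst }
    } else if is_rst {
        Route::TearDown
    } else {
        Route::Forward
    }
}

fn main() {
    assert_eq!(route(true, true, false, true, false), Route::NewConnection);
    assert_eq!(route(true, true, false, false, false), Route::ReplyRst);
    assert_eq!(route(true, true, true, false, true), Route::TearDown);
    assert_eq!(route(true, true, true, false, false), Route::Forward);
}
```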
+ fn send_pkt(&mut self, pkt: &VsockPacket) -> VsockResult<()> { + let conn_key = ConnMapKey { + local_port: pkt.dst_port(), + peer_port: pkt.src_port(), + }; + + trace!( + "vsock: muxer.send[rxq.len={}, type={}, op={}, sp={}, sc={}, dp={}, dc={}]: {:?}", + self.rxq.len(), + pkt.type_(), + pkt.op(), + pkt.src_port(), + pkt.src_cid(), + pkt.dst_port(), + pkt.dst_cid(), + pkt.hdr() + ); + + // If this packet has an unsupported type (!=stream), we must send back + // an RST. + if pkt.type_() != uapi::VSOCK_TYPE_STREAM { + self.enq_rst(pkt.dst_port(), pkt.src_port()); + return Ok(()); + } + + // We don't know how to handle packets addressed to other CIDs. We only + // handle the host part of the guest - host communication here. + if pkt.dst_cid() != uapi::VSOCK_HOST_CID { + info!( + "vsock: dropping guest packet for unknown CID: {:?}", + pkt.hdr() + ); + return Ok(()); + } + + if !self.conn_map.contains_key(&conn_key) { + // This packet can't be routed to any active connection (based on + // its src and dst ports). The only orphan / unroutable packets we + // know how to handle are connection requests. + if pkt.op() == uapi::VSOCK_OP_REQUEST { + // Oh, this is a connection request! + self.handle_peer_request_pkt(pkt); + } else { + // Send back an RST, to let the drive know we weren't expecting + // this packet. + self.enq_rst(pkt.dst_port(), pkt.src_port()); + } + return Ok(()); + } + + // Right, we know where to send this packet, then (to `conn_key`). + // However, if this is an RST, we have to forcefully terminate the + // connection, so there's no point in forwarding it the packet. + if pkt.op() == uapi::VSOCK_OP_RST { + self.remove_connection(conn_key); + return Ok(()); + } + + // Alright, everything looks in order - forward this packet to its + // owning connection. + let mut res: VsockResult<()> = Ok(()); + self.apply_conn_mutation(conn_key, |conn| { + res = conn.send_pkt(pkt); + }); + + res + } + + /// Check if the muxer has any pending RX data, with which to fill a + /// guest-provided RX buffer. + fn has_pending_rx(&self) -> bool { + !self.rxq.is_empty() || !self.rxq.is_synced() + } +} + +impl AsRawFd for VsockMuxer { + /// Get the FD to be registered for polling upstream (in the main VMM epoll + /// loop, in this case). + /// + /// This will be the muxer's nested epoll FD. + fn as_raw_fd(&self) -> RawFd { + self.epoll_fd + } +} + +impl VsockEpollListener for VsockMuxer { + /// Get the epoll events to be polled upstream. + /// + /// Since the polled FD is a nested epoll FD, we're only interested in + /// EPOLLIN events (i.e. some event occurred on one of the FDs registered + /// under our epoll FD). + fn get_polled_evset(&self) -> epoll::Events { + epoll::Events::EPOLLIN + } + + /// Notify the muxer about a pending event having occurred under its nested + /// epoll FD. + fn notify(&mut self, _: epoll::Events) { + trace!("vsock: muxer received kick"); + + let mut epoll_events = vec![epoll::Event::new(epoll::Events::empty(), 0); 32]; + match epoll::wait(self.epoll_fd, 0, epoll_events.as_mut_slice()) { + Ok(ev_cnt) => { + for ev in &epoll_events[0..ev_cnt] { + self.handle_event( + ev.data as RawFd, + epoll::Events::from_bits(ev.events).unwrap(), + ); + } + } + Err(e) => { + warn!("vsock: failed to consume muxer epoll event: {}", e); + } + } + } +} + +impl VsockGenericMuxer for VsockMuxer { + /// add a backend for Muxer. 
+ fn add_backend(&mut self, backend: Box, is_peer_backend: bool) -> Result<()> { + let backend_type = backend.r#type(); + if self.backend_map.contains_key(&backend_type) { + return Err(Error::BackendRegistered(backend_type)); + } + self.add_listener( + backend.as_raw_fd(), + EpollListener::Backend(backend_type.clone()), + )?; + self.backend_map.insert(backend_type.clone(), backend); + if is_peer_backend { + self.peer_backend = Some(backend_type); + } + Ok(()) + } +} + +impl VsockMuxer { + /// Muxer constructor. + pub fn new(cid: u64) -> Result { + Ok(Self { + cid, + epoll_fd: epoll::create(false).map_err(Error::EpollFdCreate)?, + rxq: MuxerRxQ::default(), + conn_map: HashMap::with_capacity(defs::MAX_CONNECTIONS), + listener_map: HashMap::with_capacity(defs::MAX_CONNECTIONS + 1), + killq: MuxerKillQ::default(), + local_port_last: (1u32 << 30) - 1, + local_port_set: HashSet::with_capacity(defs::MAX_CONNECTIONS), + backend_map: HashMap::new(), + peer_backend: None, + }) + } + + /// Handle/dispatch an epoll event to its listener. + fn handle_event(&mut self, fd: RawFd, event_set: epoll::Events) { + trace!( + "vsock: muxer processing event: fd={}, evset={:?}", + fd, + event_set + ); + + match self.listener_map.get_mut(&fd) { + // This event needs to be forwarded to a `VsockConnection` that is + // listening for it. + Some(EpollListener::Connection { key, evset: _, .. }) => { + let key_copy = *key; + // The handling of this event will most probably mutate the + // state of the receiving connection. We'll need to check for new + // pending RX, event set mutation, and all that, so we're + // wrapping the event delivery inside those checks. + self.apply_conn_mutation(key_copy, |conn| { + conn.notify(event_set); + }); + } + + // A new host-initiated connection is ready to be accepted. + Some(EpollListener::Backend(backend_type)) => { + if let Some(backend) = self.backend_map.get_mut(backend_type) { + if self.rxq.len() == defs::MAX_CONNECTIONS { + // If we're already maxed-out on connections, we'll just + // accept and immediately discard this potentially new + // one. + warn!("vsock: connection limit reached; refusing new host connection"); + backend.accept().map(|_| 0).unwrap_or(0); + return; + } + backend + .accept() + .map_err(Error::BackendAccept) + .and_then(|stream| { + // Before forwarding this connection to a listening + // AF_VSOCK socket on the guest side, we need to + // know the destination port. We'll read that port + // from a "connect" command received on this socket, + // so the next step is to ask to be notified the + // moment we can read from it. + + self.add_listener( + stream.as_raw_fd(), + EpollListener::LocalStream(stream), + ) + }) + .unwrap_or_else(|err| { + warn!("vsock: unable to accept local connection: {:?}", err); + }); + } else { + error!("vsock: unsable to find specific backend {:?}", backend_type) + } + } + + // Data is ready to be read from a host-initiated connection. That + // would be the "connect" command that we're expecting. 
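The `LocalStream` arm below hands the stream to `read_local_stream_port`, which expects a one-line text handshake: the host process writes either `CONNECT <guest_port>\n` or `passfd\n`, and once the guest accepts, the muxer answers `OK <host_port>\n` (see `apply_conn_mutation` further down). A host process therefore needs nothing beyond the standard library to reach a guest listener; a minimal sketch, assuming a hypothetical socket path and guest port:

use std::io::{Read, Write};
use std::os::unix::net::UnixStream;

fn main() -> std::io::Result<()> {
    // Path of the muxer's Unix-stream backend socket (placeholder).
    let mut sock = UnixStream::connect("/tmp/kata.hvsock")?;
    // Ask the muxer to connect to the guest process listening on vsock port 1024.
    sock.write_all(b"CONNECT 1024\n")?;
    // If the guest accepts, the muxer acks with "OK <host_port>\n".
    let mut reply = [0u8; 32];
    let n = sock.read(&mut reply)?;
    print!("{}", String::from_utf8_lossy(&reply[..n]));
    Ok(())
}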
+ Some(EpollListener::LocalStream(_)) => { + if let Some(EpollListener::LocalStream(mut stream)) = self.remove_listener(fd) { + Self::read_local_stream_port(&mut stream) + .and_then(|read_port_result| match read_port_result { + ReadPortResult::Connect(peer_port) => { + let local_port = self.allocate_local_port(); + self.add_connection( + ConnMapKey { + local_port, + peer_port, + }, + VsockConnection::new_local_init( + stream, + uapi::VSOCK_HOST_CID, + self.cid, + local_port, + peer_port, + ), + ) + } + ReadPortResult::PassFd => self.add_listener( + stream.as_raw_fd(), + EpollListener::PassFdStream(stream), + ), + }) + .unwrap_or_else(|err| { + info!("vsock: error adding local-init connection: {:?}", err); + }) + } + } + + Some(EpollListener::PassFdStream(_)) => { + if let Some(EpollListener::PassFdStream(mut stream)) = self.remove_listener(fd) { + Self::passfd_read_port_and_fd(&mut stream) + .map(|(nfd, peer_port)| (nfd, self.allocate_local_port(), peer_port)) + .and_then(|(nfd, local_port, peer_port)| { + // Here we should make sure the nfd the sole owner to convert it + // into an UnixStream object, otherwise, it could cause memory unsafety. + let nstream = unsafe { File::from_raw_fd(nfd) }; + + let mut hybridstream = HybridStream { + hybrid_stream: nstream, + slave_stream: Some(stream), + }; + + hybridstream + .set_nonblocking(true) + .map_err(Error::BackendSetNonBlock)?; + + self.add_connection( + ConnMapKey { + local_port, + peer_port, + }, + VsockConnection::new_local_init( + Box::new(hybridstream), + uapi::VSOCK_HOST_CID, + self.cid, + local_port, + peer_port, + ), + ) + }) + .unwrap_or_else(|err| { + info!( + "vsock: error adding local-init passthrough fd connection: {:?}", + err + ); + }) + } + } + + _ => { + info!( + "vsock: unexpected event: fd={:?}, evset={:?}", + fd, event_set + ); + } + } + } + + /// Parse a host "connect" command, and extract the destination vsock port. + fn read_local_stream_port(stream: &mut Box) -> Result { + let mut buf = [0u8; 32]; + + // This is the minimum number of bytes that we should be able to read, + // when parsing a valid connection request. I.e. `b"passfd\n"`, otherwise, + // it would be `b"connect 0\n".len()`. + const MIN_READ_LEN: usize = 7; + + // Bring in the minimum number of bytes that we should be able to read. + stream + .read(&mut buf[..MIN_READ_LEN]) + .map_err(Error::BackendRead)?; + + // Now, finish reading the destination port number if it's connect command, + // by bringing in one byte at a time, until we reach an EOL terminator (or our buffer + // space runs out). Yeah, not particularly proud of this approach, but it will have to + // do for now. + let mut blen = MIN_READ_LEN; + while buf[blen - 1] != b'\n' && blen < buf.len() { + stream + .read_exact(&mut buf[blen..=blen]) + .map_err(Error::BackendRead)?; + blen += 1; + } + + let mut word_iter = std::str::from_utf8(&buf) + .map_err(|_| Error::InvalidPortRequest)? 
+ .split_whitespace(); + + word_iter + .next() + .ok_or(Error::InvalidPortRequest) + .and_then(|word| { + let key = word.to_lowercase(); + if key == "connect" { + Ok(true) + } else if key == "passfd" { + Ok(false) + } else { + Err(Error::InvalidPortRequest) + } + }) + .and_then(|connect| { + if connect { + word_iter.next().ok_or(Error::InvalidPortRequest).map(Some) + } else { + Ok(None) + } + }) + .and_then(|word| { + word.map_or_else( + || Ok(ReadPortResult::PassFd), + |word| { + word.parse::() + .map_or(Err(Error::InvalidPortRequest), |word| { + Ok(ReadPortResult::Connect(word)) + }) + }, + ) + }) + .map_err(|_| Error::InvalidPortRequest) + } + + fn passfd_read_port_and_fd(stream: &mut Box) -> Result<(RawFd, u32)> { + let mut buf = [0u8; 32]; + let mut fds = [0, 1]; + let (data_len, fd_len) = stream + .recv_data_fd(&mut buf, &mut fds) + .map_err(Error::BackendRead)?; + + if fd_len != 1 || fds[0] <= 0 { + return Err(Error::InvalidPortRequest); + } + + let mut port_iter = std::str::from_utf8(&buf[..data_len]) + .map_err(|_| Error::InvalidPortRequest)? + .split_whitespace(); + + let port = port_iter + .next() + .ok_or(Error::InvalidPortRequest) + .and_then(|word| word.parse::().map_err(|_| Error::InvalidPortRequest))?; + + Ok((fds[0], port)) + } + + /// Add a new connection to the active connection pool. + fn add_connection(&mut self, key: ConnMapKey, conn: VsockConnection) -> Result<()> { + // We might need to make room for this new connection, so let's sweep + // the kill queue first. It's fine to do this here because: + // - unless the kill queue is out of sync, this is a pretty inexpensive + // operation; and + // - we are under no pressure to respect any accurate timing for + // connection termination. + self.sweep_killq(); + + if self.conn_map.len() >= defs::MAX_CONNECTIONS { + info!( + "vsock: muxer connection limit reached ({})", + defs::MAX_CONNECTIONS + ); + return Err(Error::TooManyConnections); + } + + self.add_listener( + conn.as_raw_fd(), + EpollListener::Connection { + key, + evset: conn.get_polled_evset(), + backend: conn.stream.backend_type(), + }, + ) + .map(|_| { + if conn.has_pending_rx() { + // We can safely ignore any error in adding a connection RX + // indication. Worst case scenario, the RX queue will get + // desynchronized, but we'll handle that the next time we need + // to yield an RX packet. + self.rxq.push(MuxerRx::ConnRx(key)); + } + self.conn_map.insert(key, conn); + }) + } + + /// Remove a connection from the active connection poll. + fn remove_connection(&mut self, key: ConnMapKey) { + if let Some(conn) = self.conn_map.remove(&key) { + self.remove_listener(conn.as_raw_fd()); + } + self.free_local_port(key.local_port); + } + + /// Schedule a connection for immediate termination. I.e. as soon as we can + /// also let our peer know we're dropping the connection, by sending it an + /// RST packet. + fn kill_connection(&mut self, key: ConnMapKey) { + let mut had_rx = false; + + self.conn_map.entry(key).and_modify(|conn| { + had_rx = conn.has_pending_rx(); + conn.kill(); + }); + // This connection will now have an RST packet to yield, so we need to + // add it to the RX queue. However, there's no point in doing that if it + // was already in the queue. + if !had_rx { + // We can safely ignore any error in adding a connection RX + // indication. Worst case scenario, the RX queue will get + // desynchronized, but we'll handle that the next time we need to + // yield an RX packet. 
+ self.rxq.push(MuxerRx::ConnRx(key)); + } + } + + /// Register a new epoll listener under the muxer's nested epoll FD. + pub(crate) fn add_listener(&mut self, fd: RawFd, listener: EpollListener) -> Result<()> { + let evset = match listener { + EpollListener::Connection { evset, .. } => evset, + EpollListener::LocalStream(_) => epoll::Events::EPOLLIN, + EpollListener::Backend(_) => epoll::Events::EPOLLIN, + EpollListener::PassFdStream(_) => epoll::Events::EPOLLIN, + }; + + epoll::ctl( + self.epoll_fd, + epoll::ControlOptions::EPOLL_CTL_ADD, + fd, + epoll::Event::new(evset, fd as u64), + ) + .map(|_| { + self.listener_map.insert(fd, listener); + }) + .map_err(Error::EpollAdd)?; + + Ok(()) + } + + /// Remove (and return) a previously registered epoll listener. + fn remove_listener(&mut self, fd: RawFd) -> Option { + let maybe_listener = self.listener_map.remove(&fd); + + if maybe_listener.is_some() { + epoll::ctl( + self.epoll_fd, + epoll::ControlOptions::EPOLL_CTL_DEL, + fd, + epoll::Event::new(epoll::Events::empty(), 0), + ) + .unwrap_or_else(|err| { + warn!( + "vosck muxer: error removing epoll listener for fd {:?}: {:?}", + fd, err + ); + }); + } + + maybe_listener + } + + /// Allocate a host-side port to be assigned to a new host-initiated + /// connection. + fn allocate_local_port(&mut self) -> u32 { + // TODO: this doesn't seem very space-efficient. + // Mybe rewrite this to limit port range and use a bitmap? + + loop { + self.local_port_last = (self.local_port_last + 1) & !(1 << 31) | (1 << 30); + if self.local_port_set.insert(self.local_port_last) { + break; + } + } + self.local_port_last + } + + /// Mark a previously used host-side port as free. + fn free_local_port(&mut self, port: u32) { + self.local_port_set.remove(&port); + } + + /// Handle a new connection request comming from our peer (the guest vsock + /// driver). + /// + /// This will attempt to connect to a host-side backend. If successful, a + /// new connection object will be created and added to the connection pool. + /// On failure, a new RST packet will be scheduled for delivery to the + /// guest. + fn handle_peer_request_pkt(&mut self, pkt: &VsockPacket) { + if self.peer_backend.is_none() { + error!("no usable backend for peer request"); + self.enq_rst(pkt.dst_port(), pkt.src_port()); + return; + } + + // safe to unwrap + if let Some(backend) = self.backend_map.get(self.peer_backend.as_ref().unwrap()) { + backend + .connect(pkt.dst_port()) + .map_err(Error::BackendConnect) + .and_then(|stream| { + self.add_connection( + ConnMapKey { + local_port: pkt.dst_port(), + peer_port: pkt.src_port(), + }, + VsockConnection::new_peer_init( + stream, + uapi::VSOCK_HOST_CID, + self.cid, + pkt.dst_port(), + pkt.src_port(), + pkt.buf_alloc(), + ), + ) + }) + .unwrap_or_else(|e| { + error!("peer request error: {:?}", e); + self.enq_rst(pkt.dst_port(), pkt.src_port()); + }); + } else { + error!("no usable backend selected for peer request"); + self.enq_rst(pkt.dst_port(), pkt.src_port()); + } + } + + /// Perform an action that might mutate a connection's state. + /// + /// This is used as shorthand for repetitive tasks that need to be performed + /// after a connection object mutates. E.g. + /// - update the connection's epoll listener; + /// - schedule the connection to be queried for RX data; + /// - kill the connection if an unrecoverable error occurs. 
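`allocate_local_port` above keeps host-initiated ports in a dedicated range by clearing bit 31 and forcing bit 30, so every allocated port lies in [2^30, 2^31). A standalone illustration of that arithmetic (`next_local_port` is an illustrative copy of the expression, not crate API):

fn next_local_port(last: u32) -> u32 {
    // Same expression as in allocate_local_port(): clear bit 31, force bit 30.
    (last + 1) & !(1 << 31) | (1 << 30)
}

fn main() {
    assert_eq!(next_local_port((1u32 << 30) - 1), 1 << 30); // first port ever handed out
    assert_eq!(next_local_port(1 << 30), (1 << 30) + 1);    // normal increment inside the range
    assert_eq!(next_local_port((1u32 << 31) - 1), 1 << 30); // top of the range wraps back down
}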
+ fn apply_conn_mutation(&mut self, key: ConnMapKey, mut_fn: F) + where + F: FnOnce(&mut VsockConnection), + { + if let Some(conn) = self.conn_map.get_mut(&key) { + let had_rx = conn.has_pending_rx(); + let was_expiring = conn.will_expire(); + let prev_state = conn.state(); + let backend_type = conn.stream.backend_type(); + + mut_fn(conn); + + // If this is a host-initiated connection that has just become + // established, we'll have to send an ack message to the host end. + if prev_state == ConnState::LocalInit && conn.state() == ConnState::Established { + let msg = format!("OK {}\n", key.local_port); + match conn.send_bytes_raw(msg.as_bytes()) { + Ok(written) if written == msg.len() => (), + Ok(_) => { + // If we can't write a dozen bytes to a pristine + // connection something must be really wrong. Killing + // it. + conn.kill(); + warn!("vsock: unable to fully write connection ack msg."); + } + Err(err) => { + conn.kill(); + warn!("vsock: unable to ack host connection [local_cid {}, peer_cid {}, local_port {}, peer_port {}]: {:?}", conn.local_cid, conn.peer_cid, conn.local_port, conn.peer_port, err); + } + }; + } + + // If the connection wasn't previously scheduled for RX, add it to + // our RX queue. + if !had_rx && conn.has_pending_rx() { + self.rxq.push(MuxerRx::ConnRx(key)); + } + + // If the connection wasn't previously scheduled for termination, + // add it to the kill queue. + if !was_expiring && conn.will_expire() { + // It's safe to unwrap here, since `conn.will_expire()` already + // guaranteed that an `conn.expiry` is available. + self.killq.push(key, conn.expiry().unwrap()); + } + + let fd = conn.as_raw_fd(); + let new_evset = conn.get_polled_evset(); + if new_evset.is_empty() { + // If the connection no longer needs epoll notifications, remove + // its listener from our list. + self.remove_listener(fd); + return; + } + if let Some(EpollListener::Connection { evset, .. }) = self.listener_map.get_mut(&fd) { + if *evset != new_evset { + // If the set of events that the connection is interested in + // has changed, we need to update its epoll listener. + debug!( + "vsock: updating listener for (lp={}, pp={}): old={:?}, new={:?}", + key.local_port, key.peer_port, *evset, new_evset + ); + + *evset = new_evset; + epoll::ctl( + self.epoll_fd, + epoll::ControlOptions::EPOLL_CTL_MOD, + fd, + epoll::Event::new(new_evset, fd as u64), + ) + .unwrap_or_else(|err| { + // This really shouldn't happen, like, ever. However, + // "famous last words" and all that, so let's just kill + // it with fire, and walk away. + self.kill_connection(key); + warn!( + "vsock: error updating epoll listener for (lp={}, pp={}): {:?}", + key.local_port, key.peer_port, err + ); + }); + } + } else { + // The connection had previously asked to be removed from the + // listener map (by returning an empty event set via + // `get_polled_fd()`), but now wants back in. + self.add_listener( + fd, + EpollListener::Connection { + key, + evset: new_evset, + backend: backend_type, + }, + ) + .unwrap_or_else(|err| { + self.kill_connection(key); + warn!( + "vsock: error updating epoll listener for (lp={}, pp={}): {:?}", + key.local_port, key.peer_port, err + ); + }); + } + } + } + + /// Check if any connections have timed out, and if so, schedule them for + /// immediate termination. + fn sweep_killq(&mut self) { + while let Some(key) = self.killq.pop() { + // Connections don't get removed from the kill queue when their kill + // timer is disarmed, since that would be a costly operation. 
This + // means we must check if the connection has indeed expired, prior + // to killing it. + let mut kill = false; + self.conn_map + .entry(key) + .and_modify(|conn| kill = conn.has_expired()); + if kill { + self.kill_connection(key); + } + } + + if self.killq.is_empty() && !self.killq.is_synced() { + self.killq = MuxerKillQ::from_conn_map(&self.conn_map); + // If we've just re-created the kill queue, we can sweep it again; + // maybe there's more to kill. + self.sweep_killq(); + } + } + + /// Enqueue an RST packet into `self.rxq`. + /// + /// Enqueue errors aren't propagated up the call chain, since there is + /// nothing we can do to handle them. We do, however, log a warning, since + /// not being able to enqueue an RST packet means we have to drop it, which + /// is not normal operation. + fn enq_rst(&mut self, local_port: u32, peer_port: u32) { + let pushed = self.rxq.push(MuxerRx::RstPkt { + local_port, + peer_port, + }); + if !pushed { + warn!( + "vsock: muxer.rxq full; dropping RST packet for lp={}, pp={}", + local_port, peer_port + ); + } + } +} + +#[cfg(test)] +mod tests { + use std::fs; + use std::io::{Read, Write}; + use std::ops::Drop; + use std::os::unix::net::{UnixListener, UnixStream}; + use std::path::{Path, PathBuf}; + + use virtio_queue::QueueT; + use vmm_sys_util::tempfile::TempFile; + + use super::super::super::backend::VsockUnixStreamBackend; + use super::super::super::csm::defs as csm_defs; + use super::super::super::defs::RXQ_EVENT; + use super::super::super::tests::TestContext as VsockTestContext; + use super::*; + + const PEER_CID: u64 = 3; + const PEER_BUF_ALLOC: u32 = 64 * 1024; + + struct MuxerTestContext { + _vsock_test_ctx: VsockTestContext, + pkt: VsockPacket, + muxer: VsockMuxer, + host_sock_path: String, + } + + impl Drop for MuxerTestContext { + fn drop(&mut self) { + std::fs::remove_file(self.host_sock_path.as_str()).unwrap(); + } + } + + // Create a TempFile with a given prefix and return it as a nice String + fn get_file(fprefix: &str) -> String { + let listener_path = TempFile::new_with_prefix(fprefix).unwrap(); + listener_path + .as_path() + .as_os_str() + .to_str() + .unwrap() + .to_owned() + } + + impl MuxerTestContext { + fn new(name: &str) -> Self { + let vsock_test_ctx = VsockTestContext::new(); + let mut handler_ctx = vsock_test_ctx.create_event_handler_context(); + let pkt = VsockPacket::from_rx_virtq_head( + &mut handler_ctx.queues[RXQ_EVENT as usize] + .queue_mut() + .pop_descriptor_chain(&vsock_test_ctx.mem) + .unwrap(), + ) + .unwrap(); + + let host_sock_path = get_file(name); + let mut muxer = VsockMuxer::new(PEER_CID).unwrap(); + let uds_backend = + Box::new(VsockUnixStreamBackend::new(host_sock_path.clone()).unwrap()); + muxer.add_backend(uds_backend, true).unwrap(); + Self { + _vsock_test_ctx: vsock_test_ctx, + pkt, + muxer, + host_sock_path, + } + } + + fn init_pkt(&mut self, local_port: u32, peer_port: u32, op: u16) -> &mut VsockPacket { + for b in self.pkt.hdr_mut() { + *b = 0; + } + self.pkt + .set_type(uapi::VSOCK_TYPE_STREAM) + .set_src_cid(PEER_CID) + .set_dst_cid(uapi::VSOCK_HOST_CID) + .set_src_port(peer_port) + .set_dst_port(local_port) + .set_op(op) + .set_buf_alloc(PEER_BUF_ALLOC) + } + + fn init_data_pkt( + &mut self, + local_port: u32, + peer_port: u32, + data: &[u8], + ) -> &mut VsockPacket { + assert!(data.len() <= self.pkt.buf().unwrap().len()); + self.init_pkt(local_port, peer_port, uapi::VSOCK_OP_RW) + .set_len(data.len() as u32); + self.pkt.buf_mut().unwrap()[..data.len()].copy_from_slice(data); + &mut self.pkt + 
} + + fn send(&mut self) { + self.muxer.send_pkt(&self.pkt).unwrap(); + } + + fn recv(&mut self) { + self.muxer.recv_pkt(&mut self.pkt).unwrap(); + } + + fn notify_muxer(&mut self) { + self.muxer.notify(epoll::Events::EPOLLIN); + } + + fn count_epoll_listeners(&self) -> (usize, usize) { + let mut local_lsn_count = 0usize; + let mut conn_lsn_count = 0usize; + for key in self.muxer.listener_map.values() { + match key { + EpollListener::LocalStream(_) => local_lsn_count += 1, + EpollListener::Connection { .. } => conn_lsn_count += 1, + _ => (), + }; + } + (local_lsn_count, conn_lsn_count) + } + + fn create_local_listener(&self, port: u32) -> LocalListener { + LocalListener::new(format!("{}_{}", self.host_sock_path, port)) + } + + fn local_connect(&mut self, peer_port: u32) -> (UnixStream, u32) { + let (init_local_lsn_count, init_conn_lsn_count) = self.count_epoll_listeners(); + + let mut stream = UnixStream::connect(self.host_sock_path.clone()).unwrap(); + stream.set_nonblocking(true).unwrap(); + // The muxer would now get notified of a new connection having arrived at its Unix + // socket, so it can accept it. + self.notify_muxer(); + + // Just after having accepted a new local connection, the muxer should've added a new + // `LocalStream` listener to its `listener_map`. + let (local_lsn_count, _) = self.count_epoll_listeners(); + assert_eq!(local_lsn_count, init_local_lsn_count + 1); + + let buf = format!("CONNECT {peer_port}\n"); + stream.write_all(buf.as_bytes()).unwrap(); + // The muxer would now get notified that data is available for reading from the locally + // initiated connection. + self.notify_muxer(); + + // Successfully reading and parsing the connection request should have removed the + // LocalStream epoll listener and added a Connection epoll listener. + let (local_lsn_count, conn_lsn_count) = self.count_epoll_listeners(); + assert_eq!(local_lsn_count, init_local_lsn_count); + assert_eq!(conn_lsn_count, init_conn_lsn_count + 1); + + // A LocalInit connection should've been added to the muxer connection map. A new + // local port should also have been allocated for the new LocalInit connection. + let local_port = self.muxer.local_port_last; + let key = ConnMapKey { + local_port, + peer_port, + }; + assert!(self.muxer.conn_map.contains_key(&key)); + assert!(self.muxer.local_port_set.contains(&local_port)); + + // A connection request for the peer should now be available from the muxer. 
+ assert!(self.muxer.has_pending_rx()); + self.recv(); + assert_eq!(self.pkt.op(), uapi::VSOCK_OP_REQUEST); + assert_eq!(self.pkt.dst_port(), peer_port); + assert_eq!(self.pkt.src_port(), local_port); + + self.init_pkt(local_port, peer_port, uapi::VSOCK_OP_RESPONSE); + self.send(); + + let mut buf = vec![0u8; 32]; + let len = stream.read(&mut buf[..]).unwrap(); + assert_eq!(&buf[..len], format!("OK {local_port}\n").as_bytes()); + + (stream, local_port) + } + } + + struct LocalListener { + path: PathBuf, + sock: UnixListener, + } + impl LocalListener { + fn new + Clone>(path: P) -> Self { + let path_buf = path.as_ref().to_path_buf(); + let sock = UnixListener::bind(path).unwrap(); + sock.set_nonblocking(true).unwrap(); + Self { + path: path_buf, + sock, + } + } + fn accept(&mut self) -> UnixStream { + let (stream, _) = self.sock.accept().unwrap(); + stream.set_nonblocking(true).unwrap(); + stream + } + } + impl Drop for LocalListener { + fn drop(&mut self) { + std::fs::remove_file(&self.path).unwrap(); + } + } + + #[test] + fn test_muxer_epoll_listener() { + let ctx = MuxerTestContext::new("/tmp/muxer_epoll_listener"); + assert_eq!(ctx.muxer.as_raw_fd(), ctx.muxer.epoll_fd); + assert_eq!(ctx.muxer.get_polled_evset(), epoll::Events::EPOLLIN); + } + + #[test] + fn test_bad_peer_pkt() { + const LOCAL_PORT: u32 = 1026; + const PEER_PORT: u32 = 1025; + const SOCK_DGRAM: u16 = 2; + + let mut ctx = MuxerTestContext::new("/tmp/bad_peer_pkt"); + ctx.init_pkt(LOCAL_PORT, PEER_PORT, uapi::VSOCK_OP_REQUEST) + .set_type(SOCK_DGRAM); + ctx.send(); + + // The guest sent a SOCK_DGRAM packet. Per the vsock spec, we need to reply with an RST + // packet, since vsock only supports stream sockets. + assert!(ctx.muxer.has_pending_rx()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RST); + assert_eq!(ctx.pkt.src_cid(), uapi::VSOCK_HOST_CID); + assert_eq!(ctx.pkt.dst_cid(), PEER_CID); + assert_eq!(ctx.pkt.src_port(), LOCAL_PORT); + assert_eq!(ctx.pkt.dst_port(), PEER_PORT); + + // Any orphan (i.e. without a connection), non-RST packet, should be replied to with an + // RST. + let bad_ops = [ + uapi::VSOCK_OP_RESPONSE, + uapi::VSOCK_OP_CREDIT_REQUEST, + uapi::VSOCK_OP_CREDIT_UPDATE, + uapi::VSOCK_OP_SHUTDOWN, + uapi::VSOCK_OP_RW, + ]; + for op in bad_ops.iter() { + ctx.init_pkt(LOCAL_PORT, PEER_PORT, *op); + ctx.send(); + assert!(ctx.muxer.has_pending_rx()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RST); + assert_eq!(ctx.pkt.src_port(), LOCAL_PORT); + assert_eq!(ctx.pkt.dst_port(), PEER_PORT); + } + + // Any packet addressed to anything other than VSOCK_VHOST_CID should get dropped. + assert!(!ctx.muxer.has_pending_rx()); + ctx.init_pkt(LOCAL_PORT, PEER_PORT, uapi::VSOCK_OP_REQUEST) + .set_dst_cid(uapi::VSOCK_HOST_CID + 1); + ctx.send(); + assert!(!ctx.muxer.has_pending_rx()); + } + + #[test] + fn test_peer_connection() { + const LOCAL_PORT: u32 = 1026; + const PEER_PORT: u32 = 1025; + + let mut ctx = MuxerTestContext::new("/tmp/peer_connection"); + + // Test peer connection refused. + ctx.init_pkt(LOCAL_PORT, PEER_PORT, uapi::VSOCK_OP_REQUEST); + ctx.send(); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RST); + assert_eq!(ctx.pkt.len(), 0); + assert_eq!(ctx.pkt.src_cid(), uapi::VSOCK_HOST_CID); + assert_eq!(ctx.pkt.dst_cid(), PEER_CID); + assert_eq!(ctx.pkt.src_port(), LOCAL_PORT); + assert_eq!(ctx.pkt.dst_port(), PEER_PORT); + + // Test peer connection accepted. 
+ let mut listener = ctx.create_local_listener(LOCAL_PORT); + ctx.init_pkt(LOCAL_PORT, PEER_PORT, uapi::VSOCK_OP_REQUEST); + ctx.send(); + assert_eq!(ctx.muxer.conn_map.len(), 1); + let mut stream = listener.accept(); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RESPONSE); + assert_eq!(ctx.pkt.len(), 0); + assert_eq!(ctx.pkt.src_cid(), uapi::VSOCK_HOST_CID); + assert_eq!(ctx.pkt.dst_cid(), PEER_CID); + assert_eq!(ctx.pkt.src_port(), LOCAL_PORT); + assert_eq!(ctx.pkt.dst_port(), PEER_PORT); + let key = ConnMapKey { + local_port: LOCAL_PORT, + peer_port: PEER_PORT, + }; + assert!(ctx.muxer.conn_map.contains_key(&key)); + + // Test guest -> host data flow. + let data = [1, 2, 3, 4]; + ctx.init_data_pkt(LOCAL_PORT, PEER_PORT, &data); + ctx.send(); + let mut buf = vec![0; data.len()]; + stream.read_exact(buf.as_mut_slice()).unwrap(); + assert_eq!(buf.as_slice(), data); + + // Test host -> guest data flow. + let data = [5u8, 6, 7, 8]; + stream.write_all(&data).unwrap(); + + // When data is available on the local stream, an EPOLLIN event would normally be delivered + // to the muxer's nested epoll FD. For testing only, we can fake that event notification + // here. + ctx.notify_muxer(); + // After being notified, the muxer should've figured out that RX data was available for one + // of its connections, so it should now be reporting that it can fill in an RX packet. + assert!(ctx.muxer.has_pending_rx()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RW); + assert_eq!(ctx.pkt.buf().unwrap()[..data.len()], data); + assert_eq!(ctx.pkt.src_port(), LOCAL_PORT); + assert_eq!(ctx.pkt.dst_port(), PEER_PORT); + + assert!(!ctx.muxer.has_pending_rx()); + } + + #[test] + fn test_local_connection() { + let mut ctx = MuxerTestContext::new("/tmp/local_connection"); + let peer_port = 1025; + let (mut stream, local_port) = ctx.local_connect(peer_port); + + // Test guest -> host data flow. + let data = [1, 2, 3, 4]; + ctx.init_data_pkt(local_port, peer_port, &data); + ctx.send(); + + let mut buf = vec![0u8; data.len()]; + stream.read_exact(buf.as_mut_slice()).unwrap(); + assert_eq!(buf.as_slice(), &data); + + // Test host -> guest data flow. + let data = [5, 6, 7, 8]; + stream.write_all(&data).unwrap(); + ctx.notify_muxer(); + + assert!(ctx.muxer.has_pending_rx()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RW); + assert_eq!(ctx.pkt.src_port(), local_port); + assert_eq!(ctx.pkt.dst_port(), peer_port); + assert_eq!(ctx.pkt.buf().unwrap()[..data.len()], data); + } + + #[test] + fn test_local_close() { + let peer_port = 1025; + let mut ctx = MuxerTestContext::new("/tmp/local_close"); + let local_port; + { + let (_stream, local_port_) = ctx.local_connect(peer_port); + local_port = local_port_; + } + // Local var `_stream` was now dropped, thus closing the local stream. After the muxer gets + // notified via EPOLLIN, it should attempt to gracefully shutdown the connection, issuing a + // VSOCK_OP_SHUTDOWN with both no-more-send and no-more-recv indications set. + ctx.notify_muxer(); + assert!(ctx.muxer.has_pending_rx()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_SHUTDOWN); + assert_ne!(ctx.pkt.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_SEND, 0); + assert_ne!(ctx.pkt.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_RCV, 0); + assert_eq!(ctx.pkt.src_port(), local_port); + assert_eq!(ctx.pkt.dst_port(), peer_port); + + // The connection should get removed (and its local port freed), after the peer replies + // with an RST. 
+ ctx.init_pkt(local_port, peer_port, uapi::VSOCK_OP_RST); + ctx.send(); + let key = ConnMapKey { + local_port, + peer_port, + }; + assert!(!ctx.muxer.conn_map.contains_key(&key)); + assert!(!ctx.muxer.local_port_set.contains(&local_port)); + } + + #[test] + fn test_peer_close() { + let peer_port = 1025; + let local_port = 1026; + let mut ctx = MuxerTestContext::new("/tmp/peer_close"); + + let mut sock = ctx.create_local_listener(local_port); + ctx.init_pkt(local_port, peer_port, uapi::VSOCK_OP_REQUEST); + ctx.send(); + let mut stream = sock.accept(); + + assert!(ctx.muxer.has_pending_rx()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RESPONSE); + assert_eq!(ctx.pkt.src_port(), local_port); + assert_eq!(ctx.pkt.dst_port(), peer_port); + let key = ConnMapKey { + local_port, + peer_port, + }; + assert!(ctx.muxer.conn_map.contains_key(&key)); + + // Emulate a full shutdown from the peer (no-more-send + no-more-recv). + ctx.init_pkt(local_port, peer_port, uapi::VSOCK_OP_SHUTDOWN) + .set_flag(uapi::VSOCK_FLAGS_SHUTDOWN_SEND) + .set_flag(uapi::VSOCK_FLAGS_SHUTDOWN_RCV); + ctx.send(); + + // Now, the muxer should remove the connection from its map, and reply with an RST. + assert!(ctx.muxer.has_pending_rx()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RST); + assert_eq!(ctx.pkt.src_port(), local_port); + assert_eq!(ctx.pkt.dst_port(), peer_port); + let key = ConnMapKey { + local_port, + peer_port, + }; + assert!(!ctx.muxer.conn_map.contains_key(&key)); + + // The muxer should also drop / close the local Unix socket for this connection. + let mut buf = vec![0u8; 16]; + assert_eq!(stream.read(buf.as_mut_slice()).unwrap(), 0); + } + + #[test] + fn test_muxer_rxq() { + let mut ctx = MuxerTestContext::new("/tmp/muxer_rxq"); + let local_port = 1026; + let peer_port_first = 1025; + let mut listener = ctx.create_local_listener(local_port); + let mut streams: Vec = Vec::new(); + + for peer_port in peer_port_first..peer_port_first + defs::MUXER_RXQ_SIZE { + ctx.init_pkt(local_port, peer_port as u32, uapi::VSOCK_OP_REQUEST); + ctx.send(); + streams.push(listener.accept()); + } + + // The muxer RX queue should now be full (with connection reponses), but still + // synchronized. + assert!(ctx.muxer.rxq.is_synced()); + + // One more queued reply should desync the RX queue. + ctx.init_pkt( + local_port, + (peer_port_first + defs::MUXER_RXQ_SIZE) as u32, + uapi::VSOCK_OP_REQUEST, + ); + ctx.send(); + assert!(!ctx.muxer.rxq.is_synced()); + + // With an out-of-sync queue, an RST should evict any non-RST packet from the queue, and + // take its place. We'll check that by making sure that the last packet popped from the + // queue is an RST. + ctx.init_pkt( + local_port + 1, + peer_port_first as u32, + uapi::VSOCK_OP_REQUEST, + ); + ctx.send(); + + for peer_port in peer_port_first..peer_port_first + defs::MUXER_RXQ_SIZE - 1 { + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RESPONSE); + // The response order should hold. The evicted response should have been the last + // enqueued. + assert_eq!(ctx.pkt.dst_port(), peer_port as u32); + } + // There should be one more packet in the queue: the RST. + assert_eq!(ctx.muxer.rxq.len(), 1); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RST); + + // The queue should now be empty, but out-of-sync, so the muxer should report it has some + // pending RX. + assert!(ctx.muxer.rxq.is_empty()); + assert!(!ctx.muxer.rxq.is_synced()); + assert!(ctx.muxer.has_pending_rx()); + + // The next recv should sync the queue back up. 
It should also yield one of the two + // responses that are still left: + // - the one that desynchronized the queue; and + // - the one that got evicted by the RST. + ctx.recv(); + assert!(ctx.muxer.rxq.is_synced()); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RESPONSE); + + assert!(ctx.muxer.has_pending_rx()); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RESPONSE); + } + + #[test] + fn test_muxer_killq() { + let mut ctx = MuxerTestContext::new("/tmp/muxer_killq"); + let local_port = 1026; + let peer_port_first = 1025; + let peer_port_last = peer_port_first + defs::MUXER_KILLQ_SIZE; + let mut listener = ctx.create_local_listener(local_port); + + for peer_port in peer_port_first..=peer_port_last { + ctx.init_pkt(local_port, peer_port as u32, uapi::VSOCK_OP_REQUEST); + ctx.send(); + ctx.notify_muxer(); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RESPONSE); + assert_eq!(ctx.pkt.src_port(), local_port); + assert_eq!(ctx.pkt.dst_port(), peer_port as u32); + { + let _stream = listener.accept(); + } + ctx.notify_muxer(); + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_SHUTDOWN); + assert_eq!(ctx.pkt.src_port(), local_port); + assert_eq!(ctx.pkt.dst_port(), peer_port as u32); + // The kill queue should be synchronized, up until the `defs::MUXER_KILLQ_SIZE`th + // connection we schedule for termination. + assert_eq!( + ctx.muxer.killq.is_synced(), + peer_port < peer_port_first + defs::MUXER_KILLQ_SIZE + ); + } + + assert!(!ctx.muxer.killq.is_synced()); + assert!(!ctx.muxer.has_pending_rx()); + + // Wait for the kill timers to expire. + std::thread::sleep(std::time::Duration::from_millis( + csm_defs::CONN_SHUTDOWN_TIMEOUT_MS, + )); + + // Trigger a kill queue sweep, by requesting a new connection. + ctx.init_pkt( + local_port, + peer_port_last as u32 + 1, + uapi::VSOCK_OP_REQUEST, + ); + ctx.send(); + + // After sweeping the kill queue, it should now be synced (assuming the RX queue is larger + // than the kill queue, since an RST packet will be queued for each killed connection). + assert!(ctx.muxer.killq.is_synced()); + assert!(ctx.muxer.has_pending_rx()); + // There should be `defs::MUXER_KILLQ_SIZE` RSTs in the RX queue, from terminating the + // dying connections in the recent killq sweep. + for _p in peer_port_first..peer_port_last { + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RST); + assert_eq!(ctx.pkt.src_port(), local_port); + } + + // There should be one more packet in the RX queue: the connection response our request + // that triggered the kill queue sweep. + ctx.recv(); + assert_eq!(ctx.pkt.op(), uapi::VSOCK_OP_RESPONSE); + assert_eq!(ctx.pkt.dst_port(), peer_port_last as u32 + 1); + + assert!(!ctx.muxer.has_pending_rx()); + } + + #[test] + fn test_regression_handshake() { + // Address one of the issues found while fixing the following issue: + // https://github.com/firecracker-microvm/firecracker/issues/1751 + // This test checks that the handshake message is not accounted for + let mut ctx = MuxerTestContext::new("/tmp/regression_handshake"); + let peer_port = 1025; + + // Create a local connection. + let (_, local_port) = ctx.local_connect(peer_port); + + // Get the connection from the connection map. + let key = ConnMapKey { + local_port, + peer_port, + }; + let conn = ctx.muxer.conn_map.get_mut(&key).unwrap(); + + // Check that fwd_cnt is 0 - "OK ..." was not accounted for. 
+ assert_eq!(conn.fwd_cnt().0, 0); + } + + #[test] + fn test_regression_rxq_pop() { + // Address one of the issues found while fixing the following issue: + // https://github.com/firecracker-microvm/firecracker/issues/1751 + // This test checks that a connection is not popped out of the muxer + // rxq when multiple flags are set + let mut ctx = MuxerTestContext::new("/tmp/regression_rxq_pop"); + let peer_port = 1025; + let (mut stream, local_port) = ctx.local_connect(peer_port); + + // Send some data. + let data = [5u8, 6, 7, 8]; + stream.write_all(&data).unwrap(); + ctx.notify_muxer(); + + // Get the connection from the connection map. + let key = ConnMapKey { + local_port, + peer_port, + }; + let conn = ctx.muxer.conn_map.get_mut(&key).unwrap(); + + // Forcefully insert another flag. + conn.insert_credit_update(); + + // Call recv twice in order to check that the connection is still + // in the rxq. + assert!(ctx.muxer.has_pending_rx()); + ctx.recv(); + assert!(ctx.muxer.has_pending_rx()); + ctx.recv(); + + // Since initially the connection had two flags set, now there should + // not be any pending RX in the muxer. + assert!(!ctx.muxer.has_pending_rx()); + } + + #[test] + fn test_add_backend_to_muxer() { + let host_sock_path_1 = String::from("/tmp/host_sock_path_muxer_1_1"); + let host_sock_path_2 = String::from("/tmp/host_sock_path_muxer_1_2"); + let host_sock_path_3 = String::from("/tmp/host_sock_path_muxer_1_3"); + fs::remove_file(Path::new(&host_sock_path_1)).unwrap_or_default(); + fs::remove_file(Path::new(&host_sock_path_2)).unwrap_or_default(); + fs::remove_file(Path::new(&host_sock_path_3)).unwrap_or_default(); + + let mut muxer_1 = VsockMuxer::new(PEER_CID).unwrap(); + let uds_backend_1 = + Box::new(VsockUnixStreamBackend::new(host_sock_path_1.clone()).unwrap()); + let uds_backend_2 = + Box::new(VsockUnixStreamBackend::new(host_sock_path_2.clone()).unwrap()); + + // add uds backend, ok + assert!(muxer_1.add_backend(uds_backend_1, false).is_ok()); + // add another uds backend, err + assert!(muxer_1.add_backend(uds_backend_2, false).is_err()); + + let mut muxer_2 = VsockMuxer::new(PEER_CID).unwrap(); + let uds_backend_3 = + Box::new(VsockUnixStreamBackend::new(host_sock_path_3.clone()).unwrap()); + assert!(muxer_2.add_backend(uds_backend_3, true).is_ok()); + // peer_backend need to be uds backend + assert!(muxer_2.peer_backend == Some(VsockBackendType::UnixStream)); + + fs::remove_file(Path::new(&host_sock_path_1)).unwrap_or_default(); + fs::remove_file(Path::new(&host_sock_path_2)).unwrap_or_default(); + fs::remove_file(Path::new(&host_sock_path_3)).unwrap_or_default(); + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/muxer/muxer_killq.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/muxer/muxer_killq.rs new file mode 100644 index 000000000000..427fd42c5e1b --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/muxer/muxer_killq.rs @@ -0,0 +1,157 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// + +/// `MuxerKillQ` implements a helper object that `VsockMuxer` can use for +/// scheduling forced connection termination. I.e. after one peer issues a clean +/// shutdown request (VSOCK_OP_SHUTDOWN), the concerned connection is queued for +/// termination (VSOCK_OP_RST) in the near future (herein implemented via an +/// expiring timer). 
+/// +/// Whenever the muxer needs to schedule a connection for termination, it pushes +/// it (or rather an identifier - the connection key) to this queue. A +/// subsequent pop() operation will succeed if and only if the first connection +/// in the queue is ready to be terminated (i.e. its kill timer expired). +/// +/// Without using this queue, the muxer would have to walk its entire connection +/// pool (hashmap), whenever it needs to check for expired kill timers. With +/// this queue, both scheduling and termination are performed in constant time. +/// However, since we don't want to waste space on a kill queue that's as big as +/// the connection hashmap itself, it is possible that this queue may become +/// full at times. We call this kill queue "synchronized" if we are certain that +/// all connections that are awaiting termination are present in the queue. This +/// means a simple constant-time pop() operation is enough to check wether any +/// connections need to be terminated. When the kill queue becomes full, though, +/// pushing fails, so connections that should be terminated are left out. The +/// queue is not synchronized anymore. When that happens, the muxer will first +/// drain the queue, and then replace it with a new queue, created by walking +/// the connection pool, looking for connections that will be expiring in the +/// future. +use std::collections::{HashMap, VecDeque}; +use std::time::Instant; + +use super::super::csm::VsockConnection; +use super::defs; +use super::muxer_impl::ConnMapKey; + +/// A kill queue item, holding the connection key and the scheduled time for +/// termination. +#[derive(Clone, Copy, Debug)] +pub struct MuxerKillQItem { + pub(crate) key: ConnMapKey, + pub(crate) kill_time: Instant, +} + +impl PartialEq for MuxerKillQItem { + fn eq(&self, other: &MuxerKillQItem) -> bool { + // Time error within 10ms is considered no problem + if let Some(duration) = self.kill_time.checked_duration_since(other.kill_time) { + if duration.as_millis() > 10 { + return false; + } + } else if let Some(duration) = other.kill_time.checked_duration_since(self.kill_time) { + if duration.as_millis() > 10 { + return false; + } + } else { + return false; + } + + self.key == other.key + } +} + +/// The connection kill queue: a FIFO structure, storing the connections that +/// are scheduled for termination. +#[derive(PartialEq)] +pub struct MuxerKillQ { + /// The kill queue contents. + pub(crate) q: VecDeque, + + /// The kill queue sync status: + /// - when true, all connections that are awaiting termination are + /// guaranteed to be in this queue; + /// - when false, some connections may have been left out. + pub(crate) synced: bool, +} + +impl MuxerKillQ { + const SIZE: usize = defs::MUXER_KILLQ_SIZE; + + /// Create a kill queue by walking the connection pool, looking for + /// connections that are set to expire at some point in the future. + /// + /// Note: if more than `Self::SIZE` connections are found, the queue will be + /// created in an out-of-sync state, and will be discarded after it is + /// emptied. 
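The constant-time behaviour described in the module comment above relies on every entry being scheduled with the same `CONN_SHUTDOWN_TIMEOUT_MS` offset at push time: items expire in insertion order, so only the front of the FIFO ever needs its timer checked. A minimal standalone model of that idea (illustrative names, not the crate's types):

use std::collections::VecDeque;
use std::time::{Duration, Instant};

struct KillQ(VecDeque<(u32, Instant)>);

impl KillQ {
    fn push(&mut self, port: u32, timeout: Duration) {
        // All entries use the same timeout, so expiry order == insertion order.
        self.0.push_back((port, Instant::now() + timeout));
    }
    fn pop_expired(&mut self) -> Option<u32> {
        match self.0.front() {
            Some(&(_, when)) if Instant::now() > when => self.0.pop_front().map(|(p, _)| p),
            _ => None,
        }
    }
}

fn main() {
    let mut q = KillQ(VecDeque::new());
    q.push(1030, Duration::from_millis(0));
    q.push(1031, Duration::from_secs(60));
    std::thread::sleep(Duration::from_millis(1));
    assert_eq!(q.pop_expired(), Some(1030)); // the front entry has expired
    assert_eq!(q.pop_expired(), None);       // the second one still has ~60s left
}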
+ pub fn from_conn_map(conn_map: &HashMap) -> Self { + let mut q_buf: Vec = Vec::with_capacity(Self::SIZE); + let mut synced = true; + for (key, conn) in conn_map.iter() { + if !conn.will_expire() { + continue; + } + if q_buf.len() >= Self::SIZE { + synced = false; + break; + } + q_buf.push(MuxerKillQItem { + key: *key, + kill_time: conn.expiry().unwrap(), + }); + } + q_buf.sort_unstable_by_key(|it| it.kill_time); + Self { + q: q_buf.into(), + synced, + } + } + + /// Push a connection key to the queue, scheduling it for termination at + /// `CONN_SHUTDOWN_TIMEOUT_MS` from now (the push time). + pub fn push(&mut self, key: ConnMapKey, kill_time: Instant) { + if !self.is_synced() || self.is_full() { + self.synced = false; + return; + } + self.q.push_back(MuxerKillQItem { key, kill_time }); + } + + /// Attempt to pop an expired connection from the kill queue. + /// + /// This will succeed and return a connection key, only if the connection at + /// the front of the queue has expired. Otherwise, `None` is returned. + pub fn pop(&mut self) -> Option { + if let Some(item) = self.q.front() { + if Instant::now() > item.kill_time { + return Some(self.q.pop_front().unwrap().key); + } + } + None + } + + /// Check if the kill queue is synchronized with the connection pool. + pub fn is_synced(&self) -> bool { + self.synced + } + + /// Check if the kill queue is empty, obviously. + pub fn is_empty(&self) -> bool { + self.q.len() == 0 + } + + /// Check if the kill queue is full. + pub fn is_full(&self) -> bool { + self.q.len() == Self::SIZE + } +} + +impl Default for MuxerKillQ { + /// Trivial kill queue constructor. + fn default() -> Self { + Self { + q: VecDeque::with_capacity(Self::SIZE), + synced: true, + } + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/muxer/muxer_rxq.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/muxer/muxer_rxq.rs new file mode 100644 index 000000000000..fd4723b0b511 --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/muxer/muxer_rxq.rs @@ -0,0 +1,146 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// + +/// `MuxerRxQ` implements a helper object that `VsockMuxer` can use for queuing +/// RX (host -> guest) packets (or rather instructions on how to build said +/// packets). +/// +/// Under ideal operation, every connection that has pending RX data will be +/// present in the muxer RX queue. However, since the RX queue is smaller than +/// the connection pool, it may, under some conditions, become full, meaning +/// that it can no longer account for all the connections that can yield RX +/// data. When that happens, we say that it is no longer "synchronized" (i.e. +/// with the connection pool). A desynchronized RX queue still holds valid data, +/// and the muxer will continue to pop packets from it. However, when a +/// desynchronized queue is drained, additional data may still be available, so +/// the muxer will have to perform a more costly walk of the entire connection +/// pool to find it. This walk is also implemented here, and it is part of the +/// resynchronization procedure: inspect all connections, and add every +/// connection that has pending RX data to the RX queue. +use std::collections::{HashMap, VecDeque}; + +use super::super::csm::VsockConnection; +use super::super::VsockChannel; +use super::defs; +use super::muxer_impl::{ConnMapKey, MuxerRx}; + +/// The muxer RX queue. +#[derive(Eq, PartialEq)] +pub struct MuxerRxQ { + /// The RX queue data. 
+ pub(crate) q: VecDeque, + /// The RX queue sync status. + pub(crate) synced: bool, +} + +impl MuxerRxQ { + const SIZE: usize = defs::MUXER_RXQ_SIZE; + + /// Attempt to build an RX queue, that is synchronized to the connection + /// pool. + /// + /// Note: the resulting queue may still be desynchronized, if there are too + /// many connections that have pending RX data. In that case, the + /// muxer will first drain this queue, and then try again to build a + /// synchronized one. + pub fn from_conn_map(conn_map: &HashMap) -> Self { + let mut q = VecDeque::new(); + let mut synced = true; + + for (key, conn) in conn_map.iter() { + if !conn.has_pending_rx() { + continue; + } + if q.len() >= Self::SIZE { + synced = false; + break; + } + q.push_back(MuxerRx::ConnRx(*key)); + } + Self { q, synced } + } + + /// Push a new RX item to the queue. + /// + /// A push will fail when: + /// - trying to push a connection key onto an out-of-sync, or full queue; or + /// - trying to push an RST onto a queue already full of RSTs. + /// + /// RSTs take precedence over connections, because connections can always be + /// queried for pending RX data later. Aside from this queue, there is no + /// other storage for RSTs, so, failing to push one means that we have to + /// drop the packet. + /// + /// Returns: + /// - `true` if the new item has been successfully queued; or + /// - `false` if there was no room left in the queue. + pub fn push(&mut self, rx: MuxerRx) -> bool { + // Pushing to a non-full, synchronized queue will always succeed. + if self.is_synced() && !self.is_full() { + self.q.push_back(rx); + return true; + } + + match rx { + MuxerRx::RstPkt { .. } => { + // If we just failed to push an RST packet, we'll look through + // the queue, trying to find a connection key that we could + // evict. This way, the queue does lose sync, but we don't drop + // any packets. + for qi in self.q.iter_mut().rev() { + if let MuxerRx::ConnRx(_) = qi { + *qi = rx; + self.synced = false; + return true; + } + } + } + MuxerRx::ConnRx(_) => { + self.synced = false; + } + }; + + false + } + + /// Peek into the front of the queue. + pub fn peek(&self) -> Option { + self.q.front().copied() + } + + /// Pop an RX item from the front of the queue. + pub fn pop(&mut self) -> Option { + self.q.pop_front() + } + + /// Check if the RX queue is synchronized with the connection pool. + pub fn is_synced(&self) -> bool { + self.synced + } + + /// Get the total number of items in the queue. + pub fn len(&self) -> usize { + self.q.len() + } + + /// Check if the queue is empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Check if the queue is full. + pub fn is_full(&self) -> bool { + self.len() == Self::SIZE + } +} + +/// Trivial RX queue constructor. +impl Default for MuxerRxQ { + fn default() -> Self { + Self { + q: VecDeque::with_capacity(Self::SIZE), + synced: true, + } + } +} diff --git a/src/dragonball/src/dbs_virtio_devices/src/vsock/packet.rs b/src/dragonball/src/dbs_virtio_devices/src/vsock/packet.rs new file mode 100644 index 000000000000..bbdd5f3820bf --- /dev/null +++ b/src/dragonball/src/dbs_virtio_devices/src/vsock/packet.rs @@ -0,0 +1,763 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +/// `VsockPacket` provides a thin wrapper over the buffers exchanged via virtio +/// queues. 
There are two components to a vsock packet, each using its own +/// descriptor in a virtio queue: +/// - the packet header; and +/// - the packet data/buffer. +/// +/// There is a 1:1 relation between descriptor chains and packets: the first +/// (chain head) holds the header, and an optional second descriptor holds the +/// data. The second descriptor is only present for data packets (VSOCK_OP_RW). +/// +/// `VsockPacket` wraps these two buffers and provides direct access to the data +/// stored in guest memory. This is done to avoid unnecessarily copying data +/// from guest memory to temporary buffers, before passing it on to the vsock +/// backend. +use std::ops::{Deref, DerefMut}; + +use virtio_queue::{Descriptor, DescriptorChain}; +use vm_memory::GuestMemory; + +use super::defs; +use super::{Result, VsockError}; + +/// The vsock packet header. +// +// The vsock packet header is defined by the C struct: +// +// ```C +// struct virtio_vsock_hdr { +// le64 src_cid; +// le64 dst_cid; +// le32 src_port; +// le32 dst_port; +// le32 len; +// le16 type; +// le16 op; +// le32 flags; +// le32 buf_alloc; +// le32 fwd_cnt; +// } __attribute__((packed)); +// ``` +// +// NOTE: this needs to be marked as repr(C), so we can predict its exact layout +// in memory, since we'll be using guest-provided pointers for access. The Linux +// UAPI headers define this struct as packed, but, in this particular case, +// packing only eliminates 4 trailing padding bytes. Declaring this struct as +// packed would also reduce its alignment to 1, which gets the Rust compiler all +// fidgety. Little does it know, the guest driver already aligned the structure +// properly, so we don't need to worry about alignment. That said, we'll be +// going with only repr(C) (no packing), and hard-coding the struct size as +// `VSOCK_PKT_HDR_SIZE`, since, given this particular layout, the first +// `VSOCK_PKT_HDR_SIZE` bytes are the same in both the packed and unpacked +// layouts. +// +// All fields use the little-endian byte order. Since we're only thinly wrapping +// a pointer to where the guest driver stored the packet header, let's restrict +// this to little-endian targets. +#[cfg(target_endian = "little")] +#[derive(Clone, Copy, Debug, Default)] +#[repr(C)] +pub struct VsockPacketHdr { + /// Source CID. + pub src_cid: u64, + /// Destination CID. + pub dst_cid: u64, + /// Source port. + pub src_port: u32, + /// Destination port. + pub dst_port: u32, + /// Data length (in bytes) - may be 0, if there is now data buffer. + pub len: u32, + /// Socket type. Currently, only connection-oriented streams are defined by + /// the vsock protocol. + pub type_: u16, + /// Operation ID - one of the VSOCK_OP_* values; e.g. + /// - VSOCK_OP_RW: a data packet; + /// - VSOCK_OP_REQUEST: connection request; + /// - VSOCK_OP_RST: forcefull connection termination; + /// etc (see `super::defs::uapi` for the full list). + pub op: u16, + /// Additional options (flags) associated with the current operation (`op`). + /// Currently, only used with shutdown requests (VSOCK_OP_SHUTDOWN). + pub flags: u32, + /// Size (in bytes) of the packet sender receive buffer (for the connection + /// to which this packet belongs). + pub buf_alloc: u32, + /// Number of bytes the sender has received and consumed (for the connection + /// to which this packet belongs). For instance, for our Unix backend, this + /// counter would be the total number of bytes we have successfully written + /// to a backing Unix socket. 
+ pub fwd_cnt: u32, +} + +/// The size (in bytes) of the above packet header struct, as present in a +/// virtio queue buffer. See the explanation above on why we are hard-coding +/// this value here. +pub const VSOCK_PKT_HDR_SIZE: usize = 44; + +/// A thin wrapper over a `VsockPacketHdr` pointer. This is useful because +/// packet headers are provided by the guest via virtio descriptors (so, +/// basically, pointers). We never need to create header structs - only access +/// them. Access to specific members of the wrapped struct is provided via +/// `Deref` and `DerefMut` impls. +pub struct HdrWrapper { + ptr: *mut VsockPacketHdr, +} + +impl HdrWrapper { + /// Create the wrapper from a virtio queue descriptor (a pointer), performing some sanity checks + /// in the process. + pub fn from_virtq_desc(desc: &Descriptor, mem: &M) -> Result { + if desc.len() < VSOCK_PKT_HDR_SIZE as u32 { + return Err(VsockError::HdrDescTooSmall(desc.len())); + } + // TODO: check buffer alignment + + mem.checked_offset(desc.addr(), VSOCK_PKT_HDR_SIZE) + .ok_or_else(|| VsockError::GuestMemoryBounds(desc.addr().0, VSOCK_PKT_HDR_SIZE))?; + + // It's safe to create the wrapper from this pointer, as: + // - the guest driver aligned the data; and + // - `GuestMemory` is page-aligned. + Ok(Self::from_ptr_unchecked( + mem.get_host_address(desc.addr()) + .map_err(VsockError::GuestMemory)?, + )) + } + + /// Create the wrapper from a raw pointer. + /// + /// Warning: the pointer needs to follow proper alignment for + /// `VsockPacketHdr`. This is not a problem for virtq buffers, since the + /// guest driver already handled alignment, and `GuestMemory` is + /// page-aligned. + fn from_ptr_unchecked(ptr: *const u8) -> Self { + #[allow(clippy::cast_ptr_alignment)] + Self { + ptr: ptr as *mut VsockPacketHdr, + } + } + + /// Provide byte-wise access to the data stored inside the header, via a + /// slice / fat-pointer. + pub fn as_slice(&self) -> &[u8] { + // This is safe, since `Self::from_virtq_head()` already performed all the bound checks. + // + unsafe { std::slice::from_raw_parts(self.ptr as *const u8, VSOCK_PKT_HDR_SIZE) } + } + + /// Provide byte-wise mutable access to the data stored inside the header, + /// via a slice / fat-pointer. + pub fn as_mut_slice(&mut self) -> &mut [u8] { + // This is safe, since `Self::from_virtq_head()` already performed all + // the bound checks. + unsafe { std::slice::from_raw_parts_mut(self.ptr as *mut u8, VSOCK_PKT_HDR_SIZE) } + } +} + +/// `Deref` implementation for `HdrWrapper`, allowing access to `VsockPacketHdr` +/// individual members. +impl Deref for HdrWrapper { + type Target = VsockPacketHdr; + + fn deref(&self) -> &VsockPacketHdr { + // Dereferencing this pointer is safe, because it was already validated + // by the `HdrWrapper` constructor. + unsafe { &*self.ptr } + } +} + +/// `DerefMut` implementation for `HdrWrapper`, allowing mutable access to +/// `VsockPacketHdr` individual members. +impl DerefMut for HdrWrapper { + fn deref_mut(&mut self) -> &mut VsockPacketHdr { + // Dereferencing this pointer is safe, because it was already validated + // by the `HdrWrapper` constructor. + unsafe { &mut *self.ptr } + } +} + +/// A thin wrapper over a vsock data pointer in guest memory. The wrapper is +/// meant to be constructed from a guest-provided virtq descriptor, and provides +/// byte-slice-like access. +pub struct BufWrapper { + ptr: *mut u8, + len: usize, +} + +impl BufWrapper { + /// Create the data wrapper from a virtq descriptor. 
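A quick sanity check on the hard-coded `VSOCK_PKT_HDR_SIZE` above: the little-endian header fields are two `u64`s, six `u32`s and two `u16`s, i.e. 2*8 + 6*4 + 2*2 = 44 bytes, which also matches the `HDROFF_*` offsets used by the tests at the end of this file. Note that `std::mem::size_of::<VsockPacketHdr>()` would report 48 because of the trailing padding mentioned in the comment, which is exactly why the 44-byte value is hard-coded. Illustrative check only:

fn main() {
    // src_cid, dst_cid, src_port, dst_port, len, type, op, flags, buf_alloc, fwd_cnt
    let widths = [8, 8, 4, 4, 4, 2, 2, 4, 4, 4];
    assert_eq!(widths.iter().sum::<i32>(), 44);
}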
+ pub fn from_virtq_desc(desc: &Descriptor, mem: &M) -> Result { + // Check the guest provided pointer and data size. + mem.checked_offset(desc.addr(), desc.len() as usize) + .ok_or_else(|| VsockError::GuestMemoryBounds(desc.addr().0, desc.len() as usize))?; + + Ok(Self::from_fat_ptr_unchecked( + mem.get_host_address(desc.addr()) + .map_err(VsockError::GuestMemory)?, + desc.len() as usize, + )) + } + + /// Create the data wrapper from a pointer and size. + /// + /// Warning: Both `ptr` and `len` must be insured as valid by the caller. + fn from_fat_ptr_unchecked(ptr: *const u8, len: usize) -> Self { + Self { + ptr: ptr as *mut u8, + len, + } + } + + /// Provide access to the data buffer, as a byte slice. + pub fn as_slice(&self) -> &[u8] { + // This is safe since bound checks have already been performed when + // creating the buffer from the virtq descriptor. + unsafe { std::slice::from_raw_parts(self.ptr as *const u8, self.len) } + } + + /// Provide mutable access to the data buffer, as a byte slice. + pub fn as_mut_slice(&mut self) -> &mut [u8] { + // This is safe since bound checks have already been performed when + // creating the buffer from the virtq descriptor. + unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) } + } +} + +/// The vsock packet, implemented as a wrapper over a virtq descriptor chain: +/// - the chain head, holding the packet header; and +/// - (an optional) data/buffer descriptor, only present for data packets +/// (VSOCK_OP_RW). +pub struct VsockPacket { + hdr: HdrWrapper, + buf: Option, +} + +impl VsockPacket { + /// Create the packet wrapper from a TX virtq chain head. + /// + /// The chain head is expected to hold valid packet header data. A following + /// packet buffer descriptor can optionally end the chain. Bounds and + /// pointer checks are performed when creating the wrapper. + pub fn from_tx_virtq_head( + desc_chain: &mut DescriptorChain<&M>, + ) -> Result { + let desc = desc_chain.next().ok_or(VsockError::BufDescMissing)?; + + // All buffers in the TX queue must be readable. + if desc.is_write_only() { + return Err(VsockError::UnreadableDescriptor); + } + + let hdr = HdrWrapper::from_virtq_desc(&desc, desc_chain.memory())?; + + // Reject weirdly-sized packets. + if hdr.len > defs::MAX_PKT_BUF_SIZE as u32 { + return Err(VsockError::InvalidPktLen(hdr.len)); + } + + // Don't bother to look for the data descriptor, if the header says + // there's no data. + if hdr.len == 0 { + return Ok(Self { hdr, buf: None }); + } + + let buf_desc = desc_chain.next().ok_or(VsockError::BufDescMissing)?; + + // All TX buffers must be readable. + if buf_desc.is_write_only() { + return Err(VsockError::UnreadableDescriptor); + } + + // The data descriptor should be large enough to hold the data length + // indicated by the header. + if buf_desc.len() < hdr.len { + return Err(VsockError::BufDescTooSmall); + } + + Ok(Self { + hdr, + buf: Some(BufWrapper::from_virtq_desc(&buf_desc, desc_chain.memory())?), + }) + } + + /// Create the packet wrapper from an RX virtq chain head. + /// + /// There must be two descriptors in the chain, both writable: a header + /// descriptor and a data descriptor. Bounds and pointer checks are + /// performed when creating the wrapper. + pub fn from_rx_virtq_head( + desc_chain: &mut DescriptorChain<&M>, + ) -> Result { + let desc = desc_chain.next().ok_or(VsockError::BufDescMissing)?; + + // All RX buffers must be writable. 
+        if !desc.is_write_only() {
+            return Err(VsockError::UnwritableDescriptor);
+        }
+
+        let hdr = HdrWrapper::from_virtq_desc(&desc, desc_chain.memory())?;
+
+        let buf_desc = desc_chain.next().ok_or(VsockError::BufDescMissing)?;
+        if !buf_desc.is_write_only() {
+            return Err(VsockError::UnwritableDescriptor);
+        }
+
+        Ok(Self {
+            hdr,
+            buf: Some(BufWrapper::from_virtq_desc(&buf_desc, desc_chain.memory())?),
+        })
+    }
+
+    /// Provides in-place, byte-slice access to the vsock packet header.
+    pub fn hdr(&self) -> &[u8] {
+        self.hdr.as_slice()
+    }
+
+    /// Provides in-place, byte-slice, mutable access to the vsock packet
+    /// header.
+    pub fn hdr_mut(&mut self) -> &mut [u8] {
+        self.hdr.as_mut_slice()
+    }
+
+    /// Provides in-place, byte-slice access to the vsock packet data buffer.
+    ///
+    /// Note: control packets (e.g. connection request or reset) have no data
+    /// buffer associated. For those packets, this method will return
+    /// `None`. Also note: calling `len()` on the returned slice will
+    /// yield the buffer size, which may be (and often is) larger than the
+    /// length of the packet data. The packet data length is stored in the
+    /// packet header, and accessible via `VsockPacket::len()`.
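+    ///
+    /// For example, an RX path typically fills the first `n` bytes of this
+    /// slice and then records the length via `set_len(n as u32)`.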
+ pub fn buf_mut(&mut self) -> Option<&mut [u8]> { + self.buf.as_mut().map(|buf| buf.as_mut_slice()) + } + + pub fn src_cid(&self) -> u64 { + self.hdr.src_cid + } + + pub fn set_src_cid(&mut self, cid: u64) -> &mut Self { + self.hdr.src_cid = cid; + self + } + + pub fn dst_cid(&self) -> u64 { + self.hdr.dst_cid + } + + pub fn set_dst_cid(&mut self, cid: u64) -> &mut Self { + self.hdr.dst_cid = cid; + self + } + + pub fn src_port(&self) -> u32 { + self.hdr.src_port + } + + pub fn set_src_port(&mut self, port: u32) -> &mut Self { + self.hdr.src_port = port; + self + } + + pub fn dst_port(&self) -> u32 { + self.hdr.dst_port + } + + pub fn set_dst_port(&mut self, port: u32) -> &mut Self { + self.hdr.dst_port = port; + self + } + + pub fn len(&self) -> u32 { + self.hdr.len + } + + pub fn set_len(&mut self, len: u32) -> &mut Self { + self.hdr.len = len; + self + } + + pub fn type_(&self) -> u16 { + self.hdr.type_ + } + + pub fn set_type(&mut self, type_: u16) -> &mut Self { + self.hdr.type_ = type_; + self + } + + pub fn op(&self) -> u16 { + self.hdr.op + } + + pub fn set_op(&mut self, op: u16) -> &mut Self { + self.hdr.op = op; + self + } + + pub fn flags(&self) -> u32 { + self.hdr.flags + } + + pub fn set_flags(&mut self, flags: u32) -> &mut Self { + self.hdr.flags = flags; + self + } + + pub fn set_flag(&mut self, flag: u32) -> &mut Self { + self.set_flags(self.flags() | flag); + self + } + + pub fn buf_alloc(&self) -> u32 { + self.hdr.buf_alloc + } + + pub fn set_buf_alloc(&mut self, buf_alloc: u32) -> &mut Self { + self.hdr.buf_alloc = buf_alloc; + self + } + + pub fn fwd_cnt(&self) -> u32 { + self.hdr.fwd_cnt + } + + pub fn set_fwd_cnt(&mut self, fwd_cnt: u32) -> &mut Self { + self.hdr.fwd_cnt = fwd_cnt; + self + } +} + +#[cfg(test)] +mod tests { + use virtio_queue::QueueT; + use vm_memory::{GuestAddress, GuestMemoryMmap}; + + use super::super::defs::MAX_PKT_BUF_SIZE; + use super::super::tests::{test_bytes, TestContext}; + use super::defs::{RXQ_EVENT, TXQ_EVENT}; + use super::*; + use crate::tests::{VirtqDesc as GuestQDesc, VIRTQ_DESC_F_WRITE}; + + const HDROFF_SRC_CID: usize = 0; + const HDROFF_DST_CID: usize = 8; + const HDROFF_SRC_PORT: usize = 16; + const HDROFF_DST_PORT: usize = 20; + const HDROFF_LEN: usize = 24; + const HDROFF_TYPE: usize = 28; + const HDROFF_OP: usize = 30; + const HDROFF_FLAGS: usize = 32; + const HDROFF_BUF_ALLOC: usize = 36; + const HDROFF_FWD_CNT: usize = 40; + + macro_rules! create_context { + ($test_ctx:ident, $handler_ctx:ident) => { + let $test_ctx = TestContext::new(); + let mut $handler_ctx = $test_ctx.create_event_handler_context(); + // For TX packets, hdr.len should be set to a valid value. + set_pkt_len(1024, &$handler_ctx.guest_txvq.dtable(0), &$test_ctx.mem); + }; + } + + macro_rules! 
expect_asm_error { + (tx, $test_ctx:expr, $handler_ctx:expr, $err:pat) => { + expect_asm_error!($test_ctx, $handler_ctx, $err, from_tx_virtq_head, TXQ_EVENT); + }; + (rx, $test_ctx:expr, $handler_ctx:expr, $err:pat) => { + expect_asm_error!($test_ctx, $handler_ctx, $err, from_rx_virtq_head, RXQ_EVENT); + }; + ($test_ctx:expr, $handler_ctx:expr, $err:pat, $ctor:ident, $vq_index:ident) => { + match VsockPacket::$ctor( + &mut $handler_ctx.queues[$vq_index as usize] + .queue_mut() + .pop_descriptor_chain(&$test_ctx.mem) + .unwrap(), + ) { + Err($err) => (), + Ok(_) => panic!("Packet assembly should've failed!"), + Err(other) => panic!("Packet assembly failed with: {:?}", other), + } + }; + } + + fn set_pkt_len(len: u32, guest_desc: &GuestQDesc, mem: &GuestMemoryMmap) { + let hdr_gpa = guest_desc.addr(); + let hdr_ptr = mem.get_host_address(GuestAddress(hdr_gpa.load())).unwrap(); + let len_ptr = unsafe { hdr_ptr.add(HDROFF_LEN) }; + + unsafe { std::slice::from_raw_parts_mut(len_ptr, 4).copy_from_slice(&len.to_le_bytes()) }; + } + + #[test] + #[allow(clippy::cognitive_complexity)] + fn test_tx_packet_assembly() { + // Test case: successful TX packet assembly. + { + create_context!(test_ctx, handler_ctx); + let pkt = VsockPacket::from_tx_virtq_head( + &mut handler_ctx.queues[TXQ_EVENT as usize] + .queue_mut() + .pop_descriptor_chain(&test_ctx.mem) + .unwrap(), + ) + .unwrap(); + assert_eq!(pkt.hdr().len(), VSOCK_PKT_HDR_SIZE); + assert_eq!( + pkt.buf().unwrap().len(), + handler_ctx.guest_txvq.dtable(1).len().load() as usize + ); + } + + // Test case: error on write-only hdr descriptor. + { + create_context!(test_ctx, handler_ctx); + handler_ctx + .guest_txvq + .dtable(0) + .flags() + .store(VIRTQ_DESC_F_WRITE); + expect_asm_error!(tx, test_ctx, handler_ctx, VsockError::UnreadableDescriptor); + } + + // Test case: header descriptor has insufficient space to hold the packet header. + { + create_context!(test_ctx, handler_ctx); + handler_ctx + .guest_txvq + .dtable(0) + .len() + .store(VSOCK_PKT_HDR_SIZE as u32 - 1); + expect_asm_error!(tx, test_ctx, handler_ctx, VsockError::HdrDescTooSmall(_)); + } + + // Test case: zero-length TX packet. + { + create_context!(test_ctx, handler_ctx); + set_pkt_len(0, &handler_ctx.guest_txvq.dtable(0), &test_ctx.mem); + let mut pkt = VsockPacket::from_tx_virtq_head( + &mut handler_ctx.queues[TXQ_EVENT as usize] + .queue_mut() + .pop_descriptor_chain(&test_ctx.mem) + .unwrap(), + ) + .unwrap(); + assert!(pkt.buf().is_none()); + assert!(pkt.buf_mut().is_none()); + } + + // Test case: TX packet has more data than we can handle. + { + create_context!(test_ctx, handler_ctx); + set_pkt_len( + MAX_PKT_BUF_SIZE as u32 + 1, + &handler_ctx.guest_txvq.dtable(0), + &test_ctx.mem, + ); + expect_asm_error!(tx, test_ctx, handler_ctx, VsockError::InvalidPktLen(_)); + } + + // Test case: + // - packet header advertises some data length; and + // - the data descriptor is missing. + { + create_context!(test_ctx, handler_ctx); + set_pkt_len(1024, &handler_ctx.guest_txvq.dtable(0), &test_ctx.mem); + handler_ctx.guest_txvq.dtable(0).flags().store(0); + expect_asm_error!(tx, test_ctx, handler_ctx, VsockError::BufDescMissing); + } + + // Test case: error on write-only buf descriptor. 
+ { + create_context!(test_ctx, handler_ctx); + handler_ctx + .guest_txvq + .dtable(1) + .flags() + .store(VIRTQ_DESC_F_WRITE); + expect_asm_error!(tx, test_ctx, handler_ctx, VsockError::UnreadableDescriptor); + } + + // Test case: the buffer descriptor cannot fit all the data advertised by the the + // packet header `len` field. + { + create_context!(test_ctx, handler_ctx); + set_pkt_len(8 * 1024, &handler_ctx.guest_txvq.dtable(0), &test_ctx.mem); + handler_ctx.guest_txvq.dtable(1).len().store(4 * 1024); + expect_asm_error!(tx, test_ctx, handler_ctx, VsockError::BufDescTooSmall); + } + } + + #[test] + fn test_rx_packet_assembly() { + // Test case: successful RX packet assembly. + { + create_context!(test_ctx, handler_ctx); + let pkt = VsockPacket::from_rx_virtq_head( + &mut handler_ctx.queues[RXQ_EVENT as usize] + .queue_mut() + .pop_descriptor_chain(&test_ctx.mem) + .unwrap(), + ) + .unwrap(); + assert_eq!(pkt.hdr().len(), VSOCK_PKT_HDR_SIZE); + assert_eq!( + pkt.buf().unwrap().len(), + handler_ctx.guest_rxvq.dtable(1).len().load() as usize + ); + } + + // Test case: read-only RX packet header. + { + create_context!(test_ctx, handler_ctx); + handler_ctx.guest_rxvq.dtable(0).flags().store(0); + expect_asm_error!(rx, test_ctx, handler_ctx, VsockError::UnwritableDescriptor); + } + + // Test case: RX descriptor head cannot fit the entire packet header. + { + create_context!(test_ctx, handler_ctx); + handler_ctx + .guest_rxvq + .dtable(0) + .len() + .store(VSOCK_PKT_HDR_SIZE as u32 - 1); + expect_asm_error!(rx, test_ctx, handler_ctx, VsockError::HdrDescTooSmall(_)); + } + + // Test case: RX descriptor chain is missing the packet buffer descriptor. + { + create_context!(test_ctx, handler_ctx); + handler_ctx + .guest_rxvq + .dtable(0) + .flags() + .store(VIRTQ_DESC_F_WRITE); + expect_asm_error!(rx, test_ctx, handler_ctx, VsockError::BufDescMissing); + } + } + + #[test] + #[allow(clippy::cognitive_complexity)] + fn test_packet_hdr_accessors() { + const SRC_CID: u64 = 1; + const DST_CID: u64 = 2; + const SRC_PORT: u32 = 3; + const DST_PORT: u32 = 4; + const LEN: u32 = 5; + const TYPE: u16 = 6; + const OP: u16 = 7; + const FLAGS: u32 = 8; + const BUF_ALLOC: u32 = 9; + const FWD_CNT: u32 = 10; + + create_context!(test_ctx, handler_ctx); + let mut pkt = VsockPacket::from_rx_virtq_head( + &mut handler_ctx.queues[RXQ_EVENT as usize] + .queue_mut() + .pop_descriptor_chain(&test_ctx.mem) + .unwrap(), + ) + .unwrap(); + + // Test field accessors. + pkt.set_src_cid(SRC_CID) + .set_dst_cid(DST_CID) + .set_src_port(SRC_PORT) + .set_dst_port(DST_PORT) + .set_len(LEN) + .set_type(TYPE) + .set_op(OP) + .set_flags(FLAGS) + .set_buf_alloc(BUF_ALLOC) + .set_fwd_cnt(FWD_CNT); + + assert_eq!(pkt.src_cid(), SRC_CID); + assert_eq!(pkt.dst_cid(), DST_CID); + assert_eq!(pkt.src_port(), SRC_PORT); + assert_eq!(pkt.dst_port(), DST_PORT); + assert_eq!(pkt.len(), LEN); + assert_eq!(pkt.type_(), TYPE); + assert_eq!(pkt.op(), OP); + assert_eq!(pkt.flags(), FLAGS); + assert_eq!(pkt.buf_alloc(), BUF_ALLOC); + assert_eq!(pkt.fwd_cnt(), FWD_CNT); + + // Test individual flag setting. + let flags = pkt.flags() | 0b1000; + pkt.set_flag(0b1000); + assert_eq!(pkt.flags(), flags); + + // Test packet header as-slice access. 
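+        // Every setter above must be reflected at its little-endian offset in
+        // the raw header bytes.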
+ // + + assert_eq!(pkt.hdr().len(), VSOCK_PKT_HDR_SIZE); + + test_bytes(&SRC_CID.to_le_bytes(), &pkt.hdr()[HDROFF_SRC_CID..]); + test_bytes(&DST_CID.to_le_bytes(), &pkt.hdr()[HDROFF_DST_CID..]); + test_bytes(&SRC_PORT.to_le_bytes(), &pkt.hdr()[HDROFF_SRC_PORT..]); + test_bytes(&DST_PORT.to_le_bytes(), &pkt.hdr()[HDROFF_DST_PORT..]); + test_bytes(&LEN.to_le_bytes(), &pkt.hdr()[HDROFF_LEN..]); + test_bytes(&TYPE.to_le_bytes(), &pkt.hdr()[HDROFF_TYPE..]); + test_bytes(&OP.to_le_bytes(), &pkt.hdr()[HDROFF_OP..]); + test_bytes(&FLAGS.to_le_bytes(), &pkt.hdr()[HDROFF_FLAGS..]); + test_bytes(&BUF_ALLOC.to_le_bytes(), &pkt.hdr()[HDROFF_BUF_ALLOC..]); + test_bytes(&FWD_CNT.to_le_bytes(), &pkt.hdr()[HDROFF_FWD_CNT..]); + + assert_eq!(pkt.hdr_mut().len(), VSOCK_PKT_HDR_SIZE); + for b in pkt.hdr_mut() { + *b = 0; + } + assert_eq!(pkt.src_cid(), 0); + assert_eq!(pkt.dst_cid(), 0); + assert_eq!(pkt.src_port(), 0); + assert_eq!(pkt.dst_port(), 0); + assert_eq!(pkt.len(), 0); + assert_eq!(pkt.type_(), 0); + assert_eq!(pkt.op(), 0); + assert_eq!(pkt.flags(), 0); + assert_eq!(pkt.buf_alloc(), 0); + assert_eq!(pkt.fwd_cnt(), 0); + } + + #[test] + fn test_packet_buf() { + create_context!(test_ctx, handler_ctx); + let mut pkt = VsockPacket::from_rx_virtq_head( + &mut handler_ctx.queues[RXQ_EVENT as usize] + .queue_mut() + .pop_descriptor_chain(&test_ctx.mem) + .unwrap(), + ) + .unwrap(); + + assert_eq!( + pkt.buf().unwrap().len(), + handler_ctx.guest_rxvq.dtable(1).len().load() as usize + ); + assert_eq!( + pkt.buf_mut().unwrap().len(), + handler_ctx.guest_rxvq.dtable(1).len().load() as usize + ); + + for i in 0..pkt.buf().unwrap().len() { + pkt.buf_mut().unwrap()[i] = (i % 0x100) as u8; + assert_eq!(pkt.buf().unwrap()[i], (i % 0x100) as u8); + } + } +} diff --git a/src/dragonball/src/device_manager/balloon_dev_mgr.rs b/src/dragonball/src/device_manager/balloon_dev_mgr.rs new file mode 100644 index 000000000000..b0ee2bd37e29 --- /dev/null +++ b/src/dragonball/src/device_manager/balloon_dev_mgr.rs @@ -0,0 +1,419 @@ +// Copyright 2020 Alibaba Cloud. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use dbs_virtio_devices as virtio; +use serde_derive::{Deserialize, Serialize}; +use slog::{error, info}; +use virtio::balloon::{Balloon, BalloonConfig}; +use virtio::Error as VirtIoError; + +use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos}; +use crate::device_manager::DbsMmioV2Device; +use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; + +// The flag of whether to use the shared irq. +const USE_SHARED_IRQ: bool = true; +// The flag of whether to use the generic irq. +const USE_GENERIC_IRQ: bool = false; + +/// Errors associated with `BalloonDeviceConfig`. +#[derive(Debug, thiserror::Error)] +pub enum BalloonDeviceError { + /// The balloon device was already used. + #[error("the virtio-balloon ID was already added to a different device")] + BalloonDeviceAlreadyExists, + + /// Cannot perform the requested operation after booting the microVM. 
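+    /// (Returned when the `hotplug` feature is disabled and a runtime update
+    /// is requested.)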
+ #[error("the update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, + + /// guest memory error + #[error("failed to access guest memory, {0}")] + GuestMemoryError(#[source] vm_memory::mmap::Error), + + /// create balloon device error + #[error("failed to create virtio-balloon device, {0}")] + CreateBalloonDevice(#[source] virtio::Error), + + /// hotplug balloon device error + #[error("cannot hotplug virtio-balloon device, {0}")] + HotplugDeviceFailed(#[source] DeviceMgrError), + + /// create mmio device error + #[error("cannot create virtio-balloon mmio device, {0}")] + CreateMmioDevice(#[source] DeviceMgrError), + + /// Cannot initialize a balloon device or add a device to the MMIO Bus. + #[error("failure while registering balloon device: {0}")] + RegisterBalloonDevice(#[source] DeviceMgrError), + + /// resize balloon device error + #[error("failure while resizing virtio-balloon device, {0}")] + ResizeFailed(#[source] VirtIoError), + + /// The balloon device id doesn't exist. + #[error("invalid balloon device id '{0}'")] + InvalidDeviceId(String), + + /// balloon device does not exist + #[error("balloon device does not exist")] + NotExist, + + /// The device manager errors. + #[error("DeviceManager error: {0}")] + DeviceManager(#[source] DeviceMgrError), +} + +/// Configuration information for a virtio-balloon device. +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] +pub struct BalloonDeviceConfigInfo { + /// Unique identifier of the balloon device + pub balloon_id: String, + /// Resize balloon size in mib + pub size_mib: u64, + /// Use shared irq + pub use_shared_irq: Option, + /// Use generic irq + pub use_generic_irq: Option, + /// VIRTIO_BALLOON_F_DEFLATE_ON_OOM + pub f_deflate_on_oom: bool, + /// VIRTIO_BALLOON_F_REPORTING + pub f_reporting: bool, +} + +impl ConfigItem for BalloonDeviceConfigInfo { + type Err = BalloonDeviceError; + + fn id(&self) -> &str { + &self.balloon_id + } + + fn check_conflicts(&self, other: &Self) -> Result<(), BalloonDeviceError> { + if self.balloon_id.as_str() == other.balloon_id.as_str() { + Err(BalloonDeviceError::BalloonDeviceAlreadyExists) + } else { + Ok(()) + } + } +} + +/// Balloon Device Info +pub type BalloonDeviceInfo = DeviceConfigInfo; + +impl ConfigItem for BalloonDeviceInfo { + type Err = BalloonDeviceError; + + fn id(&self) -> &str { + &self.config.balloon_id + } + + fn check_conflicts(&self, other: &Self) -> Result<(), BalloonDeviceError> { + if self.config.balloon_id.as_str() == other.config.balloon_id.as_str() { + Err(BalloonDeviceError::BalloonDeviceAlreadyExists) + } else { + Ok(()) + } + } +} + +/// Wrapper for the collection that holds all the Balloon Devices Configs +#[derive(Clone)] +pub struct BalloonDeviceMgr { + /// A list of `BalloonDeviceConfig` objects. + info_list: DeviceConfigInfos, + pub(crate) use_shared_irq: bool, +} + +impl BalloonDeviceMgr { + /// Inserts `balloon_cfg` in the virtio-balloon device configuration list. + /// If an entry with the same id already exists, it will attempt to update + /// the existing entry. 
+ pub fn insert_or_update_device( + &mut self, + mut ctx: DeviceOpContext, + balloon_cfg: BalloonDeviceConfigInfo, + ) -> std::result::Result<(), BalloonDeviceError> { + if !cfg!(feature = "hotplug") && ctx.is_hotplug { + error!(ctx.logger(), "hotplug feature has been disabled."; + "subsystem" => "balloon_dev_mgr",); + return Err(BalloonDeviceError::UpdateNotAllowedPostBoot); + } + + let epoll_mgr = ctx + .get_epoll_mgr() + .map_err(BalloonDeviceError::DeviceManager)?; + + // If the id of the drive already exists in the list, the operation is update. + if let Some(index) = self.get_index_of_balloon_dev(&balloon_cfg.balloon_id) { + // Update an existing balloon device + if ctx.is_hotplug { + info!(ctx.logger(), "resize virtio balloon size to {:?}", balloon_cfg.size_mib; "subsystem" => "balloon_dev_mgr"); + self.update_balloon_size(index, balloon_cfg.size_mib)?; + } + self.info_list.insert_or_update(&balloon_cfg)?; + } else { + // Create a new balloon device + if !self.info_list.is_empty() { + error!(ctx.logger(), "only support one balloon device!"; "subsystem" => "balloon_dev_mgr"); + return Err(BalloonDeviceError::BalloonDeviceAlreadyExists); + } + + if !ctx.is_hotplug { + self.info_list.insert_or_update(&balloon_cfg)?; + return Ok(()); + } + + info!(ctx.logger(), "hotplug balloon device: {}", balloon_cfg.balloon_id; "subsystem" => "balloon_dev_mgr"); + let device = Box::new( + virtio::balloon::Balloon::new( + epoll_mgr, + BalloonConfig { + f_deflate_on_oom: balloon_cfg.f_deflate_on_oom, + f_reporting: balloon_cfg.f_reporting, + }, + ) + .map_err(BalloonDeviceError::CreateBalloonDevice)?, + ); + + let mmio_dev = + DeviceManager::create_mmio_virtio_device_with_device_change_notification( + device, + &mut ctx, + balloon_cfg.use_shared_irq.unwrap_or(self.use_shared_irq), + balloon_cfg.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(BalloonDeviceError::CreateMmioDevice)?; + ctx.insert_hotplug_mmio_device(&mmio_dev, None) + .map_err(|e| { + error!( + ctx.logger(), + "hotplug balloon device {} error: {}", + &balloon_cfg.balloon_id, e; + "subsystem" => "balloon_dev_mgr" + ); + BalloonDeviceError::HotplugDeviceFailed(e) + })?; + let index = self.info_list.insert_or_update(&balloon_cfg)?; + self.info_list[index].set_device(mmio_dev); + } + Ok(()) + } + + /// Attaches all virtio-balloon devices from the BalloonDevicesConfig. 
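+    ///
+    /// Each configured device is created and registered with the guest as an
+    /// MMIO virtio device.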
+ pub fn attach_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), BalloonDeviceError> { + let epoll_mgr = ctx + .get_epoll_mgr() + .map_err(BalloonDeviceError::DeviceManager)?; + + for info in self.info_list.iter_mut() { + info!(ctx.logger(), "attach balloon device: {}", info.config.balloon_id; "subsystem" => "balloon_dev_mgr"); + + let device = Balloon::new( + epoll_mgr.clone(), + BalloonConfig { + f_deflate_on_oom: info.config.f_deflate_on_oom, + f_reporting: info.config.f_reporting, + }, + ) + .map_err(BalloonDeviceError::CreateBalloonDevice)?; + let mmio_dev = + DeviceManager::create_mmio_virtio_device_with_device_change_notification( + Box::new(device), + ctx, + info.config.use_shared_irq.unwrap_or(self.use_shared_irq), + info.config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(BalloonDeviceError::RegisterBalloonDevice)?; + info.set_device(mmio_dev); + } + + Ok(()) + } + + fn update_balloon_size( + &self, + index: usize, + size_mib: u64, + ) -> std::result::Result<(), BalloonDeviceError> { + let device = self.info_list[index] + .device + .as_ref() + .ok_or_else(|| BalloonDeviceError::NotExist)?; + if let Some(mmio_dev) = device.as_any().downcast_ref::() { + let guard = mmio_dev.state(); + let inner_dev = guard.get_inner_device(); + if let Some(balloon_dev) = inner_dev + .as_any() + .downcast_ref::>() + { + return balloon_dev + .set_size(size_mib) + .map_err(BalloonDeviceError::ResizeFailed); + } + } + Ok(()) + } + + fn get_index_of_balloon_dev(&self, balloon_id: &str) -> Option { + self.info_list + .iter() + .position(|info| info.config.balloon_id.eq(balloon_id)) + } +} + +impl Default for BalloonDeviceMgr { + /// Create a new `BalloonDeviceMgr` object.. + fn default() -> Self { + BalloonDeviceMgr { + info_list: DeviceConfigInfos::new(), + use_shared_irq: USE_SHARED_IRQ, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_utils::tests::create_vm_for_test; + + impl Default for BalloonDeviceConfigInfo { + fn default() -> Self { + BalloonDeviceConfigInfo { + balloon_id: "".to_string(), + size_mib: 0, + use_generic_irq: None, + use_shared_irq: None, + f_deflate_on_oom: false, + f_reporting: false, + } + } + } + + #[test] + fn test_balloon_config_check_conflicts() { + let config = BalloonDeviceConfigInfo::default(); + let mut config2 = BalloonDeviceConfigInfo::default(); + assert!(config.check_conflicts(&config2).is_err()); + config2.balloon_id = "dummy_balloon".to_string(); + assert!(config.check_conflicts(&config2).is_ok()); + } + + #[test] + fn test_create_balloon_devices_configs() { + let mgr = BalloonDeviceMgr::default(); + assert_eq!(mgr.info_list.len(), 0); + assert_eq!(mgr.get_index_of_balloon_dev(""), None); + } + + #[test] + fn test_balloon_insert_or_update_device() { + //Init vm for test. + let mut vm = create_vm_for_test(); + + // Test for standard config + let device_op_ctx = DeviceOpContext::new( + Some(vm.epoll_manager().clone()), + vm.device_manager(), + Some(vm.vm_as().unwrap().clone()), + None, + false, + Some(vm.vm_config().clone()), + vm.shared_info().clone(), + ); + + let dummy_balloon_device = BalloonDeviceConfigInfo::default(); + vm.device_manager_mut() + .balloon_manager + .insert_or_update_device(device_op_ctx, dummy_balloon_device) + .unwrap(); + assert_eq!(vm.device_manager().balloon_manager.info_list.len(), 1); + } + + #[test] + fn test_balloon_attach_device() { + //Init vm and insert balloon config for test. 
+ let mut vm = create_vm_for_test(); + let device_op_ctx = DeviceOpContext::new( + Some(vm.epoll_manager().clone()), + vm.device_manager(), + Some(vm.vm_as().unwrap().clone()), + None, + false, + Some(vm.vm_config().clone()), + vm.shared_info().clone(), + ); + + let dummy_balloon_device = BalloonDeviceConfigInfo::default(); + vm.device_manager_mut() + .balloon_manager + .insert_or_update_device(device_op_ctx, dummy_balloon_device) + .unwrap(); + assert_eq!(vm.device_manager().balloon_manager.info_list.len(), 1); + + // Test for standard config + let mut device_op_ctx = DeviceOpContext::new( + Some(vm.epoll_manager().clone()), + vm.device_manager(), + Some(vm.vm_as().unwrap().clone()), + None, + false, + Some(vm.vm_config().clone()), + vm.shared_info().clone(), + ); + assert!(vm + .device_manager_mut() + .balloon_manager + .attach_devices(&mut device_op_ctx) + .is_ok()); + assert_eq!(vm.device_manager().balloon_manager.info_list.len(), 1); + } + + #[test] + fn test_balloon_update_device() { + //Init vm for test. + let mut vm = create_vm_for_test(); + let device_op_ctx = DeviceOpContext::new( + Some(vm.epoll_manager().clone()), + vm.device_manager(), + Some(vm.vm_as().unwrap().clone()), + None, + false, + Some(vm.vm_config().clone()), + vm.shared_info().clone(), + ); + + let dummy_balloon_device = BalloonDeviceConfigInfo::default(); + vm.device_manager_mut() + .balloon_manager + .insert_or_update_device(device_op_ctx, dummy_balloon_device) + .unwrap(); + assert_eq!(vm.device_manager().balloon_manager.info_list.len(), 1); + + let mut device_op_ctx = DeviceOpContext::new( + Some(vm.epoll_manager().clone()), + vm.device_manager(), + Some(vm.vm_as().unwrap().clone()), + None, + false, + Some(vm.vm_config().clone()), + vm.shared_info().clone(), + ); + + assert!(vm + .device_manager_mut() + .balloon_manager + .attach_devices(&mut device_op_ctx) + .is_ok()); + assert_eq!(vm.device_manager().balloon_manager.info_list.len(), 1); + + assert!(vm + .device_manager() + .balloon_manager + .update_balloon_size(0, 200) + .is_ok()); + } +} diff --git a/src/dragonball/src/device_manager/blk_dev_mgr.rs b/src/dragonball/src/device_manager/blk_dev_mgr.rs new file mode 100644 index 000000000000..854edfc0977a --- /dev/null +++ b/src/dragonball/src/device_manager/blk_dev_mgr.rs @@ -0,0 +1,1405 @@ +// Copyright 2020-2022 Alibaba, Inc. or its affiliates. All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Device manager for virtio-blk and vhost-user-blk devices. 
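+//!
+//! Block devices are described by [`BlockDeviceConfigInfo`] and managed by
+//! [`BlockDeviceMgr`], which keeps the root device (if any) at the front of
+//! its device list. A configuration is typically built like this (sketch,
+//! field values are illustrative only):
+//!
+//! ```ignore
+//! let cfg = BlockDeviceConfigInfo {
+//!     drive_id: "root".to_string(),
+//!     path_on_host: "/path/to/image".into(),
+//!     is_root_device: true,
+//!     ..Default::default()
+//! };
+//! ```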
+use std::collections::{vec_deque, VecDeque}; +use std::convert::TryInto; +use std::fs::OpenOptions; +use std::os::unix::fs::OpenOptionsExt; +use std::os::unix::io::AsRawFd; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use dbs_virtio_devices as virtio; +use dbs_virtio_devices::block::{aio::Aio, io_uring::IoUring, Block, LocalFile, Ufile}; +use serde_derive::{Deserialize, Serialize}; + +use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::config_manager::{ConfigItem, DeviceConfigInfo, RateLimiterConfigInfo}; +use crate::device_manager::blk_dev_mgr::BlockDeviceError::InvalidDeviceId; +use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; +use crate::get_bucket_update; +use crate::vm::KernelConfigInfo; + +use super::DbsMmioV2Device; + +// The flag of whether to use the shared irq. +const USE_SHARED_IRQ: bool = true; +// The flag of whether to use the generic irq. +const USE_GENERIC_IRQ: bool = true; + +macro_rules! info( + ($l:expr, $($args:tt)+) => { + slog::info!($l, $($args)+; slog::o!("subsystem" => "block_manager")) + }; +); + +macro_rules! error( + ($l:expr, $($args:tt)+) => { + slog::error!($l, $($args)+; slog::o!("subsystem" => "block_manager")) + }; +); + +/// Default queue size for VirtIo block devices. +pub const QUEUE_SIZE: u16 = 128; + +/// Errors associated with the operations allowed on a drive. +#[derive(Debug, thiserror::Error)] +pub enum BlockDeviceError { + /// Invalid VM instance ID. + #[error("invalid VM instance id")] + InvalidVMID, + + /// The block device path is invalid. + #[error("invalid block device path '{0}'")] + InvalidBlockDevicePath(PathBuf), + + /// The block device type is invalid. + #[error("invalid block device type")] + InvalidBlockDeviceType, + + /// The block device path was already used for a different drive. + #[error("block device path '{0}' already exists")] + BlockDevicePathAlreadyExists(PathBuf), + + /// The device id doesn't exist. + #[error("invalid block device id '{0}'")] + InvalidDeviceId(String), + + /// Cannot perform the requested operation after booting the microVM. + #[error("block device does not support runtime update")] + UpdateNotAllowedPostBoot, + + /// A root block device was already added. + #[error("could not add multiple virtual machine root devices")] + RootBlockDeviceAlreadyAdded, + + /// Failed to send patch message to block epoll handler. + #[error("could not send patch message to the block epoll handler")] + BlockEpollHanderSendFail, + + /// Failure from device manager, + #[error("device manager errors: {0}")] + DeviceManager(#[from] DeviceMgrError), + + /// Failure from virtio subsystem. + #[error(transparent)] + Virtio(virtio::Error), + + /// Unable to seek the block device backing file due to invalid permissions or + /// the file was deleted/corrupted. + #[error("cannot create block device: {0}")] + CreateBlockDevice(#[source] virtio::Error), + + /// Cannot open the block device backing file. + #[error("cannot open the block device backing file: {0}")] + OpenBlockDevice(#[source] std::io::Error), + + /// Cannot initialize a MMIO Block Device or add a device to the MMIO Bus. + #[error("failure while registering block device: {0}")] + RegisterBlockDevice(#[source] DeviceMgrError), +} + +/// Type of low level storage device/protocol for virtio-blk devices. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub enum BlockDeviceType { + /// Unknown low level device type. + Unknown, + /// Vhost-user-blk based low level device. 
+ /// SPOOL is a reliable NVMe virtualization system for the cloud environment. + /// You could learn more SPOOL here: https://www.usenix.org/conference/atc20/presentation/xue + Spool, + /// The standard vhost-user-blk based device such as Spdk device. + Spdk, + /// Local disk/file based low level device. + RawBlock, +} + +impl BlockDeviceType { + /// Get type of low level storage device/protocol by parsing `path`. + pub fn get_type(path: &str) -> BlockDeviceType { + // SPOOL path should be started with "spool", e.g. "spool:/device1" + if path.starts_with("spool:/") { + BlockDeviceType::Spool + } else if path.starts_with("spdk:/") { + BlockDeviceType::Spdk + } else { + BlockDeviceType::RawBlock + } + } +} + +/// Configuration information for a block device. +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] +pub struct BlockDeviceConfigUpdateInfo { + /// Unique identifier of the drive. + pub drive_id: String, + /// Rate Limiter for I/O operations. + pub rate_limiter: Option, +} + +impl BlockDeviceConfigUpdateInfo { + /// Provides a `BucketUpdate` description for the bandwidth rate limiter. + pub fn bytes(&self) -> dbs_utils::rate_limiter::BucketUpdate { + get_bucket_update!(self, rate_limiter, bandwidth) + } + /// Provides a `BucketUpdate` description for the ops rate limiter. + pub fn ops(&self) -> dbs_utils::rate_limiter::BucketUpdate { + get_bucket_update!(self, rate_limiter, ops) + } +} + +/// Configuration information for a block device. +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] +pub struct BlockDeviceConfigInfo { + /// Unique identifier of the drive. + pub drive_id: String, + /// Type of low level storage/protocol. + pub device_type: BlockDeviceType, + /// Path of the drive. + pub path_on_host: PathBuf, + /// If set to true, it makes the current device the root block device. + /// Setting this flag to true will mount the block device in the + /// guest under /dev/vda unless the part_uuid is present. + pub is_root_device: bool, + /// Part-UUID. Represents the unique id of the boot partition of this device. + /// It is optional and it will be used only if the `is_root_device` field is true. + pub part_uuid: Option, + /// If set to true, the drive is opened in read-only mode. Otherwise, the + /// drive is opened as read-write. + pub is_read_only: bool, + /// If set to false, the drive is opened with buffered I/O mode. Otherwise, the + /// drive is opened with direct I/O mode. + pub is_direct: bool, + /// Don't close `path_on_host` file when dropping the device. + pub no_drop: bool, + /// Block device multi-queue + pub num_queues: usize, + /// Virtio queue size. Size: byte + pub queue_size: u16, + /// Rate Limiter for I/O operations. 
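+    /// When set, a rate limiter instance is created for every virtio queue of
+    /// the device.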
+ pub rate_limiter: Option, + /// Use shared irq + pub use_shared_irq: Option, + /// Use generic irq + pub use_generic_irq: Option, +} + +impl std::default::Default for BlockDeviceConfigInfo { + fn default() -> Self { + Self { + drive_id: String::default(), + device_type: BlockDeviceType::RawBlock, + path_on_host: PathBuf::default(), + is_root_device: false, + part_uuid: None, + is_read_only: false, + is_direct: Self::default_direct(), + no_drop: Self::default_no_drop(), + num_queues: Self::default_num_queues(), + queue_size: 256, + rate_limiter: None, + use_shared_irq: None, + use_generic_irq: None, + } + } +} + +impl BlockDeviceConfigInfo { + /// Get default queue numbers + pub fn default_num_queues() -> usize { + 1 + } + + /// Get default value of is_direct switch + pub fn default_direct() -> bool { + true + } + + /// Get default value of no_drop switch + pub fn default_no_drop() -> bool { + false + } + + /// Get type of low level storage/protocol. + pub fn device_type(&self) -> BlockDeviceType { + self.device_type + } + + /// Returns a reference to `path_on_host`. + pub fn path_on_host(&self) -> &PathBuf { + &self.path_on_host + } + + /// Returns a reference to the part_uuid. + pub fn get_part_uuid(&self) -> Option<&String> { + self.part_uuid.as_ref() + } + + /// Checks whether the drive had read only permissions. + pub fn is_read_only(&self) -> bool { + self.is_read_only + } + + /// Checks whether the drive uses direct I/O + pub fn is_direct(&self) -> bool { + self.is_direct + } + + /// Get number and size of queues supported. + pub fn queue_sizes(&self) -> Vec { + (0..self.num_queues) + .map(|_| self.queue_size) + .collect::>() + } +} + +impl ConfigItem for BlockDeviceConfigInfo { + type Err = BlockDeviceError; + + fn id(&self) -> &str { + &self.drive_id + } + + fn check_conflicts(&self, other: &Self) -> Result<(), BlockDeviceError> { + if self.drive_id == other.drive_id { + Ok(()) + } else if self.path_on_host == other.path_on_host { + Err(BlockDeviceError::BlockDevicePathAlreadyExists( + self.path_on_host.clone(), + )) + } else { + Ok(()) + } + } +} + +impl std::fmt::Debug for BlockDeviceInfo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.config) + } +} + +/// Block Device Info +pub type BlockDeviceInfo = DeviceConfigInfo; + +/// Wrapper for the collection that holds all the Block Devices Configs +#[derive(Clone)] +pub struct BlockDeviceMgr { + /// A list of `BlockDeviceInfo` objects. + info_list: VecDeque, + has_root_block: bool, + has_part_uuid_root: bool, + read_only_root: bool, + part_uuid: Option, + use_shared_irq: bool, +} + +impl BlockDeviceMgr { + /// returns a front-to-back iterator. + pub fn iter(&self) -> vec_deque::Iter { + self.info_list.iter() + } + + /// Checks whether any of the added BlockDevice is the root. + pub fn has_root_block_device(&self) -> bool { + self.has_root_block + } + + /// Checks whether the root device is configured using a part UUID. + pub fn has_part_uuid_root(&self) -> bool { + self.has_part_uuid_root + } + + /// Checks whether the root device has read-only permisssions. + pub fn is_read_only_root(&self) -> bool { + self.read_only_root + } + + /// Gets the index of the device with the specified `drive_id` if it exists in the list. + pub fn get_index_of_drive_id(&self, id: &str) -> Option { + self.info_list + .iter() + .position(|info| info.config.id().eq(id)) + } + + /// Gets the 'BlockDeviceConfigInfo' of the device with the specified `drive_id` if it exists in the list. 
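+    /// Returns a clone of the stored configuration, or `None` when no device
+    /// with that id has been added.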
+ pub fn get_config_of_drive_id(&self, drive_id: &str) -> Option { + match self.get_index_of_drive_id(drive_id) { + Some(index) => { + let config = self.info_list.get(index).unwrap().config.clone(); + Some(config) + } + None => None, + } + } + + /// Inserts `block_device_config` in the block device configuration list. + /// If an entry with the same id already exists, it will attempt to update + /// the existing entry. + /// Inserting a secondary root block device will fail. + pub fn insert_device( + &mut self, + mut ctx: DeviceOpContext, + config: BlockDeviceConfigInfo, + ) -> std::result::Result<(), BlockDeviceError> { + if !cfg!(feature = "hotplug") && ctx.is_hotplug { + return Err(BlockDeviceError::UpdateNotAllowedPostBoot); + } + + // If the id of the drive already exists in the list, the operation is update. + match self.get_index_of_drive_id(config.id()) { + Some(index) => { + // No support for runtime update yet. + if ctx.is_hotplug { + Err(BlockDeviceError::BlockDevicePathAlreadyExists( + config.path_on_host.clone(), + )) + } else { + for (idx, info) in self.info_list.iter().enumerate() { + if idx != index { + info.config.check_conflicts(&config)?; + } + } + self.update(index, config) + } + } + None => { + for info in self.info_list.iter() { + info.config.check_conflicts(&config)?; + } + let index = self.create(config.clone())?; + if !ctx.is_hotplug { + return Ok(()); + } + + match config.device_type { + BlockDeviceType::RawBlock => { + let device = Self::create_blk_device(&config, &mut ctx) + .map_err(BlockDeviceError::Virtio)?; + let dev = DeviceManager::create_mmio_virtio_device( + device, + &mut ctx, + config.use_shared_irq.unwrap_or(self.use_shared_irq), + config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(BlockDeviceError::DeviceManager)?; + self.update_device_by_index(index, Arc::clone(&dev))?; + // live-upgrade need save/restore device from info.device. + self.info_list[index].set_device(dev.clone()); + ctx.insert_hotplug_mmio_device(&dev, None).map_err(|e| { + let logger = ctx.logger().new(slog::o!()); + self.remove_device(ctx, &config.drive_id).unwrap(); + error!( + logger, + "failed to hot-add virtio block device {}, {:?}", + &config.drive_id, + e + ); + BlockDeviceError::DeviceManager(e) + }) + } + BlockDeviceType::Spool | BlockDeviceType::Spdk => { + // TBD + todo!() + } + _ => Err(BlockDeviceError::InvalidBlockDeviceType), + } + } + } + } + + /// Attaches all block devices from the BlockDevicesConfig. 
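+    ///
+    /// Only `RawBlock` devices can be attached here; any other device type
+    /// causes the whole operation to fail.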
+ pub fn attach_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), BlockDeviceError> { + for info in self.info_list.iter_mut() { + match info.config.device_type { + BlockDeviceType::RawBlock => { + info!( + ctx.logger(), + "attach virtio-blk device, drive_id {}, path {}", + info.config.drive_id, + info.config.path_on_host.to_str().unwrap_or("") + ); + let device = Self::create_blk_device(&info.config, ctx) + .map_err(BlockDeviceError::Virtio)?; + let device = DeviceManager::create_mmio_virtio_device( + device, + ctx, + info.config.use_shared_irq.unwrap_or(self.use_shared_irq), + info.config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(BlockDeviceError::RegisterBlockDevice)?; + info.device = Some(device); + } + _ => { + return Err(BlockDeviceError::OpenBlockDevice( + std::io::Error::from_raw_os_error(libc::EINVAL), + )); + } + } + } + + Ok(()) + } + + /// Removes all virtio-blk devices + pub fn remove_devices(&mut self, ctx: &mut DeviceOpContext) -> Result<(), DeviceMgrError> { + while let Some(mut info) = self.info_list.pop_back() { + info!(ctx.logger(), "remove drive {}", info.config.drive_id); + if let Some(device) = info.device.take() { + DeviceManager::destroy_mmio_virtio_device(device, ctx)?; + } + } + + Ok(()) + } + + fn remove(&mut self, drive_id: &str) -> Option { + match self.get_index_of_drive_id(drive_id) { + Some(index) => self.info_list.remove(index), + None => None, + } + } + + /// remove a block device, it basically is the inverse operation of `insert_device`` + pub fn remove_device( + &mut self, + mut ctx: DeviceOpContext, + drive_id: &str, + ) -> std::result::Result<(), BlockDeviceError> { + if !cfg!(feature = "hotplug") { + return Err(BlockDeviceError::UpdateNotAllowedPostBoot); + } + + match self.remove(drive_id) { + Some(mut info) => { + info!(ctx.logger(), "remove drive {}", info.config.drive_id); + if let Some(device) = info.device.take() { + DeviceManager::destroy_mmio_virtio_device(device, &mut ctx) + .map_err(BlockDeviceError::DeviceManager)?; + } + } + None => return Err(BlockDeviceError::InvalidDeviceId(drive_id.to_owned())), + } + + Ok(()) + } + + fn create_blk_device( + cfg: &BlockDeviceConfigInfo, + ctx: &mut DeviceOpContext, + ) -> std::result::Result>, virtio::Error> { + let epoll_mgr = ctx.epoll_mgr.clone().ok_or(virtio::Error::InvalidInput)?; + + let mut block_files: Vec> = vec![]; + + match cfg.device_type { + BlockDeviceType::RawBlock => { + let custom_flags = if cfg.is_direct() { + info!( + ctx.logger(), + "Open block device \"{}\" in direct mode.", + cfg.path_on_host().display() + ); + libc::O_DIRECT + } else { + info!( + ctx.logger(), + "Open block device \"{}\" in buffer mode.", + cfg.path_on_host().display(), + ); + 0 + }; + let io_uring_supported = IoUring::is_supported(); + for i in 0..cfg.num_queues { + let queue_size = cfg.queue_sizes()[i] as u32; + let file = OpenOptions::new() + .read(true) + .custom_flags(custom_flags) + .write(!cfg.is_read_only()) + .open(cfg.path_on_host())?; + info!(ctx.logger(), "Queue {}: block file opened", i); + + if io_uring_supported { + info!( + ctx.logger(), + "Queue {}: Using io_uring Raw disk file, queue size {}.", i, queue_size + ); + let io_engine = IoUring::new(file.as_raw_fd(), queue_size)?; + block_files.push(Box::new(LocalFile::new(file, cfg.no_drop, io_engine)?)); + } else { + info!( + ctx.logger(), + "Queue {}: Since io_uring_supported is not enabled, change to default support of Aio Raw disk file, queue size {}", i, queue_size + ); + let io_engine = 
Aio::new(file.as_raw_fd(), queue_size)?; + block_files.push(Box::new(LocalFile::new(file, cfg.no_drop, io_engine)?)); + } + } + } + _ => { + error!( + ctx.logger(), + "invalid block device type: {:?}", cfg.device_type + ); + return Err(virtio::Error::InvalidInput); + } + }; + + let mut limiters = vec![]; + for _i in 0..cfg.num_queues { + if let Some(limiter) = cfg.rate_limiter.clone().map(|mut v| { + v.resize(cfg.num_queues as u64); + v.try_into().unwrap() + }) { + limiters.push(limiter); + } + } + + Ok(Box::new(Block::new( + block_files, + cfg.is_read_only, + Arc::new(cfg.queue_sizes()), + epoll_mgr, + limiters, + )?)) + } + + /// Generated guest kernel commandline related to root block device. + pub fn generate_kernel_boot_args( + &self, + kernel_config: &mut KernelConfigInfo, + ) -> std::result::Result<(), DeviceMgrError> { + // Respect user configuration if kernel_cmdline contains "root=", + // special attention for the case when kernel command line starting with "root=xxx" + let old_kernel_cmdline = format!( + " {:?}", + kernel_config + .kernel_cmdline() + .as_cstring() + .map_err(DeviceMgrError::Cmdline)? + ); + if !old_kernel_cmdline.contains(" root=") && self.has_root_block { + let cmdline = kernel_config.kernel_cmdline_mut(); + if let Some(ref uuid) = self.part_uuid { + cmdline + .insert("root", &format!("PART_UUID={}", uuid)) + .map_err(DeviceMgrError::Cmdline)?; + } else { + cmdline + .insert("root", "/dev/vda") + .map_err(DeviceMgrError::Cmdline)?; + } + if self.read_only_root { + if old_kernel_cmdline.contains(" rw") { + return Err(DeviceMgrError::InvalidOperation); + } + cmdline.insert_str("ro").map_err(DeviceMgrError::Cmdline)?; + } + } + + Ok(()) + } + + /// insert a block device's config. return index on success. + fn create( + &mut self, + block_device_config: BlockDeviceConfigInfo, + ) -> std::result::Result { + self.check_data_file_present(&block_device_config)?; + if self + .get_index_of_drive_path(&block_device_config.path_on_host) + .is_some() + { + return Err(BlockDeviceError::BlockDevicePathAlreadyExists( + block_device_config.path_on_host, + )); + } + + // check whether the Device Config belongs to a root device + // we need to satisfy the condition by which a VMM can only have on root device + if block_device_config.is_root_device { + if self.has_root_block { + Err(BlockDeviceError::RootBlockDeviceAlreadyAdded) + } else { + self.has_root_block = true; + self.read_only_root = block_device_config.is_read_only; + self.has_part_uuid_root = block_device_config.part_uuid.is_some(); + self.part_uuid = block_device_config.part_uuid.clone(); + // Root Device should be the first in the list whether or not PART_UUID is specified + // in order to avoid bugs in case of switching from part_uuid boot scenarios to + // /dev/vda boot type. + self.info_list + .push_front(BlockDeviceInfo::new(block_device_config)); + Ok(0) + } + } else { + self.info_list + .push_back(BlockDeviceInfo::new(block_device_config)); + Ok(self.info_list.len() - 1) + } + } + + /// Updates a Block Device Config. The update fails if it would result in two + /// root block devices. 
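+    /// If a non-root device becomes the root device, it is moved to the front
+    /// of the device list.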
+ fn update( + &mut self, + mut index: usize, + new_config: BlockDeviceConfigInfo, + ) -> std::result::Result<(), BlockDeviceError> { + // Check if the path exists + self.check_data_file_present(&new_config)?; + if let Some(idx) = self.get_index_of_drive_path(&new_config.path_on_host) { + if idx != index { + return Err(BlockDeviceError::BlockDevicePathAlreadyExists( + new_config.path_on_host.clone(), + )); + } + } + + if self.info_list.get(index).is_none() { + return Err(InvalidDeviceId(index.to_string())); + } + // Check if the root block device is being updated. + if self.info_list[index].config.is_root_device { + self.has_root_block = new_config.is_root_device; + self.read_only_root = new_config.is_root_device && new_config.is_read_only; + self.has_part_uuid_root = new_config.part_uuid.is_some(); + self.part_uuid = new_config.part_uuid.clone(); + } else if new_config.is_root_device { + // Check if a second root block device is being added. + if self.has_root_block { + return Err(BlockDeviceError::RootBlockDeviceAlreadyAdded); + } else { + // One of the non-root blocks is becoming root. + self.has_root_block = true; + self.read_only_root = new_config.is_read_only; + self.has_part_uuid_root = new_config.part_uuid.is_some(); + self.part_uuid = new_config.part_uuid.clone(); + + // Make sure the root device is on the first position. + self.info_list.swap(0, index); + // Block config to be updated has moved to first position. + index = 0; + } + } + // Update the config. + self.info_list[index].config = new_config; + + Ok(()) + } + + fn check_data_file_present( + &self, + block_device_config: &BlockDeviceConfigInfo, + ) -> std::result::Result<(), BlockDeviceError> { + if block_device_config.device_type == BlockDeviceType::RawBlock + && !block_device_config.path_on_host.exists() + { + Err(BlockDeviceError::InvalidBlockDevicePath( + block_device_config.path_on_host.clone(), + )) + } else { + Ok(()) + } + } + + fn get_index_of_drive_path(&self, drive_path: &Path) -> Option { + self.info_list + .iter() + .position(|info| info.config.path_on_host.eq(drive_path)) + } + + /// update devce information in `info_list`. The caller of this method is + /// `insert_device` when hotplug is true. + pub fn update_device_by_index( + &mut self, + index: usize, + device: Arc, + ) -> Result<(), BlockDeviceError> { + if let Some(info) = self.info_list.get_mut(index) { + info.device = Some(device); + return Ok(()); + } + + Err(BlockDeviceError::InvalidDeviceId("".to_owned())) + } + + /// Update the ratelimiter settings of a virtio blk device. + pub fn update_device_ratelimiters( + &mut self, + new_cfg: BlockDeviceConfigUpdateInfo, + ) -> std::result::Result<(), BlockDeviceError> { + match self.get_index_of_drive_id(&new_cfg.drive_id) { + Some(index) => { + let config = &mut self.info_list[index].config; + config.rate_limiter = new_cfg.rate_limiter.clone(); + let device = self.info_list[index] + .device + .as_mut() + .ok_or_else(|| BlockDeviceError::InvalidDeviceId("".to_owned()))?; + if let Some(mmio_dev) = device.as_any().downcast_ref::() { + let guard = mmio_dev.state(); + let inner_dev = guard.get_inner_device(); + if let Some(blk_dev) = inner_dev + .as_any() + .downcast_ref::>() + { + return blk_dev + .set_patch_rate_limiters(new_cfg.bytes(), new_cfg.ops()) + .map(|_p| ()) + .map_err(|_e| BlockDeviceError::BlockEpollHanderSendFail); + } + } + Ok(()) + } + None => Err(BlockDeviceError::InvalidDeviceId(new_cfg.drive_id)), + } + } +} + +impl Default for BlockDeviceMgr { + /// Constructor for the BlockDeviceMgr. 
It initializes an empty LinkedList. + fn default() -> BlockDeviceMgr { + BlockDeviceMgr { + info_list: VecDeque::::new(), + has_root_block: false, + has_part_uuid_root: false, + read_only_root: false, + part_uuid: None, + use_shared_irq: USE_SHARED_IRQ, + } + } +} + +#[cfg(test)] +mod tests { + use test_utils::skip_if_not_root; + use vmm_sys_util::tempfile::TempFile; + + use super::*; + use crate::test_utils::tests::create_vm_for_test; + + #[test] + fn test_block_device_type() { + let dev_type = BlockDeviceType::get_type("spool:/device1"); + assert_eq!(dev_type, BlockDeviceType::Spool); + let dev_type = BlockDeviceType::get_type("/device1"); + assert_eq!(dev_type, BlockDeviceType::RawBlock); + } + + #[test] + fn test_create_block_devices_configs() { + let mgr = BlockDeviceMgr::default(); + assert!(!mgr.has_root_block_device()); + assert!(!mgr.has_part_uuid_root()); + assert!(!mgr.is_read_only_root()); + assert_eq!(mgr.get_index_of_drive_id(""), None); + assert_eq!(mgr.info_list.len(), 0); + } + + #[test] + fn test_add_non_root_block_device() { + skip_if_not_root!(); + let dummy_file = TempFile::new().unwrap(); + let dummy_path = dummy_file.as_path().to_owned(); + let dummy_id = String::from("1"); + let dummy_block_device = BlockDeviceConfigInfo { + path_on_host: dummy_path.clone(), + device_type: BlockDeviceType::RawBlock, + is_root_device: false, + part_uuid: None, + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: dummy_id.clone(), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + + let mut vm = crate::vm::tests::create_vm_instance(); + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + assert!(vm + .device_manager_mut() + .block_manager + .insert_device(ctx, dummy_block_device.clone(),) + .is_ok()); + + assert_eq!(vm.device_manager().block_manager.info_list.len(), 1); + assert!(!vm.device_manager().block_manager.has_root_block_device()); + assert!(!vm.device_manager().block_manager.has_part_uuid_root()); + assert!(!vm.device_manager().block_manager.is_read_only_root()); + assert_eq!(vm.device_manager().block_manager.info_list.len(), 1); + assert_eq!( + vm.device_manager().block_manager.info_list[0] + .config + .device_type(), + BlockDeviceType::RawBlock + ); + assert_eq!( + vm.device_manager().block_manager.info_list[0] + .config + .queue_sizes(), + [128u16] + ); + + let dev_config = vm.device_manager().block_manager.iter().next().unwrap(); + assert_eq!(dev_config.config, dummy_block_device); + assert!(vm + .device_manager() + .block_manager + .get_index_of_drive_path(&dummy_path) + .is_some()); + assert!(vm + .device_manager() + .block_manager + .get_index_of_drive_id(&dummy_id) + .is_some()); + } + + #[test] + fn test_update_blk_device_ratelimiters() { + skip_if_not_root!(); + //Init vm for test. 
+ let mut vm = create_vm_for_test(); + let device_op_ctx = DeviceOpContext::new( + Some(vm.epoll_manager().clone()), + vm.device_manager(), + Some(vm.vm_as().unwrap().clone()), + None, + false, + Some(vm.vm_config().clone()), + vm.shared_info().clone(), + ); + + let dummy_file = TempFile::new().unwrap(); + let dummy_path = dummy_file.as_path().to_owned(); + + let dummy_block_device = BlockDeviceConfigInfo { + path_on_host: dummy_path, + device_type: BlockDeviceType::RawBlock, + is_root_device: true, + part_uuid: None, + is_read_only: true, + is_direct: false, + no_drop: false, + drive_id: String::from("1"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + vm.device_manager_mut() + .block_manager + .insert_device(device_op_ctx, dummy_block_device) + .unwrap(); + + let cfg = BlockDeviceConfigUpdateInfo { + drive_id: String::from("1"), + rate_limiter: None, + }; + + let mut device_op_ctx = DeviceOpContext::new( + Some(vm.epoll_manager().clone()), + vm.device_manager(), + Some(vm.vm_as().unwrap().clone()), + None, + false, + Some(vm.vm_config().clone()), + vm.shared_info().clone(), + ); + + vm.device_manager_mut() + .block_manager + .attach_devices(&mut device_op_ctx) + .unwrap(); + assert_eq!(vm.device_manager().block_manager.info_list.len(), 1); + + //Patch while the epoll handler is invalid. + let expected_error = "could not send patch message to the block epoll handler".to_string(); + + assert_eq!( + vm.device_manager_mut() + .block_manager + .update_device_ratelimiters(cfg) + .unwrap_err() + .to_string(), + expected_error + ); + + //Invalid drive id + let cfg2 = BlockDeviceConfigUpdateInfo { + drive_id: String::from("2"), + rate_limiter: None, + }; + + let expected_error = format!("invalid block device id '{0}'", cfg2.drive_id); + + assert_eq!( + vm.device_manager_mut() + .block_manager + .update_device_ratelimiters(cfg2) + .unwrap_err() + .to_string(), + expected_error + ); + } + + #[test] + fn test_add_one_root_block_device() { + skip_if_not_root!(); + let dummy_file = TempFile::new().unwrap(); + let dummy_path = dummy_file.as_path().to_owned(); + let dummy_block_device = BlockDeviceConfigInfo { + path_on_host: dummy_path, + device_type: BlockDeviceType::RawBlock, + is_root_device: true, + part_uuid: None, + is_read_only: true, + is_direct: false, + no_drop: false, + drive_id: String::from("1"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + + let mut vm = crate::vm::tests::create_vm_instance(); + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + assert!(vm + .device_manager_mut() + .block_manager + .insert_device(ctx, dummy_block_device.clone(),) + .is_ok()); + + assert_eq!(vm.device_manager().block_manager.info_list.len(), 1); + assert!(vm.device_manager().block_manager.has_root_block); + assert!(!vm.device_manager().block_manager.has_part_uuid_root); + assert!(vm.device_manager().block_manager.read_only_root); + assert_eq!(vm.device_manager().block_manager.info_list.len(), 1); + + let dev_config = vm.device_manager().block_manager.iter().next().unwrap(); + assert_eq!(dev_config.config, dummy_block_device); + assert!(vm.device_manager().block_manager.is_read_only_root()); + } + + #[test] + fn test_add_two_root_block_devices_configs() { + skip_if_not_root!(); + let dummy_file_1 = TempFile::new().unwrap(); + let dummy_path_1 = dummy_file_1.as_path().to_owned(); + let 
root_block_device_1 = BlockDeviceConfigInfo { + path_on_host: dummy_path_1, + device_type: BlockDeviceType::RawBlock, + is_root_device: true, + part_uuid: None, + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: String::from("1"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + + let dummy_file_2 = TempFile::new().unwrap(); + let dummy_path_2 = dummy_file_2.as_path().to_owned(); + let root_block_device_2 = BlockDeviceConfigInfo { + path_on_host: dummy_path_2, + device_type: BlockDeviceType::RawBlock, + is_root_device: true, + part_uuid: None, + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: String::from("2"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + + let mut vm = crate::vm::tests::create_vm_instance(); + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + vm.device_manager_mut() + .block_manager + .insert_device(ctx, root_block_device_1) + .unwrap(); + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + assert!(vm + .device_manager_mut() + .block_manager + .insert_device(ctx, root_block_device_2) + .is_err()); + } + + #[test] + // Test BlockDevicesConfigs::add when you first add the root device and then the other devices. + fn test_add_root_block_device_first() { + skip_if_not_root!(); + let dummy_file_1 = TempFile::new().unwrap(); + let dummy_path_1 = dummy_file_1.as_path().to_owned(); + let root_block_device = BlockDeviceConfigInfo { + path_on_host: dummy_path_1, + device_type: BlockDeviceType::RawBlock, + is_root_device: true, + part_uuid: None, + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: String::from("1"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + + let dummy_file_2 = TempFile::new().unwrap(); + let dummy_path_2 = dummy_file_2.as_path().to_owned(); + let dummy_block_device_2 = BlockDeviceConfigInfo { + path_on_host: dummy_path_2, + device_type: BlockDeviceType::RawBlock, + is_root_device: false, + part_uuid: None, + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: String::from("2"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + + let dummy_file_3 = TempFile::new().unwrap(); + let dummy_path_3 = dummy_file_3.as_path().to_owned(); + let dummy_block_device_3 = BlockDeviceConfigInfo { + path_on_host: dummy_path_3, + device_type: BlockDeviceType::RawBlock, + is_root_device: false, + part_uuid: None, + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: String::from("3"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + + let mut vm = crate::vm::tests::create_vm_instance(); + vm.device_manager_mut() + .block_manager + .create(root_block_device.clone()) + .unwrap(); + vm.device_manager_mut() + .block_manager + .create(dummy_block_device_2.clone()) + .unwrap(); + vm.device_manager_mut() + .block_manager + .create(dummy_block_device_3.clone()) + .unwrap(); + + assert!(vm.device_manager().block_manager.has_root_block_device(),); + assert!(!vm.device_manager().block_manager.has_part_uuid_root()); + 
assert_eq!(vm.device_manager().block_manager.info_list.len(), 3); + + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + vm.device_manager_mut() + .block_manager + .insert_device(ctx, root_block_device) + .unwrap(); + + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + vm.device_manager_mut() + .block_manager + .insert_device(ctx, dummy_block_device_2) + .unwrap(); + + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + vm.device_manager_mut() + .block_manager + .insert_device(ctx, dummy_block_device_3) + .unwrap(); + } + + #[test] + // Test BlockDevicesConfigs::add when you add other devices first and then the root device. + fn test_root_block_device_add_last() { + skip_if_not_root!(); + let dummy_file_1 = TempFile::new().unwrap(); + let dummy_path_1 = dummy_file_1.as_path().to_owned(); + let root_block_device = BlockDeviceConfigInfo { + path_on_host: dummy_path_1, + device_type: BlockDeviceType::RawBlock, + is_root_device: true, + part_uuid: None, + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: String::from("1"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + + let dummy_file_2 = TempFile::new().unwrap(); + let dummy_path_2 = dummy_file_2.as_path().to_owned(); + let dummy_block_device_2 = BlockDeviceConfigInfo { + path_on_host: dummy_path_2, + device_type: BlockDeviceType::RawBlock, + is_root_device: false, + part_uuid: None, + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: String::from("2"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + + let dummy_file_3 = TempFile::new().unwrap(); + let dummy_path_3 = dummy_file_3.as_path().to_owned(); + let dummy_block_device_3 = BlockDeviceConfigInfo { + path_on_host: dummy_path_3, + device_type: BlockDeviceType::RawBlock, + is_root_device: false, + part_uuid: None, + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: String::from("3"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + + let mut vm = crate::vm::tests::create_vm_instance(); + + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + vm.device_manager_mut() + .block_manager + .insert_device(ctx, dummy_block_device_2.clone()) + .unwrap(); + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + vm.device_manager_mut() + .block_manager + .insert_device(ctx, dummy_block_device_3.clone()) + .unwrap(); + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + vm.device_manager_mut() + .block_manager + .insert_device(ctx, root_block_device.clone()) + .unwrap(); + + assert!(vm.device_manager().block_manager.has_root_block_device(),); + assert!(!vm.device_manager().block_manager.has_part_uuid_root()); + assert_eq!(vm.device_manager().block_manager.info_list.len(), 3); + + let mut block_dev_iter = vm.device_manager().block_manager.iter(); + // The root device should be first in the list no matter of the order in + // which the devices were added. 
+ assert_eq!( + block_dev_iter.next().unwrap().config.drive_id, + root_block_device.drive_id + ); + assert_eq!( + block_dev_iter.next().unwrap().config.drive_id, + dummy_block_device_2.drive_id + ); + assert_eq!( + block_dev_iter.next().unwrap().config.drive_id, + dummy_block_device_3.drive_id + ); + } + + #[test] + fn test_block_device_update() { + skip_if_not_root!(); + let dummy_file_1 = TempFile::new().unwrap(); + let dummy_path_1 = dummy_file_1.as_path().to_owned(); + let root_block_device = BlockDeviceConfigInfo { + path_on_host: dummy_path_1.clone(), + device_type: BlockDeviceType::RawBlock, + is_root_device: true, + part_uuid: None, + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: String::from("1"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + + let dummy_file_2 = TempFile::new().unwrap(); + let dummy_path_2 = dummy_file_2.as_path().to_owned(); + let mut dummy_block_device_2 = BlockDeviceConfigInfo { + path_on_host: dummy_path_2.clone(), + device_type: BlockDeviceType::RawBlock, + is_root_device: false, + part_uuid: None, + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: String::from("2"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + + let mut vm = crate::vm::tests::create_vm_instance(); + + // Add 2 block devices. + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + vm.device_manager_mut() + .block_manager + .insert_device(ctx, root_block_device) + .unwrap(); + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + vm.device_manager_mut() + .block_manager + .insert_device(ctx, dummy_block_device_2.clone()) + .unwrap(); + + // Get index zero. + assert_eq!( + vm.device_manager() + .block_manager + .get_index_of_drive_id(&String::from("1")) + .unwrap(), + 0 + ); + + // Get None. + assert!(vm + .device_manager() + .block_manager + .get_index_of_drive_id(&String::from("foo")) + .is_none()); + + // Test several update cases using dummy_block_device_2. + // Validate `dummy_block_device_2` is already in the list + assert!(vm + .device_manager() + .block_manager + .get_index_of_drive_id(&dummy_block_device_2.drive_id) + .is_some()); + // Update OK. + dummy_block_device_2.is_read_only = true; + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + vm.device_manager_mut() + .block_manager + .insert_device(ctx, dummy_block_device_2.clone()) + .unwrap(); + + let index = vm + .device_manager() + .block_manager + .get_index_of_drive_id(&dummy_block_device_2.drive_id) + .unwrap(); + // Validate update was successful. + assert!( + vm.device_manager().block_manager.info_list[index] + .config + .is_read_only + ); + + // Update with invalid path. + let dummy_filename_3 = String::from("test_update_3"); + let dummy_path_3 = PathBuf::from(dummy_filename_3); + dummy_block_device_2.path_on_host = dummy_path_3; + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + assert!(vm + .device_manager_mut() + .block_manager + .insert_device(ctx, dummy_block_device_2.clone(),) + .is_err()); + + // Update with 2 root block devices. 
+ dummy_block_device_2.path_on_host = dummy_path_2.clone(); + dummy_block_device_2.is_root_device = true; + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + assert!(vm + .device_manager_mut() + .block_manager + .insert_device(ctx, dummy_block_device_2,) + .is_err(),); + + // Switch roots and add a PARTUUID for the new one. + let root_block_device_old = BlockDeviceConfigInfo { + path_on_host: dummy_path_1, + device_type: BlockDeviceType::RawBlock, + is_root_device: false, + part_uuid: None, + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: String::from("1"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + let root_block_device_new = BlockDeviceConfigInfo { + path_on_host: dummy_path_2, + device_type: BlockDeviceType::RawBlock, + is_root_device: true, + part_uuid: Some("0eaa91a0-01".to_string()), + is_read_only: false, + is_direct: false, + no_drop: false, + drive_id: String::from("2"), + rate_limiter: None, + num_queues: BlockDeviceConfigInfo::default_num_queues(), + queue_size: 128, + use_shared_irq: None, + use_generic_irq: None, + }; + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + vm.device_manager_mut() + .block_manager + .insert_device(ctx, root_block_device_old) + .unwrap(); + let ctx = DeviceOpContext::create_boot_ctx(&vm, None); + vm.device_manager_mut() + .block_manager + .insert_device(ctx, root_block_device_new) + .unwrap(); + assert!(vm.device_manager().block_manager.has_part_uuid_root); + } +} diff --git a/src/dragonball/src/device_manager/console_manager.rs b/src/dragonball/src/device_manager/console_manager.rs new file mode 100644 index 000000000000..d5adb74e6835 --- /dev/null +++ b/src/dragonball/src/device_manager/console_manager.rs @@ -0,0 +1,450 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Virtual machine console device manager. +//! +//! A virtual console are composed up of two parts: frontend in virtual machine and backend in +//! host OS. A frontend may be serial port, virtio-console etc, a backend may be stdio or Unix +//! domain socket. The manager connects the frontend with the backend. +use std::io::{self, Read}; +use std::os::unix::net::{UnixListener, UnixStream}; +use std::path::Path; +use std::sync::{Arc, Mutex}; + +use bytes::{BufMut, BytesMut}; +use dbs_legacy_devices::{ConsoleHandler, SerialDevice}; +use dbs_utils::epoll_manager::{ + EpollManager, EventOps, EventSet, Events, MutEventSubscriber, SubscriberId, +}; +use vmm_sys_util::terminal::Terminal; + +use super::{DeviceMgrError, Result}; + +const EPOLL_EVENT_SERIAL: u32 = 0; +const EPOLL_EVENT_SERIAL_DATA: u32 = 1; +const EPOLL_EVENT_STDIN: u32 = 2; +// Maximal backend throughput for every data transaction. +const MAX_BACKEND_THROUGHPUT: usize = 64; + +/// Errors related to Console manager operations. +#[derive(Debug, thiserror::Error)] +pub enum ConsoleManagerError { + /// Cannot create unix domain socket for serial port + #[error("cannot create socket for serial console")] + CreateSerialSock(#[source] std::io::Error), + + /// An operation on the epoll instance failed due to resource exhaustion or bad configuration. 
+ #[error("failure while managing epoll event for console fd")] + EpollMgr(#[source] dbs_utils::epoll_manager::Error), + + /// Cannot set mode for terminal. + #[error("failure while setting attribute for terminal")] + StdinHandle(#[source] vmm_sys_util::errno::Error), +} + +enum Backend { + StdinHandle(std::io::Stdin), + SockPath(String), +} + +/// Console manager to manage frontend and backend console devices. +pub struct ConsoleManager { + epoll_mgr: EpollManager, + logger: slog::Logger, + subscriber_id: Option, + backend: Option, +} + +impl ConsoleManager { + /// Create a console manager instance. + pub fn new(epoll_mgr: EpollManager, logger: &slog::Logger) -> Self { + let logger = logger.new(slog::o!("subsystem" => "console_manager")); + ConsoleManager { + epoll_mgr, + logger, + subscriber_id: Default::default(), + backend: None, + } + } + + /// Create a console backend device by using stdio streams. + pub fn create_stdio_console(&mut self, device: Arc>) -> Result<()> { + device + .lock() + .unwrap() + .set_output_stream(Some(Box::new(std::io::stdout()))); + let stdin_handle = std::io::stdin(); + { + let guard = stdin_handle.lock(); + guard + .set_raw_mode() + .map_err(ConsoleManagerError::StdinHandle) + .map_err(DeviceMgrError::ConsoleManager)?; + guard + .set_non_block(true) + .map_err(ConsoleManagerError::StdinHandle) + .map_err(DeviceMgrError::ConsoleManager)?; + } + let handler = ConsoleEpollHandler::new(device, Some(stdin_handle), None, &self.logger); + self.subscriber_id = Some(self.epoll_mgr.add_subscriber(Box::new(handler))); + self.backend = Some(Backend::StdinHandle(std::io::stdin())); + + Ok(()) + } + + /// Create s console backend device by using Unix Domain socket. + pub fn create_socket_console( + &mut self, + device: Arc>, + sock_path: String, + ) -> Result<()> { + let sock_listener = Self::bind_domain_socket(&sock_path).map_err(|e| { + DeviceMgrError::ConsoleManager(ConsoleManagerError::CreateSerialSock(e)) + })?; + let handler = ConsoleEpollHandler::new(device, None, Some(sock_listener), &self.logger); + + self.subscriber_id = Some(self.epoll_mgr.add_subscriber(Box::new(handler))); + self.backend = Some(Backend::SockPath(sock_path)); + + Ok(()) + } + + /// Reset the host side terminal to canonical mode. + pub fn reset_console(&self) -> Result<()> { + if let Some(Backend::StdinHandle(stdin_handle)) = self.backend.as_ref() { + stdin_handle + .lock() + .set_canon_mode() + .map_err(|e| DeviceMgrError::ConsoleManager(ConsoleManagerError::StdinHandle(e)))?; + } + + Ok(()) + } + + fn bind_domain_socket(serial_path: &str) -> std::result::Result { + let path = Path::new(serial_path); + if path.is_file() { + let _ = std::fs::remove_file(serial_path); + } + + UnixListener::bind(path) + } +} + +struct ConsoleEpollHandler { + device: Arc>, + stdin_handle: Option, + sock_listener: Option, + sock_conn: Option, + logger: slog::Logger, +} + +impl ConsoleEpollHandler { + fn new( + device: Arc>, + stdin_handle: Option, + sock_listener: Option, + logger: &slog::Logger, + ) -> Self { + ConsoleEpollHandler { + device, + stdin_handle, + sock_listener, + sock_conn: None, + logger: logger.new(slog::o!("subsystem" => "console_manager")), + } + } + + fn uds_listener_accept(&mut self, ops: &mut EventOps) -> std::io::Result<()> { + if self.sock_conn.is_some() { + slog::warn!(self.logger, + "UDS for serial port 1 already exists, reject the new connection"; + "subsystem" => "console_mgr", + ); + // Do not expected poisoned lock. 
+ let _ = self.sock_listener.as_mut().unwrap().accept(); + } else { + // Safe to unwrap() because self.sock_conn is Some(). + let (conn_sock, _) = self.sock_listener.as_ref().unwrap().accept()?; + let events = Events::with_data(&conn_sock, EPOLL_EVENT_SERIAL_DATA, EventSet::IN); + if let Err(e) = ops.add(events) { + slog::error!(self.logger, + "failed to register epoll event for serial, {:?}", e; + "subsystem" => "console_mgr", + ); + return Err(std::io::Error::last_os_error()); + } + + let conn_sock_copy = conn_sock.try_clone()?; + // Do not expected poisoned lock. + self.device + .lock() + .unwrap() + .set_output_stream(Some(Box::new(conn_sock_copy))); + + self.sock_conn = Some(conn_sock); + } + + Ok(()) + } + + fn uds_read_in(&mut self, ops: &mut EventOps) -> std::io::Result<()> { + let mut should_drop = true; + + if let Some(conn_sock) = self.sock_conn.as_mut() { + let mut out = [0u8; MAX_BACKEND_THROUGHPUT]; + match conn_sock.read(&mut out[..]) { + Ok(0) => { + // Zero-length read means EOF. Remove this conn sock. + self.device + .lock() + .expect("console: poisoned console lock") + .set_output_stream(None); + } + Ok(count) => { + self.device + .lock() + .expect("console: poisoned console lock") + .raw_input(&out[..count])?; + should_drop = false; + } + Err(e) => { + slog::warn!(self.logger, + "error while reading serial conn sock: {:?}", e; + "subsystem" => "console_mgr" + ); + self.device + .lock() + .expect("console: poisoned console lock") + .set_output_stream(None); + } + } + } + + if should_drop { + assert!(self.sock_conn.is_some()); + // Safe to unwrap() because self.sock_conn is Some(). + let sock_conn = self.sock_conn.take().unwrap(); + let events = Events::with_data(&sock_conn, EPOLL_EVENT_SERIAL_DATA, EventSet::IN); + if let Err(e) = ops.remove(events) { + slog::error!(self.logger, + "failed deregister epoll event for UDS, {:?}", e; + "subsystem" => "console_mgr" + ); + } + } + + Ok(()) + } + + fn stdio_read_in(&mut self, ops: &mut EventOps) -> std::io::Result<()> { + let mut should_drop = true; + + if let Some(handle) = self.stdin_handle.as_ref() { + let mut out = [0u8; MAX_BACKEND_THROUGHPUT]; + // Safe to unwrap() because self.stdin_handle is Some(). + let stdin_lock = handle.lock(); + match stdin_lock.read_raw(&mut out[..]) { + Ok(0) => { + // Zero-length read indicates EOF. Remove from pollables. 
+ self.device + .lock() + .expect("console: poisoned console lock") + .set_output_stream(None); + } + Ok(count) => { + self.device + .lock() + .expect("console: poisoned console lock") + .raw_input(&out[..count])?; + should_drop = false; + } + Err(e) => { + slog::warn!(self.logger, + "error while reading stdin: {:?}", e; + "subsystem" => "console_mgr" + ); + self.device + .lock() + .expect("console: poisoned console lock") + .set_output_stream(None); + } + } + } + + if should_drop { + let events = Events::with_data_raw(libc::STDIN_FILENO, EPOLL_EVENT_STDIN, EventSet::IN); + if let Err(e) = ops.remove(events) { + slog::error!(self.logger, + "failed to deregister epoll event for stdin, {:?}", e; + "subsystem" => "console_mgr" + ); + } + } + + Ok(()) + } +} + +impl MutEventSubscriber for ConsoleEpollHandler { + fn process(&mut self, events: Events, ops: &mut EventOps) { + slog::trace!(self.logger, "ConsoleEpollHandler::process()"); + let slot = events.data(); + match slot { + EPOLL_EVENT_SERIAL => { + if let Err(e) = self.uds_listener_accept(ops) { + slog::warn!(self.logger, "failed to accept incoming connection, {:?}", e); + } + } + EPOLL_EVENT_SERIAL_DATA => { + if let Err(e) = self.uds_read_in(ops) { + slog::warn!(self.logger, "failed to read data from UDS, {:?}", e); + } + } + EPOLL_EVENT_STDIN => { + if let Err(e) = self.stdio_read_in(ops) { + slog::warn!(self.logger, "failed to read data from stdin, {:?}", e); + } + } + _ => slog::error!(self.logger, "unknown epoll slot number {}", slot), + } + } + + fn init(&mut self, ops: &mut EventOps) { + slog::trace!(self.logger, "ConsoleEpollHandler::init()"); + + if self.stdin_handle.is_some() { + slog::info!(self.logger, "ConsoleEpollHandler: stdin handler"); + let events = Events::with_data_raw(libc::STDIN_FILENO, EPOLL_EVENT_STDIN, EventSet::IN); + if let Err(e) = ops.add(events) { + slog::error!( + self.logger, + "failed to register epoll event for stdin, {:?}", + e + ); + } + } + if let Some(sock) = self.sock_listener.as_ref() { + slog::info!(self.logger, "ConsoleEpollHandler: sock listener"); + let events = Events::with_data(sock, EPOLL_EVENT_SERIAL, EventSet::IN); + if let Err(e) = ops.add(events) { + slog::error!( + self.logger, + "failed to register epoll event for UDS listener, {:?}", + e + ); + } + } + + if let Some(conn) = self.sock_conn.as_ref() { + slog::info!(self.logger, "ConsoleEpollHandler: sock connection"); + let events = Events::with_data(conn, EPOLL_EVENT_SERIAL_DATA, EventSet::IN); + if let Err(e) = ops.add(events) { + slog::error!( + self.logger, + "failed to register epoll event for UDS connection, {:?}", + e + ); + } + } + } +} + +/// Writer to process guest kernel dmesg. +pub struct DmesgWriter { + buf: BytesMut, + logger: slog::Logger, +} + +impl DmesgWriter { + /// Creates a new instance. + pub fn new(logger: &slog::Logger) -> Self { + Self { + buf: BytesMut::with_capacity(1024), + logger: logger.new(slog::o!("subsystem" => "dmesg")), + } + } +} + +impl io::Write for DmesgWriter { + /// 0000000 [ 0 . 0 3 4 9 1 6 ] R + /// 5b 20 20 20 20 30 2e 30 33 34 39 31 36 5d 20 52 + /// 0000020 u n / s b i n / i n i t a s + /// 75 6e 20 2f 73 62 69 6e 2f 69 6e 69 74 20 61 73 + /// 0000040 i n i t p r o c e s s \r \n [ + /// + /// dmesg message end a line with /r/n . When redirect message to logger, we should + /// remove the /r/n . 
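+    /// For example, mirroring the unit test below (illustrative trace, not a doctest):
+    /// a call write(b"12\n23\n34\n56") logs "12", "23" and "34" and buffers "56";
+    /// a following write(b"78") only extends the buffer to "5678";
+    /// a final write(b"90\n") flushes the buffered line, logging "567890".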
+ fn write(&mut self, buf: &[u8]) -> io::Result { + let arr: Vec<&[u8]> = buf.split(|c| *c == b'\n').collect(); + let count = arr.len(); + + for (i, sub) in arr.iter().enumerate() { + if sub.is_empty() { + if !self.buf.is_empty() { + slog::info!( + self.logger, + "{}", + String::from_utf8_lossy(self.buf.as_ref()).trim_end() + ); + self.buf.clear(); + } + } else if sub.len() < buf.len() && i < count - 1 { + slog::info!( + self.logger, + "{}{}", + String::from_utf8_lossy(self.buf.as_ref()).trim_end(), + String::from_utf8_lossy(sub).trim_end(), + ); + self.buf.clear(); + } else { + self.buf.put_slice(sub); + } + } + + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use slog::Drain; + use std::io::Write; + + fn create_logger() -> slog::Logger { + let decorator = slog_term::TermDecorator::new().build(); + let drain = slog_term::FullFormat::new(decorator).build().fuse(); + let drain = slog_async::Async::new(drain).build().fuse(); + slog::Logger::root(drain, slog::o!()) + } + + #[test] + fn test_dmesg_writer() { + let mut writer = DmesgWriter { + buf: Default::default(), + logger: create_logger(), + }; + + writer.flush().unwrap(); + writer.write_all("".as_bytes()).unwrap(); + writer.write_all("\n".as_bytes()).unwrap(); + writer.write_all("\n\n".as_bytes()).unwrap(); + writer.write_all("\n\n\n".as_bytes()).unwrap(); + writer.write_all("12\n23\n34\n56".as_bytes()).unwrap(); + writer.write_all("78".as_bytes()).unwrap(); + writer.write_all("90\n".as_bytes()).unwrap(); + writer.flush().unwrap(); + } + + // TODO: add unit tests for console manager +} diff --git a/src/dragonball/src/device_manager/fs_dev_mgr.rs b/src/dragonball/src/device_manager/fs_dev_mgr.rs new file mode 100644 index 000000000000..dca0e649e35d --- /dev/null +++ b/src/dragonball/src/device_manager/fs_dev_mgr.rs @@ -0,0 +1,528 @@ +// Copyright 2020-2022 Alibaba Cloud. All Rights Reserved. +// Copyright 2019 Intel Corporation. All Rights Reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::convert::TryInto; + +use dbs_utils::epoll_manager::EpollManager; +use dbs_virtio_devices::{self as virtio, Error as VirtIoError}; +use serde_derive::{Deserialize, Serialize}; +use slog::{error, info}; + +use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::config_manager::{ + ConfigItem, DeviceConfigInfo, DeviceConfigInfos, RateLimiterConfigInfo, +}; +use crate::device_manager::{ + DbsMmioV2Device, DeviceManager, DeviceMgrError, DeviceOpContext, DeviceVirtioRegionHandler, +}; +use crate::get_bucket_update; + +use super::DbsVirtioDevice; + +// The flag of whether to use the shared irq. +const USE_SHARED_IRQ: bool = true; +// The flag of whether to use the generic irq. +const USE_GENERIC_IRQ: bool = true; +// Default cache size is 2 Gi since this is a typical VM memory size. +const DEFAULT_CACHE_SIZE: u64 = 2 * 1024 * 1024 * 1024; +// We have 2 supported fs device mode, vhostuser and virtio +const VHOSTUSER_FS_MODE: &str = "vhostuser"; +// We have 2 supported fs device mode, vhostuser and virtio +const VIRTIO_FS_MODE: &str = "virtio"; + +/// Errors associated with `FsDeviceConfig`. +#[derive(Debug, thiserror::Error)] +pub enum FsDeviceError { + /// Invalid fs, "virtio" or "vhostuser" is allowed. + #[error("the fs type is invalid, virtio or vhostuser is allowed")] + InvalidFs, + + /// Cannot access address space. + #[error("Cannot access address space.")] + AddressSpaceNotInitialized, + + /// Cannot convert RateLimterConfigInfo into RateLimiter. 
+ #[error("failure while converting RateLimterConfigInfo into RateLimiter: {0}")] + RateLimterConfigInfoTryInto(#[source] std::io::Error), + + /// The fs device tag was already used for a different fs. + #[error("VirtioFs device tag {0} already exists")] + FsDeviceTagAlreadyExists(String), + + /// The fs device path was already used for a different fs. + #[error("VirtioFs device tag {0} already exists")] + FsDevicePathAlreadyExists(String), + + /// The update is not allowed after booting the microvm. + #[error("update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, + + /// The attachbackendfs operation fails. + #[error("Fs device attach a backend fs failed")] + AttachBackendFailed(String), + + /// attach backend fs must be done when vm is running. + #[error("vm is not running when attaching a backend fs")] + MicroVMNotRunning, + + /// The mount tag doesn't exist. + #[error("fs tag'{0}' doesn't exist")] + TagNotExists(String), + + /// Failed to send patch message to VirtioFs epoll handler. + #[error("could not send patch message to the VirtioFs epoll handler")] + VirtioFsEpollHanderSendFail, + + /// Creating a shared-fs device fails (if the vhost-user socket cannot be open.) + #[error("cannot create shared-fs device: {0}")] + CreateFsDevice(#[source] VirtIoError), + + /// Cannot initialize a shared-fs device or add a device to the MMIO Bus. + #[error("failure while registering shared-fs device: {0}")] + RegisterFsDevice(#[source] DeviceMgrError), + + /// The device manager errors. + #[error("DeviceManager error: {0}")] + DeviceManager(#[source] DeviceMgrError), +} + +/// Configuration information for a vhost-user-fs device. +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] +pub struct FsDeviceConfigInfo { + /// vhost-user socket path. + pub sock_path: String, + /// virtiofs mount tag name used inside the guest. + /// used as the device name during mount. + pub tag: String, + /// Number of virtqueues to use. + pub num_queues: usize, + /// Size of each virtqueue. Unit: byte. + pub queue_size: u16, + /// DAX cache window size + pub cache_size: u64, + /// Number of thread pool workers. + pub thread_pool_size: u16, + /// The caching policy the file system should use (auto, always or never). + /// This cache policy is set for virtio-fs, visit https://gitlab.com/virtio-fs/virtiofsd to get further information. + pub cache_policy: String, + /// Writeback cache + pub writeback_cache: bool, + /// Enable no_open or not + pub no_open: bool, + /// Enable xattr or not + pub xattr: bool, + /// Drop CAP_SYS_RESOURCE or not + pub drop_sys_resource: bool, + /// virtio fs or vhostuser fs. + pub mode: String, + /// Enable kill_priv_v2 or not + pub fuse_killpriv_v2: bool, + /// Enable no_readdir or not + pub no_readdir: bool, + /// Rate Limiter for I/O operations. 
+ pub rate_limiter: Option, + /// Use shared irq + pub use_shared_irq: Option, + /// Use generic irq + pub use_generic_irq: Option, +} + +impl std::default::Default for FsDeviceConfigInfo { + fn default() -> Self { + Self { + sock_path: String::default(), + tag: String::default(), + num_queues: 1, + queue_size: 1024, + cache_size: DEFAULT_CACHE_SIZE, + thread_pool_size: 0, + cache_policy: Self::default_cache_policy(), + writeback_cache: Self::default_writeback_cache(), + no_open: Self::default_no_open(), + fuse_killpriv_v2: Self::default_fuse_killpriv_v2(), + no_readdir: Self::default_no_readdir(), + xattr: Self::default_xattr(), + drop_sys_resource: Self::default_drop_sys_resource(), + mode: Self::default_fs_mode(), + rate_limiter: Some(RateLimiterConfigInfo::default()), + use_shared_irq: None, + use_generic_irq: None, + } + } +} + +impl FsDeviceConfigInfo { + /// The default mode is set to 'virtio' for 'virtio-fs' device. + pub fn default_fs_mode() -> String { + String::from(VIRTIO_FS_MODE) + } + + /// The default cache policy + pub fn default_cache_policy() -> String { + "always".to_string() + } + + /// The default setting of writeback cache + pub fn default_writeback_cache() -> bool { + true + } + + /// The default setting of no_open + pub fn default_no_open() -> bool { + true + } + + /// The default setting of killpriv_v2 + pub fn default_fuse_killpriv_v2() -> bool { + false + } + + /// The default setting of xattr + pub fn default_xattr() -> bool { + false + } + + /// The default setting of drop_sys_resource + pub fn default_drop_sys_resource() -> bool { + false + } + + /// The default setting of no_readdir + pub fn default_no_readdir() -> bool { + false + } + + /// The default setting of rate limiter + pub fn default_fs_rate_limiter() -> Option { + None + } +} + +/// Configuration information for virtio-fs. +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] +pub struct FsDeviceConfigUpdateInfo { + /// virtiofs mount tag name used inside the guest. + /// used as the device name during mount. + pub tag: String, + /// Rate Limiter for I/O operations. + pub rate_limiter: Option, +} + +impl FsDeviceConfigUpdateInfo { + /// Provides a `BucketUpdate` description for the bandwidth rate limiter. + pub fn bytes(&self) -> dbs_utils::rate_limiter::BucketUpdate { + get_bucket_update!(self, rate_limiter, bandwidth) + } + /// Provides a `BucketUpdate` description for the ops rate limiter. + pub fn ops(&self) -> dbs_utils::rate_limiter::BucketUpdate { + get_bucket_update!(self, rate_limiter, ops) + } +} + +impl ConfigItem for FsDeviceConfigInfo { + type Err = FsDeviceError; + + fn id(&self) -> &str { + &self.tag + } + + fn check_conflicts(&self, other: &Self) -> Result<(), FsDeviceError> { + if self.tag == other.tag { + Err(FsDeviceError::FsDeviceTagAlreadyExists(self.tag.clone())) + } else if self.mode.as_str() == VHOSTUSER_FS_MODE && self.sock_path == other.sock_path { + Err(FsDeviceError::FsDevicePathAlreadyExists( + self.sock_path.clone(), + )) + } else { + Ok(()) + } + } +} + +/// Configuration information of manipulating backend fs for a virtiofs device. +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize, Default)] +pub struct FsMountConfigInfo { + /// Mount operations, mount, update, umount + pub ops: String, + /// The backend fs type to mount. 
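+    // A typical mount request carried by this struct might look like the sketch
+    // below (all field values are illustrative only):
+    //
+    //     FsMountConfigInfo {
+    //         ops: "mount".to_string(),
+    //         fstype: Some("rafs".to_string()),
+    //         source: Some("/path/to/backend/image".to_string()),
+    //         mountpoint: "/".to_string(),
+    //         config: None,
+    //         tag: "kataShared".to_string(),
+    //         prefetch_list_path: None,
+    //         dax_threshold_size_kb: None,
+    //     }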
+ pub fstype: Option, + /// the source file/directory the backend fs points to + pub source: Option, + /// where the backend fs gets mounted + pub mountpoint: String, + /// backend fs config content in json format + pub config: Option, + /// virtiofs mount tag name used inside the guest. + /// used as the device name during mount. + pub tag: String, + /// Path to file that contains file lists that should be prefetched by rafs + pub prefetch_list_path: Option, + /// What size file supports dax + pub dax_threshold_size_kb: Option, +} + +pub(crate) type FsDeviceInfo = DeviceConfigInfo; + +impl ConfigItem for FsDeviceInfo { + type Err = FsDeviceError; + fn id(&self) -> &str { + &self.config.tag + } + + fn check_conflicts(&self, other: &Self) -> Result<(), FsDeviceError> { + if self.config.tag == other.config.tag { + Err(FsDeviceError::FsDeviceTagAlreadyExists( + self.config.tag.clone(), + )) + } else if self.config.sock_path == other.config.sock_path { + Err(FsDeviceError::FsDevicePathAlreadyExists( + self.config.sock_path.clone(), + )) + } else { + Ok(()) + } + } +} + +/// Wrapper for the collection that holds all the Fs Devices Configs +pub struct FsDeviceMgr { + /// A list of `FsDeviceConfig` objects. + pub(crate) info_list: DeviceConfigInfos, + pub(crate) use_shared_irq: bool, +} + +impl FsDeviceMgr { + /// Inserts `fs_cfg` in the shared-fs device configuration list. + pub fn insert_device( + device_mgr: &mut DeviceManager, + ctx: DeviceOpContext, + fs_cfg: FsDeviceConfigInfo, + ) -> std::result::Result<(), FsDeviceError> { + // It's too complicated to manage life cycle of shared-fs service process for hotplug. + if ctx.is_hotplug { + error!( + ctx.logger(), + "no support of shared-fs device hotplug"; + "subsystem" => "shared-fs", + "tag" => &fs_cfg.tag, + ); + return Err(FsDeviceError::UpdateNotAllowedPostBoot); + } + + info!( + ctx.logger(), + "add shared-fs device configuration"; + "subsystem" => "shared-fs", + "tag" => &fs_cfg.tag, + ); + device_mgr + .fs_manager + .lock() + .unwrap() + .info_list + .insert_or_update(&fs_cfg)?; + + Ok(()) + } + + /// Attaches all vhost-user-fs devices from the FsDevicesConfig. 
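+    // A shared-fs entry is normally added with `insert_device()` before boot and
+    // realized here at attach time. A minimal config sketch (tag and socket path
+    // are illustrative values only):
+    //
+    //     let fs_cfg = FsDeviceConfigInfo {
+    //         tag: "kataShared".to_string(),
+    //         sock_path: "/run/virtiofsd.sock".to_string(),
+    //         ..Default::default()
+    //     };
+    //     FsDeviceMgr::insert_device(&mut device_mgr, ctx, fs_cfg)?;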
+ pub fn attach_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), FsDeviceError> { + let epoll_mgr = ctx + .epoll_mgr + .clone() + .ok_or(FsDeviceError::CreateFsDevice(virtio::Error::InvalidInput))?; + + for info in self.info_list.iter_mut() { + let device = Self::create_fs_device(&info.config, ctx, epoll_mgr.clone())?; + let mmio_device = DeviceManager::create_mmio_virtio_device( + device, + ctx, + info.config.use_shared_irq.unwrap_or(self.use_shared_irq), + info.config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(FsDeviceError::RegisterFsDevice)?; + + info.set_device(mmio_device); + } + + Ok(()) + } + + fn create_fs_device( + config: &FsDeviceConfigInfo, + ctx: &mut DeviceOpContext, + epoll_mgr: EpollManager, + ) -> std::result::Result { + match &config.mode as &str { + VIRTIO_FS_MODE => Self::attach_virtio_fs_devices(config, ctx, epoll_mgr), + _ => Err(FsDeviceError::CreateFsDevice(virtio::Error::InvalidInput)), + } + } + + fn attach_virtio_fs_devices( + config: &FsDeviceConfigInfo, + ctx: &mut DeviceOpContext, + epoll_mgr: EpollManager, + ) -> std::result::Result { + info!( + ctx.logger(), + "add virtio-fs device configuration"; + "subsystem" => "virito-fs", + "tag" => &config.tag, + "dax_window_size" => &config.cache_size, + ); + + let limiter = if let Some(rlc) = config.rate_limiter.clone() { + Some( + rlc.try_into() + .map_err(FsDeviceError::RateLimterConfigInfoTryInto)?, + ) + } else { + None + }; + + let vm_as = ctx.get_vm_as().map_err(|e| { + error!(ctx.logger(), "virtio-fs get vm_as error: {:?}", e; + "subsystem" => "virito-fs"); + FsDeviceError::DeviceManager(e) + })?; + let address_space = match ctx.address_space.as_ref() { + Some(address_space) => address_space.clone(), + None => { + error!(ctx.logger(), "virtio-fs get address_space error"; "subsystem" => "virito-fs"); + return Err(FsDeviceError::AddressSpaceNotInitialized); + } + }; + let handler = DeviceVirtioRegionHandler { + vm_as, + address_space, + }; + + let device = Box::new( + virtio::fs::VirtioFs::new( + &config.tag, + config.num_queues, + config.queue_size, + config.cache_size, + &config.cache_policy, + config.thread_pool_size, + config.writeback_cache, + config.no_open, + config.fuse_killpriv_v2, + config.xattr, + config.drop_sys_resource, + config.no_readdir, + Box::new(handler), + epoll_mgr, + limiter, + ) + .map_err(FsDeviceError::CreateFsDevice)?, + ); + + Ok(device) + } + + /// Attach a backend fs to a VirtioFs device or detach a backend + /// fs from a Virtiofs device + pub fn manipulate_backend_fs( + device_mgr: &mut DeviceManager, + config: FsMountConfigInfo, + ) -> std::result::Result<(), FsDeviceError> { + let mut found = false; + + let mgr = &mut device_mgr.fs_manager.lock().unwrap(); + for info in mgr + .info_list + .iter() + .filter(|info| info.config.tag.as_str() == config.tag.as_str()) + { + found = true; + if let Some(device) = info.device.as_ref() { + if let Some(mmio_dev) = device.as_any().downcast_ref::() { + let mut guard = mmio_dev.state(); + let inner_dev = guard.get_inner_device_mut(); + if let Some(virtio_fs_dev) = inner_dev + .as_any_mut() + .downcast_mut::>() + { + return virtio_fs_dev + .manipulate_backend_fs( + config.source, + config.fstype, + &config.mountpoint, + config.config, + &config.ops, + config.prefetch_list_path, + config.dax_threshold_size_kb, + ) + .map(|_p| ()) + .map_err(|e| FsDeviceError::AttachBackendFailed(e.to_string())); + } + } + } + } + if !found { + Err(FsDeviceError::AttachBackendFailed( + "fs tag not 
found".to_string(), + )) + } else { + Ok(()) + } + } + + /// Gets the index of the device with the specified `tag` if it exists in the list. + pub fn get_index_of_tag(&self, tag: &str) -> Option { + self.info_list + .iter() + .position(|info| info.config.id().eq(tag)) + } + + /// Update the ratelimiter settings of a virtio fs device. + pub fn update_device_ratelimiters( + device_mgr: &mut DeviceManager, + new_cfg: FsDeviceConfigUpdateInfo, + ) -> std::result::Result<(), FsDeviceError> { + let mgr = &mut device_mgr.fs_manager.lock().unwrap(); + match mgr.get_index_of_tag(&new_cfg.tag) { + Some(index) => { + let config = &mut mgr.info_list[index].config; + config.rate_limiter = new_cfg.rate_limiter.clone(); + let device = mgr.info_list[index] + .device + .as_mut() + .ok_or_else(|| FsDeviceError::TagNotExists("".to_owned()))?; + + if let Some(mmio_dev) = device.as_any().downcast_ref::() { + let guard = mmio_dev.state(); + let inner_dev = guard.get_inner_device(); + if let Some(fs_dev) = inner_dev + .as_any() + .downcast_ref::>() + { + return fs_dev + .set_patch_rate_limiters(new_cfg.bytes(), new_cfg.ops()) + .map(|_p| ()) + .map_err(|_e| FsDeviceError::VirtioFsEpollHanderSendFail); + } + } + Ok(()) + } + None => Err(FsDeviceError::TagNotExists(new_cfg.tag)), + } + } +} + +impl Default for FsDeviceMgr { + /// Create a new `FsDeviceMgr` object.. + fn default() -> Self { + FsDeviceMgr { + info_list: DeviceConfigInfos::new(), + use_shared_irq: USE_SHARED_IRQ, + } + } +} diff --git a/src/dragonball/src/device_manager/legacy.rs b/src/dragonball/src/device_manager/legacy.rs new file mode 100644 index 000000000000..50a47cab7310 --- /dev/null +++ b/src/dragonball/src/device_manager/legacy.rs @@ -0,0 +1,246 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Device Manager for Legacy Devices. + +use std::io; +use std::sync::{Arc, Mutex}; + +use dbs_device::device_manager::Error as IoManagerError; +#[cfg(target_arch = "aarch64")] +use dbs_legacy_devices::RTCDevice; +use dbs_legacy_devices::SerialDevice; +use vmm_sys_util::eventfd::EventFd; + +// The I8042 Data Port (IO Port 0x60) is used for reading data that was received from a I8042 device or from the I8042 controller itself and writing data to a I8042 device or to the I8042 controller itself. +const I8042_DATA_PORT: u16 = 0x60; + +/// Errors generated by legacy device manager. +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// Cannot add legacy device to Bus. + #[error("bus failure while managing legacy device")] + BusError(#[source] IoManagerError), + + /// Cannot create EventFd. + #[error("failure while reading EventFd file descriptor")] + EventFd(#[source] io::Error), + + /// Failed to register/deregister interrupt. + #[error("failure while managing interrupt for legacy device")] + IrqManager(#[source] vmm_sys_util::errno::Error), +} + +/// The `LegacyDeviceManager` is a wrapper that is used for registering legacy devices +/// on an I/O Bus. +/// +/// It currently manages the uart and i8042 devices. The `LegacyDeviceManger` should be initialized +/// only by using the constructor. 
+pub struct LegacyDeviceManager { + #[cfg(target_arch = "x86_64")] + i8042_reset_eventfd: EventFd, + #[cfg(target_arch = "aarch64")] + pub(crate) _rtc_device: Arc>, + #[cfg(target_arch = "aarch64")] + _rtc_eventfd: EventFd, + pub(crate) com1_device: Arc>, + _com1_eventfd: EventFd, + pub(crate) com2_device: Arc>, + _com2_eventfd: EventFd, +} + +impl LegacyDeviceManager { + /// Get the serial device for com1. + pub fn get_com1_serial(&self) -> Arc> { + self.com1_device.clone() + } + + /// Get the serial device for com2 + pub fn get_com2_serial(&self) -> Arc> { + self.com2_device.clone() + } +} + +#[cfg(target_arch = "x86_64")] +pub(crate) mod x86_64 { + use super::*; + use dbs_device::device_manager::IoManager; + use dbs_device::resources::Resource; + use dbs_legacy_devices::{EventFdTrigger, I8042Device, I8042DeviceMetrics}; + use kvm_ioctls::VmFd; + + pub(crate) const COM1_IRQ: u32 = 4; + pub(crate) const COM1_PORT1: u16 = 0x3f8; + pub(crate) const COM2_IRQ: u32 = 3; + pub(crate) const COM2_PORT1: u16 = 0x2f8; + + type Result = ::std::result::Result; + + impl LegacyDeviceManager { + /// Create a LegacyDeviceManager instance handling legacy devices (uart, i8042). + pub fn create_manager(bus: &mut IoManager, vm_fd: Option>) -> Result { + let (com1_device, com1_eventfd) = + Self::create_com_device(bus, vm_fd.as_ref(), COM1_IRQ, COM1_PORT1)?; + let (com2_device, com2_eventfd) = + Self::create_com_device(bus, vm_fd.as_ref(), COM2_IRQ, COM2_PORT1)?; + + let exit_evt = EventFd::new(libc::EFD_NONBLOCK).map_err(Error::EventFd)?; + let i8042_device = Arc::new(Mutex::new(I8042Device::new( + EventFdTrigger::new(exit_evt.try_clone().map_err(Error::EventFd)?), + Arc::new(I8042DeviceMetrics::default()), + ))); + let resources = [Resource::PioAddressRange { + // 0x60 and 0x64 are the io ports that i8042 devices used. + // We register pio address range from 0x60 - 0x64 with base I8042_DATA_PORT for i8042 to use. + base: I8042_DATA_PORT, + size: 0x5, + }]; + bus.register_device_io(i8042_device, &resources) + .map_err(Error::BusError)?; + + Ok(LegacyDeviceManager { + i8042_reset_eventfd: exit_evt, + com1_device, + _com1_eventfd: com1_eventfd, + com2_device, + _com2_eventfd: com2_eventfd, + }) + } + + /// Get the eventfd for exit notification. + pub fn get_reset_eventfd(&self) -> Result { + self.i8042_reset_eventfd.try_clone().map_err(Error::EventFd) + } + + fn create_com_device( + bus: &mut IoManager, + vm_fd: Option<&Arc>, + irq: u32, + port_base: u16, + ) -> Result<(Arc>, EventFd)> { + let eventfd = EventFd::new(libc::EFD_NONBLOCK).map_err(Error::EventFd)?; + let device = Arc::new(Mutex::new(SerialDevice::new( + eventfd.try_clone().map_err(Error::EventFd)?, + ))); + // port_base defines the base port address for the COM devices. + // Since every COM device has 8 data registers so we register the pio address range as size 0x8. 
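+            // With the constants above this registers COM1 at 0x3f8..=0x3ff with
+            // IRQ 4 and COM2 at 0x2f8..=0x2ff with IRQ 3, matching the
+            // conventional PC serial port layout.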
+ let resources = [Resource::PioAddressRange { + base: port_base, + size: 0x8, + }]; + bus.register_device_io(device.clone(), &resources) + .map_err(Error::BusError)?; + + if let Some(fd) = vm_fd { + fd.register_irqfd(&eventfd, irq) + .map_err(Error::IrqManager)?; + } + + Ok((device, eventfd)) + } + } +} + +#[cfg(target_arch = "aarch64")] +pub(crate) mod aarch64 { + use super::*; + use dbs_device::device_manager::IoManager; + use dbs_device::resources::DeviceResources; + use kvm_ioctls::VmFd; + use std::collections::HashMap; + + type Result = ::std::result::Result; + + /// LegacyDeviceType: com1 + pub const COM1: &str = "com1"; + /// LegacyDeviceType: com2 + pub const COM2: &str = "com2"; + /// LegacyDeviceType: rtc + pub const RTC: &str = "rtc"; + + impl LegacyDeviceManager { + /// Create a LegacyDeviceManager instance handling legacy devices. + pub fn create_manager( + bus: &mut IoManager, + vm_fd: Option>, + resources: &HashMap, + ) -> Result { + let (com1_device, com1_eventfd) = + Self::create_com_device(bus, vm_fd.as_ref(), resources.get(COM1).unwrap())?; + let (com2_device, com2_eventfd) = + Self::create_com_device(bus, vm_fd.as_ref(), resources.get(COM2).unwrap())?; + let (rtc_device, rtc_eventfd) = + Self::create_rtc_device(bus, vm_fd.as_ref(), resources.get(RTC).unwrap())?; + + Ok(LegacyDeviceManager { + _rtc_device: rtc_device, + _rtc_eventfd: rtc_eventfd, + com1_device, + _com1_eventfd: com1_eventfd, + com2_device, + _com2_eventfd: com2_eventfd, + }) + } + + fn create_com_device( + bus: &mut IoManager, + vm_fd: Option<&Arc>, + resources: &DeviceResources, + ) -> Result<(Arc>, EventFd)> { + let eventfd = EventFd::new(libc::EFD_NONBLOCK).map_err(Error::EventFd)?; + let device = Arc::new(Mutex::new(SerialDevice::new( + eventfd.try_clone().map_err(Error::EventFd)?, + ))); + + bus.register_device_io(device.clone(), resources.get_all_resources()) + .map_err(Error::BusError)?; + + if let Some(fd) = vm_fd { + let irq = resources.get_legacy_irq().unwrap(); + fd.register_irqfd(&eventfd, irq) + .map_err(Error::IrqManager)?; + } + + Ok((device, eventfd)) + } + + fn create_rtc_device( + bus: &mut IoManager, + vm_fd: Option<&Arc>, + resources: &DeviceResources, + ) -> Result<(Arc>, EventFd)> { + let eventfd = EventFd::new(libc::EFD_NONBLOCK).map_err(Error::EventFd)?; + let device = Arc::new(Mutex::new(RTCDevice::new())); + + bus.register_device_io(device.clone(), resources.get_all_resources()) + .map_err(Error::BusError)?; + + if let Some(fd) = vm_fd { + let irq = resources.get_legacy_irq().unwrap(); + fd.register_irqfd(&eventfd, irq) + .map_err(Error::IrqManager)?; + } + + Ok((device, eventfd)) + } + } +} + +#[cfg(test)] +mod tests { + #[cfg(target_arch = "x86_64")] + use super::*; + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_create_legacy_device_manager() { + let mut bus = dbs_device::device_manager::IoManager::new(); + let mgr = LegacyDeviceManager::create_manager(&mut bus, None).unwrap(); + let _exit_fd = mgr.get_reset_eventfd().unwrap(); + } +} diff --git a/src/dragonball/src/device_manager/mem_dev_mgr.rs b/src/dragonball/src/device_manager/mem_dev_mgr.rs new file mode 100644 index 000000000000..2bb68ae80f9d --- /dev/null +++ b/src/dragonball/src/device_manager/mem_dev_mgr.rs @@ -0,0 +1,733 @@ +// Copyright 2020 Alibaba Cloud. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::io; +use std::sync::{Arc, Mutex}; + +use dbs_address_space::{ + AddressSpace, AddressSpaceError, AddressSpaceRegion, MPOL_MF_MOVE, MPOL_PREFERRED, USABLE_END, +}; +use dbs_utils::epoll_manager::EpollManager; +use dbs_virtio_devices as virtio; +use kvm_bindings::kvm_userspace_memory_region; +use kvm_ioctls::VmFd; +use nix::sys::mman; +use serde_derive::{Deserialize, Serialize}; +use slog::{debug, error, info, warn}; +use virtio::mem::{Mem, MemRegionFactory}; +use virtio::Error as VirtIoError; +use vm_memory::{ + Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestRegionMmap, GuestUsize, MmapRegion, +}; + +use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos}; +use crate::device_manager::DbsMmioV2Device; +use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; +use crate::vm::VmConfigInfo; + +// The flag of whether to use the shared irq. +const USE_SHARED_IRQ: bool = true; +// The flag of whether to use the generic irq. +const USE_GENERIC_IRQ: bool = false; + +const HUGE_PAGE_2M: usize = 0x200000; + +// max numa node ids on host +const MAX_NODE: u32 = 64; + +/// Errors associated with `MemDeviceConfig`. +#[derive(Debug, thiserror::Error)] +pub enum MemDeviceError { + /// The mem device was already used. + #[error("the virtio-mem ID was already added to a different device")] + MemDeviceAlreadyExists, + + /// Cannot perform the requested operation after booting the microVM. + #[error("the update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, + + /// insert mem device error + #[error("cannot add virtio-mem device, {0}")] + InsertDeviceFailed(#[source] DeviceMgrError), + + /// create mem device error + #[error("cannot create virito-mem device, {0}")] + CreateMemDevice(#[source] DeviceMgrError), + + /// create mmio device error + #[error("cannot create virito-mem mmio device, {0}")] + CreateMmioDevice(#[source] DeviceMgrError), + + /// resize mem device error + #[error("failure while resizing virtio-mem device, {0}")] + ResizeFailed(#[source] VirtIoError), + + /// mem device does not exist + #[error("mem device does not exist")] + DeviceNotExist, + + /// address space region error + #[error("address space region error, {0}")] + AddressSpaceRegion(#[source] AddressSpaceError), + + /// Cannot initialize a mem device or add a device to the MMIO Bus. + #[error("failure while registering mem device: {0}")] + RegisterMemDevice(#[source] DeviceMgrError), + + /// The mem device id doesn't exist. + #[error("invalid mem device id '{0}'")] + InvalidDeviceId(String), + + /// The device manager errors. + #[error("DeviceManager error: {0}")] + DeviceManager(#[source] DeviceMgrError), +} + +/// Configuration information for a virtio-mem device. 
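+// As an illustration only (the values below are an example, not defaults used by
+// this code), a device that starts with 1 GiB plugged and can grow to 4 GiB
+// could be described as:
+//
+//     MemDeviceConfigInfo {
+//         mem_id: "mem0".to_string(),
+//         size_mib: 1024,
+//         capacity_mib: 4096,
+//         multi_region: true,
+//         host_numa_node_id: None,
+//         guest_numa_node_id: None,
+//         use_shared_irq: None,
+//         use_generic_irq: None,
+//     }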
+#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] +pub struct MemDeviceConfigInfo { + /// Unique identifier of the pmem device + pub mem_id: String, + /// Memory size mib + pub size_mib: u64, + /// Memory capacity mib + pub capacity_mib: u64, + /// Use multi_region or not + pub multi_region: bool, + /// host numa node id + pub host_numa_node_id: Option, + /// guest numa node id + pub guest_numa_node_id: Option, + /// Use shared irq + pub use_shared_irq: Option, + /// Use generic irq + pub use_generic_irq: Option, +} + +impl ConfigItem for MemDeviceConfigInfo { + type Err = MemDeviceError; + + fn id(&self) -> &str { + &self.mem_id + } + + fn check_conflicts(&self, other: &Self) -> Result<(), MemDeviceError> { + if self.mem_id.as_str() == other.mem_id.as_str() { + Err(MemDeviceError::MemDeviceAlreadyExists) + } else { + Ok(()) + } + } +} + +/// Mem Device Info +pub type MemDeviceInfo = DeviceConfigInfo; + +impl ConfigItem for MemDeviceInfo { + type Err = MemDeviceError; + + fn id(&self) -> &str { + &self.config.mem_id + } + + fn check_conflicts(&self, other: &Self) -> Result<(), MemDeviceError> { + if self.config.mem_id.as_str() == other.config.mem_id.as_str() { + Err(MemDeviceError::MemDeviceAlreadyExists) + } else { + Ok(()) + } + } +} + +/// Wrapper for the collection that holds all the Mem Devices Configs +#[derive(Clone)] +pub struct MemDeviceMgr { + /// A list of `MemDeviceConfig` objects. + info_list: DeviceConfigInfos, + pub(crate) use_shared_irq: bool, +} + +impl MemDeviceMgr { + /// Inserts `mem_cfg` in the virtio-mem device configuration list. + /// If an entry with the same id already exists, it will attempt to update + /// the existing entry. + pub fn insert_or_update_device( + &mut self, + mut ctx: DeviceOpContext, + mem_cfg: MemDeviceConfigInfo, + ) -> std::result::Result<(), MemDeviceError> { + if !cfg!(feature = "hotplug") && ctx.is_hotplug { + error!(ctx.logger(), "hotplug feature has been disabled."; + "subsystem" => "virito-mem"); + return Err(MemDeviceError::UpdateNotAllowedPostBoot); + } + + let epoll_mgr = ctx.get_epoll_mgr().map_err(MemDeviceError::DeviceManager)?; + + // If the id of the drive already exists in the list, the operation is update. 
+ if let Some(index) = self.get_index_of_mem_dev(&mem_cfg.mem_id) { + // Update an existing memory device + if ctx.is_hotplug { + info!( + ctx.logger(), + "update memory device: {}, size: 0x{:x}MB.", + mem_cfg.mem_id, + mem_cfg.size_mib; + "subsystem" => "virito-mem" + ); + self.update_memory_size(index, mem_cfg.size_mib)?; + } + self.info_list.insert_or_update(&mem_cfg)?; + } else { + // Create a new memory device + if !ctx.is_hotplug { + self.info_list.insert_or_update(&mem_cfg)?; + return Ok(()); + } + + info!( + ctx.logger(), + "hot-add memory device: {}, size: 0x{:x}MB.", mem_cfg.mem_id, mem_cfg.size_mib; + "subsystem" => "virito-mem" + ); + + let device = Self::create_memory_device(&mem_cfg, &ctx, &epoll_mgr) + .map_err(MemDeviceError::CreateMemDevice)?; + let mmio_device = + DeviceManager::create_mmio_virtio_device_with_device_change_notification( + Box::new(device), + &mut ctx, + mem_cfg.use_shared_irq.unwrap_or(self.use_shared_irq), + mem_cfg.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(MemDeviceError::CreateMmioDevice)?; + + #[cfg(not(test))] + ctx.insert_hotplug_mmio_device(&mmio_device, None) + .map_err(|e| { + error!( + ctx.logger(), + "failed to hot-add virtio-mem device {}, {}", &mem_cfg.mem_id, e; + "subsystem" => "virito-mem" + ); + MemDeviceError::InsertDeviceFailed(e) + })?; + + let index = self.info_list.insert_or_update(&mem_cfg)?; + self.info_list[index].set_device(mmio_device); + } + + Ok(()) + } + + /// Attaches all virtio-mem devices from the MemDevicesConfig. + pub fn attach_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), MemDeviceError> { + let epoll_mgr = ctx.get_epoll_mgr().map_err(MemDeviceError::DeviceManager)?; + + for info in self.info_list.iter_mut() { + let config = &info.config; + info!( + ctx.logger(), + "attach virtio-mem device {}, size 0x{:x}.", config.mem_id, config.size_mib; + "subsystem" => "virito-mem" + ); + // Ignore virtio-mem device with zero memory capacity. 
+ if config.size_mib == 0 { + debug!( + ctx.logger(), + "ignore zero-sizing memory device {}.", config.mem_id; + "subsystem" => "virito-mem" + ); + continue; + } + + let device = Self::create_memory_device(config, ctx, &epoll_mgr) + .map_err(MemDeviceError::CreateMemDevice)?; + let mmio_device = + DeviceManager::create_mmio_virtio_device_with_device_change_notification( + Box::new(device), + ctx, + config.use_shared_irq.unwrap_or(self.use_shared_irq), + config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(MemDeviceError::RegisterMemDevice)?; + + info.set_device(mmio_device); + } + + Ok(()) + } + + fn get_index_of_mem_dev(&self, mem_id: &str) -> Option { + self.info_list + .iter() + .position(|info| info.config.mem_id.eq(mem_id)) + } + + fn create_memory_device( + config: &MemDeviceConfigInfo, + ctx: &DeviceOpContext, + epoll_mgr: &EpollManager, + ) -> std::result::Result, DeviceMgrError> { + let factory = Arc::new(Mutex::new(MemoryRegionFactory::new( + ctx, + config.mem_id.clone(), + config.host_numa_node_id, + )?)); + + let mut capacity_mib = config.capacity_mib; + if capacity_mib == 0 { + capacity_mib = *USABLE_END >> 20; + } + // get boot memory size for calculate alignment + let boot_mem_size = { + let boot_size = (ctx.get_vm_config()?.mem_size_mib << 20) as u64; + // increase 1G memory because of avoiding mmio hole + match boot_size { + x if x > dbs_boot::layout::MMIO_LOW_START => x + (1 << 30), + _ => boot_size, + } + }; + + virtio::mem::Mem::new( + config.mem_id.clone(), + capacity_mib, + config.size_mib, + config.multi_region, + config.guest_numa_node_id, + epoll_mgr.clone(), + factory, + boot_mem_size, + ) + .map_err(DeviceMgrError::Virtio) + } + + /// Removes all virtio-mem devices + pub fn remove_devices(&self, ctx: &mut DeviceOpContext) -> Result<(), DeviceMgrError> { + for info in self.info_list.iter() { + if let Some(device) = &info.device { + DeviceManager::destroy_mmio_virtio_device(device.clone(), ctx)?; + } + } + + Ok(()) + } + + fn update_memory_size( + &self, + index: usize, + size_mib: u64, + ) -> std::result::Result<(), MemDeviceError> { + let device = self.info_list[index] + .device + .as_ref() + .ok_or_else(|| MemDeviceError::DeviceNotExist)?; + if let Some(mmio_dev) = device.as_any().downcast_ref::() { + let guard = mmio_dev.state(); + let inner_dev = guard.get_inner_device(); + if let Some(mem_dev) = inner_dev + .as_any() + .downcast_ref::>() + { + return mem_dev + .set_requested_size(size_mib) + .map_err(MemDeviceError::ResizeFailed); + } + } + Ok(()) + } +} + +impl Default for MemDeviceMgr { + /// Create a new `MemDeviceMgr` object.. 
+ fn default() -> Self { + MemDeviceMgr { + info_list: DeviceConfigInfos::new(), + use_shared_irq: USE_SHARED_IRQ, + } + } +} + +struct MemoryRegionFactory { + mem_id: String, + vm_as: GuestAddressSpaceImpl, + address_space: AddressSpace, + vm_config: VmConfigInfo, + vm_fd: Arc, + logger: Arc, + host_numa_node_id: Option, + instance_id: String, +} + +impl MemoryRegionFactory { + fn new( + ctx: &DeviceOpContext, + mem_id: String, + host_numa_node_id: Option, + ) -> Result { + let vm_as = ctx.get_vm_as()?; + let address_space = ctx.get_address_space()?; + let vm_config = ctx.get_vm_config()?; + let logger = Arc::new(ctx.logger().new(slog::o!())); + + let shared_info = ctx.shared_info.read().unwrap(); + let instance_id = shared_info.id.clone(); + + Ok(MemoryRegionFactory { + mem_id, + vm_as, + address_space, + vm_config, + vm_fd: ctx.vm_fd.clone(), + logger, + host_numa_node_id, + instance_id, + }) + } + + fn configure_anon_mem(&self, mmap_reg: &MmapRegion) -> Result<(), VirtIoError> { + unsafe { + mman::madvise( + mmap_reg.as_ptr() as *mut libc::c_void, + mmap_reg.size(), + mman::MmapAdvise::MADV_DONTFORK, + ) + } + .map_err(VirtIoError::Madvise)?; + + Ok(()) + } + + fn configure_numa(&self, mmap_reg: &MmapRegion, node_id: u32) -> Result<(), VirtIoError> { + let nodemask = 1_u64 + .checked_shl(node_id) + .ok_or(VirtIoError::InvalidInput)?; + let res = unsafe { + libc::syscall( + libc::SYS_mbind, + mmap_reg.as_ptr() as *mut libc::c_void, + mmap_reg.size(), + MPOL_PREFERRED, + &nodemask as *const u64, + MAX_NODE, + MPOL_MF_MOVE, + ) + }; + if res < 0 { + warn!( + self.logger, + "failed to mbind memory to host_numa_node_id {}: this may affect performance", + node_id; + "subsystem" => "virito-mem" + ); + } + Ok(()) + } + + fn configure_thp(&mut self, mmap_reg: &MmapRegion) -> Result<(), VirtIoError> { + debug!( + self.logger, + "Setting MADV_HUGEPAGE on AddressSpaceRegion addr {:x?} len {:x?}", + mmap_reg.as_ptr(), + mmap_reg.size(); + "subsystem" => "virito-mem" + ); + + // Safe because we just create the MmapRegion + unsafe { + mman::madvise( + mmap_reg.as_ptr() as *mut libc::c_void, + mmap_reg.size(), + mman::MmapAdvise::MADV_HUGEPAGE, + ) + } + .map_err(VirtIoError::Madvise)?; + + Ok(()) + } + + fn map_to_kvm( + &mut self, + slot: u32, + reg: &Arc, + mmap_reg: &MmapRegion, + ) -> Result<(), VirtIoError> { + let host_addr = mmap_reg.as_ptr() as u64; + + let flags = 0u32; + + let mem_region = kvm_userspace_memory_region { + slot, + guest_phys_addr: reg.start_addr().raw_value(), + memory_size: reg.len(), + userspace_addr: host_addr, + flags, + }; + + // Safe because the user mem region is just created, and kvm slot is allocated + // by resource allocator. 
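+        // Each virtio-mem region becomes one KVM memory slot mapping
+        // `guest_phys_addr` to the host mapping at `userspace_addr` for
+        // `memory_size` bytes. For instance, the unit test below ends up
+        // registering slot 2 for a 256 MiB region starting at guest physical
+        // address 0x1_0000_0000.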
+ unsafe { self.vm_fd.set_user_memory_region(mem_region) } + .map_err(VirtIoError::SetUserMemoryRegion)?; + + Ok(()) + } +} + +impl MemRegionFactory for MemoryRegionFactory { + fn create_region( + &mut self, + guest_addr: GuestAddress, + region_len: GuestUsize, + kvm_slot: u32, + ) -> std::result::Result, VirtIoError> { + // create address space region + let mem_type = self.vm_config.mem_type.as_str(); + let mut mem_file_path = self.vm_config.mem_file_path.clone(); + let mem_file_name = format!( + "/virtiomem_{}_{}", + self.instance_id.as_str(), + self.mem_id.as_str() + ); + mem_file_path.push_str(mem_file_name.as_str()); + let region = Arc::new( + AddressSpaceRegion::create_default_memory_region( + guest_addr, + region_len, + self.host_numa_node_id, + mem_type, + mem_file_path.as_str(), + false, + true, + ) + .map_err(|e| { + error!(self.logger, "failed to insert address space region: {}", e); + // dbs-virtio-devices should not depend on dbs-address-space. + // So here io::Error is used instead of AddressSpaceError directly. + VirtIoError::IOError(io::Error::new( + io::ErrorKind::Other, + format!( + "invalid address space region ({0:#x}, {1:#x})", + guest_addr.0, region_len + ), + )) + })?, + ); + info!( + self.logger, + "VM: mem_type: {} mem_file_path: {}, numa_node_id: {:?} file_offset: {:?}", + mem_type, + mem_file_path, + self.host_numa_node_id, + region.file_offset(); + "subsystem" => "virito-mem" + ); + + let mmap_region = MmapRegion::build( + region.file_offset().cloned(), + region_len as usize, + region.prot_flags(), + region.perm_flags(), + ) + .map_err(VirtIoError::NewMmapRegion)?; + let host_addr: u64 = mmap_region.as_ptr() as u64; + + // thp + if mem_type == "hugeanon" || mem_type == "hugeshmem" { + self.configure_thp(&mmap_region)?; + } + + // Handle numa + if let Some(numa_node_id) = self.host_numa_node_id { + self.configure_numa(&mmap_region, numa_node_id)?; + } + + // add to guest memory mapping + self.map_to_kvm(kvm_slot, ®ion, &mmap_region)?; + + info!( + self.logger, + "kvm set user memory region: slot: {}, flags: {}, guest_phys_addr: {:X}, memory_size: {}, userspace_addr: {:X}", + kvm_slot, + 0, + guest_addr.raw_value(), + region_len, + host_addr; + "subsystem" => "virito-mem" + ); + + // All value should be valid. + let memory_region = Arc::new( + GuestRegionMmap::new(mmap_region, guest_addr).map_err(VirtIoError::InsertMmap)?, + ); + + let vm_as_new = self + .vm_as + .memory() + .insert_region(memory_region.clone()) + .map_err(VirtIoError::InsertMmap)?; + self.vm_as.lock().unwrap().replace(vm_as_new); + self.address_space.insert_region(region).map_err(|e| { + error!(self.logger, "failed to insert address space region: {}", e); + // dbs-virtio-devices should not depend on dbs-address-space. + // So here io::Error is used instead of AddressSpaceError directly. + VirtIoError::IOError(io::Error::new( + io::ErrorKind::Other, + format!( + "invalid address space region ({0:#x}, {1:#x})", + guest_addr.0, region_len + ), + )) + })?; + + Ok(memory_region) + } + + fn restore_region_addr( + &self, + guest_addr: GuestAddress, + ) -> std::result::Result<*mut u8, VirtIoError> { + let memory = self.vm_as.memory(); + // NOTE: We can't clone `GuestRegionMmap` reference directly!!! + // + // Since an important role of the member `mapping` (type is + // `MmapRegion`) in `GuestRegionMmap` is to mmap the memory during + // construction and munmap the memory during drop. 
However, when the + // life time of cloned data is over, the drop operation will be + // performed, which will munmap the origional mmap memory, which will + // cause some memory in dragonall to be inaccessable. And remember the + // data structure that was cloned is still alive now, when its life time + // is over, it will perform the munmap operation again, which will cause + // a memory exception! + memory + .get_host_address(guest_addr) + .map_err(VirtIoError::GuestMemory) + } + + fn get_host_numa_node_id(&self) -> Option { + self.host_numa_node_id + } + + fn set_host_numa_node_id(&mut self, host_numa_node_id: Option) { + self.host_numa_node_id = host_numa_node_id; + } +} + +#[cfg(test)] +mod tests { + use vm_memory::GuestMemoryRegion; + + use super::*; + use crate::test_utils::tests::create_vm_for_test; + + impl Default for MemDeviceConfigInfo { + fn default() -> Self { + MemDeviceConfigInfo { + mem_id: "".to_string(), + size_mib: 0, + capacity_mib: 1024, + multi_region: true, + host_numa_node_id: None, + guest_numa_node_id: None, + use_generic_irq: None, + use_shared_irq: None, + } + } + } + + #[test] + fn test_mem_config_check_conflicts() { + let config = MemDeviceConfigInfo::default(); + let mut config2 = MemDeviceConfigInfo::default(); + assert!(config.check_conflicts(&config2).is_err()); + config2.mem_id = "dummy_mem".to_string(); + assert!(config.check_conflicts(&config2).is_ok()); + } + + #[test] + fn test_create_mem_devices_configs() { + let mgr = MemDeviceMgr::default(); + assert_eq!(mgr.info_list.len(), 0); + assert_eq!(mgr.get_index_of_mem_dev(""), None); + } + + #[test] + fn test_mem_insert_or_update_device() { + // Init vm for test. + let mut vm = create_vm_for_test(); + + // We don't need to use virtio-mem before start vm + // Test for standard config with hotplug + let device_op_ctx = DeviceOpContext::new( + Some(vm.epoll_manager().clone()), + vm.device_manager(), + Some(vm.vm_as().unwrap().clone()), + vm.vm_address_space().cloned(), + true, + Some(VmConfigInfo::default()), + vm.shared_info().clone(), + ); + + let dummy_mem_device = MemDeviceConfigInfo::default(); + vm.device_manager_mut() + .mem_manager + .insert_or_update_device(device_op_ctx, dummy_mem_device) + .unwrap(); + assert_eq!(vm.device_manager().mem_manager.info_list.len(), 1); + } + + #[test] + fn test_mem_attach_device() { + // Init vm and insert mem config for test. 
+ let mut vm = create_vm_for_test(); + let dummy_mem_device = MemDeviceConfigInfo::default(); + vm.device_manager_mut() + .mem_manager + .info_list + .insert_or_update(&dummy_mem_device) + .unwrap(); + assert_eq!(vm.device_manager().mem_manager.info_list.len(), 1); + + // Test for standard config + let mut device_op_ctx = DeviceOpContext::new( + Some(vm.epoll_manager().clone()), + vm.device_manager(), + Some(vm.vm_as().unwrap().clone()), + vm.vm_address_space().cloned(), + false, + Some(VmConfigInfo::default()), + vm.shared_info().clone(), + ); + vm.device_manager_mut() + .mem_manager + .attach_devices(&mut device_op_ctx) + .unwrap(); + assert_eq!(vm.device_manager().mem_manager.info_list.len(), 1); + } + + #[test] + fn test_mem_create_region() { + let vm = create_vm_for_test(); + let ctx = DeviceOpContext::new( + Some(vm.epoll_manager().clone()), + vm.device_manager(), + Some(vm.vm_as().unwrap().clone()), + vm.vm_address_space().cloned(), + true, + Some(VmConfigInfo::default()), + vm.shared_info().clone(), + ); + let mem_id = String::from("mem0"); + let guest_addr = GuestAddress(0x1_0000_0000); + let region_len = 0x1000_0000; + let kvm_slot = 2; + + // no vfio manager, no numa node + let mut factory = MemoryRegionFactory::new(&ctx, mem_id, None).unwrap(); + let region_opt = factory.create_region(guest_addr, region_len, kvm_slot); + assert_eq!(region_opt.unwrap().len(), region_len); + } +} diff --git a/src/dragonball/src/device_manager/memory_region_handler.rs b/src/dragonball/src/device_manager/memory_region_handler.rs new file mode 100644 index 000000000000..fbf5aa20cbf0 --- /dev/null +++ b/src/dragonball/src/device_manager/memory_region_handler.rs @@ -0,0 +1,111 @@ +// Copyright 2022 Alibaba, Inc. or its affiliates. All Rights Reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::io; +use std::sync::Arc; + +use dbs_address_space::{AddressSpace, AddressSpaceRegion, AddressSpaceRegionType}; +use dbs_virtio_devices::{Error as VirtIoError, VirtioRegionHandler}; +use log::{debug, error}; +use vm_memory::{FileOffset, GuestAddressSpace, GuestMemoryRegion, GuestRegionMmap}; + +use crate::address_space_manager::GuestAddressSpaceImpl; + +/// This struct implements the VirtioRegionHandler trait, which inserts the memory +/// region of the virtio device into vm_as and address_space. +/// +/// * After region is inserted into the vm_as, the virtio device can read guest memory +/// data using vm_as.get_slice with GuestAddress. +/// +/// * Insert virtio memory into address_space so that the correct guest last address can +/// be found when initializing the e820 table. The e820 table is a table that describes +/// guest memory prepared before the guest startup. we need to config the correct guest +/// memory address and length in the table. The virtio device memory belongs to the MMIO +/// space and does not belong to the Guest Memory space. Therefore, it cannot be configured +/// into the e820 table. When creating AddressSpaceRegion we use +/// AddressSpaceRegionType::ReservedMemory type, in this way, address_space will know that +/// this region a special memory, it will don't put the this memory in e820 table. +/// +/// This function relies on the atomic-guest-memory feature. Without this feature enabled, memory +/// regions cannot be inserted into vm_as. Because the insert_region interface of vm_as does +/// not insert regions in place, but returns an array of inserted regions. 
We need to manually +/// replace this array of regions with vm_as, and that's what atomic-guest-memory feature does. +/// So we rely on the atomic-guest-memory feature here +pub struct DeviceVirtioRegionHandler { + pub(crate) vm_as: GuestAddressSpaceImpl, + pub(crate) address_space: AddressSpace, +} + +impl DeviceVirtioRegionHandler { + fn insert_address_space( + &mut self, + region: Arc, + ) -> std::result::Result<(), VirtIoError> { + let file_offset = match region.file_offset() { + // TODO: use from_arc + Some(f) => Some(FileOffset::new(f.file().try_clone()?, 0)), + None => None, + }; + + let as_region = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DAXMemory, + region.start_addr(), + region.size() as u64, + None, + file_offset, + region.flags(), + region.prot(), + false, + )); + + self.address_space.insert_region(as_region).map_err(|e| { + error!("inserting address apace error: {}", e); + // dbs-virtio-devices should not depend on dbs-address-space. + // So here io::Error is used instead of AddressSpaceError directly. + VirtIoError::IOError(io::Error::new( + io::ErrorKind::Other, + format!( + "invalid address space region ({0:#x}, {1:#x})", + region.start_addr().0, + region.len() + ), + )) + })?; + Ok(()) + } + + fn insert_vm_as( + &mut self, + region: Arc, + ) -> std::result::Result<(), VirtIoError> { + let vm_as_new = self.vm_as.memory().insert_region(region).map_err(|e| { + error!( + "DeviceVirtioRegionHandler failed to insert guest memory region: {:?}.", + e + ); + VirtIoError::InsertMmap(e) + })?; + // Do not expect poisoned lock here, so safe to unwrap(). + self.vm_as.lock().unwrap().replace(vm_as_new); + + Ok(()) + } +} + +impl VirtioRegionHandler for DeviceVirtioRegionHandler { + fn insert_region( + &mut self, + region: Arc, + ) -> std::result::Result<(), VirtIoError> { + debug!( + "add geust memory region to address_space/vm_as, new region: {:?}", + region + ); + + self.insert_address_space(region.clone())?; + self.insert_vm_as(region)?; + + Ok(()) + } +} diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs new file mode 100644 index 000000000000..1579f329b8d0 --- /dev/null +++ b/src/dragonball/src/device_manager/mod.rs @@ -0,0 +1,1280 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Device manager to manage IO devices for a virtual machine. 
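+//!
+//! The manager is split into per-device-class sub-managers (block, net, fs,
+//! vsock, mem, balloon), each gated behind its own cargo feature, while a
+//! shared `IoManager` (swapped atomically through `ArcSwap`) dispatches guest
+//! IO accesses and is only updated through a transactional context.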
+ +#[cfg(target_arch = "aarch64")] +use std::collections::HashMap; + +use std::io; +use std::sync::{Arc, Mutex, MutexGuard, RwLock}; + +use arc_swap::ArcSwap; +use dbs_address_space::AddressSpace; +#[cfg(target_arch = "aarch64")] +use dbs_arch::{DeviceType, MMIODeviceInfo}; +use dbs_device::device_manager::{Error as IoManagerError, IoManager, IoManagerContext}; +#[cfg(target_arch = "aarch64")] +use dbs_device::resources::DeviceResources; +use dbs_device::resources::Resource; +use dbs_device::DeviceIo; +use dbs_interrupt::KvmIrqManager; +use dbs_legacy_devices::ConsoleHandler; +use dbs_utils::epoll_manager::EpollManager; +use kvm_ioctls::VmFd; + +#[cfg(feature = "dbs-virtio-devices")] +use dbs_device::resources::ResourceConstraint; +#[cfg(feature = "dbs-virtio-devices")] +use dbs_virtio_devices as virtio; +#[cfg(feature = "dbs-virtio-devices")] +use dbs_virtio_devices::{ + mmio::{ + MmioV2Device, DRAGONBALL_FEATURE_INTR_USED, DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY, + DRAGONBALL_MMIO_DOORBELL_SIZE, MMIO_DEFAULT_CFG_SIZE, + }, + VirtioDevice, +}; + +#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] +use dbs_upcall::{ + DevMgrRequest, DevMgrService, MmioDevRequest, UpcallClient, UpcallClientError, + UpcallClientRequest, UpcallClientResponse, +}; +#[cfg(feature = "hotplug")] +use dbs_virtio_devices::vsock::backend::VsockInnerConnector; + +use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::api::v1::InstanceInfo; +use crate::error::StartMicroVmError; +use crate::resource_manager::ResourceManager; +use crate::vm::{KernelConfigInfo, Vm, VmConfigInfo}; +use crate::IoManagerCached; + +/// Virtual machine console device manager. +pub mod console_manager; +/// Console Manager for virtual machines console device. +pub use self::console_manager::ConsoleManager; + +mod legacy; +pub use self::legacy::{Error as LegacyDeviceError, LegacyDeviceManager}; + +#[cfg(target_arch = "aarch64")] +pub use self::legacy::aarch64::{COM1, COM2, RTC}; + +#[cfg(feature = "virtio-vsock")] +/// Device manager for user-space vsock devices. +pub mod vsock_dev_mgr; +#[cfg(feature = "virtio-vsock")] +use self::vsock_dev_mgr::VsockDeviceMgr; + +#[cfg(feature = "virtio-blk")] +/// virtio-block device manager +pub mod blk_dev_mgr; +#[cfg(feature = "virtio-blk")] +use self::blk_dev_mgr::BlockDeviceMgr; + +#[cfg(feature = "virtio-net")] +/// Device manager for virtio-net devices. +pub mod virtio_net_dev_mgr; +#[cfg(feature = "virtio-net")] +use self::virtio_net_dev_mgr::VirtioNetDeviceMgr; + +#[cfg(feature = "virtio-fs")] +/// virtio-block device manager +pub mod fs_dev_mgr; +#[cfg(feature = "virtio-fs")] +use self::fs_dev_mgr::FsDeviceMgr; +#[cfg(feature = "virtio-fs")] +mod memory_region_handler; +#[cfg(feature = "virtio-fs")] +pub use self::memory_region_handler::*; + +#[cfg(feature = "virtio-mem")] +/// Device manager for virtio-mem devices. +pub mod mem_dev_mgr; +#[cfg(feature = "virtio-mem")] +use self::mem_dev_mgr::MemDeviceMgr; + +#[cfg(feature = "virtio-balloon")] +/// Device manager for virtio-balloon devices. +pub mod balloon_dev_mgr; +#[cfg(feature = "virtio-balloon")] +use self::balloon_dev_mgr::BalloonDeviceMgr; + +macro_rules! info( + ($l:expr, $($args:tt)+) => { + slog::info!($l, $($args)+; slog::o!("subsystem" => "device_manager")) + }; +); + +/// Errors related to device manager operations. +#[derive(Debug, thiserror::Error)] +pub enum DeviceMgrError { + /// Invalid operation. + #[error("invalid device manager operation")] + InvalidOperation, + + /// Failed to get device resource. 
+ #[error("failed to get device assigned resources")] + GetDeviceResource, + + /// Appending to kernel command line failed. + #[error("failed to add kernel command line parameter for device: {0}")] + Cmdline(#[source] linux_loader::cmdline::Error), + + /// Failed to manage console devices. + #[error(transparent)] + ConsoleManager(console_manager::ConsoleManagerError), + + /// Failed to create the device. + #[error("failed to create virtual device: {0}")] + CreateDevice(#[source] io::Error), + + /// Failed to perform an operation on the bus. + #[error(transparent)] + IoManager(IoManagerError), + + /// Failure from legacy device manager. + #[error(transparent)] + LegacyManager(legacy::Error), + + #[cfg(feature = "dbs-virtio-devices")] + /// Error from Virtio subsystem. + #[error(transparent)] + Virtio(virtio::Error), + + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] + /// Failed to hotplug the device. + #[error("failed to hotplug virtual device")] + HotplugDevice(#[source] UpcallClientError), + + /// Failed to free device resource. + #[error("failed to free device resources: {0}")] + ResourceError(#[source] crate::resource_manager::ResourceError), +} + +/// Specialized version of `std::result::Result` for device manager operations. +pub type Result = ::std::result::Result; + +/// Type of the dragonball virtio devices. +#[cfg(feature = "dbs-virtio-devices")] +pub type DbsVirtioDevice = Box< + dyn VirtioDevice, +>; + +/// Type of the dragonball virtio mmio devices. +#[cfg(feature = "dbs-virtio-devices")] +pub type DbsMmioV2Device = + MmioV2Device; + +/// Struct to support transactional operations for device management. +pub struct DeviceManagerTx { + io_manager: IoManager, + _io_lock: Arc>, + _guard: MutexGuard<'static, ()>, +} + +impl DeviceManagerTx { + fn new(mgr_ctx: &DeviceManagerContext) -> Self { + // Do not expect poisoned lock. + let guard = mgr_ctx.io_lock.lock().unwrap(); + + // It's really a heavy burden to carry on a lifetime parameter for MutexGuard. + // So we play a tricky here that we hold a reference to the Arc> and transmute + // the MutexGuard<'a, ()> to MutexGuard<'static, ()>. + // It's safe because we hold a reference to the Mutex lock. + let guard = + unsafe { std::mem::transmute::, MutexGuard<'static, ()>>(guard) }; + + DeviceManagerTx { + io_manager: mgr_ctx.io_manager.load().as_ref().clone(), + _io_lock: mgr_ctx.io_lock.clone(), + _guard: guard, + } + } +} + +/// Operation context for device management. +#[derive(Clone)] +pub struct DeviceManagerContext { + io_manager: Arc>, + io_lock: Arc>, +} + +impl DeviceManagerContext { + /// Create a DeviceManagerContext object. 
+ pub fn new(io_manager: Arc>, io_lock: Arc>) -> Self { + DeviceManagerContext { + io_manager, + io_lock, + } + } +} + +impl IoManagerContext for DeviceManagerContext { + type Context = DeviceManagerTx; + + fn begin_tx(&self) -> Self::Context { + DeviceManagerTx::new(self) + } + + fn commit_tx(&self, context: Self::Context) { + self.io_manager.store(Arc::new(context.io_manager)); + } + + fn cancel_tx(&self, context: Self::Context) { + drop(context); + } + + fn register_device_io( + &self, + ctx: &mut Self::Context, + device: Arc, + resources: &[Resource], + ) -> std::result::Result<(), dbs_device::device_manager::Error> { + ctx.io_manager.register_device_io(device, resources) + } + + fn unregister_device_io( + &self, + ctx: &mut Self::Context, + resources: &[Resource], + ) -> std::result::Result<(), dbs_device::device_manager::Error> { + ctx.io_manager.unregister_device_io(resources) + } +} + +/// Context for device addition/removal operations. +pub struct DeviceOpContext { + epoll_mgr: Option, + io_context: DeviceManagerContext, + irq_manager: Arc, + res_manager: Arc, + vm_fd: Arc, + vm_as: Option, + address_space: Option, + logger: slog::Logger, + is_hotplug: bool, + + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] + upcall_client: Option>>, + #[cfg(feature = "dbs-virtio-devices")] + virtio_devices: Vec>, + vm_config: Option, + shared_info: Arc>, +} + +impl DeviceOpContext { + pub(crate) fn new( + epoll_mgr: Option, + device_mgr: &DeviceManager, + vm_as: Option, + address_space: Option, + is_hotplug: bool, + vm_config: Option, + shared_info: Arc>, + ) -> Self { + let irq_manager = device_mgr.irq_manager.clone(); + let res_manager = device_mgr.res_manager.clone(); + + let vm_fd = device_mgr.vm_fd.clone(); + let io_context = DeviceManagerContext { + io_manager: device_mgr.io_manager.clone(), + io_lock: device_mgr.io_lock.clone(), + }; + let logger = device_mgr.logger.new(slog::o!()); + + DeviceOpContext { + epoll_mgr, + io_context, + irq_manager, + res_manager, + vm_fd, + vm_as, + address_space, + logger, + is_hotplug, + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] + upcall_client: None, + #[cfg(feature = "dbs-virtio-devices")] + virtio_devices: Vec::new(), + vm_config, + shared_info, + } + } + + pub(crate) fn create_boot_ctx(vm: &Vm, epoll_mgr: Option) -> Self { + Self::new( + epoll_mgr, + vm.device_manager(), + None, + None, + false, + Some(vm.vm_config().clone()), + vm.shared_info().clone(), + ) + } + + pub(crate) fn get_vm_as(&self) -> Result { + match self.vm_as.as_ref() { + Some(v) => Ok(v.clone()), + None => Err(DeviceMgrError::InvalidOperation), + } + } + + pub(crate) fn get_vm_config(&self) -> Result { + match self.vm_config.as_ref() { + Some(v) => Ok(v.clone()), + None => Err(DeviceMgrError::InvalidOperation), + } + } + + pub(crate) fn get_address_space(&self) -> Result { + match self.address_space.as_ref() { + Some(v) => Ok(v.clone()), + None => Err(DeviceMgrError::InvalidOperation), + } + } + + pub(crate) fn get_epoll_mgr(&self) -> Result { + match self.epoll_mgr.as_ref() { + Some(v) => Ok(v.clone()), + None => Err(DeviceMgrError::InvalidOperation), + } + } + + pub(crate) fn logger(&self) -> &slog::Logger { + &self.logger + } + + #[allow(unused_variables)] + fn generate_kernel_boot_args(&mut self, kernel_config: &mut KernelConfigInfo) -> Result<()> { + if self.is_hotplug { + return Err(DeviceMgrError::InvalidOperation); + } + + #[cfg(feature = "dbs-virtio-devices")] + { + let cmdline = kernel_config.kernel_cmdline_mut(); + + for device in 
self.virtio_devices.iter() { + let (mmio_base, mmio_size, irq) = DeviceManager::get_virtio_device_info(device)?; + + // as per doc, [virtio_mmio.]device=@: needs to be appended + // to kernel commandline for virtio mmio devices to get recognized + // the size parameter has to be transformed to KiB, so dividing hexadecimal value in + // bytes to 1024; further, the '{}' formatting rust construct will automatically + // transform it to decimal + cmdline + .insert( + "virtio_mmio.device", + &format!("{}K@0x{:08x}:{}", mmio_size / 1024, mmio_base, irq), + ) + .map_err(DeviceMgrError::Cmdline)?; + } + } + + Ok(()) + } + + #[cfg(target_arch = "aarch64")] + fn generate_virtio_device_info(&self) -> Result> { + let mut dev_info = HashMap::new(); + #[cfg(feature = "dbs-virtio-devices")] + for (_index, device) in self.virtio_devices.iter().enumerate() { + let (mmio_base, mmio_size, irq) = DeviceManager::get_virtio_mmio_device_info(device)?; + let dev_type; + let device_id; + if let Some(mmiov2_device) = device.as_any().downcast_ref::() { + dev_type = mmiov2_device.get_device_type(); + device_id = None; + } else { + return Err(DeviceMgrError::InvalidOperation); + } + dev_info.insert( + ( + DeviceType::Virtio(dev_type), + format!("virtio-{}@0x{:08x?}", dev_type, mmio_base), + ), + MMIODeviceInfo::new(mmio_base, mmio_size, vec![irq], device_id), + ); + } + Ok(dev_info) + } +} + +#[cfg(all(feature = "hotplug", not(feature = "dbs-upcall")))] +impl DeviceOpContext { + pub(crate) fn insert_hotplug_mmio_device( + &self, + _dev: &Arc, + _callback: Option<()>, + ) -> Result<()> { + Err(DeviceMgrError::InvalidOperation) + } + + pub(crate) fn remove_hotplug_mmio_device( + &self, + _dev: &Arc, + _callback: Option<()>, + ) -> Result<()> { + Err(DeviceMgrError::InvalidOperation) + } +} + +#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] +impl DeviceOpContext { + pub(crate) fn create_hotplug_ctx(vm: &Vm, epoll_mgr: Option) -> Self { + let vm_as = vm.vm_as().expect("VM should have memory ready").clone(); + + let mut ctx = Self::new( + epoll_mgr, + vm.device_manager(), + Some(vm_as), + vm.vm_address_space().cloned(), + true, + Some(vm.vm_config().clone()), + vm.shared_info().clone(), + ); + ctx.upcall_client = vm.upcall_client().clone(); + ctx + } + + fn call_hotplug_device( + &self, + req: DevMgrRequest, + callback: Option>, + ) -> Result<()> { + if let Some(upcall_client) = self.upcall_client.as_ref() { + if let Some(cb) = callback { + upcall_client + .send_request(UpcallClientRequest::DevMgr(req), cb) + .map_err(DeviceMgrError::HotplugDevice)?; + } else { + upcall_client + .send_request_without_result(UpcallClientRequest::DevMgr(req)) + .map_err(DeviceMgrError::HotplugDevice)?; + } + Ok(()) + } else { + Err(DeviceMgrError::InvalidOperation) + } + } + + pub(crate) fn insert_hotplug_mmio_device( + &self, + dev: &Arc, + callback: Option>, + ) -> Result<()> { + if !self.is_hotplug { + return Err(DeviceMgrError::InvalidOperation); + } + + let (mmio_base, mmio_size, mmio_irq) = DeviceManager::get_virtio_device_info(dev)?; + let req = DevMgrRequest::AddMmioDev(MmioDevRequest { + mmio_base, + mmio_size, + mmio_irq, + }); + + self.call_hotplug_device(req, callback) + } + + pub(crate) fn remove_hotplug_mmio_device( + &self, + dev: &Arc, + callback: Option>, + ) -> Result<()> { + if !self.is_hotplug { + return Err(DeviceMgrError::InvalidOperation); + } + let (mmio_base, mmio_size, mmio_irq) = DeviceManager::get_virtio_device_info(dev)?; + let req = DevMgrRequest::DelMmioDev(MmioDevRequest { + mmio_base, + mmio_size, + 
mmio_irq, + }); + + self.call_hotplug_device(req, callback) + } +} + +#[cfg(all(feature = "hotplug", feature = "acpi"))] +impl DeviceOpContext { + // TODO: We will implement this when we develop ACPI virtualization +} + +/// Device manager for virtual machines, which manages all device for a virtual machine. +pub struct DeviceManager { + io_manager: Arc>, + io_lock: Arc>, + irq_manager: Arc, + res_manager: Arc, + vm_fd: Arc, + pub(crate) logger: slog::Logger, + pub(crate) shared_info: Arc>, + pub(crate) con_manager: ConsoleManager, + pub(crate) legacy_manager: Option, + #[cfg(target_arch = "aarch64")] + pub(crate) mmio_device_info: HashMap<(DeviceType, String), MMIODeviceInfo>, + #[cfg(feature = "virtio-vsock")] + pub(crate) vsock_manager: VsockDeviceMgr, + + #[cfg(feature = "virtio-blk")] + // If there is a Root Block Device, this should be added as the first element of the list. + // This is necessary because we want the root to always be mounted on /dev/vda. + pub(crate) block_manager: BlockDeviceMgr, + + #[cfg(feature = "virtio-net")] + pub(crate) virtio_net_manager: VirtioNetDeviceMgr, + + #[cfg(feature = "virtio-fs")] + fs_manager: Arc>, + + #[cfg(feature = "virtio-mem")] + pub(crate) mem_manager: MemDeviceMgr, + + #[cfg(feature = "virtio-balloon")] + pub(crate) balloon_manager: BalloonDeviceMgr, +} + +impl DeviceManager { + /// Create a new device manager instance. + pub fn new( + vm_fd: Arc, + res_manager: Arc, + epoll_manager: EpollManager, + logger: &slog::Logger, + shared_info: Arc>, + ) -> Self { + DeviceManager { + io_manager: Arc::new(ArcSwap::new(Arc::new(IoManager::new()))), + io_lock: Arc::new(Mutex::new(())), + irq_manager: Arc::new(KvmIrqManager::new(vm_fd.clone())), + res_manager, + vm_fd, + logger: logger.new(slog::o!()), + shared_info, + + con_manager: ConsoleManager::new(epoll_manager, logger), + legacy_manager: None, + #[cfg(target_arch = "aarch64")] + mmio_device_info: HashMap::new(), + #[cfg(feature = "virtio-vsock")] + vsock_manager: VsockDeviceMgr::default(), + #[cfg(feature = "virtio-blk")] + block_manager: BlockDeviceMgr::default(), + #[cfg(feature = "virtio-net")] + virtio_net_manager: VirtioNetDeviceMgr::default(), + #[cfg(feature = "virtio-fs")] + fs_manager: Arc::new(Mutex::new(FsDeviceMgr::default())), + #[cfg(feature = "virtio-mem")] + mem_manager: MemDeviceMgr::default(), + #[cfg(feature = "virtio-balloon")] + balloon_manager: BalloonDeviceMgr::default(), + } + } + + /// Get the underlying IoManager to dispatch IO read/write requests. + pub fn io_manager(&self) -> IoManagerCached { + IoManagerCached::new(self.io_manager.clone()) + } + + /// Create the underline interrupt manager for the device manager. + pub fn create_interrupt_manager(&mut self) -> Result<()> { + self.irq_manager + .initialize() + .map_err(DeviceMgrError::CreateDevice) + } + + /// Get the underlying logger. 
+ pub fn logger(&self) -> &slog::Logger { + &self.logger + } + + /// Create legacy devices associted virtual machine + #[allow(unused_variables)] + pub fn create_legacy_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), StartMicroVmError> { + #[cfg(any( + target_arch = "x86_64", + all(target_arch = "aarch64", feature = "dbs-virtio-devices") + ))] + { + let mut tx = ctx.io_context.begin_tx(); + let legacy_manager; + + #[cfg(target_arch = "x86_64")] + { + legacy_manager = LegacyDeviceManager::create_manager( + &mut tx.io_manager, + Some(self.vm_fd.clone()), + ); + } + + #[cfg(target_arch = "aarch64")] + #[cfg(feature = "dbs-virtio-devices")] + { + let resources = self.get_legacy_resources()?; + legacy_manager = LegacyDeviceManager::create_manager( + &mut tx.io_manager, + Some(self.vm_fd.clone()), + &resources, + ); + } + + match legacy_manager { + Ok(v) => { + self.legacy_manager = Some(v); + ctx.io_context.commit_tx(tx); + } + Err(e) => { + ctx.io_context.cancel_tx(tx); + return Err(StartMicroVmError::LegacyDevice(e)); + } + } + } + + Ok(()) + } + + /// Init legacy devices with logger stream in associted virtual machine + pub fn init_legacy_devices( + &mut self, + dmesg_fifo: Option>, + com1_sock_path: Option, + _ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), StartMicroVmError> { + // Connect serial ports to the console and dmesg_fifo. + self.set_guest_kernel_log_stream(dmesg_fifo) + .map_err(|_| StartMicroVmError::EventFd)?; + + info!(self.logger, "init console path: {:?}", com1_sock_path); + if let Some(legacy_manager) = self.legacy_manager.as_ref() { + let com1 = legacy_manager.get_com1_serial(); + if let Some(path) = com1_sock_path { + self.con_manager + .create_socket_console(com1, path) + .map_err(StartMicroVmError::DeviceManager)?; + } else { + self.con_manager + .create_stdio_console(com1) + .map_err(StartMicroVmError::DeviceManager)?; + } + } + + Ok(()) + } + + /// Set the stream for guest kernel log. + /// + /// Note: com2 is used for guest kernel logging. + /// TODO: check whether it works with aarch64. + pub fn set_guest_kernel_log_stream( + &self, + stream: Option>, + ) -> std::result::Result<(), io::Error> { + if let Some(legacy) = self.legacy_manager.as_ref() { + legacy + .get_com2_serial() + .lock() + .unwrap() + .set_output_stream(stream); + } + Ok(()) + } + + /// Reset the console into canonical mode. + pub fn reset_console(&self) -> Result<()> { + self.con_manager.reset_console() + } + + /// Create all registered devices when booting the associated virtual machine. 
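+ ///
+ /// Legacy (console/serial) devices are set up first, then the feature-gated
+ /// virtio device managers are attached in order (block, fs, net, vsock),
+ /// and finally the virtio MMIO entries are appended to the kernel command
+ /// line.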
+ pub fn create_devices( + &mut self, + vm_as: GuestAddressSpaceImpl, + epoll_mgr: EpollManager, + kernel_config: &mut KernelConfigInfo, + dmesg_fifo: Option>, + address_space: Option<&AddressSpace>, + vm_config: &VmConfigInfo, + ) -> std::result::Result<(), StartMicroVmError> { + let mut ctx = DeviceOpContext::new( + Some(epoll_mgr), + self, + Some(vm_as), + address_space.cloned(), + false, + Some(vm_config.clone()), + self.shared_info.clone(), + ); + + let com1_sock_path = vm_config.serial_path.clone(); + + self.create_legacy_devices(&mut ctx)?; + self.init_legacy_devices(dmesg_fifo, com1_sock_path, &mut ctx)?; + + #[cfg(feature = "virtio-blk")] + self.block_manager + .attach_devices(&mut ctx) + .map_err(StartMicroVmError::BlockDeviceError)?; + + #[cfg(feature = "virtio-fs")] + { + let mut fs_manager = self.fs_manager.lock().unwrap(); + fs_manager + .attach_devices(&mut ctx) + .map_err(StartMicroVmError::FsDeviceError)?; + } + + #[cfg(feature = "virtio-net")] + self.virtio_net_manager + .attach_devices(&mut ctx) + .map_err(StartMicroVmError::VirtioNetDeviceError)?; + + #[cfg(feature = "virtio-vsock")] + self.vsock_manager.attach_devices(&mut ctx)?; + + #[cfg(feature = "virtio-blk")] + self.block_manager + .generate_kernel_boot_args(kernel_config) + .map_err(StartMicroVmError::DeviceManager)?; + ctx.generate_kernel_boot_args(kernel_config) + .map_err(StartMicroVmError::DeviceManager)?; + + #[cfg(target_arch = "aarch64")] + { + let dev_info = ctx + .generate_virtio_device_info() + .map_err(StartMicroVmError::DeviceManager)?; + self.mmio_device_info.extend(dev_info); + } + + Ok(()) + } + + /// Start all registered devices when booting the associated virtual machine. + pub fn start_devices(&mut self) -> std::result::Result<(), StartMicroVmError> { + // TODO: add vfio support here. issue #4589. + Ok(()) + } + + /// Remove all devices when shutdown the associated virtual machine + pub fn remove_devices( + &mut self, + vm_as: GuestAddressSpaceImpl, + epoll_mgr: EpollManager, + address_space: Option<&AddressSpace>, + ) -> Result<()> { + // create context for removing devices + let mut ctx = DeviceOpContext::new( + Some(epoll_mgr), + self, + Some(vm_as), + address_space.cloned(), + true, + None, + self.shared_info.clone(), + ); + + #[cfg(feature = "virtio-blk")] + self.block_manager.remove_devices(&mut ctx)?; + // FIXME: To acquire the full abilities for gracefully removing + // virtio-net and virtio-vsock devices, updating dragonball-sandbox + // is required. + #[cfg(feature = "virtio-net")] + self.virtio_net_manager.remove_devices(&mut ctx)?; + #[cfg(feature = "virtio-vsock")] + self.vsock_manager.remove_devices(&mut ctx)?; + + Ok(()) + } +} + +#[cfg(target_arch = "x86_64")] +impl DeviceManager { + /// Get the underlying eventfd for vm exit notification. + pub fn get_reset_eventfd(&self) -> Result { + if let Some(legacy) = self.legacy_manager.as_ref() { + legacy + .get_reset_eventfd() + .map_err(DeviceMgrError::LegacyManager) + } else { + Err(DeviceMgrError::LegacyManager(legacy::Error::EventFd( + io::Error::from_raw_os_error(libc::ENOENT), + ))) + } + } +} + +#[cfg(target_arch = "aarch64")] +impl DeviceManager { + /// Return mmio device info for FDT build. 
+ pub fn get_mmio_device_info(&self) -> Option<&HashMap<(DeviceType, String), MMIODeviceInfo>> { + Some(&self.mmio_device_info) + } + + #[cfg(feature = "dbs-virtio-devices")] + fn get_legacy_resources( + &mut self, + ) -> std::result::Result, StartMicroVmError> { + let mut resources = HashMap::new(); + let legacy_devices = vec![ + (DeviceType::Serial, String::from(COM1)), + (DeviceType::Serial, String::from(COM2)), + (DeviceType::RTC, String::from(RTC)), + ]; + + for (device_type, device_id) in legacy_devices { + let res = self.allocate_mmio_device_resource()?; + self.add_mmio_device_info(&res, device_type, device_id.clone(), None); + resources.insert(device_id.clone(), res); + } + + Ok(resources) + } + + fn mmio_device_info_to_resources( + &self, + key: &(DeviceType, String), + ) -> std::result::Result { + self.mmio_device_info + .get(key) + .map(|info| { + let mut resources = DeviceResources::new(); + resources.append(Resource::LegacyIrq(info.irqs[0])); + resources.append(Resource::MmioAddressRange { + base: info.base, + size: info.size, + }); + resources + }) + .ok_or(StartMicroVmError::DeviceManager( + DeviceMgrError::GetDeviceResource, + )) + } + + #[cfg(feature = "dbs-virtio-devices")] + fn allocate_mmio_device_resource( + &self, + ) -> std::result::Result { + let requests = vec![ + ResourceConstraint::MmioAddress { + range: None, + align: MMIO_DEFAULT_CFG_SIZE, + size: MMIO_DEFAULT_CFG_SIZE, + }, + ResourceConstraint::LegacyIrq { irq: None }, + ]; + + self.res_manager + .allocate_device_resources(&requests, false) + .map_err(StartMicroVmError::AllocateResource) + } + + fn add_mmio_device_info( + &mut self, + resource: &DeviceResources, + device_type: DeviceType, + device_id: String, + msi_device_id: Option, + ) { + let (base, size) = resource.get_mmio_address_ranges()[0]; + let irq = resource.get_legacy_irq().unwrap(); + self.mmio_device_info.insert( + (device_type, device_id), + MMIODeviceInfo::new(base, size, vec![irq], msi_device_id), + ); + } + + #[cfg(feature = "dbs-virtio-devices")] + fn get_virtio_mmio_device_info(device: &Arc) -> Result<(u64, u64, u32)> { + let resources = device.get_assigned_resources(); + let irq = resources + .get_legacy_irq() + .ok_or(DeviceMgrError::GetDeviceResource)?; + + if let Some(mmio_dev) = device.as_any().downcast_ref::() { + if let Resource::MmioAddressRange { base, size } = mmio_dev.get_mmio_cfg_res() { + return Ok((base, size, irq)); + } + } + + Err(DeviceMgrError::GetDeviceResource) + } +} + +#[cfg(feature = "dbs-virtio-devices")] +impl DeviceManager { + fn get_virtio_device_info(device: &Arc) -> Result<(u64, u64, u32)> { + let resources = device.get_assigned_resources(); + let irq = resources + .get_legacy_irq() + .ok_or(DeviceMgrError::GetDeviceResource)?; + let mmio_address_range = device.get_trapped_io_resources().get_mmio_address_ranges(); + + // Assume the first MMIO region is virtio configuration region. + // Virtio-fs needs to pay attention to this assumption. + if let Some(range) = mmio_address_range.into_iter().next() { + Ok((range.0, range.1, irq)) + } else { + Err(DeviceMgrError::GetDeviceResource) + } + } + + /// Create an Virtio MMIO transport layer device for the virtio backend device. 
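+ ///
+ /// This is a thin wrapper around `create_mmio_virtio_device_with_features`
+ /// that enables the Dragonball-specific `DRAGONBALL_FEATURE_INTR_USED` and
+ /// `DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY` transport features.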
+ pub fn create_mmio_virtio_device( + device: DbsVirtioDevice, + ctx: &mut DeviceOpContext, + use_shared_irq: bool, + use_generic_irq: bool, + ) -> std::result::Result, DeviceMgrError> { + let features = DRAGONBALL_FEATURE_INTR_USED | DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY; + DeviceManager::create_mmio_virtio_device_with_features( + device, + ctx, + Some(features), + use_shared_irq, + use_generic_irq, + ) + } + + /// Create an Virtio MMIO transport layer device for the virtio backend device with configure + /// change notification enabled. + pub fn create_mmio_virtio_device_with_device_change_notification( + device: DbsVirtioDevice, + ctx: &mut DeviceOpContext, + use_shared_irq: bool, + use_generic_irq: bool, + ) -> std::result::Result, DeviceMgrError> { + let features = DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY; + DeviceManager::create_mmio_virtio_device_with_features( + device, + ctx, + Some(features), + use_shared_irq, + use_generic_irq, + ) + } + + /// Create an Virtio MMIO transport layer device for the virtio backend device with specified + /// features. + pub fn create_mmio_virtio_device_with_features( + device: DbsVirtioDevice, + ctx: &mut DeviceOpContext, + features: Option, + use_shared_irq: bool, + use_generic_irq: bool, + ) -> std::result::Result, DeviceMgrError> { + // Every emulated Virtio MMIO device needs a 4K configuration space, + // and another 4K space for per queue notification. + const MMIO_ADDRESS_DEFAULT: ResourceConstraint = ResourceConstraint::MmioAddress { + range: None, + align: 0, + size: MMIO_DEFAULT_CFG_SIZE + DRAGONBALL_MMIO_DOORBELL_SIZE, + }; + let mut requests = vec![MMIO_ADDRESS_DEFAULT]; + device.get_resource_requirements(&mut requests, use_generic_irq); + let resources = ctx + .res_manager + .allocate_device_resources(&requests, use_shared_irq) + .map_err(|_| DeviceMgrError::GetDeviceResource)?; + + let virtio_dev = match MmioV2Device::new( + ctx.vm_fd.clone(), + ctx.get_vm_as()?, + ctx.irq_manager.clone(), + device, + resources, + features, + ) { + Ok(d) => d, + Err(e) => return Err(DeviceMgrError::Virtio(e)), + }; + + Self::register_mmio_virtio_device(Arc::new(virtio_dev), ctx) + } + + /// Teardown the Virtio MMIO transport layer device associated with the virtio backend device. + pub fn destroy_mmio_virtio_device( + device: Arc, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), DeviceMgrError> { + Self::destroy_mmio_device(device.clone(), ctx)?; + + let mmio_dev = device + .as_any() + .downcast_ref::() + .ok_or(DeviceMgrError::InvalidOperation)?; + + mmio_dev.remove(); + + Ok(()) + } + + fn destroy_mmio_device( + device: Arc, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), DeviceMgrError> { + // unregister IoManager + Self::deregister_mmio_virtio_device(&device, ctx)?; + + // unregister Resource manager + let resources = device.get_assigned_resources(); + ctx.res_manager + .free_device_resources(&resources) + .map_err(DeviceMgrError::ResourceError)?; + + Ok(()) + } + + /// Create an Virtio MMIO transport layer device for the virtio backend device. 
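+ ///
+ /// More precisely, this registers an already constructed virtio MMIO device:
+ /// its trapped IO resources are added to the `IoManager` inside a
+ /// transaction, and the device is recorded in the context's `virtio_devices`
+ /// list so its kernel command line entry can be generated later.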
+ pub fn register_mmio_virtio_device( + device: Arc, + ctx: &mut DeviceOpContext, + ) -> std::result::Result, DeviceMgrError> { + let (mmio_base, mmio_size, irq) = Self::get_virtio_device_info(&device)?; + info!( + ctx.logger(), + "create virtio mmio device 0x{:x}@0x{:x}, irq: 0x{:x}", mmio_size, mmio_base, irq + ); + let resources = device.get_trapped_io_resources(); + + let mut tx = ctx.io_context.begin_tx(); + if let Err(e) = ctx + .io_context + .register_device_io(&mut tx, device.clone(), &resources) + { + ctx.io_context.cancel_tx(tx); + Err(DeviceMgrError::IoManager(e)) + } else { + ctx.virtio_devices.push(device.clone()); + ctx.io_context.commit_tx(tx); + Ok(device) + } + } + + /// Deregister a Virtio MMIO device from IoManager + pub fn deregister_mmio_virtio_device( + device: &Arc, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), DeviceMgrError> { + let resources = device.get_trapped_io_resources(); + info!( + ctx.logger(), + "unregister mmio virtio device: {:?}", resources + ); + let mut tx = ctx.io_context.begin_tx(); + if let Err(e) = ctx.io_context.unregister_device_io(&mut tx, &resources) { + ctx.io_context.cancel_tx(tx); + Err(DeviceMgrError::IoManager(e)) + } else { + ctx.io_context.commit_tx(tx); + Ok(()) + } + } +} + +#[cfg(feature = "hotplug")] +impl DeviceManager { + /// Get Unix Domain Socket path for the vsock device. + pub fn get_vsock_inner_connector(&mut self) -> Option { + #[cfg(feature = "virtio-vsock")] + { + self.vsock_manager + .get_default_connector() + .map(Some) + .unwrap_or(None) + } + #[cfg(not(feature = "virtio-vsock"))] + { + return None; + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::{Arc, Mutex}; + + use kvm_ioctls::Kvm; + use test_utils::skip_if_not_root; + use vm_memory::{GuestAddress, MmapRegion}; + + use super::*; + #[cfg(target_arch = "x86_64")] + use crate::vm::CpuTopology; + + impl DeviceManager { + pub fn new_test_mgr() -> Self { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vm_fd = Arc::new(vm); + let epoll_manager = EpollManager::default(); + let res_manager = Arc::new(ResourceManager::new(None)); + let logger = slog_scope::logger().new(slog::o!()); + let shared_info = Arc::new(RwLock::new(InstanceInfo::new( + String::from("dragonball"), + String::from("1"), + ))); + + DeviceManager { + vm_fd: Arc::clone(&vm_fd), + con_manager: ConsoleManager::new(epoll_manager, &logger), + io_manager: Arc::new(ArcSwap::new(Arc::new(IoManager::new()))), + io_lock: Arc::new(Mutex::new(())), + irq_manager: Arc::new(KvmIrqManager::new(vm_fd.clone())), + res_manager, + + legacy_manager: None, + #[cfg(feature = "virtio-blk")] + block_manager: BlockDeviceMgr::default(), + #[cfg(feature = "virtio-fs")] + fs_manager: Arc::new(Mutex::new(FsDeviceMgr::default())), + #[cfg(feature = "virtio-net")] + virtio_net_manager: VirtioNetDeviceMgr::default(), + #[cfg(feature = "virtio-vsock")] + vsock_manager: VsockDeviceMgr::default(), + #[cfg(feature = "virtio-mem")] + mem_manager: MemDeviceMgr::default(), + #[cfg(feature = "virtio-balloon")] + balloon_manager: BalloonDeviceMgr::default(), + #[cfg(target_arch = "aarch64")] + mmio_device_info: HashMap::new(), + + logger, + shared_info, + } + } + } + + #[test] + fn test_create_device_manager() { + skip_if_not_root!(); + let mgr = DeviceManager::new_test_mgr(); + let _ = mgr.io_manager(); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_create_devices() { + skip_if_not_root!(); + use crate::vm::VmConfigInfo; + + let epoll_manager = EpollManager::default(); + let vmm = 
Arc::new(Mutex::new(crate::vmm::tests::create_vmm_instance( + epoll_manager.clone(), + ))); + let event_mgr = crate::event_manager::EventManager::new(&vmm, epoll_manager).unwrap(); + let mut vm = crate::vm::tests::create_vm_instance(); + let vm_config = VmConfigInfo { + vcpu_count: 1, + max_vcpu_count: 1, + cpu_pm: "off".to_string(), + mem_type: "shmem".to_string(), + mem_file_path: "".to_string(), + mem_size_mib: 16, + serial_path: None, + cpu_topology: CpuTopology { + threads_per_core: 1, + cores_per_die: 1, + dies_per_socket: 1, + sockets: 1, + }, + vpmu_feature: 0, + }; + vm.set_vm_config(vm_config.clone()); + vm.init_guest_memory().unwrap(); + vm.setup_interrupt_controller().unwrap(); + let vm_as = vm.vm_as().cloned().unwrap(); + let kernel_temp_file = vmm_sys_util::tempfile::TempFile::new().unwrap(); + let kernel_file = kernel_temp_file.into_file(); + let mut cmdline = crate::vm::KernelConfigInfo::new( + kernel_file, + None, + linux_loader::cmdline::Cmdline::new(0x1000).unwrap(), + ); + + let address_space = vm.vm_address_space().cloned(); + let mgr = vm.device_manager_mut(); + let guard = mgr.io_manager.load(); + let mut lcr = [0u8]; + // 0x3f8 is the adddress of serial device + guard.pio_read(0x3f8 + 3, &mut lcr).unwrap_err(); + assert_eq!(lcr[0], 0x0); + + mgr.create_interrupt_manager().unwrap(); + mgr.create_devices( + vm_as, + event_mgr.epoll_manager(), + &mut cmdline, + None, + address_space.as_ref(), + &vm_config, + ) + .unwrap(); + let guard = mgr.io_manager.load(); + guard.pio_read(0x3f8 + 3, &mut lcr).unwrap(); + assert_eq!(lcr[0], 0x3); + } + + #[cfg(feature = "virtio-fs")] + #[test] + fn test_handler_insert_region() { + skip_if_not_root!(); + + use dbs_virtio_devices::VirtioRegionHandler; + use lazy_static::__Deref; + use vm_memory::{GuestAddressSpace, GuestMemory, GuestMemoryRegion}; + + let vm = crate::test_utils::tests::create_vm_for_test(); + let ctx = DeviceOpContext::new( + Some(vm.epoll_manager().clone()), + vm.device_manager(), + Some(vm.vm_as().unwrap().clone()), + vm.vm_address_space().cloned(), + true, + Some(vm.vm_config().clone()), + vm.shared_info().clone(), + ); + #[cfg(target_arch = "x86_64")] + let guest_addr = GuestAddress(0x200000000000); + // TODO: #7290 - https://github.com/kata-containers/kata-containers/issues/7290 + #[cfg(target_arch = "aarch64")] + let guest_addr = GuestAddress(0xF800000000); + + let cache_len = 1024 * 1024 * 1024; + let mmap_region = MmapRegion::build( + None, + cache_len as usize, + libc::PROT_NONE, + libc::MAP_ANONYMOUS | libc::MAP_NORESERVE | libc::MAP_PRIVATE, + ) + .unwrap(); + + let guest_mmap_region = + Arc::new(vm_memory::GuestRegionMmap::new(mmap_region, guest_addr).unwrap()); + + let mut handler = DeviceVirtioRegionHandler { + vm_as: ctx.get_vm_as().unwrap(), + address_space: ctx.address_space.as_ref().unwrap().clone(), + }; + handler.insert_region(guest_mmap_region).unwrap(); + let mut find_region = false; + let find_region_ptr = &mut find_region; + + let guard = vm.vm_as().unwrap().clone().memory(); + + let mem = guard.deref(); + for region in mem.iter() { + if region.start_addr() == guest_addr && region.len() == cache_len { + *find_region_ptr = true; + } + } + + assert!(find_region); + } +} diff --git a/src/dragonball/src/device_manager/virtio_net_dev_mgr.rs b/src/dragonball/src/device_manager/virtio_net_dev_mgr.rs new file mode 100644 index 000000000000..f57c4ed67b69 --- /dev/null +++ b/src/dragonball/src/device_manager/virtio_net_dev_mgr.rs @@ -0,0 +1,399 @@ +// Copyright 2020-2022 Alibaba, Inc. or its affiliates. 
All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::convert::TryInto; +use std::sync::Arc; + +use dbs_utils::net::{MacAddr, Tap, TapError}; +use dbs_utils::rate_limiter::BucketUpdate; +use dbs_virtio_devices as virtio; +use dbs_virtio_devices::net::Net; +use dbs_virtio_devices::Error as VirtioError; +use serde_derive::{Deserialize, Serialize}; + +use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::config_manager::{ + ConfigItem, DeviceConfigInfo, DeviceConfigInfos, RateLimiterConfigInfo, +}; +use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; +use crate::get_bucket_update; + +use super::DbsMmioV2Device; + +/// Default number of virtio queues, one rx/tx pair. +pub const NUM_QUEUES: usize = 2; +/// Default size of virtio queues. +pub const QUEUE_SIZE: u16 = 256; +// The flag of whether to use the shared irq. +const USE_SHARED_IRQ: bool = true; +// The flag of whether to use the generic irq. +const USE_GENERIC_IRQ: bool = true; + +/// Errors associated with virtio net device operations. +#[derive(Debug, thiserror::Error)] +pub enum VirtioNetDeviceError { + /// The virtual machine instance ID is invalid. + #[error("the virtual machine instance ID is invalid")] + InvalidVMID, + + /// The iface ID is invalid. + #[error("invalid virtio-net iface id '{0}'")] + InvalidIfaceId(String), + + /// Invalid queue number configuration for virtio_net device. + #[error("invalid queue number {0} for virtio-net device")] + InvalidQueueNum(usize), + + /// Failure from device manager, + #[error("failure in device manager operations, {0}")] + DeviceManager(#[source] DeviceMgrError), + + /// The Context Identifier is already in use. + #[error("the device ID {0} already exists")] + DeviceIDAlreadyExist(String), + + /// The MAC address is already in use. + #[error("the guest MAC address {0} is already in use")] + GuestMacAddressInUse(String), + + /// The host device name is already in use. + #[error("the host device name {0} is already in use")] + HostDeviceNameInUse(String), + + /// Cannot open/create tap device. + #[error("cannot open TAP device")] + OpenTap(#[source] TapError), + + /// Failure from virtio subsystem. + #[error(transparent)] + Virtio(VirtioError), + + /// Failed to send patch message to net epoll handler. + #[error("could not send patch message to the net epoll handler")] + NetEpollHanderSendFail, + + /// The update is not allowed after booting the microvm. + #[error("update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, + + /// Split this at some point. + /// Internal errors are due to resource exhaustion. + /// Users errors are due to invalid permissions. + #[error("cannot create network device: {0}")] + CreateNetDevice(#[source] VirtioError), + + /// Cannot initialize a MMIO Network Device or add a device to the MMIO Bus. + #[error("failure while registering network device: {0}")] + RegisterNetDevice(#[source] DeviceMgrError), +} + +/// Configuration information for virtio net devices. +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] +pub struct VirtioNetDeviceConfigUpdateInfo { + /// ID of the guest network interface. + pub iface_id: String, + /// Rate Limiter for received packages. 
+ pub rx_rate_limiter: Option, + /// Rate Limiter for transmitted packages. + pub tx_rate_limiter: Option, +} + +impl VirtioNetDeviceConfigUpdateInfo { + /// Provides a `BucketUpdate` description for the RX bandwidth rate limiter. + pub fn rx_bytes(&self) -> BucketUpdate { + get_bucket_update!(self, rx_rate_limiter, bandwidth) + } + /// Provides a `BucketUpdate` description for the RX ops rate limiter. + pub fn rx_ops(&self) -> BucketUpdate { + get_bucket_update!(self, rx_rate_limiter, ops) + } + /// Provides a `BucketUpdate` description for the TX bandwidth rate limiter. + pub fn tx_bytes(&self) -> BucketUpdate { + get_bucket_update!(self, tx_rate_limiter, bandwidth) + } + /// Provides a `BucketUpdate` description for the TX ops rate limiter. + pub fn tx_ops(&self) -> BucketUpdate { + get_bucket_update!(self, tx_rate_limiter, ops) + } +} + +/// Configuration information for virtio net devices. +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize, Default)] +pub struct VirtioNetDeviceConfigInfo { + /// ID of the guest network interface. + pub iface_id: String, + /// Host level path for the guest network interface. + pub host_dev_name: String, + /// Number of virtqueues to use. + pub num_queues: usize, + /// Size of each virtqueue. Unit: byte. + pub queue_size: u16, + /// Guest MAC address. + pub guest_mac: Option, + /// Rate Limiter for received packages. + pub rx_rate_limiter: Option, + /// Rate Limiter for transmitted packages. + pub tx_rate_limiter: Option, + /// allow duplicate mac + pub allow_duplicate_mac: bool, + /// Use shared irq + pub use_shared_irq: Option, + /// Use generic irq + pub use_generic_irq: Option, +} + +impl VirtioNetDeviceConfigInfo { + /// Returns the tap device that `host_dev_name` refers to. + pub fn open_tap(&self) -> std::result::Result { + Tap::open_named(self.host_dev_name.as_str(), false).map_err(VirtioNetDeviceError::OpenTap) + } + + /// Returns a reference to the mac address. It the mac address is not configured, it + /// return None. + pub fn guest_mac(&self) -> Option<&MacAddr> { + self.guest_mac.as_ref() + } + + ///Rx and Tx queue and max queue sizes + pub fn queue_sizes(&self) -> Vec { + let mut queue_size = self.queue_size; + if queue_size == 0 { + queue_size = QUEUE_SIZE; + } + let num_queues = if self.num_queues > 0 { + self.num_queues + } else { + NUM_QUEUES + }; + + (0..num_queues).map(|_| queue_size).collect::>() + } +} + +impl ConfigItem for VirtioNetDeviceConfigInfo { + type Err = VirtioNetDeviceError; + + fn id(&self) -> &str { + &self.iface_id + } + + fn check_conflicts(&self, other: &Self) -> Result<(), VirtioNetDeviceError> { + if self.iface_id == other.iface_id { + Err(VirtioNetDeviceError::DeviceIDAlreadyExist( + self.iface_id.clone(), + )) + } else if !other.allow_duplicate_mac + && self.guest_mac.is_some() + && self.guest_mac == other.guest_mac + { + Err(VirtioNetDeviceError::GuestMacAddressInUse( + self.guest_mac.as_ref().unwrap().to_string(), + )) + } else if self.host_dev_name == other.host_dev_name { + Err(VirtioNetDeviceError::HostDeviceNameInUse( + self.host_dev_name.clone(), + )) + } else { + Ok(()) + } + } +} + +/// Virtio Net Device Info +pub type VirtioNetDeviceInfo = DeviceConfigInfo; + +/// Device manager to manage all virtio net devices. +pub struct VirtioNetDeviceMgr { + pub(crate) info_list: DeviceConfigInfos, + pub(crate) use_shared_irq: bool, +} + +impl VirtioNetDeviceMgr { + /// Gets the index of the device with the specified `drive_id` if it exists in the list. 
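+ /// (For the network manager, the identifier matched here is the interface
+ /// id, i.e. `iface_id`.)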
+ pub fn get_index_of_iface_id(&self, if_id: &str) -> Option { + self.info_list + .iter() + .position(|info| info.config.iface_id.eq(if_id)) + } + + /// Insert or update a virtio net device into the manager. + pub fn insert_device( + &mut self, + mut ctx: DeviceOpContext, + config: VirtioNetDeviceConfigInfo, + ) -> std::result::Result<(), VirtioNetDeviceError> { + if config.num_queues % 2 != 0 { + return Err(VirtioNetDeviceError::InvalidQueueNum(config.num_queues)); + } + if !cfg!(feature = "hotplug") && ctx.is_hotplug { + return Err(VirtioNetDeviceError::UpdateNotAllowedPostBoot); + } + + slog::info!( + ctx.logger(), + "add virtio-net device configuration"; + "subsystem" => "net_dev_mgr", + "id" => &config.iface_id, + "host_dev_name" => &config.host_dev_name, + ); + + let device_index = self.info_list.insert_or_update(&config)?; + + if ctx.is_hotplug { + slog::info!( + ctx.logger(), + "attach virtio-net device"; + "subsystem" => "net_dev_mgr", + "id" => &config.iface_id, + "host_dev_name" => &config.host_dev_name, + ); + + match Self::create_device(&config, &mut ctx) { + Ok(device) => { + let dev = DeviceManager::create_mmio_virtio_device( + device, + &mut ctx, + config.use_shared_irq.unwrap_or(self.use_shared_irq), + config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(VirtioNetDeviceError::DeviceManager)?; + ctx.insert_hotplug_mmio_device(&dev, None) + .map_err(VirtioNetDeviceError::DeviceManager)?; + // live-upgrade need save/restore device from info.device. + self.info_list[device_index].set_device(dev); + } + Err(e) => { + self.info_list.remove(device_index); + return Err(VirtioNetDeviceError::Virtio(e)); + } + } + } + + Ok(()) + } + + /// Update the ratelimiter settings of a virtio net device. + pub fn update_device_ratelimiters( + &mut self, + new_cfg: VirtioNetDeviceConfigUpdateInfo, + ) -> std::result::Result<(), VirtioNetDeviceError> { + match self.get_index_of_iface_id(&new_cfg.iface_id) { + Some(index) => { + let config = &mut self.info_list[index].config; + config.rx_rate_limiter = new_cfg.rx_rate_limiter.clone(); + config.tx_rate_limiter = new_cfg.tx_rate_limiter.clone(); + let device = self.info_list[index].device.as_mut().ok_or_else(|| { + VirtioNetDeviceError::InvalidIfaceId(new_cfg.iface_id.clone()) + })?; + + if let Some(mmio_dev) = device.as_any().downcast_ref::() { + let guard = mmio_dev.state(); + let inner_dev = guard.get_inner_device(); + if let Some(net_dev) = inner_dev + .as_any() + .downcast_ref::>() + { + return net_dev + .set_patch_rate_limiters( + new_cfg.rx_bytes(), + new_cfg.rx_ops(), + new_cfg.tx_bytes(), + new_cfg.tx_ops(), + ) + .map(|_p| ()) + .map_err(|_e| VirtioNetDeviceError::NetEpollHanderSendFail); + } + } + Ok(()) + } + None => Err(VirtioNetDeviceError::InvalidIfaceId( + new_cfg.iface_id.clone(), + )), + } + } + + /// Attach all configured net device to the virtual machine instance. 
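+ ///
+ /// For every configured interface a `Net` backend device is created and
+ /// wrapped in a virtio MMIO transport via
+ /// `DeviceManager::create_mmio_virtio_device`; the resulting MMIO device is
+ /// recorded in the config entry so it can be torn down later by
+ /// `remove_devices`.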
+ pub fn attach_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), VirtioNetDeviceError> { + for info in self.info_list.iter_mut() { + slog::info!( + ctx.logger(), + "attach virtio-net device"; + "subsystem" => "net_dev_mgr", + "id" => &info.config.iface_id, + "host_dev_name" => &info.config.host_dev_name, + ); + + let device = Self::create_device(&info.config, ctx) + .map_err(VirtioNetDeviceError::CreateNetDevice)?; + let device = DeviceManager::create_mmio_virtio_device( + device, + ctx, + info.config.use_shared_irq.unwrap_or(self.use_shared_irq), + info.config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(VirtioNetDeviceError::RegisterNetDevice)?; + info.set_device(device); + } + + Ok(()) + } + + fn create_device( + cfg: &VirtioNetDeviceConfigInfo, + ctx: &mut DeviceOpContext, + ) -> std::result::Result>, virtio::Error> { + let epoll_mgr = ctx.epoll_mgr.clone().ok_or(virtio::Error::InvalidInput)?; + let rx_rate_limiter = match cfg.rx_rate_limiter.as_ref() { + Some(rl) => Some(rl.try_into().map_err(virtio::Error::IOError)?), + None => None, + }; + let tx_rate_limiter = match cfg.tx_rate_limiter.as_ref() { + Some(rl) => Some(rl.try_into().map_err(virtio::Error::IOError)?), + None => None, + }; + + let net_device = Net::new( + cfg.host_dev_name.clone(), + cfg.guest_mac(), + Arc::new(cfg.queue_sizes()), + epoll_mgr, + rx_rate_limiter, + tx_rate_limiter, + )?; + + Ok(Box::new(net_device)) + } + + /// Remove all virtio-net devices. + pub fn remove_devices(&mut self, ctx: &mut DeviceOpContext) -> Result<(), DeviceMgrError> { + while let Some(mut info) = self.info_list.pop() { + slog::info!( + ctx.logger(), + "remove virtio-net device: {}", + info.config.iface_id + ); + if let Some(device) = info.device.take() { + DeviceManager::destroy_mmio_virtio_device(device, ctx)?; + } + } + Ok(()) + } +} + +impl Default for VirtioNetDeviceMgr { + /// Create a new virtio net device manager. + fn default() -> Self { + VirtioNetDeviceMgr { + info_list: DeviceConfigInfos::new(), + use_shared_irq: USE_SHARED_IRQ, + } + } +} diff --git a/src/dragonball/src/device_manager/vsock_dev_mgr.rs b/src/dragonball/src/device_manager/vsock_dev_mgr.rs new file mode 100644 index 000000000000..791d9ded6540 --- /dev/null +++ b/src/dragonball/src/device_manager/vsock_dev_mgr.rs @@ -0,0 +1,314 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::sync::Arc; + +use dbs_virtio_devices as virtio; +use dbs_virtio_devices::mmio::DRAGONBALL_FEATURE_INTR_USED; +use dbs_virtio_devices::vsock::backend::{ + VsockInnerBackend, VsockInnerConnector, VsockTcpBackend, VsockUnixStreamBackend, +}; +use dbs_virtio_devices::vsock::Vsock; +use dbs_virtio_devices::Error as VirtioError; +use serde_derive::{Deserialize, Serialize}; + +use super::{DeviceMgrError, StartMicroVmError}; +use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos}; +use crate::device_manager::{DeviceManager, DeviceOpContext}; + +pub use dbs_virtio_devices::vsock::QUEUE_SIZES; + +const SUBSYSTEM: &str = "vsock_dev_mgr"; +// The flag of whether to use the shared irq. +const USE_SHARED_IRQ: bool = true; +// The flag of whether to use the generic irq. 
+const USE_GENERIC_IRQ: bool = true; + +/// Errors associated with `VsockDeviceConfigInfo`. +#[derive(Debug, thiserror::Error)] +pub enum VsockDeviceError { + /// The virtual machine instance ID is invalid. + #[error("the virtual machine instance ID is invalid")] + InvalidVMID, + + /// The Context Identifier is already in use. + #[error("the device ID {0} already exists")] + DeviceIDAlreadyExist(String), + + /// The Context Identifier is invalid. + #[error("the guest CID {0} is invalid")] + GuestCIDInvalid(u32), + + /// The Context Identifier is already in use. + #[error("the guest CID {0} is already in use")] + GuestCIDAlreadyInUse(u32), + + /// The Unix Domain Socket path is already in use. + #[error("the Unix Domain Socket path {0} is already in use")] + UDSPathAlreadyInUse(String), + + /// The net address is already in use. + #[error("the net address {0} is already in use")] + NetAddrAlreadyInUse(String), + + /// The update is not allowed after booting the microvm. + #[error("update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, + + /// The VsockId Already Exists + #[error("vsock id {0} already exists")] + VsockIdAlreadyExists(String), + + /// Inner backend create error + #[error("vsock inner backend create error: {0}")] + CreateInnerBackend(#[source] std::io::Error), +} + +/// Configuration information for a vsock device. +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] +pub struct VsockDeviceConfigInfo { + /// ID of the vsock device. + pub id: String, + /// A 32-bit Context Identifier (CID) used to identify the guest. + pub guest_cid: u32, + /// unix domain socket path. + pub uds_path: Option, + /// tcp socket address. + pub tcp_addr: Option, + /// Virtio queue size. + pub queue_size: Vec, + /// Use shared irq + pub use_shared_irq: Option, + /// Use generic irq + pub use_generic_irq: Option, +} + +impl Default for VsockDeviceConfigInfo { + fn default() -> Self { + Self { + id: String::default(), + guest_cid: 0, + uds_path: None, + tcp_addr: None, + queue_size: Vec::from(QUEUE_SIZES), + use_shared_irq: None, + use_generic_irq: None, + } + } +} + +impl VsockDeviceConfigInfo { + /// Get number and size of queues supported. + pub fn queue_sizes(&self) -> Vec { + self.queue_size.clone() + } +} + +impl ConfigItem for VsockDeviceConfigInfo { + type Err = VsockDeviceError; + + fn id(&self) -> &str { + &self.id + } + + fn check_conflicts(&self, other: &Self) -> Result<(), VsockDeviceError> { + if self.id == other.id { + return Err(VsockDeviceError::DeviceIDAlreadyExist(self.id.clone())); + } + if self.guest_cid == other.guest_cid { + return Err(VsockDeviceError::GuestCIDAlreadyInUse(self.guest_cid)); + } + if let (Some(self_uds_path), Some(other_uds_path)) = + (self.uds_path.as_ref(), other.uds_path.as_ref()) + { + if self_uds_path == other_uds_path { + return Err(VsockDeviceError::UDSPathAlreadyInUse(self_uds_path.clone())); + } + } + if let (Some(self_net_addr), Some(other_net_addr)) = + (self.tcp_addr.as_ref(), other.tcp_addr.as_ref()) + { + if self_net_addr == other_net_addr { + return Err(VsockDeviceError::NetAddrAlreadyInUse(self_net_addr.clone())); + } + } + + Ok(()) + } +} + +/// Vsock Device Info +pub type VsockDeviceInfo = DeviceConfigInfo; + +/// Device manager to manage all vsock devices. 
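// Usage sketch for `check_conflicts` above, assuming it sits alongside the
// vsock config types in this module; the ids, CIDs and socket path below are
// made-up values. Two vsock devices must not share an id, guest CID, UDS path
// or TCP address.
#[test]
fn vsock_config_conflicts() {
    let a = VsockDeviceConfigInfo {
        id: "vsock0".to_string(),
        guest_cid: 3,
        uds_path: Some("/tmp/vsock0.sock".to_string()),
        ..Default::default()
    };
    let mut b = VsockDeviceConfigInfo {
        id: "vsock1".to_string(),
        guest_cid: 3,
        ..Default::default()
    };

    // Same guest CID -> rejected.
    assert!(matches!(
        a.check_conflicts(&b),
        Err(VsockDeviceError::GuestCIDAlreadyInUse(3))
    ));

    // Distinct id, CID and endpoints -> accepted.
    b.guest_cid = 4;
    assert!(a.check_conflicts(&b).is_ok());
}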
+pub struct VsockDeviceMgr { + pub(crate) info_list: DeviceConfigInfos, + pub(crate) default_inner_backend: Option, + pub(crate) default_inner_connector: Option, + pub(crate) use_shared_irq: bool, +} + +impl VsockDeviceMgr { + /// Insert or update a vsock device into the manager. + pub fn insert_device( + &mut self, + ctx: DeviceOpContext, + config: VsockDeviceConfigInfo, + ) -> std::result::Result<(), VsockDeviceError> { + if ctx.is_hotplug { + slog::error!( + ctx.logger(), + "no support of virtio-vsock device hotplug"; + "subsystem" => SUBSYSTEM, + "id" => &config.id, + "uds_path" => &config.uds_path, + ); + + return Err(VsockDeviceError::UpdateNotAllowedPostBoot); + } + + // VMADDR_CID_ANY (-1U) means any address for binding; + // VMADDR_CID_HYPERVISOR (0) is reserved for services built into the hypervisor; + // VMADDR_CID_RESERVED (1) must not be used; + // VMADDR_CID_HOST (2) is the well-known address of the host. + if config.guest_cid <= 2 { + return Err(VsockDeviceError::GuestCIDInvalid(config.guest_cid)); + } + + slog::info!( + ctx.logger(), + "add virtio-vsock device configuration"; + "subsystem" => SUBSYSTEM, + "id" => &config.id, + "uds_path" => &config.uds_path, + ); + + self.lazy_make_default_connector()?; + + self.info_list.insert_or_update(&config)?; + + Ok(()) + } + + /// Attach all configured vsock device to the virtual machine instance. + pub fn attach_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), StartMicroVmError> { + let epoll_mgr = ctx + .epoll_mgr + .clone() + .ok_or(StartMicroVmError::CreateVsockDevice( + virtio::Error::InvalidInput, + ))?; + + for info in self.info_list.iter_mut() { + slog::info!( + ctx.logger(), + "attach virtio-vsock device"; + "subsystem" => SUBSYSTEM, + "id" => &info.config.id, + "uds_path" => &info.config.uds_path, + ); + + let mut device = Box::new( + Vsock::new( + info.config.guest_cid as u64, + Arc::new(info.config.queue_sizes()), + epoll_mgr.clone(), + ) + .map_err(VirtioError::VirtioVsockError) + .map_err(StartMicroVmError::CreateVsockDevice)?, + ); + if let Some(uds_path) = info.config.uds_path.as_ref() { + let unix_backend = VsockUnixStreamBackend::new(uds_path.clone()) + .map_err(VirtioError::VirtioVsockError) + .map_err(StartMicroVmError::CreateVsockDevice)?; + device + .add_backend(Box::new(unix_backend), true) + .map_err(VirtioError::VirtioVsockError) + .map_err(StartMicroVmError::CreateVsockDevice)?; + } + if let Some(tcp_addr) = info.config.tcp_addr.as_ref() { + let tcp_backend = VsockTcpBackend::new(tcp_addr.clone()) + .map_err(VirtioError::VirtioVsockError) + .map_err(StartMicroVmError::CreateVsockDevice)?; + device + .add_backend(Box::new(tcp_backend), false) + .map_err(VirtioError::VirtioVsockError) + .map_err(StartMicroVmError::CreateVsockDevice)?; + } + // add inner backend to the the first added vsock device + if let Some(inner_backend) = self.default_inner_backend.take() { + device + .add_backend(Box::new(inner_backend), false) + .map_err(VirtioError::VirtioVsockError) + .map_err(StartMicroVmError::CreateVsockDevice)?; + } + let device = DeviceManager::create_mmio_virtio_device_with_features( + device, + ctx, + Some(DRAGONBALL_FEATURE_INTR_USED), + info.config.use_shared_irq.unwrap_or(self.use_shared_irq), + info.config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(StartMicroVmError::RegisterVsockDevice)?; + info.device = Some(device); + } + + Ok(()) + } + + // check the default connector is present, or build it. 
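// Context for the `config.guest_cid <= 2` check in `insert_device` above: the
// AF_VSOCK address family reserves CID 0 (VMADDR_CID_HYPERVISOR), CID 1
// (VMADDR_CID_RESERVED) and CID 2 (VMADDR_CID_HOST), so a guest must be given
// a CID of 3 or above. A minimal stand-alone sketch of that rule:
const VMADDR_CID_HOST: u32 = 2;

fn is_valid_guest_cid(cid: u32) -> bool {
    // Mirrors the manager's check: anything at or below the host CID is reserved.
    cid > VMADDR_CID_HOST
}

#[test]
fn guest_cid_rule() {
    assert!(!is_valid_guest_cid(0)); // hypervisor
    assert!(!is_valid_guest_cid(1)); // reserved
    assert!(!is_valid_guest_cid(2)); // host
    assert!(is_valid_guest_cid(3)); // first CID usable by a guest
}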
+ fn lazy_make_default_connector(&mut self) -> std::result::Result<(), VsockDeviceError> { + if self.default_inner_connector.is_none() { + let inner_backend = + VsockInnerBackend::new().map_err(VsockDeviceError::CreateInnerBackend)?; + self.default_inner_connector = Some(inner_backend.get_connector()); + self.default_inner_backend = Some(inner_backend); + } + Ok(()) + } + + /// Get the default vsock inner connector. + pub fn get_default_connector( + &mut self, + ) -> std::result::Result { + self.lazy_make_default_connector()?; + + // safe to unwrap, because we created the inner connector before + Ok(self.default_inner_connector.clone().unwrap()) + } + + /// Remove all virtio-vsock devices + pub fn remove_devices(&mut self, ctx: &mut DeviceOpContext) -> Result<(), DeviceMgrError> { + while let Some(mut info) = self.info_list.pop() { + slog::info!( + ctx.logger(), + "remove virtio-vsock device: {}", + info.config.id + ); + if let Some(device) = info.device.take() { + DeviceManager::destroy_mmio_virtio_device(device, ctx)?; + } + } + Ok(()) + } +} + +impl Default for VsockDeviceMgr { + /// Create a new Vsock device manager. + fn default() -> Self { + VsockDeviceMgr { + info_list: DeviceConfigInfos::new(), + default_inner_backend: None, + default_inner_connector: None, + use_shared_irq: USE_SHARED_IRQ, + } + } +} diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs new file mode 100644 index 000000000000..e0aab17fbe0d --- /dev/null +++ b/src/dragonball/src/error.rs @@ -0,0 +1,240 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file + +//! Error codes for the virtual machine monitor subsystem. + +#[cfg(target_arch = "aarch64")] +use dbs_arch::pmu::PmuError; +#[cfg(feature = "dbs-virtio-devices")] +use dbs_virtio_devices::Error as VirtIoError; + +use crate::{address_space_manager, device_manager, resource_manager, vcpu, vm}; + +/// Shorthand result type for internal VMM commands. +pub type Result = std::result::Result; + +/// Errors associated with the VMM internal logic. +/// +/// These errors cannot be generated by direct user input, but can result from bad configuration +/// of the host (for example if Dragonball doesn't have permissions to open the KVM fd). +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// Empty AddressSpace from parameters. + #[error("Empty AddressSpace from parameters")] + AddressSpace, + + /// The zero page extends past the end of guest_mem. + #[error("the guest zero page extends past the end of guest memory")] + ZeroPagePastRamEnd, + + /// Error writing the zero page of guest memory. + #[error("failed to write to guest zero page")] + ZeroPageSetup, + + /// Failure occurs in issuing KVM ioctls and errors will be returned from kvm_ioctls lib. + #[error("failure in issuing KVM ioctl command: {0}")] + Kvm(#[source] kvm_ioctls::Error), + + /// The host kernel reports an unsupported KVM API version. + #[error("unsupported KVM version {0}")] + KvmApiVersion(i32), + + /// Cannot initialize the KVM context due to missing capabilities. 
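// The `lazy_make_default_connector`/`get_default_connector` pair above is a
// lazy-initialization pattern: the inner backend is built the first time the
// default connector is requested, and both the backend and a cloned connector
// handle are cached for later calls. A std-only sketch of the same shape
// (`Backend`/`Connector` are illustrative stand-ins for the vsock types):
#[derive(Clone)]
struct Connector(u32);

struct Backend {
    connector: Connector,
}

impl Backend {
    fn new() -> std::io::Result<Self> {
        Ok(Backend {
            connector: Connector(42),
        })
    }

    fn get_connector(&self) -> Connector {
        self.connector.clone()
    }
}

#[derive(Default)]
struct Mgr {
    default_backend: Option<Backend>,
    default_connector: Option<Connector>,
}

impl Mgr {
    fn get_default_connector(&mut self) -> std::io::Result<Connector> {
        if self.default_connector.is_none() {
            let backend = Backend::new()?;
            self.default_connector = Some(backend.get_connector());
            self.default_backend = Some(backend);
        }
        // Safe to unwrap: the connector was just created if it was missing.
        Ok(self.default_connector.clone().unwrap())
    }
}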
+ #[error("missing KVM capability: {0:?}")] + KvmCap(kvm_ioctls::Cap), + + #[cfg(target_arch = "x86_64")] + #[error("failed to configure MSRs: {0:?}")] + /// Cannot configure MSRs + GuestMSRs(dbs_arch::msr::Error), + + /// MSR inner error + #[error("MSR inner error")] + Msr(vmm_sys_util::fam::Error), + + /// Error writing MP table to memory. + #[cfg(target_arch = "x86_64")] + #[error("failed to write MP table to guest memory: {0}")] + MpTableSetup(#[source] dbs_boot::mptable::Error), + + /// Create pmu device error + #[cfg(target_arch = "aarch64")] + #[error("Create pmu device error: {0}")] + PmuDeviceError(#[source] PmuError), + + /// Fail to boot system + #[error("failed to boot system: {0}")] + BootSystem(#[source] dbs_boot::Error), + + /// Cannot open the VM file descriptor. + #[error(transparent)] + Vm(vm::VmError), +} + +/// Errors associated with starting the instance. +#[derive(Debug, thiserror::Error)] +pub enum StartMicroVmError { + /// Failed to allocate resources. + #[error("cannot allocate resources")] + AllocateResource(#[source] resource_manager::ResourceError), + + /// Cannot read from an Event file descriptor. + #[error("failure while reading from EventFd file descriptor")] + EventFd, + + /// Cannot add event to Epoll. + #[error("failure while registering epoll event for file descriptor")] + RegisterEvent, + + /// The start command was issued more than once. + #[error("the virtual machine is already running")] + MicroVMAlreadyRunning, + + /// Cannot start the VM because the kernel was not configured. + #[error("cannot start the virtual machine without kernel configuration")] + MissingKernelConfig, + + #[cfg(feature = "hotplug")] + /// Upcall initialize miss vsock device. + #[error("the upcall client needs a virtio-vsock device for communication")] + UpcallMissVsock, + + /// Upcall is not ready + #[error("the upcall client is not ready")] + UpcallServerNotReady, + + /// Configuration passed in is invalidate. + #[error("invalid virtual machine configuration: {0} ")] + ConfigureInvalid(String), + + /// This error is thrown by the minimal boot loader implementation. + /// It is related to a faulty memory configuration. + #[error("failure while configuring boot information for the virtual machine: {0}")] + ConfigureSystem(#[source] Error), + + /// Cannot configure the VM. + #[error("failure while configuring the virtual machine: {0}")] + ConfigureVm(#[source] vm::VmError), + + /// Cannot load initrd. + #[error("cannot load Initrd into guest memory: {0}")] + InitrdLoader(#[from] LoadInitrdError), + + /// Cannot load kernel due to invalid memory configuration or invalid kernel image. + #[error("cannot load guest kernel into guest memory: {0}")] + KernelLoader(#[source] linux_loader::loader::Error), + + /// Cannot load command line string. + #[error("failure while configuring guest kernel commandline: {0}")] + LoadCommandline(#[source] linux_loader::loader::Error), + + /// Cannot process command line string. + #[error("failure while processing guest kernel commandline: {0}.")] + ProcessCommandlne(#[source] linux_loader::cmdline::Error), + + /// The device manager was not configured. + #[error("the device manager failed to manage devices: {0}")] + DeviceManager(#[source] device_manager::DeviceMgrError), + + /// Cannot add devices to the Legacy I/O Bus. + #[error("failure in managing legacy device: {0}")] + LegacyDevice(#[source] device_manager::LegacyDeviceError), + + #[cfg(feature = "virtio-vsock")] + /// Failed to create the vsock device. 
+ #[error("cannot create virtio-vsock device: {0}")] + CreateVsockDevice(#[source] VirtIoError), + + #[cfg(feature = "virtio-vsock")] + /// Cannot initialize a MMIO Vsock Device or add a device to the MMIO Bus. + #[error("failure while registering virtio-vsock device: {0}")] + RegisterVsockDevice(#[source] device_manager::DeviceMgrError), + + /// Address space manager related error, e.g.cannot access guest address space manager. + #[error("address space manager related error: {0}")] + AddressManagerError(#[source] address_space_manager::AddressManagerError), + + /// Cannot create a new vCPU file descriptor. + #[error("vCPU related error: {0}")] + Vcpu(#[source] vcpu::VcpuManagerError), + + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] + /// Upcall initialize Error. + #[error("failure while initializing the upcall client: {0}")] + UpcallInitError(#[source] dbs_upcall::UpcallClientError), + + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] + /// Upcall connect Error. + #[error("failure while connecting the upcall client: {0}")] + UpcallConnectError(#[source] dbs_upcall::UpcallClientError), + + #[cfg(feature = "virtio-blk")] + /// Virtio-blk errors. + #[error("virtio-blk errors: {0}")] + BlockDeviceError(#[source] device_manager::blk_dev_mgr::BlockDeviceError), + + #[cfg(feature = "virtio-net")] + /// Virtio-net errors. + #[error("virtio-net errors: {0}")] + VirtioNetDeviceError(#[source] device_manager::virtio_net_dev_mgr::VirtioNetDeviceError), + + #[cfg(feature = "virtio-fs")] + /// Virtio-fs errors. + #[error("virtio-fs errors: {0}")] + FsDeviceError(#[source] device_manager::fs_dev_mgr::FsDeviceError), + + #[cfg(feature = "virtio-balloon")] + /// Virtio-balloon errors. + #[error("virtio-balloon errors: {0}")] + BalloonDeviceError(#[source] device_manager::balloon_dev_mgr::BalloonDeviceError), +} + +/// Errors associated with starting the instance. +#[derive(Debug, thiserror::Error)] +pub enum StopMicrovmError { + /// Guest memory has not been initialized. + #[error("Guest memory has not been initialized")] + GuestMemoryNotInitialized, + + /// Cannnot remove devices + #[error("Failed to remove devices in device_manager {0}")] + DeviceManager(#[source] device_manager::DeviceMgrError), +} + +/// Errors associated with loading initrd +#[derive(Debug, thiserror::Error)] +pub enum LoadInitrdError { + /// Cannot load initrd due to an invalid memory configuration. + #[error("failed to load the initrd image to guest memory")] + LoadInitrd, + /// Cannot load initrd due to an invalid image. + #[error("failed to read the initrd image: {0}")] + ReadInitrd(#[source] std::io::Error), +} + +/// A dedicated error type to glue with the vmm_epoll crate. +#[derive(Debug, thiserror::Error)] +pub enum EpollError { + /// Generic internal error. + #[error("unclassfied internal error")] + InternalError, + + /// Errors from the epoll subsystem. + #[error("failed to issue epoll syscall: {0}")] + EpollMgr(#[from] dbs_utils::epoll_manager::Error), + + /// Generic IO errors. + #[error(transparent)] + IOError(std::io::Error), + + #[cfg(feature = "dbs-virtio-devices")] + /// Errors from virtio devices. + #[error("failed to manager Virtio device: {0}")] + VirtIoDevice(#[source] VirtIoError), +} diff --git a/src/dragonball/src/event_manager.rs b/src/dragonball/src/event_manager.rs new file mode 100644 index 000000000000..69bf4dab4c1f --- /dev/null +++ b/src/dragonball/src/event_manager.rs @@ -0,0 +1,168 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. 
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Event manager to manage and handle IO events and requests from API server . + +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; + +use dbs_utils::epoll_manager::{ + EpollManager, EventOps, EventSet, Events, MutEventSubscriber, SubscriberId, +}; +use log::{error, warn}; +use vmm_sys_util::eventfd::EventFd; + +use crate::error::{EpollError, Result}; +use crate::vmm::Vmm; + +// Statically assigned epoll slot for VMM events. +pub(crate) const EPOLL_EVENT_EXIT: u32 = 0; +pub(crate) const EPOLL_EVENT_API_REQUEST: u32 = 1; + +/// Shared information between vmm::vmm_thread_event_loop() and VmmEpollHandler. +pub(crate) struct EventContext { + pub api_event_fd: EventFd, + pub api_event_triggered: bool, + pub exit_evt_triggered: bool, +} + +impl EventContext { + /// Create a new instance of [`EventContext`]. + pub fn new(api_event_fd: EventFd) -> Result { + Ok(EventContext { + api_event_fd, + api_event_triggered: false, + exit_evt_triggered: false, + }) + } +} + +/// Event manager for VMM to handle API requests and IO events. +pub struct EventManager { + epoll_mgr: EpollManager, + subscriber_id: SubscriberId, + vmm_event_count: Arc, +} + +impl Drop for EventManager { + fn drop(&mut self) { + // Vmm -> Vm -> EpollManager -> VmmEpollHandler -> Vmm + // We need to remove VmmEpollHandler to break the circular reference + // so that Vmm can drop. + self.epoll_mgr + .remove_subscriber(self.subscriber_id) + .map_err(|e| { + error!("event_manager: remove_subscriber err. {:?}", e); + e + }) + .ok(); + } +} + +impl EventManager { + /// Create a new event manager associated with the VMM object. + pub fn new(vmm: &Arc>, epoll_mgr: EpollManager) -> Result { + let vmm_event_count = Arc::new(AtomicUsize::new(0)); + let handler: Box = Box::new(VmmEpollHandler { + vmm: vmm.clone(), + vmm_event_count: vmm_event_count.clone(), + }); + let subscriber_id = epoll_mgr.add_subscriber(handler); + + Ok(EventManager { + epoll_mgr, + subscriber_id, + vmm_event_count, + }) + } + + /// Get the underlying epoll event manager. + pub fn epoll_manager(&self) -> EpollManager { + self.epoll_mgr.clone() + } + + /// Registry the eventfd for exit notification. + pub fn register_exit_eventfd( + &mut self, + exit_evt: &EventFd, + ) -> std::result::Result<(), EpollError> { + let events = Events::with_data(exit_evt, EPOLL_EVENT_EXIT, EventSet::IN); + + self.epoll_mgr + .add_event(self.subscriber_id, events) + .map_err(EpollError::EpollMgr) + } + + /// Poll pending events and invoke registered event handler. + /// + /// # Arguments: + /// * timeout: maximum time in milliseconds to wait + pub fn handle_events(&self, timeout: i32) -> std::result::Result { + self.epoll_mgr + .handle_events(timeout) + .map_err(EpollError::EpollMgr) + } + + /// Fetch the VMM event count and reset it to zero. + pub fn fetch_vmm_event_count(&self) -> usize { + self.vmm_event_count.swap(0, Ordering::AcqRel) + } +} + +struct VmmEpollHandler { + vmm: Arc>, + vmm_event_count: Arc, +} + +impl MutEventSubscriber for VmmEpollHandler { + fn process(&mut self, events: Events, _ops: &mut EventOps) { + // Do not try to recover when the lock has already been poisoned. 
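// `fetch_vmm_event_count` above drains the pending-event counter with a single
// atomic `swap(0, ...)`, so the epoll handler can keep incrementing it from
// another thread without the reader losing updates. A std-only sketch of that
// fetch-and-reset idiom:
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread;

fn fetch_and_reset_demo() -> usize {
    let counter = Arc::new(AtomicUsize::new(0));

    let writer = {
        let counter = Arc::clone(&counter);
        thread::spawn(move || {
            // The handler side: one increment per observed event.
            for _ in 0..1000 {
                counter.fetch_add(1, Ordering::AcqRel);
            }
        })
    };
    writer.join().unwrap();

    // The reader side: take the current value and reset it in one step.
    counter.swap(0, Ordering::AcqRel)
}

#[test]
fn drained_count_matches() {
    assert_eq!(fetch_and_reset_demo(), 1000);
}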
+ // And be careful to avoid deadlock between process() and vmm::vmm_thread_event_loop(). + let mut vmm = self.vmm.lock().unwrap(); + + match events.data() { + EPOLL_EVENT_API_REQUEST => { + if let Err(e) = vmm.event_ctx.api_event_fd.read() { + error!("event_manager: failed to read API eventfd, {:?}", e); + } + vmm.event_ctx.api_event_triggered = true; + self.vmm_event_count.fetch_add(1, Ordering::AcqRel); + } + EPOLL_EVENT_EXIT => { + let vm = vmm.get_vm().unwrap(); + match vm.get_reset_eventfd() { + Some(ev) => { + if let Err(e) = ev.read() { + error!("event_manager: failed to read exit eventfd, {:?}", e); + } + } + None => warn!("event_manager: leftover exit event in epoll context!"), + } + vmm.event_ctx.exit_evt_triggered = true; + self.vmm_event_count.fetch_add(1, Ordering::AcqRel); + } + _ => error!("event_manager: unknown epoll slot number {}", events.data()), + } + } + + fn init(&mut self, ops: &mut EventOps) { + // Do not expect poisoned lock. + let vmm = self.vmm.lock().unwrap(); + let events = Events::with_data( + &vmm.event_ctx.api_event_fd, + EPOLL_EVENT_API_REQUEST, + EventSet::IN, + ); + if let Err(e) = ops.add(events) { + error!( + "event_manager: failed to register epoll event for API server, {:?}", + e + ); + } + } +} diff --git a/src/dragonball/src/hypervisor_metrics.rs b/src/dragonball/src/hypervisor_metrics.rs new file mode 100644 index 000000000000..fec2fcb58895 --- /dev/null +++ b/src/dragonball/src/hypervisor_metrics.rs @@ -0,0 +1,102 @@ +// Copyright 2021-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +extern crate procfs; + +use crate::metric::{IncMetric, METRICS}; +use anyhow::{anyhow, Result}; +use prometheus::{Encoder, IntCounter, IntGaugeVec, Opts, Registry, TextEncoder}; +use std::sync::Mutex; + +const NAMESPACE_KATA_HYPERVISOR: &str = "kata_hypervisor"; + +lazy_static! { + static ref REGISTERED: Mutex = Mutex::new(false); + + // custom registry + static ref REGISTRY: Registry = Registry::new(); + + // hypervisor metrics + static ref HYPERVISOR_SCRAPE_COUNT: IntCounter = + IntCounter::new(format!("{}_{}",NAMESPACE_KATA_HYPERVISOR,"scrape_count"), "Hypervisor metrics scrape count.").unwrap(); + + static ref HYPERVISOR_VCPU: IntGaugeVec = + IntGaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_HYPERVISOR,"vcpu"), "Hypervisor metrics specific to VCPUs' mode of functioning."), &["item"]).unwrap(); + + static ref HYPERVISOR_SECCOMP: IntGaugeVec = + IntGaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_HYPERVISOR,"seccomp"), "Hypervisor metrics for the seccomp filtering."), &["item"]).unwrap(); + + static ref HYPERVISOR_SIGNALS: IntGaugeVec = + IntGaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_HYPERVISOR,"signals"), "Hypervisor metrics related to signals."), &["item"]).unwrap(); +} + +/// get prometheus metrics +pub fn get_hypervisor_metrics() -> Result { + let mut registered = REGISTERED + .lock() + .map_err(|e| anyhow!("failed to check hypervisor metrics register status {:?}", e))?; + + if !(*registered) { + register_hypervisor_metrics()?; + *registered = true; + } + + update_hypervisor_metrics()?; + + // gather all metrics and return as a String + let metric_families = REGISTRY.gather(); + + let mut buffer = Vec::new(); + let encoder = TextEncoder::new(); + encoder.encode(&metric_families, &mut buffer)?; + + Ok(String::from_utf8(buffer)?) 
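// `get_hypervisor_metrics` above follows the standard prometheus-rs flow:
// register collectors with a custom `Registry` once, then `gather()` and
// text-encode on every scrape. A minimal, self-contained sketch of that flow
// (`demo_scrape_count` is an illustrative metric name):
use prometheus::{Encoder, IntCounter, Registry, TextEncoder};

fn render_demo_metrics() -> prometheus::Result<String> {
    let registry = Registry::new();
    let scrapes = IntCounter::new("demo_scrape_count", "Number of scrapes.")?;
    registry.register(Box::new(scrapes.clone()))?;

    // Hot path: bump the counter whenever the event of interest happens.
    scrapes.inc();

    // Scrape path: gather all registered metric families and encode as text.
    let mut buffer = Vec::new();
    TextEncoder::new().encode(&registry.gather(), &mut buffer)?;
    Ok(String::from_utf8(buffer).unwrap_or_default())
}

#[test]
fn demo_metrics_render() {
    let text = render_demo_metrics().unwrap();
    assert!(text.contains("demo_scrape_count"));
}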
+} + +fn register_hypervisor_metrics() -> Result<()> { + REGISTRY.register(Box::new(HYPERVISOR_SCRAPE_COUNT.clone()))?; + REGISTRY.register(Box::new(HYPERVISOR_VCPU.clone()))?; + REGISTRY.register(Box::new(HYPERVISOR_SECCOMP.clone()))?; + REGISTRY.register(Box::new(HYPERVISOR_SIGNALS.clone()))?; + + Ok(()) +} + +fn update_hypervisor_metrics() -> Result<()> { + HYPERVISOR_SCRAPE_COUNT.inc(); + + set_intgauge_vec_vcpu(&HYPERVISOR_VCPU); + set_intgauge_vec_seccomp(&HYPERVISOR_SECCOMP); + set_intgauge_vec_signals(&HYPERVISOR_SIGNALS); + + Ok(()) +} + +fn set_intgauge_vec_vcpu(icv: &prometheus::IntGaugeVec) { + icv.with_label_values(&["exit_io_in"]) + .set(METRICS.vcpu.exit_io_in.count() as i64); + icv.with_label_values(&["exit_io_out"]) + .set(METRICS.vcpu.exit_io_out.count() as i64); + icv.with_label_values(&["exit_mmio_read"]) + .set(METRICS.vcpu.exit_mmio_read.count() as i64); + icv.with_label_values(&["exit_mmio_write"]) + .set(METRICS.vcpu.exit_mmio_write.count() as i64); + icv.with_label_values(&["failures"]) + .set(METRICS.vcpu.failures.count() as i64); + icv.with_label_values(&["filter_cpuid"]) + .set(METRICS.vcpu.filter_cpuid.count() as i64); +} + +fn set_intgauge_vec_seccomp(icv: &prometheus::IntGaugeVec) { + icv.with_label_values(&["num_faults"]) + .set(METRICS.seccomp.num_faults.count() as i64); +} + +fn set_intgauge_vec_signals(icv: &prometheus::IntGaugeVec) { + icv.with_label_values(&["sigbus"]) + .set(METRICS.signals.sigbus.count() as i64); + icv.with_label_values(&["sigsegv"]) + .set(METRICS.signals.sigsegv.count() as i64); +} diff --git a/src/dragonball/src/io_manager.rs b/src/dragonball/src/io_manager.rs new file mode 100644 index 000000000000..410703bc7abf --- /dev/null +++ b/src/dragonball/src/io_manager.rs @@ -0,0 +1,60 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; + +use arc_swap::{ArcSwap, Cache}; +use dbs_device::device_manager::Error; +use dbs_device::device_manager::IoManager; + +/// A specialized version of [`std::result::Result`] for IO manager related operations. +pub type Result = std::result::Result; + +/// Wrapper over IoManager to support device hotplug with [`ArcSwap`] and [`Cache`]. +#[derive(Clone)] +pub struct IoManagerCached(pub(crate) Cache>, Arc>); + +impl IoManagerCached { + /// Create a new instance of [`IoManagerCached`]. + pub fn new(io_manager: Arc>) -> Self { + IoManagerCached(Cache::new(io_manager)) + } + + #[cfg(target_arch = "x86_64")] + #[inline] + /// Read data from IO ports. + pub fn pio_read(&mut self, addr: u16, data: &mut [u8]) -> Result<()> { + self.0.load().pio_read(addr, data) + } + + #[cfg(target_arch = "x86_64")] + #[inline] + /// Write data to IO ports. + pub fn pio_write(&mut self, addr: u16, data: &[u8]) -> Result<()> { + self.0.load().pio_write(addr, data) + } + + #[inline] + /// Read data to MMIO address. + pub fn mmio_read(&mut self, addr: u64, data: &mut [u8]) -> Result<()> { + self.0.load().mmio_read(addr, data) + } + + #[inline] + /// Write data to MMIO address. + pub fn mmio_write(&mut self, addr: u64, data: &[u8]) -> Result<()> { + self.0.load().mmio_write(addr, data) + } + + #[inline] + /// Revalidate the inner cache + pub fn revalidate_cache(&mut self) { + let _ = self.0.load(); + } + + #[inline] + /// Get immutable reference to underlying [`IoManager`]. 
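// `IoManagerCached` above wraps an `ArcSwap` so readers get cheap access to
// the current `IoManager` for PIO/MMIO dispatch while device hotplug publishes
// a whole new snapshot at once; the extra `Cache` layer is a read-side
// optimization on top of this. A minimal sketch of the underlying `arc_swap`
// publish/load flow, with a plain `u32` standing in for the real `IoManager`:
use std::sync::Arc;

use arc_swap::ArcSwap;

fn hot_swap_demo() {
    // Shared, atomically swappable snapshot.
    let shared = Arc::new(ArcSwap::from_pointee(1u32));

    // Reader: lock-free load of the current snapshot.
    assert_eq!(**shared.load(), 1);

    // Writer (e.g. the hotplug path): publish a brand-new snapshot.
    shared.store(Arc::new(2u32));
    assert_eq!(**shared.load(), 2);
}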
+ pub fn load(&mut self) -> &IoManager { + self.0.load() + } +} diff --git a/src/dragonball/src/kvm_context.rs b/src/dragonball/src/kvm_context.rs new file mode 100644 index 000000000000..ce458345886e --- /dev/null +++ b/src/dragonball/src/kvm_context.rs @@ -0,0 +1,260 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. +#![allow(dead_code)] +use kvm_bindings::KVM_API_VERSION; +use kvm_ioctls::{Cap, Kvm, VmFd}; +use std::os::unix::io::{FromRawFd, RawFd}; + +use crate::error::{Error, Result}; + +/// Describes a KVM context that gets attached to the micro VM instance. +/// It gives access to the functionality of the KVM wrapper as long as every required +/// KVM capability is present on the host. +pub struct KvmContext { + kvm: Kvm, + max_memslots: usize, + #[cfg(target_arch = "x86_64")] + supported_msrs: kvm_bindings::MsrList, +} + +impl KvmContext { + /// Create a new KVM context object, using the provided `kvm_fd` if one is presented. + pub fn new(kvm_fd: Option) -> Result { + let kvm = if let Some(fd) = kvm_fd { + // Safe because we expect kvm_fd to contain a valid fd number when is_some() == true. + unsafe { Kvm::from_raw_fd(fd) } + } else { + Kvm::new().map_err(Error::Kvm)? + }; + + if kvm.get_api_version() != KVM_API_VERSION as i32 { + return Err(Error::KvmApiVersion(kvm.get_api_version())); + } + + Self::check_cap(&kvm, Cap::Irqchip)?; + Self::check_cap(&kvm, Cap::Irqfd)?; + Self::check_cap(&kvm, Cap::Ioeventfd)?; + Self::check_cap(&kvm, Cap::UserMemory)?; + #[cfg(target_arch = "x86_64")] + Self::check_cap(&kvm, Cap::SetTssAddr)?; + + #[cfg(target_arch = "x86_64")] + let supported_msrs = dbs_arch::msr::supported_guest_msrs(&kvm).map_err(Error::GuestMSRs)?; + let max_memslots = kvm.get_nr_memslots(); + + Ok(KvmContext { + kvm, + max_memslots, + #[cfg(target_arch = "x86_64")] + supported_msrs, + }) + } + + /// Get underlying KVM object to access kvm-ioctls interfaces. + pub fn kvm(&self) -> &Kvm { + &self.kvm + } + + /// Get the maximum number of memory slots reported by this KVM context. + pub fn max_memslots(&self) -> usize { + self.max_memslots + } + + /// Create a virtual machine object. + pub fn create_vm(&self) -> Result { + self.kvm.create_vm().map_err(Error::Kvm) + } + + /// Get the max vcpu count supported by kvm + pub fn get_max_vcpus(&self) -> usize { + self.kvm.get_max_vcpus() + } + + fn check_cap(kvm: &Kvm, cap: Cap) -> std::result::Result<(), Error> { + if !kvm.check_extension(cap) { + return Err(Error::KvmCap(cap)); + } + Ok(()) + } +} + +#[cfg(target_arch = "x86_64")] +mod x86_64 { + use super::*; + use dbs_arch::msr::*; + use kvm_bindings::{kvm_msr_entry, CpuId, MsrList, Msrs}; + use std::collections::HashSet; + + impl KvmContext { + /// Get information about supported CPUID of x86 processor. + pub fn supported_cpuid( + &self, + max_entries_count: usize, + ) -> std::result::Result { + self.kvm.get_supported_cpuid(max_entries_count) + } + + /// Get information about supported MSRs of x86 processor. + pub fn supported_msrs( + &self, + _max_entries_count: usize, + ) -> std::result::Result { + Ok(self.supported_msrs.clone()) + } + + // It's very sensible to manipulate MSRs, so please be careful to change code below. 
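// `KvmContext::new` above refuses to run unless /dev/kvm reports the expected
// KVM_API_VERSION and a set of required capabilities. A minimal sketch of that
// probe using the same kvm-ioctls calls; it needs access to /dev/kvm, so in
// this tree it would be gated like the tests below with `skip_if_not_root!()`:
use kvm_bindings::KVM_API_VERSION;
use kvm_ioctls::{Cap, Kvm};

fn probe_kvm() -> Result<(), String> {
    let kvm = Kvm::new().map_err(|e| format!("cannot open /dev/kvm: {e}"))?;

    if kvm.get_api_version() != KVM_API_VERSION as i32 {
        return Err(format!(
            "unexpected KVM API version {}",
            kvm.get_api_version()
        ));
    }

    for cap in [Cap::Irqchip, Cap::Irqfd, Cap::Ioeventfd, Cap::UserMemory] {
        if !kvm.check_extension(cap) {
            return Err(format!("missing KVM capability {cap:?}"));
        }
    }

    // At this point a VM fd can be created, exactly as `create_vm` above does.
    let _vm = kvm.create_vm().map_err(|e| format!("cannot create VM: {e}"))?;
    Ok(())
}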
+ fn build_msrs_list(kvm: &Kvm) -> Result { + let mut mset: HashSet = HashSet::new(); + let supported_msr_list = kvm.get_msr_index_list().map_err(super::Error::Kvm)?; + for msr in supported_msr_list.as_slice() { + mset.insert(*msr); + } + + let mut msrs = vec![ + MSR_IA32_APICBASE, + MSR_IA32_SYSENTER_CS, + MSR_IA32_SYSENTER_ESP, + MSR_IA32_SYSENTER_EIP, + MSR_IA32_CR_PAT, + ]; + + let filters_list = vec![ + MSR_STAR, + MSR_VM_HSAVE_PA, + MSR_TSC_AUX, + MSR_IA32_TSC_ADJUST, + MSR_IA32_TSCDEADLINE, + MSR_IA32_MISC_ENABLE, + MSR_IA32_BNDCFGS, + MSR_IA32_SPEC_CTRL, + ]; + for msr in filters_list { + if mset.contains(&msr) { + msrs.push(msr); + } + } + + // TODO: several msrs are optional. + + // TODO: Since our guests don't support nested-vmx, LMCE nor SGX for now. + // msrs.push(MSR_IA32_FEATURE_CONTROL); + + msrs.push(MSR_CSTAR); + msrs.push(MSR_KERNEL_GS_BASE); + msrs.push(MSR_SYSCALL_MASK); + msrs.push(MSR_LSTAR); + msrs.push(MSR_IA32_TSC); + + msrs.push(MSR_KVM_SYSTEM_TIME_NEW); + msrs.push(MSR_KVM_WALL_CLOCK_NEW); + + // FIXME: check if it's supported. + msrs.push(MSR_KVM_ASYNC_PF_EN); + msrs.push(MSR_KVM_PV_EOI_EN); + msrs.push(MSR_KVM_STEAL_TIME); + + msrs.push(MSR_CORE_PERF_FIXED_CTR_CTRL); + msrs.push(MSR_CORE_PERF_GLOBAL_CTRL); + msrs.push(MSR_CORE_PERF_GLOBAL_STATUS); + msrs.push(MSR_CORE_PERF_GLOBAL_OVF_CTRL); + + const MAX_FIXED_COUNTERS: u32 = 3; + for i in 0..MAX_FIXED_COUNTERS { + msrs.push(MSR_CORE_PERF_FIXED_CTR0 + i); + } + + // FIXME: skip MCE for now. + + let mtrr_msrs = vec![ + MSR_MTRRdefType, + MSR_MTRRfix64K_00000, + MSR_MTRRfix16K_80000, + MSR_MTRRfix16K_A0000, + MSR_MTRRfix4K_C0000, + MSR_MTRRfix4K_C8000, + MSR_MTRRfix4K_D0000, + MSR_MTRRfix4K_D8000, + MSR_MTRRfix4K_E0000, + MSR_MTRRfix4K_E8000, + MSR_MTRRfix4K_F0000, + MSR_MTRRfix4K_F8000, + ]; + for mtrr in mtrr_msrs { + msrs.push(mtrr); + } + + const MSR_MTRRCAP_VCNT: u32 = 8; + for i in 0..MSR_MTRRCAP_VCNT { + msrs.push(0x200 + 2 * i); + msrs.push(0x200 + 2 * i + 1); + } + + let msrs: Vec = msrs + .iter() + .map(|reg| kvm_msr_entry { + index: *reg, + reserved: 0, + data: 0, + }) + .collect(); + + Msrs::from_entries(&msrs).map_err(super::Error::Msr) + } + } +} + +#[cfg(test)] +mod tests { + use std::fs::File; + use std::os::unix::fs::MetadataExt; + use std::os::unix::io::{AsRawFd, FromRawFd}; + + use kvm_ioctls::Kvm; + use test_utils::skip_if_not_root; + + use super::*; + + #[test] + fn test_create_kvm_context() { + skip_if_not_root!(); + + let c = KvmContext::new(None).unwrap(); + + assert!(c.max_memslots >= 32); + + let kvm = Kvm::new().unwrap(); + let f = std::mem::ManuallyDrop::new(unsafe { File::from_raw_fd(kvm.as_raw_fd()) }); + let m1 = f.metadata().unwrap(); + let m2 = File::open("/dev/kvm").unwrap().metadata().unwrap(); + + assert_eq!(m1.dev(), m2.dev()); + assert_eq!(m1.ino(), m2.ino()); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_get_supported_cpu_id() { + skip_if_not_root!(); + + let c = KvmContext::new(None).unwrap(); + + let _ = c + .supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES) + .expect("failed to get supported CPUID"); + assert!(c.supported_cpuid(0).is_err()); + } + + #[test] + fn test_create_vm() { + skip_if_not_root!(); + + let c = KvmContext::new(None).unwrap(); + + let _ = c.create_vm().unwrap(); + } +} diff --git a/src/dragonball/src/lib.rs b/src/dragonball/src/lib.rs new file mode 100644 index 000000000000..26142a45628e --- /dev/null +++ b/src/dragonball/src/lib.rs @@ -0,0 +1,68 @@ +// Copyright (C) 2018-2022 Alibaba Cloud. All rights reserved. 
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Dragonball is a light-weight virtual machine manager(VMM) based on Linux Kernel-based Virtual +//! Machine(KVM) which is optimized for container workloads. + +#![warn(missing_docs)] +//TODO: Remove this, after the rest of dragonball has been committed. +#![allow(dead_code)] + +#[macro_use] +extern crate lazy_static; + +/// Address space manager for virtual machines. +pub mod address_space_manager; +/// API to handle vmm requests. +pub mod api; +/// Structs to maintain configuration information. +pub mod config_manager; +/// Device manager for virtual machines. +pub mod device_manager; +/// Errors related to Virtual machine manager. +pub mod error; +/// Prometheus Metrics. +pub mod hypervisor_metrics; +/// KVM operation context for virtual machines. +pub mod kvm_context; +/// Metrics system. +pub mod metric; +/// Resource manager for virtual machines. +pub mod resource_manager; +/// Signal handler for virtual machines. +pub mod signal_handler; +/// Virtual CPU manager for virtual machines. +pub mod vcpu; +/// Virtual machine manager for virtual machines. +pub mod vm; + +mod event_manager; +mod io_manager; + +mod test_utils; + +mod vmm; + +pub use self::error::StartMicroVmError; +pub use self::io_manager::IoManagerCached; +pub use self::vmm::Vmm; + +/// Success exit code. +pub const EXIT_CODE_OK: u8 = 0; +/// Generic error exit code. +pub const EXIT_CODE_GENERIC_ERROR: u8 = 1; +/// Generic exit code for an error considered not possible to occur if the program logic is sound. +pub const EXIT_CODE_UNEXPECTED_ERROR: u8 = 2; +/// Dragonball was shut down after intercepting a restricted system call. +pub const EXIT_CODE_BAD_SYSCALL: u8 = 148; +/// Dragonball was shut down after intercepting `SIGBUS`. +pub const EXIT_CODE_SIGBUS: u8 = 149; +/// Dragonball was shut down after intercepting `SIGSEGV`. +pub const EXIT_CODE_SIGSEGV: u8 = 150; +/// Invalid json passed to the Dragonball process for configuring microvm. +pub const EXIT_CODE_INVALID_JSON: u8 = 151; +/// Bad configuration for microvm's resources, when using a single json. +pub const EXIT_CODE_BAD_CONFIGURATION: u8 = 152; +/// Command line arguments parsing error. +pub const EXIT_CODE_ARG_PARSING: u8 = 153; diff --git a/src/dragonball/src/metric.rs b/src/dragonball/src/metric.rs new file mode 100644 index 000000000000..716e9e04406e --- /dev/null +++ b/src/dragonball/src/metric.rs @@ -0,0 +1,58 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use dbs_utils::metric::SharedIncMetric; +use lazy_static::lazy_static; +use serde::Serialize; + +pub use dbs_utils::metric::IncMetric; + +lazy_static! { + /// Static instance used for handling metrics. + pub static ref METRICS: DragonballMetrics = DragonballMetrics::default(); +} + +/// Metrics specific to VCPUs' mode of functioning. +#[derive(Default, Serialize)] +pub struct VcpuMetrics { + /// Number of KVM exits for handling input IO. + pub exit_io_in: SharedIncMetric, + /// Number of KVM exits for handling output IO. + pub exit_io_out: SharedIncMetric, + /// Number of KVM exits for handling MMIO reads. + pub exit_mmio_read: SharedIncMetric, + /// Number of KVM exits for handling MMIO writes. + pub exit_mmio_write: SharedIncMetric, + /// Number of errors during this VCPU's run. + pub failures: SharedIncMetric, + /// Failures in configuring the CPUID. 
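// `METRICS` above is the usual lazy_static singleton: a process-wide metrics
// tree whose counters are bumped from hot paths (vCPU exits, signal handlers)
// and read back by the Prometheus exporter in hypervisor_metrics.rs. A
// std-only sketch of the same shape, with an atomic counter standing in for
// `SharedIncMetric` (names below are illustrative):
use std::sync::atomic::{AtomicU64, Ordering};

use lazy_static::lazy_static;

#[derive(Default)]
struct Counter(AtomicU64);

impl Counter {
    fn inc(&self) {
        self.0.fetch_add(1, Ordering::Relaxed);
    }

    fn count(&self) -> u64 {
        self.0.load(Ordering::Relaxed)
    }
}

#[derive(Default)]
struct DemoMetrics {
    exit_io_in: Counter,
    sigbus: Counter,
}

lazy_static! {
    static ref DEMO_METRICS: DemoMetrics = DemoMetrics::default();
}

#[test]
fn global_counter_round_trip() {
    // Hot path: just bump the counter; export path: read it back.
    DEMO_METRICS.exit_io_in.inc();
    assert!(DEMO_METRICS.exit_io_in.count() >= 1);
    assert_eq!(DEMO_METRICS.sigbus.count(), 0);
}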
+ pub filter_cpuid: SharedIncMetric, +} + +/// Metrics for the seccomp filtering. +#[derive(Default, Serialize)] +pub struct SeccompMetrics { + /// Number of errors inside the seccomp filtering. + pub num_faults: SharedIncMetric, +} + +/// Metrics related to signals. +#[derive(Default, Serialize)] +pub struct SignalMetrics { + /// Number of times that SIGBUS was handled. + pub sigbus: SharedIncMetric, + /// Number of times that SIGSEGV was handled. + pub sigsegv: SharedIncMetric, +} + +/// Structure storing all metrics while enforcing serialization support on them. +#[derive(Default, Serialize)] +pub struct DragonballMetrics { + /// Metrics related to a vcpu's functioning. + pub vcpu: VcpuMetrics, + /// Metrics related to seccomp filtering. + pub seccomp: SeccompMetrics, + /// Metrics related to signals. + pub signals: SignalMetrics, +} diff --git a/src/dragonball/src/resource_manager.rs b/src/dragonball/src/resource_manager.rs new file mode 100644 index 000000000000..b0f96e252eba --- /dev/null +++ b/src/dragonball/src/resource_manager.rs @@ -0,0 +1,781 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Mutex; + +use dbs_allocator::{Constraint, IntervalTree, Range}; +use dbs_boot::layout::{ + GUEST_MEM_END, GUEST_MEM_START, GUEST_PHYS_END, IRQ_BASE as LEGACY_IRQ_BASE, + IRQ_MAX as LEGACY_IRQ_MAX, MMIO_LOW_END, MMIO_LOW_START, +}; +use dbs_device::resources::{DeviceResources, MsiIrqType, Resource, ResourceConstraint}; + +// We reserve the LEGACY_IRQ_BASE(5) for shared IRQ. +const SHARED_IRQ: u32 = LEGACY_IRQ_BASE; +// Since ioapic2 have 24 pins for legacy devices, so irq number 0-23 are used. We will set MSI_IRQ_BASE at 24. +#[cfg(target_arch = "x86_64")] +const MSI_IRQ_BASE: u32 = 24; +#[cfg(target_arch = "aarch64")] +/// We define MSI_IRQ_BASE as LEGACY_IRQ_MAX for aarch64 in order not to conflict with legacy irq numbers. +const MSI_IRQ_BASE: u32 = LEGACY_IRQ_MAX + 1; + +// kvm max irq is defined in arch/x86/include/asm/kvm_host.h +const MSI_IRQ_MAX: u32 = 1023; +// x86's kvm user mem slots is defined in arch/x86/include/asm/kvm_host.h +#[cfg(target_arch = "x86_64")] +const KVM_USER_MEM_SLOTS: u32 = 509; +// aarch64's kvm user mem slots is defined in arch/arm64/include/asm/kvm_host.h +#[cfg(target_arch = "aarch64")] +const KVM_USER_MEM_SLOTS: u32 = 512; +const PIO_MIN: u16 = 0x0; +const PIO_MAX: u16 = 0xFFFF; +// Reserve the 64MB MMIO address range just below 4G, x86 systems have special +// devices, such as LAPIC, IOAPIC, HPET etc, in this range. And we don't explicitly +// allocate MMIO address for those devices. +const MMIO_SPACE_RESERVED: u64 = 0x400_0000; + +/// Errors associated with resource management operations +#[derive(Debug, PartialEq, Eq, thiserror::Error)] +pub enum ResourceError { + /// Unknown/unsupported resource type. + #[error("unsupported resource type")] + UnknownResourceType, + + /// Invalid resource range. + #[error("invalid resource range for resource type : {0}")] + InvalidResourceRange(String), + + /// No resource available. + #[error("no resource available")] + NoAvailResource, +} + +#[derive(Default)] +struct ResourceManagerBuilder { + // IntervalTree for allocating legacy irq number. + legacy_irq_pool: IntervalTree<()>, + // IntervalTree for allocating message signal interrupt (MSI) irq number. + msi_irq_pool: IntervalTree<()>, + // IntervalTree for allocating port-mapped io (PIO) address. + pio_pool: IntervalTree<()>, + // IntervalTree for allocating memory-mapped io (MMIO) address. 
+ mmio_pool: IntervalTree<()>, + // IntervalTree for allocating guest memory. + mem_pool: IntervalTree<()>, + // IntervalTree for allocating kvm memory slot. + kvm_mem_slot_pool: IntervalTree<()>, +} + +impl ResourceManagerBuilder { + /// init legacy_irq_pool with arch specific constants. + fn init_legacy_irq_pool(mut self) -> Self { + // The LEGACY_IRQ_BASE irq is reserved for shared IRQ and won't be allocated / reallocated, + // so we don't insert it into the legacy_irq interval tree. + self.legacy_irq_pool + .insert(Range::new(LEGACY_IRQ_BASE + 1, LEGACY_IRQ_MAX), None); + self + } + + /// init msi_irq_pool with arch specific constants. + fn init_msi_irq_pool(mut self) -> Self { + self.msi_irq_pool + .insert(Range::new(MSI_IRQ_BASE, MSI_IRQ_MAX), None); + self + } + + /// init pio_pool with arch specific constants. + fn init_pio_pool(mut self) -> Self { + self.pio_pool.insert(Range::new(PIO_MIN, PIO_MAX), None); + self + } + + /// Create mmio_pool with arch specific constants. + /// allow(clippy) is because `GUEST_MEM_START > MMIO_LOW_END`, we may modify GUEST_MEM_START or + /// MMIO_LOW_END in the future. + #[allow(clippy::absurd_extreme_comparisons)] + fn init_mmio_pool_helper(mmio: &mut IntervalTree<()>) { + mmio.insert(Range::new(MMIO_LOW_START, MMIO_LOW_END), None); + if !(*GUEST_MEM_END < MMIO_LOW_START + || GUEST_MEM_START > MMIO_LOW_END + || MMIO_LOW_START == MMIO_LOW_END) + { + #[cfg(target_arch = "x86_64")] + { + let constraint = Constraint::new(MMIO_SPACE_RESERVED) + .min(MMIO_LOW_END - MMIO_SPACE_RESERVED) + .max(0xffff_ffffu64); + let key = mmio.allocate(&constraint); + if let Some(k) = key.as_ref() { + mmio.update(k, ()); + } else { + panic!("failed to reserve MMIO address range for x86 system devices"); + } + } + } + + if *GUEST_MEM_END < *GUEST_PHYS_END { + mmio.insert(Range::new(*GUEST_MEM_END + 1, *GUEST_PHYS_END), None); + } + } + + /// init mmio_pool with helper function + fn init_mmio_pool(mut self) -> Self { + Self::init_mmio_pool_helper(&mut self.mmio_pool); + self + } + + /// Create mem_pool with arch specific constants. + /// deny(clippy) is because `GUEST_MEM_START > MMIO_LOW_END`, we may modify GUEST_MEM_START or + /// MMIO_LOW_END in the future. + #[allow(clippy::absurd_extreme_comparisons)] + pub(crate) fn init_mem_pool_helper(mem: &mut IntervalTree<()>) { + if *GUEST_MEM_END < MMIO_LOW_START + || GUEST_MEM_START > MMIO_LOW_END + || MMIO_LOW_START == MMIO_LOW_END + { + mem.insert(Range::new(GUEST_MEM_START, *GUEST_MEM_END), None); + } else { + if MMIO_LOW_START > GUEST_MEM_START { + mem.insert(Range::new(GUEST_MEM_START, MMIO_LOW_START - 1), None); + } + if MMIO_LOW_END < *GUEST_MEM_END { + mem.insert(Range::new(MMIO_LOW_END + 1, *GUEST_MEM_END), None); + } + } + } + + /// init mem_pool with helper function + fn init_mem_pool(mut self) -> Self { + Self::init_mem_pool_helper(&mut self.mem_pool); + self + } + + /// init kvm_mem_slot_pool with arch specific constants. 
+ fn init_kvm_mem_slot_pool(mut self, max_kvm_mem_slot: Option) -> Self { + let max_slots = max_kvm_mem_slot.unwrap_or(KVM_USER_MEM_SLOTS as usize); + self.kvm_mem_slot_pool + .insert(Range::new(0, max_slots as u64), None); + self + } + + fn build(self) -> ResourceManager { + ResourceManager { + legacy_irq_pool: Mutex::new(self.legacy_irq_pool), + msi_irq_pool: Mutex::new(self.msi_irq_pool), + pio_pool: Mutex::new(self.pio_pool), + mmio_pool: Mutex::new(self.mmio_pool), + mem_pool: Mutex::new(self.mem_pool), + kvm_mem_slot_pool: Mutex::new(self.kvm_mem_slot_pool), + } + } +} + +/// Resource manager manages all resources for a virtual machine instance. +pub struct ResourceManager { + legacy_irq_pool: Mutex>, + msi_irq_pool: Mutex>, + pio_pool: Mutex>, + mmio_pool: Mutex>, + mem_pool: Mutex>, + kvm_mem_slot_pool: Mutex>, +} + +impl Default for ResourceManager { + fn default() -> Self { + ResourceManagerBuilder::default().build() + } +} + +impl ResourceManager { + /// Create a resource manager instance. + pub fn new(max_kvm_mem_slot: Option) -> Self { + let res_manager_builder = ResourceManagerBuilder::default(); + res_manager_builder + .init_legacy_irq_pool() + .init_msi_irq_pool() + .init_pio_pool() + .init_mmio_pool() + .init_mem_pool() + .init_kvm_mem_slot_pool(max_kvm_mem_slot) + .build() + } + + /// Init mem_pool with arch specific constants. + pub fn init_mem_pool(&self) { + let mut mem = self.mem_pool.lock().unwrap(); + ResourceManagerBuilder::init_mem_pool_helper(&mut mem); + } + + /// Check if mem_pool is empty. + pub fn is_mem_pool_empty(&self) -> bool { + self.mem_pool.lock().unwrap().is_empty() + } + + /// Allocate one legacy irq number. + /// + /// Allocate the specified irq number if `fixed` contains an irq number. + pub fn allocate_legacy_irq(&self, shared: bool, fixed: Option) -> Option { + // if shared_irq is used, just return the shared irq num. + if shared { + return Some(SHARED_IRQ); + } + + let mut constraint = Constraint::new(1u32); + if let Some(v) = fixed { + if v == SHARED_IRQ { + return None; + } + + constraint.min = v as u64; + constraint.max = v as u64; + } + // Safe to unwrap() because we don't expect poisoned lock here. + let mut legacy_irq_pool = self.legacy_irq_pool.lock().unwrap(); + let key = legacy_irq_pool.allocate(&constraint); + if let Some(k) = key.as_ref() { + legacy_irq_pool.update(k, ()); + } + key.map(|v| v.min as u32) + } + + /// Free a legacy irq number. + /// + /// Panic if the irq number is invalid. + pub fn free_legacy_irq(&self, irq: u32) -> Result<(), ResourceError> { + // if the irq number is shared_irq, we don't need to do anything. + if irq == SHARED_IRQ { + return Ok(()); + } + + if !(LEGACY_IRQ_BASE..=LEGACY_IRQ_MAX).contains(&irq) { + return Err(ResourceError::InvalidResourceRange( + "Legacy IRQ".to_string(), + )); + } + let key = Range::new(irq, irq); + // Safe to unwrap() because we don't expect poisoned lock here. + self.legacy_irq_pool.lock().unwrap().free(&key); + Ok(()) + } + + /// Allocate a group of MSI irq numbers. + /// + /// The allocated MSI irq numbers may or may not be naturally aligned. + pub fn allocate_msi_irq(&self, count: u32) -> Option { + let constraint = Constraint::new(count); + // Safe to unwrap() because we don't expect poisoned lock here. 
+ let mut msi_irq_pool = self.msi_irq_pool.lock().unwrap(); + let key = msi_irq_pool.allocate(&constraint); + if let Some(k) = key.as_ref() { + msi_irq_pool.update(k, ()); + } + key.map(|v| v.min as u32) + } + + /// Allocate a group of MSI irq numbers, naturally aligned to `count`. + /// + /// This may be used to support PCI MSI, which requires the allocated irq number is naturally + /// aligned. + pub fn allocate_msi_irq_aligned(&self, count: u32) -> Option { + let constraint = Constraint::new(count).align(count); + // Safe to unwrap() because we don't expect poisoned lock here. + let mut msi_irq_pool = self.msi_irq_pool.lock().unwrap(); + let key = msi_irq_pool.allocate(&constraint); + if let Some(k) = key.as_ref() { + msi_irq_pool.update(k, ()); + } + key.map(|v| v.min as u32) + } + + /// Free a group of MSI irq numbers. + /// + /// Panic if `irq` or `count` is invalid. + pub fn free_msi_irq(&self, irq: u32, count: u32) -> Result<(), ResourceError> { + if irq < MSI_IRQ_BASE + || count == 0 + || irq.checked_add(count).is_none() + || irq + count - 1 > MSI_IRQ_MAX + { + return Err(ResourceError::InvalidResourceRange("MSI IRQ".to_string())); + } + let key = Range::new(irq, irq + count - 1); + // Safe to unwrap() because we don't expect poisoned lock here. + self.msi_irq_pool.lock().unwrap().free(&key); + Ok(()) + } + + /// Allocate a group of PIO address and returns the allocated PIO base address. + pub fn allocate_pio_address_simple(&self, size: u16) -> Option { + let constraint = Constraint::new(size); + self.allocate_pio_address(&constraint) + } + + /// Allocate a group of PIO address and returns the allocated PIO base address. + pub fn allocate_pio_address(&self, constraint: &Constraint) -> Option { + // Safe to unwrap() because we don't expect poisoned lock here. + let mut pio_pool = self.pio_pool.lock().unwrap(); + let key = pio_pool.allocate(constraint); + if let Some(k) = key.as_ref() { + pio_pool.update(k, ()); + } + key.map(|v| v.min as u16) + } + + /// Free PIO address range `[base, base + size - 1]`. + /// + /// Panic if `base` or `size` is invalid. + pub fn free_pio_address(&self, base: u16, size: u16) -> Result<(), ResourceError> { + if base.checked_add(size).is_none() { + return Err(ResourceError::InvalidResourceRange( + "PIO Address".to_string(), + )); + } + let key = Range::new(base, base + size - 1); + // Safe to unwrap() because we don't expect poisoned lock here. + self.pio_pool.lock().unwrap().free(&key); + Ok(()) + } + + /// Allocate a MMIO address range alinged to `align` and returns the allocated base address. + pub fn allocate_mmio_address_aligned(&self, size: u64, align: u64) -> Option { + let constraint = Constraint::new(size).align(align); + self.allocate_mmio_address(&constraint) + } + + /// Allocate a MMIO address range and returns the allocated base address. + pub fn allocate_mmio_address(&self, constraint: &Constraint) -> Option { + // Safe to unwrap() because we don't expect poisoned lock here. + let mut mmio_pool = self.mmio_pool.lock().unwrap(); + let key = mmio_pool.allocate(constraint); + key.map(|v| v.min) + } + + /// Free MMIO address range `[base, base + size - 1]` + pub fn free_mmio_address(&self, base: u64, size: u64) -> Result<(), ResourceError> { + if base.checked_add(size).is_none() { + return Err(ResourceError::InvalidResourceRange( + "MMIO Address".to_string(), + )); + } + let key = Range::new(base, base + size - 1); + // Safe to unwrap() because we don't expect poisoned lock here. 
+ self.mmio_pool.lock().unwrap().free(&key); + Ok(()) + } + + /// Allocate guest memory address range and returns the allocated base memory address. + pub fn allocate_mem_address(&self, constraint: &Constraint) -> Option { + // Safe to unwrap() because we don't expect poisoned lock here. + let mut mem_pool = self.mem_pool.lock().unwrap(); + let key = mem_pool.allocate(constraint); + + key.map(|v| v.min) + } + + /// Free the guest memory address range `[base, base + size - 1]`. + /// + /// Panic if the guest memory address range is invalid. + /// allow(clippy) is because `base < GUEST_MEM_START`, we may modify GUEST_MEM_START in the future. + #[allow(clippy::absurd_extreme_comparisons)] + pub fn free_mem_address(&self, base: u64, size: u64) -> Result<(), ResourceError> { + if base.checked_add(size).is_none() + || base < GUEST_MEM_START + || base + size > *GUEST_MEM_END + { + return Err(ResourceError::InvalidResourceRange( + "MEM Address".to_string(), + )); + } + let key = Range::new(base, base + size - 1); + // Safe to unwrap() because we don't expect poisoned lock here. + self.mem_pool.lock().unwrap().free(&key); + Ok(()) + } + + /// Allocate a kvm memory slot number. + /// + /// Allocate the specified slot if `fixed` contains a slot number. + pub fn allocate_kvm_mem_slot(&self, size: u32, fixed: Option) -> Option { + let mut constraint = Constraint::new(size); + if let Some(v) = fixed { + constraint.min = v as u64; + constraint.max = v as u64; + } + // Safe to unwrap() because we don't expect poisoned lock here. + let mut kvm_mem_slot_pool = self.kvm_mem_slot_pool.lock().unwrap(); + let key = kvm_mem_slot_pool.allocate(&constraint); + if let Some(k) = key.as_ref() { + kvm_mem_slot_pool.update(k, ()); + } + key.map(|v| v.min as u32) + } + + /// Free a kvm memory slot number. + pub fn free_kvm_mem_slot(&self, slot: u32) -> Result<(), ResourceError> { + let key = Range::new(slot, slot); + // Safe to unwrap() because we don't expect poisoned lock here. + self.kvm_mem_slot_pool.lock().unwrap().free(&key); + Ok(()) + } + + /// Allocate requested resources for a device. 
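// The pools above delegate to dbs-allocator's `IntervalTree`, and what the
// callers rely on is constraint-based range allocation with optional alignment
// and min/max bounds (see `allocate_msi_irq_aligned` and
// `allocate_mmio_address_aligned`). A toy, std-only first-fit illustration of
// the core idea; it is not the dbs-allocator implementation:
struct ToyPool {
    // Inclusive free ranges, kept sorted and non-overlapping.
    free: Vec<(u64, u64)>,
}

impl ToyPool {
    fn new(start: u64, end: u64) -> Self {
        ToyPool {
            free: vec![(start, end)],
        }
    }

    /// First-fit allocation of `size` items aligned to `align` (align >= 1).
    fn allocate(&mut self, size: u64, align: u64) -> Option<u64> {
        for i in 0..self.free.len() {
            let (lo, hi) = self.free[i];
            // Round the start of the hole up to the requested alignment.
            let base = (lo + align - 1) / align * align;
            if base <= hi && hi - base + 1 >= size {
                // Carve [base, base + size - 1] out of the hole, keeping any
                // leftover pieces as new free ranges.
                self.free.remove(i);
                if base + size - 1 < hi {
                    self.free.insert(i, (base + size, hi));
                }
                if base > lo {
                    self.free.insert(i, (lo, base - 1));
                }
                return Some(base);
            }
        }
        None
    }
}

#[test]
fn aligned_first_fit() {
    // Same shape as the x86_64 MSI pool above: IRQs 24..=1023, allocate 8 of
    // them naturally aligned.
    let mut pool = ToyPool::new(24, 1023);
    let base = pool.allocate(8, 8).unwrap();
    assert_eq!(base % 8, 0);
    assert_eq!(base, 24);
}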
+ #[allow(clippy::question_mark)] + pub fn allocate_device_resources( + &self, + requests: &[ResourceConstraint], + shared_irq: bool, + ) -> std::result::Result { + let mut resources = DeviceResources::new(); + for resource in requests.iter() { + let res = match resource { + ResourceConstraint::PioAddress { range, align, size } => { + let mut constraint = Constraint::new(*size).align(*align); + if let Some(r) = range { + constraint.min = r.0 as u64; + constraint.max = r.1 as u64; + } + match self.allocate_pio_address(&constraint) { + Some(base) => Resource::PioAddressRange { base, size: *size }, + None => { + if let Err(e) = self.free_device_resources(&resources) { + return Err(e); + } else { + return Err(ResourceError::NoAvailResource); + } + } + } + } + ResourceConstraint::MmioAddress { range, align, size } => { + let mut constraint = Constraint::new(*size).align(*align); + if let Some(r) = range { + constraint.min = r.0; + constraint.max = r.1; + } + match self.allocate_mmio_address(&constraint) { + Some(base) => Resource::MmioAddressRange { base, size: *size }, + None => { + if let Err(e) = self.free_device_resources(&resources) { + return Err(e); + } else { + return Err(ResourceError::NoAvailResource); + } + } + } + } + ResourceConstraint::MemAddress { range, align, size } => { + let mut constraint = Constraint::new(*size).align(*align); + if let Some(r) = range { + constraint.min = r.0; + constraint.max = r.1; + } + match self.allocate_mem_address(&constraint) { + Some(base) => Resource::MemAddressRange { base, size: *size }, + None => { + if let Err(e) = self.free_device_resources(&resources) { + return Err(e); + } else { + return Err(ResourceError::NoAvailResource); + } + } + } + } + ResourceConstraint::LegacyIrq { irq } => { + match self.allocate_legacy_irq(shared_irq, *irq) { + Some(v) => Resource::LegacyIrq(v), + None => { + if let Err(e) = self.free_device_resources(&resources) { + return Err(e); + } else { + return Err(ResourceError::NoAvailResource); + } + } + } + } + ResourceConstraint::PciMsiIrq { size } => { + match self.allocate_msi_irq_aligned(*size) { + Some(base) => Resource::MsiIrq { + ty: MsiIrqType::PciMsi, + base, + size: *size, + }, + None => { + if let Err(e) = self.free_device_resources(&resources) { + return Err(e); + } else { + return Err(ResourceError::NoAvailResource); + } + } + } + } + ResourceConstraint::PciMsixIrq { size } => match self.allocate_msi_irq(*size) { + Some(base) => Resource::MsiIrq { + ty: MsiIrqType::PciMsix, + base, + size: *size, + }, + None => { + if let Err(e) = self.free_device_resources(&resources) { + return Err(e); + } else { + return Err(ResourceError::NoAvailResource); + } + } + }, + ResourceConstraint::GenericIrq { size } => match self.allocate_msi_irq(*size) { + Some(base) => Resource::MsiIrq { + ty: MsiIrqType::GenericMsi, + base, + size: *size, + }, + None => { + if let Err(e) = self.free_device_resources(&resources) { + return Err(e); + } else { + return Err(ResourceError::NoAvailResource); + } + } + }, + ResourceConstraint::KvmMemSlot { slot, size } => { + match self.allocate_kvm_mem_slot(*size, *slot) { + Some(v) => Resource::KvmMemSlot(v), + None => { + if let Err(e) = self.free_device_resources(&resources) { + return Err(e); + } else { + return Err(ResourceError::NoAvailResource); + } + } + } + } + }; + resources.append(res); + } + + Ok(resources) + } + + /// Free resources allocated for a device. 
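// `allocate_device_resources` above is an all-or-nothing transaction: each
// constraint is satisfied in turn, and on the first failure everything already
// granted is handed back through `free_device_resources` before the error
// propagates. A compact std-only sketch of that rollback shape (the string
// requests and u64 handles below are illustrative stand-ins for IRQ numbers
// and MMIO ranges):
fn allocate_all(
    requests: &[&str],
    mut try_allocate: impl FnMut(&str) -> Option<u64>,
    mut free: impl FnMut(u64),
) -> Result<Vec<u64>, String> {
    let mut granted = Vec::new();
    for req in requests {
        match try_allocate(req) {
            Some(handle) => granted.push(handle),
            None => {
                // Roll back every earlier grant before reporting the failure.
                for handle in granted.drain(..) {
                    free(handle);
                }
                return Err(format!("no resource available for {req}"));
            }
        }
    }
    Ok(granted)
}

#[test]
fn partial_failure_rolls_back() {
    let mut freed = Vec::new();
    let result = allocate_all(
        &["irq", "mmio", "unsatisfiable"],
        |req| if req == "unsatisfiable" { None } else { Some(1) },
        |handle| freed.push(handle),
    );
    assert!(result.is_err());
    // Both earlier grants were released.
    assert_eq!(freed, vec![1, 1]);
}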
+ pub fn free_device_resources(&self, resources: &DeviceResources) -> Result<(), ResourceError> { + for res in resources.iter() { + let result = match res { + Resource::PioAddressRange { base, size } => self.free_pio_address(*base, *size), + Resource::MmioAddressRange { base, size } => self.free_mmio_address(*base, *size), + Resource::MemAddressRange { base, size } => self.free_mem_address(*base, *size), + Resource::LegacyIrq(base) => self.free_legacy_irq(*base), + Resource::MsiIrq { ty: _, base, size } => self.free_msi_irq(*base, *size), + Resource::KvmMemSlot(slot) => self.free_kvm_mem_slot(*slot), + Resource::MacAddresss(_) => Ok(()), + }; + result?; + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_allocate_legacy_irq() { + let mgr = ResourceManager::new(None); + + // Allocate/free shared IRQs multiple times. + assert_eq!(mgr.allocate_legacy_irq(true, None).unwrap(), SHARED_IRQ); + assert_eq!(mgr.allocate_legacy_irq(true, None).unwrap(), SHARED_IRQ); + mgr.free_legacy_irq(SHARED_IRQ).unwrap(); + mgr.free_legacy_irq(SHARED_IRQ).unwrap(); + mgr.free_legacy_irq(SHARED_IRQ).unwrap(); + + // Allocate specified IRQs. + assert_eq!( + mgr.allocate_legacy_irq(false, Some(LEGACY_IRQ_BASE + 10)) + .unwrap(), + LEGACY_IRQ_BASE + 10 + ); + mgr.free_legacy_irq(LEGACY_IRQ_BASE + 10).unwrap(); + assert_eq!( + mgr.allocate_legacy_irq(false, Some(LEGACY_IRQ_BASE + 10)) + .unwrap(), + LEGACY_IRQ_BASE + 10 + ); + assert!(mgr + .allocate_legacy_irq(false, Some(LEGACY_IRQ_BASE + 10)) + .is_none()); + + assert!(mgr.allocate_legacy_irq(false, None).is_some()); + + assert!(mgr + .allocate_legacy_irq(false, Some(LEGACY_IRQ_BASE - 1)) + .is_none()); + assert!(mgr + .allocate_legacy_irq(false, Some(LEGACY_IRQ_MAX + 1)) + .is_none()); + assert!(mgr.allocate_legacy_irq(false, Some(SHARED_IRQ)).is_none()); + } + + #[test] + fn test_invalid_free_legacy_irq() { + let mgr = ResourceManager::new(None); + assert_eq!( + mgr.free_legacy_irq(LEGACY_IRQ_MAX + 1), + Err(ResourceError::InvalidResourceRange( + "Legacy IRQ".to_string(), + )) + ); + } + + #[test] + fn test_allocate_msi_irq() { + let mgr = ResourceManager::new(None); + + let msi = mgr.allocate_msi_irq(3).unwrap(); + mgr.free_msi_irq(msi, 3).unwrap(); + let msi = mgr.allocate_msi_irq(3).unwrap(); + mgr.free_msi_irq(msi, 3).unwrap(); + + let irq = mgr.allocate_msi_irq_aligned(8).unwrap(); + assert_eq!(irq & 0x7, 0); + mgr.free_msi_irq(msi, 8).unwrap(); + let irq = mgr.allocate_msi_irq_aligned(8).unwrap(); + assert_eq!(irq & 0x7, 0); + + let irq = mgr.allocate_msi_irq_aligned(512).unwrap(); + assert_eq!(irq, 512); + mgr.free_msi_irq(irq, 512).unwrap(); + let irq = mgr.allocate_msi_irq_aligned(512).unwrap(); + assert_eq!(irq, 512); + + assert!(mgr.allocate_msi_irq(4099).is_none()); + } + + #[test] + fn test_invalid_free_msi_irq() { + let mgr = ResourceManager::new(None); + assert_eq!( + mgr.free_msi_irq(MSI_IRQ_MAX, 3), + Err(ResourceError::InvalidResourceRange("MSI IRQ".to_string())) + ); + } + + #[test] + fn test_allocate_pio_addr() { + let mgr = ResourceManager::new(None); + assert!(mgr.allocate_pio_address_simple(10).is_some()); + let mut requests = vec![ + ResourceConstraint::PioAddress { + range: None, + align: 0x1000, + size: 0x2000, + }, + ResourceConstraint::PioAddress { + range: Some((0x8000, 0x9000)), + align: 0x1000, + size: 0x1000, + }, + ResourceConstraint::PioAddress { + range: Some((0x9000, 0xa000)), + align: 0x1000, + size: 0x1000, + }, + ResourceConstraint::PioAddress { + range: Some((0xb000, 0xc000)), + align: 
0x1000, + size: 0x1000, + }, + ]; + let resources = mgr.allocate_device_resources(&requests, false).unwrap(); + mgr.free_device_resources(&resources).unwrap(); + let resources = mgr.allocate_device_resources(&requests, false).unwrap(); + mgr.free_device_resources(&resources).unwrap(); + requests.push(ResourceConstraint::PioAddress { + range: Some((0xc000, 0xc000)), + align: 0x1000, + size: 0x1000, + }); + assert!(mgr.allocate_device_resources(&requests, false).is_err()); + let resources = mgr + .allocate_device_resources(&requests[0..requests.len() - 1], false) + .unwrap(); + mgr.free_device_resources(&resources).unwrap(); + } + + #[test] + fn test_invalid_free_pio_addr() { + let mgr = ResourceManager::new(None); + assert_eq!( + mgr.free_pio_address(u16::MAX, 3), + Err(ResourceError::InvalidResourceRange( + "PIO Address".to_string(), + )) + ); + } + + #[test] + fn test_allocate_kvm_mem_slot() { + let mgr = ResourceManager::new(None); + assert_eq!(mgr.allocate_kvm_mem_slot(1, None).unwrap(), 0); + assert_eq!(mgr.allocate_kvm_mem_slot(1, Some(200)).unwrap(), 200); + mgr.free_kvm_mem_slot(200).unwrap(); + assert_eq!(mgr.allocate_kvm_mem_slot(1, Some(200)).unwrap(), 200); + assert_eq!( + mgr.allocate_kvm_mem_slot(1, Some(KVM_USER_MEM_SLOTS)) + .unwrap(), + KVM_USER_MEM_SLOTS + ); + assert!(mgr + .allocate_kvm_mem_slot(1, Some(KVM_USER_MEM_SLOTS + 1)) + .is_none()); + } + + #[test] + fn test_allocate_mmio_address() { + let mgr = ResourceManager::new(None); + + #[cfg(target_arch = "x86_64")] + { + // Can't allocate from reserved region + let constraint = Constraint::new(0x100_0000u64) + .min(0x1_0000_0000u64 - 0x200_0000u64) + .max(0xffff_ffffu64); + assert!(mgr.allocate_mmio_address(&constraint).is_none()); + } + let constraint = Constraint::new(0x100_0000u64).min(0x1_0000_0000u64 - 0x200_0000u64); + assert!(mgr.allocate_mmio_address(&constraint).is_some()); + + #[cfg(target_arch = "x86_64")] + { + // Can't allocate from reserved region + let constraint = Constraint::new(0x100_0000u64) + .min(0x1_0000_0000u64 - 0x200_0000u64) + .max(0xffff_ffffu64); + assert!(mgr.allocate_mem_address(&constraint).is_none()); + } + #[cfg(target_arch = "aarch64")] + { + let constraint = Constraint::new(0x200_0000u64) + .min(0x1_0000_0000u64 - 0x200_0000u64) + .max(0xffff_fffeu64); + assert!(mgr.allocate_mem_address(&constraint).is_none()); + } + let constraint = Constraint::new(0x100_0000u64).min(0x1_0000_0000u64 - 0x200_0000u64); + assert!(mgr.allocate_mem_address(&constraint).is_some()); + } + + #[test] + #[should_panic] + fn test_allocate_duplicate_memory() { + let mgr = ResourceManager::new(None); + + let constraint_1 = Constraint::new(0x100_0000u64) + .min(0x1_0000_0000u64) + .max(0x1_0000_0000u64 + 0x100_0000u64); + let constraint_2 = Constraint::new(0x100_0000u64) + .min(0x1_0000_0000u64) + .max(0x1_0000_0000u64 + 0x100_0000u64); + + assert!(mgr.allocate_mem_address(&constraint_1).is_some()); + assert!(mgr.allocate_mem_address(&constraint_2).is_some()); + } +} diff --git a/src/dragonball/src/signal_handler.rs b/src/dragonball/src/signal_handler.rs new file mode 100644 index 000000000000..f6b7bfe46925 --- /dev/null +++ b/src/dragonball/src/signal_handler.rs @@ -0,0 +1,219 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use libc::{_exit, c_int, c_void, siginfo_t, SIGBUS, SIGSEGV, SIGSYS}; +use log::error; +use vmm_sys_util::signal::register_signal_handler; + +use crate::metric::{IncMetric, METRICS}; + +// The offset of `si_syscall` (offending syscall identifier) within the siginfo structure +// expressed as an `(u)int*`. +// Offset `6` for an `i32` field means that the needed information is located at `6 * sizeof(i32)`. +// See /usr/include/linux/signal.h for the C struct definition. +// See https://github.com/rust-lang/libc/issues/716 for why the offset is different in Rust. +const SI_OFF_SYSCALL: isize = 6; + +const SYS_SECCOMP_CODE: i32 = 1; + +extern "C" { + fn __libc_current_sigrtmin() -> c_int; + fn __libc_current_sigrtmax() -> c_int; +} + +/// Gets current sigrtmin +pub fn sigrtmin() -> c_int { + unsafe { __libc_current_sigrtmin() } +} + +/// Gets current sigrtmax +pub fn sigrtmax() -> c_int { + unsafe { __libc_current_sigrtmax() } +} + +/// Signal handler for `SIGSYS`. +/// +/// Increments the `seccomp.num_faults` metric, logs an error message and terminates the process +/// with a specific exit code. +extern "C" fn sigsys_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) { + // Safe because we're just reading some fields from a supposedly valid argument. + let si_signo = unsafe { (*info).si_signo }; + let si_code = unsafe { (*info).si_code }; + + // Sanity check. The condition should never be true. + if num != si_signo || num != SIGSYS || si_code != SYS_SECCOMP_CODE { + // Safe because we're terminating the process anyway. + unsafe { _exit(i32::from(super::EXIT_CODE_UNEXPECTED_ERROR)) }; + } + + // Other signals which might do async unsafe things incompatible with the rest of this + // function are blocked due to the sa_mask used when registering the signal handler. + let syscall = unsafe { *(info as *const i32).offset(SI_OFF_SYSCALL) as usize }; + // SIGSYS is triggered when bad syscalls are detected. num_faults is only added when SIGSYS is detected + // so it actually only collects the count for bad syscalls. + METRICS.seccomp.num_faults.inc(); + error!( + "Shutting down VM after intercepting a bad syscall ({}).", + syscall + ); + + // Safe because we're terminating the process anyway. We don't actually do anything when + // running unit tests. + #[cfg(not(test))] + unsafe { + _exit(i32::from(super::EXIT_CODE_BAD_SYSCALL)) + }; +} + +/// Signal handler for `SIGBUS` and `SIGSEGV`. +/// +/// Logs an error message and terminates the process with a specific exit code. +extern "C" fn sigbus_sigsegv_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) { + // Safe because we're just reading some fields from a supposedly valid argument. + let si_signo = unsafe { (*info).si_signo }; + let si_code = unsafe { (*info).si_code }; + + // Sanity check. The condition should never be true. + if num != si_signo || (num != SIGBUS && num != SIGSEGV) { + // Safe because we're terminating the process anyway. + unsafe { _exit(i32::from(super::EXIT_CODE_UNEXPECTED_ERROR)) }; + } + + // Other signals which might do async unsafe things incompatible with the rest of this + // function are blocked due to the sa_mask used when registering the signal handler. + match si_signo { + SIGBUS => METRICS.signals.sigbus.inc(), + SIGSEGV => METRICS.signals.sigsegv.inc(), + _ => (), + } + + error!( + "Shutting down VM after intercepting signal {}, code {}.", + si_signo, si_code + ); + + // Safe because we're terminating the process anyway. 
We don't actually do anything when + // running unit tests. + #[cfg(not(test))] + unsafe { + _exit(i32::from(match si_signo { + SIGBUS => super::EXIT_CODE_SIGBUS, + SIGSEGV => super::EXIT_CODE_SIGSEGV, + _ => super::EXIT_CODE_UNEXPECTED_ERROR, + })) + }; +} + +/// Registers all the required signal handlers. +/// +/// Custom handlers are installed for: `SIGBUS`, `SIGSEGV`, `SIGSYS`. +pub fn register_signal_handlers() -> vmm_sys_util::errno::Result<()> { + // Call to unsafe register_signal_handler which is considered unsafe because it will + // register a signal handler which will be called in the current thread and will interrupt + // whatever work is done on the current thread, so we have to keep in mind that the registered + // signal handler must only do async-signal-safe operations. + register_signal_handler(SIGSYS, sigsys_handler)?; + register_signal_handler(SIGBUS, sigbus_sigsegv_handler)?; + register_signal_handler(SIGSEGV, sigbus_sigsegv_handler)?; + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + use libc::{cpu_set_t, syscall}; + use std::convert::TryInto; + use std::{mem, process, thread}; + + use seccompiler::{apply_filter, BpfProgram, SeccompAction, SeccompFilter}; + + // This function is used when running unit tests, so all the unsafes are safe. + fn cpu_count() -> usize { + let mut cpuset: cpu_set_t = unsafe { mem::zeroed() }; + unsafe { + libc::CPU_ZERO(&mut cpuset); + } + let ret = unsafe { + libc::sched_getaffinity( + 0, + mem::size_of::(), + &mut cpuset as *mut cpu_set_t, + ) + }; + assert_eq!(ret, 0); + + let mut num = 0; + for i in 0..libc::CPU_SETSIZE as usize { + if unsafe { libc::CPU_ISSET(i, &cpuset) } { + num += 1; + } + } + num + } + + #[test] + fn test_signal_handler() { + let child = thread::spawn(move || { + assert!(register_signal_handlers().is_ok()); + + let filter = SeccompFilter::new( + vec![ + (libc::SYS_brk, vec![]), + (libc::SYS_exit, vec![]), + (libc::SYS_futex, vec![]), + (libc::SYS_getpid, vec![]), + (libc::SYS_munmap, vec![]), + (libc::SYS_kill, vec![]), + (libc::SYS_rt_sigprocmask, vec![]), + (libc::SYS_rt_sigreturn, vec![]), + (libc::SYS_sched_getaffinity, vec![]), + (libc::SYS_set_tid_address, vec![]), + (libc::SYS_sigaltstack, vec![]), + (libc::SYS_write, vec![]), + ] + .into_iter() + .collect(), + SeccompAction::Trap, + SeccompAction::Allow, + std::env::consts::ARCH.try_into().unwrap(), + ) + .unwrap(); + + assert!(apply_filter(&TryInto::::try_into(filter).unwrap()).is_ok()); + assert_eq!(METRICS.seccomp.num_faults.count(), 0); + + // Call the blacklisted `SYS_mkdirat`. + unsafe { syscall(libc::SYS_mkdirat, "/foo/bar\0") }; + + // Call SIGBUS signal handler. + assert_eq!(METRICS.signals.sigbus.count(), 0); + unsafe { + syscall(libc::SYS_kill, process::id(), SIGBUS); + } + + // Call SIGSEGV signal handler. + assert_eq!(METRICS.signals.sigsegv.count(), 0); + unsafe { + syscall(libc::SYS_kill, process::id(), SIGSEGV); + } + }); + assert!(child.join().is_ok()); + + // Sanity check. + assert!(cpu_count() > 0); + // Kcov somehow messes with our handler getting the SIGSYS signal when a bad syscall + // is caught, so the following assertion no longer holds. Ideally, we'd have a surefire + // way of either preventing this behaviour, or detecting for certain whether this test is + // run by kcov or not. The best we could do so far is to look at the perceived number of + // available CPUs. Kcov seems to make a single CPU available to the process running the + // tests, so we use this as an heuristic to decide if we check the assertion. 
+ if cpu_count() > 1 { + // The signal handler should let the program continue during unit tests. + assert!(METRICS.seccomp.num_faults.count() >= 1); + } + assert!(METRICS.signals.sigbus.count() >= 1); + assert!(METRICS.signals.sigsegv.count() >= 1); + } +} diff --git a/src/dragonball/src/test_utils.rs b/src/dragonball/src/test_utils.rs new file mode 100644 index 000000000000..dec006f4334d --- /dev/null +++ b/src/dragonball/src/test_utils.rs @@ -0,0 +1,47 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +#[cfg(test)] +pub mod tests { + use crate::api::v1::InstanceInfo; + use crate::vm::{CpuTopology, KernelConfigInfo, Vm, VmConfigInfo}; + use dbs_utils::epoll_manager::EpollManager; + use linux_loader::cmdline::Cmdline; + use std::sync::{Arc, RwLock}; + use vmm_sys_util::tempfile::TempFile; + + pub fn create_vm_for_test() -> Vm { + // Call for kvm too frequently would cause error in some host kernel. + let instance_info = Arc::new(RwLock::new(InstanceInfo::default())); + let epoll_manager = EpollManager::default(); + let mut vm = Vm::new(None, instance_info, epoll_manager).unwrap(); + let kernel_file = TempFile::new().unwrap(); + let cmd_line = Cmdline::new(64).unwrap(); + vm.set_kernel_config(KernelConfigInfo::new( + kernel_file.into_file(), + None, + cmd_line, + )); + + let vm_config = VmConfigInfo { + vcpu_count: 1, + max_vcpu_count: 1, + cpu_pm: "off".to_string(), + mem_type: "shmem".to_string(), + mem_file_path: "".to_string(), + mem_size_mib: 1, + serial_path: None, + cpu_topology: CpuTopology { + threads_per_core: 1, + cores_per_die: 1, + dies_per_socket: 1, + sockets: 1, + }, + vpmu_feature: 0, + }; + vm.set_vm_config(vm_config); + vm.init_guest_memory().unwrap(); + vm + } +} diff --git a/src/dragonball/src/vcpu/aarch64.rs b/src/dragonball/src/vcpu/aarch64.rs new file mode 100644 index 000000000000..ae45cd99d97f --- /dev/null +++ b/src/dragonball/src/vcpu/aarch64.rs @@ -0,0 +1,127 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::ops::Deref; +use std::sync::mpsc::{channel, Sender}; +use std::sync::Arc; + +use crate::IoManagerCached; +use dbs_arch::{regs, VpmuFeatureLevel}; +use dbs_boot::get_fdt_addr; +use dbs_utils::time::TimestampUs; +use kvm_ioctls::{VcpuFd, VmFd}; +use vm_memory::{Address, GuestAddress, GuestAddressSpace}; +use vmm_sys_util::eventfd::EventFd; + +use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuError, VcpuStateEvent}; +use crate::vcpu::VcpuConfig; + +#[allow(unused)] +impl Vcpu { + /// Constructs a new VCPU for `vm`. + /// + /// # Arguments + /// + /// * `id` - Represents the CPU number between [0, max vcpus). + /// * `vcpu_fd` - The kvm `VcpuFd` for the vcpu. + /// * `io_mgr` - The io-manager used to access port-io and mmio devices. + /// * `exit_evt` - An `EventFd` that will be written into when this vcpu + /// exits. + /// * `vcpu_state_event` - The eventfd which can notify vmm state of some + /// vcpu should change. + /// * `vcpu_state_sender` - The channel to send state change message from + /// vcpu thread to vmm thread. + /// * `create_ts` - A timestamp used by the vcpu to calculate its lifetime. 
+ /// * `support_immediate_exit` - whether kvm uses supports immediate_exit flag. + #[allow(clippy::too_many_arguments)] + pub fn new_aarch64( + id: u8, + vcpu_fd: Arc, + io_mgr: IoManagerCached, + exit_evt: EventFd, + vcpu_state_event: EventFd, + vcpu_state_sender: Sender, + create_ts: TimestampUs, + support_immediate_exit: bool, + ) -> Result { + let (event_sender, event_receiver) = channel(); + let (response_sender, response_receiver) = channel(); + + Ok(Vcpu { + fd: vcpu_fd, + id, + io_mgr, + create_ts, + event_receiver, + event_sender: Some(event_sender), + response_receiver: Some(response_receiver), + response_sender, + vcpu_state_event, + vcpu_state_sender, + support_immediate_exit, + mpidr: 0, + exit_evt, + }) + } + + /// Configures an aarch64 specific vcpu. + /// + /// # Arguments + /// + /// * `vcpu_config` - vCPU config for this vCPU status + /// * `vm_fd` - The kvm `VmFd` for this microvm. + /// * `vm_as` - The guest memory address space used by this microvm. + /// * `kernel_load_addr` - Offset from `guest_mem` at which the kernel is loaded. + /// * `_pgtable_addr` - pgtable address for ap vcpu (not used in aarch64) + pub fn configure( + &mut self, + vcpu_config: &VcpuConfig, + vm_fd: &VmFd, + vm_as: &GuestAddressSpaceImpl, + kernel_load_addr: Option, + _pgtable_addr: Option, + ) -> Result<()> { + let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); + + // This reads back the kernel's preferred target type. + vm_fd + .get_preferred_target(&mut kvi) + .map_err(VcpuError::VcpuArmPreferredTarget)?; + // We already checked that the capability is supported. + kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2; + // Non-boot cpus are powered off initially. + if self.id > 0 { + kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF; + } + if vcpu_config.vpmu_feature == VpmuFeatureLevel::FullyEnabled { + kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PMU_V3; + } + + self.fd.vcpu_init(&kvi).map_err(VcpuError::VcpuArmInit)?; + + if let Some(address) = kernel_load_addr { + regs::setup_regs( + &self.fd, + self.id, + address.raw_value(), + get_fdt_addr(vm_as.memory().deref()), + ) + .map_err(VcpuError::REGSConfiguration)?; + } + + self.mpidr = regs::read_mpidr(&self.fd).map_err(VcpuError::REGSConfiguration)?; + + Ok(()) + } + + /// Gets the MPIDR register value. + pub fn get_mpidr(&self) -> u64 { + self.mpidr + } +} diff --git a/src/dragonball/src/vcpu/mod.rs b/src/dragonball/src/vcpu/mod.rs new file mode 100644 index 000000000000..0011d81bde3e --- /dev/null +++ b/src/dragonball/src/vcpu/mod.rs @@ -0,0 +1,35 @@ +// Copyright (C) 2022 Alibaba Cloud Computing. All rights reserved. +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +mod sm; +mod vcpu_impl; +mod vcpu_manager; + +use dbs_arch::VpmuFeatureLevel; +pub use vcpu_manager::{VcpuManager, VcpuManagerError, VcpuResizeInfo}; + +#[cfg(feature = "hotplug")] +pub use vcpu_manager::VcpuResizeError; + +/// vcpu config collection +pub struct VcpuConfig { + /// initial vcpu count + pub boot_vcpu_count: u8, + /// max vcpu count for hotplug + pub max_vcpu_count: u8, + /// threads per core for cpu topology information + pub threads_per_core: u8, + /// cores per die for cpu topology information + pub cores_per_die: u8, + /// dies per socket for cpu topology information + pub dies_per_socket: u8, + /// socket number for cpu topology information + pub sockets: u8, + /// if vpmu feature is Disabled, it means vpmu feature is off (by default) + /// if vpmu feature is LimitedlyEnabled, it means minimal vpmu counters are supported (cycles and instructions) + /// if vpmu feature is FullyEnabled, it means all vpmu counters are supported + /// For aarch64, VpmuFeatureLevel only supports Disabled and FullyEnabled. + pub vpmu_feature: VpmuFeatureLevel, +} diff --git a/src/dragonball/src/vcpu/sm.rs b/src/dragonball/src/vcpu/sm.rs new file mode 100644 index 000000000000..2a51d64083a4 --- /dev/null +++ b/src/dragonball/src/vcpu/sm.rs @@ -0,0 +1,149 @@ +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::ops::Deref; + +/// Simple abstraction of a state machine. +/// +/// `StateMachine` is a wrapper over `T` that also encodes state information for `T`. +/// +/// Each state for `T` is represented by a `StateFn` which is a function that acts as +/// the state handler for that particular state of `T`. +/// +/// `StateFn` returns exactly one other `StateMachine` thus each state gets clearly +/// defined transitions to other states. +pub struct StateMachine { + function: StateFn, + end_state: bool, +} + +/// Type representing a state handler of a `StateMachine` machine. Each state handler +/// is a function from `T` that handles a specific state of `T`. +type StateFn = fn(&mut T) -> StateMachine; + +impl StateMachine { + /// Creates a new state wrapper. + /// + /// # Arguments + /// + /// `function` - the state handler for this state. + /// `end_state` - whether this state is final. + pub fn new(function: StateFn, end_state: bool) -> StateMachine { + StateMachine { + function, + end_state, + } + } + + /// Creates a new state wrapper that has further possible transitions. + /// + /// # Arguments + /// + /// `function` - the state handler for this state. + pub fn next(function: StateFn) -> StateMachine { + StateMachine::new(function, false) + } + + /// Creates a new state wrapper that has no further transitions. The state machine + /// will finish after running this handler. + /// + /// # Arguments + /// + /// `function` - the state handler for this last state. + pub fn finish(function: StateFn) -> StateMachine { + StateMachine::new(function, true) + } + + /// Runs a state machine for `T` starting from the provided state. + /// + /// # Arguments + /// + /// `machine` - a mutable reference to the object running through the various states. + /// `starting_state_fn` - a `fn(&mut T) -> StateMachine` that should be the handler for + /// the initial state. + pub fn run(machine: &mut T, starting_state_fn: StateFn) { + // Start off in the `starting_state` state. + let mut sf = StateMachine::new(starting_state_fn, false); + // While current state is not a final/end state, keep churning. 
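        // Note: `sf(machine)` below compiles because `StateMachine` derefs to its
        // underlying `StateFn` handler (see the `Deref` impl further down).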
+ while !sf.end_state { + // Run the current state handler, and get the next one. + sf = sf(machine); + } + } +} + +// Implement Deref of `StateMachine` so that we can directly call its underlying state handler. +impl Deref for StateMachine { + type Target = StateFn; + fn deref(&self) -> &Self::Target { + &self.function + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // DummyMachine with states `s1`, `s2` and `s3`. + struct DummyMachine { + private_data_s1: bool, + private_data_s2: bool, + private_data_s3: bool, + } + + impl DummyMachine { + fn new() -> Self { + DummyMachine { + private_data_s1: false, + private_data_s2: false, + private_data_s3: false, + } + } + + // DummyMachine functions here. + + // Simple state-machine: start->s1->s2->s3->done. + fn run(&mut self) { + // Verify the machine has not run yet. + assert!(!self.private_data_s1); + assert!(!self.private_data_s2); + assert!(!self.private_data_s3); + + // Run the state-machine. + StateMachine::run(self, Self::s1); + + // Verify the machine went through all states. + assert!(self.private_data_s1); + assert!(self.private_data_s2); + assert!(self.private_data_s3); + } + + fn s1(&mut self) -> StateMachine { + // Verify private data mutates along with the states. + assert!(!self.private_data_s1); + self.private_data_s1 = true; + StateMachine::next(Self::s2) + } + + fn s2(&mut self) -> StateMachine { + // Verify private data mutates along with the states. + assert!(!self.private_data_s2); + self.private_data_s2 = true; + StateMachine::next(Self::s3) + } + + fn s3(&mut self) -> StateMachine { + // Verify private data mutates along with the states. + assert!(!self.private_data_s3); + self.private_data_s3 = true; + // The machine ends here, adding `s1` as next state to validate this. + StateMachine::finish(Self::s1) + } + } + + #[test] + fn test_sm() { + let mut machine = DummyMachine::new(); + machine.run(); + } +} diff --git a/src/dragonball/src/vcpu/vcpu_impl.rs b/src/dragonball/src/vcpu/vcpu_impl.rs new file mode 100644 index 000000000000..3dffd579f19b --- /dev/null +++ b/src/dragonball/src/vcpu/vcpu_impl.rs @@ -0,0 +1,990 @@ +// Copyright (C) 2019-2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! 
The implementation for per vcpu + +use std::cell::Cell; +use std::result; +use std::sync::atomic::{fence, Ordering}; +use std::sync::mpsc::{Receiver, Sender, TryRecvError}; +use std::sync::{Arc, Barrier}; +use std::thread; + +use dbs_utils::time::TimestampUs; +use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN}; +use kvm_ioctls::{VcpuExit, VcpuFd}; +use libc::{c_int, c_void, siginfo_t}; +use log::{error, info}; +use seccompiler::{apply_filter, BpfProgram, Error as SecError}; +use vmm_sys_util::eventfd::EventFd; +use vmm_sys_util::signal::{register_signal_handler, Killable}; + +use super::sm::StateMachine; +use crate::metric::{IncMetric, METRICS}; +use crate::signal_handler::sigrtmin; +use crate::IoManagerCached; + +#[cfg(target_arch = "x86_64")] +#[path = "x86_64.rs"] +mod x86_64; + +#[cfg(target_arch = "aarch64")] +#[path = "aarch64.rs"] +mod aarch64; + +#[cfg(target_arch = "x86_64")] +const MAGIC_IOPORT_BASE: u16 = 0xdbdb; +#[cfg(target_arch = "x86_64")] +const MAGIC_IOPORT_DEBUG_INFO: u16 = MAGIC_IOPORT_BASE; + +/// Signal number (SIGRTMIN) used to kick Vcpus. +pub const VCPU_RTSIG_OFFSET: i32 = 0; + +#[cfg(target_arch = "x86_64")] +/// Errors associated with the wrappers over KVM ioctls. +#[derive(Debug, thiserror::Error)] +pub enum VcpuError { + /// Failed to signal Vcpu. + #[error("cannot signal the vCPU thread")] + SignalVcpu(#[source] vmm_sys_util::errno::Error), + + /// Cannot open the vCPU file descriptor. + #[error("cannot open the vCPU file descriptor")] + VcpuFd(#[source] kvm_ioctls::Error), + + /// Cannot spawn a new vCPU thread. + #[error("cannot spawn vCPU thread")] + VcpuSpawn(#[source] std::io::Error), + + /// Cannot cleanly initialize vCPU TLS. + #[error("cannot cleanly initialize TLS fro vCPU")] + VcpuTlsInit, + + /// Vcpu not present in TLS. + #[error("vCPU not present in the TLS")] + VcpuTlsNotPresent, + + /// Unexpected KVM_RUN exit reason + #[error("Unexpected KVM_RUN exit reason")] + VcpuUnhandledKvmExit, + + /// Pause vcpu failed + #[error("failed to pause vcpus")] + PauseFailed, + + /// Kvm Ioctl Error + #[error("failure in issuing KVM ioctl command")] + Kvm(#[source] kvm_ioctls::Error), + + /// Msr error + #[error("failure to deal with MSRs")] + Msr(vmm_sys_util::fam::Error), + + /// A call to cpuid instruction failed on x86_64. + #[error("failure while configuring CPUID for virtual CPU on x86_64")] + CpuId(dbs_arch::cpuid::Error), + + /// Error configuring the floating point related registers on x86_64. + #[error("failure while configuring the floating point related registers on x86_64")] + FPUConfiguration(dbs_arch::regs::Error), + + /// Cannot set the local interruption due to bad configuration on x86_64. + #[error("cannot set the local interruption due to bad configuration on x86_64")] + LocalIntConfiguration(dbs_arch::interrupts::Error), + + /// Error configuring the MSR registers on x86_64. + #[error("failure while configuring the MSR registers on x86_64")] + MSRSConfiguration(dbs_arch::regs::Error), + + /// Error configuring the general purpose registers on x86_64. + #[error("failure while configuring the general purpose registers on x86_64")] + REGSConfiguration(dbs_arch::regs::Error), + + /// Error configuring the special registers on x86_64. + #[error("failure while configuring the special registers on x86_64")] + SREGSConfiguration(dbs_arch::regs::Error), + + /// Error configuring the page table on x86_64. 
+ #[error("failure while configuring the page table on x86_64")] + PageTable(dbs_boot::Error), + + /// The call to KVM_SET_CPUID2 failed on x86_64. + #[error("failure while calling KVM_SET_CPUID2 on x86_64")] + SetSupportedCpusFailed(#[source] kvm_ioctls::Error), +} + +#[cfg(target_arch = "aarch64")] +/// Errors associated with the wrappers over KVM ioctls. +#[derive(Debug, thiserror::Error)] +pub enum VcpuError { + /// Failed to signal Vcpu. + #[error("cannot signal the vCPU thread")] + SignalVcpu(#[source] vmm_sys_util::errno::Error), + + /// Cannot open the vCPU file descriptor. + #[error("cannot open the vCPU file descriptor")] + VcpuFd(#[source] kvm_ioctls::Error), + + /// Cannot spawn a new vCPU thread. + #[error("cannot spawn vCPU thread")] + VcpuSpawn(#[source] std::io::Error), + + /// Cannot cleanly initialize vCPU TLS. + #[error("cannot cleanly initialize TLS fro vCPU")] + VcpuTlsInit, + + /// Vcpu not present in TLS. + #[error("vCPU not present in the TLS")] + VcpuTlsNotPresent, + + /// Unexpected KVM_RUN exit reason + #[error("Unexpected KVM_RUN exit reason")] + VcpuUnhandledKvmExit, + + /// Pause vcpu failed + #[error("failed to pause vcpus")] + PauseFailed, + + /// Kvm Ioctl Error + #[error("failure in issuing KVM ioctl command")] + Kvm(#[source] kvm_ioctls::Error), + + /// Msr error + #[error("failure to deal with MSRs")] + Msr(vmm_sys_util::fam::Error), + + #[cfg(target_arch = "aarch64")] + /// Error configuring the general purpose aarch64 registers on aarch64. + #[error("failure while configuring the general purpose registers on aarch64")] + REGSConfiguration(dbs_arch::regs::Error), + + #[cfg(target_arch = "aarch64")] + /// Error setting up the global interrupt controller on aarch64. + #[error("failure while setting up the global interrupt controller on aarch64")] + SetupGIC(dbs_arch::gic::Error), + + #[cfg(target_arch = "aarch64")] + /// Error getting the Vcpu preferred target on aarch64. + #[error("failure while getting the vCPU preferred target on aarch64")] + VcpuArmPreferredTarget(kvm_ioctls::Error), + + #[cfg(target_arch = "aarch64")] + /// Error doing vCPU Init on aarch64. + #[error("failure while doing vCPU init on aarch64")] + VcpuArmInit(kvm_ioctls::Error), +} + +/// Result for Vcpu related operations. +pub type Result = result::Result; + +/// List of events that the Vcpu can receive. +#[derive(Debug)] +pub enum VcpuEvent { + /// Kill the Vcpu. + Exit, + /// Pause the Vcpu. + Pause, + /// Event that should resume the Vcpu. + Resume, + /// Get vcpu thread tid + Gettid, + + /// Event to revalidate vcpu IoManager cache + RevalidateCache, +} + +/// List of responses that the Vcpu reports. +pub enum VcpuResponse { + /// Vcpu is paused. + Paused, + /// Vcpu is resumed. + Resumed, + /// Vcpu index and thread tid. + Tid(u8, u32), + /// Requested Vcpu operation is not allowed. + NotAllowed, + /// Requestion action encountered an error + Error(VcpuError), + /// Vcpu IoManager cache is revalidated + CacheRevalidated, +} + +#[derive(Debug, PartialEq)] +/// Vcpu Hotplug Result returned from the guest +pub enum VcpuResizeResult { + /// All vCPU hotplug / hot-unplug operations are successful + Success = 0, + /// vCPU hotplug / hot-unplug failed + Failed = 1, +} + +/// List of events that the vcpu_state_sender can send. +pub enum VcpuStateEvent { + /// (result, response) for hotplug / hot-unplugged. + /// response records how many cpu has successfully being hotplugged / hot-unplugged. 
+ Hotplug((VcpuResizeResult, u32)), +} + +/// Wrapper over vCPU that hides the underlying interactions with the vCPU thread. +pub struct VcpuHandle { + event_sender: Sender, + response_receiver: Receiver, + vcpu_thread: thread::JoinHandle<()>, +} + +impl VcpuHandle { + /// Send event to vCPU thread + pub fn send_event(&self, event: VcpuEvent) -> Result<()> { + // Use expect() to crash if the other thread closed this channel. + self.event_sender + .send(event) + .expect("event sender channel closed on vcpu end."); + // Kick the vCPU so it picks up the message. + self.vcpu_thread + .kill(sigrtmin() + VCPU_RTSIG_OFFSET) + .map_err(VcpuError::SignalVcpu)?; + Ok(()) + } + + /// Receive response from vcpu thread + pub fn response_receiver(&self) -> &Receiver { + &self.response_receiver + } + + #[allow(dead_code)] + /// Join the vcpu thread + pub fn join_vcpu_thread(self) -> thread::Result<()> { + self.vcpu_thread.join() + } +} + +#[derive(PartialEq)] +enum VcpuEmulation { + Handled, + Interrupted, + Stopped, +} + +/// A wrapper around creating and using a kvm-based VCPU. +pub struct Vcpu { + // vCPU fd used by the vCPU + fd: Arc, + // vCPU id info + id: u8, + // Io manager Cached for facilitating IO operations + io_mgr: IoManagerCached, + // Records vCPU create time stamp + create_ts: TimestampUs, + + // The receiving end of events channel owned by the vcpu side. + event_receiver: Receiver, + // The transmitting end of the events channel which will be given to the handler. + event_sender: Option>, + // The receiving end of the responses channel which will be given to the handler. + response_receiver: Option>, + // The transmitting end of the responses channel owned by the vcpu side. + response_sender: Sender, + // Event notifier for CPU hotplug. + // After arm adapts to hotplug vcpu, the dead code macro needs to be removed + #[cfg_attr(target_arch = "aarch64", allow(dead_code))] + vcpu_state_event: EventFd, + // CPU hotplug events. + // After arm adapts to hotplug vcpu, the dead code macro needs to be removed + #[cfg_attr(target_arch = "aarch64", allow(dead_code))] + vcpu_state_sender: Sender, + + // An `EventFd` that will be written into when this vcpu exits. + exit_evt: EventFd, + // Whether kvm used supports immediate_exit flag. + support_immediate_exit: bool, + + // CPUID information for the x86_64 CPU + #[cfg(target_arch = "x86_64")] + cpuid: kvm_bindings::CpuId, + + /// Multiprocessor affinity register recorded for aarch64 + #[cfg(target_arch = "aarch64")] + pub(crate) mpidr: u64, +} + +// Using this for easier explicit type-casting to help IDEs interpret the code. +type VcpuCell = Cell>; + +impl Vcpu { + thread_local!(static TLS_VCPU_PTR: VcpuCell = Cell::new(None)); + + /// Associates `self` with the current thread. + /// + /// It is a prerequisite to successfully run `init_thread_local_data()` before using + /// `run_on_thread_local()` on the current thread. + /// This function will return an error if there already is a `Vcpu` present in the TLS. + fn init_thread_local_data(&mut self) -> Result<()> { + Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| { + if cell.get().is_some() { + return Err(VcpuError::VcpuTlsInit); + } + cell.set(Some(self as *const Vcpu)); + Ok(()) + }) + } + + /// Deassociates `self` from the current thread. + /// + /// Should be called if the current `self` had called `init_thread_local_data()` and + /// now needs to move to a different thread. + /// + /// Fails if `self` was not previously associated with the current thread. 
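    ///
    /// # Example
    ///
    /// A minimal, illustrative sketch of the intended pairing; in practice
    /// `start_threaded()` performs the registration on the vcpu thread and
    /// `Drop` performs the cleanup.
    ///
    /// ```ignore
    /// // On the thread that runs the vcpu:
    /// vcpu.init_thread_local_data().expect("TLS already holds a Vcpu");
    /// // ... run the vcpu state machine ...
    /// // Before moving `vcpu` to another thread:
    /// vcpu.reset_thread_local_data().expect("Vcpu was not registered in this thread's TLS");
    /// ```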
+ fn reset_thread_local_data(&mut self) -> Result<()> { + // Best-effort to clean up TLS. If the `Vcpu` was moved to another thread + // _before_ running this, then there is nothing we can do. + Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| { + if let Some(vcpu_ptr) = cell.get() { + if vcpu_ptr == self as *const Vcpu { + Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| cell.take()); + return Ok(()); + } + } + Err(VcpuError::VcpuTlsNotPresent) + }) + } + + /// Runs `func` for the `Vcpu` associated with the current thread. + /// + /// It requires that `init_thread_local_data()` was run on this thread. + /// + /// Fails if there is no `Vcpu` associated with the current thread. + /// + /// # Safety + /// + /// This is marked unsafe as it allows temporary aliasing through + /// dereferencing from pointer an already borrowed `Vcpu`. + unsafe fn run_on_thread_local(func: F) -> Result<()> + where + F: FnOnce(&Vcpu), + { + Self::TLS_VCPU_PTR.with(|cell: &VcpuCell| { + if let Some(vcpu_ptr) = cell.get() { + // Dereferencing here is safe since `TLS_VCPU_PTR` is populated/non-empty, + // and it is being cleared on `Vcpu::drop` so there is no dangling pointer. + let vcpu_ref: &Vcpu = &*vcpu_ptr; + func(vcpu_ref); + Ok(()) + } else { + Err(VcpuError::VcpuTlsNotPresent) + } + }) + } + + /// Registers a signal handler which makes use of TLS and kvm immediate exit to + /// kick the vcpu running on the current thread, if there is one. + pub fn register_kick_signal_handler() { + extern "C" fn handle_signal(_: c_int, _: *mut siginfo_t, _: *mut c_void) { + // This is safe because it's temporarily aliasing the `Vcpu` object, but we are + // only reading `vcpu.fd` which does not change for the lifetime of the `Vcpu`. + unsafe { + let _ = Vcpu::run_on_thread_local(|vcpu| { + vcpu.fd.set_kvm_immediate_exit(1); + fence(Ordering::Release); + }); + } + } + + register_signal_handler(sigrtmin() + VCPU_RTSIG_OFFSET, handle_signal) + .expect("Failed to register vcpu signal handler"); + } + + /// Returns the cpu index as seen by the guest OS. + pub fn cpu_index(&self) -> u8 { + self.id + } + + /// Moves the vcpu to its own thread and constructs a VcpuHandle. + /// The handle can be used to control the remote vcpu. + pub fn start_threaded( + mut self, + seccomp_filter: BpfProgram, + barrier: Arc, + ) -> Result { + let event_sender = self.event_sender.take().unwrap(); + let response_receiver = self.response_receiver.take().unwrap(); + + let vcpu_thread = thread::Builder::new() + .name(format!("db_vcpu{}", self.cpu_index())) + .spawn(move || { + self.init_thread_local_data() + .expect("Cannot cleanly initialize vcpu TLS."); + barrier.wait(); + self.run(seccomp_filter); + }) + .map_err(VcpuError::VcpuSpawn)?; + + Ok(VcpuHandle { + event_sender, + response_receiver, + vcpu_thread, + }) + } + + /// Extract the vcpu running logic for test mocking. + #[cfg(not(test))] + pub fn emulate(fd: &VcpuFd) -> std::result::Result, kvm_ioctls::Error> { + fd.run() + } + + /// Runs the vCPU in KVM context and handles the kvm exit reason. + /// + /// Returns error or enum specifying whether emulation was handled or interrupted. + fn run_emulation(&mut self) -> Result { + match Vcpu::emulate(&self.fd) { + Ok(run) => { + match run { + #[cfg(target_arch = "x86_64")] + VcpuExit::IoIn(addr, data) => { + let _ = self.io_mgr.pio_read(addr, data); + METRICS.vcpu.exit_io_in.inc(); + Ok(VcpuEmulation::Handled) + } + #[cfg(target_arch = "x86_64")] + VcpuExit::IoOut(addr, data) => { + if !self.check_io_port_info(addr, data)? 
{ + let _ = self.io_mgr.pio_write(addr, data); + } + METRICS.vcpu.exit_io_out.inc(); + Ok(VcpuEmulation::Handled) + } + VcpuExit::MmioRead(addr, data) => { + let _ = self.io_mgr.mmio_read(addr, data); + METRICS.vcpu.exit_mmio_read.inc(); + Ok(VcpuEmulation::Handled) + } + VcpuExit::MmioWrite(addr, data) => { + let _ = self.io_mgr.mmio_write(addr, data); + METRICS.vcpu.exit_mmio_write.inc(); + Ok(VcpuEmulation::Handled) + } + VcpuExit::Hlt => { + info!("Received KVM_EXIT_HLT signal"); + Err(VcpuError::VcpuUnhandledKvmExit) + } + VcpuExit::Shutdown => { + info!("Received KVM_EXIT_SHUTDOWN signal"); + Err(VcpuError::VcpuUnhandledKvmExit) + } + // Documentation specifies that below kvm exits are considered errors. + VcpuExit::FailEntry(reason, cpu) => { + METRICS.vcpu.failures.inc(); + error!("Received KVM_EXIT_FAIL_ENTRY signal, reason {reason}, cpu number {cpu}"); + Err(VcpuError::VcpuUnhandledKvmExit) + } + VcpuExit::InternalError => { + METRICS.vcpu.failures.inc(); + error!("Received KVM_EXIT_INTERNAL_ERROR signal"); + Err(VcpuError::VcpuUnhandledKvmExit) + } + VcpuExit::SystemEvent(event_type, event_flags) => match event_type { + KVM_SYSTEM_EVENT_RESET | KVM_SYSTEM_EVENT_SHUTDOWN => { + info!( + "Received KVM_SYSTEM_EVENT: type: {}, event: {}", + event_type, event_flags + ); + Ok(VcpuEmulation::Stopped) + } + _ => { + METRICS.vcpu.failures.inc(); + error!( + "Received KVM_SYSTEM_EVENT signal type: {}, flag: {}", + event_type, event_flags + ); + Err(VcpuError::VcpuUnhandledKvmExit) + } + }, + r => { + METRICS.vcpu.failures.inc(); + // TODO: Are we sure we want to finish running a vcpu upon + // receiving a vm exit that is not necessarily an error? + error!("Unexpected exit reason on vcpu run: {:?}", r); + Err(VcpuError::VcpuUnhandledKvmExit) + } + } + } + // The unwrap on raw_os_error can only fail if we have a logic + // error in our code in which case it is better to panic. + Err(ref e) => { + match e.errno() { + libc::EAGAIN => Ok(VcpuEmulation::Handled), + libc::EINTR => { + self.fd.set_kvm_immediate_exit(0); + // Notify that this KVM_RUN was interrupted. + Ok(VcpuEmulation::Interrupted) + } + _ => { + METRICS.vcpu.failures.inc(); + error!("Failure during vcpu run: {}", e); + #[cfg(target_arch = "x86_64")] + { + error!( + "dump regs: {:?}, dump sregs: {:?}", + self.fd.get_regs(), + self.fd.get_sregs() + ); + } + Err(VcpuError::VcpuUnhandledKvmExit) + } + } + } + } + } + + #[cfg(target_arch = "x86_64")] + // checkout the io port that dragonball used only + fn check_io_port_info(&self, addr: u16, data: &[u8]) -> Result { + let mut checked = false; + + // debug info signal + if addr == MAGIC_IOPORT_DEBUG_INFO && data.len() == 4 { + let data = unsafe { std::ptr::read(data.as_ptr() as *const u32) }; + log::warn!("KDBG: guest kernel debug info: 0x{:x}", data); + checked = true; + }; + + Ok(checked) + } + + fn gettid() -> u32 { + nix::unistd::gettid().as_raw() as u32 + } + + fn revalidate_cache(&mut self) -> Result<()> { + self.io_mgr.revalidate_cache(); + + Ok(()) + } + + /// Main loop of the vCPU thread. + /// + /// Runs the vCPU in KVM context in a loop. Handles KVM_EXITs then goes back in. + /// Note that the state of the VCPU and associated VM must be setup first for this to do + /// anything useful. + pub fn run(&mut self, seccomp_filter: BpfProgram) { + // Load seccomp filters for this vCPU thread. + // Execution panics if filters cannot be loaded, use --seccomp-level=0 if skipping filters + // altogether is the desired behaviour. 
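        // An empty filter (seccomp disabled) is tolerated and only logged below;
        // any other failure to apply the filter aborts the vcpu thread with a panic.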
+ if let Err(e) = apply_filter(&seccomp_filter) { + if matches!(e, SecError::EmptyFilter) { + info!("vCPU thread {} use empty seccomp filters.", self.id); + } else { + panic!( + "Failed to set the requested seccomp filters on vCPU {}: Error: {}", + self.id, e + ); + } + } + + info!("vcpu {} is running", self.cpu_index()); + + // Start running the machine state in the `Paused` state. + StateMachine::run(self, Self::paused); + } + + // This is the main loop of the `Running` state. + fn running(&mut self) -> StateMachine { + // This loop is here just for optimizing the emulation path. + // No point in ticking the state machine if there are no external events. + loop { + match self.run_emulation() { + // Emulation ran successfully, continue. + Ok(VcpuEmulation::Handled) => { + // We need to break here if kvm doesn't support + // immediate_exit flag. Because the signal sent from vmm + // thread may occurs when handling the vcpu exit events, and + // in this case the external vcpu events may not be handled + // correctly, so we need to check the event_receiver channel + // after handle vcpu exit events to decrease the window that + // doesn't handle the vcpu external events. + if !self.support_immediate_exit { + break; + } + } + // Emulation was interrupted, check external events. + Ok(VcpuEmulation::Interrupted) => break, + // Emulation was stopped due to reset or shutdown. + Ok(VcpuEmulation::Stopped) => return StateMachine::next(Self::waiting_exit), + // Emulation errors lead to vCPU exit. + Err(e) => { + error!("vcpu: {}, run_emulation failed: {:?}", self.id, e); + return StateMachine::next(Self::waiting_exit); + } + } + } + + // By default don't change state. + let mut state = StateMachine::next(Self::running); + + // Break this emulation loop on any transition request/external event. + match self.event_receiver.try_recv() { + // Running ---- Exit ----> Exited + Ok(VcpuEvent::Exit) => { + // Move to 'exited' state. + state = StateMachine::next(Self::exited); + } + // Running ---- Pause ----> Paused + Ok(VcpuEvent::Pause) => { + // Nothing special to do. + self.response_sender + .send(VcpuResponse::Paused) + .expect("failed to send pause status"); + + // TODO: we should call `KVM_KVMCLOCK_CTRL` here to make sure + // TODO continued: the guest soft lockup watchdog does not panic on Resume. + //let _ = self.fd.kvmclock_ctrl(); + + // Move to 'paused' state. + state = StateMachine::next(Self::paused); + } + Ok(VcpuEvent::Resume) => { + self.response_sender + .send(VcpuResponse::Resumed) + .expect("failed to send resume status"); + } + Ok(VcpuEvent::Gettid) => { + self.response_sender + .send(VcpuResponse::Tid(self.cpu_index(), Vcpu::gettid())) + .expect("failed to send vcpu thread tid"); + } + Ok(VcpuEvent::RevalidateCache) => { + self.revalidate_cache() + .map(|()| { + self.response_sender + .send(VcpuResponse::CacheRevalidated) + .expect("failed to revalidate vcpu IoManager cache"); + }) + .map_err(|e| self.response_sender.send(VcpuResponse::Error(e))) + .expect("failed to revalidate vcpu IoManager cache"); + } + // Unhandled exit of the other end. + Err(TryRecvError::Disconnected) => { + // Move to 'exited' state. + state = StateMachine::next(Self::exited); + } + // All other events or lack thereof have no effect on current 'running' state. + Err(TryRecvError::Empty) => (), + } + + state + } + + // This is the main loop of the `Paused` state. 
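    // Transitions handled below: Exit -> exited, Resume -> running, while
    // Pause, Gettid and RevalidateCache leave the vcpu paused; a closed event
    // channel also moves it to exited.
    //
    // VMM-side sketch (illustrative, error handling elided) of driving this
    // state through a `VcpuHandle`:
    //
    //     handle.send_event(VcpuEvent::Resume)?;
    //     match handle.response_receiver().recv() {
    //         Ok(VcpuResponse::Resumed) => { /* vcpu re-enters `running()` */ }
    //         _ => panic!("vcpu failed to resume"),
    //     }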
+ fn paused(&mut self) -> StateMachine { + match self.event_receiver.recv() { + // Paused ---- Exit ----> Exited + Ok(VcpuEvent::Exit) => { + // Move to 'exited' state. + StateMachine::next(Self::exited) + } + // Paused ---- Resume ----> Running + Ok(VcpuEvent::Resume) => { + self.response_sender + .send(VcpuResponse::Resumed) + .expect("failed to send resume status"); + // Move to 'running' state. + StateMachine::next(Self::running) + } + Ok(VcpuEvent::Pause) => { + self.response_sender + .send(VcpuResponse::Paused) + .expect("failed to send pause status"); + // continue 'pause' state. + StateMachine::next(Self::paused) + } + Ok(VcpuEvent::Gettid) => { + self.response_sender + .send(VcpuResponse::Tid(self.cpu_index(), Vcpu::gettid())) + .expect("failed to send vcpu thread tid"); + StateMachine::next(Self::paused) + } + Ok(VcpuEvent::RevalidateCache) => { + self.revalidate_cache() + .map(|()| { + self.response_sender + .send(VcpuResponse::CacheRevalidated) + .expect("failed to revalidate vcpu IoManager cache"); + }) + .map_err(|e| self.response_sender.send(VcpuResponse::Error(e))) + .expect("failed to revalidate vcpu IoManager cache"); + + StateMachine::next(Self::paused) + } + // Unhandled exit of the other end. + Err(_) => { + // Move to 'exited' state. + StateMachine::next(Self::exited) + } + } + } + + // This is the main loop of the `WaitingExit` state. + fn waiting_exit(&mut self) -> StateMachine { + // trigger vmm to stop machine + if let Err(e) = self.exit_evt.write(1) { + METRICS.vcpu.failures.inc(); + error!("Failed signaling vcpu exit event: {}", e); + } + + let mut state = StateMachine::next(Self::waiting_exit); + + match self.event_receiver.recv() { + Ok(VcpuEvent::Exit) => state = StateMachine::next(Self::exited), + Ok(_) => error!( + "wrong state received in waiting exit state on vcpu {}", + self.id + ), + Err(_) => { + error!( + "vcpu channel closed in waiting exit state on vcpu {}", + self.id + ); + state = StateMachine::next(Self::exited); + } + } + + state + } + + // This is the main loop of the `Exited` state. + fn exited(&mut self) -> StateMachine { + // State machine reached its end. + StateMachine::finish(Self::exited) + } + + /// Get vcpu file descriptor. + pub fn vcpu_fd(&self) -> &VcpuFd { + self.fd.as_ref() + } +} + +impl Drop for Vcpu { + fn drop(&mut self) { + let _ = self.reset_thread_local_data(); + } +} + +#[cfg(test)] +pub mod tests { + use std::sync::mpsc::{channel, Receiver}; + use std::sync::Mutex; + + use arc_swap::ArcSwap; + use dbs_device::device_manager::IoManager; + use lazy_static::lazy_static; + use test_utils::skip_if_not_root; + + use super::*; + use crate::kvm_context::KvmContext; + + pub enum EmulationCase { + IoIn, + IoOut, + MmioRead, + MmioWrite, + Hlt, + Shutdown, + FailEntry(u64, u32), + InternalError, + Unknown, + SystemEvent(u32, u64), + Error(i32), + } + + lazy_static! 
{ + pub static ref EMULATE_RES: Mutex = Mutex::new(EmulationCase::Unknown); + } + + impl Vcpu { + pub fn emulate(_fd: &VcpuFd) -> std::result::Result, kvm_ioctls::Error> { + let res = &*EMULATE_RES.lock().unwrap(); + match res { + EmulationCase::IoIn => Ok(VcpuExit::IoIn(0, &mut [])), + EmulationCase::IoOut => Ok(VcpuExit::IoOut(0, &[])), + EmulationCase::MmioRead => Ok(VcpuExit::MmioRead(0, &mut [])), + EmulationCase::MmioWrite => Ok(VcpuExit::MmioWrite(0, &[])), + EmulationCase::Hlt => Ok(VcpuExit::Hlt), + EmulationCase::Shutdown => Ok(VcpuExit::Shutdown), + EmulationCase::FailEntry(error_type, cpu_num) => { + Ok(VcpuExit::FailEntry(*error_type, *cpu_num)) + } + EmulationCase::InternalError => Ok(VcpuExit::InternalError), + EmulationCase::Unknown => Ok(VcpuExit::Unknown), + EmulationCase::SystemEvent(event_type, event_flags) => { + Ok(VcpuExit::SystemEvent(*event_type, *event_flags)) + } + EmulationCase::Error(e) => Err(kvm_ioctls::Error::new(*e)), + } + } + } + + #[cfg(target_arch = "x86_64")] + fn create_vcpu() -> (Vcpu, Receiver) { + let kvm_context = KvmContext::new(None).unwrap(); + let vm = kvm_context.kvm().create_vm().unwrap(); + let vcpu_fd = Arc::new(vm.create_vcpu(0).unwrap()); + let io_manager = IoManagerCached::new(Arc::new(ArcSwap::new(Arc::new(IoManager::new())))); + let supported_cpuid = kvm_context + .supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES) + .unwrap(); + let reset_event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + let vcpu_state_event = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + let (tx, rx) = channel(); + let time_stamp = TimestampUs::default(); + + let vcpu = Vcpu::new_x86_64( + 0, + vcpu_fd, + io_manager, + supported_cpuid, + reset_event_fd, + vcpu_state_event, + tx, + time_stamp, + false, + ) + .unwrap(); + + (vcpu, rx) + } + + #[cfg(target_arch = "aarch64")] + fn create_vcpu() -> (Vcpu, Receiver) { + use kvm_ioctls::Kvm; + use std::os::fd::AsRawFd; + // Call for kvm too frequently would cause error in some host kernel. 
+ std::thread::sleep(std::time::Duration::from_millis(5)); + + let kvm = Kvm::new().unwrap(); + let vm = Arc::new(kvm.create_vm().unwrap()); + let _kvm_context = KvmContext::new(Some(kvm.as_raw_fd())).unwrap(); + let vcpu_fd = Arc::new(vm.create_vcpu(0).unwrap()); + let io_manager = IoManagerCached::new(Arc::new(ArcSwap::new(Arc::new(IoManager::new())))); + let reset_event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + let vcpu_state_event = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + let (tx, rx) = channel(); + let time_stamp = TimestampUs::default(); + + let vcpu = Vcpu::new_aarch64( + 0, + vcpu_fd, + io_manager, + reset_event_fd, + vcpu_state_event, + tx, + time_stamp, + false, + ) + .unwrap(); + + (vcpu, rx) + } + + #[test] + fn test_vcpu_run_emulation() { + skip_if_not_root!(); + + let (mut vcpu, _) = create_vcpu(); + + #[cfg(target_arch = "x86_64")] + { + // Io in + *(EMULATE_RES.lock().unwrap()) = EmulationCase::IoIn; + let res = vcpu.run_emulation(); + assert!(matches!(res, Ok(VcpuEmulation::Handled))); + + // Io out + *(EMULATE_RES.lock().unwrap()) = EmulationCase::IoOut; + let res = vcpu.run_emulation(); + assert!(matches!(res, Ok(VcpuEmulation::Handled))); + } + + // Mmio read + *(EMULATE_RES.lock().unwrap()) = EmulationCase::MmioRead; + let res = vcpu.run_emulation(); + assert!(matches!(res, Ok(VcpuEmulation::Handled))); + + // Mmio write + *(EMULATE_RES.lock().unwrap()) = EmulationCase::MmioWrite; + let res = vcpu.run_emulation(); + assert!(matches!(res, Ok(VcpuEmulation::Handled))); + + // KVM_EXIT_HLT signal + *(EMULATE_RES.lock().unwrap()) = EmulationCase::Hlt; + let res = vcpu.run_emulation(); + assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); + + // KVM_EXIT_SHUTDOWN signal + *(EMULATE_RES.lock().unwrap()) = EmulationCase::Shutdown; + let res = vcpu.run_emulation(); + assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); + + // KVM_EXIT_FAIL_ENTRY signal + *(EMULATE_RES.lock().unwrap()) = EmulationCase::FailEntry(0, 0); + let res = vcpu.run_emulation(); + assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); + + // KVM_EXIT_INTERNAL_ERROR signal + *(EMULATE_RES.lock().unwrap()) = EmulationCase::InternalError; + let res = vcpu.run_emulation(); + assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); + + // KVM_SYSTEM_EVENT_RESET + *(EMULATE_RES.lock().unwrap()) = EmulationCase::SystemEvent(KVM_SYSTEM_EVENT_RESET, 0); + let res = vcpu.run_emulation(); + assert!(matches!(res, Ok(VcpuEmulation::Stopped))); + + // KVM_SYSTEM_EVENT_SHUTDOWN + *(EMULATE_RES.lock().unwrap()) = EmulationCase::SystemEvent(KVM_SYSTEM_EVENT_SHUTDOWN, 0); + let res = vcpu.run_emulation(); + assert!(matches!(res, Ok(VcpuEmulation::Stopped))); + + // Other system event + *(EMULATE_RES.lock().unwrap()) = EmulationCase::SystemEvent(0, 0); + let res = vcpu.run_emulation(); + assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); + + // Unknown exit reason + *(EMULATE_RES.lock().unwrap()) = EmulationCase::Unknown; + let res = vcpu.run_emulation(); + assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); + + // Error: EAGAIN + *(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EAGAIN); + let res = vcpu.run_emulation(); + assert!(matches!(res, Ok(VcpuEmulation::Handled))); + + // Error: EINTR + *(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINTR); + let res = vcpu.run_emulation(); + assert!(matches!(res, Ok(VcpuEmulation::Interrupted))); + + // other error + *(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINVAL); + let 
res = vcpu.run_emulation(); + assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit))); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_vcpu_check_io_port_info() { + skip_if_not_root!(); + + let (vcpu, _receiver) = create_vcpu(); + + // debug info signal + let res = vcpu + .check_io_port_info(MAGIC_IOPORT_DEBUG_INFO, &[0, 0, 0, 0]) + .unwrap(); + assert!(res); + } +} diff --git a/src/dragonball/src/vcpu/vcpu_manager.rs b/src/dragonball/src/vcpu/vcpu_manager.rs new file mode 100644 index 000000000000..dff3aefc3bbd --- /dev/null +++ b/src/dragonball/src/vcpu/vcpu_manager.rs @@ -0,0 +1,1482 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. +// +// Copyright © 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +//! vCPU manager to enable bootstrap and CPU hotplug. +use std::io; +use std::os::unix::io::AsRawFd; +use std::sync::mpsc::{channel, Receiver, RecvError, RecvTimeoutError, Sender}; +use std::sync::{Arc, Barrier, Mutex, RwLock}; +use std::time::Duration; + +use dbs_arch::VpmuFeatureLevel; +#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] +use dbs_upcall::{DevMgrService, UpcallClient}; +use dbs_utils::epoll_manager::{EpollManager, EventOps, EventSet, Events, MutEventSubscriber}; +use dbs_utils::time::TimestampUs; +use kvm_ioctls::{Cap, VcpuFd, VmFd}; +use log::{debug, error, info}; +use seccompiler::{apply_filter, BpfProgram, Error as SecError}; +use vm_memory::GuestAddress; +use vmm_sys_util::eventfd::EventFd; + +use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::api::v1::InstanceInfo; +use crate::kvm_context::KvmContext; +use crate::vcpu::vcpu_impl::{ + Vcpu, VcpuError, VcpuEvent, VcpuHandle, VcpuResizeResult, VcpuResponse, VcpuStateEvent, +}; +use crate::vcpu::VcpuConfig; +use crate::vm::VmConfigInfo; +use crate::IoManagerCached; + +/// the timeout for communication with vcpu threads +const CPU_RECV_TIMEOUT_MS: u64 = 1000; + +/// vCPU manager error +#[derive(Debug, thiserror::Error)] +pub enum VcpuManagerError { + /// IO errors in vCPU manager + #[error("IO errors in vCPU manager {0}")] + VcpuIO(#[source] io::Error), + + /// vCPU manager is not initialized + #[error("vcpu manager is not initialized")] + VcpuManagerNotInitialized, + + /// Expected vcpu exceed max count + #[error("expected vcpu exceed max count")] + ExpectedVcpuExceedMax, + + /// vCPU not found + #[error("vcpu not found {0}")] + VcpuNotFound(u8), + + /// Cannot recv vCPU thread tid + #[error("cannot get vCPU thread id")] + VcpuGettid, + + /// vCPU pause failed. + #[error("failure while pausing vCPU thread")] + VcpuPause, + + /// vCPU resume failed. + #[error("failure while resuming vCPU thread")] + VcpuResume, + + /// vCPU save failed. + #[error("failure while save vCPU state")] + VcpuSave, + + /// Vcpu is in unexpected state. + #[error("Vcpu is in unexpected state")] + UnexpectedVcpuResponse, + + /// Vcpu not create + #[error("Vcpu is not create")] + VcpuNotCreate, + + /// The number of max_vcpu reached kvm's limitation + #[error("specified vcpu count {0} is greater than max allowed count {1} by kvm")] + MaxVcpuLimitation(u8, usize), + + /// Revalidate vcpu IoManager cache failed. 
+ #[error("failure while revalidating vcpu IoManager cache")] + VcpuRevalidateCache, + + /// Event fd is already set so there could be some problem in the VMM if we try to reset it. + #[error("Event fd is already set for the vcpu")] + EventAlreadyExist, + + /// Response channel error + #[error("Response channel error: {0}")] + VcpuResponseChannel(RecvError), + + /// Vcpu response timeout + #[error("Vcpu response timeout: {0}")] + VcpuResponseTimeout(RecvTimeoutError), + + /// Cannot build seccomp filters. + #[error("failure while configuring seccomp filters: {0}")] + SeccompFilters(#[source] seccompiler::Error), + + /// Cannot send event to vCPU. + #[error("failure while sending message to vCPU thread: {0}")] + VcpuEvent(#[source] VcpuError), + + /// vCPU Error + #[error("vcpu internal error: {0}")] + Vcpu(#[source] VcpuError), + + /// Kvm Ioctl Error + #[error("failure in issuing KVM ioctl command: {0}")] + Kvm(#[source] kvm_ioctls::Error), +} + +#[cfg(feature = "hotplug")] +/// Errror associated with resize instance +#[derive(Debug, thiserror::Error)] +pub enum VcpuResizeError { + /// vcpu is in hotplug process + #[error("vcpu is in hotplug process")] + VcpuIsHotplugging, + + /// Cannot update the configuration of the microvm pre boot. + #[error("resize vcpu operation is not allowed pre boot")] + UpdateNotAllowedPreBoot, + + /// Cannot update the configuration of the microvm post boot. + #[error("resize vcpu operation is not allowed after boot")] + UpdateNotAllowedPostBoot, + + /// Expected vcpu exceed max count + #[error("expected vcpu exceed max count")] + ExpectedVcpuExceedMax, + + /// vcpu 0 can't be removed + #[error("vcpu 0 can't be removed")] + Vcpu0CanNotBeRemoved, + + /// Lack removable vcpu + #[error("Removable vcpu not enough, removable vcpu num: {0}, number to remove: {1}, present vcpu count {2}")] + LackRemovableVcpus(u16, u16, u16), + + #[cfg(feature = "dbs-upcall")] + /// Cannot update the configuration by upcall channel. + #[error("cannot update the configuration by upcall channel: {0}")] + Upcall(#[source] dbs_upcall::UpcallClientError), + + #[cfg(feature = "dbs-upcall")] + /// Cannot find upcall client + #[error("Cannot find upcall client")] + UpcallClientMissing, + + #[cfg(feature = "dbs-upcall")] + /// Upcall server is not ready + #[error("Upcall server is not ready")] + UpcallServerNotReady, + + /// Vcpu manager error + #[error("Vcpu manager error : {0}")] + Vcpu(#[source] VcpuManagerError), +} + +/// Result for vCPU manager operations +pub type Result = std::result::Result; + +#[derive(Debug, PartialEq, Copy, Clone)] +enum VcpuAction { + None, + Hotplug, + Hotunplug, +} + +/// VcpuResizeInfo describes the information for vcpu hotplug / hot-unplug +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct VcpuResizeInfo { + /// The desired vcpu count to resize. 
+ pub vcpu_count: Option, +} + +/// Infos related to per vcpu +#[derive(Default)] +pub(crate) struct VcpuInfo { + pub(crate) vcpu: Option, + vcpu_fd: Option>, + handle: Option, + tid: u32, +} + +impl std::fmt::Debug for VcpuInfo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("VcpuInfo") + .field("vcpu", &self.vcpu.is_some()) + .field("vcpu_fd", &self.vcpu_fd.is_some()) + .field("handle", &self.handle.is_some()) + .field("tid", &self.tid) + .finish() + } +} + +/// Manage all vcpu related actions +pub struct VcpuManager { + pub(crate) vcpu_infos: Vec, + vcpu_config: VcpuConfig, + vcpu_seccomp_filter: BpfProgram, + vcpu_state_event: EventFd, + vcpu_state_sender: Sender, + support_immediate_exit: bool, + + // The purpose of putting a reference of IoManager here is to simplify the + // design of the API when creating vcpus, and the IoManager has numerous OS + // resources that need to be released when vmm exits. However, since + // VcpuManager is referenced by VcpuEpollHandler and VcpuEpollHandler will + // not be released when vmm is closed, we need to release io manager + // manually when we exit all vcpus. + io_manager: Option, + shared_info: Arc>, + vm_as: GuestAddressSpaceImpl, + pub(crate) vm_fd: Arc, + + action_sycn_tx: Option>, + vcpus_in_action: (VcpuAction, Vec), + pub(crate) reset_event_fd: Option, + + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] + upcall_channel: Option>>, + + // X86 specific fields. + #[cfg(target_arch = "x86_64")] + pub(crate) supported_cpuid: kvm_bindings::CpuId, +} + +#[allow(clippy::too_many_arguments)] +impl VcpuManager { + /// Get a new VcpuManager instance + pub fn new( + vm_fd: Arc, + kvm_context: &KvmContext, + vm_config_info: &VmConfigInfo, + vm_as: GuestAddressSpaceImpl, + vcpu_seccomp_filter: BpfProgram, + shared_info: Arc>, + io_manager: IoManagerCached, + epoll_manager: EpollManager, + ) -> Result>> { + let support_immediate_exit = kvm_context.kvm().check_extension(Cap::ImmediateExit); + let max_vcpu_count = vm_config_info.max_vcpu_count; + let kvm_max_vcpu_count = kvm_context.get_max_vcpus(); + + // check the max vcpu count in kvm. max_vcpu_count is u8 and kvm_context.get_max_vcpus() + // returns usize, so convert max_vcpu_count to usize instead of converting kvm max vcpu to + // u8, to avoid wraping usize. Otherwise if kvm_max_vcpu_count is greater than 255, it'll + // be casted into a smaller number. 
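+        // Illustrative example (added for clarity, not in the original patch): if
+        // kvm_max_vcpu_count were 512, then `512usize as u8 == 0`, so narrowing would make
+        // the limit check pass wrongly; widening `max_vcpu_count` to usize avoids that.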
+ if max_vcpu_count as usize > kvm_max_vcpu_count { + error!( + "vcpu_manager: specified vcpu count {} is greater than max allowed count {} by kvm", + max_vcpu_count, kvm_max_vcpu_count + ); + return Err(VcpuManagerError::MaxVcpuLimitation( + max_vcpu_count, + kvm_max_vcpu_count, + )); + } + + let mut vcpu_infos = Vec::with_capacity(max_vcpu_count.into()); + vcpu_infos.resize_with(max_vcpu_count.into(), Default::default); + + let (tx, rx) = channel(); + let vcpu_state_event = + EventFd::new(libc::EFD_NONBLOCK).map_err(VcpuManagerError::VcpuIO)?; + let vcpu_state_event2 = vcpu_state_event + .try_clone() + .map_err(VcpuManagerError::VcpuIO)?; + + #[cfg(target_arch = "x86_64")] + let supported_cpuid = kvm_context + .supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES) + .map_err(VcpuManagerError::Kvm)?; + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + let vpmu_feature_level = match vm_config_info.vpmu_feature { + #[cfg(target_arch = "x86_64")] + 1 => VpmuFeatureLevel::LimitedlyEnabled, + #[cfg(target_arch = "aarch64")] + 1 => { + log::warn!( + "Limitedly enabled vpmu feature isn't supported on aarch64 for now.\ + This will be supported in the future. The vpmu_feature will be set disabled!" + ); + VpmuFeatureLevel::Disabled + } + 2 => VpmuFeatureLevel::FullyEnabled, + _ => VpmuFeatureLevel::Disabled, + }; + + let vcpu_manager = Arc::new(Mutex::new(VcpuManager { + vcpu_infos, + vcpu_config: VcpuConfig { + boot_vcpu_count: vm_config_info.vcpu_count, + max_vcpu_count, + threads_per_core: vm_config_info.cpu_topology.threads_per_core, + cores_per_die: vm_config_info.cpu_topology.cores_per_die, + dies_per_socket: vm_config_info.cpu_topology.dies_per_socket, + sockets: vm_config_info.cpu_topology.sockets, + vpmu_feature: vpmu_feature_level, + }, + vcpu_seccomp_filter, + vcpu_state_event, + vcpu_state_sender: tx, + support_immediate_exit, + io_manager: Some(io_manager), + shared_info, + vm_as, + vm_fd, + action_sycn_tx: None, + vcpus_in_action: (VcpuAction::None, Vec::new()), + reset_event_fd: None, + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] + upcall_channel: None, + #[cfg(target_arch = "x86_64")] + supported_cpuid, + })); + + let handler = Box::new(VcpuEpollHandler { + vcpu_manager: vcpu_manager.clone(), + eventfd: vcpu_state_event2, + rx, + }); + epoll_manager.add_subscriber(handler); + + Ok(vcpu_manager) + } + + /// get vcpu instances in vcpu manager + pub fn vcpus(&self) -> Vec<&Vcpu> { + let mut vcpus = Vec::new(); + for vcpu_info in &self.vcpu_infos { + if let Some(vcpu) = &vcpu_info.vcpu { + vcpus.push(vcpu); + } + } + vcpus + } + + /// get vcpu instances in vcpu manager as mut + pub fn vcpus_mut(&mut self) -> Vec<&mut Vcpu> { + let mut vcpus = Vec::new(); + for vcpu_info in &mut self.vcpu_infos { + if let Some(vcpu) = &mut vcpu_info.vcpu { + vcpus.push(vcpu); + } + } + vcpus + } + + /// add reset event fd for each vcpu, if the reset_event_fd is already set, error will be returned. 
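+    ///
+    /// A minimal usage sketch (illustrative, not part of the original patch; assumes a
+    /// `vcpu_manager` obtained from `Vm::vcpu_manager()`):
+    ///
+    /// ```ignore
+    /// vcpu_manager
+    ///     .set_reset_event_fd(EventFd::new(libc::EFD_NONBLOCK).unwrap())
+    ///     .unwrap();
+    /// // A second call fails because the fd is already set.
+    /// assert!(matches!(
+    ///     vcpu_manager.set_reset_event_fd(EventFd::new(libc::EFD_NONBLOCK).unwrap()),
+    ///     Err(VcpuManagerError::EventAlreadyExist)
+    /// ));
+    /// ```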
+ pub fn set_reset_event_fd(&mut self, reset_event_fd: EventFd) -> Result<()> { + if self.reset_event_fd.is_some() { + return Err(VcpuManagerError::EventAlreadyExist); + } + self.reset_event_fd = Some(reset_event_fd); + Ok(()) + } + + /// create default num of vcpus for bootup + pub fn create_boot_vcpus( + &mut self, + request_ts: TimestampUs, + entry_addr: GuestAddress, + ) -> Result<()> { + info!("create boot vcpus"); + let boot_vcpu_count = if cfg!(target_arch = "aarch64") { + // On aarch64, kvm doesn't allow to call KVM_CREATE_VCPU ioctl after vm has been booted + // because of vgic check. To support vcpu hotplug/hotunplug feature, we should create + // all the vcpufd at booting procedure. + // SetVmConfiguration API will ensure max_vcpu_count >= boot_vcpu_count, so it is safe + // to directly use max_vcpu_count here. + self.vcpu_config.max_vcpu_count + } else { + self.vcpu_config.boot_vcpu_count + }; + self.create_vcpus(boot_vcpu_count, Some(request_ts), Some(entry_addr))?; + + Ok(()) + } + + /// start the boot vcpus + pub fn start_boot_vcpus(&mut self, vmm_seccomp_filter: BpfProgram) -> Result<()> { + info!("start boot vcpus"); + self.start_vcpus(self.vcpu_config.boot_vcpu_count, vmm_seccomp_filter, true)?; + + Ok(()) + } + + /// create a specified num of vcpu + /// note: we can't create vcpus again until the previously created vcpus are + /// started + pub fn create_vcpus( + &mut self, + vcpu_count: u8, + request_ts: Option, + entry_addr: Option, + ) -> Result> { + info!("create vcpus"); + if vcpu_count > self.vcpu_config.max_vcpu_count { + return Err(VcpuManagerError::ExpectedVcpuExceedMax); + } + + let request_ts = request_ts.unwrap_or_default(); + let mut created_cpus = Vec::new(); + for cpu_id in self.calculate_available_vcpus(vcpu_count) { + self.create_vcpu(cpu_id, request_ts.clone(), entry_addr)?; + created_cpus.push(cpu_id); + } + + Ok(created_cpus) + } + + /// start a specified num of vcpu + pub fn start_vcpus( + &mut self, + vcpu_count: u8, + vmm_seccomp_filter: BpfProgram, + need_resume: bool, + ) -> Result<()> { + info!("start vcpus"); + Vcpu::register_kick_signal_handler(); + self.activate_vcpus(vcpu_count, need_resume)?; + + // Load seccomp filters for the VMM thread. + // Execution panics if filters cannot be loaded, use --seccomp-level=0 if skipping filters + // altogether is the desired behaviour. 
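+        // Note (added for clarity, not in the original patch): an empty filter such as
+        // `BpfProgram::default()` means "seccomp disabled"; `apply_filter` then reports
+        // `SecError::EmptyFilter`, which is deliberately tolerated below.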
+ if let Err(e) = apply_filter(&vmm_seccomp_filter) { + if !matches!(e, SecError::EmptyFilter) { + return Err(VcpuManagerError::SeccompFilters(e)); + } + } + + Ok(()) + } + + /// pause all vcpus + pub fn pause_all_vcpus(&mut self) -> Result<()> { + self.pause_vcpus(&self.present_vcpus()) + } + + /// resume all vcpus + pub fn resume_all_vcpus(&mut self) -> Result<()> { + self.resume_vcpus(&self.present_vcpus()) + } + + /// exit all vcpus, and never restart again + pub fn exit_all_vcpus(&mut self) -> Result<()> { + self.exit_vcpus(&self.present_vcpus())?; + // clear all vcpu infos + self.vcpu_infos.clear(); + // release io manager's reference manually + self.io_manager.take(); + + Ok(()) + } + + /// revalidate IoManager cache of all vcpus + pub fn revalidate_all_vcpus_cache(&mut self) -> Result<()> { + self.revalidate_vcpus_cache(&self.present_vcpus()) + } + + /// return all present vcpus + pub fn present_vcpus(&self) -> Vec { + self.vcpu_infos + .iter() + .enumerate() + .filter(|(_i, info)| info.handle.is_some()) + .map(|(i, _info)| i as u8) + .collect() + } + + /// Get available vcpus to create with target vcpu_count + /// Argument: + /// * vcpu_count: target vcpu_count online in VcpuManager. + /// Return: + /// * return available vcpu ids to create vcpu . + fn calculate_available_vcpus(&self, vcpu_count: u8) -> Vec { + let present_vcpus_count = self.present_vcpus_count(); + let mut available_vcpus = Vec::new(); + + if present_vcpus_count < vcpu_count { + let mut size = vcpu_count - present_vcpus_count; + for cpu_id in 0..self.vcpu_config.max_vcpu_count { + let info = &self.vcpu_infos[cpu_id as usize]; + if info.handle.is_none() { + available_vcpus.push(cpu_id); + size -= 1; + if size == 0 { + break; + } + } + } + } + + available_vcpus + } + + /// Present vcpus count + fn present_vcpus_count(&self) -> u8 { + self.vcpu_infos + .iter() + .fold(0, |sum, info| sum + info.handle.is_some() as u8) + } + + /// Configure single vcpu + fn configure_single_vcpu( + &mut self, + entry_addr: Option, + vcpu: &mut Vcpu, + ) -> std::result::Result<(), VcpuError> { + vcpu.configure( + &self.vcpu_config, + &self.vm_fd, + &self.vm_as, + entry_addr, + None, + ) + } + + fn create_vcpu( + &mut self, + cpu_index: u8, + request_ts: TimestampUs, + entry_addr: Option, + ) -> Result<()> { + info!("creating vcpu {}", cpu_index); + if self.vcpu_infos.get(cpu_index as usize).is_none() { + return Err(VcpuManagerError::VcpuNotFound(cpu_index)); + } + // We will reuse the kvm's vcpufd after first creation, for we can't + // create vcpufd with same id in one kvm instance. 
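+        // In practice (illustrative): after a hot-unplug the vcpu thread exits but the fd
+        // cached in `vcpu_infos[cpu_index].vcpu_fd` is kept, so a later hotplug of the same
+        // index reuses it rather than creating a vcpu fd with the same id again.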
+ let kvm_vcpu = match &self.vcpu_infos[cpu_index as usize].vcpu_fd { + Some(vcpu_fd) => vcpu_fd.clone(), + None => { + let vcpu_fd = Arc::new( + self.vm_fd + .create_vcpu(cpu_index as u64) + .map_err(VcpuError::VcpuFd) + .map_err(VcpuManagerError::Vcpu)?, + ); + self.vcpu_infos[cpu_index as usize].vcpu_fd = Some(vcpu_fd.clone()); + vcpu_fd + } + }; + + let mut vcpu = self.create_vcpu_arch(cpu_index, kvm_vcpu, request_ts)?; + self.configure_single_vcpu(entry_addr, &mut vcpu) + .map_err(VcpuManagerError::Vcpu)?; + self.vcpu_infos[cpu_index as usize].vcpu = Some(vcpu); + + Ok(()) + } + + fn start_vcpu(&mut self, cpu_index: u8, barrier: Arc) -> Result<()> { + info!("starting vcpu {}", cpu_index); + if self.vcpu_infos.get(cpu_index as usize).is_none() { + return Err(VcpuManagerError::VcpuNotFound(cpu_index)); + } + if let Some(vcpu) = self.vcpu_infos[cpu_index as usize].vcpu.take() { + let handle = vcpu + .start_threaded(self.vcpu_seccomp_filter.clone(), barrier) + .map_err(VcpuManagerError::Vcpu)?; + self.vcpu_infos[cpu_index as usize].handle = Some(handle); + Ok(()) + } else { + Err(VcpuManagerError::VcpuNotCreate) + } + } + + fn get_vcpus_tid(&mut self, cpu_indexes: &[u8]) -> Result<()> { + for cpu_id in cpu_indexes { + if self.vcpu_infos.get(*cpu_id as usize).is_none() { + return Err(VcpuManagerError::VcpuNotFound(*cpu_id)); + } + if let Some(handle) = &self.vcpu_infos[*cpu_id as usize].handle { + handle + .send_event(VcpuEvent::Gettid) + .map_err(VcpuManagerError::VcpuEvent)?; + } else { + return Err(VcpuManagerError::VcpuNotFound(*cpu_id)); + } + } + + for cpu_id in cpu_indexes { + if let Some(handle) = &self.vcpu_infos[*cpu_id as usize].handle { + match handle + .response_receiver() + .recv_timeout(Duration::from_millis(CPU_RECV_TIMEOUT_MS)) + { + Ok(VcpuResponse::Tid(_, id)) => self.vcpu_infos[*cpu_id as usize].tid = id, + Err(e) => { + error!("vCPU get tid error! 
{:?}", e); + return Err(VcpuManagerError::VcpuGettid); + } + _ => { + error!("vCPU get tid error!"); + return Err(VcpuManagerError::VcpuGettid); + } + } + } else { + return Err(VcpuManagerError::VcpuNotFound(*cpu_id)); + } + } + + // Save all vCPU thread ID to self.shared_info + let tids: Vec<(u8, u32)> = cpu_indexes + .iter() + .map(|cpu_id| (*cpu_id, self.vcpu_infos[*cpu_id as usize].tid)) + .collect(); + + // Append the new started vcpu thread IDs into self.shared_info + self.shared_info + .write() + .unwrap() + .tids + .extend_from_slice(&tids[..]); + + Ok(()) + } + + fn revalidate_vcpus_cache(&mut self, cpu_indexes: &[u8]) -> Result<()> { + for cpu_id in cpu_indexes { + if self.vcpu_infos.get(*cpu_id as usize).is_none() { + return Err(VcpuManagerError::VcpuNotFound(*cpu_id)); + } + if let Some(handle) = &self.vcpu_infos[*cpu_id as usize].handle { + handle + .send_event(VcpuEvent::RevalidateCache) + .map_err(VcpuManagerError::VcpuEvent)?; + } else { + return Err(VcpuManagerError::VcpuNotFound(*cpu_id)); + } + } + + Ok(()) + } + + fn pause_vcpus(&mut self, cpu_indexes: &[u8]) -> Result<()> { + for cpu_id in cpu_indexes { + if self.vcpu_infos.get(*cpu_id as usize).is_none() { + return Err(VcpuManagerError::VcpuNotFound(*cpu_id)); + } + if let Some(handle) = &self.vcpu_infos[*cpu_id as usize].handle { + handle + .send_event(VcpuEvent::Pause) + .map_err(VcpuManagerError::VcpuEvent)?; + } else { + return Err(VcpuManagerError::VcpuNotFound(*cpu_id)); + } + } + + Ok(()) + } + + fn resume_vcpus(&mut self, cpu_indexes: &[u8]) -> Result<()> { + for cpu_id in cpu_indexes { + if self.vcpu_infos.get(*cpu_id as usize).is_none() { + return Err(VcpuManagerError::VcpuNotFound(*cpu_id)); + } + if let Some(handle) = &self.vcpu_infos[*cpu_id as usize].handle { + handle + .send_event(VcpuEvent::Resume) + .map_err(VcpuManagerError::VcpuEvent)?; + } else { + return Err(VcpuManagerError::VcpuNotFound(*cpu_id)); + } + } + + Ok(()) + } + + // exit vcpus and notify the vmm exit event + fn exit_vcpus(&mut self, cpu_indexes: &[u8]) -> Result<()> { + info!("exiting vcpus {:?}", cpu_indexes); + for cpu_id in cpu_indexes { + if self.vcpu_infos.get(*cpu_id as usize).is_none() { + return Err(VcpuManagerError::VcpuNotFound(*cpu_id)); + } + if let Some(handle) = &self.vcpu_infos[*cpu_id as usize].handle { + handle + .send_event(VcpuEvent::Exit) + .map_err(VcpuManagerError::VcpuEvent)?; + } else { + return Err(VcpuManagerError::VcpuNotFound(*cpu_id)); + } + } + + for cpu_id in cpu_indexes { + let handle = self.vcpu_infos[*cpu_id as usize].handle.take().unwrap(); + handle + .join_vcpu_thread() + .map_err(|e| error!("vcpu exit error! {:?}", e)) + .ok(); + } + + let tids: &mut Vec<(u8, u32)> = &mut self + .shared_info + .write() + .expect( + "Failed to stop vcpus because shared info couldn't be written due to poisoned lock", + ) + .tids; + + // Here's a trick: since we always stop the vcpus started latest, + // thus it's ok here to remove the stopped vcpus from end to head. 
+ tids.truncate(tids.len() - cpu_indexes.len()); + + Ok(()) + } + + fn stop_vcpus_in_action(&mut self) -> Result<()> { + let vcpus_in_action = self.vcpus_in_action.1.clone(); + self.exit_vcpus(&vcpus_in_action) + } + + fn activate_vcpus(&mut self, vcpu_count: u8, need_resume: bool) -> Result> { + let present_vcpus_count = self.present_vcpus_count(); + if vcpu_count > self.vcpu_config.max_vcpu_count { + return Err(VcpuManagerError::ExpectedVcpuExceedMax); + } else if vcpu_count < present_vcpus_count { + return Ok(Vec::new()); + } + + let available_vcpus = self.calculate_available_vcpus(vcpu_count); + let barrier = Arc::new(Barrier::new(available_vcpus.len() + 1_usize)); + for cpu_id in available_vcpus.iter() { + self.start_vcpu(*cpu_id, barrier.clone())?; + } + barrier.wait(); + + self.get_vcpus_tid(&available_vcpus)?; + if need_resume { + self.resume_vcpus(&available_vcpus)?; + } + + Ok(available_vcpus) + } + + fn sync_action_finish(&mut self, got_error: bool) { + if let Some(tx) = self.action_sycn_tx.take() { + if let Err(e) = tx.send(got_error) { + debug!("cpu sync action send to closed channel {}", e); + } + } + } + + fn set_vcpus_action(&mut self, action: VcpuAction, vcpus: Vec) { + self.vcpus_in_action = (action, vcpus); + } + + fn get_vcpus_action(&self) -> VcpuAction { + self.vcpus_in_action.0 + } +} + +#[cfg(target_arch = "x86_64")] +impl VcpuManager { + fn create_vcpu_arch( + &self, + cpu_index: u8, + vcpu_fd: Arc, + request_ts: TimestampUs, + ) -> Result { + // It's safe to unwrap because guest_kernel always exist until vcpu manager done + Vcpu::new_x86_64( + cpu_index, + vcpu_fd, + // safe to unwrap + self.io_manager.as_ref().unwrap().clone(), + self.supported_cpuid.clone(), + self.reset_event_fd.as_ref().unwrap().try_clone().unwrap(), + self.vcpu_state_event.try_clone().unwrap(), + self.vcpu_state_sender.clone(), + request_ts, + self.support_immediate_exit, + ) + .map_err(VcpuManagerError::Vcpu) + } +} + +#[cfg(target_arch = "aarch64")] +impl VcpuManager { + // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) and configured before + // setting up the IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP + // was already initialized. + // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. 
+ fn create_vcpu_arch( + &self, + cpu_index: u8, + vcpu_fd: Arc, + request_ts: TimestampUs, + ) -> Result { + Vcpu::new_aarch64( + cpu_index, + vcpu_fd, + // safe to unwrap + self.io_manager.as_ref().unwrap().clone(), + self.reset_event_fd.as_ref().unwrap().try_clone().unwrap(), + self.vcpu_state_event.try_clone().unwrap(), + self.vcpu_state_sender.clone(), + request_ts, + self.support_immediate_exit, + ) + .map_err(VcpuManagerError::Vcpu) + } + + /// get vpmu_feature config + pub fn vpmu_feature(&self) -> VpmuFeatureLevel { + self.vcpu_config.vpmu_feature + } +} + +#[cfg(feature = "hotplug")] +mod hotplug { + #[cfg(feature = "dbs-upcall")] + use super::*; + #[cfg(feature = "dbs-upcall")] + use dbs_upcall::{CpuDevRequest, DevMgrRequest}; + #[cfg(feature = "dbs-upcall")] + use std::cmp::Ordering; + + #[cfg(all(target_arch = "x86_64", feature = "dbs-upcall"))] + use dbs_boot::mptable::APIC_VERSION; + #[cfg(target_arch = "aarch64")] + const APIC_VERSION: u8 = 0; + + #[cfg(feature = "dbs-upcall")] + impl VcpuManager { + /// add upcall channel for vcpu manager + pub fn set_upcall_channel( + &mut self, + upcall_channel: Option>>, + ) { + self.upcall_channel = upcall_channel; + } + + /// resize the count of vcpu in runtime + pub fn resize_vcpu( + &mut self, + vcpu_count: u8, + sync_tx: Option>, + ) -> std::result::Result<(), VcpuResizeError> { + if self.get_vcpus_action() != VcpuAction::None { + return Err(VcpuResizeError::VcpuIsHotplugging); + } + self.action_sycn_tx = sync_tx; + + if let Some(upcall) = self.upcall_channel.clone() { + let now_vcpu = self.present_vcpus_count(); + info!("resize vcpu: now: {}, desire: {}", now_vcpu, vcpu_count); + match vcpu_count.cmp(&now_vcpu) { + Ordering::Equal => { + info!("resize vcpu: no need to resize"); + self.sync_action_finish(false); + Ok(()) + } + Ordering::Greater => self.do_add_vcpu(vcpu_count, upcall), + Ordering::Less => self.do_del_vcpu(vcpu_count, upcall), + } + } else { + Err(VcpuResizeError::UpdateNotAllowedPostBoot) + } + } + + fn do_add_vcpu( + &mut self, + vcpu_count: u8, + upcall_client: Arc>, + ) -> std::result::Result<(), VcpuResizeError> { + info!("resize vcpu: add"); + if vcpu_count > self.vcpu_config.max_vcpu_count { + return Err(VcpuResizeError::ExpectedVcpuExceedMax); + } + + let created_vcpus = self + .create_vcpus(vcpu_count, None, None) + .map_err(VcpuResizeError::Vcpu)?; + let cpu_ids = self + .activate_vcpus(vcpu_count, true) + .map_err(|e| { + // we need to rollback when activate vcpu error + error!("activate vcpu error, rollback! 
{:?}", e); + let activated_vcpus: Vec = created_vcpus + .iter() + .filter(|&cpu_id| self.vcpu_infos[*cpu_id as usize].handle.is_some()) + .copied() + .collect(); + if let Err(e) = self.exit_vcpus(&activated_vcpus) { + error!("try to rollback error, stop_vcpu: {:?}", e); + } + e + }) + .map_err(VcpuResizeError::Vcpu)?; + + let mut cpu_ids_array = [0u8; (u8::MAX as usize) + 1]; + cpu_ids_array[..cpu_ids.len()].copy_from_slice(&cpu_ids[..cpu_ids.len()]); + let req = DevMgrRequest::AddVcpu(CpuDevRequest { + count: cpu_ids.len() as u8, + #[cfg(target_arch = "x86_64")] + apic_ids: cpu_ids_array, + #[cfg(target_arch = "x86_64")] + apic_ver: APIC_VERSION, + }); + self.send_upcall_action(upcall_client, req)?; + + self.set_vcpus_action(VcpuAction::Hotplug, cpu_ids); + + Ok(()) + } + + fn do_del_vcpu( + &mut self, + vcpu_count: u8, + upcall_client: Arc>, + ) -> std::result::Result<(), VcpuResizeError> { + info!("resize vcpu: delete"); + if vcpu_count == 0 { + return Err(VcpuResizeError::Vcpu0CanNotBeRemoved); + } + + let mut cpu_ids = self.calculate_removable_vcpus(); + let cpu_num_to_be_del = (self.present_vcpus_count() - vcpu_count) as usize; + if cpu_num_to_be_del >= cpu_ids.len() { + return Err(VcpuResizeError::LackRemovableVcpus( + cpu_ids.len() as u16, + cpu_num_to_be_del as u16, + self.present_vcpus_count() as u16, + )); + } + + cpu_ids.reverse(); + cpu_ids.truncate(cpu_num_to_be_del); + + let mut cpu_ids_array = [0u8; 256]; + cpu_ids_array[..cpu_ids.len()].copy_from_slice(&cpu_ids[..cpu_ids.len()]); + let req = DevMgrRequest::DelVcpu(CpuDevRequest { + count: cpu_num_to_be_del as u8, + #[cfg(target_arch = "x86_64")] + apic_ids: cpu_ids_array, + #[cfg(target_arch = "x86_64")] + apic_ver: APIC_VERSION, + }); + self.send_upcall_action(upcall_client, req)?; + + self.set_vcpus_action(VcpuAction::Hotunplug, cpu_ids); + + Ok(()) + } + + #[cfg(test)] + fn send_upcall_action( + &self, + _upcall_client: Arc>, + _request: DevMgrRequest, + ) -> std::result::Result<(), VcpuResizeError> { + Ok(()) + } + + #[cfg(not(test))] + fn send_upcall_action( + &self, + upcall_client: Arc>, + request: DevMgrRequest, + ) -> std::result::Result<(), VcpuResizeError> { + // This is used to fix clippy warnings. + use dbs_upcall::{DevMgrResponse, UpcallClientRequest, UpcallClientResponse}; + + let vcpu_state_event = self.vcpu_state_event.try_clone().unwrap(); + let vcpu_state_sender = self.vcpu_state_sender.clone(); + + upcall_client + .send_request( + UpcallClientRequest::DevMgr(request), + Box::new(move |result| match result { + UpcallClientResponse::DevMgr(response) => { + if let DevMgrResponse::CpuDev(resp) = response { + let result: VcpuResizeResult = if resp.result == 0 { + VcpuResizeResult::Success + } else { + VcpuResizeResult::Failed + }; + vcpu_state_sender + .send(VcpuStateEvent::Hotplug(( + result, + #[cfg(target_arch = "x86_64")] + resp.info.apic_id_index, + #[cfg(target_arch = "aarch64")] + resp.info.cpu_id, + ))) + .unwrap(); + vcpu_state_event.write(1).unwrap(); + } + } + UpcallClientResponse::UpcallReset => { + vcpu_state_sender + .send(VcpuStateEvent::Hotplug((VcpuResizeResult::Success, 0))) + .unwrap(); + vcpu_state_event.write(1).unwrap(); + } + #[cfg(test)] + UpcallClientResponse::FakeResponse => { + panic!("shouldn't happen"); + } + }), + ) + .map_err(VcpuResizeError::Upcall) + } + + /// Get removable vcpus. + /// Return: + /// * return removable vcpu_id with cascade order. 
+ fn calculate_removable_vcpus(&self) -> Vec { + self.present_vcpus() + } + } +} + +struct VcpuEpollHandler { + vcpu_manager: Arc>, + eventfd: EventFd, + rx: Receiver, +} + +impl VcpuEpollHandler { + fn process_cpu_state_event(&mut self, _ops: &mut EventOps) { + // It's level triggered, so it's safe to ignore the result. + let _ = self.eventfd.read(); + while let Ok(event) = self.rx.try_recv() { + match event { + VcpuStateEvent::Hotplug((success, cpu_count)) => { + info!( + "get vcpu event, cpu_index {} success {:?}", + cpu_count, success + ); + self.process_cpu_action(success, cpu_count); + } + } + } + } + + fn process_cpu_action(&self, result: VcpuResizeResult, _cpu_index: u32) { + let mut vcpu_manager = self.vcpu_manager.lock().unwrap(); + if result == VcpuResizeResult::Success { + match vcpu_manager.get_vcpus_action() { + VcpuAction::Hotplug => { + // Notify hotplug success + vcpu_manager.sync_action_finish(false); + } + VcpuAction::Hotunplug => { + if let Err(e) = vcpu_manager.stop_vcpus_in_action() { + error!("stop vcpus in action error: {:?}", e); + } + // notify hotunplug success + vcpu_manager.sync_action_finish(false); + } + VcpuAction::None => { + error!("cannot be here"); + } + }; + vcpu_manager.set_vcpus_action(VcpuAction::None, Vec::new()); + + vcpu_manager.sync_action_finish(true); + // TODO(sicun): rollback + } + } +} + +impl MutEventSubscriber for VcpuEpollHandler { + fn process(&mut self, events: Events, ops: &mut EventOps) { + let vcpu_state_eventfd = self.eventfd.as_raw_fd(); + + match events.fd() { + fd if fd == vcpu_state_eventfd => self.process_cpu_state_event(ops), + _ => error!("vcpu manager epoll handler: unknown event"), + } + } + + fn init(&mut self, ops: &mut EventOps) { + ops.add(Events::new(&self.eventfd, EventSet::IN)).unwrap(); + } +} + +#[cfg(test)] +mod tests { + use std::os::unix::io::AsRawFd; + use std::sync::{Arc, RwLock}; + + use dbs_utils::epoll_manager::EpollManager; + #[cfg(feature = "hotplug")] + use dbs_virtio_devices::vsock::backend::VsockInnerBackend; + use seccompiler::BpfProgram; + use test_utils::skip_if_not_root; + use vmm_sys_util::eventfd::EventFd; + + use super::*; + use crate::api::v1::InstanceInfo; + use crate::vcpu::vcpu_impl::tests::{EmulationCase, EMULATE_RES}; + use crate::vm::{CpuTopology, Vm, VmConfigInfo}; + + fn get_vm() -> Vm { + let instance_info = Arc::new(RwLock::new(InstanceInfo::default())); + let epoll_manager = EpollManager::default(); + let mut vm = Vm::new(None, instance_info, epoll_manager).unwrap(); + let vm_config = VmConfigInfo { + vcpu_count: 1, + max_vcpu_count: 3, + cpu_pm: "off".to_string(), + mem_type: "shmem".to_string(), + mem_file_path: "".to_string(), + mem_size_mib: 100, + serial_path: None, + cpu_topology: CpuTopology { + threads_per_core: 1, + cores_per_die: 3, + dies_per_socket: 1, + sockets: 1, + }, + vpmu_feature: 0, + }; + vm.set_vm_config(vm_config); + vm.init_guest_memory().unwrap(); + + vm.init_vcpu_manager(vm.vm_as().unwrap().clone(), BpfProgram::default()) + .unwrap(); + + vm.vcpu_manager() + .unwrap() + .set_reset_event_fd(EventFd::new(libc::EFD_NONBLOCK).unwrap()) + .unwrap(); + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + vm.setup_interrupt_controller().unwrap(); + } + + vm + } + + fn get_present_unstart_vcpus(vcpu_manager: &std::sync::MutexGuard<'_, VcpuManager>) -> u8 { + vcpu_manager + .vcpu_infos + .iter() + .fold(0, |sum, info| sum + info.vcpu.is_some() as u8) + } + + #[test] + fn test_vcpu_manager_config() { + skip_if_not_root!(); + let instance_info = 
Arc::new(RwLock::new(InstanceInfo::default())); + let epoll_manager = EpollManager::default(); + let mut vm = Vm::new(None, instance_info, epoll_manager).unwrap(); + let vm_config = VmConfigInfo { + vcpu_count: 1, + max_vcpu_count: 2, + cpu_pm: "off".to_string(), + mem_type: "shmem".to_string(), + mem_file_path: "".to_string(), + mem_size_mib: 1, + serial_path: None, + cpu_topology: CpuTopology { + threads_per_core: 1, + cores_per_die: 2, + dies_per_socket: 1, + sockets: 1, + }, + vpmu_feature: 0, + }; + vm.set_vm_config(vm_config.clone()); + vm.init_guest_memory().unwrap(); + + vm.init_vcpu_manager(vm.vm_as().unwrap().clone(), BpfProgram::default()) + .unwrap(); + + let mut vcpu_manager = vm.vcpu_manager().unwrap(); + + // test the vcpu_config + assert_eq!( + vcpu_manager.vcpu_infos.len(), + vm_config.max_vcpu_count as usize + ); + assert_eq!( + vcpu_manager.vcpu_config.boot_vcpu_count, + vm_config.vcpu_count + ); + assert_eq!( + vcpu_manager.vcpu_config.max_vcpu_count, + vm_config.max_vcpu_count + ); + + let reset_event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + let reset_event_fd_raw = reset_event_fd.as_raw_fd(); + vcpu_manager.set_reset_event_fd(reset_event_fd).unwrap(); + + // test the reset_event_fd + assert_eq!( + vcpu_manager.reset_event_fd.as_ref().unwrap().as_raw_fd(), + reset_event_fd_raw + ); + } + + #[test] + fn test_vcpu_manager_boot_vcpus() { + skip_if_not_root!(); + let vm = get_vm(); + let mut vcpu_manager = vm.vcpu_manager().unwrap(); + + // test create boot vcpu + assert!(vcpu_manager + .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) + .is_ok()); + #[cfg(target_arch = "x86_64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); + + // test start boot vcpus + assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); + } + + #[test] + fn test_vcpu_manager_operate_vcpus() { + skip_if_not_root!(); + let vm = get_vm(); + let mut vcpu_manager = vm.vcpu_manager().unwrap(); + + // test create vcpu more than max + let res = vcpu_manager.create_vcpus(20, None, None); + assert!(matches!(res, Err(VcpuManagerError::ExpectedVcpuExceedMax))); + + // test create vcpus + assert!(vcpu_manager.create_vcpus(2, None, None).is_ok()); + assert_eq!(vcpu_manager.present_vcpus_count(), 0); + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2); + assert_eq!(vcpu_manager.vcpus().len(), 2); + assert_eq!(vcpu_manager.vcpus_mut().len(), 2); + + // test start vcpus + assert!(vcpu_manager + .start_vcpus(1, BpfProgram::default(), false) + .is_ok()); + assert_eq!(vcpu_manager.present_vcpus_count(), 1); + assert_eq!(vcpu_manager.present_vcpus(), vec![0]); + assert!(vcpu_manager + .start_vcpus(2, BpfProgram::default(), false) + .is_ok()); + assert_eq!(vcpu_manager.present_vcpus_count(), 2); + assert_eq!(vcpu_manager.present_vcpus(), vec![0, 1]); + + // test start vcpus more than created + let res = vcpu_manager.start_vcpus(3, BpfProgram::default(), false); + assert!(matches!(res, Err(VcpuManagerError::VcpuNotCreate))); + + // test start vcpus less than started + assert!(vcpu_manager + .start_vcpus(1, BpfProgram::default(), false) + .is_ok()); + } + #[test] + fn test_vcpu_manager_pause_resume_vcpus() { + skip_if_not_root!(); + *(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINTR); + + let vm = get_vm(); + let mut vcpu_manager = vm.vcpu_manager().unwrap(); + assert!(vcpu_manager + .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) + .is_ok()); + 
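+        // Added note (not in the original patch): create_boot_vcpus() creates only
+        // boot_vcpu_count (1) vcpus on x86_64, but all max_vcpu_count (3) vcpus on aarch64,
+        // because aarch64 cannot create vcpu fds after the vgic has been initialized.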
#[cfg(target_arch = "x86_64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); + + assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2); + + // invalid cpuid for pause + let cpu_indexes = vec![2]; + let res = vcpu_manager.pause_vcpus(&cpu_indexes); + assert!(matches!(res, Err(VcpuManagerError::VcpuNotFound(_)))); + + // pause success + let cpu_indexes = vec![0]; + assert!(vcpu_manager.pause_vcpus(&cpu_indexes).is_ok()); + + // invalid cpuid for resume + let cpu_indexes = vec![2]; + let res = vcpu_manager.resume_vcpus(&cpu_indexes); + assert!(matches!(res, Err(VcpuManagerError::VcpuNotFound(_)))); + + // success resume + let cpu_indexes = vec![0]; + assert!(vcpu_manager.resume_vcpus(&cpu_indexes).is_ok()); + + // pause and resume all + assert!(vcpu_manager.pause_all_vcpus().is_ok()); + assert!(vcpu_manager.resume_all_vcpus().is_ok()); + } + + #[test] + fn test_vcpu_manager_exit_vcpus() { + skip_if_not_root!(); + *(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINTR); + + let vm = get_vm(); + let mut vcpu_manager = vm.vcpu_manager().unwrap(); + + assert!(vcpu_manager + .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) + .is_ok()); + #[cfg(target_arch = "x86_64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); + + assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2); + + // invalid cpuid for exit + let cpu_indexes = vec![2]; + + let res = vcpu_manager.exit_vcpus(&cpu_indexes); + assert!(matches!(res, Err(VcpuManagerError::VcpuNotFound(_)))); + + // exit success + let cpu_indexes = vec![0]; + assert!(vcpu_manager.exit_vcpus(&cpu_indexes).is_ok()); + } + + #[test] + fn test_vcpu_manager_exit_all_vcpus() { + skip_if_not_root!(); + *(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINTR); + + let vm = get_vm(); + let mut vcpu_manager = vm.vcpu_manager().unwrap(); + + assert!(vcpu_manager + .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) + .is_ok()); + #[cfg(target_arch = "x86_64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); + + assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2); + + // exit all success + assert!(vcpu_manager.exit_all_vcpus().is_ok()); + assert_eq!(vcpu_manager.vcpu_infos.len(), 0); + assert!(vcpu_manager.io_manager.is_none()); + } + + #[test] + fn test_vcpu_manager_revalidate_vcpus_cache() { + skip_if_not_root!(); + *(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINTR); + + let vm = get_vm(); + let mut vcpu_manager = vm.vcpu_manager().unwrap(); + + assert!(vcpu_manager + .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) + .is_ok()); + #[cfg(target_arch = "x86_64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); + + assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 
2); + + // invalid cpuid for exit + let cpu_indexes = vec![2]; + + let res = vcpu_manager.revalidate_vcpus_cache(&cpu_indexes); + assert!(matches!(res, Err(VcpuManagerError::VcpuNotFound(_)))); + + // revalidate success + let cpu_indexes = vec![0]; + assert!(vcpu_manager.revalidate_vcpus_cache(&cpu_indexes).is_ok()); + } + + #[test] + fn test_vcpu_manager_revalidate_all_vcpus_cache() { + skip_if_not_root!(); + *(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINTR); + + let vm = get_vm(); + let mut vcpu_manager = vm.vcpu_manager().unwrap(); + + assert!(vcpu_manager + .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) + .is_ok()); + #[cfg(target_arch = "x86_64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); + + assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2); + + // revalidate all success + assert!(vcpu_manager.revalidate_all_vcpus_cache().is_ok()); + } + + #[test] + #[cfg(feature = "hotplug")] + fn test_vcpu_manager_resize_cpu() { + skip_if_not_root!(); + let vm = get_vm(); + let mut vcpu_manager = vm.vcpu_manager().unwrap(); + + assert!(vcpu_manager + .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) + .is_ok()); + #[cfg(target_arch = "x86_64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); + + assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2); + + // set vcpus in hotplug action + let cpu_ids = vec![0]; + vcpu_manager.set_vcpus_action(VcpuAction::Hotplug, cpu_ids); + + // vcpu is already in hotplug process + let res = vcpu_manager.resize_vcpu(1, None); + assert!(matches!(res, Err(VcpuResizeError::VcpuIsHotplugging))); + + // clear vcpus action + let cpu_ids = vec![0]; + vcpu_manager.set_vcpus_action(VcpuAction::None, cpu_ids); + + // no upcall channel + let res = vcpu_manager.resize_vcpu(1, None); + assert!(matches!( + res, + Err(VcpuResizeError::UpdateNotAllowedPostBoot) + )); + + // init upcall channel + let dev_mgr_service = DevMgrService {}; + let vsock_backend = VsockInnerBackend::new().unwrap(); + let connector = vsock_backend.get_connector(); + let epoll_manager = EpollManager::default(); + let mut upcall_client = + UpcallClient::new(connector, epoll_manager, dev_mgr_service).unwrap(); + assert!(upcall_client.connect().is_ok()); + vcpu_manager.set_upcall_channel(Some(Arc::new(upcall_client))); + + // success: no need to resize + vcpu_manager.resize_vcpu(1, None).unwrap(); + + // exceeed max vcpu count + let res = vcpu_manager.resize_vcpu(4, None); + assert!(matches!(res, Err(VcpuResizeError::ExpectedVcpuExceedMax))); + + // remove vcpu 0 + let res = vcpu_manager.resize_vcpu(0, None); + assert!(matches!(res, Err(VcpuResizeError::Vcpu0CanNotBeRemoved))); + } +} diff --git a/src/dragonball/src/vcpu/x86_64.rs b/src/dragonball/src/vcpu/x86_64.rs new file mode 100644 index 000000000000..f5616066cb0b --- /dev/null +++ b/src/dragonball/src/vcpu/x86_64.rs @@ -0,0 +1,149 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::sync::mpsc::{channel, Sender}; +use std::sync::Arc; + +use dbs_arch::cpuid::{process_cpuid, VmSpec}; +use dbs_arch::gdt::gdt_entry; +use dbs_utils::time::TimestampUs; +use kvm_bindings::CpuId; +use kvm_ioctls::{VcpuFd, VmFd}; +use log::error; +use vm_memory::{Address, GuestAddress, GuestAddressSpace}; +use vmm_sys_util::eventfd::EventFd; + +use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::metric::{IncMetric, METRICS}; +use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuError, VcpuStateEvent}; +use crate::vcpu::VcpuConfig; +use crate::IoManagerCached; + +impl Vcpu { + /// Constructs a new VCPU for `vm`. + /// + /// # Arguments + /// + /// * `id` - Represents the CPU number between [0, max vcpus). + /// * `vcpu_fd` - The kvm `VcpuFd` for the vcpu. + /// * `io_mgr` - The io-manager used to access port-io and mmio devices. + /// * `cpuid` - The `CpuId` listing the supported capabilities of this vcpu. + /// * `exit_evt` - An `EventFd` that will be written into when this vcpu + /// exits. + /// * `vcpu_state_event` - The eventfd which can notify vmm state of some + /// vcpu should change. + /// * `vcpu_state_sender` - The channel to send state change message from + /// vcpu thread to vmm thread. + /// * `create_ts` - A timestamp used by the vcpu to calculate its lifetime. + /// * `support_immediate_exit` - whether kvm used supports immediate_exit flag. + #[allow(clippy::too_many_arguments)] + pub fn new_x86_64( + id: u8, + vcpu_fd: Arc, + io_mgr: IoManagerCached, + cpuid: CpuId, + exit_evt: EventFd, + vcpu_state_event: EventFd, + vcpu_state_sender: Sender, + create_ts: TimestampUs, + support_immediate_exit: bool, + ) -> Result { + let (event_sender, event_receiver) = channel(); + let (response_sender, response_receiver) = channel(); + // Initially the cpuid per vCPU is the one supported by this VM. + Ok(Vcpu { + fd: vcpu_fd, + id, + io_mgr, + create_ts, + event_receiver, + event_sender: Some(event_sender), + response_receiver: Some(response_receiver), + response_sender, + vcpu_state_event, + vcpu_state_sender, + exit_evt, + support_immediate_exit, + cpuid, + }) + } + + /// Configures a x86_64 specific vcpu and should be called once per vcpu. + /// + /// # Arguments + /// + /// * `vm_config` - The machine configuration of this microvm needed for the CPUID configuration. + /// * `vm_fd` - The kvm `VmFd` for the virtual machine this vcpu will get attached to. + /// * `vm_memory` - The guest memory used by this microvm. + /// * `kernel_start_addr` - Offset from `guest_mem` at which the kernel starts. 
+ /// * `pgtable_addr` - pgtable address for ap vcpu + pub fn configure( + &mut self, + vcpu_config: &VcpuConfig, + _vm_fd: &VmFd, + vm_as: &GuestAddressSpaceImpl, + kernel_start_addr: Option, + _pgtable_addr: Option, + ) -> Result<()> { + self.set_cpuid(vcpu_config)?; + + dbs_arch::regs::setup_msrs(&self.fd).map_err(VcpuError::MSRSConfiguration)?; + if let Some(start_addr) = kernel_start_addr { + dbs_arch::regs::setup_regs( + &self.fd, + start_addr.raw_value(), + dbs_boot::layout::BOOT_STACK_POINTER, + dbs_boot::layout::BOOT_STACK_POINTER, + dbs_boot::layout::ZERO_PAGE_START, + ) + .map_err(VcpuError::REGSConfiguration)?; + dbs_arch::regs::setup_fpu(&self.fd).map_err(VcpuError::FPUConfiguration)?; + let gdt_table: [u64; dbs_boot::layout::BOOT_GDT_MAX] = [ + gdt_entry(0, 0, 0), // NULL + gdt_entry(0xa09b, 0, 0xfffff), // CODE + gdt_entry(0xc093, 0, 0xfffff), // DATA + gdt_entry(0x808b, 0, 0xfffff), // TSS + ]; + let pgtable_addr = + dbs_boot::setup_identity_mapping(&*vm_as.memory()).map_err(VcpuError::PageTable)?; + dbs_arch::regs::setup_sregs( + &*vm_as.memory(), + &self.fd, + pgtable_addr, + &gdt_table, + dbs_boot::layout::BOOT_GDT_OFFSET, + dbs_boot::layout::BOOT_IDT_OFFSET, + ) + .map_err(VcpuError::SREGSConfiguration)?; + } + dbs_arch::interrupts::set_lint(&self.fd).map_err(VcpuError::LocalIntConfiguration)?; + + Ok(()) + } + + fn set_cpuid(&mut self, vcpu_config: &VcpuConfig) -> Result<()> { + let cpuid_vm_spec = VmSpec::new( + self.id, + vcpu_config.max_vcpu_count, + vcpu_config.threads_per_core, + vcpu_config.cores_per_die, + vcpu_config.dies_per_socket, + vcpu_config.vpmu_feature, + ) + .map_err(VcpuError::CpuId)?; + process_cpuid(&mut self.cpuid, &cpuid_vm_spec).map_err(|e| { + METRICS.vcpu.filter_cpuid.inc(); + error!("Failure in configuring CPUID for vcpu {}: {:?}", self.id, e); + VcpuError::CpuId(e) + })?; + + self.fd + .set_cpuid2(&self.cpuid) + .map_err(VcpuError::SetSupportedCpusFailed) + } +} diff --git a/src/dragonball/src/vm/aarch64.rs b/src/dragonball/src/vm/aarch64.rs new file mode 100644 index 000000000000..fe8f23207c4f --- /dev/null +++ b/src/dragonball/src/vm/aarch64.rs @@ -0,0 +1,193 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::ops::Deref; +use std::sync::MutexGuard; + +use dbs_arch::gic::GICDevice; +use dbs_arch::pmu::initialize_pmu; +use dbs_arch::{MMIODeviceInfo, VpmuFeatureLevel}; +use dbs_boot::fdt_utils::*; +use dbs_boot::InitrdConfig; +use dbs_utils::epoll_manager::EpollManager; +use dbs_utils::time::TimestampUs; +use linux_loader::cmdline::{Cmdline, Error as CmdlineError}; +use vm_memory::GuestAddressSpace; +use vmm_sys_util::eventfd::EventFd; + +use super::{Vm, VmError}; +use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; +use crate::error::{Error, StartMicroVmError}; +use crate::event_manager::EventManager; +use crate::vcpu::VcpuManager; + +impl Vm { + /// Gets a reference to the irqchip of the VM + pub fn get_irqchip(&self) -> &dyn GICDevice { + self.irqchip_handle.as_ref().unwrap().as_ref() + } + + /// Creates the irq chip in-kernel device model. 
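+    ///
+    /// Illustrative ordering sketch (not from the original patch; `request_ts` and
+    /// `kernel_load_addr` are placeholders), mirroring `init_microvm`: on aarch64 the vCPUs
+    /// must be created before the GIC is set up.
+    ///
+    /// ```ignore
+    /// // error handling elided
+    /// vm.vcpu_manager().unwrap().create_boot_vcpus(request_ts, kernel_load_addr).unwrap();
+    /// vm.setup_interrupt_controller().unwrap();
+    /// vm.setup_pmu_devices().unwrap();
+    /// ```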
+ pub fn setup_interrupt_controller(&mut self) -> std::result::Result<(), StartMicroVmError> { + let vcpu_count = self.vm_config.max_vcpu_count; + + self.irqchip_handle = Some( + dbs_arch::gic::create_gic(&self.vm_fd, vcpu_count.into()) + .map_err(|e| StartMicroVmError::ConfigureVm(VmError::SetupGIC(e)))?, + ); + + Ok(()) + } + + /// Setup pmu devices for guest vm. + pub fn setup_pmu_devices(&mut self) -> std::result::Result<(), StartMicroVmError> { + let vm = self.vm_fd(); + let mut vcpu_manager = self.vcpu_manager().map_err(StartMicroVmError::Vcpu)?; + let vpmu_feature = vcpu_manager.vpmu_feature(); + if vpmu_feature == VpmuFeatureLevel::Disabled { + return Ok(()); + } + + for vcpu in vcpu_manager.vcpus_mut() { + initialize_pmu(vm, vcpu.vcpu_fd()) + .map_err(|e| StartMicroVmError::ConfigureVm(VmError::SetupPmu(e)))?; + } + + Ok(()) + } + + /// Initialize the virtual machine instance. + /// + /// It initialize the virtual machine instance by: + /// 1) Initialize virtual machine reset event fd. + /// 2) Create and initialize vCPUs. + /// 3) Create and initialize interrupt controller. + /// 4) Create and initialize vPMU device. + /// 5) Create and initialize devices, such as virtio, block, net, vsock, vfio etc. + pub fn init_microvm( + &mut self, + epoll_mgr: EpollManager, + vm_as: GuestAddressSpaceImpl, + request_ts: TimestampUs, + ) -> Result<(), StartMicroVmError> { + let reset_eventfd = + EventFd::new(libc::EFD_NONBLOCK).map_err(|_| StartMicroVmError::EventFd)?; + self.reset_eventfd = Some( + reset_eventfd + .try_clone() + .map_err(|_| StartMicroVmError::EventFd)?, + ); + self.vcpu_manager() + .map_err(StartMicroVmError::Vcpu)? + .set_reset_event_fd(reset_eventfd) + .map_err(StartMicroVmError::Vcpu)?; + + // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) and configured before + // setting up the IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP + // was already initialized. + // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. + let kernel_loader_result = self.load_kernel(vm_as.memory().deref())?; + self.vcpu_manager() + .map_err(StartMicroVmError::Vcpu)? + .create_boot_vcpus(request_ts, kernel_loader_result.kernel_load) + .map_err(StartMicroVmError::Vcpu)?; + self.setup_interrupt_controller()?; + self.setup_pmu_devices()?; + self.init_devices(epoll_mgr)?; + + Ok(()) + } + + /// Generate fdt information about VM. + fn get_fdt_vm_info<'a>( + &'a self, + vm_memory: &'a GuestMemoryImpl, + cmdline: &'a str, + initrd_config: Option<&'a InitrdConfig>, + vcpu_manager: &'a MutexGuard, + ) -> FdtVmInfo { + let guest_memory = vm_memory.memory(); + let vcpu_mpidr = vcpu_manager + .vcpus() + .into_iter() + .map(|cpu| cpu.get_mpidr()) + .collect(); + let vm_config = self.vm_config(); + let mut vcpu_boot_onlined = vec![1; vm_config.vcpu_count as usize]; + vcpu_boot_onlined.resize(vm_config.max_vcpu_count as usize, 0); + let vpmu_feature = vcpu_manager.vpmu_feature(); + // This configuration is used for passing cache information into guest. 
+ // TODO: dragonball-sandbox #274; kata-containers #6969 + let cache_passthrough_enabled = false; + let fdt_vcpu_info = FdtVcpuInfo::new( + vcpu_mpidr, + vcpu_boot_onlined, + vpmu_feature, + cache_passthrough_enabled, + ); + + FdtVmInfo::new(guest_memory, cmdline, initrd_config, fdt_vcpu_info) + } + + // This method is used for passing cache/numa information into guest + // TODO: dragonball-sandbox #274,#275; kata-containers #6969 + /// Generate fdt information about cache/numa + fn get_fdt_numa_info(&self) -> FdtNumaInfo { + FdtNumaInfo::default() + } + + /// Generate fdt information about devices + fn get_fdt_device_info(&self) -> FdtDeviceInfo { + FdtDeviceInfo::new( + self.device_manager().get_mmio_device_info(), + self.get_irqchip(), + ) + } + + /// Execute system architecture specific configurations. + /// + /// 1) set guest kernel boot parameters + /// 2) setup FDT data structs. + pub fn configure_system_arch( + &self, + vm_memory: &GuestMemoryImpl, + cmdline: &Cmdline, + initrd: Option, + ) -> std::result::Result<(), StartMicroVmError> { + let vcpu_manager = self.vcpu_manager().map_err(StartMicroVmError::Vcpu)?; + let cmdline_cstring = cmdline + .as_cstring() + .map_err(StartMicroVmError::ProcessCommandlne)?; + let fdt_vm_info = self.get_fdt_vm_info( + vm_memory, + cmdline_cstring + .to_str() + .map_err(|_| StartMicroVmError::ProcessCommandlne(CmdlineError::InvalidAscii))?, + initrd.as_ref(), + &vcpu_manager, + ); + let fdt_numa_info = self.get_fdt_numa_info(); + let fdt_device_info = self.get_fdt_device_info(); + + dbs_boot::fdt::create_fdt(fdt_vm_info, fdt_numa_info, fdt_device_info) + .map(|_| ()) + .map_err(|e| StartMicroVmError::ConfigureSystem(Error::BootSystem(e))) + } + + pub(crate) fn register_events( + &mut self, + event_mgr: &mut EventManager, + ) -> std::result::Result<(), StartMicroVmError> { + let reset_evt = self.get_reset_eventfd().ok_or(StartMicroVmError::EventFd)?; + event_mgr + .register_exit_eventfd(reset_evt) + .map_err(|_| StartMicroVmError::RegisterEvent)?; + + Ok(()) + } +} diff --git a/src/dragonball/src/vm/kernel_config.rs b/src/dragonball/src/vm/kernel_config.rs new file mode 100644 index 000000000000..fb51f8fc136d --- /dev/null +++ b/src/dragonball/src/vm/kernel_config.rs @@ -0,0 +1,72 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::fs::File; + +/// Structure to hold guest kernel configuration information. +pub struct KernelConfigInfo { + /// The descriptor to the kernel file. + kernel_file: File, + /// The descriptor to the initrd file, if there is one + initrd_file: Option, + /// The commandline for guest kernel. + cmdline: linux_loader::cmdline::Cmdline, +} + +impl KernelConfigInfo { + /// Create a KernelConfigInfo instance. + pub fn new( + kernel_file: File, + initrd_file: Option, + cmdline: linux_loader::cmdline::Cmdline, + ) -> Self { + KernelConfigInfo { + kernel_file, + initrd_file, + cmdline, + } + } + + /// Get a mutable reference to the kernel file. + pub fn kernel_file_mut(&mut self) -> &mut File { + &mut self.kernel_file + } + + /// Get an immutable reference to the initrd file. + pub fn initrd_file(&self) -> Option<&File> { + self.initrd_file.as_ref() + } + + /// Get a mutable reference to the initrd file. + pub fn initrd_file_mut(&mut self) -> Option<&mut File> { + self.initrd_file.as_mut() + } + + /// Get a shared reference to the guest kernel boot parameter object. 
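+    ///
+    /// A minimal sketch (illustrative; `kernel` and `initrd` are hypothetical `File` handles):
+    ///
+    /// ```ignore
+    /// let mut cmdline = linux_loader::cmdline::Cmdline::new(1024).unwrap();
+    /// cmdline.insert_str("console=ttyS0 ro").unwrap();
+    /// let info = KernelConfigInfo::new(kernel, Some(initrd), cmdline);
+    /// assert_eq!(
+    ///     info.kernel_cmdline().as_cstring().unwrap().as_bytes(),
+    ///     b"console=ttyS0 ro"
+    /// );
+    /// ```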
+ pub fn kernel_cmdline(&self) -> &linux_loader::cmdline::Cmdline { + &self.cmdline + } + + /// Get a mutable reference to the guest kernel boot parameter object. + pub fn kernel_cmdline_mut(&mut self) -> &mut linux_loader::cmdline::Cmdline { + &mut self.cmdline + } +} + +#[cfg(test)] +mod tests { + use super::*; + use vmm_sys_util::tempfile::TempFile; + + #[test] + fn test_kernel_config_info() { + let kernel = TempFile::new().unwrap(); + let initrd = TempFile::new().unwrap(); + let mut cmdline = linux_loader::cmdline::Cmdline::new(1024).unwrap(); + cmdline.insert_str("ro").unwrap(); + let mut info = KernelConfigInfo::new(kernel.into_file(), Some(initrd.into_file()), cmdline); + + assert_eq!(info.cmdline.as_cstring().unwrap().as_bytes(), b"ro"); + assert!(info.initrd_file_mut().is_some()); + } +} diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs new file mode 100644 index 000000000000..2964936b7fce --- /dev/null +++ b/src/dragonball/src/vm/mod.rs @@ -0,0 +1,1122 @@ +// Copyright (C) 2021 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::io::{self, Read, Seek, SeekFrom}; +use std::ops::Deref; +use std::os::unix::io::RawFd; + +use std::sync::{Arc, Mutex, RwLock}; + +use dbs_address_space::AddressSpace; +#[cfg(target_arch = "aarch64")] +use dbs_arch::gic::GICDevice; +#[cfg(target_arch = "aarch64")] +use dbs_arch::pmu::PmuError; +use dbs_boot::InitrdConfig; +use dbs_utils::epoll_manager::EpollManager; +use dbs_utils::time::TimestampUs; +use kvm_ioctls::VmFd; +use linux_loader::loader::{KernelLoader, KernelLoaderResult}; +use seccompiler::BpfProgram; +use serde_derive::{Deserialize, Serialize}; +use slog::{error, info}; +use vm_memory::{Bytes, GuestAddress, GuestAddressSpace}; +use vmm_sys_util::eventfd::EventFd; + +#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] +use dbs_upcall::{DevMgrService, UpcallClient}; +#[cfg(feature = "hotplug")] +use std::sync::mpsc::Sender; + +use crate::address_space_manager::{ + AddressManagerError, AddressSpaceMgr, AddressSpaceMgrBuilder, GuestAddressSpaceImpl, + GuestMemoryImpl, +}; +use crate::api::v1::{InstanceInfo, InstanceState}; +use crate::device_manager::console_manager::DmesgWriter; +use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; +use crate::error::{LoadInitrdError, Result, StartMicroVmError, StopMicrovmError}; +use crate::event_manager::EventManager; +use crate::kvm_context::KvmContext; +use crate::resource_manager::ResourceManager; +use crate::vcpu::{VcpuManager, VcpuManagerError}; +#[cfg(feature = "hotplug")] +use crate::vcpu::{VcpuResizeError, VcpuResizeInfo}; +#[cfg(target_arch = "aarch64")] +use dbs_arch::gic::Error as GICError; + +mod kernel_config; +pub use self::kernel_config::KernelConfigInfo; + +#[cfg(target_arch = "aarch64")] +#[path = "aarch64.rs"] +mod aarch64; + +#[cfg(target_arch = "x86_64")] +#[path = "x86_64.rs"] +mod x86_64; + +/// Errors associated with virtual machine instance related operations. +#[derive(Debug, thiserror::Error)] +pub enum VmError { + /// Cannot configure the IRQ. + #[error("failed to configure IRQ fot the virtual machine: {0}")] + Irq(#[source] kvm_ioctls::Error), + + /// Cannot configure the microvm. 
+ #[error("failed to initialize the virtual machine: {0}")] + VmSetup(#[source] kvm_ioctls::Error), + + /// Cannot setup GIC + #[cfg(target_arch = "aarch64")] + #[error("failed to configure GIC")] + SetupGIC(GICError), + + /// Cannot setup pmu device + #[cfg(target_arch = "aarch64")] + #[error("failed to setup pmu device")] + SetupPmu(#[source] PmuError), +} + +/// Configuration information for user defined NUMA nodes. +#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)] +pub struct NumaRegionInfo { + /// memory size for this region (unit: MiB) + pub size: u64, + /// numa node id on host for this region + pub host_numa_node_id: Option, + /// numa node id on guest for this region + pub guest_numa_node_id: Option, + /// vcpu ids belonging to this region + pub vcpu_ids: Vec, +} + +/// Information for cpu topology to guide guest init +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct CpuTopology { + /// threads per core to indicate hyperthreading is enabled or not + pub threads_per_core: u8, + /// cores per die to guide guest cpu topology init + pub cores_per_die: u8, + /// dies per socket to guide guest cpu topology + pub dies_per_socket: u8, + /// number of sockets + pub sockets: u8, +} + +impl Default for CpuTopology { + fn default() -> Self { + CpuTopology { + threads_per_core: 1, + cores_per_die: 1, + dies_per_socket: 1, + sockets: 1, + } + } +} + +/// Configuration information for virtual machine instance. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct VmConfigInfo { + /// Number of vcpu to start. + pub vcpu_count: u8, + /// Max number of vcpu can be added + pub max_vcpu_count: u8, + /// cpu power management. + pub cpu_pm: String, + /// cpu topology information + pub cpu_topology: CpuTopology, + /// vpmu support level + pub vpmu_feature: u8, + + /// Memory type that can be either hugetlbfs or shmem, default is shmem + pub mem_type: String, + /// Memory file path + pub mem_file_path: String, + /// The memory size in MiB. + pub mem_size_mib: usize, + + /// sock path + pub serial_path: Option, +} + +impl Default for VmConfigInfo { + fn default() -> Self { + VmConfigInfo { + vcpu_count: 1, + max_vcpu_count: 1, + cpu_pm: String::from("on"), + cpu_topology: CpuTopology { + threads_per_core: 1, + cores_per_die: 1, + dies_per_socket: 1, + sockets: 1, + }, + vpmu_feature: 0, + mem_type: String::from("shmem"), + mem_file_path: String::from(""), + mem_size_mib: 128, + serial_path: None, + } + } +} + +/// Struct to manage resources and control states of an virtual machine instance. +/// +/// An `Vm` instance holds a resources assigned to a virtual machine instance, such as CPU, memory, +/// devices etc. When an `Vm` instance gets deconstructed, all resources assigned should be +/// released. 
+/// +/// We have explicit build the object model as: +/// |---Vmm API Server--<-1:1-> HTTP API Server +/// | |----------<-1:1-> Shimv2/CRI API Server +/// | +/// Vmm <-1:N-> Vm <-1:1-> Address Space Manager <-1:N-> GuestMemory +/// ^ ^---1:1-> Device Manager <-1:N-> Device +/// | ^---1:1-> Resource Manager +/// | ^---1:N-> Vcpu +/// |---<-1:N-> Event Manager +pub struct Vm { + epoll_manager: EpollManager, + kvm: KvmContext, + shared_info: Arc>, + + address_space: AddressSpaceMgr, + device_manager: DeviceManager, + dmesg_fifo: Option>, + kernel_config: Option, + logger: slog::Logger, + reset_eventfd: Option, + resource_manager: Arc, + vcpu_manager: Option>>, + vm_config: VmConfigInfo, + vm_fd: Arc, + + start_instance_request_ts: u64, + start_instance_request_cpu_ts: u64, + start_instance_downtime: u64, + + // Arm specific fields. + // On aarch64 we need to keep around the fd obtained by creating the VGIC device. + #[cfg(target_arch = "aarch64")] + irqchip_handle: Option>, + + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] + upcall_client: Option>>, +} + +impl Vm { + /// Constructs a new `Vm` instance using the given `Kvm` instance. + pub fn new( + kvm_fd: Option, + api_shared_info: Arc>, + epoll_manager: EpollManager, + ) -> Result { + let id = api_shared_info.read().unwrap().id.clone(); + let logger = slog_scope::logger().new(slog::o!("id" => id)); + let kvm = KvmContext::new(kvm_fd)?; + let vm_fd = Arc::new(kvm.create_vm()?); + let resource_manager = Arc::new(ResourceManager::new(Some(kvm.max_memslots()))); + let device_manager = DeviceManager::new( + vm_fd.clone(), + resource_manager.clone(), + epoll_manager.clone(), + &logger, + api_shared_info.clone(), + ); + + Ok(Vm { + epoll_manager, + kvm, + shared_info: api_shared_info, + + address_space: AddressSpaceMgr::default(), + device_manager, + dmesg_fifo: None, + kernel_config: None, + logger, + reset_eventfd: None, + resource_manager, + vcpu_manager: None, + vm_config: Default::default(), + vm_fd, + + start_instance_request_ts: 0, + start_instance_request_cpu_ts: 0, + start_instance_downtime: 0, + + #[cfg(target_arch = "aarch64")] + irqchip_handle: None, + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] + upcall_client: None, + }) + } + + /// Gets a reference to the device manager by this VM. + pub fn device_manager(&self) -> &DeviceManager { + &self.device_manager + } + + /// Gets a mutable reference to the device manager by this VM. + pub fn device_manager_mut(&mut self) -> &mut DeviceManager { + &mut self.device_manager + } + + /// Get a reference to EpollManager. + pub fn epoll_manager(&self) -> &EpollManager { + &self.epoll_manager + } + + /// Get eventfd for exit notification. + pub fn get_reset_eventfd(&self) -> Option<&EventFd> { + self.reset_eventfd.as_ref() + } + + /// Set guest kernel boot configurations. + pub fn set_kernel_config(&mut self, kernel_config: KernelConfigInfo) { + self.kernel_config = Some(kernel_config); + } + + /// Get virtual machine shared instance information. + pub fn shared_info(&self) -> &Arc> { + &self.shared_info + } + + /// Gets a reference to the address_space.address_space for guest memory owned by this VM. + pub fn vm_address_space(&self) -> Option<&AddressSpace> { + self.address_space.get_address_space() + } + + /// Gets a reference to the address space for guest memory owned by this VM. + /// + /// Note that `GuestMemory` does not include any device memory that may have been added after + /// this VM was constructed. 
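+    ///
+    /// A minimal sketch (illustrative, not part of the original patch):
+    ///
+    /// ```ignore
+    /// let vm_as = vm.vm_as().expect("guest memory not initialized");
+    /// let guest_memory = vm_as.memory(); // snapshot of the current guest address space
+    /// ```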
+    pub fn vm_as(&self) -> Option<&GuestAddressSpaceImpl> {
+        self.address_space.get_vm_as()
+    }
+
+    /// Get an immutable reference to the virtual machine configuration information.
+    pub fn vm_config(&self) -> &VmConfigInfo {
+        &self.vm_config
+    }
+
+    /// Set the virtual machine configuration information.
+    pub fn set_vm_config(&mut self, config: VmConfigInfo) {
+        self.vm_config = config;
+    }
+
+    /// Gets a reference to the KVM VM file descriptor owned by this VM.
+    pub fn vm_fd(&self) -> &VmFd {
+        &self.vm_fd
+    }
+
+    /// Returns true if the system upcall service is ready.
+    pub fn is_upcall_client_ready(&self) -> bool {
+        #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
+        {
+            if let Some(upcall_client) = self.upcall_client() {
+                return upcall_client.is_ready();
+            }
+        }
+
+        false
+    }
+
+    /// Check whether the VM has been initialized.
+    pub fn is_vm_initialized(&self) -> bool {
+        let instance_state = {
+            // Use expect() to crash if the other thread poisoned this lock.
+            let shared_info = self.shared_info.read()
+                .expect("Failed to determine if instance is initialized because shared info couldn't be read due to poisoned lock");
+            shared_info.state
+        };
+        instance_state != InstanceState::Uninitialized
+    }
+
+    /// Check whether the VM instance is running.
+    pub fn is_vm_running(&self) -> bool {
+        let instance_state = {
+            // Use expect() to crash if the other thread poisoned this lock.
+            let shared_info = self.shared_info.read()
+                .expect("Failed to determine if instance is running because shared info couldn't be read due to poisoned lock");
+            shared_info.state
+        };
+        instance_state == InstanceState::Running
+    }
+
+    /// Save the VM instance exit state.
+    pub fn vm_exit(&self, exit_code: i32) {
+        if let Ok(mut info) = self.shared_info.write() {
+            info.state = InstanceState::Exited(exit_code);
+        } else {
+            error!(
+                self.logger,
+                "Failed to save exit state, couldn't be written due to poisoned lock"
+            );
+        }
+    }
+
+    /// Create device operation context.
+    /// vm is not running: return a boot-time operation context
+    /// vm is running but the hotplug feature is not enabled: return an error
+    /// vm is running but the upcall channel failed to initialize or is not ready: return an error
+    /// vm is running and the upcall channel is ready: return a hotplug operation context
+    pub fn create_device_op_context(
+        &mut self,
+        epoll_mgr: Option<EpollManager>,
+    ) -> std::result::Result<DeviceOpContext, StartMicroVmError> {
+        if !self.is_vm_initialized() {
+            Ok(DeviceOpContext::create_boot_ctx(self, epoll_mgr))
+        } else {
+            self.create_device_hotplug_context(epoll_mgr)
+        }
+    }
+
+    pub(crate) fn check_health(&self) -> std::result::Result<(), StartMicroVmError> {
+        if self.kernel_config.is_none() {
+            return Err(StartMicroVmError::MissingKernelConfig);
+        }
+        Ok(())
+    }
+
+    pub(crate) fn get_dragonball_info(&self) -> (String, String) {
+        let guard = self.shared_info.read().unwrap();
+        let instance_id = guard.id.clone();
+        let dragonball_version = guard.vmm_version.clone();
+
+        (dragonball_version, instance_id)
+    }
+
+    pub(crate) fn stop_prealloc(&mut self) -> std::result::Result<(), StartMicroVmError> {
+        if self.address_space.is_initialized() {
+            return self
+                .address_space
+                .wait_prealloc(true)
+                .map_err(StartMicroVmError::AddressManagerError);
+        }
+
+        Err(StartMicroVmError::AddressManagerError(
+            AddressManagerError::GuestMemoryNotInitialized,
+        ))
+    }
+}
+
+impl Vm {
+    pub(crate) fn init_vcpu_manager(
+        &mut self,
+        vm_as: GuestAddressSpaceImpl,
+        vcpu_seccomp_filter: BpfProgram,
+    ) -> std::result::Result<(), VcpuManagerError> {
+        let vcpu_manager = VcpuManager::new(
+            self.vm_fd.clone(),
+            &self.kvm,
+            &self.vm_config,
+            vm_as,
+            vcpu_seccomp_filter,
+            self.shared_info.clone(),
+            self.device_manager.io_manager(),
+            self.epoll_manager.clone(),
+        )?;
+        self.vcpu_manager = Some(vcpu_manager);
+
+        Ok(())
+    }
+
+    /// Get a reference to the vcpu manager.
+    pub(crate) fn vcpu_manager(
+        &self,
+    ) -> std::result::Result<std::sync::MutexGuard<'_, VcpuManager>, VcpuManagerError> {
+        self.vcpu_manager
+            .as_ref()
+            .ok_or(VcpuManagerError::VcpuManagerNotInitialized)
+            .map(|mgr| mgr.lock().unwrap())
+    }
+
+    /// Pause all vcpus and record the instance downtime.
+    pub fn pause_all_vcpus_with_downtime(&mut self) -> std::result::Result<(), VcpuManagerError> {
+        let ts = TimestampUs::default();
+        self.start_instance_downtime = ts.time_us;
+
+        self.vcpu_manager()?.pause_all_vcpus()?;
+
+        Ok(())
+    }
+
+    /// Resume all vcpus and calculate the instance downtime.
+    pub fn resume_all_vcpus_with_downtime(&mut self) -> std::result::Result<(), VcpuManagerError> {
+        self.vcpu_manager()?.resume_all_vcpus()?;
+
+        if self.start_instance_downtime != 0 {
+            let now = TimestampUs::default();
+            let downtime = now.time_us - self.start_instance_downtime;
+            info!(self.logger, "VM: instance downtime: {} us", downtime);
+            self.start_instance_downtime = 0;
+            if let Ok(mut info) = self.shared_info.write() {
+                info.last_instance_downtime = downtime;
+            } else {
+                error!(self.logger, "Failed to update live upgrade downtime, couldn't be written due to poisoned lock");
+            }
+        }
+
+        Ok(())
+    }
+
+    pub(crate) fn init_devices(
+        &mut self,
+        epoll_manager: EpollManager,
+    ) -> std::result::Result<(), StartMicroVmError> {
+        info!(self.logger, "VM: initializing devices ...");
+
+        let kernel_config = self
+            .kernel_config
+            .as_mut()
+            .ok_or(StartMicroVmError::MissingKernelConfig)?;
+
+        info!(self.logger, "VM: create interrupt manager");
+        self.device_manager
+            .create_interrupt_manager()
+            .map_err(StartMicroVmError::DeviceManager)?;
+
+        info!(self.logger, "VM: create devices");
+        let vm_as =
+            self.address_space
+                .get_vm_as()
+                .ok_or(StartMicroVmError::AddressManagerError(
+                    AddressManagerError::GuestMemoryNotInitialized,
+                ))?;
+        self.device_manager.create_devices(
+            vm_as.clone(),
+            epoll_manager,
+            kernel_config,
+            self.dmesg_fifo.take(),
+            self.address_space.address_space(),
+            &self.vm_config,
+        )?;
+
+        info!(self.logger, "VM: start devices");
+        self.device_manager.start_devices()?;
+
+        info!(self.logger, "VM: initializing devices done");
+        Ok(())
+    }
+
+    /// Remove devices when shutting down the VM.
+    pub fn remove_devices(&mut self) -> std::result::Result<(), StopMicrovmError> {
+        info!(self.logger, "VM: remove devices");
+        let vm_as = self
+            .address_space
+            .get_vm_as()
+            .ok_or(StopMicrovmError::GuestMemoryNotInitialized)?;
+
+        self.device_manager
+            .remove_devices(
+                vm_as.clone(),
+                self.epoll_manager.clone(),
+                self.address_space.address_space(),
+            )
+            .map_err(StopMicrovmError::DeviceManager)
+    }
+
+    /// Remove the upcall client when the VM is destroyed.
+    #[cfg(feature = "dbs-upcall")]
+    pub fn remove_upcall(&mut self) -> std::result::Result<(), StopMicrovmError> {
+        self.upcall_client = None;
+        Ok(())
+    }
+
+    /// Reset the console into canonical mode.
+    pub fn reset_console(&self) -> std::result::Result<(), DeviceMgrError> {
+        self.device_manager.reset_console()
+    }
+
+    pub(crate) fn init_dmesg_logger(&mut self) {
+        let writer = self.dmesg_logger();
+        self.dmesg_fifo = Some(writer);
+    }
+
+    /// Build a writer that forwards guest dmesg output to the logger.
+    fn dmesg_logger(&self) -> Box<dyn io::Write + Send> {
+        Box::new(DmesgWriter::new(&self.logger))
+    }
+
+    pub(crate) fn init_guest_memory(&mut self) -> std::result::Result<(), StartMicroVmError> {
+        info!(self.logger, "VM: initializing guest memory...");
+        // We do not allow reinitialization of the VM guest memory.
+        if self.address_space.is_initialized() {
+            return Ok(());
+        }
+
+        // vCPU boot up requires local memory; reserve 100 MiB of memory.
+        let mem_size = (self.vm_config.mem_size_mib as u64) << 20;
+
+        let mem_type = self.vm_config.mem_type.clone();
+        let mut mem_file_path = String::from("");
+        if mem_type == "hugetlbfs" {
+            mem_file_path = self.vm_config.mem_file_path.clone();
+            let shared_info = self.shared_info.read()
+                .expect("Failed to build the hugetlbfs memory file path because shared info couldn't be read due to poisoned lock");
+            mem_file_path.push_str("/dragonball/");
+            mem_file_path.push_str(shared_info.id.as_str());
+        }
+
+        let mut vcpu_ids: Vec<u32> = Vec::new();
+        for i in 0..self.vm_config().max_vcpu_count {
+            vcpu_ids.push(i as u32);
+        }
+
+        // init default regions.
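+        // By default all configured memory and every possible vCPU are grouped
+        // into a single guest NUMA node (node 0), and the host node is left
+        // unpinned. A multi-node layout would simply push additional
+        // `NumaRegionInfo` entries; for illustration only (the sizes and ids
+        // below are made up, not derived from this patch):
+        //
+        //   numa_regions.push(NumaRegionInfo {
+        //       size: second_node_size_mib,
+        //       host_numa_node_id: Some(1),
+        //       guest_numa_node_id: Some(1),
+        //       vcpu_ids: vec![2, 3],
+        //   });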
+ let mut numa_regions = Vec::with_capacity(1); + let numa_node = NumaRegionInfo { + size: self.vm_config.mem_size_mib as u64, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids, + }; + numa_regions.push(numa_node); + + info!( + self.logger, + "VM: mem_type:{} mem_file_path:{}, mem_size:{}, numa_regions:{:?}", + mem_type, + mem_file_path, + mem_size, + numa_regions, + ); + + let mut address_space_param = AddressSpaceMgrBuilder::new(&mem_type, &mem_file_path) + .map_err(StartMicroVmError::AddressManagerError)?; + address_space_param.set_kvm_vm_fd(self.vm_fd.clone()); + self.address_space + .create_address_space(&self.resource_manager, &numa_regions, address_space_param) + .map_err(StartMicroVmError::AddressManagerError)?; + + info!(self.logger, "VM: initializing guest memory done"); + Ok(()) + } + + fn init_configure_system( + &mut self, + vm_as: &GuestAddressSpaceImpl, + ) -> std::result::Result<(), StartMicroVmError> { + let vm_memory = vm_as.memory(); + let kernel_config = self + .kernel_config + .as_ref() + .ok_or(StartMicroVmError::MissingKernelConfig)?; + //let cmdline = kernel_config.cmdline.clone(); + let initrd: Option = match kernel_config.initrd_file() { + Some(f) => { + let initrd_file = f.try_clone(); + if initrd_file.is_err() { + return Err(StartMicroVmError::InitrdLoader( + LoadInitrdError::ReadInitrd(io::Error::from(io::ErrorKind::InvalidData)), + )); + } + let res = self.load_initrd(vm_memory.deref(), &mut initrd_file.unwrap())?; + Some(res) + } + None => None, + }; + + self.configure_system_arch(vm_memory.deref(), kernel_config.kernel_cmdline(), initrd) + } + + /// Loads the initrd from a file into the given memory slice. + /// + /// * `vm_memory` - The guest memory the initrd is written to. + /// * `image` - The initrd image. + /// + /// Returns the result of initrd loading + fn load_initrd( + &self, + vm_memory: &GuestMemoryImpl, + image: &mut F, + ) -> std::result::Result + where + F: Read + Seek, + { + use crate::error::LoadInitrdError::*; + + let size: usize; + // Get the image size + match image.seek(SeekFrom::End(0)) { + Err(e) => return Err(ReadInitrd(e)), + Ok(0) => { + return Err(ReadInitrd(io::Error::new( + io::ErrorKind::InvalidData, + "Initrd image seek returned a size of zero", + ))) + } + Ok(s) => size = s as usize, + }; + // Go back to the image start + image.seek(SeekFrom::Start(0)).map_err(ReadInitrd)?; + + // Get the target address + let address = dbs_boot::initrd_load_addr(vm_memory, size as u64).map_err(|_| LoadInitrd)?; + + // Load the image into memory + vm_memory + .read_from(GuestAddress(address), image, size) + .map_err(|_| LoadInitrd)?; + + Ok(InitrdConfig { + address: GuestAddress(address), + size, + }) + } + + fn load_kernel( + &mut self, + vm_memory: &GuestMemoryImpl, + ) -> std::result::Result { + // This is the easy way out of consuming the value of the kernel_cmdline. 
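+        // The loader used below depends on the target architecture: x86_64
+        // guests boot from an ELF vmlinux while aarch64 guests boot from a PE
+        // Image; in both cases `dbs_boot::get_kernel_start()` is passed to the
+        // loader as the high-memory start / kernel offset hint.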
+ let kernel_config = self + .kernel_config + .as_mut() + .ok_or(StartMicroVmError::MissingKernelConfig)?; + let high_mem_addr = GuestAddress(dbs_boot::get_kernel_start()); + + #[cfg(target_arch = "x86_64")] + return linux_loader::loader::elf::Elf::load( + vm_memory, + None, + kernel_config.kernel_file_mut(), + Some(high_mem_addr), + ) + .map_err(StartMicroVmError::KernelLoader); + + #[cfg(target_arch = "aarch64")] + return linux_loader::loader::pe::PE::load( + vm_memory, + Some(GuestAddress(dbs_boot::get_kernel_start())), + kernel_config.kernel_file_mut(), + Some(high_mem_addr), + ) + .map_err(StartMicroVmError::KernelLoader); + } + + /// Set up the initial microVM state and start the vCPU threads. + /// + /// This is the main entrance of the Vm object, to bring up the virtual machine instance into + /// running state. + pub fn start_microvm( + &mut self, + event_mgr: &mut EventManager, + vmm_seccomp_filter: BpfProgram, + vcpu_seccomp_filter: BpfProgram, + ) -> std::result::Result<(), StartMicroVmError> { + info!(self.logger, "VM: received instance start command"); + if self.is_vm_initialized() { + return Err(StartMicroVmError::MicroVMAlreadyRunning); + } + + let request_ts = TimestampUs::default(); + self.start_instance_request_ts = request_ts.time_us; + self.start_instance_request_cpu_ts = request_ts.cputime_us; + + self.init_dmesg_logger(); + self.check_health()?; + + // Use expect() to crash if the other thread poisoned this lock. + self.shared_info + .write() + .expect("Failed to start microVM because shared info couldn't be written due to poisoned lock") + .state = InstanceState::Starting; + + self.init_guest_memory()?; + let vm_as = self + .vm_as() + .cloned() + .ok_or(StartMicroVmError::AddressManagerError( + AddressManagerError::GuestMemoryNotInitialized, + ))?; + + self.init_vcpu_manager(vm_as.clone(), vcpu_seccomp_filter) + .map_err(StartMicroVmError::Vcpu)?; + self.init_microvm(event_mgr.epoll_manager(), vm_as.clone(), request_ts)?; + self.init_configure_system(&vm_as)?; + #[cfg(feature = "dbs-upcall")] + self.init_upcall()?; + + info!(self.logger, "VM: register events"); + self.register_events(event_mgr)?; + + info!(self.logger, "VM: start vcpus"); + self.vcpu_manager() + .map_err(StartMicroVmError::Vcpu)? + .start_boot_vcpus(vmm_seccomp_filter) + .map_err(StartMicroVmError::Vcpu)?; + + // Use expect() to crash if the other thread poisoned this lock. 
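+        // At this point the boot vcpus are already running, so flip the
+        // instance state to `Running`; callers polling `is_vm_running()` will
+        // observe the transition Uninitialized -> Starting -> Running.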
+ self.shared_info + .write() + .expect("Failed to start microVM because shared info couldn't be written due to poisoned lock") + .state = InstanceState::Running; + + info!(self.logger, "VM started"); + Ok(()) + } +} + +#[cfg(feature = "hotplug")] +impl Vm { + #[cfg(feature = "dbs-upcall")] + /// initialize upcall client for guest os + fn new_upcall(&mut self) -> std::result::Result<(), StartMicroVmError> { + // get vsock inner connector for upcall + let inner_connector = self + .device_manager + .get_vsock_inner_connector() + .ok_or(StartMicroVmError::UpcallMissVsock)?; + let mut upcall_client = UpcallClient::new( + inner_connector, + self.epoll_manager.clone(), + DevMgrService::default(), + ) + .map_err(StartMicroVmError::UpcallInitError)?; + + upcall_client + .connect() + .map_err(StartMicroVmError::UpcallConnectError)?; + self.upcall_client = Some(Arc::new(upcall_client)); + + info!(self.logger, "upcall client init success"); + Ok(()) + } + + #[cfg(feature = "dbs-upcall")] + fn init_upcall(&mut self) -> std::result::Result<(), StartMicroVmError> { + info!(self.logger, "VM upcall init"); + if let Err(e) = self.new_upcall() { + info!( + self.logger, + "VM upcall init failed, no support hotplug: {}", e + ); + Err(e) + } else { + self.vcpu_manager() + .map_err(StartMicroVmError::Vcpu)? + .set_upcall_channel(self.upcall_client().clone()); + Ok(()) + } + } + + #[cfg(feature = "dbs-upcall")] + /// Get upcall client. + pub fn upcall_client(&self) -> &Option>> { + &self.upcall_client + } + + #[cfg(feature = "dbs-upcall")] + fn create_device_hotplug_context( + &self, + epoll_mgr: Option, + ) -> std::result::Result { + if self.upcall_client().is_none() { + Err(StartMicroVmError::UpcallMissVsock) + } else if self.is_upcall_client_ready() { + Ok(DeviceOpContext::create_hotplug_ctx(self, epoll_mgr)) + } else { + Err(StartMicroVmError::UpcallServerNotReady) + } + } + + /// Resize MicroVM vCPU number + #[cfg(feature = "dbs-upcall")] + pub fn resize_vcpu( + &mut self, + config: VcpuResizeInfo, + sync_tx: Option>, + ) -> std::result::Result<(), VcpuResizeError> { + if self.upcall_client().is_none() { + Err(VcpuResizeError::UpcallClientMissing) + } else if self.is_upcall_client_ready() { + if let Some(vcpu_count) = config.vcpu_count { + self.vcpu_manager() + .map_err(VcpuResizeError::Vcpu)? + .resize_vcpu(vcpu_count, sync_tx)?; + + self.vm_config.vcpu_count = vcpu_count; + } + Ok(()) + } else { + Err(VcpuResizeError::UpcallServerNotReady) + } + } + + // We will support hotplug without upcall in future stages. 
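+    // Until that lands, every hotplug request is funneled through the upcall
+    // channel set up above: callers first obtain a hotplug context and then
+    // hand it to the device-manager call performing the actual hotplug, e.g.
+    // (illustrative only):
+    //
+    //   let ctx = vm.create_device_op_context(Some(epoll_mgr))?;
+    //   // pass `ctx` to the appropriate DeviceManager hotplug method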
+ #[cfg(not(feature = "dbs-upcall"))] + fn create_device_hotplug_context( + &self, + _epoll_mgr: Option, + ) -> std::result::Result { + Err(StartMicroVmError::MicroVMAlreadyRunning) + } +} + +#[cfg(not(feature = "hotplug"))] +impl Vm { + fn init_upcall(&mut self) -> std::result::Result<(), StartMicroVmError> { + Ok(()) + } + + fn create_device_hotplug_context( + &self, + _epoll_mgr: Option, + ) -> std::result::Result { + Err(StartMicroVmError::MicroVMAlreadyRunning) + } +} + +#[cfg(test)] +pub mod tests { + #[cfg(target_arch = "aarch64")] + use dbs_boot::layout::GUEST_MEM_START; + #[cfg(target_arch = "x86_64")] + use kvm_ioctls::VcpuExit; + use linux_loader::cmdline::Cmdline; + use test_utils::skip_if_not_root; + use vm_memory::GuestMemory; + use vmm_sys_util::tempfile::TempFile; + + use super::*; + use crate::test_utils::tests::create_vm_for_test; + + impl Vm { + pub fn set_instance_state(&mut self, mstate: InstanceState) { + self.shared_info + .write() + .expect("Failed to start microVM because shared info couldn't be written due to poisoned lock") + .state = mstate; + } + } + + pub fn create_vm_instance() -> Vm { + let instance_info = Arc::new(RwLock::new(InstanceInfo::default())); + let epoll_manager = EpollManager::default(); + Vm::new(None, instance_info, epoll_manager).unwrap() + } + + #[test] + fn test_create_vm_instance() { + skip_if_not_root!(); + let vm = create_vm_instance(); + assert!(vm.check_health().is_err()); + assert!(vm.kernel_config.is_none()); + assert!(vm.get_reset_eventfd().is_none()); + assert!(!vm.is_vm_initialized()); + assert!(!vm.is_vm_running()); + assert!(vm.reset_console().is_ok()); + } + + #[test] + fn test_vm_init_guest_memory() { + skip_if_not_root!(); + let vm_config = VmConfigInfo { + vcpu_count: 1, + max_vcpu_count: 3, + cpu_pm: "off".to_string(), + mem_type: "shmem".to_string(), + mem_file_path: "".to_string(), + mem_size_mib: 16, + serial_path: None, + cpu_topology: CpuTopology { + threads_per_core: 1, + cores_per_die: 1, + dies_per_socket: 1, + sockets: 1, + }, + vpmu_feature: 0, + }; + + let mut vm = create_vm_instance(); + vm.set_vm_config(vm_config); + assert!(vm.init_guest_memory().is_ok()); + let vm_memory = vm.address_space.vm_memory().unwrap(); + + assert_eq!(vm_memory.num_regions(), 1); + #[cfg(target_arch = "x86_64")] + assert_eq!(vm_memory.last_addr(), GuestAddress(0xffffff)); + #[cfg(target_arch = "aarch64")] + assert_eq!( + vm_memory.last_addr(), + GuestAddress(GUEST_MEM_START + 0xffffff) + ); + + // Reconfigure an already configured vm will be ignored and just return OK. 
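+        // `init_guest_memory()` returns early once `address_space.is_initialized()`
+        // is true, so calling it again with a new config must not change the
+        // existing memory layout; the assertions below repeat the checks from
+        // the first round to verify that.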
+ let vm_config = VmConfigInfo { + vcpu_count: 1, + max_vcpu_count: 3, + cpu_pm: "off".to_string(), + mem_type: "shmem".to_string(), + mem_file_path: "".to_string(), + mem_size_mib: 16, + serial_path: None, + cpu_topology: CpuTopology { + threads_per_core: 1, + cores_per_die: 1, + dies_per_socket: 1, + sockets: 1, + }, + vpmu_feature: 0, + }; + vm.set_vm_config(vm_config); + assert!(vm.init_guest_memory().is_ok()); + let vm_memory = vm.address_space.vm_memory().unwrap(); + assert_eq!(vm_memory.num_regions(), 1); + #[cfg(target_arch = "x86_64")] + assert_eq!(vm_memory.last_addr(), GuestAddress(0xffffff)); + #[cfg(target_arch = "aarch64")] + assert_eq!( + vm_memory.last_addr(), + GuestAddress(GUEST_MEM_START + 0xffffff) + ); + + #[cfg(target_arch = "x86_64")] + let obj_addr = GuestAddress(0xf0); + #[cfg(target_arch = "aarch64")] + let obj_addr = GuestAddress(GUEST_MEM_START + 0xf0); + vm_memory.write_obj(67u8, obj_addr).unwrap(); + let read_val: u8 = vm_memory.read_obj(obj_addr).unwrap(); + assert_eq!(read_val, 67u8); + } + + #[test] + fn test_vm_create_devices() { + skip_if_not_root!(); + let epoll_mgr = EpollManager::default(); + let vmm = Arc::new(Mutex::new(crate::vmm::tests::create_vmm_instance( + epoll_mgr.clone(), + ))); + + let mut guard = vmm.lock().unwrap(); + let vm = guard.get_vm_mut().unwrap(); + + let vm_config = VmConfigInfo { + vcpu_count: 1, + max_vcpu_count: 3, + cpu_pm: "off".to_string(), + mem_type: "shmem".to_string(), + mem_file_path: "".to_string(), + mem_size_mib: 16, + serial_path: None, + cpu_topology: CpuTopology { + threads_per_core: 1, + cores_per_die: 1, + dies_per_socket: 1, + sockets: 1, + }, + vpmu_feature: 0, + }; + + vm.set_vm_config(vm_config); + assert!(vm.init_guest_memory().is_ok()); + assert!(vm.setup_interrupt_controller().is_ok()); + + let vm_memory = vm.address_space.vm_memory().unwrap(); + assert_eq!(vm_memory.num_regions(), 1); + #[cfg(target_arch = "x86_64")] + assert_eq!(vm_memory.last_addr(), GuestAddress(0xffffff)); + #[cfg(target_arch = "aarch64")] + assert_eq!( + vm_memory.last_addr(), + GuestAddress(GUEST_MEM_START + 0xffffff) + ); + + let kernel_file = TempFile::new().unwrap(); + let cmd_line = Cmdline::new(64).unwrap(); + + vm.set_kernel_config(KernelConfigInfo::new( + kernel_file.into_file(), + None, + cmd_line, + )); + + vm.init_devices(epoll_mgr).unwrap(); + } + + #[test] + fn test_vm_delete_devices() { + skip_if_not_root!(); + let mut vm = create_vm_for_test(); + let epoll_mgr = EpollManager::default(); + + vm.setup_interrupt_controller().unwrap(); + vm.init_devices(epoll_mgr).unwrap(); + assert!(vm.remove_devices().is_ok()); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_run_code() { + skip_if_not_root!(); + + use std::io::{self, Write}; + // This example is based on https://lwn.net/Articles/658511/ + let code = [ + 0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */ + 0x00, 0xd8, /* add %bl, %al */ + 0x04, b'0', /* add $'0', %al */ + 0xee, /* out %al, (%dx) */ + 0xb0, b'\n', /* mov $'\n', %al */ + 0xee, /* out %al, (%dx) */ + 0xf4, /* hlt */ + ]; + let load_addr = GuestAddress(0x1000); + let instance_info = Arc::new(RwLock::new(InstanceInfo::default())); + let epoll_manager = EpollManager::default(); + let mut vm = Vm::new(None, instance_info, epoll_manager).unwrap(); + + let vcpu_count = 1; + let vm_config = VmConfigInfo { + vcpu_count, + max_vcpu_count: 1, + cpu_pm: "off".to_string(), + mem_type: "shmem".to_string(), + mem_file_path: "".to_string(), + mem_size_mib: 10, + serial_path: None, + cpu_topology: CpuTopology { + 
threads_per_core: 1, + cores_per_die: 1, + dies_per_socket: 1, + sockets: 1, + }, + vpmu_feature: 0, + }; + + vm.set_vm_config(vm_config); + vm.init_guest_memory().unwrap(); + + let vm_memory = vm.address_space.vm_memory().unwrap(); + vm_memory.write_obj(code, load_addr).unwrap(); + + let vcpu_fd = vm.vm_fd().create_vcpu(0).unwrap(); + let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap(); + assert_ne!(vcpu_sregs.cs.base, 0); + assert_ne!(vcpu_sregs.cs.selector, 0); + vcpu_sregs.cs.base = 0; + vcpu_sregs.cs.selector = 0; + vcpu_fd.set_sregs(&vcpu_sregs).unwrap(); + + let mut vcpu_regs = vcpu_fd.get_regs().unwrap(); + + vcpu_regs.rip = 0x1000; + vcpu_regs.rax = 2; + vcpu_regs.rbx = 3; + vcpu_regs.rflags = 2; + vcpu_fd.set_regs(&vcpu_regs).unwrap(); + + match vcpu_fd.run().expect("run failed") { + VcpuExit::IoOut(0x3f8, data) => { + assert_eq!(data.len(), 1); + io::stdout().write_all(data).unwrap(); + } + VcpuExit::Hlt => { + io::stdout().write_all(b"KVM_EXIT_HLT\n").unwrap(); + } + r => panic!("unexpected exit reason: {:?}", r), + } + } +} diff --git a/src/dragonball/src/vm/x86_64.rs b/src/dragonball/src/vm/x86_64.rs new file mode 100644 index 000000000000..9593e82761ef --- /dev/null +++ b/src/dragonball/src/vm/x86_64.rs @@ -0,0 +1,282 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::convert::TryInto; +use std::ops::Deref; + +use dbs_address_space::AddressSpace; +use dbs_boot::{add_e820_entry, bootparam, layout, mptable, BootParamsWrapper, InitrdConfig}; +use dbs_utils::epoll_manager::EpollManager; +use dbs_utils::time::TimestampUs; +use kvm_bindings::{kvm_irqchip, kvm_pit_config, kvm_pit_state2, KVM_PIT_SPEAKER_DUMMY}; +use linux_loader::cmdline::Cmdline; +use linux_loader::configurator::{linux::LinuxBootConfigurator, BootConfigurator, BootParams}; +use slog::info; +use vm_memory::{Address, GuestAddress, GuestAddressSpace, GuestMemory}; + +use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; +use crate::error::{Error, Result, StartMicroVmError}; +use crate::event_manager::EventManager; +use crate::vm::{Vm, VmError}; + +/// Configures the system and should be called once per vm before starting vcpu +/// threads. +/// +/// # Arguments +/// +/// * `guest_mem` - The memory to be used by the guest. +/// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was +/// loaded. +/// * `cmdline_size` - Size of the kernel command line in bytes including the +/// null terminator. +/// * `initrd` - Information about where the ramdisk image was loaded in the +/// `guest_mem`. +/// * `boot_cpus` - Number of virtual CPUs the guest will have at boot time. +/// * `max_cpus` - Max number of virtual CPUs the guest will have. +/// * `rsv_mem_bytes` - Reserve memory from microVM.. +#[allow(clippy::too_many_arguments)] +fn configure_system( + guest_mem: &M, + address_space: Option<&AddressSpace>, + cmdline_addr: GuestAddress, + cmdline_size: usize, + initrd: &Option, + boot_cpus: u8, + max_cpus: u8, +) -> super::Result<()> { + const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55; + const KERNEL_HDR_MAGIC: u32 = 0x5372_6448; + const KERNEL_LOADER_OTHER: u8 = 0xff; + const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x0100_0000; // Must be non-zero. 
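+    // The code below fills in the Linux zero page: boot protocol magic values,
+    // the command line and optional initrd locations, and an e820 memory map.
+    // For a guest whose memory fits below the 32-bit MMIO gap the resulting map
+    // is simply (illustrative, symbolic addresses from `dbs_boot::layout`):
+    //
+    //   [0,           EBDA_START)   RAM   // legacy/real-mode area
+    //   [HIMEM_START, mem_end]      RAM   // all remaining guest memory
+    //
+    // Larger guests get the second range split around the MMIO hole instead.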
+ + let mmio_start = GuestAddress(layout::MMIO_LOW_START); + let mmio_end = GuestAddress(layout::MMIO_LOW_END); + let himem_start = GuestAddress(layout::HIMEM_START); + + // Note that this puts the mptable at the last 1k of Linux's 640k base RAM + mptable::setup_mptable(guest_mem, boot_cpus, max_cpus).map_err(Error::MpTableSetup)?; + + let mut params: BootParamsWrapper = BootParamsWrapper(bootparam::boot_params::default()); + + params.0.hdr.type_of_loader = KERNEL_LOADER_OTHER; + params.0.hdr.boot_flag = KERNEL_BOOT_FLAG_MAGIC; + params.0.hdr.header = KERNEL_HDR_MAGIC; + params.0.hdr.cmd_line_ptr = cmdline_addr.raw_value() as u32; + params.0.hdr.cmdline_size = cmdline_size as u32; + params.0.hdr.kernel_alignment = KERNEL_MIN_ALIGNMENT_BYTES; + if let Some(initrd_config) = initrd { + params.0.hdr.ramdisk_image = initrd_config.address.raw_value() as u32; + params.0.hdr.ramdisk_size = initrd_config.size as u32; + } + + add_e820_entry(&mut params.0, 0, layout::EBDA_START, bootparam::E820_RAM) + .map_err(Error::BootSystem)?; + + let mem_end = address_space.ok_or(Error::AddressSpace)?.last_addr(); + if mem_end < mmio_start { + add_e820_entry( + &mut params.0, + himem_start.raw_value(), + // it's safe to use unchecked_offset_from because + // mem_end > himem_start + mem_end.unchecked_offset_from(himem_start) + 1, + bootparam::E820_RAM, + ) + .map_err(Error::BootSystem)?; + } else { + add_e820_entry( + &mut params.0, + himem_start.raw_value(), + // it's safe to use unchecked_offset_from because + // end_32bit_gap_start > himem_start + mmio_start.unchecked_offset_from(himem_start), + bootparam::E820_RAM, + ) + .map_err(Error::BootSystem)?; + if mem_end > mmio_end { + add_e820_entry( + &mut params.0, + mmio_end.raw_value() + 1, + // it's safe to use unchecked_offset_from because mem_end > mmio_end + mem_end.unchecked_offset_from(mmio_end), + bootparam::E820_RAM, + ) + .map_err(Error::BootSystem)?; + } + } + + LinuxBootConfigurator::write_bootparams( + &BootParams::new(¶ms, GuestAddress(layout::ZERO_PAGE_START)), + guest_mem, + ) + .map_err(|_| Error::ZeroPageSetup) +} + +impl Vm { + /// Get the status of in-kernel PIT. + pub fn get_pit_state(&self) -> Result { + self.vm_fd + .get_pit2() + .map_err(|e| Error::Vm(VmError::Irq(e))) + } + + /// Set the status of in-kernel PIT. + pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> { + self.vm_fd + .set_pit2(pit_state) + .map_err(|e| Error::Vm(VmError::Irq(e))) + } + + /// Get the status of in-kernel ioapic. + pub fn get_irqchip_state(&self, chip_id: u32) -> Result { + let mut irqchip: kvm_irqchip = kvm_irqchip { + chip_id, + ..kvm_irqchip::default() + }; + self.vm_fd + .get_irqchip(&mut irqchip) + .map(|_| irqchip) + .map_err(|e| Error::Vm(VmError::Irq(e))) + } + + /// Set the status of in-kernel ioapic. + pub fn set_irqchip_state(&self, irqchip: &kvm_irqchip) -> Result<()> { + self.vm_fd + .set_irqchip(irqchip) + .map_err(|e| Error::Vm(VmError::Irq(e))) + } +} + +impl Vm { + /// Initialize the virtual machine instance. + /// + /// It initialize the virtual machine instance by: + /// 1) initialize virtual machine global state and configuration. + /// 2) create system devices, such as interrupt controller, PIT etc. + /// 3) create and start IO devices, such as serial, console, block, net, vsock etc. + /// 4) create and initialize vCPUs. + /// 5) configure CPU power management features. + /// 6) load guest kernel image. 
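+    ///
+    /// Note that on x86_64 the interrupt controller has to exist before the
+    /// vCPUs are created, which is why step 2) runs ahead of step 4) below.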
+ pub fn init_microvm( + &mut self, + epoll_mgr: EpollManager, + vm_as: GuestAddressSpaceImpl, + request_ts: TimestampUs, + ) -> std::result::Result<(), StartMicroVmError> { + info!(self.logger, "VM: start initializing microvm ..."); + + self.init_tss()?; + // For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` + // while on aarch64 we need to do it the other way around. + self.setup_interrupt_controller()?; + self.create_pit()?; + self.init_devices(epoll_mgr)?; + + let reset_event_fd = self.device_manager.get_reset_eventfd().unwrap(); + self.vcpu_manager() + .map_err(StartMicroVmError::Vcpu)? + .set_reset_event_fd(reset_event_fd) + .map_err(StartMicroVmError::Vcpu)?; + + if self.vm_config.cpu_pm == "on" { + // TODO: add cpu_pm support. issue #4590. + info!(self.logger, "VM: enable CPU disable_idle_exits capability"); + } + + let vm_memory = vm_as.memory(); + let kernel_loader_result = self.load_kernel(vm_memory.deref())?; + self.vcpu_manager() + .map_err(StartMicroVmError::Vcpu)? + .create_boot_vcpus(request_ts, kernel_loader_result.kernel_load) + .map_err(StartMicroVmError::Vcpu)?; + + info!(self.logger, "VM: initializing microvm done"); + Ok(()) + } + + /// Execute system architecture specific configurations. + /// + /// 1) set guest kernel boot parameters + /// 2) setup BIOS configuration data structs, mainly implement the MPSpec. + pub fn configure_system_arch( + &self, + vm_memory: &GuestMemoryImpl, + cmdline: &Cmdline, + initrd: Option, + ) -> std::result::Result<(), StartMicroVmError> { + let cmdline_addr = GuestAddress(dbs_boot::layout::CMDLINE_START); + linux_loader::loader::load_cmdline(vm_memory, cmdline_addr, cmdline) + .map_err(StartMicroVmError::LoadCommandline)?; + + let cmdline_size = cmdline + .as_cstring() + .map_err(StartMicroVmError::ProcessCommandlne)? + .as_bytes_with_nul() + .len(); + + configure_system( + vm_memory, + self.address_space.address_space(), + cmdline_addr, + cmdline_size, + &initrd, + self.vm_config.vcpu_count, + self.vm_config.max_vcpu_count, + ) + .map_err(StartMicroVmError::ConfigureSystem) + } + + /// Initializes the guest memory. + pub(crate) fn init_tss(&mut self) -> std::result::Result<(), StartMicroVmError> { + self.vm_fd + .set_tss_address(dbs_boot::layout::KVM_TSS_ADDRESS.try_into().unwrap()) + .map_err(|e| StartMicroVmError::ConfigureVm(VmError::VmSetup(e))) + } + + /// Creates the irq chip and an in-kernel device model for the PIT. + pub(crate) fn setup_interrupt_controller( + &mut self, + ) -> std::result::Result<(), StartMicroVmError> { + self.vm_fd + .create_irq_chip() + .map_err(|e| StartMicroVmError::ConfigureVm(VmError::VmSetup(e))) + } + + /// Creates an in-kernel device model for the PIT. + pub(crate) fn create_pit(&self) -> std::result::Result<(), StartMicroVmError> { + info!(self.logger, "VM: create pit"); + // We need to enable the emulation of a dummy speaker port stub so that writing to port 0x61 + // (i.e. KVM_SPEAKER_BASE_ADDRESS) does not trigger an exit to user space. + let pit_config = kvm_pit_config { + flags: KVM_PIT_SPEAKER_DUMMY, + ..kvm_pit_config::default() + }; + + // Safe because we know that our file is a VM fd, we know the kernel will only read the + // correct amount of memory from our pointer, and we verify the return result. 
+ self.vm_fd + .create_pit2(pit_config) + .map_err(|e| StartMicroVmError::ConfigureVm(VmError::VmSetup(e))) + } + + pub(crate) fn register_events( + &mut self, + event_mgr: &mut EventManager, + ) -> std::result::Result<(), StartMicroVmError> { + let reset_evt = self + .device_manager + .get_reset_eventfd() + .map_err(StartMicroVmError::DeviceManager)?; + event_mgr + .register_exit_eventfd(&reset_evt) + .map_err(|_| StartMicroVmError::RegisterEvent)?; + self.reset_eventfd = Some(reset_evt); + + Ok(()) + } +} diff --git a/src/dragonball/src/vmm.rs b/src/dragonball/src/vmm.rs new file mode 100644 index 000000000000..72c799e111a5 --- /dev/null +++ b/src/dragonball/src/vmm.rs @@ -0,0 +1,225 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::os::unix::io::RawFd; +use std::sync::{Arc, Mutex, RwLock}; + +use dbs_utils::epoll_manager::EpollManager; +use log::{error, info, warn}; +use seccompiler::BpfProgram; +use vmm_sys_util::eventfd::EventFd; + +use crate::api::v1::{InstanceInfo, VmmService}; +use crate::error::{EpollError, Result}; +use crate::event_manager::{EventContext, EventManager}; +use crate::vm::Vm; +use crate::{EXIT_CODE_GENERIC_ERROR, EXIT_CODE_OK}; + +/// Global coordinator to manage API servers, virtual machines, upgrade etc. +/// +/// Originally firecracker assumes an VMM only manages an VM, and doesn't distinguish VMM and VM. +/// Thus caused a mixed and confusion design. Now we have explicit build the object model as: +/// |---Vmm API Server--<-1:1-> HTTP API Server +/// | |----------<-1:1-> Shimv2/CRI API Server +/// | +/// Vmm <-1:N-> Vm <-1:1-> Address Space Manager <-1:N-> GuestMemory +/// ^ ^---1:1-> Device Manager <-1:N-> Device +/// | ^---1:1-> Resource Manager +/// | ^---1:N-> Vcpu +/// |---<-1:N-> Event Manager +pub struct Vmm { + pub(crate) event_ctx: EventContext, + epoll_manager: EpollManager, + + // Will change to a HashMap when enabling 1 VMM with multiple VMs. + vm: Vm, + + vcpu_seccomp_filter: BpfProgram, + vmm_seccomp_filter: BpfProgram, +} + +impl Vmm { + /// Create a Virtual Machine Monitor instance. + pub fn new( + api_shared_info: Arc>, + api_event_fd: EventFd, + vmm_seccomp_filter: BpfProgram, + vcpu_seccomp_filter: BpfProgram, + kvm_fd: Option, + ) -> Result { + let epoll_manager = EpollManager::default(); + Self::new_with_epoll_manager( + api_shared_info, + api_event_fd, + epoll_manager, + vmm_seccomp_filter, + vcpu_seccomp_filter, + kvm_fd, + ) + } + + /// Create a Virtual Machine Monitor instance with a epoll_manager. + pub fn new_with_epoll_manager( + api_shared_info: Arc>, + api_event_fd: EventFd, + epoll_manager: EpollManager, + vmm_seccomp_filter: BpfProgram, + vcpu_seccomp_filter: BpfProgram, + kvm_fd: Option, + ) -> Result { + let vm = Vm::new(kvm_fd, api_shared_info, epoll_manager.clone())?; + let event_ctx = EventContext::new(api_event_fd)?; + + Ok(Vmm { + event_ctx, + epoll_manager, + vm, + vcpu_seccomp_filter, + vmm_seccomp_filter, + }) + } + + /// Get a reference to a virtual machine managed by the VMM. + pub fn get_vm(&self) -> Option<&Vm> { + Some(&self.vm) + } + + /// Get a mutable reference to a virtual machine managed by the VMM. 
+ pub fn get_vm_mut(&mut self) -> Option<&mut Vm> { + Some(&mut self.vm) + } + + /// Get the seccomp rules for vCPU threads. + pub fn vcpu_seccomp_filter(&self) -> BpfProgram { + self.vcpu_seccomp_filter.clone() + } + + /// Get the seccomp rules for VMM threads. + pub fn vmm_seccomp_filter(&self) -> BpfProgram { + self.vmm_seccomp_filter.clone() + } + + /// Run the event loop to service API requests. + /// + /// # Arguments + /// + /// * `vmm` - An Arc reference to the global Vmm instance. + /// * `service` - VMM Service provider. + pub fn run_vmm_event_loop(vmm: Arc>, mut service: VmmService) -> i32 { + let epoll_mgr = vmm.lock().unwrap().epoll_manager.clone(); + let mut event_mgr = + EventManager::new(&vmm, epoll_mgr).expect("Cannot create epoll manager"); + + 'poll: loop { + match event_mgr.handle_events(-1) { + Ok(_) => { + // Check whether there are pending vmm events. + if event_mgr.fetch_vmm_event_count() == 0 { + continue; + } + + let mut v = vmm.lock().unwrap(); + if v.event_ctx.api_event_triggered { + // The run_vmm_action() needs to access event_mgr, so it could + // not be handled in EpollHandler::handle_events(). It has been + // delayed to the main loop. + v.event_ctx.api_event_triggered = false; + service + .run_vmm_action(&mut v, &mut event_mgr) + .unwrap_or_else(|_| { + warn!("got spurious notification from api thread"); + }); + } + if v.event_ctx.exit_evt_triggered { + info!("Gracefully terminated VMM control loop"); + return v.stop(EXIT_CODE_OK as i32); + } + } + Err(e) => { + error!("Abruptly exited VMM control loop: {:?}", e); + if let EpollError::EpollMgr(dbs_utils::epoll_manager::Error::Epoll(e)) = e { + if e.errno() == libc::EAGAIN || e.errno() == libc::EINTR { + continue 'poll; + } + } + return vmm.lock().unwrap().stop(EXIT_CODE_GENERIC_ERROR as i32); + } + } + } + } + + /// Waits for all vCPUs to exit and terminates the Dragonball process. + fn stop(&mut self, exit_code: i32) -> i32 { + info!("Vmm is stopping."); + if let Some(vm) = self.get_vm_mut() { + if vm.is_vm_initialized() { + if let Err(e) = vm.remove_devices() { + warn!("failed to remove devices: {:?}", e); + } + + #[cfg(feature = "dbs-upcall")] + if let Err(e) = vm.remove_upcall() { + warn!("failed to remove upcall: {:?}", e); + } + + if let Err(e) = vm.reset_console() { + warn!("Cannot set canonical mode for the terminal. {:?}", e); + } + + // Now, we use exit_code instead of invoking _exit to + // terminate process, so all of vcpu threads should be stopped + // prior to vmm event loop. + match vm.vcpu_manager() { + Ok(mut mgr) => { + if let Err(e) = mgr.exit_all_vcpus() { + warn!("Failed to exit vcpu thread. {:?}", e); + } + #[cfg(feature = "dbs-upcall")] + mgr.set_upcall_channel(None); + } + Err(e) => warn!("Failed to get vcpu manager {:?}", e), + } + + // save exit state to VM, instead of exit process. 
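+                // The exit code recorded here is also what `run_vmm_event_loop()`
+                // hands back to its caller, which is expected to terminate the
+                // process itself, e.g. (illustrative only, not part of this patch):
+                //
+                //   let code = Vmm::run_vmm_event_loop(vmm, service);
+                //   std::process::exit(code);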
+ vm.vm_exit(exit_code); + } + } + + exit_code + } +} + +#[cfg(test)] +pub(crate) mod tests { + use test_utils::skip_if_not_root; + + use super::*; + + pub fn create_vmm_instance(epoll_manager: EpollManager) -> Vmm { + let info = Arc::new(RwLock::new(InstanceInfo::default())); + let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + let seccomp_filter: BpfProgram = Vec::new(); + + Vmm::new_with_epoll_manager( + info, + event_fd, + epoll_manager, + seccomp_filter.clone(), + seccomp_filter, + None, + ) + .unwrap() + } + + #[test] + fn test_create_vmm_instance() { + skip_if_not_root!(); + + create_vmm_instance(EpollManager::default()); + } +} diff --git a/src/kata-opa/allow-all-except-exec-process.rego b/src/kata-opa/allow-all-except-exec-process.rego new file mode 100644 index 000000000000..7d77015b2408 --- /dev/null +++ b/src/kata-opa/allow-all-except-exec-process.rego @@ -0,0 +1,39 @@ +package agent_policy + +default AddARPNeighborsRequest := true +default AddSwapRequest := true +default CloseStdinRequest := true +default CopyFileRequest := true +default CreateContainerRequest := true +default CreateSandboxRequest := true +default DestroySandboxRequest := true +default GetMetricsRequest := true +default GetOOMEventRequest := true +default GuestDetailsRequest := true +default ListInterfacesRequest := true +default ListRoutesRequest := true +default MemHotplugByProbeRequest := true +default OnlineCPUMemRequest := true +default PauseContainerRequest := true +default PullImageRequest := true +default ReadStreamRequest := true +default RemoveContainerRequest := true +default RemoveStaleVirtiofsShareMountsRequest := true +default ReseedRandomDevRequest := false +default ResumeContainerRequest := true +default SetGuestDateTimeRequest := true +default SetPolicyRequest := true +default SignalProcessRequest := true +default StartContainerRequest := true +default StartTracingRequest := true +default StatsContainerRequest := true +default StopTracingRequest := true +default TtyWinResizeRequest := true +default UpdateContainerRequest := true +default UpdateEphemeralMountsRequest := true +default UpdateInterfaceRequest := true +default UpdateRoutesRequest := true +default WaitProcessRequest := true +default WriteStreamRequest := true + +default ExecProcessRequest := false diff --git a/src/kata-opa/allow-all.rego b/src/kata-opa/allow-all.rego new file mode 100644 index 000000000000..cf813a600b26 --- /dev/null +++ b/src/kata-opa/allow-all.rego @@ -0,0 +1,38 @@ +package agent_policy + +default AddARPNeighborsRequest := true +default AddSwapRequest := true +default CloseStdinRequest := true +default CopyFileRequest := true +default CreateContainerRequest := true +default CreateSandboxRequest := true +default DestroySandboxRequest := true +default ExecProcessRequest := true +default GetMetricsRequest := true +default GetOOMEventRequest := true +default GuestDetailsRequest := true +default ListInterfacesRequest := true +default ListRoutesRequest := true +default MemHotplugByProbeRequest := true +default OnlineCPUMemRequest := true +default PauseContainerRequest := true +default PullImageRequest := true +default ReadStreamRequest := true +default RemoveContainerRequest := true +default RemoveStaleVirtiofsShareMountsRequest := true +default ReseedRandomDevRequest := false +default ResumeContainerRequest := true +default SetGuestDateTimeRequest := true +default SetPolicyRequest := true +default SignalProcessRequest := true +default StartContainerRequest := true +default StartTracingRequest := true +default 
StatsContainerRequest := true +default StopTracingRequest := true +default TtyWinResizeRequest := true +default UpdateContainerRequest := true +default UpdateEphemeralMountsRequest := true +default UpdateInterfaceRequest := true +default UpdateRoutesRequest := true +default WaitProcessRequest := true +default WriteStreamRequest := true diff --git a/src/kata-opa/kata-opa.service.in b/src/kata-opa/kata-opa.service.in new file mode 100644 index 000000000000..acb24d941ae9 --- /dev/null +++ b/src/kata-opa/kata-opa.service.in @@ -0,0 +1,29 @@ +# +# Copyright (c) 2023 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +[Unit] +Description=Open Policy Agent for Kata Containers +Documentation=https://github.com/kata-containers +ConditionPathExists=@SETTINGSDIR@/default-policy.rego + +# kata-agent connects to OPA while starting up. +Before=kata-agent.service + +[Service] +Type=simple +ExecStart=@BINDIR@/opa run --server --disable-telemetry --addr 127.0.0.1:8181 --log-level info +DynamicUser=yes +RuntimeDirectory=kata-opa +LimitNOFILE=1048576 + +# Don't restart because there may be an active policy that would be lost. +Restart=no + +# Send log output to tty to allow capturing debug logs from a VM vsock port. +StandardError=tty + +# Discourage OOM-killer from touching the policy service. +OOMScoreAdjust=-997 diff --git a/src/libs/Cargo.lock b/src/libs/Cargo.lock index 99a395749ba1..0c63e3b217c8 100644 --- a/src/libs/Cargo.lock +++ b/src/libs/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + [[package]] name = "anyhow" version = "1.0.57" @@ -25,11 +34,28 @@ dependencies = [ "syn", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + [[package]] name = "autocfg" -version = "1.0.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "bitflags" @@ -37,6 +63,28 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +[[package]] +name = "bitmask-enum" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd9e32d7420c85055e8107e5b2463c4eeefeaac18b52359fe9f9c08a18f342b2" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "bumpalo" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d" + +[[package]] +name = "byte-unit" +version = "3.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "415301c9de11005d4b92193c0eb7ac7adc37e5a49e0ac9bed0a42343512744b8" + [[package]] name = "byteorder" version = "1.4.3" @@ -72,23 +120,43 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "chrono" -version = "0.4.19" +name = "cgroups-rs" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +checksum = "5b098e7c3a70d03c288fa0a96ccf13e770eb3d78c4cc0e1549b3c13215d5f965" dependencies = [ "libc", + "log", + "nix 0.25.1", + "regex", + "thiserror", +] + +[[package]] +name = "chrono" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6127248204b9aba09a362f6c930ef6a78f2c1b2215f8a7b398c06e1083f17af0" +dependencies = [ + "js-sys", "num-integer", "num-traits", - "time", + "time 0.1.43", + "wasm-bindgen", "winapi", ] +[[package]] +name = "common-path" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2382f75942f4b3be3690fe4f86365e9c853c1587d6ee58212cebf6e2a9ccd101" + [[package]] name = "crossbeam-channel" -version = "0.5.2" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e54ea8bc3fb1ee042f5aace6e3c6e025d3874866da222930f70ce62aceba0bfa" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" dependencies = [ "cfg-if", "crossbeam-utils", @@ -115,12 +183,44 @@ dependencies = [ "syn", ] +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "either" version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +[[package]] +name = "fail" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3245a0ca564e7f3c797d20d833a6870f57a728ac967d5225b3ffdef4465011" +dependencies = [ + "lazy_static", + "log", + "rand", +] + [[package]] name = "fastrand" version = "1.6.0" @@ -136,6 +236,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "futures" version = "0.3.21" @@ -225,6 +331,23 @@ dependencies = [ "slab", ] +[[package]] +name = "getrandom" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.10.2+wasi-snapshot-preview1", +] + +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + [[package]] name = "hashbrown" version = "0.11.2" @@ -240,6 +363,91 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "hermit-abi" +version = "0.1.19" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "http" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" +dependencies = [ + "bytes 1.1.0", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +dependencies = [ + "bytes 1.1.0", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" + +[[package]] +name = "hyper" +version = "0.14.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c" +dependencies = [ + "bytes 1.1.0", + "futures-channel", + "futures-core", + "futures-util", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyperlocal" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fafdf7b2b2de7c9784f76e02c0935e65a8117ec3b768644379983ab333ac98c" +dependencies = [ + "futures-util", + "hex", + "hyper", + "pin-project", + "tokio", +] + [[package]] name = "indexmap" version = "1.8.1" @@ -283,6 +491,67 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" +[[package]] +name = "js-sys" +version = "0.3.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fac17f7123a73ca62df411b1bf727ccc805daa070338fda671c86dac1bdc27" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "kata-sys-util" +version = "0.1.0" +dependencies = [ + "anyhow", + "byteorder", + "cgroups-rs", + "chrono", + "common-path", + "fail", + "kata-types", + "lazy_static", + "libc", + "nix 0.24.2", + "num_cpus", + "oci", + "once_cell", + "rand", + "serde_json", + "serial_test", + "slog", + "slog-scope", + "subprocess", + "tempfile", + "thiserror", +] + +[[package]] +name = "kata-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "base64", + "bitmask-enum", + "byte-unit", + "glob", + "lazy_static", + "nix 0.24.2", + "num_cpus", + "oci", + "regex", + "safe-path", + "serde", + "serde_json", + "slog", + "slog-scope", + "tempfile", + "test-utils", + "thiserror", + "toml", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -291,9 +560,19 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.124" +version = "0.2.139" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50" 
+checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" + +[[package]] +name = "lock_api" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" +dependencies = [ + "autocfg", + "scopeguard", +] [[package]] name = "log" @@ -313,6 +592,7 @@ dependencies = [ "slog-async", "slog-json", "slog-scope", + "slog-term", "tempfile", ] @@ -362,9 +642,9 @@ checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" [[package]] name = "nix" -version = "0.20.2" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5e06129fb611568ef4e868c14b326274959aa70ff7776e9d55323531c374945" +checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6" dependencies = [ "bitflags", "cc", @@ -375,17 +655,28 @@ dependencies = [ [[package]] name = "nix" -version = "0.23.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6" +checksum = "195cdbc1741b8134346d515b3a56a1c94b0912758009cfd53f99ea0f57b065fc" dependencies = [ "bitflags", - "cc", "cfg-if", "libc", "memoffset", ] +[[package]] +name = "nix" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" +dependencies = [ + "autocfg", + "bitflags", + "cfg-if", + "libc", +] + [[package]] name = "ntapi" version = "0.3.7" @@ -414,11 +705,65 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "num_threads" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +dependencies = [ + "libc", +] + +[[package]] +name = "oci" +version = "0.1.0" +dependencies = [ + "libc", + "serde", + "serde_derive", + "serde_json", +] + [[package]] name = "once_cell" -version = "1.9.0" +version = "1.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" + +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" +checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] [[package]] name = "petgraph" @@ -430,6 +775,26 @@ dependencies = [ "indexmap", ] +[[package]] +name = "pin-project" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad29a609b6bcd67fee905812e544992d216af9d755757c05ed2d0e15a74c6ecc" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.0.12" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "pin-project-lite" version = "0.2.8" @@ -442,6 +807,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "ppv-lite86" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" + [[package]] name = "proc-macro2" version = "1.0.37" @@ -504,31 +875,68 @@ dependencies = [ [[package]] name = "protobuf" -version = "2.14.0" +version = "2.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e86d370532557ae7573551a1ec8235a0f8d6cb276c7c9e6aa490b511c447485" +checksum = "cf7e6d18738ecd0902d30d1ad232c9125985a3422929b16c65517b38adc14f96" + +[[package]] +name = "protobuf" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b55bad9126f378a853655831eb7363b7b01b81d19f8cb1218861086ca4a1a61e" dependencies = [ - "serde", - "serde_derive", + "once_cell", + "protobuf-support", + "thiserror", +] + +[[package]] +name = "protobuf-codegen" +version = "2.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aec1632b7c8f2e620343439a7dfd1f3c47b18906c4be58982079911482b5d707" +dependencies = [ + "protobuf 2.27.1", ] [[package]] name = "protobuf-codegen" -version = "2.14.0" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd418ac3c91caa4032d37cb80ff0d44e2ebe637b2fb243b6234bf89cdac4901" +dependencies = [ + "anyhow", + "once_cell", + "protobuf 3.2.0", + "protobuf-parse", + "regex", + "tempfile", + "thiserror", +] + +[[package]] +name = "protobuf-parse" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de113bba758ccf2c1ef816b127c958001b7831136c9bc3f8e9ec695ac4e82b0c" +checksum = "9d39b14605eaa1f6a340aec7f320b34064feb26c93aec35d6a9a2272a8ddfa49" dependencies = [ - "protobuf", + "anyhow", + "indexmap", + "log", + "protobuf 3.2.0", + "protobuf-support", + "tempfile", + "thiserror", + "which", ] [[package]] -name = "protobuf-codegen-pure" -version = "2.14.0" +name = "protobuf-support" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d1a4febc73bf0cada1d77c459a0c8e5973179f1cfd5b0f1ab789d45b17b6440" +checksum = "a5d4d7b8601c814cfb36bcebb79f0e61e45e1e93640cf778837833bbed05c372" dependencies = [ - "protobuf", - "protobuf-codegen", + "thiserror", ] [[package]] @@ -536,7 +944,8 @@ name = "protocols" version = "0.1.0" dependencies = [ "async-trait", - "protobuf", + "oci", + "protobuf 3.2.0", "serde", "serde_json", "ttrpc", @@ -552,15 +961,73 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom", +] + [[package]] name = "redox_syscall" -version = "0.2.10" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +dependencies = [ + "getrandom", + "redox_syscall", + "thiserror", +] + +[[package]] +name = "regex" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" + [[package]] name = "remove_dir_all" version = "0.5.3" @@ -570,6 +1037,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "rustversion" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" + [[package]] name = "ryu" version = "1.0.9" @@ -584,20 +1057,26 @@ dependencies = [ "tempfile", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + [[package]] name = "serde" -version = "1.0.133" +version = "1.0.136" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97565067517b60e2d1ea8b268e59ce036de907ac523ad83a0475da04e818989a" +checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.133" +version = "1.0.136" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed201699328568d8d08208fdd080e3ff594e6c422e438b6705905da01005d537" +checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" dependencies = [ "proc-macro2", "quote", @@ -615,6 +1094,39 @@ dependencies = [ "serde", ] +[[package]] +name = "serial_test" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0bccbcf40c8938196944a3da0e133e031a33f4d6b72db3bda3cc556e361905d" +dependencies = [ + "lazy_static", + "parking_lot", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2acd6defeddb41eb60bb468f8825d0cfd0c2a76bc03bfd235b6a1dc4f6a1ad5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shim-interface" +version = "0.1.0" +dependencies = [ + "anyhow", + "hyper", + "hyperlocal", + "kata-types", + "tokio", +] + [[package]] name = "slab" version = "0.4.6" @@ -641,9 +1153,9 @@ dependencies = [ [[package]] name = "slog-json" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52e9b96fb6b5e80e371423b4aca6656eb537661ce8f82c2697e619f8ca85d043" 
+checksum = "0f7f7a952ce80fca9da17bf0a53895d11f8aa1ba063668ca53fc72e7869329e9" dependencies = [ "chrono", "serde", @@ -662,11 +1174,40 @@ dependencies = [ "slog", ] +[[package]] +name = "slog-term" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87d29185c55b7b258b4f120eab00f48557d4d9bc814f41713f449d35b0f8977c" +dependencies = [ + "atty", + "slog", + "term", + "thread_local", + "time 0.3.22", +] + +[[package]] +name = "smallvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" + [[package]] name = "socket2" -version = "0.4.4" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "subprocess" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" +checksum = "0c2e86926081dda636c546d8c5e641661049d7562a68f5488be4a1f7f66f6086" dependencies = [ "libc", "winapi", @@ -703,6 +1244,24 @@ dependencies = [ "winapi", ] +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + +[[package]] +name = "test-utils" +version = "0.1.0" +dependencies = [ + "nix 0.24.2", +] + [[package]] name = "thiserror" version = "1.0.30" @@ -725,24 +1284,52 @@ dependencies = [ [[package]] name = "thread_local" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd" +checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" dependencies = [ "once_cell", ] [[package]] name = "time" -version = "0.1.44" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" dependencies = [ "libc", - "wasi 0.10.0+wasi-snapshot-preview1", "winapi", ] +[[package]] +name = "time" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea9e1b3cf1243ae005d9e74085d4d542f3125458f3a81af210d901dcd7411efd" +dependencies = [ + "itoa", + "libc", + "num_threads", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" + +[[package]] +name = "time-macros" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "372950940a5f07bf38dbe211d7283c9e6d7327df53794992d293e534c733d09b" +dependencies = [ + "time-core", +] + [[package]] name = "tokio" version = "1.17.0" @@ -753,6 +1340,7 @@ dependencies = [ "libc", "memchr", "mio", + "num_cpus", "pin-project-lite", "socket2", "tokio-macros", @@ -783,20 +1371,61 @@ dependencies = [ "vsock", ] +[[package]] +name = "toml" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" 
+dependencies = [ + "serde", +] + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d0ecdcb44a79f0fe9844f0c4f33a342cbcbb5117de8001e6ba0dc2351327d09" +dependencies = [ + "cfg-if", + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f54c8ca710e81886d498c2fd3331b56c93aa248d49de2222ad2742247c60072f" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "try-lock" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" + [[package]] name = "ttrpc" -version = "0.5.2" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66a973ce6d5eaa20c173635b29ffb660dafbc7ef109172c0015ba44e47a23711" +checksum = "a35f22a2964bea14afee161665bb260b83cb48e665e0260ca06ec0e775c8b06c" dependencies = [ "async-trait", "byteorder", "futures", "libc", "log", - "nix 0.20.2", - "protobuf", - "protobuf-codegen-pure", + "nix 0.23.1", + "protobuf 3.2.0", + "protobuf-codegen 3.2.0", "thiserror", "tokio", "tokio-vsock", @@ -804,28 +1433,28 @@ dependencies = [ [[package]] name = "ttrpc-codegen" -version = "0.2.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809eda4e459820237104e4b61d6b41bbe6c9e1ce6adf4057955e6e6722a90408" +checksum = "94d7f7631d7a9ebed715a47cd4cb6072cbc7ae1d4ec01598971bbec0024340c2" dependencies = [ - "protobuf", - "protobuf-codegen", - "protobuf-codegen-pure", + "protobuf 2.27.1", + "protobuf-codegen 3.2.0", + "protobuf-support", "ttrpc-compiler", ] [[package]] name = "ttrpc-compiler" -version = "0.4.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2978ed3fa047d8fd55cbeb4d4a61d461fb3021a90c9618519c73ce7e5bb66c15" +checksum = "ec3cb5dbf1f0865a34fe3f722290fe776cacb16f50428610b779467b76ddf647" dependencies = [ "derive-new", "prost", "prost-build", "prost-types", - "protobuf", - "protobuf-codegen", + "protobuf 2.27.1", + "protobuf-codegen 2.27.1", "tempfile", ] @@ -851,11 +1480,21 @@ dependencies = [ "nix 0.23.1", ] +[[package]] +name = "want" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" +dependencies = [ + "log", + "try-lock", +] + [[package]] name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" +version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" [[package]] name = "wasi" @@ -863,6 +1502,60 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasm-bindgen" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c53b543413a17a202f4be280a7e5c62a1c69345f5de525ee64f8cfdbc954994" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", 
+] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5491a68ab4500fa6b4d726bd67408630c3dbe9c4fe7bda16d5c82a1fd8c7340a" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c441e177922bc58f1e12c022624b6216378e5febc2f0533e41ba443d505b80aa" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be" + [[package]] name = "which" version = "4.2.5" diff --git a/src/libs/Cargo.toml b/src/libs/Cargo.toml index 16eedb91f272..698c44a5bc41 100644 --- a/src/libs/Cargo.toml +++ b/src/libs/Cargo.toml @@ -1,7 +1,12 @@ [workspace] members = [ + "kata-sys-util", + "kata-types", "logging", - "safe-path", + "oci", "protocols", + "safe-path", + "shim-interface", + "test-utils", ] resolver = "2" diff --git a/src/libs/Makefile b/src/libs/Makefile new file mode 100644 index 000000000000..aabaa3391345 --- /dev/null +++ b/src/libs/Makefile @@ -0,0 +1,45 @@ +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +EXTRA_RUSTFEATURES := + +EXTRA_TEST_FLAGS := +USERID=$(shell id -u) +ifeq ($(USERID), 0) + override EXTRA_TEST_FLAGS = --ignored +endif + +default: build + +build: + cargo build --all-features + +static-checks-build: + @echo "INFO: static-checks-build do nothing.." + +check: clippy format + +clippy: + @echo "INFO: cargo clippy..." + cargo clippy --all-targets --all-features --release \ + -- \ + -D warnings + +format: + @echo "INFO: cargo fmt..." + cargo fmt -- --check + +clean: + cargo clean + +# It is essential to run these tests using *both* build profiles. +# See the `test_logger_levels()` test for further information. +test: + @echo "INFO: testing libraries for development build" + cargo test --all $(EXTRA_RUSTFEATURES) -- --nocapture $(EXTRA_TEST_FLAGS) + @echo "INFO: testing libraries for release build" + cargo test --release --all $(EXTRA_RUSTFEATURES) -- --nocapture $(EXTRA_TEST_FLAGS) + +.PHONY: install vendor diff --git a/src/libs/README.md b/src/libs/README.md index 36f2f00d73cc..a7ac0349a120 100644 --- a/src/libs/README.md +++ b/src/libs/README.md @@ -5,6 +5,9 @@ or published to [`crates.io`](https://crates.io/index.html). Currently it provides following library crates: | Library | Description | -|-|-|-| -| [logging](logging/) | Facilities to setup logging subsystem based slog. | +|-|-| +| [logging](logging/) | Facilities to setup logging subsystem based on slog. | +| [system utilities](kata-sys-util/) | Collection of facilities and helpers to access system services. | +| [types](kata-types/) | Collection of constants and data types shared by multiple Kata Containers components. | | [safe-path](safe-path/) | Utilities to safely resolve filesystem paths. | +| [test utilities](test-utils/) | Utilities to share test code. 
| diff --git a/src/libs/kata-sys-util/Cargo.toml b/src/libs/kata-sys-util/Cargo.toml new file mode 100644 index 000000000000..0d6cff91fba5 --- /dev/null +++ b/src/libs/kata-sys-util/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "kata-sys-util" +version = "0.1.0" +description = "System Utilities for Kata Containers" +keywords = ["kata", "container", "runtime"] +authors = ["The Kata Containers community "] +repository = "https://github.com/kata-containers/kata-containers.git" +homepage = "https://katacontainers.io/" +readme = "README.md" +license = "Apache-2.0" +edition = "2018" + +[dependencies] +anyhow = "1.0.31" +byteorder = "1.4.3" +cgroups = { package = "cgroups-rs", version = "0.3.2" } +chrono = "0.4.0" +common-path = "=1.0.0" +fail = "0.5.0" +lazy_static = "1.4.0" +libc = "0.2.100" +nix = "0.24.2" +once_cell = "1.9.0" +serde_json = "1.0.73" +slog = "2.5.2" +slog-scope = "4.4.0" +subprocess = "0.2.8" +rand = "0.8.5" +thiserror = "1.0.30" + +kata-types = { path = "../kata-types" } +oci = { path = "../oci" } + +[dev-dependencies] +num_cpus = "1.13.1" +serial_test = "0.5.1" +tempfile = "3.2.0" diff --git a/src/libs/kata-sys-util/README.md b/src/libs/kata-sys-util/README.md new file mode 100644 index 000000000000..0c3f887bcbea --- /dev/null +++ b/src/libs/kata-sys-util/README.md @@ -0,0 +1,19 @@ +# kata-sys-util + +This crate is a collection of utilities and helpers for +[Kata Containers](https://github.com/kata-containers/kata-containers/) components to access system services. + +It provides safe wrappers over system services, such as: +- cgroups +- file systems +- mount +- NUMA + +## Support + +**Operating Systems**: +- Linux + +## License + +This code is licensed under [Apache-2.0](../../../LICENSE). diff --git a/src/libs/kata-sys-util/src/cpu.rs b/src/libs/kata-sys-util/src/cpu.rs new file mode 100644 index 000000000000..97bc2fd94366 --- /dev/null +++ b/src/libs/kata-sys-util/src/cpu.rs @@ -0,0 +1,414 @@ +// Copyright (c) 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Result}; + +#[cfg(target_arch = "s390x")] +use std::collections::HashMap; +#[cfg(target_arch = "s390x")] +use std::io::BufRead; +#[cfg(target_arch = "s390x")] +use std::io::BufReader; + +#[allow(dead_code)] +const ERR_NO_CPUINFO: &str = "cpu_info string is empty"; + +pub const PROC_CPUINFO: &str = "/proc/cpuinfo"; + +#[cfg(target_arch = "x86_64")] +pub const CPUINFO_DELIMITER: &str = "\nprocessor"; +#[cfg(target_arch = "x86_64")] +pub const CPUINFO_FLAGS_TAG: &str = "flags"; + +fn read_file_contents(file_path: &str) -> Result { + let contents = std::fs::read_to_string(file_path)?; + Ok(contents) +} + +// get_single_cpu_info returns the contents of the first cpu from +// the specified cpuinfo file by parsing based on a specified delimiter +pub fn get_single_cpu_info(cpu_info_file: &str, substring: &str) -> Result { + let contents = read_file_contents(cpu_info_file)?; + + if contents.is_empty() { + return Err(anyhow!(ERR_NO_CPUINFO)); + } + + let subcontents: Vec<&str> = contents.split(substring).collect(); + let result = subcontents + .first() + .ok_or("error splitting contents of cpuinfo") + .map_err(|e| anyhow!(e))? 
+        .to_string();
+    Ok(result)
+}
+
+// get_cpu_flags returns a string of cpu flags from cpuinfo, passed in
+// as a string
+#[cfg(any(target_arch = "s390x", target_arch = "x86_64"))]
+pub fn get_cpu_flags(cpu_info: &str, cpu_flags_tag: &str) -> Result<String> {
+    if cpu_info.is_empty() {
+        return Err(anyhow!(ERR_NO_CPUINFO));
+    }
+
+    if cpu_flags_tag.is_empty() {
+        return Err(anyhow!("cpu flags delimiter string is empty"))?;
+    }
+
+    get_cpu_flags_from_file(cpu_info, cpu_flags_tag)
+}
+
+// get a list of cpu flags in cpu_info_flags
+//
+// cpu_info is the content of cpuinfo file passed in as a string
+// returns empty Vec if no flags are found
+#[cfg(any(target_arch = "s390x", target_arch = "x86_64"))]
+pub fn get_cpu_flags_vec(cpu_info: &str, cpu_flags_tag: &str) -> Result<Vec<String>> {
+    if cpu_info.is_empty() {
+        return Err(anyhow!(ERR_NO_CPUINFO));
+    }
+
+    if cpu_flags_tag.is_empty() {
+        return Err(anyhow!("cpu flags delimiter string is empty"))?;
+    }
+
+    let flags = get_cpu_flags_from_file(cpu_info, cpu_flags_tag)?;
+
+    // take each flag, trim whitespace, convert to String, and add to list
+    // skip the first token in the iterator since it is empty
+    let flags_vec: Vec<String> = flags
+        .split(' ')
+        .skip(1)
+        .map(|f| f.trim().to_string())
+        .collect::<Vec<String>>();
+
+    Ok(flags_vec)
+}
+
+// check if the given flag exists in the given flags_vec
+//
+// flags_vec can be created by calling get_cpu_flags_vec
+#[cfg(any(target_arch = "s390x", target_arch = "x86_64"))]
+pub fn contains_cpu_flag(flags_vec: &[String], flag: &str) -> Result<bool> {
+    if flag.is_empty() {
+        return Err(anyhow!("parameter specifying flag to look for is empty"))?;
+    }
+
+    Ok(flags_vec.iter().any(|f| f == flag))
+}
+
+// get a String containing the cpu flags in cpu_info
+//
+// this function returns the list of flags as a single String
+// if no flags are found, returns an empty String
+#[cfg(any(target_arch = "s390x", target_arch = "x86_64"))]
+fn get_cpu_flags_from_file(cpu_info: &str, cpu_flags_tag: &str) -> Result<String> {
+    let subcontents: Vec<&str> = cpu_info.split('\n').collect();
+    for line in subcontents {
+        if line.starts_with(cpu_flags_tag) {
+            let line_data: Vec<&str> = line.split(':').collect();
+            let flags = line_data
+                .last()
+                .ok_or("error splitting flags in cpuinfo")
+                .map_err(|e| anyhow!(e))?
+                .to_string();
+            return Ok(flags);
+        }
+    }
+
+    Ok("".to_string())
+}
+
+#[cfg(target_arch = "s390x")]
+pub fn retrieve_cpu_facilities() -> Result<HashMap<i32, bool>> {
+    let f = std::fs::File::open(PROC_CPUINFO)?;
+    let mut reader = BufReader::new(f);
+    let mut contents = String::new();
+    let facilities_field = "facilities";
+    let mut facilities = HashMap::new();
+
+    while reader.read_line(&mut contents)?
> 0 { + let fields: Vec<&str> = contents.split_whitespace().collect(); + if fields.len() < 2 { + contents.clear(); + continue; + } + + if !fields[0].starts_with(facilities_field) { + contents.clear(); + continue; + } + + let mut start = 1; + if fields[1] == ":" { + start = 2; + } + + for field in fields.iter().skip(start) { + let bit = field.parse::()?; + facilities.insert(bit, true); + } + return Ok(facilities); + } + + Ok(facilities) +} + +#[cfg(any(target_arch = "s390x", target_arch = "x86_64"))] +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use std::io::Write; + use tempfile::tempdir; + + #[test] + fn test_get_single_cpu_info() { + // Valid cpuinfo example + let dir = tempdir().unwrap(); + let file_path_full = dir.path().join("cpuinfo_full"); + let path_full = file_path_full.clone(); + let mut file_full = fs::File::create(file_path_full).unwrap(); + let contents = "processor : 0\nvendor_id : VendorExample\nflags : flag_1 flag_2 flag_3 flag_4\nprocessor : 1\n".to_string(); + writeln!(file_full, "{}", contents).unwrap(); + + // Empty cpuinfo example + let file_path_empty = dir.path().join("cpuinfo_empty"); + let path_empty = file_path_empty.clone(); + let mut _file_empty = fs::File::create(file_path_empty).unwrap(); + + #[derive(Debug)] + struct TestData<'a> { + cpuinfo_path: &'a str, + processor_delimiter_str: &'a str, + result: Result, + } + let tests = &[ + // Failure scenarios + TestData { + cpuinfo_path: "", + processor_delimiter_str: "", + result: Err(anyhow!("No such file or directory (os error 2)")), + }, + TestData { + cpuinfo_path: &path_empty.as_path().display().to_string(), + processor_delimiter_str: "\nprocessor", + result: Err(anyhow!(ERR_NO_CPUINFO)), + }, + // Success scenarios + TestData { + cpuinfo_path: &path_full.as_path().display().to_string(), + processor_delimiter_str: "\nprocessor", + result: Ok( + "processor : 0\nvendor_id : VendorExample\nflags : flag_1 flag_2 flag_3 flag_4" + .to_string(), + ), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + let result = get_single_cpu_info(d.cpuinfo_path, d.processor_delimiter_str); + let msg = format!("{}, result: {:?}", msg, result); + + if d.result.is_ok() { + assert_eq!( + result.as_ref().unwrap(), + d.result.as_ref().unwrap(), + "{}", + msg + ); + continue; + } + + let expected_error = format!("{}", d.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } + } + + #[test] + fn test_get_cpu_flags() { + let contents = "processor : 0\nvendor_id : VendorExample\nflags : flag_1 flag_2 flag_3 flag_4\nprocessor : 1\n"; + + #[derive(Debug)] + struct TestData<'a> { + cpu_info_str: &'a str, + cpu_flags_tag: &'a str, + result: Result, + } + let tests = &[ + // Failure scenarios + TestData { + cpu_info_str: "", + cpu_flags_tag: "", + result: Err(anyhow!(ERR_NO_CPUINFO)), + }, + TestData { + cpu_info_str: "", + cpu_flags_tag: "flags", + result: Err(anyhow!(ERR_NO_CPUINFO)), + }, + TestData { + cpu_info_str: contents, + cpu_flags_tag: "", + result: Err(anyhow!("cpu flags delimiter string is empty")), + }, + // Success scenarios + TestData { + cpu_info_str: contents, + cpu_flags_tag: "flags", + result: Ok(" flag_1 flag_2 flag_3 flag_4".to_string()), + }, + TestData { + cpu_info_str: contents, + cpu_flags_tag: "flags_err", + result: Ok("".to_string()), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + let result = 
get_cpu_flags(d.cpu_info_str, d.cpu_flags_tag); + let msg = format!("{}, result: {:?}", msg, result); + + if d.result.is_ok() { + assert_eq!( + result.as_ref().unwrap(), + d.result.as_ref().unwrap(), + "{}", + msg + ); + continue; + } + + let expected_error = format!("{}", d.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } + } + + #[test] + fn test_get_cpu_flags_vec() { + let contents = "processor : 0\nvendor_id : VendorExample\nflags : flag_1 flag_2 flag_3 flag_4\nprocessor : 1\n"; + + #[derive(Debug)] + struct TestData<'a> { + cpu_info_str: &'a str, + cpu_flags_tag: &'a str, + result: Result>, + } + let tests = &[ + // Failure scenarios + TestData { + cpu_info_str: "", + cpu_flags_tag: "", + result: Err(anyhow!(ERR_NO_CPUINFO)), + }, + TestData { + cpu_info_str: "", + cpu_flags_tag: "flags", + result: Err(anyhow!(ERR_NO_CPUINFO)), + }, + TestData { + cpu_info_str: contents, + cpu_flags_tag: "", + result: Err(anyhow!("cpu flags delimiter string is empty")), + }, + // Success scenarios + TestData { + cpu_info_str: contents, + cpu_flags_tag: "flags", + result: Ok(vec![ + "flag_1".to_string(), + "flag_2".to_string(), + "flag_3".to_string(), + "flag_4".to_string(), + ]), + }, + TestData { + cpu_info_str: contents, + cpu_flags_tag: "flags_err", + result: Ok(Vec::new()), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + let result = get_cpu_flags_vec(d.cpu_info_str, d.cpu_flags_tag); + let msg = format!("{}, result: {:?}", msg, result); + + if d.result.is_ok() { + assert_eq!( + result.as_ref().unwrap(), + d.result.as_ref().unwrap(), + "{}", + msg + ); + continue; + } + + let expected_error = format!("{}", d.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } + } + + #[test] + fn test_contains_cpu_flag() { + let flags_vec = vec![ + "flag_1".to_string(), + "flag_2".to_string(), + "flag_3".to_string(), + "flag_4".to_string(), + ]; + + #[derive(Debug)] + struct TestData<'a> { + cpu_flags_vec: &'a Vec, + cpu_flag: &'a str, + result: Result, + } + let tests = &[ + // Failure scenarios + TestData { + cpu_flags_vec: &flags_vec, + cpu_flag: "flag_5", + result: Ok(false), + }, + TestData { + cpu_flags_vec: &flags_vec, + cpu_flag: "", + result: Err(anyhow!("parameter specifying flag to look for is empty")), + }, + // Success scenarios + TestData { + cpu_flags_vec: &flags_vec, + cpu_flag: "flag_1", + result: Ok(true), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + let result = contains_cpu_flag(d.cpu_flags_vec, d.cpu_flag); + let msg = format!("{}, result: {:?}", msg, result); + + if d.result.is_ok() { + assert_eq!( + result.as_ref().unwrap(), + d.result.as_ref().unwrap(), + "{}", + msg + ); + continue; + } + + let expected_error = format!("{}", d.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } + } +} diff --git a/src/libs/kata-sys-util/src/device.rs b/src/libs/kata-sys-util/src/device.rs new file mode 100644 index 000000000000..00a2ade12788 --- /dev/null +++ b/src/libs/kata-sys-util/src/device.rs @@ -0,0 +1,104 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// Copyright (c) 2019-2021 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fs; +use std::io::Result; +use std::os::unix::fs::{FileTypeExt, 
MetadataExt};
+use std::path::{Path, PathBuf};
+
+use nix::sys::stat;
+
+use crate::{eother, sl};
+
+const SYS_DEV_BLOCK_PATH: &str = "/sys/dev/block";
+const BLKDEV_PARTITION: &str = "partition";
+const BLKDEV_DEV_FILE: &str = "dev";
+
+/// Get major and minor number of the device or of the device hosting the regular file/directory.
+pub fn get_devid_for_blkio_cgroup<P: AsRef<Path>>(path: P) -> Result<Option<(u64, u64)>> {
+    let md = fs::metadata(path)?;
+
+    if md.is_dir() || md.is_file() {
+        // For regular file/directory, get major/minor of the block device hosting it.
+        // Note that we need to get the major/minor of the block device instead of partition,
+        // e.g. /dev/sda instead of /dev/sda3, because blkio cgroup works with block major/minor.
+        let id = md.dev();
+        Ok(Some((stat::major(id), stat::minor(id))))
+    } else if md.file_type().is_block_device() {
+        // For block device, get major/minor of the device special file itself
+        get_block_device_id(md.rdev())
+    } else {
+        Ok(None)
+    }
+}
+
+/// Get the block device major/minor number from a partition/block device(itself).
+///
+/// For example, given the dev_t of /dev/sda3 returns major and minor of /dev/sda. We rely on the
+/// fact that if /sys/dev/block/$major:$minor/partition exists, then it's a partition, and find its
+/// parent for the real device.
+fn get_block_device_id(dev: stat::dev_t) -> Result<Option<(u64, u64)>> {
+    let major = stat::major(dev);
+    let minor = stat::minor(dev);
+    let mut blk_dev_path = PathBuf::from(SYS_DEV_BLOCK_PATH)
+        .join(format!("{}:{}", major, minor))
+        .canonicalize()?;
+
+    // If 'partition' file exists, then it's a partition of the real device, take its parent.
+    // Otherwise it's already the real device.
+    loop {
+        if !blk_dev_path.join(BLKDEV_PARTITION).exists() {
+            break;
+        }
+        blk_dev_path = match blk_dev_path.parent() {
+            Some(p) => p.to_path_buf(),
+            None => {
+                return Err(eother!(
+                    "Can't find real device for dev {}:{}",
+                    major,
+                    minor
+                ))
+            }
+        };
+    }
+
+    // Parse major:minor in dev file
+    let dev_path = blk_dev_path.join(BLKDEV_DEV_FILE);
+    let dev_buf = fs::read_to_string(&dev_path)?;
+    let dev_buf = dev_buf.trim_end();
+    debug!(
+        sl!(),
+        "get_real_devid: dev {}:{} -> {:?} ({})", major, minor, blk_dev_path, dev_buf
+    );
+
+    if let Some((major, minor)) = dev_buf.split_once(':') {
+        let major = major
+            .parse::<u64>()
+            .map_err(|_e| eother!("Failed to parse major number: {}", major))?;
+        let minor = minor
+            .parse::<u64>()
+            .map_err(|_e| eother!("Failed to parse minor number: {}", minor))?;
+        Ok(Some((major, minor)))
+    } else {
+        Err(eother!(
+            "Wrong format in {}: {}",
+            dev_path.to_string_lossy(),
+            dev_buf
+        ))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_get_devid() {
+        //let (major, minor) = get_devid_for_blkio_cgroup("/dev/vda1").unwrap().unwrap();
+        assert!(get_devid_for_blkio_cgroup("/dev/tty").unwrap().is_none());
+        get_devid_for_blkio_cgroup("/do/not/exist/file_______name").unwrap_err();
+    }
+}
diff --git a/src/libs/kata-sys-util/src/fs.rs b/src/libs/kata-sys-util/src/fs.rs
new file mode 100644
index 000000000000..bec806c46c68
--- /dev/null
+++ b/src/libs/kata-sys-util/src/fs.rs
@@ -0,0 +1,212 @@
+// Copyright (c) 2019-2021 Alibaba Cloud
+// Copyright (c) 2019-2021 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::ffi::OsString;
+use std::fs::{self, File};
+use std::io::{Error, Result};
+use std::os::unix::io::AsRawFd;
+use std::path::Path;
+use std::process::Command;
+
+use crate::{eother, sl};
+
+// nix filesystem_type for different libc and architectures
+#[cfg(all(target_os =
"linux", target_env = "musl"))] +type FsType = libc::c_ulong; +#[cfg(all( + target_os = "linux", + not(any(target_env = "musl", target_arch = "s390x")) +))] +type FsType = libc::__fsword_t; +#[cfg(all(target_os = "linux", not(target_env = "musl"), target_arch = "s390x"))] +type FsType = libc::c_uint; + +// from linux.git/fs/fuse/inode.c: #define FUSE_SUPER_MAGIC 0x65735546 +const FUSE_SUPER_MAGIC: FsType = 0x65735546; +// from linux.git/include/uapi/linux/magic.h +const OVERLAYFS_SUPER_MAGIC: FsType = 0x794c7630; + +/// Get the basename of the canonicalized path +pub fn get_base_name>(src: P) -> Result { + let s = src.as_ref().canonicalize()?; + s.file_name().map(|v| v.to_os_string()).ok_or_else(|| { + eother!( + "failed to get base name of path {}", + src.as_ref().to_string_lossy() + ) + }) +} + +/// Check whether `path` is on a fuse filesystem. +pub fn is_fuse_fs>(path: P) -> bool { + if let Ok(st) = nix::sys::statfs::statfs(path.as_ref()) { + if st.filesystem_type().0 == FUSE_SUPER_MAGIC { + return true; + } + } + false +} + +/// Check whether `path` is on a overlay filesystem. +pub fn is_overlay_fs>(path: P) -> bool { + if let Ok(st) = nix::sys::statfs::statfs(path.as_ref()) { + if st.filesystem_type().0 == OVERLAYFS_SUPER_MAGIC { + return true; + } + } + false +} + +/// Check whether the given path is a symlink. +pub fn is_symlink>(path: P) -> std::io::Result { + let path = path.as_ref(); + let meta = fs::symlink_metadata(path)?; + + Ok(meta.file_type().is_symlink()) +} + +/// Reflink copy src to dst, and falls back to regular copy if reflink copy fails. +/// +/// # Safety +/// The `reflink_copy()` doesn't preserve permission/security context for the copied file, +/// so caller needs to take care of it. +pub fn reflink_copy, D: AsRef>(src: S, dst: D) -> Result<()> { + let src_path = src.as_ref(); + let dst_path = dst.as_ref(); + let src = src_path.to_string_lossy(); + let dst = dst_path.to_string_lossy(); + + if !src_path.is_file() { + return Err(eother!("reflink_copy src {} is not a regular file", src)); + } + + // Make sure dst's parent exist. If dst is a regular file, then unlink it for later copy. + if dst_path.exists() { + if !dst_path.is_file() { + return Err(eother!("reflink_copy dst {} is not a regular file", dst)); + } else { + fs::remove_file(dst_path)?; + } + } else if let Some(dst_parent) = dst_path.parent() { + if !dst_parent.exists() { + if let Err(e) = fs::create_dir_all(dst_parent) { + return Err(eother!( + "reflink_copy: create_dir_all {} failed: {:?}", + dst_parent.to_str().unwrap(), + e + )); + } + } else if !dst_parent.is_dir() { + return Err(eother!("reflink_copy parent of {} is not a directory", dst)); + } + } + + // Reflink copy, and fallback to regular copy if reflink fails. 
+ let src_file = fs::File::open(src_path)?; + let dst_file = fs::File::create(dst_path)?; + if let Err(e) = do_reflink_copy(src_file, dst_file) { + match e.raw_os_error() { + // Cross dev copy or filesystem doesn't support reflink, do regular copy + Some(os_err) + if os_err == nix::Error::EXDEV as i32 + || os_err == nix::Error::EOPNOTSUPP as i32 => + { + warn!( + sl!(), + "reflink_copy: reflink is not supported ({:?}), do regular copy instead", e, + ); + if let Err(e) = do_regular_copy(src.as_ref(), dst.as_ref()) { + return Err(eother!( + "reflink_copy: regular copy {} to {} failed: {:?}", + src, + dst, + e + )); + } + } + // Reflink copy failed + _ => { + return Err(eother!( + "reflink_copy: copy {} to {} failed: {:?}", + src, + dst, + e, + )) + } + } + } + + Ok(()) +} + +// Copy file using cp command, which handles sparse file copy. +fn do_regular_copy(src: &str, dst: &str) -> Result<()> { + let mut cmd = Command::new("/bin/cp"); + cmd.args(["--sparse=auto", src, dst]); + + match cmd.output() { + Ok(output) => match output.status.success() { + true => Ok(()), + false => Err(eother!("`{:?}` failed: {:?}", cmd, output)), + }, + Err(e) => Err(eother!("`{:?}` failed: {:?}", cmd, e)), + } +} + +/// Copy file by reflink +fn do_reflink_copy(src: File, dst: File) -> Result<()> { + use nix::ioctl_write_int; + // FICLONE ioctl number definition, from include/linux/fs.h + const FS_IOC_MAGIC: u8 = 0x94; + const FS_IOC_FICLONE: u8 = 9; + // Define FICLONE ioctl using nix::ioctl_write_int! macro. + // The generated function has the following signature: + // pub unsafe fn ficlone(fd: libc::c_int, data: libc::c_ulang) -> Result + ioctl_write_int!(ficlone, FS_IOC_MAGIC, FS_IOC_FICLONE); + + // Safe because the `src` and `dst` are valid file objects and we have checked the result. 
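+    // Note that FICLONE is issued on the *destination* fd, with the *source* fd
+    // passed as the integer argument, i.e. ioctl(dst_fd, FICLONE, src_fd).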
+ unsafe { ficlone(dst.as_raw_fd(), src.as_raw_fd() as u64) } + .map(|_| ()) + .map_err(|e| Error::from_raw_os_error(e as i32)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_base_name() { + assert_eq!(&get_base_name("/etc/hostname").unwrap(), "hostname"); + assert_eq!(&get_base_name("/bin").unwrap(), "bin"); + assert!(&get_base_name("/").is_err()); + assert!(&get_base_name("").is_err()); + assert!(get_base_name("/no/such/path________yeah").is_err()); + } + + #[test] + fn test_is_symlink() { + let tmpdir = tempfile::tempdir().unwrap(); + let path = tmpdir.path(); + + std::os::unix::fs::symlink(path, path.join("a")).unwrap(); + assert!(is_symlink(path.join("a")).unwrap()); + } + + #[test] + fn test_reflink_copy() { + let tmpdir = tempfile::tempdir().unwrap(); + let path = tmpdir.path().join("mounts"); + reflink_copy("/proc/mounts", &path).unwrap(); + let content = fs::read_to_string(&path).unwrap(); + assert!(!content.is_empty()); + reflink_copy("/proc/mounts", &path).unwrap(); + let content = fs::read_to_string(&path).unwrap(); + assert!(!content.is_empty()); + + reflink_copy("/proc/mounts", tmpdir.path()).unwrap_err(); + reflink_copy("/proc/mounts_not_exist", &path).unwrap_err(); + } +} diff --git a/src/libs/kata-sys-util/src/hooks.rs b/src/libs/kata-sys-util/src/hooks.rs new file mode 100644 index 000000000000..8a36e606e1a8 --- /dev/null +++ b/src/libs/kata-sys-util/src/hooks.rs @@ -0,0 +1,541 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// Copyright (c) 2019-2021 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::collections::HashMap; +use std::ffi::OsString; +use std::hash::{Hash, Hasher}; +use std::io::{self, Read, Result}; +use std::path::Path; +use std::time::Duration; + +use subprocess::{ExitStatus, Popen, PopenConfig, PopenError, Redirection}; + +use crate::validate::valid_env; +use crate::{eother, sl}; + +const DEFAULT_HOOK_TIMEOUT_SEC: i32 = 10; + +/// A simple wrapper over `oci::Hook` to provide `Hash, Eq`. +/// +/// The `oci::Hook` is auto-generated from protobuf source file, which doesn't implement `Hash, Eq`. +#[derive(Debug, Default, Clone)] +struct HookKey(oci::Hook); + +impl From<&oci::Hook> for HookKey { + fn from(hook: &oci::Hook) -> Self { + HookKey(hook.clone()) + } +} + +impl PartialEq for HookKey { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl Eq for HookKey {} + +impl Hash for HookKey { + fn hash(&self, state: &mut H) { + self.0.path.hash(state); + self.0.args.hash(state); + self.0.env.hash(state); + self.0.timeout.hash(state); + } +} + +/// Execution state of OCI hooks. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum HookState { + /// Hook is pending for executing/retry. + Pending, + /// Hook has been successfully executed. + Done, + /// Hook has been marked as ignore. + Ignored, +} + +/// Structure to maintain state for hooks. +#[derive(Default)] +pub struct HookStates { + states: HashMap, +} + +impl HookStates { + /// Create a new instance of [`HookStates`]. + pub fn new() -> Self { + Self::default() + } + + /// Get execution state of a hook. + pub fn get(&self, hook: &oci::Hook) -> HookState { + self.states + .get(&hook.into()) + .copied() + .unwrap_or(HookState::Pending) + } + + /// Update execution state of a hook. + pub fn update(&mut self, hook: &oci::Hook, state: HookState) { + self.states.insert(hook.into(), state); + } + + /// Remove an execution state of a hook. 
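+    ///
+    /// After removal, `get()` reports the hook as `HookState::Pending` again,
+    /// since unknown hooks default to `Pending`.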
+ pub fn remove(&mut self, hook: &oci::Hook) { + self.states.remove(&hook.into()); + } + + /// Check whether some hooks are still pending and should retry execution. + pub fn should_retry(&self) -> bool { + for state in self.states.values() { + if *state == HookState::Pending { + return true; + } + } + false + } + + /// Execute an OCI hook. + /// + /// If `state` is valid, it will be sent to subprocess' STDIN. + /// + /// The [OCI Runtime specification 1.0.0](https://github.com/opencontainers/runtime-spec/releases/download/v1.0.0/oci-runtime-spec-v1.0.0.pdf) + /// states: + /// - path (string, REQUIRED) with similar semantics to IEEE Std 1003.1-2008 execv's path. + /// This specification extends the IEEE standard in that path MUST be absolute. + /// - args (array of strings, OPTIONAL) with the same semantics as IEEE Std 1003.1-2008 execv's + /// argv. + /// - env (array of strings, OPTIONAL) with the same semantics as IEEE Std 1003.1-2008's environ. + /// - timeout (int, OPTIONAL) is the number of seconds before aborting the hook. If set, timeout + /// MUST be greater than zero. + /// + /// The OCI spec also defines the context to invoke hooks, caller needs to take the responsibility + /// to setup execution context, such as namespace etc. + pub fn execute_hook(&mut self, hook: &oci::Hook, state: Option) -> Result<()> { + if self.get(hook) != HookState::Pending { + return Ok(()); + } + + fail::fail_point!("execute_hook", |_| { + Err(eother!("execute hook fail point injection")) + }); + info!(sl!(), "execute hook {:?}", hook); + + self.states.insert(hook.into(), HookState::Pending); + + let mut executor = HookExecutor::new(hook)?; + let stdin = if state.is_some() { + Redirection::Pipe + } else { + Redirection::None + }; + let mut popen = Popen::create( + &executor.args, + PopenConfig { + stdin, + stdout: Redirection::Pipe, + stderr: Redirection::Pipe, + executable: executor.executable.to_owned(), + detached: true, + env: Some(executor.envs.clone()), + ..Default::default() + }, + ) + .map_err(|e| eother!("failed to create subprocess for hook {:?}: {}", hook, e))?; + + if let Some(state) = state { + executor.execute_with_input(&mut popen, state)?; + } + executor.execute_and_wait(&mut popen)?; + info!(sl!(), "hook {} finished", hook.path); + self.states.insert(hook.into(), HookState::Done); + + Ok(()) + } + + /// Try to execute hooks and remember execution result. + /// + /// The `execute_hooks()` will be called multiple times. + /// It will first be called before creating the VMM when creating the sandbox, so hooks could be + /// used to setup environment for the VMM, such as creating tap device etc. + /// It will also be called during starting containers, to setup environment for those containers. + /// + /// The execution result will be recorded for each hook. Once a hook returns success, it will not + /// be invoked anymore. + pub fn execute_hooks(&mut self, hooks: &[oci::Hook], state: Option) -> Result<()> { + for hook in hooks.iter() { + if let Err(e) = self.execute_hook(hook, state.clone()) { + // Ignore error and try next hook, the caller should retry. + error!(sl!(), "hook {} failed: {}", hook.path, e); + } + } + + Ok(()) + } +} + +struct HookExecutor<'a> { + hook: &'a oci::Hook, + executable: Option, + args: Vec, + envs: Vec<(OsString, OsString)>, + timeout: u64, +} + +impl<'a> HookExecutor<'a> { + fn new(hook: &'a oci::Hook) -> Result { + // Ensure Hook.path is present and is an absolute path. 
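+        // A relative path is rejected here rather than resolved against any working
+        // directory, matching the OCI requirement quoted above that `path` MUST be absolute.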
+ let executable = if hook.path.is_empty() { + return Err(eother!("path of hook {:?} is empty", hook)); + } else { + let path = Path::new(&hook.path); + if !path.is_absolute() { + return Err(eother!("path of hook {:?} is not absolute", hook)); + } + Some(path.as_os_str().to_os_string()) + }; + + // Hook.args is optional, use Hook.path as arg0 if Hook.args is empty. + let args = if hook.args.is_empty() { + vec![hook.path.clone()] + } else { + hook.args.clone() + }; + + let mut envs: Vec<(OsString, OsString)> = Vec::new(); + for e in hook.env.iter() { + if let Some((key, value)) = valid_env(e) { + envs.push((OsString::from(key), OsString::from(value))); + } + } + + // Use Hook.timeout if it's valid, otherwise default to 10s. + let mut timeout = DEFAULT_HOOK_TIMEOUT_SEC as u64; + if let Some(t) = hook.timeout { + if t > 0 { + timeout = t as u64; + } + } + + Ok(HookExecutor { + hook, + executable, + args, + envs, + timeout, + }) + } + + fn execute_with_input(&mut self, popen: &mut Popen, state: oci::State) -> Result<()> { + let st = serde_json::to_string(&state)?; + let (stdout, stderr) = popen + .communicate_start(Some(st.as_bytes().to_vec())) + .limit_time(Duration::from_secs(self.timeout)) + .read_string() + .map_err(|e| e.error)?; + if let Some(err) = stderr { + if !err.is_empty() { + error!(sl!(), "hook {} exec failed: {}", self.hook.path, err); + } + } + if let Some(out) = stdout { + if !out.is_empty() { + info!(sl!(), "hook {} exec stdout: {}", self.hook.path, out); + } + } + // Give a grace period for `execute_and_wait()`. + self.timeout = 1; + Ok(()) + } + + fn execute_and_wait(&mut self, popen: &mut Popen) -> Result<()> { + match popen.wait_timeout(Duration::from_secs(self.timeout)) { + Ok(v) => self.handle_exit_status(v, popen), + Err(e) => self.handle_popen_wait_error(e, popen), + } + } + + fn handle_exit_status(&mut self, result: Option, popen: &mut Popen) -> Result<()> { + if let Some(exit_status) = result { + // the process has finished + info!( + sl!(), + "exit status of hook {:?} : {:?}", self.hook, exit_status + ); + self.print_result(popen); + match exit_status { + subprocess::ExitStatus::Exited(code) => { + if code == 0 { + info!(sl!(), "hook {:?} succeeds", self.hook); + Ok(()) + } else { + warn!(sl!(), "hook {:?} exit status with {}", self.hook, code,); + Err(eother!("hook {:?} exit status with {}", self.hook, code)) + } + } + _ => { + error!( + sl!(), + "no exit code for hook {:?}: {:?}", self.hook, exit_status + ); + Err(eother!( + "no exit code for hook {:?}: {:?}", + self.hook, + exit_status + )) + } + } + } else { + // may be timeout + error!(sl!(), "hook poll failed, kill it"); + // it is still running, kill it + popen.kill()?; + let _ = popen.wait(); + self.print_result(popen); + Err(io::Error::from(io::ErrorKind::TimedOut)) + } + } + + fn handle_popen_wait_error(&mut self, e: PopenError, popen: &mut Popen) -> Result<()> { + self.print_result(popen); + error!(sl!(), "wait_timeout for hook {:?} failed: {}", self.hook, e); + Err(eother!( + "wait_timeout for hook {:?} failed: {}", + self.hook, + e + )) + } + + fn print_result(&mut self, popen: &mut Popen) { + if let Some(file) = popen.stdout.as_mut() { + let mut buffer = String::new(); + file.read_to_string(&mut buffer).ok(); + if !buffer.is_empty() { + info!(sl!(), "hook stdout: {}", buffer); + } + } + if let Some(file) = popen.stderr.as_mut() { + let mut buffer = String::new(); + file.read_to_string(&mut buffer).ok(); + if !buffer.is_empty() { + info!(sl!(), "hook stderr: {}", buffer); + } + } + } +} + +#[cfg(test)] 
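+// Note: test_execute_hook() below returns early unless run as root, because it
+// spawns real executables (/bin/touch, /bin/sleep) and checks their side effects.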
+mod tests { + use super::*; + use std::fs::{self, set_permissions, File, Permissions}; + use std::io::Write; + use std::os::unix::fs::PermissionsExt; + use std::time::Instant; + + fn test_hook_eq(hook1: &oci::Hook, hook2: &oci::Hook, expected: bool) { + let key1 = HookKey::from(hook1); + let key2 = HookKey::from(hook2); + + assert_eq!(key1 == key2, expected); + } + #[test] + fn test_hook_key() { + let hook = oci::Hook { + path: "1".to_string(), + args: vec!["2".to_string(), "3".to_string()], + env: vec![], + timeout: Some(0), + }; + let cases = [ + ( + oci::Hook { + path: "1000".to_string(), + args: vec!["2".to_string(), "3".to_string()], + env: vec![], + timeout: Some(0), + }, + false, + ), + ( + oci::Hook { + path: "1".to_string(), + args: vec!["2".to_string(), "4".to_string()], + env: vec![], + timeout: Some(0), + }, + false, + ), + ( + oci::Hook { + path: "1".to_string(), + args: vec!["2".to_string()], + env: vec![], + timeout: Some(0), + }, + false, + ), + ( + oci::Hook { + path: "1".to_string(), + args: vec!["2".to_string(), "3".to_string()], + env: vec!["5".to_string()], + timeout: Some(0), + }, + false, + ), + ( + oci::Hook { + path: "1".to_string(), + args: vec!["2".to_string(), "3".to_string()], + env: vec![], + timeout: Some(6), + }, + false, + ), + ( + oci::Hook { + path: "1".to_string(), + args: vec!["2".to_string(), "3".to_string()], + env: vec![], + timeout: None, + }, + false, + ), + ( + oci::Hook { + path: "1".to_string(), + args: vec!["2".to_string(), "3".to_string()], + env: vec![], + timeout: Some(0), + }, + true, + ), + ]; + + for case in cases.iter() { + test_hook_eq(&hook, &case.0, case.1); + } + } + + #[test] + fn test_execute_hook() { + // test need root permission + if !nix::unistd::getuid().is_root() { + println!("test need root permission"); + return; + } + + let tmpdir = tempfile::tempdir().unwrap(); + let file = tmpdir.path().join("data"); + let file_str = file.to_string_lossy(); + let mut states = HookStates::new(); + + // test case 1: normal + // execute hook + let hook = oci::Hook { + path: "/bin/touch".to_string(), + args: vec!["touch".to_string(), file_str.to_string()], + env: vec![], + timeout: Some(0), + }; + let ret = states.execute_hook(&hook, None); + assert!(ret.is_ok()); + assert!(fs::metadata(&file).is_ok()); + assert!(!states.should_retry()); + + // test case 2: timeout in 10s + let hook = oci::Hook { + path: "/bin/sleep".to_string(), + args: vec!["sleep".to_string(), "3600".to_string()], + env: vec![], + timeout: Some(0), // default timeout is 10 seconds + }; + let start = Instant::now(); + let ret = states.execute_hook(&hook, None).unwrap_err(); + let duration = start.elapsed(); + let used = duration.as_secs(); + assert!((10..12u64).contains(&used)); + assert_eq!(ret.kind(), io::ErrorKind::TimedOut); + assert_eq!(states.get(&hook), HookState::Pending); + assert!(states.should_retry()); + states.remove(&hook); + + // test case 3: timeout in 5s + let hook = oci::Hook { + path: "/bin/sleep".to_string(), + args: vec!["sleep".to_string(), "3600".to_string()], + env: vec![], + timeout: Some(5), // timeout is set to 5 seconds + }; + let start = Instant::now(); + let ret = states.execute_hook(&hook, None).unwrap_err(); + let duration = start.elapsed(); + let used = duration.as_secs(); + assert!((5..7u64).contains(&used)); + assert_eq!(ret.kind(), io::ErrorKind::TimedOut); + assert_eq!(states.get(&hook), HookState::Pending); + assert!(states.should_retry()); + states.remove(&hook); + + // test case 4: with envs + let create_shell = |shell_path: &str, 
data_path: &str| -> Result<()> { + let shell = format!( + r#"#!/bin/sh +echo -n "K1=${{K1}}" > {} +"#, + data_path + ); + let mut output = File::create(shell_path)?; + output.write_all(shell.as_bytes())?; + + // set to executable + let permissions = Permissions::from_mode(0o755); + set_permissions(shell_path, permissions)?; + + Ok(()) + }; + let shell_path = format!("{}/test.sh", tmpdir.path().to_string_lossy()); + let ret = create_shell(&shell_path, file_str.as_ref()); + assert!(ret.is_ok()); + let hook = oci::Hook { + path: shell_path, + args: vec![], + env: vec!["K1=V1".to_string()], + timeout: Some(5), + }; + let ret = states.execute_hook(&hook, None); + assert!(ret.is_ok()); + assert!(!states.should_retry()); + let contents = fs::read_to_string(file); + match contents { + Err(e) => panic!("got error {}", e), + Ok(s) => assert_eq!(s, "K1=V1"), + } + + // test case 5: timeout in 5s with state + let hook = oci::Hook { + path: "/bin/sleep".to_string(), + args: vec!["sleep".to_string(), "3600".to_string()], + env: vec![], + timeout: Some(6), // timeout is set to 5 seconds + }; + let state = oci::State { + version: "".to_string(), + id: "".to_string(), + status: oci::ContainerState::Creating, + pid: 10, + bundle: "nouse".to_string(), + annotations: Default::default(), + }; + let start = Instant::now(); + let ret = states.execute_hook(&hook, Some(state)).unwrap_err(); + let duration = start.elapsed(); + let used = duration.as_secs(); + assert!((6..8u64).contains(&used)); + assert_eq!(ret.kind(), io::ErrorKind::TimedOut); + assert!(states.should_retry()); + } +} diff --git a/src/libs/kata-sys-util/src/k8s.rs b/src/libs/kata-sys-util/src/k8s.rs new file mode 100644 index 000000000000..4ae31921e725 --- /dev/null +++ b/src/libs/kata-sys-util/src/k8s.rs @@ -0,0 +1,71 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// Copyright (c) 2019-2021 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Utilities to support Kubernetes (K8s). +//! +//! This module depends on kubelet internal implementation details, a better way is needed +//! to detect K8S EmptyDir medium type from `oci::spec::Mount` objects. + +use kata_types::mount; +use oci::Spec; + +use crate::mount::get_linux_mount_info; + +pub use kata_types::k8s::is_empty_dir; + +/// Check whether the given path is a kubernetes ephemeral volume. +/// +/// This method depends on a specific path used by k8s to detect if it's type of ephemeral. +/// As of now, this is a very k8s specific solution that works but in future there should be a +/// better way for this method to determine if the path is for ephemeral volume type. +pub fn is_ephemeral_volume(path: &str) -> bool { + if is_empty_dir(path) { + if let Ok(info) = get_linux_mount_info(path) { + if info.fs_type == "tmpfs" { + return true; + } + } + } + + false +} + +/// Check whether the given path is a kubernetes empty-dir volume of medium "default". +/// +/// K8s `EmptyDir` volumes are directories on the host. If the fs type is tmpfs, it's a ephemeral +/// volume instead of a `EmptyDir` volume. +pub fn is_host_empty_dir(path: &str) -> bool { + if is_empty_dir(path) { + if let Ok(info) = get_linux_mount_info(path) { + if info.fs_type != "tmpfs" { + return true; + } + } + } + + false +} + +// update_ephemeral_storage_type sets the mount type to 'ephemeral' +// if the mount source path is provisioned by k8s for ephemeral storage. +// For the given pod ephemeral volume is created only once +// backed by tmpfs inside the VM. 
For successive containers +// of the same pod the already existing volume is reused. +pub fn update_ephemeral_storage_type(oci_spec: &mut Spec) { + for m in oci_spec.mounts.iter_mut() { + if mount::is_kata_guest_mount_volume(&m.r#type) { + continue; + } + + if is_ephemeral_volume(&m.source) { + m.r#type = String::from(mount::KATA_EPHEMERAL_VOLUME_TYPE); + } else if is_host_empty_dir(&m.source) { + // FIXME support disable_guest_empty_dir + // https://github.com/kata-containers/kata-containers/blob/02a51e75a7e0c6fce5e8abe3b991eeac87e09645/src/runtime/pkg/katautils/create.go#L105 + m.r#type = String::from(mount::KATA_HOST_DIR_VOLUME_TYPE); + } + } +} diff --git a/src/libs/kata-sys-util/src/lib.rs b/src/libs/kata-sys-util/src/lib.rs new file mode 100644 index 000000000000..531883a398d9 --- /dev/null +++ b/src/libs/kata-sys-util/src/lib.rs @@ -0,0 +1,77 @@ +// Copyright (c) 2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[macro_use] +extern crate slog; + +pub mod cpu; +pub mod device; +pub mod fs; +pub mod hooks; +pub mod k8s; +pub mod mount; +pub mod numa; +pub mod protection; +pub mod rand; +pub mod spec; +pub mod validate; + +use anyhow::Result; +use std::io::BufRead; +use std::io::BufReader; + +// Convenience macro to obtain the scoped logger +#[macro_export] +macro_rules! sl { + () => { + slog_scope::logger() + }; +} + +#[macro_export] +macro_rules! eother { + () => (std::io::Error::new(std::io::ErrorKind::Other, "")); + ($fmt:expr, $($arg:tt)*) => ({ + std::io::Error::new(std::io::ErrorKind::Other, format!($fmt, $($arg)*)) + }) +} + +pub fn check_kernel_cmd_line( + kernel_cmdline_path: &str, + search_param: &str, + search_values: &[&str], +) -> Result { + let f = std::fs::File::open(kernel_cmdline_path)?; + let reader = BufReader::new(f); + + let check_fn = if search_values.is_empty() { + |param: &str, search_param: &str, _search_values: &[&str]| { + param.eq_ignore_ascii_case(search_param) + } + } else { + |param: &str, search_param: &str, search_values: &[&str]| { + let split: Vec<&str> = param.splitn(2, '=').collect(); + if split.len() < 2 || split[0] != search_param { + return false; + } + + for value in search_values { + if value.eq_ignore_ascii_case(split[1]) { + return true; + } + } + false + } + }; + + for line in reader.lines() { + for field in line?.split_whitespace() { + if check_fn(field, search_param, search_values) { + return Ok(true); + } + } + } + Ok(false) +} diff --git a/src/libs/kata-sys-util/src/mount.rs b/src/libs/kata-sys-util/src/mount.rs new file mode 100644 index 000000000000..873db5f5b9b3 --- /dev/null +++ b/src/libs/kata-sys-util/src/mount.rs @@ -0,0 +1,1108 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2021 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Utilities and helpers to execute mount operations on Linux systems. +//! +//! These utilities and helpers are specially designed and implemented to support container runtimes +//! on Linux systems, so they may not be generic enough. +//! +//! # Quotation from [mount(2)](https://man7.org/linux/man-pages/man2/mount.2.html) +//! +//! A call to mount() performs one of a number of general types of operation, depending on the bits +//! specified in mountflags. The choice of which operation to perform is determined by testing the +//! bits set in mountflags, with the tests being conducted in the order listed here: +//! - Remount an existing mount: mountflags includes MS_REMOUNT. +//! - Create a bind mount: mountflags includes MS_BIND. +//! 
- Change the propagation type of an existing mount: mountflags includes one of MS_SHARED, +//! MS_PRIVATE, MS_SLAVE, or MS_UNBINDABLE. +//! - Move an existing mount to a new location: mountflags includes MS_MOVE. +//! - Create a new mount: mountflags includes none of the above flags. +//! +//! Since Linux 2.6.26, the MS_REMOUNT flag can be used with MS_BIND to modify only the +//! per-mount-point flags. This is particularly useful for setting or clearing the "read-only" +//! flag on a mount without changing the underlying filesystem. Specifying mountflags as: +//! MS_REMOUNT | MS_BIND | MS_RDONLY +//! will make access through this mountpoint read-only, without affecting other mounts. +//! +//! # Safety +//! +//! Mount related operations are sensitive to security flaws, especially when dealing with symlinks. +//! There are several CVEs related to file path handling, for example +//! [CVE-2021-30465](https://github.com/opencontainers/runc/security/advisories/GHSA-c3xm-pvg7-gh7r). +//! +//! So some design rules are adopted here: +//! - all mount variants (`bind_remount_read_only()`, `bind_mount()`, `Mounter::mount()`) assume +//! that all received paths are safe. +//! - the caller must ensure safe version of `PathBuf` are passed to mount variants. +//! - `create_mount_destination()` may be used to generated safe `PathBuf` for mount destinations. +//! - the `safe_path` crate should be used to generate safe `PathBuf` for general cases. + +use std::fmt::Debug; +use std::fs; +use std::io::{self, BufRead}; +use std::os::unix::fs::{DirBuilderExt, OpenOptionsExt}; +use std::os::unix::io::AsRawFd; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Instant; + +use lazy_static::lazy_static; +use nix::mount::{mount, MntFlags, MsFlags}; +use nix::{unistd, NixPath}; + +use crate::fs::is_symlink; +use crate::sl; + +/// Default permission for directories created for mountpoint. +const MOUNT_DIR_PERM: u32 = 0o755; +const MOUNT_FILE_PERM: u32 = 0o644; + +pub const PROC_MOUNTS_FILE: &str = "/proc/mounts"; +const PROC_FIELDS_PER_LINE: usize = 6; +const PROC_DEVICE_INDEX: usize = 0; +const PROC_PATH_INDEX: usize = 1; +const PROC_TYPE_INDEX: usize = 2; + +lazy_static! { + static ref MAX_MOUNT_PARAM_SIZE: usize = + if let Ok(Some(v)) = unistd::sysconf(unistd::SysconfVar::PAGE_SIZE) { + v as usize + } else { + panic!("cannot get PAGE_SIZE by sysconf()"); + }; + +// Propagation flags for mounting container volumes. + static ref PROPAGATION_FLAGS: MsFlags = + MsFlags::MS_SHARED | MsFlags::MS_PRIVATE | MsFlags::MS_SLAVE | MsFlags::MS_UNBINDABLE; + +} + +/// Errors related to filesystem mount operations. 
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    #[error("Can not bind mount {0} to {1}: {2}")]
+    BindMount(PathBuf, PathBuf, nix::Error),
+    #[error("Failure injection: {0}")]
+    FailureInject(String),
+    #[error(transparent)]
+    Io(#[from] std::io::Error),
+    #[error("Invalid mountpoint entry (expected {0} fields, got {1}): {2}")]
+    InvalidMountEntry(usize, usize, String),
+    #[error("Invalid mount option: {0}")]
+    InvalidMountOption(String),
+    #[error("Invalid path: {0}")]
+    InvalidPath(PathBuf),
+    #[error("Failure in waiting for thread: {0}")]
+    Join(String),
+    #[error("Can not mount {0} to {1}: {2}")]
+    Mount(PathBuf, PathBuf, nix::Error),
+    #[error("Mount option exceeds 4K size")]
+    MountOptionTooBig,
+    #[error("Path for mountpoint is null")]
+    NullMountPointPath,
+    #[error("Invalid propagation type flag")]
+    InvalidPgMountFlag,
+    #[error("Failed to open file {0} by path, {1}")]
+    OpenByPath(PathBuf, io::Error),
+    #[error("Can not read metadata of {0}, {1}")]
+    ReadMetadata(PathBuf, io::Error),
+    #[error("Can not remount {0}: {1}")]
+    Remount(PathBuf, nix::Error),
+    #[error("Can not find mountpoint for {0}")]
+    NoMountEntry(String),
+    #[error("Can not umount {0}, {1}")]
+    Umount(PathBuf, io::Error),
+}
+
+/// A specialized version of `std::result::Result` for mount operations.
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Information about a mount record from `/proc/mounts`.
+pub struct LinuxMountInfo {
+    /// Source of mount, first field of records from `/proc/mounts`.
+    pub device: String,
+    /// Destination of mount, second field of records from `/proc/mounts`.
+    pub path: String,
+    /// Filesystem type of mount, third field of records from `/proc/mounts`.
+    pub fs_type: String,
+}
+
+/// Get the device and file system type of a mount point by parsing `/proc/mounts`.
+pub fn get_linux_mount_info(mount_point: &str) -> Result<LinuxMountInfo> {
+    let mount_file = fs::File::open(PROC_MOUNTS_FILE)?;
+    let lines = io::BufReader::new(mount_file).lines();
+
+    for mount in lines.flatten() {
+        let fields: Vec<&str> = mount.split(' ').collect();
+
+        if fields.len() != PROC_FIELDS_PER_LINE {
+            return Err(Error::InvalidMountEntry(
+                PROC_FIELDS_PER_LINE,
+                fields.len(),
+                mount,
+            ));
+        }
+
+        if mount_point == fields[PROC_PATH_INDEX] {
+            return Ok(LinuxMountInfo {
+                device: fields[PROC_DEVICE_INDEX].to_string(),
+                path: fields[PROC_PATH_INDEX].to_string(),
+                fs_type: fields[PROC_TYPE_INDEX].to_string(),
+            });
+        }
+    }
+
+    Err(Error::NoMountEntry(mount_point.to_owned()))
+}
+
+/// Recursively create the destination for a mount.
+///
+/// For a normal mount, the destination will always be a directory. For a bind mount, the
+/// destination must be a directory if the source is a directory, otherwise the destination must
+/// be a normal file. If directories are created, their permissions are initialized to
+/// `MOUNT_DIR_PERM`.
+///
+/// # Safety
+///
+/// Every container has a root filesystem `rootfs`. When creating bind mounts for a container,
+/// the destination should always be within the container's `rootfs`. Otherwise it's a serious
+/// security flaw for the container to read/overwrite host-side filesystem contents. Please refer
+/// to the following CVEs for examples:
+/// - [CVE-2021-30465](https://github.com/opencontainers/runc/security/advisories/GHSA-c3xm-pvg7-gh7r)
+///
+/// To ensure security, the `create_mount_destination()` function takes an extra parameter `root`,
+/// which is used to ensure that `dst` is within the specified directory. And a safe version of
+/// `PathBuf` is returned to avoid TOCTTOU type of flaws.
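///
/// # Example
///
/// Editor's illustration (not part of the original patch): the paths below are hypothetical,
/// and the call is shown together with `bind_mount_unchecked()` from this module.
///
/// ```no_run
/// use kata_sys_util::mount::{bind_mount_unchecked, create_mount_destination};
/// use nix::mount::MsFlags;
///
/// let rootfs = "/run/kata/sandbox-1/rootfs";
/// let src = "/etc/resolv.conf";
/// let dst = format!("{}/etc/resolv.conf", rootfs);
///
/// // Create the destination (a regular file for a file bind mount) with safe permissions,
/// // then bind mount the source onto it read-only.
/// let dst = create_mount_destination(src, &dst, rootfs, "bind").unwrap();
/// bind_mount_unchecked(src, dst.as_ref(), true, MsFlags::MS_SLAVE).unwrap();
/// ```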
+pub fn create_mount_destination, D: AsRef, R: AsRef>( + src: S, + dst: D, + _root: R, + fs_type: &str, +) -> Result + Debug> { + // TODO: https://github.com/kata-containers/kata-containers/issues/3473 + let dst = dst.as_ref(); + let parent = dst + .parent() + .ok_or_else(|| Error::InvalidPath(dst.to_path_buf()))?; + let mut builder = fs::DirBuilder::new(); + builder + .mode(MOUNT_DIR_PERM) + .recursive(true) + .create(parent)?; + + if fs_type == "bind" { + // The source and destination for bind mounting must be the same type: file or directory. + if !src.as_ref().is_dir() { + fs::OpenOptions::new() + .mode(MOUNT_FILE_PERM) + .write(true) + .create(true) + .open(dst)?; + return Ok(dst.to_path_buf()); + } + } + + if let Err(e) = builder.create(dst) { + if e.kind() != std::io::ErrorKind::AlreadyExists { + return Err(e.into()); + } + } + if !dst.is_dir() { + Err(Error::InvalidPath(dst.to_path_buf())) + } else { + Ok(dst.to_path_buf()) + } +} + +/// Remount a bind mount +/// +/// # Safety +/// Caller needs to ensure safety of the `dst` to avoid possible file path based attacks. +pub fn bind_remount>(dst: P, readonly: bool) -> Result<()> { + let dst = dst.as_ref(); + if dst.is_empty() { + return Err(Error::NullMountPointPath); + } + let dst = dst + .canonicalize() + .map_err(|_e| Error::InvalidPath(dst.to_path_buf()))?; + + do_rebind_mount(dst, readonly, MsFlags::empty()) +} + +/// Bind mount `src` to `dst` with a custom propagation type, optionally in readonly mode if +/// `readonly` is true. +/// +/// Propagation type: MsFlags::MS_SHARED or MsFlags::MS_SLAVE +/// MsFlags::MS_SHARED is used to bind mount the sandbox path to enable `exec` (in case of FC +/// jailer). +/// MsFlags::MS_SLAVE is used on all other cases. +/// +/// # Safety +/// Caller needs to ensure: +/// - `src` exists. +/// - `dst` exists, and is suitable as destination for bind mount. +/// - `dst` is free of file path based attacks. +pub fn bind_mount_unchecked, D: AsRef>( + src: S, + dst: D, + readonly: bool, + pgflag: MsFlags, +) -> Result<()> { + fail::fail_point!("bind_mount", |_| { + Err(Error::FailureInject( + "Bind mount fail point injection".to_string(), + )) + }); + + let src = src.as_ref(); + let dst = dst.as_ref(); + if src.is_empty() { + return Err(Error::NullMountPointPath); + } + if dst.is_empty() { + return Err(Error::NullMountPointPath); + } + let abs_src = src + .canonicalize() + .map_err(|_e| Error::InvalidPath(src.to_path_buf()))?; + + create_mount_destination(src, dst, "/", "bind")?; + // Bind mount `src` to `dst`. + mount( + Some(&abs_src), + dst, + Some("bind"), + MsFlags::MS_BIND, + Some(""), + ) + .map_err(|e| Error::BindMount(abs_src, dst.to_path_buf(), e))?; + + // Change into the chosen propagation mode. + if !(pgflag == MsFlags::MS_SHARED || pgflag == MsFlags::MS_SLAVE) { + return Err(Error::InvalidPgMountFlag); + } + mount(Some(""), dst, Some(""), pgflag, Some("")) + .map_err(|e| Error::Mount(PathBuf::new(), dst.to_path_buf(), e))?; + + // Optionally rebind into readonly mode. + if readonly { + do_rebind_mount(dst, readonly, MsFlags::empty())?; + } + + Ok(()) +} + +/// Trait to mount a `kata_types::mount::Mount`. +pub trait Mounter { + /// Mount to the specified `target`. + /// + /// # Safety + /// Caller needs to ensure: + /// - `target` exists, and is suitable as destination for mount. + /// - `target` is free of file path based attacks. 
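    ///
    /// # Example
    ///
    /// Editor's illustration (not part of the original patch); the target path is hypothetical
    /// and the `Mount` value is assumed to come from the caller, e.g. parsed from a container
    /// spec.
    ///
    /// ```no_run
    /// use kata_sys_util::mount::{Mounter, Result};
    ///
    /// fn mount_volume(m: &kata_types::mount::Mount) -> Result<()> {
    ///     // Performs the mount(2) call, handling propagation flags, read-only bind
    ///     // remounts and over-long overlay `lowerdir` options internally.
    ///     m.mount("/run/kata/shared/containers/volume-1")
    /// }
    /// ```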
+ fn mount>(&self, target: P) -> Result<()>; +} + +impl Mounter for kata_types::mount::Mount { + // This function is modelled after + // [Mount::Mount()](https://github.com/containerd/containerd/blob/main/mount/mount_linux.go) + // from [Containerd](https://github.com/containerd/containerd) project. + fn mount>(&self, target: P) -> Result<()> { + fail::fail_point!("Mount::mount", |_| { + Err(Error::FailureInject( + "Mount::mount() fail point injection".to_string(), + )) + }); + + let target = target.as_ref().to_path_buf(); + let (chdir, (flags, data)) = + // Follow the same algorithm as Containerd: reserve 512 bytes to avoid hitting one page + // limit of mounting argument buffer. + if self.fs_type == "overlay" && self.option_size() >= *MAX_MOUNT_PARAM_SIZE - 512 { + info!( + sl!(), + "overlay mount option too long, maybe failed to mount" + ); + let (chdir, options) = compact_lowerdir_option(&self.options); + (chdir, parse_mount_options(&options)?) + } else { + (None, parse_mount_options(&self.options)?) + }; + + // Ensure propagation type change flags aren't included in other calls. + let o_flag = flags & (!*PROPAGATION_FLAGS); + + // - Normal mount without MS_REMOUNT flag + // - In the case of remounting with changed data (data != ""), need to call mount + if (flags & MsFlags::MS_REMOUNT) == MsFlags::empty() || !data.is_empty() { + mount_at( + chdir, + &self.source, + target.clone(), + &self.fs_type, + o_flag, + &data, + )?; + } + + // Change mount propagation type. + if (flags & *PROPAGATION_FLAGS) != MsFlags::empty() { + let propagation_flag = *PROPAGATION_FLAGS | MsFlags::MS_REC | MsFlags::MS_SILENT; + debug!( + sl!(), + "Change mount propagation flags to: 0x{:x}", + propagation_flag.bits() + ); + mount( + Some(""), + &target, + Some(""), + flags & propagation_flag, + Some(""), + ) + .map_err(|e| Error::Mount(PathBuf::new(), target.clone(), e))?; + } + + // Bind mount readonly. + let bro_flag = MsFlags::MS_BIND | MsFlags::MS_RDONLY; + if (o_flag & bro_flag) == bro_flag { + do_rebind_mount(target, true, o_flag)?; + } + + Ok(()) + } +} + +#[inline] +fn do_rebind_mount>(path: P, readonly: bool, flags: MsFlags) -> Result<()> { + mount( + Some(""), + path.as_ref(), + Some(""), + if readonly { + flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT | MsFlags::MS_RDONLY + } else { + flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT + }, + Some(""), + ) + .map_err(|e| Error::Remount(path.as_ref().to_path_buf(), e)) +} + +/// Take fstab style mount options and parses them for use with a standard mount() syscall. +pub fn parse_mount_options>(options: &[T]) -> Result<(MsFlags, String)> { + let mut flags: MsFlags = MsFlags::empty(); + let mut data: Vec = Vec::new(); + + for opt in options.iter() { + if opt.as_ref() == "loop" { + return Err(Error::InvalidMountOption("loop".to_string())); + } else if let Some(v) = parse_mount_flags(flags, opt.as_ref()) { + flags = v; + } else { + data.push(opt.as_ref().to_string()); + } + } + + let data = data.join(","); + if data.len() > *MAX_MOUNT_PARAM_SIZE { + return Err(Error::MountOptionTooBig); + } + + Ok((flags, data)) +} + +fn parse_mount_flags(mut flags: MsFlags, flag_str: &str) -> Option { + // Following mount options are applicable to fstab only. + // - _netdev: The filesystem resides on a device that requires network access (used to prevent + // the system from attempting to mount these filesystems until the network has been enabled + // on the system). + // - auto: Can be mounted with the -a option. 
+ // - group: Allow an ordinary user to mount the filesystem if one of that user’s groups matches + // the group of the device. This option implies the options nosuid and nodev (unless + // overridden by subsequent options, as in the option line group,dev,suid). + // - noauto: Can only be mounted explicitly (i.e., the -a option will not cause the filesystem + // to be mounted). + // - nofail: Do not report errors for this device if it does not exist. + // - owner: Allow an ordinary user to mount the filesystem if that user is the owner of the + // device. This option implies the options nosuid and nodev (unless overridden by subsequent + // options, as in the option line owner,dev,suid). + // - user: Allow an ordinary user to mount the filesystem. The name of the mounting user is + // written to the mtab file (or to the private libmount file in /run/mount on systems without + // a regular mtab) so that this same user can unmount the filesystem again. This option + // implies the options noexec, nosuid, and nodev (unless overridden by subsequent options, + // as in the option line user,exec,dev,suid). + // - nouser: Forbid an ordinary user to mount the filesystem. This is the default; it does not + // imply any other options. + // - users: Allow any user to mount and to unmount the filesystem, even when some other ordinary + // user mounted it. This option implies the options noexec, nosuid, and nodev (unless + // overridden by subsequent options, as in the option line users,exec,dev,suid). + match flag_str { + // Clear flags + "defaults" => {} + "async" => flags &= !MsFlags::MS_SYNCHRONOUS, + "atime" => flags &= !MsFlags::MS_NOATIME, + "dev" => flags &= !MsFlags::MS_NODEV, + "diratime" => flags &= !MsFlags::MS_NODIRATIME, + "exec" => flags &= !MsFlags::MS_NOEXEC, + "loud" => flags &= !MsFlags::MS_SILENT, + "noiversion" => flags &= !MsFlags::MS_I_VERSION, + "nomand" => flags &= !MsFlags::MS_MANDLOCK, + "norelatime" => flags &= !MsFlags::MS_RELATIME, + "nostrictatime" => flags &= !MsFlags::MS_STRICTATIME, + "rw" => flags &= !MsFlags::MS_RDONLY, + "suid" => flags &= !MsFlags::MS_NOSUID, + // Set flags + "bind" => flags |= MsFlags::MS_BIND, + "dirsync" => flags |= MsFlags::MS_DIRSYNC, + "iversion" => flags |= MsFlags::MS_I_VERSION, + "mand" => flags |= MsFlags::MS_MANDLOCK, + "noatime" => flags |= MsFlags::MS_NOATIME, + "nodev" => flags |= MsFlags::MS_NODEV, + "nodiratime" => flags |= MsFlags::MS_NODIRATIME, + "noexec" => flags |= MsFlags::MS_NOEXEC, + "nosuid" => flags |= MsFlags::MS_NOSUID, + "rbind" => flags |= MsFlags::MS_BIND | MsFlags::MS_REC, + "unbindable" => flags |= MsFlags::MS_UNBINDABLE, + "runbindable" => flags |= MsFlags::MS_UNBINDABLE | MsFlags::MS_REC, + "private" => flags |= MsFlags::MS_PRIVATE, + "rprivate" => flags |= MsFlags::MS_PRIVATE | MsFlags::MS_REC, + "shared" => flags |= MsFlags::MS_SHARED, + "rshared" => flags |= MsFlags::MS_SHARED | MsFlags::MS_REC, + "slave" => flags |= MsFlags::MS_SLAVE, + "rslave" => flags |= MsFlags::MS_SLAVE | MsFlags::MS_REC, + "relatime" => flags |= MsFlags::MS_RELATIME, + "remount" => flags |= MsFlags::MS_REMOUNT, + "ro" => flags |= MsFlags::MS_RDONLY, + "silent" => flags |= MsFlags::MS_SILENT, + "strictatime" => flags |= MsFlags::MS_STRICTATIME, + "sync" => flags |= MsFlags::MS_SYNCHRONOUS, + flag_str => { + warn!(sl!(), "BUG: unknown mount flag: {:?}", flag_str); + return None; + } + } + Some(flags) +} + +// Do mount, optionally change current working directory if `chdir` is not empty. 
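// Editor's note: illustrative sketch, not part of the original patch. It shows how fstab-style
// options are split by `parse_mount_options()` into mount flags and a data string for mount(2);
// the option values are made up for the example.
#[allow(dead_code)]
fn _example_parse_mount_options() -> Result<()> {
    // "ro" and "noatime" map to MsFlags, while "size=64m" is not a flag and is passed
    // through as filesystem-specific data.
    let (flags, data) = parse_mount_options(&["ro", "noatime", "size=64m"])?;
    assert!(flags.contains(MsFlags::MS_RDONLY | MsFlags::MS_NOATIME));
    assert_eq!(data, "size=64m");
    Ok(())
}

// `mount_at()` below performs the mount(2) call described in the preceding comment.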
+fn mount_at>( + chdir: Option, + source: P, + target: PathBuf, + fstype: &str, + flags: MsFlags, + data: &str, +) -> Result<()> { + let chdir = match chdir { + Some(v) => v, + None => { + return mount( + Some(source.as_ref()), + &target, + Some(fstype), + flags, + Some(data), + ) + .map_err(|e| Error::Mount(PathBuf::new(), target, e)); + } + }; + + info!( + sl!(), + "mount_at: chdir {}, source {}, target {} , fstype {}, data {}", + chdir.display(), + source.as_ref().display(), + target.display(), + fstype, + data + ); + + // TODO: https://github.com/kata-containers/kata-containers/issues/3473 + let o_flags = nix::fcntl::OFlag::O_PATH | nix::fcntl::OFlag::O_CLOEXEC; + let file = fs::OpenOptions::new() + .read(true) + .custom_flags(o_flags.bits()) + .open(&chdir) + .map_err(|e| Error::OpenByPath(chdir.to_path_buf(), e))?; + match file.metadata() { + Ok(md) => { + if !md.is_dir() { + return Err(Error::InvalidPath(chdir)); + } + } + Err(e) => return Err(Error::ReadMetadata(chdir, e)), + } + + let cwd = unistd::getcwd().map_err(|e| Error::Io(io::Error::from_raw_os_error(e as i32)))?; + let src = source.as_ref().to_path_buf(); + let tgt = target.clone(); + let ftype = String::from(fstype); + let d = String::from(data); + let rx = Arc::new(AtomicBool::new(false)); + let tx = rx.clone(); + + // A working thread is spawned to ease error handling. + let child = std::thread::Builder::new() + .name("async_mount".to_string()) + .spawn(move || { + match unistd::fchdir(file.as_raw_fd()) { + Ok(_) => info!(sl!(), "chdir from {} to {}", cwd.display(), chdir.display()), + Err(e) => { + error!( + sl!(), + "failed to chdir from {} to {} error {:?}", + cwd.display(), + chdir.display(), + e + ); + return; + } + } + match mount( + Some(src.as_path()), + &tgt, + Some(ftype.as_str()), + flags, + Some(d.as_str()), + ) { + Ok(_) => tx.store(true, Ordering::Release), + Err(e) => error!(sl!(), "failed to mount in chdir {}: {}", chdir.display(), e), + } + match unistd::chdir(&cwd) { + Ok(_) => info!(sl!(), "chdir from {} to {}", chdir.display(), cwd.display()), + Err(e) => { + error!( + sl!(), + "failed to chdir from {} to {} error {:?}", + chdir.display(), + cwd.display(), + e + ); + } + } + })?; + child.join().map_err(|e| Error::Join(format!("{:?}", e)))?; + + if !rx.load(Ordering::Acquire) { + Err(Error::Mount( + source.as_ref().to_path_buf(), + target, + nix::Error::EIO, + )) + } else { + Ok(()) + } +} + +/// When the size of mount options is bigger than one page, try to reduce the size by compressing +/// the `lowerdir` option for overlayfs. The assumption is that lower directories for overlayfs +/// often have a common prefix. 
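///
/// For example (editor's illustration), an option such as
/// `lowerdir=/a/b/c/1:/a/b/c/2:/a/b/c/3` is rewritten to `lowerdir=1:2:3` and the common
/// prefix `/a/b/c` is returned, so that the caller can `chdir` into it before issuing the
/// mount(2) call (see `mount_at()` above).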
+fn compact_lowerdir_option(opts: &[String]) -> (Option, Vec) { + let mut n_opts = opts.to_vec(); + // No need to compact if there is no overlay or only one lowerdir + let (idx, lower_opts) = match find_overlay_lowerdirs(opts) { + None => return (None, n_opts), + Some(v) => { + if v.1.len() <= 1 { + return (None, n_opts); + } + v + } + }; + + let idx = idx; + let common_dir = match get_longest_common_prefix(&lower_opts) { + None => return (None, n_opts), + Some(v) => { + if v.is_absolute() && v.parent().is_none() { + return (None, n_opts); + } + v + } + }; + let common_prefix = match common_dir.as_os_str().to_str() { + None => return (None, n_opts), + Some(v) => { + let mut p = v.to_string(); + p.push('/'); + p + } + }; + + info!( + sl!(), + "compact_lowerdir_option get common prefix: {}", + common_dir.display() + ); + let lower: Vec = lower_opts + .iter() + .map(|c| c.replace(&common_prefix, "")) + .collect(); + n_opts[idx] = format!("lowerdir={}", lower.join(":")); + + (Some(common_dir), n_opts) +} + +fn find_overlay_lowerdirs(opts: &[String]) -> Option<(usize, Vec)> { + for (idx, o) in opts.iter().enumerate() { + if let Some(lower) = o.strip_prefix("lowerdir=") { + if !lower.is_empty() { + let c_opts: Vec = lower.split(':').map(|c| c.to_string()).collect(); + return Some((idx, c_opts)); + } + } + } + + None +} + +fn get_longest_common_prefix(opts: &[String]) -> Option { + if opts.is_empty() { + return None; + } + + let mut paths = Vec::with_capacity(opts.len()); + for opt in opts.iter() { + match Path::new(opt).parent() { + None => return None, + Some(v) => paths.push(v), + } + } + + let mut path = PathBuf::new(); + paths.sort_unstable(); + for (first, last) in paths[0] + .components() + .zip(paths[paths.len() - 1].components()) + { + if first != last { + break; + } + path.push(first); + } + + Some(path) +} + +/// Umount a mountpoint with timeout. +/// +/// # Safety +/// Caller needs to ensure safety of the `path` to avoid possible file path based attacks. +pub fn umount_timeout>(path: P, timeout: u64) -> Result<()> { + // Protect from symlink based attacks, please refer to: + // https://github.com/kata-containers/runtime/issues/2474 + // For Kata specific, we do extra protection for parent directory too. + let path = path.as_ref(); + let parent = path + .parent() + .ok_or_else(|| Error::InvalidPath(path.to_path_buf()))?; + // TODO: https://github.com/kata-containers/kata-containers/issues/3473 + if is_symlink(path).map_err(|e| Error::ReadMetadata(path.to_owned(), e))? + || is_symlink(parent).map_err(|e| Error::ReadMetadata(path.to_owned(), e))? + { + warn!( + sl!(), + "unable to umount {} which is a symbol link", + path.display() + ); + return Ok(()); + } + + if timeout == 0 { + // Lazy unmounting the mountpoint with the MNT_DETACH flag. + umount2(path, true).map_err(|e| Error::Umount(path.to_owned(), e))?; + info!(sl!(), "lazy umount for {}", path.display()); + } else { + let start_time = Instant::now(); + while let Err(e) = umount2(path, false) { + match e.kind() { + // The mountpoint has been concurrently unmounted by other threads. 
+                io::ErrorKind::InvalidInput => break,
+                io::ErrorKind::WouldBlock => {
+                    let time_now = Instant::now();
+                    if time_now.duration_since(start_time).as_millis() > timeout as u128 {
+                        warn!(sl!(),
+                              "failed to umount {} in {} ms because of EBUSY, try again with lazy umount",
+                              path.display(),
+                              Instant::now().duration_since(start_time).as_millis());
+                        return umount2(path, true).map_err(|e| Error::Umount(path.to_owned(), e));
+                    }
+                }
+                _ => return Err(Error::Umount(path.to_owned(), e)),
+            }
+        }
+
+        info!(
+            sl!(),
+            "umount {} in {} ms",
+            path.display(),
+            Instant::now().duration_since(start_time).as_millis()
+        );
+    }
+
+    Ok(())
+}
+
+/// Umount all filesystems mounted at the `mountpoint`.
+///
+/// If `mountpoint` is empty or doesn't exist, `umount_all()` is a noop. Otherwise it will try to
+/// unmount all filesystems mounted at `mountpoint` repeatedly. For example:
+/// - bind mount /dev/sda to /tmp/mnt
+/// - bind mount /tmp/b to /tmp/mnt
+/// - umount_all("/tmp/mnt") will umount both /tmp/b and /dev/sda
+///
+/// # Safety
+/// Caller needs to ensure safety of the `path` to avoid possible file path based attacks.
+pub fn umount_all<P: AsRef<Path>>(mountpoint: P, lazy_umount: bool) -> Result<()> {
+    if mountpoint.as_ref().is_empty() || !mountpoint.as_ref().exists() {
+        return Ok(());
+    }
+
+    loop {
+        if let Err(e) = umount2(mountpoint.as_ref(), lazy_umount) {
+            // EINVAL is returned if the target is not a mount point, indicating that we are
+            // done. It can also indicate a few other things (such as invalid flags) which we
+            // unfortunately end up squelching here too.
+            if e.kind() == io::ErrorKind::InvalidInput {
+                break;
+            } else {
+                return Err(Error::Umount(mountpoint.as_ref().to_path_buf(), e));
+            }
+        }
+    }
+
+    Ok(())
+}
+
+// Counterpart of nix::umount2, with support of `UMOUNT_NOFOLLOW`.
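// Editor's note: illustrative sketch, not part of the original patch. It shows how a caller
// might tear down a mountpoint with the helpers above; the path is hypothetical.
#[allow(dead_code)]
fn _example_umount_rootfs() -> Result<()> {
    let mountpoint = "/run/kata/sandbox-1/rootfs";

    // Try a regular umount for up to two seconds, then fall back to a lazy (MNT_DETACH) umount.
    umount_timeout(mountpoint, 2000)?;

    // Alternatively, peel off every filesystem stacked on the mountpoint, lazily.
    umount_all(mountpoint, true)
}

// The private `umount2()` wrapper described in the comment above follows.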
+fn umount2>(path: P, lazy_umount: bool) -> std::io::Result<()> { + let mut flags = MntFlags::UMOUNT_NOFOLLOW; + if lazy_umount { + flags |= MntFlags::MNT_DETACH; + } + nix::mount::umount2(path.as_ref(), flags).map_err(io::Error::from) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + #[test] + fn test_get_linux_mount_info() { + let info = get_linux_mount_info("/sys/fs/cgroup").unwrap(); + + assert_eq!(&info.device, "tmpfs"); + assert_eq!(&info.fs_type, "tmpfs"); + assert_eq!(&info.path, "/sys/fs/cgroup"); + + assert!(matches!( + get_linux_mount_info(""), + Err(Error::NoMountEntry(_)) + )); + assert!(matches!( + get_linux_mount_info("/sys/fs/cgroup/do_not_exist/____hi"), + Err(Error::NoMountEntry(_)) + )); + } + + #[test] + fn test_create_mount_destination() { + let tmpdir = tempfile::tempdir().unwrap(); + let src = Path::new("/proc/mounts"); + let mut dst = tmpdir.path().to_owned(); + dst.push("proc"); + dst.push("mounts"); + let dst = create_mount_destination(src, dst.as_path(), tmpdir.path(), "bind").unwrap(); + let abs_dst = dst.as_ref().canonicalize().unwrap(); + assert!(abs_dst.is_file()); + + let dst = Path::new("/"); + assert!(matches!( + create_mount_destination(src, dst, "/", "bind"), + Err(Error::InvalidPath(_)) + )); + + let src = Path::new("/proc"); + let dst = Path::new("/proc/mounts"); + assert!(matches!( + create_mount_destination(src, dst, "/", "bind"), + Err(Error::InvalidPath(_)) + )); + } + + #[test] + #[ignore] + fn test_bind_remount() { + let tmpdir = tempfile::tempdir().unwrap(); + let tmpdir2 = tempfile::tempdir().unwrap(); + + assert!(matches!( + bind_remount(PathBuf::from(""), true), + Err(Error::NullMountPointPath) + )); + assert!(matches!( + bind_remount(PathBuf::from("../______doesn't____exist____nnn"), true), + Err(Error::InvalidPath(_)) + )); + + bind_mount_unchecked(tmpdir2.path(), tmpdir.path(), true, MsFlags::MS_SLAVE).unwrap(); + bind_remount(tmpdir.path(), true).unwrap(); + umount_timeout(tmpdir.path().to_str().unwrap(), 0).unwrap(); + } + + #[test] + #[ignore] + fn test_bind_mount() { + let tmpdir = tempfile::tempdir().unwrap(); + let tmpdir2 = tempfile::tempdir().unwrap(); + let mut src = tmpdir.path().to_owned(); + src.push("src"); + let mut dst = tmpdir.path().to_owned(); + dst.push("src"); + + assert!(matches!( + bind_mount_unchecked(Path::new(""), Path::new(""), false, MsFlags::MS_SLAVE), + Err(Error::NullMountPointPath) + )); + assert!(matches!( + bind_mount_unchecked(tmpdir2.path(), Path::new(""), false, MsFlags::MS_SLAVE), + Err(Error::NullMountPointPath) + )); + assert!(matches!( + bind_mount_unchecked( + Path::new("/_does_not_exist_/___aahhhh"), + Path::new("/tmp/_does_not_exist/___bbb"), + false, + MsFlags::MS_SLAVE + ), + Err(Error::InvalidPath(_)) + )); + + let dst = create_mount_destination(tmpdir2.path(), &dst, tmpdir.path(), "bind").unwrap(); + bind_mount_unchecked(tmpdir2.path(), dst.as_ref(), true, MsFlags::MS_SLAVE).unwrap(); + bind_mount_unchecked(&src, dst.as_ref(), false, MsFlags::MS_SLAVE).unwrap(); + umount_all(dst.as_ref(), false).unwrap(); + + let mut src = tmpdir.path().to_owned(); + src.push("file"); + fs::write(&src, "test").unwrap(); + let mut dst = tmpdir.path().to_owned(); + dst.push("file"); + let dst = create_mount_destination(&src, &dst, tmpdir.path(), "bind").unwrap(); + bind_mount_unchecked(&src, dst.as_ref(), false, MsFlags::MS_SLAVE).unwrap(); + assert!(dst.as_ref().is_file()); + umount_timeout(dst.as_ref(), 0).unwrap(); + } + + #[test] + fn test_compact_overlay_lowerdirs() { + let 
options = vec![ + "workdir=/a/b/c/xxxx/workdir".to_string(), + "upperdir=/a/b/c/xxxx/upper".to_string(), + "lowerdir=/a/b/c/xxxx/1l:/a/b/c/xxxx/2l:/a/b/c/xxxx/3l:/a/b/c/xxxx/4l".to_string(), + ]; + let (prefix, n_options) = compact_lowerdir_option(&options); + assert_eq!(&prefix.unwrap(), Path::new("/a/b/c/xxxx/")); + assert_eq!(n_options.len(), 3); + assert_eq!(n_options[2], "lowerdir=1l:2l:3l:4l"); + + let options = vec![ + "workdir=/a/b/c/xxxx/workdir".to_string(), + "upperdir=/a/b/c/xxxx/upper".to_string(), + "lowerdir=/a/b/c/xxxx:/a/b/c/xxxx/2l:/a/b/c/xxxx/3l:/a/b/c/xxxx/4l".to_string(), + ]; + let (prefix, n_options) = compact_lowerdir_option(&options); + assert_eq!(&prefix.unwrap(), Path::new("/a/b/c/")); + assert_eq!(n_options.len(), 3); + assert_eq!(n_options[2], "lowerdir=xxxx:xxxx/2l:xxxx/3l:xxxx/4l"); + + let options = vec![ + "workdir=/a/b/c/xxxx/workdir".to_string(), + "upperdir=/a/b/c/xxxx/upper".to_string(), + "lowerdir=/1l:/2l:/3l:/4l".to_string(), + ]; + let (prefix, n_options) = compact_lowerdir_option(&options); + assert!(prefix.is_none()); + assert_eq!(n_options, options); + + let options = vec![ + "workdir=/a/b/c/xxxx/workdir".to_string(), + "upperdir=/a/b/c/xxxx/upper".to_string(), + ]; + let (prefix, n_options) = compact_lowerdir_option(&options); + assert!(prefix.is_none()); + assert_eq!(n_options, options); + + let options = vec![ + "workdir=/a/b/c/xxxx/workdir".to_string(), + "lowerdir=".to_string(), + "upperdir=/a/b/c/xxxx/upper".to_string(), + ]; + let (prefix, n_options) = compact_lowerdir_option(&options); + assert!(prefix.is_none()); + assert_eq!(n_options, options); + } + + #[test] + fn test_find_overlay_lowerdirs() { + let options = vec![ + "workdir=/a/b/c/xxxx/workdir".to_string(), + "upperdir=/a/b/c/xxxx/upper".to_string(), + "lowerdir=/a/b/c/xxxx/1l:/a/b/c/xxxx/2l:/a/b/c/xxxx/3l:/a/b/c/xxxx/4l".to_string(), + ]; + let lower_expect = vec![ + "/a/b/c/xxxx/1l".to_string(), + "/a/b/c/xxxx/2l".to_string(), + "/a/b/c/xxxx/3l".to_string(), + "/a/b/c/xxxx/4l".to_string(), + ]; + + let (idx, lower) = find_overlay_lowerdirs(&options).unwrap(); + assert_eq!(idx, 2); + assert_eq!(lower, lower_expect); + + let common_prefix = get_longest_common_prefix(&lower).unwrap(); + assert_eq!(Path::new("/a/b/c/xxxx/"), &common_prefix); + + let options = vec![ + "workdir=/a/b/c/xxxx/workdir".to_string(), + "upperdir=/a/b/c/xxxx/upper".to_string(), + ]; + let v = find_overlay_lowerdirs(&options); + assert!(v.is_none()); + + let options = vec![ + "workdir=/a/b/c/xxxx/workdir".to_string(), + "lowerdir=".to_string(), + "upperdir=/a/b/c/xxxx/upper".to_string(), + ]; + find_overlay_lowerdirs(&options); + assert!(v.is_none()); + } + + #[test] + fn test_get_common_prefix() { + let lower1 = vec![ + "/a/b/c/xxxx/1l/fs".to_string(), + "/a/b/c/////xxxx/11l/fs".to_string(), + "/a/b/c/././xxxx/13l/fs".to_string(), + "/a/b/c/.////xxxx/14l/fs".to_string(), + ]; + let common_prefix = get_longest_common_prefix(&lower1).unwrap(); + assert_eq!(Path::new("/a/b/c/xxxx/"), &common_prefix); + + let lower2 = vec![ + "/fs".to_string(), + "/s".to_string(), + "/sa".to_string(), + "/s".to_string(), + ]; + let common_prefix = get_longest_common_prefix(&lower2).unwrap(); + assert_eq!(Path::new("/"), &common_prefix); + + let lower3 = vec!["".to_string(), "".to_string()]; + let common_prefix = get_longest_common_prefix(&lower3); + assert!(common_prefix.is_none()); + + let lower = vec!["/".to_string(), "/".to_string()]; + let common_prefix = get_longest_common_prefix(&lower); + assert!(common_prefix.is_none()); 
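        // Editor's note: the cases below exercise component-wise comparison on the *parent* of
        // each path, so duplicate '/' separators are ignored and "/a/b/c" vs "/a/b/c/d" reduces
        // to "/a/b".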
+ + let lower = vec![ + "/a/b/c".to_string(), + "/a/b/c/d".to_string(), + "/a/b///c".to_string(), + ]; + let common_prefix = get_longest_common_prefix(&lower).unwrap(); + assert_eq!(Path::new("/a/b"), &common_prefix); + + let lower = vec!["a/b/c/e".to_string(), "a/b/c/d".to_string()]; + let common_prefix = get_longest_common_prefix(&lower).unwrap(); + assert_eq!(Path::new("a/b/c"), &common_prefix); + + let lower = vec!["a/b/c".to_string(), "a/b/c/d".to_string()]; + let common_prefix = get_longest_common_prefix(&lower).unwrap(); + assert_eq!(Path::new("a/b"), &common_prefix); + + let lower = vec!["/test".to_string()]; + let common_prefix = get_longest_common_prefix(&lower).unwrap(); + assert_eq!(Path::new("/"), &common_prefix); + + let lower = vec![]; + let common_prefix = get_longest_common_prefix(&lower); + assert!(&common_prefix.is_none()); + } + + #[test] + fn test_parse_mount_options() { + let options: Vec<&str> = vec![]; + let (flags, data) = parse_mount_options(&options).unwrap(); + assert!(flags.is_empty()); + assert!(data.is_empty()); + + let mut options = vec![ + "dev".to_string(), + "ro".to_string(), + "defaults".to_string(), + "data-option".to_string(), + ]; + let (flags, data) = parse_mount_options(&options).unwrap(); + assert_eq!(flags, MsFlags::MS_RDONLY); + assert_eq!(&data, "data-option"); + + options.push("loop".to_string()); + assert!(parse_mount_options(&options).is_err()); + + let idx = options.len() - 1; + options[idx] = " ".repeat(4097); + assert!(parse_mount_options(&options).is_err()); + } + + #[test] + #[ignore] + fn test_mount_at() { + let tmpdir = tempfile::tempdir().unwrap(); + let path = tmpdir.path().to_path_buf(); + mount_at( + Some(path.clone()), + "/___does_not_exist____a___", + PathBuf::from("/tmp/etc/host.conf"), + "", + MsFlags::empty(), + "", + ) + .unwrap_err(); + + mount_at( + Some(PathBuf::from("/___does_not_exist____a___")), + "/etc/host.conf", + PathBuf::from("/tmp/etc/host.conf"), + "", + MsFlags::empty(), + "", + ) + .unwrap_err(); + + let src = path.join("src"); + fs::write(src, "test").unwrap(); + let dst = path.join("dst"); + fs::write(&dst, "test1").unwrap(); + mount_at( + Some(path), + "src", + PathBuf::from("dst"), + "bind", + MsFlags::MS_BIND, + "", + ) + .unwrap(); + let content = fs::read_to_string(&dst).unwrap(); + assert_eq!(&content, "test"); + } +} diff --git a/src/libs/kata-sys-util/src/numa.rs b/src/libs/kata-sys-util/src/numa.rs new file mode 100644 index 000000000000..4a6b2e576796 --- /dev/null +++ b/src/libs/kata-sys-util/src/numa.rs @@ -0,0 +1,221 @@ +// Copyright (c) 2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::collections::HashMap; +use std::fs::DirEntry; +use std::io::Read; +use std::path::PathBuf; + +use kata_types::cpu::CpuSet; +use lazy_static::lazy_static; + +use crate::sl; +use std::str::FromStr; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Invalid CPU number {0}")] + InvalidCpu(u32), + #[error("Invalid node file name {0}")] + InvalidNodeFileName(String), + #[error("Can not read directory {1}: {0}")] + ReadDirectory(#[source] std::io::Error, String), + #[error("Can not read from file {0}, {1:?}")] + ReadFile(String, #[source] std::io::Error), + #[error("Can not open from file {0}, {1:?}")] + OpenFile(String, #[source] std::io::Error), + #[error("Can not parse CPU info, {0:?}")] + ParseCpuInfo(#[from] kata_types::Error), +} + +pub type Result = std::result::Result; + +// global config in UT +#[cfg(test)] +lazy_static! 
{
+    static ref SYS_FS_PREFIX: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test/texture");
+    // numa node file for UT, we can mock data
+    static ref NUMA_NODE_PATH: PathBuf = (*SYS_FS_PREFIX).join("sys/devices/system/node");
+    // sysfs directory for CPU devices
+    static ref NUMA_CPU_PATH: PathBuf = (*SYS_FS_PREFIX).join("sys/devices/system/cpu");
+}
+
+// global config in release
+#[cfg(not(test))]
+lazy_static! {
+    // sysfs directory for NUMA nodes
+    static ref NUMA_NODE_PATH: PathBuf = PathBuf::from("/sys/devices/system/node");
+    // sysfs directory for CPU devices
+    static ref NUMA_CPU_PATH: PathBuf = PathBuf::from("/sys/devices/system/cpu");
+}
+
+const NUMA_NODE_PREFIX: &str = "node";
+const NUMA_NODE_CPU_LIST_NAME: &str = "cpulist";
+
+/// Get the NUMA node id for a CPU.
+pub fn get_node_id(cpu: u32) -> Result<u32> {
+    let path = NUMA_CPU_PATH.join(format!("cpu{}", cpu));
+    let dirs = path.read_dir().map_err(|_| Error::InvalidCpu(cpu))?;
+
+    for d in dirs {
+        let d = d.map_err(|e| Error::ReadDirectory(e, path.to_string_lossy().to_string()))?;
+        if let Some(file_name) = d.file_name().to_str() {
+            if !file_name.starts_with(NUMA_NODE_PREFIX) {
+                continue;
+            }
+            let index_str = file_name.trim_start_matches(NUMA_NODE_PREFIX);
+            if let Ok(i) = index_str.parse::<u32>() {
+                return Ok(i);
+            }
+        }
+    }
+
+    // Default to node 0 on UMA systems.
+    Ok(0)
+}
+
+/// Map a cpulist string to NUMA nodes, returning a `HashMap<node_id, Vec<cpu_id>>`.
+pub fn get_node_map(cpus: &str) -> Result<HashMap<u32, Vec<u32>>> {
+    // <node_id, Vec<cpu_id>>
+    let mut node_map: HashMap<u32, Vec<u32>> = HashMap::new();
+    let cpuset = CpuSet::from_str(cpus)?;
+
+    for c in cpuset.iter() {
+        let node_id = get_node_id(*c)?;
+        node_map.entry(node_id).or_insert_with(Vec::new).push(*c);
+    }
+
+    Ok(node_map)
+}
+
+/// Get the CPU to NUMA node mapping by reading `/sys/devices/system/node/nodeX/cpulist`.
+///
+/// Return a `HashMap<cpu_id, node_id>`. The hashmap will be empty if NUMA is not enabled on the
+/// system.
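///
/// # Example
///
/// Editor's illustration (not part of the original patch); CPU number 0 is just an example key.
///
/// ```no_run
/// use kata_sys_util::numa::get_numa_nodes;
///
/// let cpu_to_node = get_numa_nodes().expect("failed to read NUMA topology from sysfs");
/// if let Some(node) = cpu_to_node.get(&0) {
///     println!("CPU 0 belongs to NUMA node {}", node);
/// }
/// ```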
+pub fn get_numa_nodes() -> Result<HashMap<u32, u32>> {
+    let mut numa_nodes = HashMap::new();
+    let numa_node_path = &*NUMA_NODE_PATH;
+    if !numa_node_path.exists() {
+        debug!(sl!(), "no numa node available on this system");
+        return Ok(numa_nodes);
+    }
+
+    let dirs = numa_node_path
+        .read_dir()
+        .map_err(|e| Error::ReadDirectory(e, numa_node_path.to_string_lossy().to_string()))?;
+    for d in dirs {
+        match d {
+            Err(e) => {
+                return Err(Error::ReadDirectory(
+                    e,
+                    numa_node_path.to_string_lossy().to_string(),
+                ))
+            }
+            Ok(d) => {
+                if let Ok(file_name) = d.file_name().into_string() {
+                    if file_name.starts_with(NUMA_NODE_PREFIX) {
+                        let index_string = file_name.trim_start_matches(NUMA_NODE_PREFIX);
+                        info!(
+                            sl!(),
+                            "get node dir {} node index {}", &file_name, index_string
+                        );
+                        match index_string.parse::<u32>() {
+                            Ok(nid) => read_cpu_info_from_node(&d, nid, &mut numa_nodes)?,
+                            Err(_e) => {
+                                return Err(Error::InvalidNodeFileName(file_name.to_string()))
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    Ok(numa_nodes)
+}
+
+fn read_cpu_info_from_node(
+    d: &DirEntry,
+    node_index: u32,
+    numa_nodes: &mut HashMap<u32, u32>,
+) -> Result<()> {
+    let cpu_list_path = d.path().join(NUMA_NODE_CPU_LIST_NAME);
+    let mut file = std::fs::File::open(&cpu_list_path)
+        .map_err(|e| Error::OpenFile(cpu_list_path.to_string_lossy().to_string(), e))?;
+    let mut cpu_list_string = String::new();
+    if let Err(e) = file.read_to_string(&mut cpu_list_string) {
+        return Err(Error::ReadFile(
+            cpu_list_path.to_string_lossy().to_string(),
+            e,
+        ));
+    }
+    let split_cpus = CpuSet::from_str(cpu_list_string.trim())?;
+    info!(
+        sl!(),
+        "node {} list {:?} from {}", node_index, split_cpus, &cpu_list_string
+    );
+    for split_cpu_id in split_cpus.iter() {
+        numa_nodes.insert(*split_cpu_id, node_index);
+    }
+
+    Ok(())
+}
+
+/// Check whether all specified CPUs have an associated NUMA node.
+pub fn is_valid_numa_cpu(cpus: &[u32]) -> Result { + let numa_nodes = get_numa_nodes()?; + + for cpu in cpus { + if numa_nodes.get(cpu).is_none() { + return Ok(false); + } + } + + Ok(true) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_node_id() { + assert_eq!(get_node_id(0).unwrap(), 0); + assert_eq!(get_node_id(1).unwrap(), 0); + assert_eq!(get_node_id(64).unwrap(), 1); + get_node_id(65).unwrap_err(); + } + + #[test] + fn test_get_node_map() { + let map = get_node_map("0-1,64").unwrap(); + assert_eq!(map.len(), 2); + assert_eq!(map.get(&0).unwrap().len(), 2); + assert_eq!(map.get(&1).unwrap().len(), 1); + + get_node_map("0-1,64,65").unwrap_err(); + } + + #[test] + fn test_get_numa_nodes() { + let map = get_numa_nodes().unwrap(); + assert_eq!(map.len(), 65); + assert_eq!(*map.get(&0).unwrap(), 0); + assert_eq!(*map.get(&1).unwrap(), 0); + assert_eq!(*map.get(&63).unwrap(), 0); + assert_eq!(*map.get(&64).unwrap(), 1); + } + + #[test] + fn test_is_valid_numa_cpu() { + assert!(is_valid_numa_cpu(&[0]).unwrap()); + assert!(is_valid_numa_cpu(&[1]).unwrap()); + assert!(is_valid_numa_cpu(&[63]).unwrap()); + assert!(is_valid_numa_cpu(&[64]).unwrap()); + assert!(is_valid_numa_cpu(&[0, 1, 64]).unwrap()); + assert!(!is_valid_numa_cpu(&[0, 1, 64, 65]).unwrap()); + assert!(!is_valid_numa_cpu(&[65]).unwrap()); + } +} diff --git a/src/libs/kata-sys-util/src/protection.rs b/src/libs/kata-sys-util/src/protection.rs new file mode 100644 index 000000000000..75e6dbf1d406 --- /dev/null +++ b/src/libs/kata-sys-util/src/protection.rs @@ -0,0 +1,253 @@ +// Copyright (c) 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[cfg(any(target_arch = "s390x", target_arch = "x86_64", target_arch = "aarch64"))] +use anyhow::Result; +use std::fmt; +#[cfg(target_arch = "x86_64")] +use std::path::Path; +use thiserror::Error; + +#[cfg(any(target_arch = "s390x", target_arch = "x86_64"))] +use nix::unistd::Uid; + +#[cfg(target_arch = "x86_64")] +use std::fs; + +#[allow(dead_code)] +#[derive(Debug, PartialEq)] +pub enum GuestProtection { + NoProtection, + Tdx, + Sev, + Snp, + Pef, + Se, +} + +impl fmt::Display for GuestProtection { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + GuestProtection::Tdx => write!(f, "tdx"), + GuestProtection::Sev => write!(f, "sev"), + GuestProtection::Snp => write!(f, "snp"), + GuestProtection::Pef => write!(f, "pef"), + GuestProtection::Se => write!(f, "se"), + GuestProtection::NoProtection => write!(f, "none"), + } + } +} + +#[allow(dead_code)] +#[derive(Error, Debug)] +pub enum ProtectionError { + #[error("No permission to check guest protection")] + NoPerms, + + #[error("Failed to check guest protection: {0}")] + CheckFailed(String), + + #[error("Invalid guest protection value: {0}")] + InvalidValue(String), +} + +#[cfg(target_arch = "x86_64")] +pub const TDX_SYS_FIRMWARE_DIR: &str = "/sys/firmware/tdx/"; +#[cfg(target_arch = "x86_64")] +pub const SEV_KVM_PARAMETER_PATH: &str = "/sys/module/kvm_amd/parameters/sev"; +#[cfg(target_arch = "x86_64")] +pub const SNP_KVM_PARAMETER_PATH: &str = "/sys/module/kvm_amd/parameters/sev_snp"; + +#[cfg(target_arch = "x86_64")] +pub fn available_guest_protection() -> Result { + if !Uid::effective().is_root() { + return Err(ProtectionError::NoPerms); + } + + arch_guest_protection( + TDX_SYS_FIRMWARE_DIR, + SEV_KVM_PARAMETER_PATH, + SNP_KVM_PARAMETER_PATH, + ) +} + +#[cfg(target_arch = "x86_64")] +pub fn arch_guest_protection( + tdx_path: &str, + sev_path: &str, + snp_path: &str, +) -> 
Result { + let metadata = fs::metadata(tdx_path); + + if metadata.is_ok() && metadata.unwrap().is_dir() { + return Ok(GuestProtection::Tdx); + } + + let check_contents = |file_name: &str| -> Result { + let file_path = Path::new(file_name); + if !file_path.exists() { + return Ok(false); + } + + let contents = fs::read_to_string(file_name).map_err(|err| { + ProtectionError::CheckFailed(format!("Error reading file {} : {}", file_name, err)) + })?; + + if contents.trim() == "Y" { + return Ok(true); + } + Ok(false) + }; + + if check_contents(snp_path)? { + return Ok(GuestProtection::Snp); + } + + if check_contents(sev_path)? { + return Ok(GuestProtection::Sev); + } + + Ok(GuestProtection::NoProtection) +} + +#[cfg(target_arch = "s390x")] +#[allow(dead_code)] +// Guest protection is not supported on ARM64. +pub fn available_guest_protection() -> Result { + if !Uid::effective().is_root() { + return Err(ProtectionError::NoPerms)?; + } + + let facilities = crate::cpu::retrieve_cpu_facilities().map_err(|err| { + ProtectionError::CheckFailed(format!( + "Error retrieving cpu facilities file : {}", + err.to_string() + )) + })?; + + // Secure Execution + // https://www.kernel.org/doc/html/latest/virt/kvm/s390-pv.html + let se_cpu_facility_bit: i32 = 158; + if !facilities.contains_key(&se_cpu_facility_bit) { + return Ok(GuestProtection::NoProtection); + } + + let cmd_line_values = vec!["1", "on", "y", "yes"]; + let se_cmdline_param = "prot_virt"; + + let se_cmdline_present = + crate::check_kernel_cmd_line("/proc/cmdline", se_cmdline_param, &cmd_line_values) + .map_err(|err| ProtectionError::CheckFailed(err.to_string()))?; + + if !se_cmdline_present { + return Err(ProtectionError::InvalidValue(String::from( + "Protected Virtualization is not enabled on kernel command line!", + ))); + } + + Ok(GuestProtection::Se) +} + +#[cfg(target_arch = "powerpc64le")] +pub fn available_guest_protection() -> Result { + if !Uid::effective().is_root() { + return Err(check::ProtectionError::NoPerms); + } + + let metadata = fs::metadata(PEF_SYS_FIRMWARE_DIR); + if metadata.is_ok() && metadata.unwrap().is_dir() { + Ok(check::GuestProtection::Pef) + } + + Ok(check::GuestProtection::NoProtection) +} + +#[cfg(target_arch = "aarch64")] +#[allow(dead_code)] +// Guest protection is not supported on ARM64. 
+pub fn available_guest_protection() -> Result { + Ok(GuestProtection::NoProtection) +} + +#[cfg(target_arch = "x86_64")] +#[cfg(test)] +mod tests { + use super::*; + use nix::unistd::Uid; + use std::fs; + use std::io::Write; + use tempfile::tempdir; + + #[test] + fn test_available_guest_protection_no_privileges() { + if !Uid::effective().is_root() { + let res = available_guest_protection(); + assert!(res.is_err()); + assert_eq!( + "No permission to check guest protection", + res.unwrap_err().to_string() + ); + } + } + + #[test] + fn test_arch_guest_protection_snp() { + // Test snp + let dir = tempdir().unwrap(); + let snp_file_path = dir.path().join("sev_snp"); + let path = snp_file_path.clone(); + let mut snp_file = fs::File::create(snp_file_path).unwrap(); + writeln!(snp_file, "Y").unwrap(); + + let actual = arch_guest_protection("/xyz/tmp", "/xyz/tmp", path.to_str().unwrap()); + assert!(actual.is_ok()); + assert_eq!(actual.unwrap(), GuestProtection::Snp); + + writeln!(snp_file, "N").unwrap(); + let actual = arch_guest_protection("/xyz/tmp", "/xyz/tmp", path.to_str().unwrap()); + assert!(actual.is_ok()); + assert_eq!(actual.unwrap(), GuestProtection::NoProtection); + } + + #[test] + fn test_arch_guest_protection_sev() { + // Test sev + let dir = tempdir().unwrap(); + let sev_file_path = dir.path().join("sev"); + let sev_path = sev_file_path.clone(); + let mut sev_file = fs::File::create(sev_file_path).unwrap(); + writeln!(sev_file, "Y").unwrap(); + + let actual = arch_guest_protection("/xyz/tmp", sev_path.to_str().unwrap(), "/xyz/tmp"); + assert!(actual.is_ok()); + assert_eq!(actual.unwrap(), GuestProtection::Sev); + + writeln!(sev_file, "N").unwrap(); + let actual = arch_guest_protection("/xyz/tmp", sev_path.to_str().unwrap(), "/xyz/tmp"); + assert!(actual.is_ok()); + assert_eq!(actual.unwrap(), GuestProtection::NoProtection); + } + + #[test] + fn test_arch_guest_protection_tdx() { + let dir = tempdir().unwrap(); + + let invalid_dir = dir.path().join("enoent"); + let invalid_dir = invalid_dir.to_str().unwrap(); + + let tdx_file_path = dir.path().join("tdx"); + let tdx_path = tdx_file_path; + + std::fs::create_dir_all(tdx_path.clone()).unwrap(); + + let actual = arch_guest_protection(invalid_dir, invalid_dir, invalid_dir); + assert!(actual.is_ok()); + assert_eq!(actual.unwrap(), GuestProtection::NoProtection); + + let actual = arch_guest_protection(tdx_path.to_str().unwrap(), invalid_dir, invalid_dir); + assert!(actual.is_ok()); + assert_eq!(actual.unwrap(), GuestProtection::Tdx); + } +} diff --git a/src/libs/kata-sys-util/src/rand/mod.rs b/src/libs/kata-sys-util/src/rand/mod.rs new file mode 100644 index 000000000000..adc098ff6865 --- /dev/null +++ b/src/libs/kata-sys-util/src/rand/mod.rs @@ -0,0 +1,10 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod random_bytes; +pub use random_bytes::RandomBytes; +mod uuid; +pub use uuid::UUID; diff --git a/src/libs/kata-sys-util/src/rand/random_bytes.rs b/src/libs/kata-sys-util/src/rand/random_bytes.rs new file mode 100644 index 000000000000..183856d6b5e6 --- /dev/null +++ b/src/libs/kata-sys-util/src/rand/random_bytes.rs @@ -0,0 +1,62 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fmt; + +use rand::RngCore; + +pub struct RandomBytes { + pub bytes: Vec, +} + +impl RandomBytes { + pub fn new(n: usize) -> Self { + let mut bytes = vec![0u8; n]; + 
rand::thread_rng().fill_bytes(&mut bytes); + Self { bytes } + } +} + +impl fmt::LowerHex for RandomBytes { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for byte in &self.bytes { + write!(f, "{:x}", byte)?; + } + Ok(()) + } +} + +impl fmt::UpperHex for RandomBytes { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for byte in &self.bytes { + write!(f, "{:X}", byte)?; + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn random_bytes() { + let b = RandomBytes::new(16); + assert_eq!(b.bytes.len(), 16); + + // check lower hex + let lower_hex = format!("{:x}", b); + assert_eq!(lower_hex, lower_hex.to_lowercase()); + + // check upper hex + let upper_hex = format!("{:X}", b); + assert_eq!(upper_hex, upper_hex.to_uppercase()); + + // check new random bytes + let b1 = RandomBytes::new(16); + assert_ne!(b.bytes, b1.bytes); + } +} diff --git a/src/libs/kata-sys-util/src/rand/uuid.rs b/src/libs/kata-sys-util/src/rand/uuid.rs new file mode 100644 index 000000000000..905ba05e24ae --- /dev/null +++ b/src/libs/kata-sys-util/src/rand/uuid.rs @@ -0,0 +1,74 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{convert::From, fmt}; + +use byteorder::{BigEndian, ByteOrder}; +use rand::RngCore; + +pub struct UUID([u8; 16]); + +impl Default for UUID { + fn default() -> Self { + Self::new() + } +} + +impl UUID { + pub fn new() -> Self { + let mut b = [0u8; 16]; + rand::thread_rng().fill_bytes(&mut b); + b[6] = (b[6] & 0x0f) | 0x40; + b[8] = (b[8] & 0x3f) | 0x80; + Self(b) + } +} + +/// From: convert UUID to string +impl From<&UUID> for String { + fn from(from: &UUID) -> Self { + let time_low = BigEndian::read_u32(&from.0[..4]); + let time_mid = BigEndian::read_u16(&from.0[4..6]); + let time_hi = BigEndian::read_u16(&from.0[6..8]); + let clk_seq_hi = from.0[8]; + let clk_seq_low = from.0[9]; + let mut buf = [0u8; 8]; + buf[2..].copy_from_slice(&from.0[10..]); + let node = BigEndian::read_u64(&buf); + + format!( + "{:08x}-{:04x}-{:04x}-{:02x}{:02x}-{:012x}", + time_low, time_mid, time_hi, clk_seq_hi, clk_seq_low, node + ) + } +} + +impl fmt::Display for UUID { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", String::from(self)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_uuid() { + let uuid1 = UUID::new(); + let s1: String = String::from(&uuid1); + + let uuid2 = UUID::new(); + let s2: String = String::from(&uuid2); + + assert_eq!(s1.len(), s2.len()); + assert_ne!(s1, s2); + + let uuid3 = UUID([0u8, 1u8, 2u8, 3u8, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let s3 = String::from(&uuid3); + assert_eq!(&s3, "00010203-0405-0607-0809-0a0b0c0d0e0f"); + } +} diff --git a/src/libs/kata-sys-util/src/spec.rs b/src/libs/kata-sys-util/src/spec.rs new file mode 100644 index 000000000000..24bcf16e91b2 --- /dev/null +++ b/src/libs/kata-sys-util/src/spec.rs @@ -0,0 +1,94 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::path::PathBuf; + +use kata_types::container::ContainerType; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + /// unknown container type + #[error("unknown container type {0}")] + UnknownContainerType(String), + /// missing sandboxID + #[error("missing sandboxID")] + MissingSandboxID, + /// oci error + #[error("oci error")] + Oci(#[from] oci::Error), +} + +const CRI_CONTAINER_TYPE_KEY_LIST: &[&str] = &[ + // cri 
containerd + "io.kubernetes.cri.container-type", + // cri-o + "io.kubernetes.cri-o.ContainerType", + // docker shim + "io.kubernetes.docker.type", +]; + +const CRI_SANDBOX_ID_KEY_LIST: &[&str] = &[ + // cri containerd + "io.kubernetes.cri.sandbox-id", + // cri-o + "io.kubernetes.cri-o.SandboxID", + // docker shim + "io.kubernetes.sandbox.id", +]; + +/// container sandbox info +#[derive(Debug, Clone)] +pub enum ShimIdInfo { + /// Sandbox + Sandbox, + /// Container + Container(String), +} + +/// get container type +pub fn get_container_type(spec: &oci::Spec) -> Result { + for k in CRI_CONTAINER_TYPE_KEY_LIST.iter() { + if let Some(type_value) = spec.annotations.get(*k) { + match type_value.as_str() { + "sandbox" => return Ok(ContainerType::PodSandbox), + "podsandbox" => return Ok(ContainerType::PodSandbox), + "container" => return Ok(ContainerType::PodContainer), + _ => return Err(Error::UnknownContainerType(type_value.clone())), + } + } + } + + Ok(ContainerType::PodSandbox) +} + +/// get shim id info +pub fn get_shim_id_info() -> Result { + let spec = load_oci_spec()?; + match get_container_type(&spec)? { + ContainerType::PodSandbox | ContainerType::SingleContainer => Ok(ShimIdInfo::Sandbox), + ContainerType::PodContainer => { + for k in CRI_SANDBOX_ID_KEY_LIST { + if let Some(sandbox_id) = spec.annotations.get(*k) { + return Ok(ShimIdInfo::Container(sandbox_id.into())); + } + } + Err(Error::MissingSandboxID) + } + } +} + +/// get bundle path +pub fn get_bundle_path() -> std::io::Result { + std::env::current_dir() +} + +/// load oci spec +pub fn load_oci_spec() -> oci::Result { + let bundle_path = get_bundle_path()?; + let spec_file = bundle_path.join("config.json"); + + oci::Spec::load(spec_file.to_str().unwrap_or_default()) +} diff --git a/src/libs/kata-sys-util/src/validate.rs b/src/libs/kata-sys-util/src/validate.rs new file mode 100644 index 000000000000..1ad843c0c7d3 --- /dev/null +++ b/src/libs/kata-sys-util/src/validate.rs @@ -0,0 +1,326 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("invalid container ID {0}")] + InvalidContainerID(String), +} + +// A container ID or exec ID must match this regex: +// +// ^[a-zA-Z0-9][a-zA-Z0-9_.-]+$ +// +pub fn verify_id(id: &str) -> Result<(), Error> { + let mut chars = id.chars(); + + let valid = matches!(chars.next(), Some(first) if first.is_alphanumeric() + && id.len() > 1 + && chars.all(|c| c.is_alphanumeric() || ['.', '-', '_'].contains(&c))); + + match valid { + true => Ok(()), + false => Err(Error::InvalidContainerID(id.to_string())), + } +} + +// check and reserve valid environment variables +// invalid env var may cause panic, refer to https://doc.rust-lang.org/std/env/fn.set_var.html#panics +// key should not: +// * contain NUL character '\0' +// * contain ASCII equal sign '=' +// * be empty +// value should not: +// * contain NUL character '\0' +pub fn valid_env(e: &str) -> Option<(&str, &str)> { + // split the env str by '=' at the first time to ensure there is no '=' in key, + // and also to ensure there is at least '=' in env str + if let Some((key, value)) = e.split_once('=') { + if !key.is_empty() && !key.as_bytes().contains(&b'\0') && !value.as_bytes().contains(&b'\0') + { + return Some((key.trim(), value.trim())); + } + } + + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_verify_cid() { + #[derive(Debug)] + struct TestData<'a> { + id: &'a str, + 
expect_error: bool, + } + + let tests = &[ + TestData { + // Cannot be blank + id: "", + expect_error: true, + }, + TestData { + // Cannot be a space + id: " ", + expect_error: true, + }, + TestData { + // Must start with an alphanumeric + id: ".", + expect_error: true, + }, + TestData { + // Must start with an alphanumeric + id: "-", + expect_error: true, + }, + TestData { + // Must start with an alphanumeric + id: "_", + expect_error: true, + }, + TestData { + // Must start with an alphanumeric + id: " a", + expect_error: true, + }, + TestData { + // Must start with an alphanumeric + id: ".a", + expect_error: true, + }, + TestData { + // Must start with an alphanumeric + id: "-a", + expect_error: true, + }, + TestData { + // Must start with an alphanumeric + id: "_a", + expect_error: true, + }, + TestData { + // Must start with an alphanumeric + id: "..", + expect_error: true, + }, + TestData { + // Too short + id: "a", + expect_error: true, + }, + TestData { + // Too short + id: "z", + expect_error: true, + }, + TestData { + // Too short + id: "A", + expect_error: true, + }, + TestData { + // Too short + id: "Z", + expect_error: true, + }, + TestData { + // Too short + id: "0", + expect_error: true, + }, + TestData { + // Too short + id: "9", + expect_error: true, + }, + TestData { + // Must start with an alphanumeric + id: "-1", + expect_error: true, + }, + TestData { + id: "/", + expect_error: true, + }, + TestData { + id: "a/", + expect_error: true, + }, + TestData { + id: "a/../", + expect_error: true, + }, + TestData { + id: "../a", + expect_error: true, + }, + TestData { + id: "../../a", + expect_error: true, + }, + TestData { + id: "../../../a", + expect_error: true, + }, + TestData { + id: "foo/../bar", + expect_error: true, + }, + TestData { + id: "foo bar", + expect_error: true, + }, + TestData { + id: "a.", + expect_error: false, + }, + TestData { + id: "a..", + expect_error: false, + }, + TestData { + id: "aa", + expect_error: false, + }, + TestData { + id: "aa.", + expect_error: false, + }, + TestData { + id: "hello..world", + expect_error: false, + }, + TestData { + id: "hello/../world", + expect_error: true, + }, + TestData { + id: "aa1245124sadfasdfgasdga.", + expect_error: false, + }, + TestData { + id: "aAzZ0123456789_.-", + expect_error: false, + }, + TestData { + id: "abcdefghijklmnopqrstuvwxyz0123456789.-_", + expect_error: false, + }, + TestData { + id: "0123456789abcdefghijklmnopqrstuvwxyz.-_", + expect_error: false, + }, + TestData { + id: " abcdefghijklmnopqrstuvwxyz0123456789.-_", + expect_error: true, + }, + TestData { + id: ".abcdefghijklmnopqrstuvwxyz0123456789.-_", + expect_error: true, + }, + TestData { + id: "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_", + expect_error: false, + }, + TestData { + id: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ.-_", + expect_error: false, + }, + TestData { + id: " ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_", + expect_error: true, + }, + TestData { + id: ".ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_", + expect_error: true, + }, + TestData { + id: "/a/b/c", + expect_error: true, + }, + TestData { + id: "a/b/c", + expect_error: true, + }, + TestData { + id: "foo/../../../etc/passwd", + expect_error: true, + }, + TestData { + id: "../../../../../../etc/motd", + expect_error: true, + }, + TestData { + id: "/etc/passwd", + expect_error: true, + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = verify_id(d.id); + + let msg = format!("{}, result: {:?}", msg, result); + + if 
result.is_ok() { + assert!(!d.expect_error, "{}", msg); + } else { + assert!(d.expect_error, "{}", msg); + } + } + } + + #[test] + fn test_valid_env() { + let env = valid_env("a=b=c"); + assert_eq!(Some(("a", "b=c")), env); + + let env = valid_env("a=b"); + assert_eq!(Some(("a", "b")), env); + let env = valid_env("a =b"); + assert_eq!(Some(("a", "b")), env); + + let env = valid_env(" a =b"); + assert_eq!(Some(("a", "b")), env); + + let env = valid_env("a= b"); + assert_eq!(Some(("a", "b")), env); + + let env = valid_env("a=b "); + assert_eq!(Some(("a", "b")), env); + let env = valid_env("a=b c "); + assert_eq!(Some(("a", "b c")), env); + + let env = valid_env("=b"); + assert_eq!(None, env); + + let env = valid_env("a="); + assert_eq!(Some(("a", "")), env); + + let env = valid_env("a=="); + assert_eq!(Some(("a", "=")), env); + + let env = valid_env("a"); + assert_eq!(None, env); + + let invalid_str = vec![97, b'\0', 98]; + let invalid_string = std::str::from_utf8(&invalid_str).unwrap(); + + let invalid_env = format!("{}=value", invalid_string); + let env = valid_env(&invalid_env); + assert_eq!(None, env); + + let invalid_env = format!("key={}", invalid_string); + let env = valid_env(&invalid_env); + assert_eq!(None, env); + } +} diff --git a/src/libs/kata-sys-util/test/texture/sys/devices/system/cpu/cpu0/node0/cpulist b/src/libs/kata-sys-util/test/texture/sys/devices/system/cpu/cpu0/node0/cpulist new file mode 100644 index 000000000000..4cfe9ed52f53 --- /dev/null +++ b/src/libs/kata-sys-util/test/texture/sys/devices/system/cpu/cpu0/node0/cpulist @@ -0,0 +1 @@ +0,1-63 diff --git a/src/libs/kata-sys-util/test/texture/sys/devices/system/cpu/cpu0/node0/cpumap b/src/libs/kata-sys-util/test/texture/sys/devices/system/cpu/cpu0/node0/cpumap new file mode 100644 index 000000000000..2f3bb0650db2 --- /dev/null +++ b/src/libs/kata-sys-util/test/texture/sys/devices/system/cpu/cpu0/node0/cpumap @@ -0,0 +1 @@ +ffffffff,ffffffff diff --git a/src/libs/kata-sys-util/test/texture/sys/devices/system/cpu/cpu1/node0/cpulist b/src/libs/kata-sys-util/test/texture/sys/devices/system/cpu/cpu1/node0/cpulist new file mode 100644 index 000000000000..4cfe9ed52f53 --- /dev/null +++ b/src/libs/kata-sys-util/test/texture/sys/devices/system/cpu/cpu1/node0/cpulist @@ -0,0 +1 @@ +0,1-63 diff --git a/src/libs/kata-sys-util/test/texture/sys/devices/system/cpu/cpu64/node1/cpulist b/src/libs/kata-sys-util/test/texture/sys/devices/system/cpu/cpu64/node1/cpulist new file mode 100644 index 000000000000..900731ffd51f --- /dev/null +++ b/src/libs/kata-sys-util/test/texture/sys/devices/system/cpu/cpu64/node1/cpulist @@ -0,0 +1 @@ +64 diff --git a/src/libs/kata-sys-util/test/texture/sys/devices/system/node/node0/cpulist b/src/libs/kata-sys-util/test/texture/sys/devices/system/node/node0/cpulist new file mode 100644 index 000000000000..3498c1f2daa5 --- /dev/null +++ b/src/libs/kata-sys-util/test/texture/sys/devices/system/node/node0/cpulist @@ -0,0 +1 @@ +0-63 diff --git a/src/libs/kata-sys-util/test/texture/sys/devices/system/node/node0/cpumap b/src/libs/kata-sys-util/test/texture/sys/devices/system/node/node0/cpumap new file mode 100644 index 000000000000..2f3bb0650db2 --- /dev/null +++ b/src/libs/kata-sys-util/test/texture/sys/devices/system/node/node0/cpumap @@ -0,0 +1 @@ +ffffffff,ffffffff diff --git a/src/libs/kata-sys-util/test/texture/sys/devices/system/node/node1/cpulist b/src/libs/kata-sys-util/test/texture/sys/devices/system/node/node1/cpulist new file mode 100644 index 000000000000..900731ffd51f --- /dev/null +++ 
b/src/libs/kata-sys-util/test/texture/sys/devices/system/node/node1/cpulist @@ -0,0 +1 @@ +64 diff --git a/src/libs/kata-sys-util/test/texture/sys/devices/system/node/node1/cpumap b/src/libs/kata-sys-util/test/texture/sys/devices/system/node/node1/cpumap new file mode 100644 index 000000000000..62fe293eb1fd --- /dev/null +++ b/src/libs/kata-sys-util/test/texture/sys/devices/system/node/node1/cpumap @@ -0,0 +1 @@ +1,00000000,00000000 diff --git a/src/libs/kata-types/.gitignore b/src/libs/kata-types/.gitignore new file mode 100644 index 000000000000..03314f77b5aa --- /dev/null +++ b/src/libs/kata-types/.gitignore @@ -0,0 +1 @@ +Cargo.lock diff --git a/src/libs/kata-types/Cargo.toml b/src/libs/kata-types/Cargo.toml new file mode 100644 index 000000000000..57e21c6731c8 --- /dev/null +++ b/src/libs/kata-types/Cargo.toml @@ -0,0 +1,40 @@ +[package] +name = "kata-types" +version = "0.1.0" +description = "Constants and data types shared by Kata Containers components" +keywords = ["kata", "container", "runtime"] +authors = ["The Kata Containers community "] +repository = "https://github.com/kata-containers/kata-containers.git" +homepage = "https://katacontainers.io/" +readme = "README.md" +license = "Apache-2.0" +edition = "2018" + +[dependencies] +bitmask-enum = "2.1.0" +anyhow = "1.0" +base64 = "0.13.0" +byte-unit = "3.1.4" +glob = "0.3.0" +lazy_static = "1.4.0" +num_cpus = "1.13.1" +regex = "1.5.6" +serde = { version = "1.0.100", features = ["derive"] } +slog = "2.5.2" +slog-scope = "4.4.0" +serde_json = "1.0.73" +thiserror = "1.0" +toml = "0.5.8" +serde-enum-str = "0.4" + +oci = { path = "../oci" } +safe-path = { path = "../safe-path" } + +[dev-dependencies] +tempfile = "3" +test-utils = { path = "../test-utils" } +nix = "0.24.2" + +[features] +default = [] +enable-vendor = [] diff --git a/src/libs/kata-types/README.md b/src/libs/kata-types/README.md new file mode 100644 index 000000000000..334c879e20a3 --- /dev/null +++ b/src/libs/kata-types/README.md @@ -0,0 +1,18 @@ +# kata-types + +This crate is a collection of constants and data types shared by multiple +[Kata Containers](https://github.com/kata-containers/kata-containers/) components. + +It defines constants and data types used by multiple Kata Containers components. Those constants +and data types may be defined by Kata Containers or by other projects/specifications, such as: +- [Containerd](https://github.com/containerd/containerd) +- [Kubelet](https://github.com/kubernetes/kubelet) + +## Support + +**Operating Systems**: +- Linux + +## License + +This code is licensed under [Apache-2.0](../../../LICENSE). diff --git a/src/libs/kata-types/src/annotations/cri_containerd.rs b/src/libs/kata-types/src/annotations/cri_containerd.rs new file mode 100644 index 000000000000..8b2d63fafd75 --- /dev/null +++ b/src/libs/kata-types/src/annotations/cri_containerd.rs @@ -0,0 +1,24 @@ +// Copyright (c) 2019 Alibaba Cloud +// Copyright (c) 2019 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#![allow(missing_docs)] + +pub const CONTAINER_TYPE_LABEL_KEY: &str = "io.kubernetes.cri.container-type"; +pub const SANDBOX: &str = "sandbox"; +pub const CONTAINER: &str = "container"; + +pub const SANDBOX_ID_LABEL_KEY: &str = "io.kubernetes.cri.sandbox-id"; + +// Ref: https://pkg.go.dev/github.com/containerd/containerd@v1.6.7/pkg/cri/annotations +// SandboxCPU annotations are based on the initial CPU configuration for the sandbox. 
This is calculated as the +// sum of container CPU resources, optionally provided by Kubelet (introduced in 1.23) as part of the PodSandboxConfig +pub const SANDBOX_CPU_QUOTA_KEY: &str = "io.kubernetes.cri.sandbox-cpu-quota"; +pub const SANDBOX_CPU_PERIOD_KEY: &str = "io.kubernetes.cri.sandbox-cpu-period"; +pub const SANDBOX_CPU_SHARE_KEY: &str = "io.kubernetes.cri.sandbox-cpu-shares"; + +// SandboxMemory is the initial amount of memory associated with this sandbox. This is calculated as the sum +// of container memory, optionally provided by Kubelet (introduced in 1.23) as part of the PodSandboxConfig +pub const SANDBOX_MEM_KEY: &str = "io.kubernetes.cri.sandbox-memory"; diff --git a/src/libs/kata-types/src/annotations/crio.rs b/src/libs/kata-types/src/annotations/crio.rs new file mode 100644 index 000000000000..c8b2311f844d --- /dev/null +++ b/src/libs/kata-types/src/annotations/crio.rs @@ -0,0 +1,13 @@ +// Copyright (c) 2019 Alibaba Cloud +// Copyright (c) 2019 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#![allow(missing_docs)] + +pub const CONTAINER_TYPE_LABEL_KEY: &str = "io.kubernetes.cri.container-type"; +pub const SANDBOX: &str = "sandbox"; +pub const CONTAINER: &str = "container"; + +pub const SANDBOX_ID_LABEL_KEY: &str = "io.kubernetes.cri-o.SandboxID"; diff --git a/src/libs/kata-types/src/annotations/dockershim.rs b/src/libs/kata-types/src/annotations/dockershim.rs new file mode 100644 index 000000000000..df1279dc5ac1 --- /dev/null +++ b/src/libs/kata-types/src/annotations/dockershim.rs @@ -0,0 +1,23 @@ +// Copyright (c) 2019 Alibaba Cloud +// Copyright (c) 2019 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#![allow(missing_docs)] + +//! Copied from k8s.io/pkg/kubelet/dockershim/docker_service.go, used to identify whether a docker +//! container is a sandbox or a regular container, will be removed after defining those as public +//! fields in dockershim. + +/// ContainerTypeLabelKey is the container type (podsandbox or container) of key. +pub const CONTAINER_TYPE_LABEL_KEY: &str = "io.kubernetes.docker.type"; + +/// ContainerTypeLabelSandbox represents a sandbox sandbox container. +pub const SANDBOX: &str = "podsandbox"; + +/// ContainerTypeLabelContainer represents a container running within a sandbox. +pub const CONTAINER: &str = "container"; + +/// SandboxIDLabelKey is the sandbox ID annotation. +pub const SANDBOX_ID_LABEL_KEY: &str = "io.kubernetes.sandbox.id"; diff --git a/src/libs/kata-types/src/annotations/mod.rs b/src/libs/kata-types/src/annotations/mod.rs new file mode 100644 index 000000000000..0b17b10628ef --- /dev/null +++ b/src/libs/kata-types/src/annotations/mod.rs @@ -0,0 +1,993 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// Copyright (c) 2019 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::collections::HashMap; +use std::fs::File; +use std::io::{self, BufReader, Result}; +use std::result::{self}; +use std::u32; + +use serde::Deserialize; + +use crate::config::default::DEFAULT_AGENT_TYPE_NAME; +use crate::config::default::DEFAULT_HYPERVISOR; +use crate::config::default::DEFAULT_RUNTIME_NAME; +use crate::config::hypervisor::{get_hypervisor_plugin, HugePageType}; + +use crate::config::TomlConfig; +use crate::sl; + +use self::cri_containerd::{SANDBOX_CPU_PERIOD_KEY, SANDBOX_CPU_QUOTA_KEY, SANDBOX_MEM_KEY}; + +/// CRI-containerd specific annotations. +pub mod cri_containerd; + +/// CRI-O specific annotations. +pub mod crio; + +/// Dockershim specific annotations. 
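
The cri_containerd, crio and dockershim annotation modules added in this patch all expose the same container-type concept under runtime-specific keys. As a rough illustration only, here is a hypothetical helper (not part of this patch) that a component depending on kata-types might use to classify a workload from its annotations; the enum and function names are invented for the sketch:

```rust
use std::collections::HashMap;

use kata_types::annotations::{cri_containerd, crio, dockershim};

/// Whether the annotations describe a pod sandbox or a regular container.
#[derive(Debug, PartialEq)]
enum ContainerKind {
    Sandbox,
    Container,
    Unknown,
}

/// Look up the container-type label written by CRI containerd, CRI-O or the
/// legacy dockershim and map it to a common enum.
fn classify(annotations: &HashMap<String, String>) -> ContainerKind {
    let checks = [
        (cri_containerd::CONTAINER_TYPE_LABEL_KEY, cri_containerd::SANDBOX),
        (crio::CONTAINER_TYPE_LABEL_KEY, crio::SANDBOX),
        (dockershim::CONTAINER_TYPE_LABEL_KEY, dockershim::SANDBOX),
    ];

    for (key, sandbox_value) in checks {
        if let Some(value) = annotations.get(key) {
            return if value.as_str() == sandbox_value {
                ContainerKind::Sandbox
            } else {
                ContainerKind::Container
            };
        }
    }
    ContainerKind::Unknown
}

fn main() {
    let mut annotations = HashMap::new();
    annotations.insert(
        cri_containerd::CONTAINER_TYPE_LABEL_KEY.to_string(),
        cri_containerd::SANDBOX.to_string(),
    );
    assert_eq!(classify(&annotations), ContainerKind::Sandbox);
}
```
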
+pub mod dockershim; + +/// Third-party annotations. +pub mod thirdparty; + +// Common section +/// Prefix for Kata specific annotations +pub const KATA_ANNO_PREFIX: &str = "io.katacontainers."; +/// Prefix for Kata configuration annotations +pub const KATA_ANNO_CFG_PREFIX: &str = "io.katacontainers.config."; +/// Prefix for Kata container annotations +pub const KATA_ANNO_CONTAINER_PREFIX: &str = "io.katacontainers.container."; +/// The annotation key to fetch runtime configuration file. +pub const SANDBOX_CFG_PATH_KEY: &str = "io.katacontainers.config_path"; + +// OCI section +/// The annotation key to fetch the OCI configuration file path. +pub const BUNDLE_PATH_KEY: &str = "io.katacontainers.pkg.oci.bundle_path"; +/// The annotation key to fetch container type. +pub const CONTAINER_TYPE_KEY: &str = "io.katacontainers.pkg.oci.container_type"; + +// Container resource related annotations +/// Prefix for Kata container resource related annotations. +pub const KATA_ANNO_CONTAINER_RES_PREFIX: &str = "io.katacontainers.container.resource"; +/// A container annotation to specify the Resources.Memory.Swappiness. +pub const KATA_ANNO_CONTAINER_RES_SWAPPINESS: &str = + "io.katacontainers.container.resource.swappiness"; +/// A container annotation to specify the Resources.Memory.Swap. +pub const KATA_ANNO_CONTAINER_RES_SWAP_IN_BYTES: &str = + "io.katacontainers.container.resource.swap_in_bytes"; + +// Agent related annotations +/// Prefix for Agent configurations. +pub const KATA_ANNO_CFG_AGENT_PREFIX: &str = "io.katacontainers.config.agent."; +/// KernelModules is the annotation key for passing the list of kernel modules and their parameters +/// that will be loaded in the guest kernel. +/// +/// Semicolon separated list of kernel modules and their parameters. These modules will be loaded +/// in the guest kernel using modprobe(8). +/// The following example can be used to load two kernel modules with parameters +/// +/// annotations: +/// io.katacontainers.config.agent.kernel_modules: "e1000e InterruptThrottleRate=3000,3000,3000 EEE=1; i915 enable_ppgtt=0" +/// +/// The first word is considered as the module name and the rest as its parameters. +pub const KATA_ANNO_CFG_KERNEL_MODULES: &str = "io.katacontainers.config.agent.kernel_modules"; +/// A sandbox annotation to enable tracing for the agent. +pub const KATA_ANNO_CFG_AGENT_TRACE: &str = "io.katacontainers.config.agent.enable_tracing"; +/// An annotation to specify the size of the pipes created for containers. +pub const KATA_ANNO_CFG_AGENT_CONTAINER_PIPE_SIZE: &str = + "io.katacontainers.config.agent.container_pipe_size"; +/// An annotation key to specify the size of the pipes created for containers. +pub const CONTAINER_PIPE_SIZE_KERNEL_PARAM: &str = "agent.container_pipe_size"; + +// Hypervisor related annotations +/// Prefix for Hypervisor configurations. +pub const KATA_ANNO_CFG_HYPERVISOR_PREFIX: &str = "io.katacontainers.config.hypervisor."; +/// A sandbox annotation for passing a per container path pointing at the hypervisor that will run +/// the container VM. +pub const KATA_ANNO_CFG_HYPERVISOR_PATH: &str = "io.katacontainers.config.hypervisor.path"; +/// A sandbox annotation for passing a container hypervisor binary SHA-512 hash value. +pub const KATA_ANNO_CFG_HYPERVISOR_HASH: &str = "io.katacontainers.config.hypervisor.path_hash"; +/// A sandbox annotation for passing a per container path pointing at the hypervisor control binary +/// that will run the container VM. 
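
To make the kernel_modules format described above concrete, the following self-contained sketch (std only, hypothetical helper names, not part of this patch) splits an annotation value into per-module entries using the "first word is the module name, the rest are its parameters" rule:

```rust
/// One entry from the `io.katacontainers.config.agent.kernel_modules` annotation.
#[derive(Debug, PartialEq)]
struct KernelModule {
    name: String,
    parameters: Vec<String>,
}

/// Split a semicolon separated module list, e.g.
/// "e1000e InterruptThrottleRate=3000,3000,3000 EEE=1; i915 enable_ppgtt=0".
fn parse_kernel_modules(value: &str) -> Vec<KernelModule> {
    value
        .split(';')
        .filter_map(|entry| {
            let mut words = entry.split_whitespace();
            // Skip empty entries (e.g. a trailing ';').
            let name = words.next()?.to_string();
            Some(KernelModule {
                name,
                parameters: words.map(str::to_string).collect(),
            })
        })
        .collect()
}

fn main() {
    let modules = parse_kernel_modules(
        "e1000e InterruptThrottleRate=3000,3000,3000 EEE=1; i915 enable_ppgtt=0",
    );
    assert_eq!(modules.len(), 2);
    assert_eq!(modules[0].name, "e1000e");
    assert_eq!(modules[1].parameters, vec!["enable_ppgtt=0"]);
}
```
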
+pub const KATA_ANNO_CFG_HYPERVISOR_CTLPATH: &str = "io.katacontainers.config.hypervisor.ctlpath"; +/// A sandbox annotation for passing a container hypervisor control binary SHA-512 hash value. +pub const KATA_ANNO_CFG_HYPERVISOR_CTLHASH: &str = + "io.katacontainers.config.hypervisor.hypervisorctl_hash"; +/// A sandbox annotation for passing a per container path pointing at the jailer that will constrain +/// the container VM. +pub const KATA_ANNO_CFG_HYPERVISOR_JAILER_PATH: &str = + "io.katacontainers.config.hypervisor.jailer_path"; +/// A sandbox annotation for passing a jailer binary SHA-512 hash value. +pub const KATA_ANNO_CFG_HYPERVISOR_JAILER_HASH: &str = + "io.katacontainers.config.hypervisor.jailer_hash"; +/// A sandbox annotation to enable IO to be processed in a separate thread. +/// Supported currently for virtio-scsi driver. +pub const KATA_ANNO_CFG_HYPERVISOR_ENABLE_IO_THREADS: &str = + "io.katacontainers.config.hypervisor.enable_iothreads"; +/// The hash type used for assets verification +pub const KATA_ANNO_CFG_HYPERVISOR_ASSET_HASH_TYPE: &str = + "io.katacontainers.config.hypervisor.asset_hash_type"; +/// SHA512 is the SHA-512 (64) hash algorithm +pub const SHA512: &str = "sha512"; + +// Hypervisor Block Device related annotations +/// Specify the driver to be used for block device either VirtioSCSI or VirtioBlock +pub const KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_DRIVER: &str = + "io.katacontainers.config.hypervisor.block_device_driver"; +/// A sandbox annotation that disallows a block device from being used. +pub const KATA_ANNO_CFG_HYPERVISOR_DISABLE_BLOCK_DEV_USE: &str = + "io.katacontainers.config.hypervisor.disable_block_device_use"; +/// A sandbox annotation that specifies cache-related options will be set to block devices or not. +pub const KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_CACHE_SET: &str = + "io.katacontainers.config.hypervisor.block_device_cache_set"; +/// A sandbox annotation that specifies cache-related options for block devices. +/// Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. +pub const KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_CACHE_DIRECT: &str = + "io.katacontainers.config.hypervisor.block_device_cache_direct"; +/// A sandbox annotation that specifies cache-related options for block devices. +/// Denotes whether flush requests for the device are ignored. +pub const KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_CACHE_NOFLUSH: &str = + "io.katacontainers.config.hypervisor.block_device_cache_noflush"; +/// A sandbox annotation to specify use of nvdimm device for guest rootfs image. +pub const KATA_ANNO_CFG_HYPERVISOR_DISABLE_IMAGE_NVDIMM: &str = + "io.katacontainers.config.hypervisor.disable_image_nvdimm"; +/// A sandbox annotation that specifies the memory space used for nvdimm device by the hypervisor. +pub const KATA_ANNO_CFG_HYPERVISOR_MEMORY_OFFSET: &str = + "io.katacontainers.config.hypervisor.memory_offset"; +/// A sandbox annotation to specify if vhost-user-blk/scsi is abailable on the host +pub const KATA_ANNO_CFG_HYPERVISOR_ENABLE_VHOSTUSER_STORE: &str = + "io.katacontainers.config.hypervisor.enable_vhost_user_store"; +/// A sandbox annotation to specify the directory path where vhost-user devices related folders, +/// sockets and device nodes should be. 
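
Several of the annotations above (enable_iothreads, the block_device_cache_* flags, enable_vhost_user_store) carry boolean values. The handling added later in this file ultimately relies on Rust's str::parse::<bool>, so only the literal strings "true" and "false" are accepted after trimming; a minimal std-only reminder:

```rust
fn main() {
    // Annotation values arrive as strings; `Annotation::get()` trims them and
    // the typed lookup then uses `str::parse`, which only accepts the literal
    // strings "true" and "false".
    let enable_iothreads = " true ";
    let parsed: bool = enable_iothreads.trim().parse().expect("not a boolean");
    assert!(parsed);

    // Anything else ("1", "yes", ...) is rejected and surfaces as the
    // "parse bool error" returned by the annotation handling code.
    assert!("1".parse::<bool>().is_err());
}
```
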
+pub const KATA_ANNO_CFG_HYPERVISOR_VHOSTUSER_STORE_PATH: &str = + "io.katacontainers.config.hypervisor.vhost_user_store_path"; + +// Hypervisor Guest Boot related annotations +/// A sandbox annotation for passing a per container path pointing at the kernel needed to boot +/// the container VM. +pub const KATA_ANNO_CFG_HYPERVISOR_KERNEL_PATH: &str = "io.katacontainers.config.hypervisor.kernel"; +/// A sandbox annotation for passing a container kernel image SHA-512 hash value. +pub const KATA_ANNO_CFG_HYPERVISOR_KERNEL_HASH: &str = + "io.katacontainers.config.hypervisor.kernel_hash"; +/// A sandbox annotation for passing a per container path pointing at the guest image that will run +/// in the container VM. +/// A sandbox annotation for passing additional guest kernel parameters. +pub const KATA_ANNO_CFG_HYPERVISOR_KERNEL_PARAMS: &str = + "io.katacontainers.config.hypervisor.kernel_params"; +/// A sandbox annotation for passing a container guest image path. +pub const KATA_ANNO_CFG_HYPERVISOR_IMAGE_PATH: &str = "io.katacontainers.config.hypervisor.image"; +/// A sandbox annotation for passing a container guest image SHA-512 hash value. +pub const KATA_ANNO_CFG_HYPERVISOR_IMAGE_HASH: &str = + "io.katacontainers.config.hypervisor.image_hash"; +/// A sandbox annotation for passing a per container path pointing at the initrd that will run +/// in the container VM. +pub const KATA_ANNO_CFG_HYPERVISOR_INITRD_PATH: &str = "io.katacontainers.config.hypervisor.initrd"; +/// A sandbox annotation for passing a container guest initrd SHA-512 hash value. +pub const KATA_ANNO_CFG_HYPERVISOR_INITRD_HASH: &str = + "io.katacontainers.config.hypervisor.initrd_hash"; +/// A sandbox annotation for passing a per container path pointing at the guest firmware that will +/// run the container VM. +pub const KATA_ANNO_CFG_HYPERVISOR_FIRMWARE_PATH: &str = + "io.katacontainers.config.hypervisor.firmware"; +/// A sandbox annotation for passing a container guest firmware SHA-512 hash value. +pub const KATA_ANNO_CFG_HYPERVISOR_FIRMWARE_HASH: &str = + "io.katacontainers.config.hypervisor.firmware_hash"; + +// Hypervisor CPU related annotations +/// A sandbox annotation to specify cpu specific features. +pub const KATA_ANNO_CFG_HYPERVISOR_CPU_FEATURES: &str = + "io.katacontainers.config.hypervisor.cpu_features"; +/// A sandbox annotation for passing the default vCPUs assigned for a VM by the hypervisor. +pub const KATA_ANNO_CFG_HYPERVISOR_DEFAULT_VCPUS: &str = + "io.katacontainers.config.hypervisor.default_vcpus"; +/// A sandbox annotation that specifies the maximum number of vCPUs allocated for the VM by the hypervisor. +pub const KATA_ANNO_CFG_HYPERVISOR_DEFAULT_MAX_VCPUS: &str = + "io.katacontainers.config.hypervisor.default_max_vcpus"; + +// Hypervisor Device related annotations +/// A sandbox annotation used to indicate if devices need to be hotplugged on the root bus instead +/// of a bridge. +pub const KATA_ANNO_CFG_HYPERVISOR_HOTPLUG_VFIO_ON_ROOT_BUS: &str = + "io.katacontainers.config.hypervisor.hotplug_vfio_on_root_bus"; +/// PCIeRootPort is used to indicate the number of PCIe Root Port devices +pub const KATA_ANNO_CFG_HYPERVISOR_PCIE_ROOT_PORT: &str = + "io.katacontainers.config.hypervisor.pcie_root_port"; +/// A sandbox annotation to specify if the VM should have a vIOMMU device. 
+pub const KATA_ANNO_CFG_HYPERVISOR_IOMMU: &str = "io.katacontainers.config.hypervisor.enable_iommu";
+/// A sandbox annotation to enable IOMMU_PLATFORM for hypervisor devices.
+pub const KATA_ANNO_CFG_HYPERVISOR_IOMMU_PLATFORM: &str =
+    "io.katacontainers.config.hypervisor.enable_iommu_platform";
+
+// Hypervisor Machine related annotations
+/// A sandbox annotation to specify the type of machine being emulated by the hypervisor.
+pub const KATA_ANNO_CFG_HYPERVISOR_MACHINE_TYPE: &str =
+    "io.katacontainers.config.hypervisor.machine_type";
+/// A sandbox annotation to specify machine specific accelerators for the hypervisor.
+pub const KATA_ANNO_CFG_HYPERVISOR_MACHINE_ACCELERATORS: &str =
+    "io.katacontainers.config.hypervisor.machine_accelerators";
+/// EntropySource is a sandbox annotation to specify the path to a host source of
+/// entropy (/dev/random, /dev/urandom or a real hardware RNG device).
+pub const KATA_ANNO_CFG_HYPERVISOR_ENTROPY_SOURCE: &str =
+    "io.katacontainers.config.hypervisor.entropy_source";
+
+// Hypervisor Memory related annotations
+/// A sandbox annotation for the memory assigned for a VM by the hypervisor.
+pub const KATA_ANNO_CFG_HYPERVISOR_DEFAULT_MEMORY: &str =
+    "io.katacontainers.config.hypervisor.default_memory";
+/// A sandbox annotation to specify the memory slots assigned to the VM by the hypervisor.
+pub const KATA_ANNO_CFG_HYPERVISOR_MEMORY_SLOTS: &str =
+    "io.katacontainers.config.hypervisor.memory_slots";
+/// A sandbox annotation to specify whether VM memory should be pre-allocated by the hypervisor.
+pub const KATA_ANNO_CFG_HYPERVISOR_MEMORY_PREALLOC: &str =
+    "io.katacontainers.config.hypervisor.enable_mem_prealloc";
+/// A sandbox annotation to specify if the memory should be pre-allocated from huge pages.
+pub const KATA_ANNO_CFG_HYPERVISOR_ENABLE_HUGEPAGES: &str =
+    "io.katacontainers.config.hypervisor.enable_hugepages";
+/// A sandbox annotation to specify the huge page mode of the memory backend.
+pub const KATA_ANNO_CFG_HYPERVISOR_HUGEPAGE_TYPE: &str =
+    "io.katacontainers.config.hypervisor.hugepage_type";
+/// A sandbox annotation to specify the file based memory backend root directory.
+pub const KATA_ANNO_CFG_HYPERVISOR_FILE_BACKED_MEM_ROOT_DIR: &str =
+    "io.katacontainers.config.hypervisor.file_mem_backend";
+/// A sandbox annotation that is used to enable/disable virtio-mem.
+pub const KATA_ANNO_CFG_HYPERVISOR_VIRTIO_MEM: &str =
+    "io.katacontainers.config.hypervisor.enable_virtio_mem";
+/// A sandbox annotation to enable swap of VM memory.
+pub const KATA_ANNO_CFG_HYPERVISOR_ENABLE_SWAP: &str =
+    "io.katacontainers.config.hypervisor.enable_swap";
+/// A sandbox annotation to enable swap in the guest.
+pub const KATA_ANNO_CFG_HYPERVISOR_ENABLE_GUEST_SWAP: &str =
+    "io.katacontainers.config.hypervisor.enable_guest_swap";
+
+// Hypervisor Network related annotations
+/// A sandbox annotation to specify if vhost-net is not available on the host.
+pub const KATA_ANNO_CFG_HYPERVISOR_DISABLE_VHOST_NET: &str =
+    "io.katacontainers.config.hypervisor.disable_vhost_net";
+/// A sandbox annotation that specifies the max rate on network I/O inbound bandwidth.
+pub const KATA_ANNO_CFG_HYPERVISOR_RX_RATE_LIMITER_MAX_RATE: &str =
+    "io.katacontainers.config.hypervisor.rx_rate_limiter_max_rate";
+/// A sandbox annotation that specifies the max rate on network I/O outbound bandwidth.
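
The default_memory annotation above is parsed further down in this file with the byte-unit crate before being stored as MiB. A standalone sketch of that conversion, assuming byte-unit 3.x as pinned in this crate's Cargo.toml:

```rust
use std::str::FromStr;

use byte_unit::{Byte, ByteUnit};

fn main() {
    // A human readable size, as it could appear in
    // "io.katacontainers.config.hypervisor.default_memory".
    let value = "1GiB";

    // Same conversion the annotation handler performs: parse the string,
    // then express it in MiB before storing it in the hypervisor config.
    let bytes = Byte::from_str(value).expect("invalid size string");
    let mib = bytes.get_adjusted_unit(ByteUnit::MiB).get_value() as u32;

    assert_eq!(mib, 1024);
}
```
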
+pub const KATA_ANNO_CFG_HYPERVISOR_TX_RATE_LIMITER_MAX_RATE: &str =
+    "io.katacontainers.config.hypervisor.tx_rate_limiter_max_rate";
+
+// Hypervisor Security related annotations
+/// A sandbox annotation to specify the path within the VM that will be used for 'drop-in' hooks.
+pub const KATA_ANNO_CFG_HYPERVISOR_GUEST_HOOK_PATH: &str =
+    "io.katacontainers.config.hypervisor.guest_hook_path";
+/// A sandbox annotation to enable a rootless hypervisor (only supported in QEMU currently).
+pub const KATA_ANNO_CFG_HYPERVISOR_ENABLE_ROOTLESS_HYPERVISOR: &str =
+    "io.katacontainers.config.hypervisor.rootless";
+
+// Hypervisor Shared File System related annotations
+/// A sandbox annotation to specify the shared file system type, either inline-virtio-fs (default), virtio-9p, virtio-fs or virtio-fs-nydus.
+pub const KATA_ANNO_CFG_HYPERVISOR_SHARED_FS: &str =
+    "io.katacontainers.config.hypervisor.shared_fs";
+/// A sandbox annotation to specify the virtio-fs vhost-user daemon path.
+pub const KATA_ANNO_CFG_HYPERVISOR_VIRTIO_FS_DAEMON: &str =
+    "io.katacontainers.config.hypervisor.virtio_fs_daemon";
+/// A sandbox annotation to specify the cache mode for fs version cache.
+pub const KATA_ANNO_CFG_HYPERVISOR_VIRTIO_FS_CACHE: &str =
+    "io.katacontainers.config.hypervisor.virtio_fs_cache";
+/// A sandbox annotation to specify the DAX cache size in MiB.
+pub const KATA_ANNO_CFG_HYPERVISOR_VIRTIO_FS_CACHE_SIZE: &str =
+    "io.katacontainers.config.hypervisor.virtio_fs_cache_size";
+/// A sandbox annotation to pass options to the virtiofsd daemon.
+pub const KATA_ANNO_CFG_HYPERVISOR_VIRTIO_FS_EXTRA_ARGS: &str =
+    "io.katacontainers.config.hypervisor.virtio_fs_extra_args";
+/// A sandbox annotation to specify the msize for 9p shares.
+pub const KATA_ANNO_CFG_HYPERVISOR_MSIZE_9P: &str = "io.katacontainers.config.hypervisor.msize_9p";
+
+// Runtime related annotations
+/// Prefix for Runtime configurations.
+pub const KATA_ANNO_CFG_RUNTIME_PREFIX: &str = "io.katacontainers.config.runtime.";
+/// Runtime name.
+pub const KATA_ANNO_CFG_RUNTIME_NAME: &str = "io.katacontainers.config.runtime.name";
+/// Hypervisor name.
+pub const KATA_ANNO_CFG_RUNTIME_HYPERVISOR: &str =
+    "io.katacontainers.config.runtime.hypervisor_name";
+/// Agent name.
+pub const KATA_ANNO_CFG_RUNTIME_AGENT: &str = "io.katacontainers.config.runtime.agent_name";
+/// A sandbox annotation that determines if seccomp should be applied inside the guest.
+pub const KATA_ANNO_CFG_DISABLE_GUEST_SECCOMP: &str =
+    "io.katacontainers.config.runtime.disable_guest_seccomp";
+/// A sandbox annotation that determines if pprof is enabled.
+pub const KATA_ANNO_CFG_ENABLE_PPROF: &str = "io.katacontainers.config.runtime.enable_pprof";
+/// A sandbox annotation that determines if experimental features are enabled.
+pub const KATA_ANNO_CFG_EXPERIMENTAL: &str = "io.katacontainers.config.runtime.experimental";
+/// A sandbox annotation that determines how the VM should be connected to the container network
+/// interface.
+pub const KATA_ANNO_CFG_INTER_NETWORK_MODEL: &str =
+    "io.katacontainers.config.runtime.internetworking_model";
+/// SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup.
+pub const KATA_ANNO_CFG_SANDBOX_CGROUP_ONLY: &str =
+    "io.katacontainers.config.runtime.sandbox_cgroup_only";
+/// A sandbox annotation that determines whether to create a netns for the hypervisor process.
+pub const KATA_ANNO_CFG_DISABLE_NEW_NETNS: &str = + "io.katacontainers.config.runtime.disable_new_netns"; +/// A sandbox annotation to specify how attached VFIO devices should be treated. +pub const KATA_ANNO_CFG_VFIO_MODE: &str = "io.katacontainers.config.runtime.vfio_mode"; + +/// A sandbox annotation used to specify prefetch_files.list host path container image +/// being used, +/// and runtime will pass it to Hypervisor to search for corresponding prefetch list file. +/// "io.katacontainers.config.hypervisor.prefetch_files.list" +/// = "/path/to//xyz.com/fedora:36/prefetch_file.list" +pub const KATA_ANNO_CFG_HYPERVISOR_PREFETCH_FILES_LIST: &str = + "io.katacontainers.config.hypervisor.prefetch_files.list"; + +/// A sandbox annotation for sandbox level volume sharing with host. +pub const KATA_ANNO_CFG_SANDBOX_BIND_MOUNTS: &str = + "io.katacontainers.config.runtime.sandbox_bind_mounts"; + +/// A helper structure to query configuration information by check annotations. +#[derive(Debug, Default, Deserialize)] +pub struct Annotation { + annotations: HashMap, +} + +impl From> for Annotation { + fn from(annotations: HashMap) -> Self { + Annotation { annotations } + } +} + +impl Annotation { + /// Create a new instance of [`Annotation`]. + pub fn new(annotations: HashMap) -> Annotation { + Annotation { annotations } + } + + /// Deserialize an object from a json string. + pub fn deserialize(path: &str) -> Result + where + for<'a> T: Deserialize<'a>, + { + let f = BufReader::new(File::open(path)?); + Ok(serde_json::from_reader(f)?) + } + + /// Get an immutable reference to the annotation hashmap. + pub fn get_annotations(&self) -> &HashMap { + &self.annotations + } + + /// Get a mutable reference to the annotation hashmap. + pub fn get_annotations_mut(&mut self) -> &mut HashMap { + &mut self.annotations + } + + /// Get the value of annotation with `key` + pub fn get_value( + &self, + key: &str, + ) -> result::Result, ::Err> + where + T: std::str::FromStr, + { + if let Some(value) = self.get(key) { + return value.parse::().map(Some); + } + Ok(None) + } + + /// Get the value of annotation with `key` as string. + pub fn get(&self, key: &str) -> Option { + self.annotations.get(key).map(|v| String::from(v.trim())) + } +} + +// Miscellaneous annotations. +impl Annotation { + /// Get the annotation of sandbox configuration file path. + pub fn get_sandbox_config_path(&self) -> Option { + self.get(SANDBOX_CFG_PATH_KEY) + } + + /// Get the annotation of bundle path. + pub fn get_bundle_path(&self) -> Option { + self.get(BUNDLE_PATH_KEY) + } + + /// Get the annotation of container type. + pub fn get_container_type(&self) -> Option { + self.get(CONTAINER_TYPE_KEY) + } + + /// Get the annotation of cpu quota for sandbox + pub fn get_sandbox_cpu_quota(&self) -> i64 { + let value = self + .get_value::(SANDBOX_CPU_QUOTA_KEY) + .unwrap_or(Some(0)); + value.unwrap_or(0) + } + + /// Get the annotation of cpu period for sandbox + pub fn get_sandbox_cpu_period(&self) -> u64 { + let value = self + .get_value::(SANDBOX_CPU_PERIOD_KEY) + .unwrap_or(Some(0)); + value.unwrap_or(0) + } + + /// Get the annotation of memory for sandbox + pub fn get_sandbox_mem(&self) -> i64 { + let value = self.get_value::(SANDBOX_MEM_KEY).unwrap_or(Some(0)); + value.unwrap_or(0) + } + + /// Get the annotation to specify the Resources.Memory.Swappiness. 
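
The Annotation helper above is the main entry point for reading these keys. Below is a usage sketch, assuming a dependency on this crate and the HashMap<String, String> signatures the struct is built around; the quota/period arithmetic is the conventional CFS interpretation of the containerd sandbox-sizing annotations, not something this module computes itself:

```rust
use std::collections::HashMap;

use kata_types::annotations::cri_containerd::{
    SANDBOX_CPU_PERIOD_KEY, SANDBOX_CPU_QUOTA_KEY, SANDBOX_MEM_KEY,
};
use kata_types::annotations::{Annotation, KATA_ANNO_CFG_HYPERVISOR_DEFAULT_VCPUS};

fn main() {
    let mut map = HashMap::new();
    map.insert(SANDBOX_CPU_QUOTA_KEY.to_string(), "200000".to_string());
    map.insert(SANDBOX_CPU_PERIOD_KEY.to_string(), "100000".to_string());
    // 268435456 is 256 MiB expressed in bytes.
    map.insert(SANDBOX_MEM_KEY.to_string(), "268435456".to_string());
    map.insert(
        KATA_ANNO_CFG_HYPERVISOR_DEFAULT_VCPUS.to_string(),
        " 2 ".to_string(), // values are trimmed before parsing
    );

    let anno = Annotation::new(map);

    // The dedicated accessors fall back to 0 when a key is missing or unparsable.
    let quota = anno.get_sandbox_cpu_quota(); // 200_000
    let period = anno.get_sandbox_cpu_period(); // 100_000
    let mem = anno.get_sandbox_mem(); // 268_435_456

    // Conventional CFS sizing: quota / period, rounded up, gives the number of
    // vCPUs the sandbox was sized for (2 in this case).
    let vcpus = if period > 0 {
        ((quota as f64) / (period as f64)).ceil() as u32
    } else {
        0
    };

    // Generic, typed lookup of any annotation value.
    let default_vcpus = anno
        .get_value::<i32>(KATA_ANNO_CFG_HYPERVISOR_DEFAULT_VCPUS)
        .unwrap_or(None)
        .unwrap_or(0);

    println!("vcpus={} default_vcpus={} mem={}", vcpus, default_vcpus, mem);
}
```
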
+ pub fn get_container_resource_swappiness(&self) -> Result> { + match self.get_value::(KATA_ANNO_CONTAINER_RES_SWAPPINESS) { + Ok(r) => { + if r.unwrap_or_default() > 100 { + Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("{} greater than 100", r.unwrap_or_default()), + )) + } else { + Ok(r) + } + } + Err(_e) => Err(io::Error::new( + io::ErrorKind::InvalidData, + "parse u32 error".to_string(), + )), + } + } + + /// Get the annotation to specify the Resources.Memory.Swap. + pub fn get_container_resource_swap_in_bytes(&self) -> Option { + self.get(KATA_ANNO_CONTAINER_RES_SWAP_IN_BYTES) + } +} + +impl Annotation { + /// update config info by annotation + pub fn update_config_by_annotation(&self, config: &mut TomlConfig) -> Result<()> { + if let Some(hv) = self.annotations.get(KATA_ANNO_CFG_RUNTIME_HYPERVISOR) { + if config.hypervisor.get(hv).is_some() { + config.runtime.hypervisor_name = hv.to_string(); + } + } + if let Some(ag) = self.annotations.get(KATA_ANNO_CFG_RUNTIME_AGENT) { + if config.agent.get(ag).is_some() { + config.runtime.agent_name = ag.to_string(); + } + } + + // set default values for runtime.name, runtime.hypervisor_name and runtime.agent + if config.runtime.name.is_empty() { + config.runtime.name = DEFAULT_RUNTIME_NAME.to_string() + } + if config.runtime.hypervisor_name.is_empty() { + config.runtime.hypervisor_name = DEFAULT_HYPERVISOR.to_string() + } + if config.runtime.agent_name.is_empty() { + config.runtime.agent_name = DEFAULT_AGENT_TYPE_NAME.to_string() + } + + let hypervisor_name = &config.runtime.hypervisor_name; + let agent_name = &config.runtime.agent_name; + + let bool_err = io::Error::new(io::ErrorKind::InvalidData, "parse bool error".to_string()); + let u32_err = io::Error::new(io::ErrorKind::InvalidData, "parse u32 error".to_string()); + let u64_err = io::Error::new(io::ErrorKind::InvalidData, "parse u64 error".to_string()); + let i32_err = io::Error::new(io::ErrorKind::InvalidData, "parse i32 error".to_string()); + let hv = config.hypervisor.get_mut(hypervisor_name).unwrap(); + let ag = config.agent.get_mut(agent_name).unwrap(); + for (key, value) in &self.annotations { + if hv.security_info.is_annotation_enabled(key) { + match key.as_str() { + // update hypervisor config + // Hypervisor related annotations + KATA_ANNO_CFG_HYPERVISOR_PATH => { + hv.validate_hypervisor_path(value)?; + hv.path = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_CTLPATH => { + hv.validate_hypervisor_ctlpath(value)?; + hv.ctlpath = value.to_string(); + } + + KATA_ANNO_CFG_HYPERVISOR_JAILER_PATH => { + hv.validate_jailer_path(value)?; + hv.jailer_path = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_ENABLE_IO_THREADS => match self.get_value::(key) + { + Ok(r) => { + hv.enable_iothreads = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + }, + // Hypervisor Block Device related annotations + KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_DRIVER => { + hv.blockdev_info.block_device_driver = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_DISABLE_BLOCK_DEV_USE => { + match self.get_value::(key) { + Ok(r) => { + hv.blockdev_info.disable_block_device_use = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + } + } + KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_CACHE_SET => { + match self.get_value::(key) { + Ok(r) => { + hv.blockdev_info.block_device_cache_set = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + } + } + KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_CACHE_DIRECT => { + match self.get_value::(key) { + Ok(r) => { + 
hv.blockdev_info.block_device_cache_direct = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + } + } + KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_CACHE_NOFLUSH => { + match self.get_value::(key) { + Ok(r) => { + hv.blockdev_info.block_device_cache_noflush = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + } + } + KATA_ANNO_CFG_HYPERVISOR_DISABLE_IMAGE_NVDIMM => { + match self.get_value::(key) { + Ok(r) => { + hv.blockdev_info.disable_image_nvdimm = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + } + } + KATA_ANNO_CFG_HYPERVISOR_MEMORY_OFFSET => match self.get_value::(key) { + Ok(r) => { + hv.blockdev_info.memory_offset = r.unwrap_or_default(); + } + Err(_e) => { + return Err(u64_err); + } + }, + KATA_ANNO_CFG_HYPERVISOR_ENABLE_VHOSTUSER_STORE => { + match self.get_value::(key) { + Ok(r) => { + hv.blockdev_info.enable_vhost_user_store = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + } + } + KATA_ANNO_CFG_HYPERVISOR_VHOSTUSER_STORE_PATH => { + hv.blockdev_info.validate_vhost_user_store_path(value)?; + hv.blockdev_info.vhost_user_store_path = value.to_string(); + } + // Hypervisor Guest Boot related annotations + KATA_ANNO_CFG_HYPERVISOR_KERNEL_PATH => { + hv.boot_info.validate_boot_path(value)?; + hv.boot_info.kernel = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_KERNEL_PARAMS => { + hv.boot_info.kernel_params = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_IMAGE_PATH => { + hv.boot_info.validate_boot_path(value)?; + hv.boot_info.image = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_INITRD_PATH => { + hv.boot_info.validate_boot_path(value)?; + hv.boot_info.initrd = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_FIRMWARE_PATH => { + hv.boot_info.validate_boot_path(value)?; + hv.boot_info.firmware = value.to_string(); + } + // Hypervisor CPU related annotations + KATA_ANNO_CFG_HYPERVISOR_CPU_FEATURES => { + hv.cpu_info.cpu_features = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_DEFAULT_VCPUS => match self.get_value::(key) { + Ok(num_cpus) => { + let num_cpus = num_cpus.unwrap_or_default(); + if num_cpus + > get_hypervisor_plugin(hypervisor_name) + .unwrap() + .get_max_cpus() as i32 + { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "Vcpus specified in annotation {} is more than maximum limitation {}", + num_cpus, + get_hypervisor_plugin(hypervisor_name) + .unwrap() + .get_max_cpus() + ), + )); + } else { + hv.cpu_info.default_vcpus = num_cpus; + } + } + Err(_e) => { + return Err(i32_err); + } + }, + KATA_ANNO_CFG_HYPERVISOR_DEFAULT_MAX_VCPUS => { + match self.get_value::(key) { + Ok(r) => { + hv.cpu_info.default_maxvcpus = r.unwrap_or_default(); + } + Err(_e) => { + return Err(u32_err); + } + } + } + // Hypervisor Device related annotations + KATA_ANNO_CFG_HYPERVISOR_HOTPLUG_VFIO_ON_ROOT_BUS => { + match self.get_value::(key) { + Ok(r) => { + hv.device_info.hotplug_vfio_on_root_bus = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + } + } + KATA_ANNO_CFG_HYPERVISOR_PCIE_ROOT_PORT => match self.get_value::(key) { + Ok(r) => { + hv.device_info.pcie_root_port = r.unwrap_or_default(); + } + Err(_e) => { + return Err(u32_err); + } + }, + KATA_ANNO_CFG_HYPERVISOR_IOMMU => match self.get_value::(key) { + Ok(r) => { + hv.device_info.enable_iommu = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + }, + KATA_ANNO_CFG_HYPERVISOR_IOMMU_PLATFORM => match self.get_value::(key) { + Ok(r) => { + hv.device_info.enable_iommu_platform = 
r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + }, + // Hypervisor Machine related annotations + KATA_ANNO_CFG_HYPERVISOR_MACHINE_TYPE => { + hv.machine_info.machine_type = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_MACHINE_ACCELERATORS => { + hv.machine_info.machine_accelerators = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_ENTROPY_SOURCE => { + hv.machine_info.validate_entropy_source(value)?; + hv.machine_info.entropy_source = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_PREFETCH_FILES_LIST => { + hv.prefetch_list_path = value.to_string(); + } + // Hypervisor Memory related annotations + KATA_ANNO_CFG_HYPERVISOR_DEFAULT_MEMORY => { + match byte_unit::Byte::from_str(value) { + Ok(mem_bytes) => { + let memory_size = mem_bytes + .get_adjusted_unit(byte_unit::ByteUnit::MiB) + .get_value() + as u32; + info!(sl!(), "get mem {} from annotations: {}", memory_size, value); + if memory_size + < get_hypervisor_plugin(hypervisor_name) + .unwrap() + .get_min_memory() + { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "memory specified in annotation {} is less than minimum limitation {}", + memory_size, + get_hypervisor_plugin(hypervisor_name) + .unwrap() + .get_min_memory() + ), + )); + } + hv.memory_info.default_memory = memory_size; + } + Err(error) => { + error!( + sl!(), + "failed to parse byte from string {} error {:?}", value, error + ); + } + } + } + KATA_ANNO_CFG_HYPERVISOR_MEMORY_SLOTS => match self.get_value::(key) { + Ok(v) => { + hv.memory_info.memory_slots = v.unwrap_or_default(); + } + Err(_e) => { + return Err(u32_err); + } + }, + + KATA_ANNO_CFG_HYPERVISOR_MEMORY_PREALLOC => match self.get_value::(key) { + Ok(r) => { + hv.memory_info.enable_mem_prealloc = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + }, + KATA_ANNO_CFG_HYPERVISOR_ENABLE_HUGEPAGES => { + match self.get_value::(key) { + Ok(r) => { + hv.memory_info.enable_hugepages = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + } + } + KATA_ANNO_CFG_HYPERVISOR_HUGEPAGE_TYPE => { + match self.get_value::(key) { + Ok(r) => { + hv.memory_info.hugepage_type = r.unwrap_or_default(); + } + Err(e) => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("parse huge pages type: {}, error: {}", value, e), + )); + } + } + } + KATA_ANNO_CFG_HYPERVISOR_FILE_BACKED_MEM_ROOT_DIR => { + hv.memory_info.validate_memory_backend_path(value)?; + hv.memory_info.file_mem_backend = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_VIRTIO_MEM => match self.get_value::(key) { + Ok(r) => { + hv.memory_info.enable_virtio_mem = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + }, + KATA_ANNO_CFG_HYPERVISOR_ENABLE_SWAP => match self.get_value::(key) { + Ok(r) => { + hv.memory_info.enable_swap = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + }, + KATA_ANNO_CFG_HYPERVISOR_ENABLE_GUEST_SWAP => match self.get_value::(key) + { + Ok(r) => { + hv.memory_info.enable_guest_swap = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + }, + // Hypervisor Network related annotations + KATA_ANNO_CFG_HYPERVISOR_DISABLE_VHOST_NET => match self.get_value::(key) + { + Ok(r) => { + hv.network_info.disable_vhost_net = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + }, + KATA_ANNO_CFG_HYPERVISOR_RX_RATE_LIMITER_MAX_RATE => { + match self.get_value::(key) { + Ok(r) => { + hv.network_info.rx_rate_limiter_max_rate = r.unwrap_or_default(); + } + Err(_e) => { + return 
Err(u64_err); + } + } + } + KATA_ANNO_CFG_HYPERVISOR_TX_RATE_LIMITER_MAX_RATE => { + match self.get_value::(key) { + Ok(r) => { + hv.network_info.tx_rate_limiter_max_rate = r.unwrap_or_default(); + } + Err(_e) => { + return Err(u64_err); + } + } + } + // Hypervisor Security related annotations + KATA_ANNO_CFG_HYPERVISOR_GUEST_HOOK_PATH => { + hv.security_info.validate_path(value)?; + hv.security_info.guest_hook_path = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_ENABLE_ROOTLESS_HYPERVISOR => { + match self.get_value::(key) { + Ok(r) => { + hv.security_info.rootless = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + } + } + // Hypervisor Shared File System related annotations + KATA_ANNO_CFG_HYPERVISOR_SHARED_FS => { + hv.shared_fs.shared_fs = self.get(key); + } + + KATA_ANNO_CFG_HYPERVISOR_VIRTIO_FS_DAEMON => { + hv.shared_fs.validate_virtiofs_daemon_path(value)?; + hv.shared_fs.virtio_fs_daemon = value.to_string(); + } + + KATA_ANNO_CFG_HYPERVISOR_VIRTIO_FS_CACHE => { + hv.shared_fs.virtio_fs_cache = value.to_string(); + } + KATA_ANNO_CFG_HYPERVISOR_VIRTIO_FS_CACHE_SIZE => { + match self.get_value::(key) { + Ok(r) => { + hv.shared_fs.virtio_fs_cache_size = r.unwrap_or_default(); + } + Err(_e) => { + return Err(u32_err); + } + } + } + KATA_ANNO_CFG_HYPERVISOR_VIRTIO_FS_EXTRA_ARGS => { + let args: Vec = + value.to_string().split(',').map(str::to_string).collect(); + for arg in args { + hv.shared_fs.virtio_fs_extra_args.push(arg.to_string()); + } + } + KATA_ANNO_CFG_HYPERVISOR_MSIZE_9P => match self.get_value::(key) { + Ok(v) => { + hv.shared_fs.msize_9p = v.unwrap_or_default(); + } + Err(_e) => { + return Err(u32_err); + } + }, + + _ => { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("Invalid annotation type {}", key), + )); + } + } + } else { + match key.as_str() { + // update agent config + KATA_ANNO_CFG_KERNEL_MODULES => { + let kernel_mod: Vec = + value.to_string().split(';').map(str::to_string).collect(); + for modules in kernel_mod { + ag.kernel_modules.push(modules.to_string()); + } + } + KATA_ANNO_CFG_AGENT_TRACE => match self.get_value::(key) { + Ok(r) => { + ag.enable_tracing = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + }, + KATA_ANNO_CFG_AGENT_CONTAINER_PIPE_SIZE => match self.get_value::(key) { + Ok(v) => { + ag.container_pipe_size = v.unwrap_or_default(); + } + Err(_e) => { + return Err(u32_err); + } + }, + // update runtime config + KATA_ANNO_CFG_RUNTIME_NAME => { + let runtime = vec!["virt-container", "linux-container", "wasm-container"]; + if runtime.contains(&value.as_str()) { + config.runtime.name = value.to_string(); + } else { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "runtime specified in annotation {} is not in {:?}", + &value, &runtime + ), + )); + } + } + KATA_ANNO_CFG_DISABLE_GUEST_SECCOMP => match self.get_value::(key) { + Ok(r) => { + config.runtime.disable_guest_seccomp = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + }, + KATA_ANNO_CFG_ENABLE_PPROF => match self.get_value::(key) { + Ok(r) => { + config.runtime.enable_pprof = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + }, + KATA_ANNO_CFG_EXPERIMENTAL => { + let args: Vec = + value.to_string().split(',').map(str::to_string).collect(); + for arg in args { + config.runtime.experimental.push(arg.to_string()); + } + } + KATA_ANNO_CFG_INTER_NETWORK_MODEL => { + config.runtime.internetworking_model = value.to_string(); + } + KATA_ANNO_CFG_SANDBOX_CGROUP_ONLY 
=> match self.get_value::(key) { + Ok(r) => { + config.runtime.sandbox_cgroup_only = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + }, + KATA_ANNO_CFG_DISABLE_NEW_NETNS => match self.get_value::(key) { + Ok(r) => { + config.runtime.disable_new_netns = r.unwrap_or_default(); + } + Err(_e) => { + return Err(bool_err); + } + }, + KATA_ANNO_CFG_VFIO_MODE => { + config.runtime.vfio_mode = value.to_string(); + } + KATA_ANNO_CFG_SANDBOX_BIND_MOUNTS => { + let args: Vec = value + .to_string() + .split_ascii_whitespace() + .map(str::to_string) + .collect(); + for arg in args { + config.runtime.sandbox_bind_mounts.push(arg.to_string()); + } + } + _ => { + warn!(sl!(), "Annotation {} not enabled", key); + } + } + } + } + Ok(()) + } +} diff --git a/src/libs/kata-types/src/annotations/thirdparty.rs b/src/libs/kata-types/src/annotations/thirdparty.rs new file mode 100644 index 000000000000..e8f2a7168360 --- /dev/null +++ b/src/libs/kata-types/src/annotations/thirdparty.rs @@ -0,0 +1,12 @@ +// Copyright (c) 2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Third-party annotations - annotations defined by other projects or k8s plugins but that can +//! change Kata Containers behaviour. + +/// Annotation to enable SGX. +/// +/// Hardware-based isolation and memory encryption. +pub const SGX_EPC: &str = "sgx.intel.com/epc"; diff --git a/src/libs/kata-types/src/capabilities.rs b/src/libs/kata-types/src/capabilities.rs new file mode 100644 index 000000000000..15207e61363c --- /dev/null +++ b/src/libs/kata-types/src/capabilities.rs @@ -0,0 +1,107 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use bitmask_enum::bitmask; + +/// CapabilityBits +#[bitmask(u8)] +pub enum CapabilityBits { + /// hypervisor supports use block device + BlockDeviceSupport, + /// hypervisor supports block device hotplug + BlockDeviceHotplugSupport, + /// hypervisor supports multi queue + MultiQueueSupport, + /// hypervisor supports filesystem share + FsSharingSupport, +} + +/// Capabilities describe a virtcontainers hypervisor capabilities through a bit mask. +#[derive(Debug, Clone)] +pub struct Capabilities { + /// Capability flags + flags: CapabilityBits, +} + +impl Default for Capabilities { + fn default() -> Self { + Self::new() + } +} + +impl Capabilities { + /// new Capabilities struct + pub fn new() -> Self { + Capabilities { + flags: CapabilityBits { bits: 0 }, + } + } + + /// set CapabilityBits + pub fn set(&mut self, flags: CapabilityBits) { + self.flags = flags; + } + + /// is_block_device_supported tells if an hypervisor supports block devices. + pub fn is_block_device_supported(&self) -> bool { + self.flags.and(CapabilityBits::BlockDeviceSupport) != 0 + } + + /// is_block_device_hotplug_supported tells if an hypervisor supports block devices. + pub fn is_block_device_hotplug_supported(&self) -> bool { + self.flags.and(CapabilityBits::BlockDeviceHotplugSupport) != 0 + } + + /// is_multi_queue_supported tells if an hypervisor supports device multi queue support. + pub fn is_multi_queue_supported(&self) -> bool { + self.flags.and(CapabilityBits::MultiQueueSupport) != 0 + } + + /// is_fs_sharing_supported tells if an hypervisor supports host filesystem sharing. 
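
The CapabilityBits bitmask and the Capabilities wrapper above are meant to be filled in by a hypervisor driver and queried by the runtime. A short usage sketch; the dragonball_like_capabilities helper is hypothetical and not part of this patch:

```rust
use kata_types::capabilities::{Capabilities, CapabilityBits};

/// Hypothetical helper: the capability set a virtio-blk plus virtio-fs
/// capable hypervisor driver might advertise.
fn dragonball_like_capabilities() -> Capabilities {
    let mut caps = Capabilities::new();
    caps.set(
        CapabilityBits::BlockDeviceSupport
            | CapabilityBits::BlockDeviceHotplugSupport
            | CapabilityBits::FsSharingSupport,
    );
    caps
}

fn main() {
    let caps = dragonball_like_capabilities();

    assert!(caps.is_block_device_supported());
    assert!(caps.is_block_device_hotplug_supported());
    assert!(caps.is_fs_sharing_supported());
    // MultiQueueSupport was not set, so this query is false.
    assert!(!caps.is_multi_queue_supported());
}
```
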
+ pub fn is_fs_sharing_supported(&self) -> bool { + self.flags.and(CapabilityBits::FsSharingSupport) != 0 + } +} + +#[cfg(test)] +mod tests { + use crate::capabilities::CapabilityBits; + + use super::Capabilities; + + #[test] + fn test_set_hypervisor_capabilities() { + let mut cap = Capabilities::new(); + assert!(!cap.is_block_device_supported()); + + // test set block device support + cap.set(CapabilityBits::BlockDeviceSupport); + assert!(cap.is_block_device_supported()); + assert!(!cap.is_block_device_hotplug_supported()); + + // test set block device hotplug support + cap.set(CapabilityBits::BlockDeviceSupport | CapabilityBits::BlockDeviceHotplugSupport); + assert!(cap.is_block_device_hotplug_supported()); + assert!(!cap.is_multi_queue_supported()); + + // test set multi queue support + cap.set( + CapabilityBits::BlockDeviceSupport + | CapabilityBits::BlockDeviceHotplugSupport + | CapabilityBits::MultiQueueSupport, + ); + assert!(cap.is_multi_queue_supported()); + + // test set host filesystem sharing support + cap.set( + CapabilityBits::BlockDeviceSupport + | CapabilityBits::BlockDeviceHotplugSupport + | CapabilityBits::MultiQueueSupport + | CapabilityBits::FsSharingSupport, + ); + assert!(cap.is_fs_sharing_supported()) + } +} diff --git a/src/libs/kata-types/src/config/agent.rs b/src/libs/kata-types/src/config/agent.rs new file mode 100644 index 000000000000..f30ab9a2385f --- /dev/null +++ b/src/libs/kata-types/src/config/agent.rs @@ -0,0 +1,171 @@ +// Copyright (c) 2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::io::Result; + +use crate::config::{ConfigOps, TomlConfig}; + +pub use vendor::AgentVendor; + +use super::default::{ + DEFAULT_AGENT_DIAL_TIMEOUT_MS, DEFAULT_AGENT_LOG_PORT, DEFAULT_AGENT_VSOCK_PORT, +}; +use crate::eother; + +/// agent name of Kata agent. +pub const AGENT_NAME_KATA: &str = "kata"; + +/// Kata agent configuration information. +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct Agent { + /// If enabled, the agent will log additional debug messages to the system log. + #[serde(default, rename = "enable_debug")] + pub debug: bool, + + /// Enable agent tracing. + /// + /// If enabled, the agent will generate OpenTelemetry trace spans. + /// # Notes: + /// - If the runtime also has tracing enabled, the agent spans will be associated with the + /// appropriate runtime parent span. + /// - If enabled, the runtime will wait for the container to shutdown, increasing the container + /// shutdown time slightly. + #[serde(default)] + pub enable_tracing: bool, + + /// Enable debug console. 
+ /// If enabled, user can connect guest OS running inside hypervisor through + /// "kata-runtime exec " command + #[serde(default)] + pub debug_console_enabled: bool, + + /// Agent server port + #[serde(default = "default_server_port")] + pub server_port: u32, + + /// Agent log port + #[serde(default = "default_log_port")] + pub log_port: u32, + + /// Agent connection dialing timeout value in millisecond + #[serde(default = "default_dial_timeout")] + pub dial_timeout_ms: u32, + + /// Agent reconnect timeout value in millisecond + #[serde(default = "default_reconnect_timeout")] + pub reconnect_timeout_ms: u32, + + /// Agent request timeout value in millisecond + #[serde(default = "default_request_timeout")] + pub request_timeout_ms: u32, + + /// Agent health check request timeout value in millisecond + #[serde(default = "default_health_check_timeout")] + pub health_check_request_timeout_ms: u32, + + /// Comma separated list of kernel modules and their parameters. + /// + /// These modules will be loaded in the guest kernel using modprobe(8). + /// The following example can be used to load two kernel modules with parameters: + /// - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"] + /// The first word is considered as the module name and the rest as its parameters. + /// Container will not be started when: + /// - A kernel module is specified and the modprobe command is not installed in the guest + /// or it fails loading the module. + /// - The module is not available in the guest or it doesn't met the guest kernel + /// requirements, like architecture and version. + #[serde(default)] + pub kernel_modules: Vec, + + /// container pipe size + #[serde(default)] + pub container_pipe_size: u32, +} + +impl std::default::Default for Agent { + fn default() -> Self { + Self { + debug: true, + enable_tracing: false, + debug_console_enabled: false, + server_port: DEFAULT_AGENT_VSOCK_PORT, + log_port: DEFAULT_AGENT_LOG_PORT, + dial_timeout_ms: DEFAULT_AGENT_DIAL_TIMEOUT_MS, + reconnect_timeout_ms: 3_000, + request_timeout_ms: 30_000, + health_check_request_timeout_ms: 90_000, + kernel_modules: Default::default(), + container_pipe_size: 0, + } + } +} + +fn default_server_port() -> u32 { + DEFAULT_AGENT_VSOCK_PORT +} + +fn default_log_port() -> u32 { + DEFAULT_AGENT_LOG_PORT +} + +fn default_dial_timeout() -> u32 { + // ms + 10 +} + +fn default_reconnect_timeout() -> u32 { + // ms + 3_000 +} + +fn default_request_timeout() -> u32 { + // ms + 30_000 +} + +fn default_health_check_timeout() -> u32 { + // ms + 90_000 +} + +impl Agent { + fn validate(&self) -> Result<()> { + if self.dial_timeout_ms == 0 { + return Err(eother!("dial_timeout_ms couldn't be 0.")); + } + + Ok(()) + } +} + +impl ConfigOps for Agent { + fn adjust_config(conf: &mut TomlConfig) -> Result<()> { + AgentVendor::adjust_config(conf)?; + Ok(()) + } + + fn validate(conf: &TomlConfig) -> Result<()> { + AgentVendor::validate(conf)?; + for (_, agent_config) in conf.agent.iter() { + agent_config.validate()?; + } + Ok(()) + } +} + +#[cfg(not(feature = "enable-vendor"))] +mod vendor { + use super::*; + + /// Vendor customization agent configuration. 
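
Because every field of Agent carries a serde default, a partial agent section deserializes cleanly. A sketch of that behaviour, assuming the toml crate is available and that the module is reachable at kata_types::config::agent (the exact re-export path is an assumption):

```rust
use kata_types::config::agent::Agent;

fn main() {
    // A minimal agent section; everything not listed falls back to the
    // #[serde(default ...)] values declared on the struct.
    let toml_str = r#"
        enable_debug = true
        dial_timeout_ms = 50
        kernel_modules = ["i915 enable_ppgtt=0"]
    "#;

    let agent: Agent = toml::from_str(toml_str).expect("invalid agent config");

    assert!(agent.debug); // mapped from `enable_debug`
    assert_eq!(agent.dial_timeout_ms, 50);
    assert_eq!(agent.server_port, 1024); // DEFAULT_AGENT_VSOCK_PORT
    assert_eq!(agent.reconnect_timeout_ms, 3_000);
    assert_eq!(agent.kernel_modules.len(), 1);
}
```
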
+ #[derive(Debug, Default, Deserialize, Serialize)] + pub struct AgentVendor {} + + impl ConfigOps for AgentVendor {} +} + +#[cfg(feature = "enable-vendor")] +#[path = "agent_vendor.rs"] +mod vendor; diff --git a/src/libs/kata-types/src/config/agent_vendor.rs b/src/libs/kata-types/src/config/agent_vendor.rs new file mode 100644 index 000000000000..62ce710d01de --- /dev/null +++ b/src/libs/kata-types/src/config/agent_vendor.rs @@ -0,0 +1,12 @@ +// Copyright (c) 2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use super::*; + +/// Vendor customization agent configuration. +#[derive(Debug, Default, Deserialize, Serialize)] +pub struct AgentVendor {} + +impl ConfigOps for AgentVendor {} diff --git a/src/libs/kata-types/src/config/default.rs b/src/libs/kata-types/src/config/default.rs new file mode 100644 index 000000000000..f55f597da249 --- /dev/null +++ b/src/libs/kata-types/src/config/default.rs @@ -0,0 +1,87 @@ +// Copyright (c) 2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Default configuration values. +#![allow(missing_docs)] + +use crate::config::agent::AGENT_NAME_KATA; +use crate::config::hypervisor::HYPERVISOR_NAME_DRAGONBALL; +use crate::config::runtime::RUNTIME_NAME_VIRTCONTAINER; +use lazy_static::lazy_static; + +lazy_static! { + /// Default configuration file paths, vendor may extend the list + pub static ref DEFAULT_RUNTIME_CONFIGURATIONS: Vec::<&'static str> = vec![ + "/etc/kata-containers/configuration.toml", + "/usr/share/defaults/kata-containers/configuration.toml", + "/opt/kata/share/defaults/kata-containers/configuration.toml", + ]; +} + +pub const DEFAULT_AGENT_NAME: &str = "kata-agent"; +pub const DEFAULT_AGENT_VSOCK_PORT: u32 = 1024; +pub const DEFAULT_AGENT_LOG_PORT: u32 = 1025; +pub const DEFAULT_AGENT_DBG_CONSOLE_PORT: u32 = 1026; +pub const DEFAULT_AGENT_TYPE_NAME: &str = AGENT_NAME_KATA; +pub const DEFAULT_AGENT_DIAL_TIMEOUT_MS: u32 = 10; + +pub const DEFAULT_RUNTIME_NAME: &str = RUNTIME_NAME_VIRTCONTAINER; +pub const DEFAULT_HYPERVISOR: &str = HYPERVISOR_NAME_DRAGONBALL; + +pub const DEFAULT_INTERNETWORKING_MODEL: &str = "tcfilter"; + +pub const DEFAULT_BLOCK_DEVICE_TYPE: &str = "virtio-blk-pci"; +pub const DEFAULT_VHOST_USER_STORE_PATH: &str = "/var/run/vhost-user"; +pub const DEFAULT_BLOCK_NVDIMM_MEM_OFFSET: u64 = 0; + +pub const DEFAULT_SHARED_FS_TYPE: &str = "virtio-fs"; +pub const DEFAULT_VIRTIO_FS_CACHE_MODE: &str = "never"; +pub const DEFAULT_VIRTIO_FS_DAX_SIZE_MB: u32 = 1024; +pub const DEFAULT_SHARED_9PFS_SIZE_MB: u32 = 128 * 1024; +pub const MIN_SHARED_9PFS_SIZE_MB: u32 = 4 * 1024; +pub const MAX_SHARED_9PFS_SIZE_MB: u32 = 8 * 1024 * 1024; + +pub const DEFAULT_GUEST_HOOK_PATH: &str = "/opt/kata/hooks"; +pub const DEFAULT_GUEST_DNS_FILE: &str = "/etc/resolv.conf"; + +pub const DEFAULT_GUEST_VCPUS: u32 = 1; + +// Default configuration for dragonball +pub const DEFAULT_DRAGONBALL_GUEST_KERNEL_IMAGE: &str = "vmlinuz"; +pub const DEFAULT_DRAGONBALL_GUEST_KERNEL_PARAMS: &str = ""; +pub const DEFAULT_DRAGONBALL_ENTROPY_SOURCE: &str = "/dev/urandom"; +pub const DEFAULT_DRAGONBALL_MEMORY_SIZE_MB: u32 = 128; +pub const DEFAULT_DRAGONBALL_MEMORY_SLOTS: u32 = 128; +pub const MAX_DRAGONBALL_VCPUS: u32 = 256; +pub const MIN_DRAGONBALL_MEMORY_SIZE_MB: u32 = 64; +// Default configuration for qemu +pub const DEFAULT_QEMU_BINARY_PATH: &str = "/usr/bin/qemu-system-x86_64"; +pub const DEFAULT_QEMU_CONTROL_PATH: &str = ""; +pub const DEFAULT_QEMU_MACHINE_TYPE: &str = "q35"; +pub const DEFAULT_QEMU_ENTROPY_SOURCE: &str = 
"/dev/urandom"; +pub const DEFAULT_QEMU_GUEST_KERNEL_IMAGE: &str = "vmlinuz"; +pub const DEFAULT_QEMU_GUEST_KERNEL_PARAMS: &str = ""; +pub const DEFAULT_QEMU_FIRMWARE_PATH: &str = ""; +pub const DEFAULT_QEMU_MEMORY_SIZE_MB: u32 = 128; +pub const DEFAULT_QEMU_MEMORY_SLOTS: u32 = 128; +pub const DEFAULT_QEMU_PCI_BRIDGES: u32 = 2; +pub const MAX_QEMU_PCI_BRIDGES: u32 = 5; +pub const MAX_QEMU_VCPUS: u32 = 256; +pub const MIN_QEMU_MEMORY_SIZE_MB: u32 = 64; + +// Default configuration for Cloud Hypervisor (CH) +pub const DEFAULT_CH_BINARY_PATH: &str = "/usr/bin/cloud-hypervisor"; +pub const DEFAULT_CH_ROOTFS_TYPE: &str = "ext4"; +pub const DEFAULT_CH_CONTROL_PATH: &str = ""; +pub const DEFAULT_CH_ENTROPY_SOURCE: &str = "/dev/urandom"; +pub const DEFAULT_CH_GUEST_KERNEL_IMAGE: &str = "vmlinuz"; +pub const DEFAULT_CH_GUEST_KERNEL_PARAMS: &str = ""; +pub const DEFAULT_CH_FIRMWARE_PATH: &str = ""; +pub const DEFAULT_CH_MEMORY_SIZE_MB: u32 = 128; +pub const DEFAULT_CH_MEMORY_SLOTS: u32 = 128; +pub const DEFAULT_CH_PCI_BRIDGES: u32 = 2; +pub const MAX_CH_PCI_BRIDGES: u32 = 5; +pub const MAX_CH_VCPUS: u32 = 256; +pub const MIN_CH_MEMORY_SIZE_MB: u32 = 64; diff --git a/src/libs/kata-types/src/config/drop_in.rs b/src/libs/kata-types/src/config/drop_in.rs new file mode 100644 index 000000000000..208ea72fdce8 --- /dev/null +++ b/src/libs/kata-types/src/config/drop_in.rs @@ -0,0 +1,384 @@ +// Copyright Red Hat +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub use drop_in_directory_handling::load; + +mod toml_tree_ops { + // The following pair of functions implement toml::Value tree merging, with + // the second argument being merged into the first one and consumed in the + // process. The toml parser crate in use here doesn't support parsing into + // a pre-existing (possibly pre-filled) TomlConfig instance but can parse + // into a toml::Value tree so we use that instead. All files (base and + // drop-ins) are initially parsed into toml::Value trees which are + // subsequently merged. Only when the fully merged tree is computed it is + // converted to a TomlConfig instance. + + fn merge_tables(base_table: &mut toml::value::Table, dropin_table: toml::value::Table) { + for (key, val) in dropin_table.into_iter() { + match base_table.get_mut(&key) { + Some(base_val) => merge(base_val, val), + None => { + base_table.insert(key, val); + } + } + } + } + + pub fn merge(base: &mut toml::Value, dropin: toml::Value) { + match dropin { + toml::Value::Table(dropin_table) => { + if let toml::Value::Table(base_table) = base { + merge_tables(base_table, dropin_table); + } else { + *base = toml::Value::Table(dropin_table); + } + } + + _ => *base = dropin, + } + } + + #[cfg(test)] + mod tests { + use super::*; + + // Mock config structure to stand in for TomlConfig for low-level + // toml::Value trees merging. 
+ #[derive(Deserialize, Debug, Default, PartialEq)] + struct SubConfig { + #[serde(default)] + another_string: String, + #[serde(default)] + yet_another_number: i32, + #[serde(default)] + sub_array: Vec, + } + + #[derive(Deserialize, Debug, Default, PartialEq)] + struct Config { + #[serde(default)] + number: i32, + #[serde(default)] + string: String, + #[serde(default)] + another_number: u8, + #[serde(default)] + array: Vec, + + #[serde(default)] + sub: SubConfig, + } + + #[test] + fn dropin_does_not_interfere_with_base() { + let mut base: toml::Value = toml::from_str( + r#" + number = 42 + "#, + ) + .unwrap(); + + let dropin: toml::Value = toml::from_str( + r#" + string = "foo" + "#, + ) + .unwrap(); + + merge(&mut base, dropin); + + assert_eq!( + base.try_into(), + Ok(Config { + number: 42, + string: "foo".into(), + sub: Default::default(), + ..Default::default() + }) + ); + } + + #[test] + fn dropin_overrides_base() { + let mut base: toml::Value = toml::from_str( + r#" + number = 42 + [sub] + another_string = "foo" + "#, + ) + .unwrap(); + + let dropin: toml::Value = toml::from_str( + r#" + number = 43 + [sub] + another_string = "bar" + "#, + ) + .unwrap(); + + merge(&mut base, dropin); + + assert_eq!( + base.try_into(), + Ok(Config { + number: 43, + sub: SubConfig { + another_string: "bar".into(), + ..Default::default() + }, + ..Default::default() + }) + ); + } + + #[test] + fn dropin_extends_base() { + let mut base: toml::Value = toml::from_str( + r#" + number = 42 + [sub] + another_string = "foo" + "#, + ) + .unwrap(); + + let dropin: toml::Value = toml::from_str( + r#" + string = "hello" + [sub] + yet_another_number = 13 + "#, + ) + .unwrap(); + + merge(&mut base, dropin); + + assert_eq!( + base.try_into(), + Ok(Config { + number: 42, + string: "hello".into(), + sub: SubConfig { + another_string: "foo".into(), + yet_another_number: 13, + ..Default::default() + }, + ..Default::default() + }) + ); + } + + // Drop-ins can change the type of a value. This might look weird but at + // this level we have no idea about semantics so we just do what the + // .toml's tell us. The final type check is only performed by try_into(). + // Also, we don't necessarily test this because it's a desired feature. + // It's just something that seems to follow from the way Value tree + // merging is implemented so why not acknowledge and verify it. 
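
Before the type-override test below, here is a trimmed-down, self-contained restatement of the merge semantics implemented by toml_tree_ops above (tables merge key by key, every other value is overwritten by the drop-in). It uses only the toml crate and is not part of this patch:

```rust
use toml::Value;

// Scaled-down restatement of the drop-in merge rule: recurse into tables,
// overwrite everything else with the drop-in value.
fn merge(base: &mut Value, dropin: Value) {
    match (base, dropin) {
        (Value::Table(base_table), Value::Table(dropin_table)) => {
            for (key, value) in dropin_table {
                match base_table.get_mut(&key) {
                    Some(existing) => merge(existing, value),
                    None => {
                        base_table.insert(key, value);
                    }
                }
            }
        }
        (slot, other) => *slot = other,
    }
}

fn main() {
    let mut base: Value = toml::from_str(
        r#"
        [hypervisor.qemu]
        path = "/usr/bin/qemu-kvm"
        default_bridges = 3
    "#,
    )
    .unwrap();

    let dropin: Value = toml::from_str(
        r#"
        [hypervisor.qemu]
        default_bridges = 4
        shared_fs = "virtio-fs"
    "#,
    )
    .unwrap();

    merge(&mut base, dropin);

    let qemu = &base["hypervisor"]["qemu"];
    assert_eq!(qemu["path"].as_str(), Some("/usr/bin/qemu-kvm")); // kept from base
    assert_eq!(qemu["default_bridges"].as_integer(), Some(4)); // overridden
    assert_eq!(qemu["shared_fs"].as_str(), Some("virtio-fs")); // added
}
```
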
+ #[test] + fn dropin_overrides_base_type() { + let mut base: toml::Value = toml::from_str( + r#" + number = "foo" + [sub] + another_string = 42 + "#, + ) + .unwrap(); + + let dropin: toml::Value = toml::from_str( + r#" + number = 42 + [sub] + another_string = "foo" + "#, + ) + .unwrap(); + + merge(&mut base, dropin); + + assert_eq!( + base.try_into(), + Ok(Config { + number: 42, + sub: SubConfig { + another_string: "foo".into(), + ..Default::default() + }, + ..Default::default() + }) + ); + } + } +} + +mod drop_in_directory_handling { + use crate::config::TomlConfig; + use std::fs; + use std::io::{self, Result}; + use std::path::{Path, PathBuf}; + + fn get_dropin_dir_path(base_cfg_file_path: &Path) -> Result { + let mut dropin_dir = base_cfg_file_path.to_path_buf(); + if !dropin_dir.pop() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "base cfg file path too short", + )); + } + dropin_dir.push("config.d"); + Ok(dropin_dir) + } + + fn update_from_dropin(base_config: &mut toml::Value, dropin_file: &fs::DirEntry) -> Result<()> { + if !dropin_file.file_type()?.is_file() { + return Err(io::Error::new( + io::ErrorKind::Other, + "drop-in cfg file can only be a regular file or a symlink", + )); + } + let dropin_contents = fs::read_to_string(dropin_file.path())?; + let dropin_config: toml::Value = toml::from_str(&dropin_contents)?; + super::toml_tree_ops::merge(base_config, dropin_config); + Ok(()) + } + + fn update_from_dropins(base_config: &mut toml::Value, dropin_dir: &Path) -> Result<()> { + let dropin_files_iter = match fs::read_dir(dropin_dir) { + Ok(iter) => iter, + Err(err) => { + if err.kind() == io::ErrorKind::NotFound { + return Ok(()); + } else { + return Err(err); + } + } + }; + + let mut dropin_files = dropin_files_iter.collect::>>()?; + dropin_files.sort_by_key(|direntry| direntry.file_name()); + for dropin_file in &dropin_files { + update_from_dropin(base_config, dropin_file)?; + } + Ok(()) + } + + pub fn load(base_cfg_file_path: &Path) -> Result { + let base_toml_str = fs::read_to_string(base_cfg_file_path)?; + let mut base_config: toml::Value = toml::from_str(&base_toml_str)?; + let dropin_dir = get_dropin_dir_path(base_cfg_file_path)?; + + update_from_dropins(&mut base_config, &dropin_dir)?; + + let config: TomlConfig = base_config.try_into()?; + Ok(config) + } + + #[cfg(test)] + mod tests { + use super::*; + use std::io::Write; + + const BASE_CONFIG_DATA: &str = r#" + [hypervisor.qemu] + path = "/usr/bin/qemu-kvm" + default_bridges = 3 + [runtime] + enable_debug = true + internetworking_model="tcfilter" + "#; + + fn check_base_config(config: &TomlConfig) { + assert_eq!( + config.hypervisor["qemu"].path, + "/usr/bin/qemu-kvm".to_string() + ); + assert_eq!(config.hypervisor["qemu"].device_info.default_bridges, 3); + assert!(config.runtime.debug); + assert_eq!(config.runtime.internetworking_model, "tcfilter".to_string()); + } + + fn create_file(path: &Path, contents: &[u8]) -> Result<()> { + fs::File::create(path)?.write_all(contents) + } + + #[test] + fn test_no_dropins_dir() { + let tmpdir = tempfile::tempdir().unwrap(); + + let config_path = tmpdir.path().join("runtime.toml"); + create_file(&config_path, BASE_CONFIG_DATA.as_bytes()).unwrap(); + + let config = load(&config_path).unwrap(); + check_base_config(&config); + } + + #[test] + fn test_no_dropins() { + let tmpdir = tempfile::tempdir().unwrap(); + + let config_path = tmpdir.path().join("runtime.toml"); + create_file(&config_path, BASE_CONFIG_DATA.as_bytes()).unwrap(); + + let dropin_dir = 
tmpdir.path().join("config.d"); + fs::create_dir(dropin_dir).unwrap(); + + let config = load(&config_path).unwrap(); + check_base_config(&config); + } + + #[test] + fn test_dropins() { + let tmpdir = tempfile::tempdir().unwrap(); + + let dropin_data = r#" + [hypervisor.qemu] + default_vcpus = 2 + default_bridges = 4 + shared_fs = "virtio-fs" + [runtime] + sandbox_cgroup_only=true + internetworking_model="macvtap" + vfio_mode="guest-kernel" + "#; + + let dropin_override_data = r#" + [hypervisor.qemu] + shared_fs = "virtio-9p" + [runtime] + vfio_mode="vfio" + "#; + + let config_path = tmpdir.path().join("runtime.toml"); + create_file(&config_path, BASE_CONFIG_DATA.as_bytes()).unwrap(); + + let dropin_dir = tmpdir.path().join("config.d"); + fs::create_dir(&dropin_dir).unwrap(); + + let dropin_path = dropin_dir.join("10-base"); + create_file(&dropin_path, dropin_data.as_bytes()).unwrap(); + + let dropin_override_path = dropin_dir.join("20-override"); + create_file(&dropin_override_path, dropin_override_data.as_bytes()).unwrap(); + + let config = load(&config_path).unwrap(); + assert_eq!( + config.hypervisor["qemu"].path, + "/usr/bin/qemu-kvm".to_string() + ); + assert_eq!(config.hypervisor["qemu"].cpu_info.default_vcpus, 2); + assert_eq!(config.hypervisor["qemu"].device_info.default_bridges, 4); + assert_eq!( + config.hypervisor["qemu"].shared_fs.shared_fs.as_deref(), + Some("virtio-9p") + ); + assert!(config.runtime.debug); + assert!(config.runtime.sandbox_cgroup_only); + assert_eq!(config.runtime.internetworking_model, "macvtap".to_string()); + assert_eq!(config.runtime.vfio_mode, "vfio".to_string()); + } + } +} diff --git a/src/libs/kata-types/src/config/hypervisor/ch.rs b/src/libs/kata-types/src/config/hypervisor/ch.rs new file mode 100644 index 000000000000..cc752e3852ea --- /dev/null +++ b/src/libs/kata-types/src/config/hypervisor/ch.rs @@ -0,0 +1,146 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::io::Result; +use std::path::Path; +use std::sync::Arc; + +use super::{default, register_hypervisor_plugin}; + +use crate::config::default::MAX_CH_VCPUS; +use crate::config::default::MIN_CH_MEMORY_SIZE_MB; + +use crate::config::hypervisor::VIRTIO_BLK_MMIO; +use crate::config::{ConfigPlugin, TomlConfig}; +use crate::{eother, resolve_path, validate_path}; + +/// Hypervisor name for CH, used to index `TomlConfig::hypervisor`. +pub const HYPERVISOR_NAME_CH: &str = "cloud-hypervisor"; + +/// Configuration information for CH. +#[derive(Default, Debug)] +pub struct CloudHypervisorConfig {} + +impl CloudHypervisorConfig { + /// Create a new instance of `CloudHypervisorConfig`. + pub fn new() -> Self { + CloudHypervisorConfig {} + } + + /// Register the CH plugin. + pub fn register(self) { + let plugin = Arc::new(self); + register_hypervisor_plugin(HYPERVISOR_NAME_CH, plugin); + } +} + +impl ConfigPlugin for CloudHypervisorConfig { + fn get_max_cpus(&self) -> u32 { + MAX_CH_VCPUS + } + + fn get_min_memory(&self) -> u32 { + MIN_CH_MEMORY_SIZE_MB + } + + fn name(&self) -> &str { + HYPERVISOR_NAME_CH + } + + /// Adjust the configuration information after loading from configuration file. 
+ fn adjust_config(&self, conf: &mut TomlConfig) -> Result<()> { + if let Some(ch) = conf.hypervisor.get_mut(HYPERVISOR_NAME_CH) { + if ch.path.is_empty() { + ch.path = default::DEFAULT_CH_BINARY_PATH.to_string(); + } + resolve_path!(ch.path, "CH binary path `{}` is invalid: {}")?; + if ch.ctlpath.is_empty() { + ch.ctlpath = default::DEFAULT_CH_CONTROL_PATH.to_string(); + } + resolve_path!(ch.ctlpath, "CH ctlpath `{}` is invalid: {}")?; + + if ch.boot_info.kernel.is_empty() { + ch.boot_info.kernel = default::DEFAULT_CH_GUEST_KERNEL_IMAGE.to_string(); + } + if ch.boot_info.kernel_params.is_empty() { + ch.boot_info.kernel_params = default::DEFAULT_CH_GUEST_KERNEL_PARAMS.to_string(); + } + if ch.boot_info.firmware.is_empty() { + ch.boot_info.firmware = default::DEFAULT_CH_FIRMWARE_PATH.to_string(); + } + + if ch.device_info.default_bridges == 0 { + ch.device_info.default_bridges = default::DEFAULT_CH_PCI_BRIDGES; + } + + if ch.machine_info.entropy_source.is_empty() { + ch.machine_info.entropy_source = default::DEFAULT_CH_ENTROPY_SOURCE.to_string(); + } + + if ch.memory_info.default_memory == 0 { + ch.memory_info.default_memory = default::DEFAULT_CH_MEMORY_SIZE_MB; + } + if ch.memory_info.memory_slots == 0 { + ch.memory_info.memory_slots = default::DEFAULT_CH_MEMORY_SLOTS; + } + } + + Ok(()) + } + + /// Validate the configuration information. + fn validate(&self, conf: &TomlConfig) -> Result<()> { + if let Some(ch) = conf.hypervisor.get(HYPERVISOR_NAME_CH) { + validate_path!(ch.path, "CH binary path `{}` is invalid: {}")?; + validate_path!(ch.ctlpath, "CH control path `{}` is invalid: {}")?; + if !ch.jailer_path.is_empty() { + return Err(eother!("Path for CH jailer should be empty")); + } + if !ch.valid_jailer_paths.is_empty() { + return Err(eother!("Valid CH jailer path list should be empty")); + } + + if !ch.blockdev_info.disable_block_device_use + && ch.blockdev_info.block_device_driver == VIRTIO_BLK_MMIO + { + return Err(eother!("CH doesn't support virtio-blk-mmio")); + } + + if ch.boot_info.kernel.is_empty() { + return Err(eother!("Guest kernel image for CH is empty")); + } + if ch.boot_info.image.is_empty() && ch.boot_info.initrd.is_empty() { + return Err(eother!("Both guest boot image and initrd for CH are empty")); + } + + if (ch.cpu_info.default_vcpus > 0 + && ch.cpu_info.default_vcpus as u32 > default::MAX_CH_VCPUS) + || ch.cpu_info.default_maxvcpus > default::MAX_CH_VCPUS + { + return Err(eother!( + "CH hypervisor cannot support {} vCPUs", + ch.cpu_info.default_maxvcpus + )); + } + + if ch.device_info.default_bridges > default::MAX_CH_PCI_BRIDGES { + return Err(eother!( + "CH hypervisor cannot support {} PCI bridges", + ch.device_info.default_bridges + )); + } + + if ch.memory_info.default_memory < MIN_CH_MEMORY_SIZE_MB { + return Err(eother!( + "CH hypervisor has minimal memory limitation {}", + MIN_CH_MEMORY_SIZE_MB + )); + } + } + + Ok(()) + } +} diff --git a/src/libs/kata-types/src/config/hypervisor/dragonball.rs b/src/libs/kata-types/src/config/hypervisor/dragonball.rs new file mode 100644 index 000000000000..40573194597a --- /dev/null +++ b/src/libs/kata-types/src/config/hypervisor/dragonball.rs @@ -0,0 +1,198 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::io::Result; +use std::path::Path; +use std::sync::Arc; +use std::u32; + +use super::{default, register_hypervisor_plugin}; +use crate::config::default::MAX_DRAGONBALL_VCPUS; +use crate::config::default::MIN_DRAGONBALL_MEMORY_SIZE_MB; +use crate::config::hypervisor::{ 
+ VIRTIO_BLK_MMIO, VIRTIO_BLK_PCI, VIRTIO_FS, VIRTIO_FS_INLINE, VIRTIO_PMEM, +}; +use crate::config::{ConfigPlugin, TomlConfig}; +use crate::{eother, resolve_path, validate_path}; + +/// Hypervisor name for dragonball, used to index `TomlConfig::hypervisor`. +pub const HYPERVISOR_NAME_DRAGONBALL: &str = "dragonball"; + +/// Configuration information for dragonball. +#[derive(Default, Debug)] +pub struct DragonballConfig {} + +impl DragonballConfig { + /// Create a new instance of `DragonballConfig`. + pub fn new() -> Self { + DragonballConfig {} + } + + /// Register the dragonball plugin. + pub fn register(self) { + let plugin = Arc::new(self); + register_hypervisor_plugin(HYPERVISOR_NAME_DRAGONBALL, plugin); + } +} + +impl ConfigPlugin for DragonballConfig { + fn get_max_cpus(&self) -> u32 { + MAX_DRAGONBALL_VCPUS + } + fn get_min_memory(&self) -> u32 { + MIN_DRAGONBALL_MEMORY_SIZE_MB + } + fn name(&self) -> &str { + HYPERVISOR_NAME_DRAGONBALL + } + + /// Adjust the configuration information after loading from configuration file. + fn adjust_config(&self, conf: &mut TomlConfig) -> Result<()> { + if let Some(db) = conf.hypervisor.get_mut(HYPERVISOR_NAME_DRAGONBALL) { + resolve_path!(db.jailer_path, "dragonball jailer path {} is invalid: {}")?; + + if db.boot_info.kernel.is_empty() { + db.boot_info.kernel = default::DEFAULT_DRAGONBALL_GUEST_KERNEL_IMAGE.to_string(); + } + if db.boot_info.kernel_params.is_empty() { + db.boot_info.kernel_params = + default::DEFAULT_DRAGONBALL_GUEST_KERNEL_PARAMS.to_string(); + } + + if db.cpu_info.default_maxvcpus > default::MAX_DRAGONBALL_VCPUS { + db.cpu_info.default_maxvcpus = default::MAX_DRAGONBALL_VCPUS; + } + + if db.cpu_info.default_vcpus as u32 > db.cpu_info.default_maxvcpus { + db.cpu_info.default_vcpus = db.cpu_info.default_maxvcpus as i32; + } + + if db.machine_info.entropy_source.is_empty() { + db.machine_info.entropy_source = + default::DEFAULT_DRAGONBALL_ENTROPY_SOURCE.to_string(); + } + + if db.memory_info.default_memory == 0 { + db.memory_info.default_memory = default::DEFAULT_DRAGONBALL_MEMORY_SIZE_MB; + } + if db.memory_info.memory_slots == 0 { + db.memory_info.memory_slots = default::DEFAULT_DRAGONBALL_MEMORY_SLOTS; + } + } + Ok(()) + } + + /// Validate the configuration information. 
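+ /// Dragonball is expected to run as a built-in hypervisor: the host binary and control + /// paths must stay empty, and only `virtio-fs` or `inline-virtio-fs` is accepted for shared fs.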
+ fn validate(&self, conf: &TomlConfig) -> Result<()> { + if let Some(db) = conf.hypervisor.get(HYPERVISOR_NAME_DRAGONBALL) { + if !db.path.is_empty() { + return Err(eother!("Path for dragonball hypervisor should be empty")); + } + if !db.valid_hypervisor_paths.is_empty() { + return Err(eother!( + "Valid hypervisor path for dragonball hypervisor should be empty" + )); + } + if !db.ctlpath.is_empty() { + return Err(eother!("CtlPath for dragonball hypervisor should be empty")); + } + if !db.valid_ctlpaths.is_empty() { + return Err(eother!("CtlPath for dragonball hypervisor should be empty")); + } + validate_path!(db.jailer_path, "dragonball jailer path {} is invalid: {}")?; + if db.enable_iothreads { + return Err(eother!("dragonball hypervisor doesn't support IO threads.")); + } + + if !db.blockdev_info.disable_block_device_use + && db.blockdev_info.block_device_driver != VIRTIO_BLK_PCI + && db.blockdev_info.block_device_driver != VIRTIO_BLK_MMIO + && db.blockdev_info.block_device_driver != VIRTIO_PMEM + { + return Err(eother!( + "{} is unsupported block device type.", + db.blockdev_info.block_device_driver + )); + } + + if db.boot_info.kernel.is_empty() { + return Err(eother!( + "Guest kernel image for dragonball hypervisor is empty" + )); + } + if db.boot_info.image.is_empty() { + return Err(eother!( + "Guest boot image for dragonball hypervisor is empty" + )); + } + if !db.boot_info.initrd.is_empty() { + return Err(eother!("Initrd for dragonball hypervisor should be empty")); + } + if !db.boot_info.firmware.is_empty() { + return Err(eother!( + "Firmware for dragonball hypervisor should be empty" + )); + } + + if (db.cpu_info.default_vcpus > 0 + && db.cpu_info.default_vcpus as u32 > default::MAX_DRAGONBALL_VCPUS) + || db.cpu_info.default_maxvcpus > default::MAX_DRAGONBALL_VCPUS + { + return Err(eother!( + "dragonball hypervisor can not support {} vCPUs", + db.cpu_info.default_maxvcpus + )); + } + + if db.device_info.enable_iommu || db.device_info.enable_iommu_platform { + return Err(eother!("dragonball hypervisor does not support vIOMMU")); + } + if db.device_info.hotplug_vfio_on_root_bus + || db.device_info.default_bridges > 0 + || db.device_info.pcie_root_port > 0 + { + return Err(eother!( + "dragonball hypervisor does not support PCI hotplug options" + )); + } + + if !db.machine_info.machine_type.is_empty() { + return Err(eother!( + "dragonball hypervisor does not support machine_type" + )); + } + if !db.machine_info.pflashes.is_empty() { + return Err(eother!("dragonball hypervisor does not support pflashes")); + } + + if db.memory_info.enable_guest_swap { + return Err(eother!( + "dragonball hypervisor doesn't support enable_guest_swap" + )); + } + + if db.security_info.rootless { + return Err(eother!( + "dragonball hypervisor does not support rootless mode" + )); + } + + if let Some(v) = db.shared_fs.shared_fs.as_ref() { + if v != VIRTIO_FS && v != VIRTIO_FS_INLINE { + return Err(eother!("dragonball hypervisor doesn't support {}", v)); + } + } + + if db.memory_info.default_memory < MIN_DRAGONBALL_MEMORY_SIZE_MB { + return Err(eother!( + "dragonball hypervisor has minimal memory limitation {}", + MIN_DRAGONBALL_MEMORY_SIZE_MB + )); + } + } + + Ok(()) + } +} diff --git a/src/libs/kata-types/src/config/hypervisor/mod.rs b/src/libs/kata-types/src/config/hypervisor/mod.rs new file mode 100644 index 000000000000..19fb83d61a25 --- /dev/null +++ b/src/libs/kata-types/src/config/hypervisor/mod.rs @@ -0,0 +1,1268 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// +// SPDX-License-Identifier: 
Apache-2.0 +// + +//! Configuration information for hypervisors. +//! +//! The configuration information for hypervisors is complex, and different hypervisor requires +//! different configuration information. To make it flexible and extensible, we build a multi-layer +//! architecture to manipulate hypervisor configuration information. +//! - the vendor layer. The `HypervisorVendor` structure provides hook points for vendors to +//! customize the configuration for its deployment. +//! - the hypervisor plugin layer. The hypervisor plugin layer provides hook points for different +//! hypervisors to manipulate the configuration information. +//! - the hypervisor common layer. This layer handles generic logic for all types of hypervisors. +//! +//! These three layers are applied in order. So changes made by the vendor layer will be visible +//! to the hypervisor plugin layer and the common layer. And changes made by the plugin layer will +//! only be visible to the common layer. +//! +//! Ideally the hypervisor configuration information should be split into hypervisor specific +//! part and common part. But the Kata 2.0 has adopted a policy to build a superset for all +//! hypervisors, so let's contain it... + +use std::collections::HashMap; +use std::io::{self, Result}; +use std::path::Path; +use std::sync::{Arc, Mutex}; + +use lazy_static::lazy_static; +use regex::RegexSet; +use serde_enum_str::{Deserialize_enum_str, Serialize_enum_str}; + +use super::{default, ConfigOps, ConfigPlugin, TomlConfig}; +use crate::annotations::KATA_ANNO_CFG_HYPERVISOR_PREFIX; +use crate::{eother, resolve_path, sl, validate_path}; + +mod dragonball; +pub use self::dragonball::{DragonballConfig, HYPERVISOR_NAME_DRAGONBALL}; + +mod qemu; +pub use self::qemu::{QemuConfig, HYPERVISOR_NAME_QEMU}; + +mod ch; +pub use self::ch::{CloudHypervisorConfig, HYPERVISOR_NAME_CH}; + +const VIRTIO_BLK_PCI: &str = "virtio-blk-pci"; +const VIRTIO_BLK_MMIO: &str = "virtio-blk-mmio"; +const VIRTIO_BLK_CCW: &str = "virtio-blk-ccw"; +const VIRTIO_SCSI: &str = "virtio-scsi"; +const VIRTIO_PMEM: &str = "virtio-pmem"; +const VIRTIO_9P: &str = "virtio-9p"; +const VIRTIO_FS: &str = "virtio-fs"; +const VIRTIO_FS_INLINE: &str = "inline-virtio-fs"; +const MAX_BRIDGE_SIZE: u32 = 5; + +const KERNEL_PARAM_DELIMITER: &str = " "; + +lazy_static! { + static ref HYPERVISOR_PLUGINS: Mutex>> = + Mutex::new(HashMap::new()); +} + +/// Register a hypervisor plugin with `name`. +pub fn register_hypervisor_plugin(name: &str, plugin: Arc) { + let mut hypervisors = HYPERVISOR_PLUGINS.lock().unwrap(); + hypervisors.insert(name.to_string(), plugin); +} + +/// Get the hypervisor plugin with `name`. +pub fn get_hypervisor_plugin(name: &str) -> Option> { + let hypervisors = HYPERVISOR_PLUGINS.lock().unwrap(); + hypervisors.get(name).cloned() +} + +/// Configuration information for block device. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct BlockDeviceInfo { + /// Disable block device from being used for a container's rootfs. + /// + /// In case of a storage driver like devicemapper where a container's root file system is + /// backed by a block device, the block device is passed directly to the hypervisor for + /// performance reasons. This flag prevents the block device from being passed to the + /// hypervisor, shared fs is used instead to pass the rootfs. + #[serde(default)] + pub disable_block_device_use: bool, + + /// Block storage driver to be used for the hypervisor in case the container rootfs is backed + /// by a block device. 
This is virtio-scsi, virtio-blk or nvdimm. + #[serde(default)] + pub block_device_driver: String, + + /// Specifies cache-related options will be set to block devices or not. + #[serde(default)] + pub block_device_cache_set: bool, + + /// Specifies cache-related options for block devices. + /// + /// Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. + #[serde(default)] + pub block_device_cache_direct: bool, + + /// Specifies cache-related options for block devices. + /// Denotes whether flush requests for the device are ignored. + #[serde(default)] + pub block_device_cache_noflush: bool, + + /// If false and nvdimm is supported, use nvdimm device to plug guest image. + #[serde(default)] + pub disable_image_nvdimm: bool, + + /// The size in MiB will be plused to max memory of hypervisor. + /// + /// It is the memory address space for the NVDIMM devie. If set block storage driver + /// (block_device_driver) to "nvdimm", should set memory_offset to the size of block device. + #[serde(default)] + pub memory_offset: u64, + + /// Enable vhost-user storage device, default false + /// + /// Enabling this will result in some Linux reserved block type major range 240-254 being + /// chosen to represent vhost-user devices. + #[serde(default)] + pub enable_vhost_user_store: bool, + + /// The base directory specifically used for vhost-user devices. + /// + /// Its sub-path "block" is used for block devices; "block/sockets" is where we expect + /// vhost-user sockets to live; "block/devices" is where simulated block device nodes for + /// vhost-user devices to live. + #[serde(default)] + pub vhost_user_store_path: String, + + /// List of valid annotations values for the vhost user store path. + /// + /// The default if not set is empty (all annotations rejected.) + #[serde(default)] + pub valid_vhost_user_store_paths: Vec, +} + +impl BlockDeviceInfo { + /// Adjust the configuration information after loading from configuration file. + pub fn adjust_config(&mut self) -> Result<()> { + if self.disable_block_device_use { + self.block_device_driver = "".to_string(); + self.enable_vhost_user_store = false; + self.memory_offset = 0; + return Ok(()); + } + + if self.block_device_driver.is_empty() { + self.block_device_driver = default::DEFAULT_BLOCK_DEVICE_TYPE.to_string(); + } + if self.memory_offset == 0 { + self.memory_offset = default::DEFAULT_BLOCK_NVDIMM_MEM_OFFSET; + } + if !self.enable_vhost_user_store { + self.vhost_user_store_path = String::new(); + } else if self.vhost_user_store_path.is_empty() { + self.vhost_user_store_path = default::DEFAULT_VHOST_USER_STORE_PATH.to_string(); + } + resolve_path!( + self.vhost_user_store_path, + "Invalid vhost-user-store-path {}: {}" + )?; + + Ok(()) + } + + /// Validate the configuration information. + pub fn validate(&self) -> Result<()> { + if self.disable_block_device_use { + return Ok(()); + } + let l = [ + VIRTIO_BLK_PCI, + VIRTIO_BLK_CCW, + VIRTIO_BLK_MMIO, + VIRTIO_PMEM, + VIRTIO_SCSI, + ]; + if !l.contains(&self.block_device_driver.as_str()) { + return Err(eother!( + "{} is unsupported block device type.", + self.block_device_driver + )); + } + validate_path!( + self.vhost_user_store_path, + "Invalid vhost-user-store-path {}: {}" + )?; + + Ok(()) + } + + /// Validate path of vhost-user storage backend. + pub fn validate_vhost_user_store_path>(&self, path: P) -> Result<()> { + validate_path_pattern(&self.valid_vhost_user_store_paths, path) + } +} + +/// Guest kernel boot information. 
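+/// +/// A guest can be configured with either a rootfs `image` or an `initrd`, but not both; +/// `validate()` rejects configurations that set the two together.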
+#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct BootInfo { + /// Path to guest kernel file on host + #[serde(default)] + pub kernel: String, + /// Guest kernel commandline. + #[serde(default)] + pub kernel_params: String, + /// Path to initrd file on host + #[serde(default)] + pub initrd: String, + /// Path to root device on host + #[serde(default)] + pub image: String, + /// Rootfs filesystem type. + #[serde(default)] + pub rootfs_type: String, + /// Path to the firmware. + /// + /// If you want that qemu uses the default firmware leave this option empty. + #[serde(default)] + pub firmware: String, + /// Block storage driver to be used for the VM rootfs is backed + /// by a block device. This is virtio-pmem, virtio-blk-pci or virtio-blk-mmio + #[serde(default)] + pub vm_rootfs_driver: String, +} + +impl BootInfo { + /// Adjust the configuration information after loading from configuration file. + pub fn adjust_config(&mut self) -> Result<()> { + resolve_path!(self.kernel, "guest kernel image file {} is invalid: {}")?; + resolve_path!(self.image, "guest boot image file {} is invalid: {}")?; + resolve_path!(self.initrd, "guest initrd image file {} is invalid: {}")?; + resolve_path!(self.firmware, "firmware image file {} is invalid: {}")?; + + if self.vm_rootfs_driver.is_empty() { + self.vm_rootfs_driver = default::DEFAULT_BLOCK_DEVICE_TYPE.to_string(); + } + + Ok(()) + } + + /// Validate the configuration information. + pub fn validate(&self) -> Result<()> { + validate_path!(self.kernel, "guest kernel image file {} is invalid: {}")?; + validate_path!(self.image, "guest boot image file {} is invalid: {}")?; + validate_path!(self.initrd, "guest initrd image file {} is invalid: {}")?; + validate_path!(self.firmware, "firmware image file {} is invalid: {}")?; + if !self.image.is_empty() && !self.initrd.is_empty() { + return Err(eother!("Can not configure both initrd and image for boot")); + } + + let l = [ + VIRTIO_BLK_PCI, + VIRTIO_BLK_CCW, + VIRTIO_BLK_MMIO, + VIRTIO_PMEM, + VIRTIO_SCSI, + ]; + if !l.contains(&self.vm_rootfs_driver.as_str()) { + return Err(eother!( + "{} is unsupported block device type.", + self.vm_rootfs_driver + )); + } + + Ok(()) + } + + /// Add kernel parameters to bootinfo. It is always added before the original + /// to let the original one takes priority + pub fn add_kernel_params(&mut self, params: Vec) { + let mut p = params; + if !self.kernel_params.is_empty() { + p.push(self.kernel_params.clone()); // [new_params0, new_params1, ..., original_params] + } + self.kernel_params = p.join(KERNEL_PARAM_DELIMITER); + } + + /// Validate guest kernel image annotaion + pub fn validate_boot_path(&self, path: &str) -> Result<()> { + validate_path!(path, "path {} is invalid{}")?; + Ok(()) + } +} + +/// Virtual CPU configuration information. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct CpuInfo { + /// CPU features, comma-separated list of cpu features to pass to the cpu. 
+ /// For example, `cpu_features = "pmu=off,vmx=off" + #[serde(default)] + pub cpu_features: String, + + /// Default number of vCPUs per SB/VM: + /// - unspecified or 0 --> will be set to @DEFVCPUS@ + /// - < 0 --> will be set to the actual number of physical cores + /// > 0 <= number of physical cores --> will be set to the specified number + /// > number of physical cores --> will be set to the actual number of physical cores + #[serde(default)] + pub default_vcpus: i32, + + /// Default maximum number of vCPUs per SB/VM: + /// - unspecified or == 0 --> will be set to the actual number of physical cores or + /// to the maximum number of vCPUs supported by KVM + /// if that number is exceeded + /// - > 0 <= number of physical cores --> will be set to the specified number + /// - > number of physical cores --> will be set to the actual number of physical cores or + /// to the maximum number of vCPUs supported by KVM + /// if that number is exceeded + /// + /// WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used + /// when the actual number of physical cores is greater than it. + /// + /// WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU + /// the hotplug functionality. For example, `default_maxvcpus = 240` specifies that until 240 + /// vCPUs can be added to a SB/VM, but the memory footprint will be big. Another example, with + /// `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number + /// of vCPUs supported by the SB/VM. In general, we recommend that you do not edit this + /// variable, unless you know what are you doing. + /// + /// NOTICE: on arm platform with gicv2 interrupt controller, set it to 8. + #[serde(default)] + pub default_maxvcpus: u32, +} + +impl CpuInfo { + /// Adjust the configuration information after loading from configuration file. + pub fn adjust_config(&mut self) -> Result<()> { + let features: Vec<&str> = self.cpu_features.split(',').map(|v| v.trim()).collect(); + self.cpu_features = features.join(","); + + let cpus = num_cpus::get() as u32; + + // adjust default_maxvcpus + if self.default_maxvcpus == 0 || self.default_maxvcpus > cpus { + self.default_maxvcpus = cpus; + } + + // adjust default_vcpus + if self.default_vcpus < 0 || self.default_vcpus as u32 > cpus { + self.default_vcpus = cpus as i32; + } else if self.default_vcpus == 0 { + self.default_vcpus = default::DEFAULT_GUEST_VCPUS as i32; + } + + if self.default_vcpus > self.default_maxvcpus as i32 { + self.default_vcpus = self.default_maxvcpus as i32; + } + + Ok(()) + } + + /// Validate the configuration information. + pub fn validate(&self) -> Result<()> { + if self.default_vcpus > self.default_maxvcpus as i32 { + return Err(eother!( + "The default_vcpus({}) is greater than default_maxvcpus({})", + self.default_vcpus, + self.default_maxvcpus + )); + } + Ok(()) + } +} + +/// Configuration information for debug +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct DebugInfo { + /// This option changes the default hypervisor and kernel parameters to enable debug output + /// where available. + #[serde(default)] + pub enable_debug: bool, + + /// Enable dumping information about guest page structures if true. + #[serde(default)] + pub guest_memory_dump_paging: bool, + + /// Set where to save the guest memory dump file. + /// + /// If set, when GUEST_PANICKED event occurred, guest memory will be dumped to host filesystem + /// under guest_memory_dump_path. 
This directory will be created automatically if it does not + /// exist. The dumped file(also called vmcore) can be processed with crash or gdb. + /// + /// # WARNING: + /// Dump guest's memory can take very long depending on the amount of guest memory and use + /// much disk space. + #[serde(default)] + pub guest_memory_dump_path: String, +} + +impl DebugInfo { + /// Adjust the configuration information after loading from configuration file. + pub fn adjust_config(&mut self) -> Result<()> { + Ok(()) + } + + /// Validate the configuration information. + pub fn validate(&self) -> Result<()> { + Ok(()) + } +} + +/// Virtual machine device configuration information. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct DeviceInfo { + /// Bridges can be used to hot plug devices. + /// + /// Limitations: + /// - Currently only pci bridges are supported + /// - Until 30 devices per bridge can be hot plugged. + /// - Until 5 PCI bridges can be cold plugged per VM. + /// + /// This limitation could be a bug in qemu or in the kernel + /// Default number of bridges per SB/VM: + /// - unspecified or 0 --> will be set to @DEFBRIDGES@ + /// - > 1 <= 5 --> will be set to the specified number + /// - > 5 --> will be set to 5 + #[serde(default)] + pub default_bridges: u32, + + /// VFIO devices are hotplugged on a bridge by default. + /// + /// Enable hotplugging on root bus. This may be required for devices with a large PCI bar, + /// as this is a current limitation with hotplugging on a bridge. + #[serde(default)] + pub hotplug_vfio_on_root_bus: bool, + + /// Before hot plugging a PCIe device, you need to add a pcie_root_port device. + /// + /// Use this parameter when using some large PCI bar devices, such as Nvidia GPU. + /// The value means the number of pcie_root_port. + /// This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35" + #[serde(default)] + pub pcie_root_port: u32, + + /// Enable vIOMMU, default false + /// + /// Enabling this will result in the VM having a vIOMMU device. This will also add the + /// following options to the kernel's command line: intel_iommu=on,iommu=pt + #[serde(default)] + pub enable_iommu: bool, + + /// Enable IOMMU_PLATFORM, default false + /// + /// Enabling this will result in the VM device having iommu_platform=on set + #[serde(default)] + pub enable_iommu_platform: bool, +} + +impl DeviceInfo { + /// Adjust the configuration information after loading from configuration file. + pub fn adjust_config(&mut self) -> Result<()> { + if self.default_bridges > MAX_BRIDGE_SIZE { + self.default_bridges = MAX_BRIDGE_SIZE; + } + + Ok(()) + } + + /// Validate the configuration information. + pub fn validate(&self) -> Result<()> { + if self.default_bridges > MAX_BRIDGE_SIZE { + return Err(eother!( + "The configured PCI bridges {} are too many", + self.default_bridges + )); + } + Ok(()) + } +} + +/// Configuration information for virtual machine. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct MachineInfo { + /// Virtual machine model/type. + #[serde(default)] + pub machine_type: String, + + /// Machine accelerators. + /// Comma-separated list of machine accelerators to pass to the hypervisor. + /// For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"` + #[serde(default)] + pub machine_accelerators: String, + + /// Add flash image file to VM. + /// + /// The arguments of it should be in format of ["/path/to/flash0.img", "/path/to/flash1.img"]. 
+ #[serde(default)] + pub pflashes: Vec, + + /// Default entropy source. + /// The path to a host source of entropy (including a real hardware RNG). + /// `/dev/urandom` and `/dev/random` are two main options. Be aware that `/dev/random` is a + /// blocking source of entropy. If the host runs out of entropy, the VMs boot time will + /// increase leading to get startup timeouts. The source of entropy `/dev/urandom` is + /// non-blocking and provides a generally acceptable source of entropy. It should work well + /// for pretty much all practical purposes. + #[serde(default)] + pub entropy_source: String, + + /// List of valid annotations values for entropy_source. + /// The default if not set is empty (all annotations rejected.) + #[serde(default)] + pub valid_entropy_sources: Vec, +} + +impl MachineInfo { + /// Adjust the configuration information after loading from configuration file. + pub fn adjust_config(&mut self) -> Result<()> { + let accelerators: Vec<&str> = self + .machine_accelerators + .split(',') + .map(|v| v.trim()) + .collect(); + self.machine_accelerators = accelerators.join(","); + + for pflash in self.pflashes.iter_mut() { + resolve_path!(*pflash, "Flash image file {} is invalide: {}")?; + } + resolve_path!(self.entropy_source, "Entropy source {} is invalid: {}")?; + + Ok(()) + } + + /// Validate the configuration information. + pub fn validate(&self) -> Result<()> { + for pflash in self.pflashes.iter() { + validate_path!(*pflash, "Flash image file {} is invalid: {}")?; + } + validate_path!(self.entropy_source, "Entropy source {} is invalid: {}")?; + Ok(()) + } + + /// Validate path of entropy source. + pub fn validate_entropy_source>(&self, path: P) -> Result<()> { + validate_path_pattern(&self.valid_entropy_sources, path) + } +} + +/// Huge page type for VM RAM backend +#[derive(Clone, Debug, Deserialize_enum_str, Serialize_enum_str, PartialEq, Eq)] +pub enum HugePageType { + /// This will result in the VM memory being allocated using hugetlbfs backend. This is useful + /// when you want to use vhost-user network stacks within the container. This will automatically + /// result in memory pre allocation. + #[serde(rename = "hugetlbfs")] + Hugetlbfs, + /// This will result in the VM memory being allocated using transparant huge page backend. + #[serde(rename = "thp")] + THP, +} + +impl Default for HugePageType { + fn default() -> Self { + Self::Hugetlbfs + } +} + +/// Virtual machine memory configuration information. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct MemoryInfo { + /// Default memory size in MiB for SB/VM. + #[serde(default)] + pub default_memory: u32, + + /// Default memory slots per SB/VM. + /// + /// This is will determine the times that memory will be hotadded to sandbox/VM. + #[serde(default)] + pub memory_slots: u32, + + /// Enable file based guest memory support. + /// + /// The default is an empty string which will disable this feature. In the case of virtio-fs, + /// this is enabled automatically and '/dev/shm' is used as the backing folder. This option + /// will be ignored if VM templating is enabled. + #[serde(default)] + pub file_mem_backend: String, + + /// List of valid annotations values for the file_mem_backend annotation + /// + /// The default if not set is empty (all annotations rejected.) + #[serde(default)] + pub valid_file_mem_backends: Vec, + + /// Enable pre allocation of VM RAM, default false + /// + /// Enabling this will result in lower container density as all of the memory will be allocated + /// and locked. 
This is useful when you want to reserve all the memory upfront or in the cases + /// where you want memory latencies to be very predictable + #[serde(default)] + pub enable_mem_prealloc: bool, + + /// Enable huge pages for VM RAM, default false + /// + /// Enabling this will result in the VM memory being allocated using huge pages. + /// Its backend type is specified by item "hugepage_type" + #[serde(default)] + pub enable_hugepages: bool, + + /// Select huge page type, default "hugetlbfs" + /// Following huge types are supported: + /// - hugetlbfs + /// - thp + #[serde(default)] + pub hugepage_type: HugePageType, + + /// Specifies virtio-mem will be enabled or not. + /// + /// Please note that this option should be used with the command + /// "echo 1 > /proc/sys/vm/overcommit_memory". + #[serde(default)] + pub enable_virtio_mem: bool, + + /// Enable swap of vm memory. Default false. + /// + /// The behaviour is undefined if mem_prealloc is also set to true + #[serde(default)] + pub enable_swap: bool, + + /// Enable swap in the guest. Default false. + /// + /// When enable_guest_swap is enabled, insert a raw file to the guest as the swap device if the + /// swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness") + /// is bigger than 0. + /// + /// The size of the swap device should be swap_in_bytes (set by annotation + /// "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes. + /// If swap_in_bytes is not set, the size should be memory_limit_in_bytes. + /// If swap_in_bytes and memory_limit_in_bytes is not set, the size should be default_memory. + #[serde(default)] + pub enable_guest_swap: bool, +} + +impl MemoryInfo { + /// Adjust the configuration information after loading from configuration file. + pub fn adjust_config(&mut self) -> Result<()> { + resolve_path!( + self.file_mem_backend, + "Memory backend file {} is invalid: {}" + )?; + Ok(()) + } + + /// Validate the configuration information. + pub fn validate(&self) -> Result<()> { + validate_path!( + self.file_mem_backend, + "Memory backend file {} is invalid: {}" + )?; + if self.default_memory == 0 { + return Err(eother!("Configured memory size for guest VM is zero")); + } + if self.memory_slots == 0 { + return Err(eother!("Configured memory slots for guest VM are zero")); + } + + Ok(()) + } + + /// Validate path of memory backend files. + pub fn validate_memory_backend_path>(&self, path: P) -> Result<()> { + validate_path_pattern(&self.valid_file_mem_backends, path) + } +} + +/// Configuration information for network. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct NetworkInfo { + /// If vhost-net backend for virtio-net is not desired, set to true. + /// + /// Default is false, which trades off security (vhost-net runs ring0) for network I/O + /// performance. + #[serde(default)] + pub disable_vhost_net: bool, + + /// Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM). + /// + /// In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic. + /// Default 0-sized value means unlimited rate. + #[serde(default)] + pub rx_rate_limiter_max_rate: u64, + + /// Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM). + /// + /// In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional + /// Block) to discipline traffic. + /// Default 0-sized value means unlimited rate. 
+ #[serde(default)] + pub tx_rate_limiter_max_rate: u64, + + /// network queues + #[serde(default)] + pub network_queues: u32, +} + +impl NetworkInfo { + /// Adjust the configuration information after loading from configuration file. + pub fn adjust_config(&mut self) -> Result<()> { + Ok(()) + } + + /// Validate the configuration information. + pub fn validate(&self) -> Result<()> { + Ok(()) + } +} + +/// Configuration information for security. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct SecurityInfo { + /// Enable running QEMU VMM as a non-root user. + /// + /// By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as + /// a non-root random user. See documentation for the limitations of this mode. + #[serde(default)] + pub rootless: bool, + + /// Disable seccomp. + #[serde(default)] + pub disable_seccomp: bool, + + /// Enable confidential guest support. + /// + /// Toggling that setting may trigger different hardware features, ranging from memory + /// encryption to both memory and CPU-state encryption and integrity.The Kata Containers + /// runtime dynamically detects the available feature set and aims at enabling the largest + /// possible one. + #[serde(default)] + pub confidential_guest: bool, + + /// Path to OCI hook binaries in the *guest rootfs*. + /// + /// This does not affect host-side hooks which must instead be added to the OCI spec passed to + /// the runtime. + /// + /// You can create a rootfs with hooks by customizing the osbuilder scripts: + /// https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder + /// + /// Hooks must be stored in a subdirectory of guest_hook_path according to their hook type, + /// i.e. "guest_hook_path/{prestart,poststart,poststop}". The agent will scan these directories + /// for executable files and add them, in lexicographical order, to the lifecycle of the guest + /// container. + /// + /// Hooks are executed in the runtime namespace of the guest. See the official documentation: + /// https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks + /// + /// Warnings will be logged if any error is encountered while scanning for hooks, but it will + /// not abort container execution. + #[serde(default)] + pub guest_hook_path: String, + + /// List of valid annotation names for the hypervisor. + /// + /// Each member of the list is a regular expression, which is the base name of the annotation, + /// e.g. "path" for io.katacontainers.config.hypervisor.path" + #[serde(default)] + pub enable_annotations: Vec, +} + +impl SecurityInfo { + /// Adjust the configuration information after loading from configuration file. + pub fn adjust_config(&mut self) -> Result<()> { + if self.guest_hook_path.is_empty() { + self.guest_hook_path = default::DEFAULT_GUEST_HOOK_PATH.to_string(); + } + Ok(()) + } + + /// Validate the configuration information. + pub fn validate(&self) -> Result<()> { + Ok(()) + } + + /// Check whether annotation key is enabled or not. 
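+ /// For example, with `enable_annotations = ["path"]` the key + /// `io.katacontainers.config.hypervisor.path` passes the check, while keys outside the + /// hypervisor annotation prefix are always rejected.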
+ pub fn is_annotation_enabled(&self, path: &str) -> bool { + if !path.starts_with(KATA_ANNO_CFG_HYPERVISOR_PREFIX) { + return false; + } + let pos = KATA_ANNO_CFG_HYPERVISOR_PREFIX.len(); + let key = &path[pos..]; + if let Ok(set) = RegexSet::new(&self.enable_annotations) { + return set.is_match(key); + } + false + } + + /// Validate path + pub fn validate_path(&self, path: &str) -> Result<()> { + validate_path!(path, "path {} is invalid{}")?; + Ok(()) + } +} + +/// Configuration information for shared filesystem, such virtio-9p and virtio-fs. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct SharedFsInfo { + /// Shared file system type: + /// - virtio-fs (default) + /// - virtio-9p` + pub shared_fs: Option, + + /// Path to vhost-user-fs daemon. + #[serde(default)] + pub virtio_fs_daemon: String, + + /// List of valid annotations values for the virtiofs daemon + /// The default if not set is empty (all annotations rejected.) + #[serde(default)] + pub valid_virtio_fs_daemon_paths: Vec, + + /// Extra args for virtiofsd daemon + /// + /// Format example: + /// ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] + /// + /// see `virtiofsd -h` for possible options. + #[serde(default)] + pub virtio_fs_extra_args: Vec, + + /// Cache mode: + /// - never: Metadata, data, and pathname lookup are not cached in guest. They are always + /// fetched from host and any changes are immediately pushed to host. + /// - auto: Metadata and pathname lookup cache expires after a configured amount of time + /// (default is 1 second). Data is cached while the file is open (close to open consistency). + /// - always: Metadata, data, and pathname lookup are cached in guest and never expire. + #[serde(default)] + pub virtio_fs_cache: String, + + /// Default size of DAX cache in MiB + #[serde(default)] + pub virtio_fs_cache_size: u32, + + /// Default size of virtqueues + #[serde(default)] + pub virtio_fs_queue_size: u32, + + /// Enable virtio-fs DAX window if true. + #[serde(default)] + pub virtio_fs_is_dax: bool, + + /// This is the msize used for 9p shares. It is the number of bytes used for 9p packet payload. + #[serde(default)] + pub msize_9p: u32, +} + +impl SharedFsInfo { + /// Adjust the configuration information after loading from configuration file. + pub fn adjust_config(&mut self) -> Result<()> { + if self.shared_fs.as_deref() == Some("") { + self.shared_fs = Some(default::DEFAULT_SHARED_FS_TYPE.to_string()); + } + match self.shared_fs.as_deref() { + Some(VIRTIO_FS) => self.adjust_virtio_fs(false)?, + Some(VIRTIO_FS_INLINE) => self.adjust_virtio_fs(true)?, + Some(VIRTIO_9P) => { + if self.msize_9p == 0 { + self.msize_9p = default::DEFAULT_SHARED_9PFS_SIZE_MB; + } + } + _ => {} + } + + Ok(()) + } + + /// Validate the configuration information. + pub fn validate(&self) -> Result<()> { + match self.shared_fs.as_deref() { + None => Ok(()), + Some(VIRTIO_FS) => self.validate_virtio_fs(false), + Some(VIRTIO_FS_INLINE) => self.validate_virtio_fs(true), + Some(VIRTIO_9P) => { + if self.msize_9p < default::MIN_SHARED_9PFS_SIZE_MB + || self.msize_9p > default::MAX_SHARED_9PFS_SIZE_MB + { + return Err(eother!( + "Invalid 9p configuration msize 0x{:x}, min value is 0x{:x}, max value is 0x{:x}", + self.msize_9p,default::MIN_SHARED_9PFS_SIZE_MB, default::MAX_SHARED_9PFS_SIZE_MB + )); + } + Ok(()) + } + Some(v) => Err(eother!("Invalid shared_fs type {}", v)), + } + } + + /// Validate path of virtio-fs daemon, especially for annotations. 
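+ /// An empty `valid_virtio_fs_daemon_paths` list rejects every annotation-supplied daemon path.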
+ pub fn validate_virtiofs_daemon_path>(&self, path: P) -> Result<()> { + validate_path_pattern(&self.valid_virtio_fs_daemon_paths, path) + } + + fn adjust_virtio_fs(&mut self, inline: bool) -> Result<()> { + // inline mode doesn't need external virtiofsd daemon + if !inline { + resolve_path!( + self.virtio_fs_daemon, + "Virtio-fs daemon path {} is invalid: {}" + )?; + } + + if self.virtio_fs_cache.is_empty() { + self.virtio_fs_cache = default::DEFAULT_VIRTIO_FS_CACHE_MODE.to_string(); + } + if self.virtio_fs_cache == *"none" { + warn!(sl!(), "virtio-fs cache mode `none` is deprecated since Kata Containers 2.5.0 and will be removed in the future release, please use `never` instead. For more details please refer to https://github.com/kata-containers/kata-containers/issues/4234."); + self.virtio_fs_cache = default::DEFAULT_VIRTIO_FS_CACHE_MODE.to_string(); + } + if self.virtio_fs_is_dax && self.virtio_fs_cache_size == 0 { + self.virtio_fs_cache_size = default::DEFAULT_VIRTIO_FS_DAX_SIZE_MB; + } + if !self.virtio_fs_is_dax && self.virtio_fs_cache_size != 0 { + self.virtio_fs_is_dax = true; + } + Ok(()) + } + + fn validate_virtio_fs(&self, inline: bool) -> Result<()> { + // inline mode doesn't need external virtiofsd daemon + if !inline { + validate_path!( + self.virtio_fs_daemon, + "Virtio-fs daemon path {} is invalid: {}" + )?; + } + + let l = ["never", "auto", "always"]; + + if !l.contains(&self.virtio_fs_cache.as_str()) { + return Err(eother!( + "Invalid virtio-fs cache mode: {}", + &self.virtio_fs_cache + )); + } + if self.virtio_fs_is_dax && self.virtio_fs_cache_size == 0 { + return Err(eother!( + "Invalid virtio-fs DAX window size: {}", + &self.virtio_fs_cache_size + )); + } + Ok(()) + } +} + +/// Common configuration information for hypervisors. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct Hypervisor { + /// Path to the hypervisor executable. + #[serde(default)] + pub path: String, + /// List of valid annotations values for the hypervisor. + /// + /// Each member of the list is a path pattern as described by glob(3). The default if not set + /// is empty (all annotations rejected.) + #[serde(default)] + pub valid_hypervisor_paths: Vec, + + /// Hypervisor control executable path. + #[serde(default)] + pub ctlpath: String, + /// List of valid annotations values for the hypervisor control executable. + /// + /// Each member of the list is a path pattern as described by glob(3). The default if not set + /// is empty (all annotations rejected.) + #[serde(default)] + pub valid_ctlpaths: Vec, + + /// Control channel path. + #[serde(default)] + pub jailer_path: String, + /// List of valid annotations values for the hypervisor jailer path. + /// + /// Each member of the list is a path pattern as described by glob(3). The default if not set + /// is empty (all annotations rejected.) + #[serde(default)] + pub valid_jailer_paths: Vec, + + /// Disable the customizations done in the runtime when it detects that it is running on top + /// a VMM. This will result in the runtime behaving as it would when running on bare metal. + #[serde(default)] + pub disable_nesting_checks: bool, + + /// Enable iothreads (data-plane) to be used. This causes IO to be handled in a separate IO + /// thread. This is currently only implemented for SCSI. + #[serde(default)] + pub enable_iothreads: bool, + + /// Block device configuration information. + #[serde(default, flatten)] + pub blockdev_info: BlockDeviceInfo, + + /// Guest system boot information. 
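+ /// These fields are flattened, so keys such as `kernel` and `image` appear directly + /// under the `[hypervisor.<name>]` TOML table.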
+ #[serde(default, flatten)] + pub boot_info: BootInfo, + + /// Guest virtual CPU configuration information. + #[serde(default, flatten)] + pub cpu_info: CpuInfo, + + /// Debug configuration information. + #[serde(default, flatten)] + pub debug_info: DebugInfo, + + /// Device configuration information. + #[serde(default, flatten)] + pub device_info: DeviceInfo, + + /// Virtual machine configuration information. + #[serde(default, flatten)] + pub machine_info: MachineInfo, + + /// Virtual machine memory configuration information. + #[serde(default, flatten)] + pub memory_info: MemoryInfo, + + /// Network configuration information. + #[serde(default, flatten)] + pub network_info: NetworkInfo, + + /// Security configuration information. + #[serde(default, flatten)] + pub security_info: SecurityInfo, + + /// Shared file system configuration information. + #[serde(default, flatten)] + pub shared_fs: SharedFsInfo, + + /// A sandbox annotation used to specify prefetch_files.list host path container image + /// being used, and runtime will pass it to Hypervisor to search for corresponding + /// prefetch list file: + /// prefetch_list_path = /path/to//xyz.com/fedora:36/prefetch_file.list + #[serde(default)] + pub prefetch_list_path: String, + + /// Vendor customized runtime configuration. + #[serde(default, flatten)] + pub vendor: HypervisorVendor, +} + +impl Hypervisor { + /// Validate path of hypervisor executable. + pub fn validate_hypervisor_path>(&self, path: P) -> Result<()> { + validate_path_pattern(&self.valid_hypervisor_paths, path) + } + + /// Validate path of hypervisor control executable. + pub fn validate_hypervisor_ctlpath>(&self, path: P) -> Result<()> { + validate_path_pattern(&self.valid_ctlpaths, path) + } + + /// Validate path of jailer executable. + pub fn validate_jailer_path>(&self, path: P) -> Result<()> { + validate_path_pattern(&self.valid_jailer_paths, path) + } +} + +impl ConfigOps for Hypervisor { + fn adjust_config(conf: &mut TomlConfig) -> Result<()> { + HypervisorVendor::adjust_config(conf)?; + let hypervisors: Vec = conf.hypervisor.keys().cloned().collect(); + for hypervisor in hypervisors.iter() { + if let Some(plugin) = get_hypervisor_plugin(hypervisor) { + plugin.adjust_config(conf)?; + // Safe to unwrap() because `hypervisor` is a valid key in the hash map. + let hv = conf.hypervisor.get_mut(hypervisor).ok_or_else(|| { + io::Error::new(io::ErrorKind::NotFound, "hypervisor not found".to_string()) + })?; + hv.blockdev_info.adjust_config()?; + hv.boot_info.adjust_config()?; + hv.cpu_info.adjust_config()?; + hv.debug_info.adjust_config()?; + hv.device_info.adjust_config()?; + hv.machine_info.adjust_config()?; + hv.memory_info.adjust_config()?; + hv.network_info.adjust_config()?; + hv.security_info.adjust_config()?; + hv.shared_fs.adjust_config()?; + resolve_path!( + hv.prefetch_list_path, + "prefetch_list_path `{}` is invalid: {}" + )?; + } else { + return Err(eother!("Can not find plugin for hypervisor {}", hypervisor)); + } + } + + Ok(()) + } + + fn validate(conf: &TomlConfig) -> Result<()> { + HypervisorVendor::validate(conf)?; + + let hypervisors: Vec = conf.hypervisor.keys().cloned().collect(); + for hypervisor in hypervisors.iter() { + if let Some(plugin) = get_hypervisor_plugin(hypervisor) { + plugin.validate(conf)?; + + // Safe to unwrap() because `hypervisor` is a valid key in the hash map. 
+ let hv = conf.hypervisor.get(hypervisor).unwrap(); + hv.blockdev_info.validate()?; + hv.boot_info.validate()?; + hv.cpu_info.validate()?; + hv.debug_info.validate()?; + hv.device_info.validate()?; + hv.machine_info.validate()?; + hv.memory_info.validate()?; + hv.network_info.validate()?; + hv.security_info.validate()?; + hv.shared_fs.validate()?; + validate_path!(hv.path, "Hypervisor binary path `{}` is invalid: {}")?; + validate_path!( + hv.ctlpath, + "Hypervisor control executable `{}` is invalid: {}" + )?; + validate_path!(hv.jailer_path, "Hypervisor jailer path `{}` is invalid: {}")?; + validate_path!( + hv.prefetch_list_path, + "prefetch_files.list path `{}` is invalid: {}" + )?; + } else { + return Err(eother!("Can not find plugin for hypervisor {}", hypervisor)); + } + } + + Ok(()) + } +} + +#[cfg(not(feature = "enable-vendor"))] +mod vendor { + use super::*; + + /// Vendor customization runtime configuration. + #[derive(Clone, Debug, Default, Deserialize, Serialize)] + pub struct HypervisorVendor {} + + impl ConfigOps for HypervisorVendor {} +} + +#[cfg(feature = "enable-vendor")] +#[path = "vendor.rs"] +mod vendor; + +pub use self::vendor::HypervisorVendor; +use crate::config::validate_path_pattern; +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_register_plugin() { + let db = DragonballConfig::new(); + db.register(); + + let db = Arc::new(DragonballConfig::new()); + register_hypervisor_plugin("dragonball", db); + + assert!(get_hypervisor_plugin("dragonball").is_some()); + assert!(get_hypervisor_plugin("dragonball2").is_none()); + } + + #[test] + fn test_add_kernel_params() { + let mut boot_info = BootInfo { + ..Default::default() + }; + let params = vec![ + String::from("foo"), + String::from("bar"), + String::from("baz=faz"), + ]; + boot_info.add_kernel_params(params); + + assert_eq!(boot_info.kernel_params, String::from("foo bar baz=faz")); + + let new_params = vec![ + String::from("boo=far"), + String::from("a"), + String::from("b=c"), + ]; + boot_info.add_kernel_params(new_params); + + assert_eq!( + boot_info.kernel_params, + String::from("boo=far a b=c foo bar baz=faz") + ); + } + + #[test] + fn test_cpu_info_adjust_config() { + // get CPU cores of the test node + let node_cpus = num_cpus::get() as u32; + let default_vcpus = default::DEFAULT_GUEST_VCPUS as i32; + + struct TestData<'a> { + desc: &'a str, + input: &'a mut CpuInfo, + output: CpuInfo, + } + + let tests = &mut [ + TestData { + desc: "all with default values", + input: &mut CpuInfo { + cpu_features: "".to_string(), + default_vcpus: 0, + default_maxvcpus: 0, + }, + output: CpuInfo { + cpu_features: "".to_string(), + default_vcpus, + default_maxvcpus: node_cpus, + }, + }, + TestData { + desc: "all with big values", + input: &mut CpuInfo { + cpu_features: "a,b,c".to_string(), + default_vcpus: 9999999, + default_maxvcpus: 9999999, + }, + output: CpuInfo { + cpu_features: "a,b,c".to_string(), + default_vcpus: node_cpus as i32, + default_maxvcpus: node_cpus, + }, + }, + TestData { + desc: "default_vcpus lager than default_maxvcpus", + input: &mut CpuInfo { + cpu_features: "a, b ,c".to_string(), + default_vcpus: -1, + default_maxvcpus: 1, + }, + output: CpuInfo { + cpu_features: "a,b,c".to_string(), + default_vcpus: 1, + default_maxvcpus: 1, + }, + }, + ]; + + for (_, tc) in tests.iter_mut().enumerate() { + // we can ensure that unwrap will not panic + tc.input.adjust_config().unwrap(); + + assert_eq!( + tc.input.cpu_features, tc.output.cpu_features, + "test[{}] cpu_features", + tc.desc + ); + 
assert_eq!( + tc.input.default_vcpus, tc.output.default_vcpus, + "test[{}] default_vcpus", + tc.desc + ); + assert_eq!( + tc.input.default_maxvcpus, tc.output.default_maxvcpus, + "test[{}] default_maxvcpus", + tc.desc + ); + } + } +} diff --git a/src/libs/kata-types/src/config/hypervisor/qemu.rs b/src/libs/kata-types/src/config/hypervisor/qemu.rs new file mode 100644 index 000000000000..945abc4b48d6 --- /dev/null +++ b/src/libs/kata-types/src/config/hypervisor/qemu.rs @@ -0,0 +1,150 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::io::Result; +use std::path::Path; +use std::sync::Arc; + +use super::{default, register_hypervisor_plugin}; + +use crate::config::default::MAX_QEMU_VCPUS; +use crate::config::default::MIN_QEMU_MEMORY_SIZE_MB; + +use crate::config::hypervisor::VIRTIO_BLK_MMIO; +use crate::config::{ConfigPlugin, TomlConfig}; +use crate::{eother, resolve_path, validate_path}; + +/// Hypervisor name for qemu, used to index `TomlConfig::hypervisor`. +pub const HYPERVISOR_NAME_QEMU: &str = "qemu"; + +/// Configuration information for qemu. +#[derive(Default, Debug)] +pub struct QemuConfig {} + +impl QemuConfig { + /// Create a new instance of `QemuConfig`. + pub fn new() -> Self { + QemuConfig {} + } + + /// Register the qemu plugin. + pub fn register(self) { + let plugin = Arc::new(self); + register_hypervisor_plugin(HYPERVISOR_NAME_QEMU, plugin); + } +} + +impl ConfigPlugin for QemuConfig { + fn get_max_cpus(&self) -> u32 { + MAX_QEMU_VCPUS + } + + fn get_min_memory(&self) -> u32 { + MIN_QEMU_MEMORY_SIZE_MB + } + fn name(&self) -> &str { + HYPERVISOR_NAME_QEMU + } + + /// Adjust the configuration information after loading from configuration file. + fn adjust_config(&self, conf: &mut TomlConfig) -> Result<()> { + if let Some(qemu) = conf.hypervisor.get_mut(HYPERVISOR_NAME_QEMU) { + if qemu.path.is_empty() { + qemu.path = default::DEFAULT_QEMU_BINARY_PATH.to_string(); + } + resolve_path!(qemu.path, "Qemu binary path `{}` is invalid: {}")?; + if qemu.ctlpath.is_empty() { + qemu.ctlpath = default::DEFAULT_QEMU_CONTROL_PATH.to_string(); + } + resolve_path!(qemu.ctlpath, "Qemu ctlpath `{}` is invalid: {}")?; + + if qemu.boot_info.kernel.is_empty() { + qemu.boot_info.kernel = default::DEFAULT_QEMU_GUEST_KERNEL_IMAGE.to_string(); + } + if qemu.boot_info.kernel_params.is_empty() { + qemu.boot_info.kernel_params = + default::DEFAULT_QEMU_GUEST_KERNEL_PARAMS.to_string(); + } + if qemu.boot_info.firmware.is_empty() { + qemu.boot_info.firmware = default::DEFAULT_QEMU_FIRMWARE_PATH.to_string(); + } + + if qemu.device_info.default_bridges == 0 { + qemu.device_info.default_bridges = default::DEFAULT_QEMU_PCI_BRIDGES; + } + + if qemu.machine_info.machine_type.is_empty() { + qemu.machine_info.machine_type = default::DEFAULT_QEMU_MACHINE_TYPE.to_string(); + } + if qemu.machine_info.entropy_source.is_empty() { + qemu.machine_info.entropy_source = default::DEFAULT_QEMU_ENTROPY_SOURCE.to_string(); + } + + if qemu.memory_info.default_memory == 0 { + qemu.memory_info.default_memory = default::DEFAULT_QEMU_MEMORY_SIZE_MB; + } + if qemu.memory_info.memory_slots == 0 { + qemu.memory_info.memory_slots = default::DEFAULT_QEMU_MEMORY_SLOTS; + } + } + + Ok(()) + } + + /// Validate the configuration information. 
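// Illustrative sketch, not part of the patch itself: the shape of the
// configuration section that adjust_config() above operates on. Key names
// come from the flattened Hypervisor sub-structures (BootInfo, MachineInfo,
// MemoryInfo, ...) defined elsewhere in this series, and the values are
// indicative only; anything omitted is filled in from the `default` module
// before validate() runs.
//
//     [hypervisor.qemu]
//     path = "/usr/bin/qemu-system-x86_64"
//     kernel = "/usr/share/kata-containers/vmlinux"
//     machine_type = "q35"
//     default_memory = 2048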
+ fn validate(&self, conf: &TomlConfig) -> Result<()> { + if let Some(qemu) = conf.hypervisor.get(HYPERVISOR_NAME_QEMU) { + validate_path!(qemu.path, "QEMU binary path `{}` is invalid: {}")?; + validate_path!(qemu.ctlpath, "QEMU control path `{}` is invalid: {}")?; + if !qemu.jailer_path.is_empty() { + return Err(eother!("Path for QEMU jailer should be empty")); + } + if !qemu.valid_jailer_paths.is_empty() { + return Err(eother!("Valid Qemu jailer path list should be empty")); + } + + if !qemu.blockdev_info.disable_block_device_use + && qemu.blockdev_info.block_device_driver == VIRTIO_BLK_MMIO + { + return Err(eother!("Qemu doesn't support virtio-blk-mmio")); + } + + if qemu.boot_info.kernel.is_empty() { + return Err(eother!("Guest kernel image for qemu is empty")); + } + if qemu.boot_info.image.is_empty() && qemu.boot_info.initrd.is_empty() { + return Err(eother!( + "Both guest boot image and initrd for qemu are empty" + )); + } + + if (qemu.cpu_info.default_vcpus > 0 + && qemu.cpu_info.default_vcpus as u32 > default::MAX_QEMU_VCPUS) + || qemu.cpu_info.default_maxvcpus > default::MAX_QEMU_VCPUS + { + return Err(eother!( + "Qemu hypervisor can not support {} vCPUs", + qemu.cpu_info.default_maxvcpus + )); + } + + if qemu.device_info.default_bridges > default::MAX_QEMU_PCI_BRIDGES { + return Err(eother!( + "Qemu hypervisor can not support {} PCI bridges", + qemu.device_info.default_bridges + )); + } + + if qemu.memory_info.default_memory < MIN_QEMU_MEMORY_SIZE_MB { + return Err(eother!( + "Qemu hypervisor has minimal memory limitation {}", + MIN_QEMU_MEMORY_SIZE_MB + )); + } + } + + Ok(()) + } +} diff --git a/src/libs/kata-types/src/config/hypervisor/vendor.rs b/src/libs/kata-types/src/config/hypervisor/vendor.rs new file mode 100644 index 000000000000..39f5779a451a --- /dev/null +++ b/src/libs/kata-types/src/config/hypervisor/vendor.rs @@ -0,0 +1,14 @@ +// Copyright (c) 2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! A sample for vendor to customize the hypervisor implementation. + +use super::*; + +/// Vendor customization runtime configuration. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct HypervisorVendor {} + +impl ConfigOps for HypervisorVendor {} diff --git a/src/libs/kata-types/src/config/mod.rs b/src/libs/kata-types/src/config/mod.rs new file mode 100644 index 000000000000..52b621d12c06 --- /dev/null +++ b/src/libs/kata-types/src/config/mod.rs @@ -0,0 +1,385 @@ +// Copyright (c) 2019-2021 Ant Financial +// Copyright (c) 2019-2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::collections::HashMap; +use std::fs; +use std::io::{self, Result}; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, Mutex}; +use std::u32; + +use lazy_static::lazy_static; + +use crate::{eother, sl}; + +/// Default configuration values. 
+pub mod default;
+
+mod agent;
+mod drop_in;
+pub mod hypervisor;
+
+pub use self::agent::Agent;
+use self::default::DEFAULT_AGENT_DBG_CONSOLE_PORT;
+pub use self::hypervisor::{
+    BootInfo, CloudHypervisorConfig, DragonballConfig, Hypervisor, QemuConfig,
+    HYPERVISOR_NAME_DRAGONBALL, HYPERVISOR_NAME_QEMU,
+};
+
+mod runtime;
+pub use self::runtime::{Runtime, RuntimeVendor, RUNTIME_NAME_VIRTCONTAINER};
+
+pub use self::agent::AGENT_NAME_KATA;
+
+/// kata run dir
+pub const KATA_PATH: &str = "/run/kata";
+
+// TODO: let agent use the constants here for consistency
+/// Debug console enabled flag for agent
+pub const DEBUG_CONSOLE_FLAG: &str = "agent.debug_console";
+/// Tracing enabled flag for agent
+pub const TRACE_MODE_OPTION: &str = "agent.trace";
+/// Tracing enabled
+pub const TRACE_MODE_ENABLE: &str = "true";
+/// Log level setting key for agent; set to debug when debug mode is on
+pub const LOG_LEVEL_OPTION: &str = "agent.log";
+/// logging level: debug
+pub const LOG_LEVEL_DEBUG: &str = "debug";
+/// Option specifying which port the debug console will connect to
+pub const DEBUG_CONSOLE_VPORT_OPTION: &str = "agent.debug_console_vport";
+/// Option specifying which port the agent's log will connect to
+pub const LOG_VPORT_OPTION: &str = "agent.log_vport";
+/// Option for setting the container's pipe size
+pub const CONTAINER_PIPE_SIZE_OPTION: &str = "agent.container_pipe_size";
+
+/// Trait to manipulate global Kata configuration information.
+pub trait ConfigPlugin: Send + Sync {
+    /// Get the plugin name.
+    fn name(&self) -> &str;
+
+    /// Adjust the configuration information after loading from configuration file.
+    fn adjust_config(&self, _conf: &mut TomlConfig) -> Result<()>;
+
+    /// Validate the configuration information.
+    fn validate(&self, _conf: &TomlConfig) -> Result<()>;
+
+    /// Get the minimum memory for the hypervisor
+    fn get_min_memory(&self) -> u32;
+
+    /// Get the maximum number of default cpus
+    fn get_max_cpus(&self) -> u32;
+}
+
+/// Trait to manipulate Kata configuration information.
+pub trait ConfigOps {
+    /// Adjust the configuration information after loading from configuration file.
+    fn adjust_config(_conf: &mut TomlConfig) -> Result<()> {
+        Ok(())
+    }
+
+    /// Validate the configuration information.
+    fn validate(_conf: &TomlConfig) -> Result<()> {
+        Ok(())
+    }
+}
+
+/// Trait to manipulate global Kata configuration information.
+pub trait ConfigObjectOps {
+    /// Adjust the configuration information after loading from configuration file.
+    fn adjust_config(&mut self) -> Result<()> {
+        Ok(())
+    }
+
+    /// Validate the configuration information.
+    fn validate(&self) -> Result<()> {
+        Ok(())
+    }
+}
+
+/// Kata configuration information.
+#[derive(Debug, Default, Deserialize, Serialize)]
+pub struct TomlConfig {
+    /// Configuration information for agents.
+    #[serde(default)]
+    pub agent: HashMap<String, Agent>,
+    /// Configuration information for hypervisors.
+    #[serde(default)]
+    pub hypervisor: HashMap<String, Hypervisor>,
+    /// Kata runtime configuration information.
+    #[serde(default)]
+    pub runtime: Runtime,
+}
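// Illustrative sketch, not part of the patch itself: the minimum a new
// hypervisor plugin has to provide on top of the traits above. `FooConfig`
// and its limits are made up; the real plugins in this series (dragonball,
// qemu, ...) follow the same shape and register themselves through
// register_hypervisor_plugin() so that a matching `[hypervisor.foo]` section
// is adjusted and validated by the plugin.
//
//     #[derive(Default)]
//     struct FooConfig;
//
//     impl ConfigPlugin for FooConfig {
//         fn name(&self) -> &str { "foo" }
//         fn adjust_config(&self, _conf: &mut TomlConfig) -> Result<()> { Ok(()) }
//         fn validate(&self, _conf: &TomlConfig) -> Result<()> { Ok(()) }
//         fn get_min_memory(&self) -> u32 { 256 }
//         fn get_max_cpus(&self) -> u32 { 8 }
//     }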
+
+impl TomlConfig {
+    /// Load Kata configuration information from configuration files.
+    ///
+    /// If `config_file` is valid, it will be used, otherwise a built-in default path list will
+    /// be scanned.
+    pub fn load_from_file<P: AsRef<Path>>(config_file: P) -> Result<(TomlConfig, PathBuf)> {
+        let mut result = Self::load_raw_from_file(config_file);
+        if let Ok((ref mut config, _)) = result {
+            Hypervisor::adjust_config(config)?;
+            Runtime::adjust_config(config)?;
+            Agent::adjust_config(config)?;
+            info!(sl!(), "get kata config: {:?}", config);
+        }
+
+        result
+    }
+
+    /// Load raw Kata configuration information from default configuration file.
+    ///
+    /// Configuration file is probed according to the default configuration file list
+    /// default::DEFAULT_RUNTIME_CONFIGURATIONS.
+    pub fn load_from_default() -> Result<(TomlConfig, PathBuf)> {
+        Self::load_raw_from_file("")
+    }
+
+    /// Load raw Kata configuration information from configuration files.
+    ///
+    /// If `config_file` is valid, it will be used, otherwise a built-in default path list will
+    /// be scanned.
+    pub fn load_raw_from_file<P: AsRef<Path>>(config_file: P) -> Result<(TomlConfig, PathBuf)> {
+        let file_path = if !config_file.as_ref().as_os_str().is_empty() {
+            fs::canonicalize(config_file)?
+        } else {
+            Self::get_default_config_file()?
+        };
+
+        info!(
+            sl!(),
+            "load configuration from: {}",
+            file_path.to_string_lossy()
+        );
+        let config = drop_in::load(&file_path)?;
+
+        Ok((config, file_path))
+    }
+
+    /// Load Kata configuration information from string.
+    ///
+    /// This function only works with `configuration.toml` and does not handle
+    /// drop-in config file fragments in config.d/.
+    pub fn load(content: &str) -> Result<TomlConfig> {
+        let mut config: TomlConfig = toml::from_str(content)?;
+        Hypervisor::adjust_config(&mut config)?;
+        Runtime::adjust_config(&mut config)?;
+        Agent::adjust_config(&mut config)?;
+        info!(sl!(), "get kata config: {:?}", config);
+        Ok(config)
+    }
+
+    /// Validate Kata configuration information.
+    pub fn validate(&self) -> Result<()> {
+        Hypervisor::validate(self)?;
+        Runtime::validate(self)?;
+        Agent::validate(self)?;
+
+        Ok(())
+    }
+
+    /// Get agent-specific kernel parameters for further Hypervisor config revision
+    pub fn get_agent_kernel_params(&self) -> Result<HashMap<String, String>> {
+        let mut kv = HashMap::new();
+        if let Some(cfg) = self.agent.get(&self.runtime.agent_name) {
+            if cfg.debug {
+                kv.insert(LOG_LEVEL_OPTION.to_string(), LOG_LEVEL_DEBUG.to_string());
+            }
+            if cfg.enable_tracing {
+                kv.insert(TRACE_MODE_OPTION.to_string(), TRACE_MODE_ENABLE.to_string());
+            }
+            if cfg.container_pipe_size > 0 {
+                let container_pipe_size = cfg.container_pipe_size.to_string();
+                kv.insert(CONTAINER_PIPE_SIZE_OPTION.to_string(), container_pipe_size);
+            }
+            if cfg.debug_console_enabled {
+                kv.insert(DEBUG_CONSOLE_FLAG.to_string(), "".to_string());
+                kv.insert(
+                    DEBUG_CONSOLE_VPORT_OPTION.to_string(),
+                    DEFAULT_AGENT_DBG_CONSOLE_PORT.to_string(),
+                );
+            }
+        }
+        Ok(kv)
+    }
+
+    /// Probe configuration file according to the default configuration file list.
+    pub fn get_default_config_file() -> Result<PathBuf> {
+        for f in default::DEFAULT_RUNTIME_CONFIGURATIONS.iter() {
+            if let Ok(path) = fs::canonicalize(f) {
+                return Ok(path);
+            }
+        }
+
+        Err(io::Error::from(io::ErrorKind::NotFound))
+    }
+}
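// Illustrative sketch, not part of the patch itself: the intended calling
// sequence for the loader above, assuming a configuration file exists at the
// (hypothetical) path below.
//
//     let (config, _path) =
//         TomlConfig::load_from_file("/etc/kata-containers/configuration.toml")?;
//     config.validate()?;
//
//     // Kernel parameters derived from the selected agent section,
//     // e.g. "agent.log=debug" or "agent.debug_console_vport=1026".
//     let params = config.get_agent_kernel_params()?;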
+
+/// Validate that `path` matches one of the patterns in `patterns`.
+///
+/// Each member in `patterns` is a path pattern as described by glob(3)
+pub fn validate_path_pattern<P: AsRef<Path>>(patterns: &[String], path: P) -> Result<()> {
+    let path = path
+        .as_ref()
+        .to_str()
+        .ok_or_else(|| eother!("Invalid path {}", path.as_ref().to_string_lossy()))?;
+    for p in patterns.iter() {
+        if let Ok(glob) = glob::Pattern::new(p) {
+            if glob.matches(path) {
+                return Ok(());
+            }
+        }
+    }
+
+    Err(eother!("Path {} is not permitted", path))
+}
+
+/// Kata configuration information.
+pub struct KataConfig {
+    config: Option<TomlConfig>,
+    agent: String,
+    hypervisor: String,
+}
+
+impl KataConfig {
+    /// Set the default Kata configuration object.
+    ///
+    /// The default Kata configuration information is loaded from the system configuration file.
+    pub fn set_default_config(config: Option<TomlConfig>, hypervisor: &str, agent: &str) {
+        let kata = KataConfig {
+            config,
+            agent: agent.to_string(),
+            hypervisor: hypervisor.to_string(),
+        };
+        *KATA_DEFAULT_CONFIG.lock().unwrap() = Arc::new(kata);
+    }
+
+    /// Get the default Kata configuration object.
+    ///
+    /// The default Kata configuration information is loaded from the system configuration file.
+    pub fn get_default_config() -> Arc<KataConfig> {
+        KATA_DEFAULT_CONFIG.lock().unwrap().clone()
+    }
+
+    /// Set the active Kata configuration object.
+    ///
+    /// The active Kata configuration information is the default configuration information
+    /// patched with tunable configuration information from annotations.
+    pub fn set_active_config(config: Option<TomlConfig>, hypervisor: &str, agent: &str) {
+        let kata = KataConfig {
+            config,
+            agent: agent.to_string(),
+            hypervisor: hypervisor.to_string(),
+        };
+        *KATA_ACTIVE_CONFIG.lock().unwrap() = Arc::new(kata);
+    }
+
+    /// Get the active Kata configuration object.
+    ///
+    /// The active Kata configuration information is the default configuration information
+    /// patched with tunable configuration information from annotations.
+    pub fn get_active_config() -> Arc<KataConfig> {
+        KATA_ACTIVE_CONFIG.lock().unwrap().clone()
+    }
+
+    /// Get the config in use
+    pub fn get_config(&self) -> &TomlConfig {
+        self.config.as_ref().unwrap()
+    }
+
+    /// Get the agent configuration in use.
+    pub fn get_agent(&self) -> Option<&Agent> {
+        if !self.agent.is_empty() {
+            self.config.as_ref().unwrap().agent.get(&self.agent)
+        } else {
+            None
+        }
+    }
+
+    /// Get the hypervisor configuration in use.
+    pub fn get_hypervisor(&self) -> Option<&Hypervisor> {
+        if !self.hypervisor.is_empty() {
+            self.config
+                .as_ref()
+                .unwrap()
+                .hypervisor
+                .get(&self.hypervisor)
+        } else {
+            None
+        }
+    }
+}
+
+lazy_static!
{ + static ref KATA_DEFAULT_CONFIG: Mutex> = { + let config = Some(TomlConfig::load("").unwrap()); + let kata = KataConfig { + config, + agent: String::new(), + hypervisor: String::new(), + }; + + Mutex::new(Arc::new(kata)) + }; + static ref KATA_ACTIVE_CONFIG: Mutex> = { + let config = Some(TomlConfig::load("").unwrap()); + let kata = KataConfig { + config, + agent: String::new(), + hypervisor: String::new(), + }; + Mutex::new(Arc::new(kata)) + }; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_validate_path_pattern() { + let patterns = []; + validate_path_pattern(&patterns, "/bin/ls").unwrap_err(); + + let patterns = ["/bin".to_string()]; + validate_path_pattern(&patterns, "/bin/ls").unwrap_err(); + + let patterns = ["/bin/*/ls".to_string()]; + validate_path_pattern(&patterns, "/bin/ls").unwrap_err(); + + let patterns = ["/bin/*".to_string()]; + validate_path_pattern(&patterns, "/bin/ls").unwrap(); + + let patterns = ["/*".to_string()]; + validate_path_pattern(&patterns, "/bin/ls").unwrap(); + + let patterns = ["/usr/share".to_string(), "/bin/*".to_string()]; + validate_path_pattern(&patterns, "/bin/ls").unwrap(); + } + + #[test] + fn test_get_agent_kernel_params() { + let mut config = TomlConfig { + ..Default::default() + }; + let agent_config = Agent { + debug: true, + enable_tracing: true, + container_pipe_size: 20, + debug_console_enabled: true, + ..Default::default() + }; + let agent_name = "test_agent"; + config.runtime.agent_name = agent_name.to_string(); + config.agent.insert(agent_name.to_owned(), agent_config); + + let kv = config.get_agent_kernel_params().unwrap(); + assert_eq!(kv.get("agent.log").unwrap(), "debug"); + assert_eq!(kv.get("agent.trace").unwrap(), "true"); + assert_eq!(kv.get("agent.container_pipe_size").unwrap(), "20"); + kv.get("agent.debug_console").unwrap(); + assert_eq!(kv.get("agent.debug_console_vport").unwrap(), "1026"); // 1026 is the default port + } +} diff --git a/src/libs/kata-types/src/config/runtime.rs b/src/libs/kata-types/src/config/runtime.rs new file mode 100644 index 000000000000..853e4aef3fe1 --- /dev/null +++ b/src/libs/kata-types/src/config/runtime.rs @@ -0,0 +1,323 @@ +// Copyright (c) 2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::io::Result; +use std::path::Path; + +use super::default; +use crate::config::{ConfigOps, TomlConfig}; +use crate::mount::split_bind_mounts; +use crate::{eother, validate_path}; + +/// Type of runtime VirtContainer. +pub const RUNTIME_NAME_VIRTCONTAINER: &str = "virt_container"; + +/// Kata runtime configuration information. +#[derive(Debug, Default, Deserialize, Serialize)] +pub struct Runtime { + /// Runtime name: Plan to support virt-container, linux-container, wasm-container + #[serde(default)] + pub name: String, + + /// Hypervisor name: Plan to support dragonball, qemu + #[serde(default)] + pub hypervisor_name: String, + + /// Agent name + #[serde(default)] + pub agent_name: String, + + /// If enabled, the runtime will log additional debug messages to the system log. + #[serde(default, rename = "enable_debug")] + pub debug: bool, + + /// Enabled experimental feature list, format: ["a", "b"]. + /// + /// Experimental features are features not stable enough for production, they may break + /// compatibility, and are prepared for a big version bump. + #[serde(default)] + pub experimental: Vec, + + /// Determines how the VM should be connected to the container network interface. 
+ /// + /// Options: + /// - macvtap: used when the Container network interface can be bridged using macvtap. + /// - none: used when customize network. Only creates a tap device. No veth pair. + /// - tcfilter: uses tc filter rules to redirect traffic from the network interface provided + /// by plugin to a tap interface connected to the VM. + #[serde(default)] + pub internetworking_model: String, + + /// If enabled, the runtime won't create a network namespace for shim and hypervisor processes. + /// + /// This option may have some potential impacts to your host. It should only be used when you + /// know what you're doing. + /// + /// `disable_new_netns` conflicts with `internetworking_model=tcfilter` and + /// `internetworking_model=macvtap`. It works only with `internetworking_model=none`. + /// The tap device will be in the host network namespace and can connect to a bridge (like OVS) + /// directly. + /// + /// If you are using docker, `disable_new_netns` only works with `docker run --net=none` + #[serde(default)] + pub disable_new_netns: bool, + + /// If specified, sandbox_bind_mounts identifies host paths to be mounted into the sandboxes + /// shared path. + /// + /// This is only valid if filesystem sharing is utilized. The provided path(s) will be bind + /// mounted into the shared fs directory. If defaults are utilized, these mounts should be + /// available in the guest at `/run/kata-containers/shared/containers/passthrough/sandbox-mounts`. + /// These will not be exposed to the container workloads, and are only provided for potential + /// guest services. + #[serde(default)] + pub sandbox_bind_mounts: Vec, + + /// If enabled, the runtime will add all the kata processes inside one dedicated cgroup. + /// + /// The container cgroups in the host are not created, just one single cgroup per sandbox. + /// The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. + /// The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. + /// The sandbox cgroup is constrained if there is no container type annotation. + /// See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType + #[serde(default)] + pub sandbox_cgroup_only: bool, + + /// If enabled, the runtime will create opentracing.io traces and spans. + /// See https://www.jaegertracing.io/docs/getting-started. + #[serde(default)] + pub enable_tracing: bool, + /// The full url to the Jaeger HTTP Thrift collector. + #[serde(default)] + pub jaeger_endpoint: String, + /// The username to be used if basic auth is required for Jaeger. + #[serde(default)] + pub jaeger_user: String, + /// The password to be used if basic auth is required for Jaeger. + #[serde(default)] + pub jaeger_password: String, + + /// If enabled, user can run pprof tools with shim v2 process through kata-monitor. + #[serde(default)] + pub enable_pprof: bool, + + /// If enabled, static resource management will calculate the vcpu and memory for the sandbox/container + /// And pod configured this will not be able to further update its CPU/Memory resource + #[serde(default)] + pub static_sandbox_resource_mgmt: bool, + + /// Determines whether container seccomp profiles are passed to the virtual machine and + /// applied by the kata agent. If set to true, seccomp is not applied within the guest. + #[serde(default)] + pub disable_guest_seccomp: bool, + + /// Determines how VFIO devices should be be presented to the container. 
+ /// + /// Options: + /// - vfio: Matches behaviour of OCI runtimes (e.g. runc) as much as possible. VFIO devices + /// will appear in the container as VFIO character devices under /dev/vfio. The exact names + /// may differ from the host (they need to match the VM's IOMMU group numbers rather than + /// the host's) + /// - guest-kernel: This is a Kata-specific behaviour that's useful in certain cases. + /// The VFIO device is managed by whatever driver in the VM kernel claims it. This means + /// it will appear as one or more device nodes or network interfaces depending on the nature + /// of the device. Using this mode requires specially built workloads that know how to locate + /// the relevant device interfaces within the VM. + #[serde(default)] + pub vfio_mode: String, + + /// Vendor customized runtime configuration. + #[serde(default, flatten)] + pub vendor: RuntimeVendor, + + /// If keep_abnormal is enabled, it means that 1) if the runtime exits abnormally, the cleanup process + /// will be skipped, and 2) the runtime will not exit even if the health check fails. + /// This option is typically used to retain abnormal information for debugging. + #[serde(default)] + pub keep_abnormal: bool, + + /// Base directory of directly attachable network config, the default value + /// is "/run/kata-containers/dans". + /// + /// Network devices for VM-based containers are allowed to be placed in the + /// host netns to eliminate as many hops as possible, which is what we + /// called a "directly attachable network". The config, set by special CNI + /// plugins, is used to tell the Kata Containers what devices are attached + /// to the hypervisor. + #[serde(default)] + pub dan_conf: String, +} + +impl ConfigOps for Runtime { + fn adjust_config(conf: &mut TomlConfig) -> Result<()> { + RuntimeVendor::adjust_config(conf)?; + if conf.runtime.internetworking_model.is_empty() { + conf.runtime.internetworking_model = default::DEFAULT_INTERNETWORKING_MODEL.to_owned(); + } + + for bind in conf.runtime.sandbox_bind_mounts.iter_mut() { + // Split the bind mount, canonicalize the path and then append rw mode to it. + let (real_path, mode) = split_bind_mounts(bind); + match Path::new(real_path).canonicalize() { + Err(e) => return Err(eother!("sandbox bind mount `{}` is invalid: {}", bind, e)), + Ok(path) => { + *bind = format!("{}{}", path.display(), mode); + } + } + } + + Ok(()) + } + + fn validate(conf: &TomlConfig) -> Result<()> { + RuntimeVendor::validate(conf)?; + + let net_model = &conf.runtime.internetworking_model; + if !net_model.is_empty() + && net_model != "macvtap" + && net_model != "none" + && net_model != "tcfilter" + { + return Err(eother!( + "Invalid internetworking_model `{}` in configuration file", + net_model + )); + } + + let vfio_mode = &conf.runtime.vfio_mode; + if !vfio_mode.is_empty() && vfio_mode != "vfio" && vfio_mode != "guest-kernel" { + return Err(eother!( + "Invalid vfio_mode `{}` in configuration file", + vfio_mode + )); + } + + for bind in conf.runtime.sandbox_bind_mounts.iter() { + // Just validate the real_path. + let (real_path, _mode) = split_bind_mounts(bind); + validate_path!( + real_path.to_owned(), + "sandbox bind mount `{}` is invalid: {}" + )?; + } + + Ok(()) + } +} + +impl Runtime { + /// Check whether experiment `feature` is enabled or not. 
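// Illustrative sketch, not part of the patch itself: a typical
// sandbox_bind_mounts entry and what the adjust_config()/validate() logic
// above does with it. The host paths are hypothetical and must exist, since
// adjust_config() canonicalizes them via split_bind_mounts() before
// re-appending the access mode suffix.
//
//     sandbox_bind_mounts = ["/opt/kata/shared-data:ro", "/var/run/shared"]
//
// After adjustment each entry is the canonicalized host path plus its
// original ":ro"/":rw" suffix (if any); validate() only checks that the path
// part resolves.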
+ pub fn is_experiment_enabled(&self, feature: &str) -> bool { + self.experimental.contains(&feature.to_string()) + } +} + +#[cfg(not(feature = "enable-vendor"))] +mod vendor { + use super::*; + + /// Vendor customization runtime configuration. + #[derive(Debug, Default, Deserialize, Serialize)] + pub struct RuntimeVendor {} + + impl ConfigOps for RuntimeVendor {} +} + +#[cfg(feature = "enable-vendor")] +#[path = "runtime_vendor.rs"] +mod vendor; + +pub use vendor::RuntimeVendor; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_invalid_config() { + let content = r#" +[runtime] +enable_debug = 10 +"#; + TomlConfig::load(content).unwrap_err(); + + let content = r#" +[runtime] +enable_debug = true +internetworking_model = "test" +"#; + let config: TomlConfig = TomlConfig::load(content).unwrap(); + config.validate().unwrap_err(); + + let content = r#" +[runtime] +enable_debug = true +internetworking_model = "macvtap,none" +"#; + let config: TomlConfig = TomlConfig::load(content).unwrap(); + config.validate().unwrap_err(); + + let content = r#" +[runtime] +enable_debug = true +vfio_mode = "none" +"#; + let config: TomlConfig = TomlConfig::load(content).unwrap(); + config.validate().unwrap_err(); + + let content = r#" +[runtime] +enable_debug = true +vfio_mode = "vfio,guest-kernel" +"#; + let config: TomlConfig = TomlConfig::load(content).unwrap(); + config.validate().unwrap_err(); + + let content = r#" +[runtime] +enable_debug = true +vfio_mode = "guest_kernel" +"#; + let config: TomlConfig = TomlConfig::load(content).unwrap(); + config.validate().unwrap_err(); + } + + #[test] + fn test_config() { + let content = r#" +[runtime] +name = "virt-container" +enable_debug = true +experimental = ["a", "b"] +internetworking_model = "macvtap" +disable_new_netns = true +sandbox_bind_mounts = [] +sandbox_cgroup_only = true +enable_tracing = true +jaeger_endpoint = "localhost:1234" +jaeger_user = "user" +jaeger_password = "pw" +enable_pprof = true +disable_guest_seccomp = true +vfio_mode = "vfio" +field_should_be_ignored = true +"#; + let config: TomlConfig = TomlConfig::load(content).unwrap(); + config.validate().unwrap(); + assert_eq!(&config.runtime.name, "virt-container"); + assert!(config.runtime.debug); + assert_eq!(config.runtime.experimental.len(), 2); + assert_eq!(&config.runtime.experimental[0], "a"); + assert_eq!(&config.runtime.experimental[1], "b"); + assert_eq!(&config.runtime.internetworking_model, "macvtap"); + assert!(config.runtime.disable_new_netns); + assert_eq!(config.runtime.sandbox_bind_mounts.len(), 0); + assert!(config.runtime.sandbox_cgroup_only); + assert!(config.runtime.enable_tracing); + assert!(config.runtime.is_experiment_enabled("a")); + assert!(config.runtime.is_experiment_enabled("b")); + assert!(!config.runtime.is_experiment_enabled("c")); + } +} diff --git a/src/libs/kata-types/src/config/runtime_vendor.rs b/src/libs/kata-types/src/config/runtime_vendor.rs new file mode 100644 index 000000000000..e12a63f399f8 --- /dev/null +++ b/src/libs/kata-types/src/config/runtime_vendor.rs @@ -0,0 +1,83 @@ +// Copyright (c) 2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! A sample for vendor to customize the runtime implementation. + +use super::*; +use slog::Level; +/// Vendor customization runtime configuration. 
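// Editorial note, not part of the patch itself: the cfg attributes above swap
// the empty RuntimeVendor stub for the runtime_vendor.rs sample that follows.
// A vendor build would enable it with something like
//
//     cargo build --features enable-vendor
//
// assuming the `enable-vendor` feature is declared in this crate's
// Cargo.toml, which is outside this hunk.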
+#[derive(Debug, Default, Deserialize, Serialize)] +pub struct RuntimeVendor { + /// Log level + #[serde(default)] + pub log_level: u32, + + /// Prefix for log messages + #[serde(default)] + pub log_prefix: String, +} + +impl ConfigOps for RuntimeVendor { + fn adjust_config(conf: &mut TomlConfig) -> Result<()> { + if conf.runtime.vendor.log_level > Level::Debug as u32 { + conf.runtime.debug = true; + } + + Ok(()) + } + + /// Validate the configuration information. + fn validate(conf: &TomlConfig) -> Result<()> { + if conf.runtime.vendor.log_level > 10 { + return Err(eother!( + "log level {} in configuration file is invalid", + conf.runtime.vendor.log_level + )); + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_invalid_vendor_config() { + let content = r#" +[runtime] +debug = false +log_level = 20 +log_prefix = "test" +"#; + let config: TomlConfig = TomlConfig::load(content).unwrap(); + config.validate().unwrap_err(); + + let content = r#" +[runtime] +debug = false +log_level = "test" +log_prefix = "test" +"#; + TomlConfig::load(content).unwrap_err(); + } + + #[test] + fn test_vendor_config() { + let content = r#" +[runtime] +debug = false +log_level = 10 +log_prefix = "test" +log_fmt = "nouse" +"#; + let config: TomlConfig = TomlConfig::load(content).unwrap(); + config.validate().unwrap(); + assert!(config.runtime.debug); + assert_eq!(config.runtime.vendor.log_level, 10); + assert_eq!(&config.runtime.vendor.log_prefix, "test"); + } +} diff --git a/src/libs/kata-types/src/container.rs b/src/libs/kata-types/src/container.rs new file mode 100644 index 000000000000..31e729ab855c --- /dev/null +++ b/src/libs/kata-types/src/container.rs @@ -0,0 +1,214 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// Copyright (c) 2019-2021 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fmt::{Display, Formatter}; +use std::str::FromStr; + +// a container running within a pod +pub(crate) const POD_CONTAINER: &str = "pod_container"; +// cri containerd/crio/docker: a container running within a pod +pub(crate) const CONTAINER: &str = "container"; + +// a pod sandbox container +pub(crate) const POD_SANDBOX: &str = "pod_sandbox"; +// cri containerd/crio: a pod sandbox container +pub(crate) const SANDBOX: &str = "sandbox"; +// docker: a sandbox sandbox container +pub(crate) const PODSANDBOX: &str = "podsandbox"; + +pub(crate) const SINGLE_CONTAINER: &str = "single_container"; + +const STATE_READY: &str = "ready"; +const STATE_RUNNING: &str = "running"; +const STATE_STOPPED: &str = "stopped"; +const STATE_PAUSED: &str = "paused"; + +/// Error codes for container related operations. +#[derive(thiserror::Error, Debug)] +pub enum Error { + /// Invalid container type + #[error("Invalid container type {0}")] + InvalidContainerType(String), + /// Invalid container state + #[error("Invalid sandbox state {0}")] + InvalidState(String), + /// Invalid container state transition + #[error("Can not transit from {0} to {1}")] + InvalidStateTransition(State, State), +} + +/// Types of pod containers: container or sandbox. +#[derive(PartialEq, Debug, Clone)] +pub enum ContainerType { + /// A pod container. + PodContainer, + /// A pod sandbox. + PodSandbox, + /// A single container. + SingleContainer, +} + +impl ContainerType { + /// Check whether it's a pod container. + pub fn is_pod_container(&self) -> bool { + matches!(self, ContainerType::PodContainer) + } + + /// Check whether it's a pod container. 
+ pub fn is_pod_sandbox(&self) -> bool { + matches!(self, ContainerType::PodSandbox) + } +} + +impl Display for ContainerType { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + match self { + ContainerType::PodContainer => write!(f, "{}", POD_CONTAINER), + ContainerType::PodSandbox => write!(f, "{}", POD_SANDBOX), + ContainerType::SingleContainer => write!(f, "{}", SINGLE_CONTAINER), + } + } +} + +impl FromStr for ContainerType { + type Err = Error; + + fn from_str(value: &str) -> Result { + match value { + POD_CONTAINER | CONTAINER => Ok(ContainerType::PodContainer), + POD_SANDBOX | PODSANDBOX | SANDBOX => Ok(ContainerType::PodSandbox), + _ => Err(Error::InvalidContainerType(value.to_owned())), + } + } +} + +/// Process states. +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum State { + /// The container is ready to run. + Ready, + /// The container executed the user-specified program but has not exited + Running, + /// The container has exited + Stopped, + /// The container has been paused. + Paused, +} + +impl Display for State { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + match self { + State::Ready => write!(f, "{}", STATE_READY), + State::Running => write!(f, "{}", STATE_RUNNING), + State::Stopped => write!(f, "{}", STATE_STOPPED), + State::Paused => write!(f, "{}", STATE_PAUSED), + } + } +} + +impl FromStr for State { + type Err = Error; + + fn from_str(value: &str) -> Result { + match value { + STATE_READY => Ok(State::Ready), + STATE_RUNNING => Ok(State::Running), + STATE_STOPPED => Ok(State::Stopped), + STATE_PAUSED => Ok(State::Paused), + _ => Err(Error::InvalidState(value.to_owned())), + } + } +} + +impl State { + /// Check whether it's a valid state transition from self to the `new_state`. + pub fn check_transition(self, new_state: State) -> Result<(), Error> { + match self { + State::Ready if new_state == State::Running || new_state == State::Stopped => Ok(()), + State::Running if new_state == State::Stopped => Ok(()), + State::Stopped if new_state == State::Running => Ok(()), + State::Paused if new_state == State::Paused => Ok(()), + _ => Err(Error::InvalidStateTransition(self, new_state)), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_container_type() { + assert!(ContainerType::PodContainer.is_pod_container()); + assert!(!ContainerType::PodContainer.is_pod_sandbox()); + + assert!(ContainerType::PodSandbox.is_pod_sandbox()); + assert!(!ContainerType::PodSandbox.is_pod_container()); + } + + #[test] + fn test_container_type_display() { + assert_eq!(format!("{}", ContainerType::PodContainer), POD_CONTAINER); + assert_eq!(format!("{}", ContainerType::PodSandbox), POD_SANDBOX); + } + + #[test] + fn test_container_type_from_str() { + assert_eq!( + ContainerType::from_str("pod_container").unwrap(), + ContainerType::PodContainer + ); + assert_eq!( + ContainerType::from_str("container").unwrap(), + ContainerType::PodContainer + ); + assert_eq!( + ContainerType::from_str("pod_sandbox").unwrap(), + ContainerType::PodSandbox + ); + assert_eq!( + ContainerType::from_str("podsandbox").unwrap(), + ContainerType::PodSandbox + ); + assert_eq!( + ContainerType::from_str("sandbox").unwrap(), + ContainerType::PodSandbox + ); + ContainerType::from_str("test").unwrap_err(); + } + + #[test] + fn test_valid() { + let mut state = State::from_str("invalid_state"); + assert!(state.is_err()); + + state = State::from_str("ready"); + assert!(state.is_ok()); + + state = State::from_str("running"); + assert!(state.is_ok()); + + state = 
State::from_str("stopped"); + assert!(state.is_ok()); + } + + #[test] + fn test_valid_transition() { + use State::*; + + assert!(Ready.check_transition(Ready).is_err()); + assert!(Ready.check_transition(Running).is_ok()); + assert!(Ready.check_transition(Stopped).is_ok()); + + assert!(Running.check_transition(Ready).is_err()); + assert!(Running.check_transition(Running).is_err()); + assert!(Running.check_transition(Stopped).is_ok()); + + assert!(Stopped.check_transition(Ready).is_err()); + assert!(Stopped.check_transition(Running).is_ok()); + assert!(Stopped.check_transition(Stopped).is_err()); + } +} diff --git a/src/libs/kata-types/src/cpu.rs b/src/libs/kata-types/src/cpu.rs new file mode 100644 index 000000000000..e47681f62313 --- /dev/null +++ b/src/libs/kata-types/src/cpu.rs @@ -0,0 +1,255 @@ +// Copyright (c) 2022 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::convert::TryFrom; +use std::str::FromStr; + +use oci::LinuxCpu; + +/// A set of CPU ids. +pub type CpuSet = crate::utils::u32_set::U32Set; + +/// A set of NUMA memory nodes. +pub type NumaNodeSet = crate::utils::u32_set::U32Set; + +/// Error code for CPU related operations. +#[derive(thiserror::Error, Debug)] +pub enum Error { + /// Invalid CPU list. + #[error("Invalid CPU list: {0}")] + InvalidCpuSet(crate::Error), + /// Invalid NUMA memory node list. + #[error("Invalid NUMA memory node list: {0}")] + InvalidNodeSet(crate::Error), +} + +/// Assigned CPU resources for a Linux container. +#[derive(Clone, Default, Debug)] +pub struct LinuxContainerCpuResources { + shares: u64, + period: u64, + quota: i64, + cpuset: CpuSet, + nodeset: NumaNodeSet, + calculated_vcpu_time_ms: Option, +} + +impl LinuxContainerCpuResources { + /// Get the CPU shares. + pub fn shares(&self) -> u64 { + self.shares + } + + /// Get the CPU schedule period. + pub fn period(&self) -> u64 { + self.period + } + + /// Get the CPU schedule quota. + pub fn quota(&self) -> i64 { + self.quota + } + + /// Get the CPU set. + pub fn cpuset(&self) -> &CpuSet { + &self.cpuset + } + + /// Get the NUMA memory node set. + pub fn nodeset(&self) -> &NumaNodeSet { + &self.nodeset + } + + /// Get number of vCPUs to fulfill the CPU resource request, `None` means unconstrained. + pub fn get_vcpus(&self) -> Option { + self.calculated_vcpu_time_ms + .map(|v| v.saturating_add(999) / 1000) + } +} + +impl TryFrom<&LinuxCpu> for LinuxContainerCpuResources { + type Error = Error; + + // Unhandled fields: realtime_runtime, realtime_period, mems + fn try_from(value: &LinuxCpu) -> Result { + let period = value.period.unwrap_or(0); + let quota = value.quota.unwrap_or(-1); + let cpuset = CpuSet::from_str(&value.cpus).map_err(Error::InvalidCpuSet)?; + let nodeset = NumaNodeSet::from_str(&value.mems).map_err(Error::InvalidNodeSet)?; + + // If quota is -1, it means the CPU resource request is unconstrained. In that case, + // we don't currently assign additional CPUs. + let milli_sec = if quota >= 0 && period != 0 { + Some((quota as u64).saturating_mul(1000) / period) + } else { + None + }; + + Ok(LinuxContainerCpuResources { + shares: value.shares.unwrap_or(0), + period, + quota, + cpuset, + nodeset, + calculated_vcpu_time_ms: milli_sec, + }) + } +} + +/// Assigned CPU resources for a Linux sandbox/pod. +#[derive(Default, Debug)] +pub struct LinuxSandboxCpuResources { + shares: u64, + calculated_vcpu_time_ms: u64, + cpuset: CpuSet, + nodeset: NumaNodeSet, +} + +impl LinuxSandboxCpuResources { + /// Create a new instance of `LinuxSandboxCpuResources`. 
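// Editorial note, not part of the patch itself: a worked example of the
// quota/period arithmetic in the TryFrom<&LinuxCpu> impl above. With
// quota = 200_000 and period = 100_000 (OCI expresses both in microseconds):
//
//     calculated_vcpu_time_ms = 200_000 * 1000 / 100_000 = 2000
//     get_vcpus()             = (2000 + 999) / 1000      = 2 vCPUs
//
// An unconstrained container (quota = -1 or period = 0) yields None instead.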
+ pub fn new(shares: u64) -> Self { + Self { + shares, + ..Default::default() + } + } + + /// Get the CPU shares. + pub fn shares(&self) -> u64 { + self.shares + } + + /// Get assigned vCPU time in ms. + pub fn calculated_vcpu_time_ms(&self) -> u64 { + self.calculated_vcpu_time_ms + } + + /// Get the CPU set. + pub fn cpuset(&self) -> &CpuSet { + &self.cpuset + } + + /// Get the NUMA memory node set. + pub fn nodeset(&self) -> &NumaNodeSet { + &self.nodeset + } + + /// Get number of vCPUs to fulfill the CPU resource request. + pub fn get_vcpus(&self) -> u64 { + if self.calculated_vcpu_time_ms == 0 && !self.cpuset.is_empty() { + self.cpuset.len() as u64 + } else { + self.calculated_vcpu_time_ms.saturating_add(999) / 1000 + } + } + + /// Merge resources assigned to a container into the sandbox/pod resources. + pub fn merge(&mut self, container_resource: &LinuxContainerCpuResources) -> &mut Self { + if let Some(v) = container_resource.calculated_vcpu_time_ms.as_ref() { + self.calculated_vcpu_time_ms += v; + } + self.cpuset.extend(&container_resource.cpuset); + self.nodeset.extend(&container_resource.nodeset); + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_linux_container_cpu_resources() { + let resources = LinuxContainerCpuResources::default(); + + assert_eq!(resources.shares(), 0); + assert_eq!(resources.calculated_vcpu_time_ms, None); + assert!(resources.cpuset.is_empty()); + assert!(resources.nodeset.is_empty()); + assert!(resources.calculated_vcpu_time_ms.is_none()); + + let oci = oci::LinuxCpu { + shares: Some(2048), + quota: Some(1001), + period: Some(100), + realtime_runtime: None, + realtime_period: None, + cpus: "1,2,3".to_string(), + mems: "1".to_string(), + }; + let resources = LinuxContainerCpuResources::try_from(&oci).unwrap(); + assert_eq!(resources.shares(), 2048); + assert_eq!(resources.period(), 100); + assert_eq!(resources.quota(), 1001); + assert_eq!(resources.calculated_vcpu_time_ms, Some(10010)); + assert_eq!(resources.get_vcpus().unwrap(), 11); + assert_eq!(resources.cpuset().len(), 3); + assert_eq!(resources.nodeset().len(), 1); + + let oci = oci::LinuxCpu { + shares: Some(2048), + quota: None, + period: None, + realtime_runtime: None, + realtime_period: None, + cpus: "1".to_string(), + mems: "1-2".to_string(), + }; + let resources = LinuxContainerCpuResources::try_from(&oci).unwrap(); + assert_eq!(resources.shares(), 2048); + assert_eq!(resources.period(), 0); + assert_eq!(resources.quota(), -1); + assert_eq!(resources.calculated_vcpu_time_ms, None); + assert!(resources.get_vcpus().is_none()); + assert_eq!(resources.cpuset().len(), 1); + assert_eq!(resources.nodeset().len(), 2); + } + + #[test] + fn test_linux_sandbox_cpu_resources() { + let mut sandbox = LinuxSandboxCpuResources::new(1024); + + assert_eq!(sandbox.shares(), 1024); + assert_eq!(sandbox.get_vcpus(), 0); + assert_eq!(sandbox.calculated_vcpu_time_ms(), 0); + assert!(sandbox.cpuset().is_empty()); + assert!(sandbox.nodeset().is_empty()); + + let oci = oci::LinuxCpu { + shares: Some(2048), + quota: Some(1001), + period: Some(100), + realtime_runtime: None, + realtime_period: None, + cpus: "1,2,3".to_string(), + mems: "1".to_string(), + }; + let resources = LinuxContainerCpuResources::try_from(&oci).unwrap(); + sandbox.merge(&resources); + assert_eq!(sandbox.shares(), 1024); + assert_eq!(sandbox.get_vcpus(), 11); + assert_eq!(sandbox.calculated_vcpu_time_ms(), 10010); + assert_eq!(sandbox.cpuset().len(), 3); + assert_eq!(sandbox.nodeset().len(), 1); + + let oci = oci::LinuxCpu 
{
+            shares: Some(2048),
+            quota: None,
+            period: None,
+            realtime_runtime: None,
+            realtime_period: None,
+            cpus: "1,4".to_string(),
+            mems: "1-2".to_string(),
+        };
+        let resources = LinuxContainerCpuResources::try_from(&oci).unwrap();
+        sandbox.merge(&resources);
+
+        assert_eq!(sandbox.shares(), 1024);
+        assert_eq!(sandbox.get_vcpus(), 11);
+        assert_eq!(sandbox.calculated_vcpu_time_ms(), 10010);
+        assert_eq!(sandbox.cpuset().len(), 4);
+        assert_eq!(sandbox.nodeset().len(), 2);
+    }
+}
diff --git a/src/libs/kata-types/src/k8s.rs b/src/libs/kata-types/src/k8s.rs
new file mode 100644
index 000000000000..2b5971ccc10a
--- /dev/null
+++ b/src/libs/kata-types/src/k8s.rs
@@ -0,0 +1,371 @@
+// Copyright (c) 2019-2021 Alibaba Cloud
+// Copyright (c) 2019-2021 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::path::Path;
+
+use crate::annotations;
+use crate::container::ContainerType;
+use std::str::FromStr;
+
+// K8S_EMPTY_DIR is the K8s specific path for `empty-dir` volumes
+const K8S_EMPTY_DIR: &str = "kubernetes.io~empty-dir";
+// K8S_CONFIGMAP is the K8s specific path for `configmap` volumes
+const K8S_CONFIGMAP: &str = "kubernetes.io~configmap";
+// K8S_SECRET is the K8s specific path for `secret` volumes
+const K8S_SECRET: &str = "kubernetes.io~secret";
+
+/// Check whether the path is a K8s empty directory.
+pub fn is_empty_dir<P: AsRef<Path>>(path: P) -> bool {
+    is_special_dir(path, K8S_EMPTY_DIR)
+}
+
+/// Check whether the path is a K8s configmap.
+pub fn is_configmap<P: AsRef<Path>>(path: P) -> bool {
+    is_special_dir(path, K8S_CONFIGMAP)
+}
+
+/// Check whether the path is a K8s secret.
+pub fn is_secret<P: AsRef<Path>>(path: P) -> bool {
+    is_special_dir(path, K8S_SECRET)
+}
+
+/// Check whether the path is a K8s empty directory, configmap, or secret.
+///
+/// For example, given a K8s EmptyDir, Kubernetes mounts
+/// "/var/lib/kubelet/pods/<uid>/volumes/kubernetes.io~empty-dir/<volume-name>"
+/// to "/<mount-point>".
+pub fn is_special_dir<P: AsRef<Path>>(path: P, dir_type: &str) -> bool {
+    let path = path.as_ref();
+
+    if let Some(parent) = path.parent() {
+        if let Some(pname) = parent.file_name() {
+            if pname == dir_type && parent.parent().is_some() {
+                return true;
+            }
+        }
+    }
+
+    false
+}
+
+/// Get K8S container type from OCI annotations.
+pub fn container_type(spec: &oci::Spec) -> ContainerType {
+    // PodSandbox: "sandbox" (Containerd & CRI-O), "podsandbox" (dockershim)
+    // PodContainer: "container" (Containerd & CRI-O & dockershim)
+    for k in [
+        annotations::crio::CONTAINER_TYPE_LABEL_KEY,
+        annotations::cri_containerd::CONTAINER_TYPE_LABEL_KEY,
+        annotations::dockershim::CONTAINER_TYPE_LABEL_KEY,
+    ]
+    .iter()
+    {
+        if let Some(v) = spec.annotations.get(k.to_owned()) {
+            if let Ok(t) = ContainerType::from_str(v) {
+                return t;
+            }
+        }
+    }
+
+    ContainerType::SingleContainer
+}
+
+/// Determine the k8s sandbox ID from OCI annotations.
+///
+/// This function is expected to be called only when the container type is "PodContainer".
+pub fn container_type_with_id(spec: &oci::Spec) -> (ContainerType, Option<String>) {
+    let container_type = container_type(spec);
+    let mut sid = None;
+    if container_type == ContainerType::PodContainer {
+        for k in [
+            annotations::crio::SANDBOX_ID_LABEL_KEY,
+            annotations::cri_containerd::SANDBOX_ID_LABEL_KEY,
+            annotations::dockershim::SANDBOX_ID_LABEL_KEY,
+        ]
+        .iter()
+        {
+            if let Some(id) = spec.annotations.get(k.to_owned()) {
+                sid = Some(id.to_string());
+                break;
+            }
+        }
+    }
+
+    (container_type, sid)
+}
+
+// count_files will return the number of files within a given path. If the total number of
+// files observed is greater than limit, break and return -1
+fn count_files<P: AsRef<Path>>(path: P, limit: i32) -> std::io::Result<i32> {
+    // First, check to see if the path exists
+    let src = std::fs::canonicalize(path)?;
+
+    // Special case if this is just a file, not a directory:
+    if !src.is_dir() {
+        return Ok(1);
+    }
+
+    let mut num_files = 0;
+
+    for entry in std::fs::read_dir(src)? {
+        let file = entry?;
+        let p = file.path();
+        if p.is_dir() {
+            num_files += count_files(&p, limit)?;
+        } else {
+            num_files += 1;
+        }
+
+        if num_files > limit {
+            return Ok(-1);
+        }
+    }
+
+    Ok(num_files)
+}
+
+/// Check if a volume should be processed as a watchable volume,
+/// which adds inotify-like function for virtio-fs.
+pub fn is_watchable_mount<P: AsRef<Path>>(path: P) -> bool {
+    if !is_secret(&path) && !is_configmap(&path) {
+        return false;
+    }
+
+    // We have a cap on the number of FDs which can be present in a mount
+    // to determine if it is watchable. A similar check exists within the agent,
+    // which may or may not help handle the case where extra files are added to
+    // a mount after the fact
+    let count = count_files(&path, 8).unwrap_or(0);
+    count > 0
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::{annotations, container};
+    use std::fs;
+    use test_utils::skip_if_not_root;
+
+    #[test]
+    fn test_is_watchable_mount() {
+        skip_if_not_root!();
+
+        let result = is_watchable_mount("");
+        assert!(!result);
+
+        // path does not exist, failure expected:
+        let result = is_watchable_mount("/var/lib/kubelet/pods/5f0861a0-a987-4a3a-bb0f-1058ddb9678f/volumes/kubernetes.io~empty-dir/foobar");
+        assert!(!result);
+
+        let test_tmp_dir = tempfile::tempdir().expect("failed to create tempdir");
+
+        // Verify secret is successful (single file mount):
+        // /tmppath/kubernetes.io~secret/super-secret-thing
+        let secret_path = test_tmp_dir.path().join(K8S_SECRET);
+        let result = fs::create_dir_all(&secret_path);
+        assert!(result.is_ok());
+        let secret_file = &secret_path.join("super-secret-thing");
+        let result = fs::File::create(secret_file);
+        assert!(result.is_ok());
+
+        let result = is_watchable_mount(secret_file);
+        assert!(result);
+
+        // Verify that if we have too many files, it will no longer be watchable:
+        // /tmp/kubernetes.io~configmap/amazing-dir-of-configs/
+        // | - c0
+        // | - c1
+        // ...
+        // | - c7
+        // should be okay.
+        //
+        // 9 files should cause the mount to be deemed "not watchable"
+        let configmap_path = test_tmp_dir
+            .path()
+            .join(K8S_CONFIGMAP)
+            .join("amazing-dir-of-configs");
+        let result = fs::create_dir_all(&configmap_path);
+        assert!(result.is_ok());
+
+        // not a watchable mount if no files available.
+ let result = is_watchable_mount(&configmap_path); + assert!(!result); + + for i in 0..8 { + let configmap_file = &configmap_path.join(format!("c{}", i)); + let result = fs::File::create(configmap_file); + assert!(result.is_ok()); + + let result = is_watchable_mount(&configmap_path); + assert!(result); + } + let configmap_file = &configmap_path.join("too_much_files"); + let result = fs::File::create(configmap_file); + assert!(result.is_ok()); + + let result = is_watchable_mount(&configmap_path); + assert!(!result); + } + + #[test] + fn test_is_empty_dir() { + let empty_dir = "/volumes/kubernetes.io~empty-dir/shm"; + assert!(is_empty_dir(empty_dir)); + + let empty_dir = "/volumes/kubernetes.io~empty-dir//shm"; + assert!(is_empty_dir(empty_dir)); + + let empty_dir = "/volumes/kubernetes.io~empty-dir-test/shm"; + assert!(!is_empty_dir(empty_dir)); + + let empty_dir = "/volumes/kubernetes.io~empty-dir"; + assert!(!is_empty_dir(empty_dir)); + + let empty_dir = "kubernetes.io~empty-dir"; + assert!(!is_empty_dir(empty_dir)); + + let empty_dir = "/kubernetes.io~empty-dir/shm"; + assert!(is_empty_dir(empty_dir)); + } + + #[test] + fn test_is_configmap() { + let path = "/volumes/kubernetes.io~configmap/cm"; + assert!(is_configmap(path)); + + let path = "/volumes/kubernetes.io~configmap//cm"; + assert!(is_configmap(path)); + + let path = "/volumes/kubernetes.io~configmap-test/cm"; + assert!(!is_configmap(path)); + + let path = "/volumes/kubernetes.io~configmap"; + assert!(!is_configmap(path)); + } + + #[test] + fn test_is_secret() { + let path = "/volumes/kubernetes.io~secret/test-serect"; + assert!(is_secret(path)); + + let path = "/volumes/kubernetes.io~secret//test-serect"; + assert!(is_secret(path)); + + let path = "/volumes/kubernetes.io~secret-test/test-serect"; + assert!(!is_secret(path)); + + let path = "/volumes/kubernetes.io~secret"; + assert!(!is_secret(path)); + } + + #[test] + fn test_container_type() { + let sid = "sid".to_string(); + let mut spec = oci::Spec::default(); + + // default + assert_eq!( + container_type_with_id(&spec), + (ContainerType::SingleContainer, None) + ); + + // crio sandbox + spec.annotations = [( + annotations::crio::CONTAINER_TYPE_LABEL_KEY.to_string(), + container::SANDBOX.to_string(), + )] + .iter() + .cloned() + .collect(); + assert_eq!( + container_type_with_id(&spec), + (ContainerType::PodSandbox, None) + ); + + // cri containerd sandbox + spec.annotations = [( + annotations::crio::CONTAINER_TYPE_LABEL_KEY.to_string(), + container::POD_SANDBOX.to_string(), + )] + .iter() + .cloned() + .collect(); + assert_eq!( + container_type_with_id(&spec), + (ContainerType::PodSandbox, None) + ); + + // docker shim sandbox + spec.annotations = [( + annotations::crio::CONTAINER_TYPE_LABEL_KEY.to_string(), + container::PODSANDBOX.to_string(), + )] + .iter() + .cloned() + .collect(); + assert_eq!( + container_type_with_id(&spec), + (ContainerType::PodSandbox, None) + ); + + // crio container + spec.annotations = [ + ( + annotations::crio::CONTAINER_TYPE_LABEL_KEY.to_string(), + container::CONTAINER.to_string(), + ), + ( + annotations::crio::SANDBOX_ID_LABEL_KEY.to_string(), + sid.clone(), + ), + ] + .iter() + .cloned() + .collect(); + assert_eq!( + container_type_with_id(&spec), + (ContainerType::PodContainer, Some(sid.clone())) + ); + + // cri containerd container + spec.annotations = [ + ( + annotations::cri_containerd::CONTAINER_TYPE_LABEL_KEY.to_string(), + container::POD_CONTAINER.to_string(), + ), + ( + annotations::cri_containerd::SANDBOX_ID_LABEL_KEY.to_string(), + 
sid.clone(), + ), + ] + .iter() + .cloned() + .collect(); + assert_eq!( + container_type_with_id(&spec), + (ContainerType::PodContainer, Some(sid.clone())) + ); + + // docker shim container + spec.annotations = [ + ( + annotations::dockershim::CONTAINER_TYPE_LABEL_KEY.to_string(), + container::CONTAINER.to_string(), + ), + ( + annotations::dockershim::SANDBOX_ID_LABEL_KEY.to_string(), + sid.clone(), + ), + ] + .iter() + .cloned() + .collect(); + assert_eq!( + container_type_with_id(&spec), + (ContainerType::PodContainer, Some(sid)) + ); + } +} diff --git a/src/libs/kata-types/src/lib.rs b/src/libs/kata-types/src/lib.rs new file mode 100644 index 000000000000..5eb407561cba --- /dev/null +++ b/src/libs/kata-types/src/lib.rs @@ -0,0 +1,96 @@ +// Copyright (c) 2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Constants and Data Types shared by Kata Containers components. + +#![deny(missing_docs)] +#[macro_use] +extern crate slog; +#[macro_use] +extern crate serde; + +/// Constants and data types related to annotations. +pub mod annotations; + +/// Kata configuration information from configuration file. +pub mod config; + +/// Constants and data types related to container. +pub mod container; + +/// Constants and data types related to CPU. +pub mod cpu; + +/// Constants and data types related to Kubernetes/kubelet. +pub mod k8s; + +/// Constants and data types related to mount point. +pub mod mount; + +pub(crate) mod utils; + +/// hypervisor capabilities +pub mod capabilities; + +/// Common error codes. +#[derive(thiserror::Error, Debug)] +pub enum Error { + /// Invalid configuration list. + #[error("invalid list {0}")] + InvalidList(String), +} + +/// Convenience macro to obtain the scoped logger +#[macro_export] +macro_rules! sl { + () => { + slog_scope::logger() + }; +} + +/// Helper to create std::io::Error(std::io::ErrorKind::Other) +#[macro_export] +macro_rules! eother { + () => (std::io::Error::new(std::io::ErrorKind::Other, "")); + ($fmt:expr) => ({ + std::io::Error::new(std::io::ErrorKind::Other, format!($fmt)) + }); + ($fmt:expr, $($arg:tt)*) => ({ + std::io::Error::new(std::io::ErrorKind::Other, format!($fmt, $($arg)*)) + }); +} + +/// Resolve a path to its final value. +#[macro_export] +macro_rules! resolve_path { + ($field:expr, $fmt:expr) => {{ + if !$field.is_empty() { + match Path::new(&$field).canonicalize() { + Err(e) => Err(eother!($fmt, &$field, e)), + Ok(path) => { + $field = path.to_string_lossy().to_string(); + Ok(()) + } + } + } else { + Ok(()) + } + }}; +} + +/// Validate a path. +#[macro_export] +macro_rules! validate_path { + ($field:expr, $fmt:expr) => {{ + if !$field.is_empty() { + Path::new(&$field) + .canonicalize() + .map_err(|e| eother!($fmt, &$field, e)) + .map(|_| ()) + } else { + Ok(()) + } + }}; +} diff --git a/src/libs/kata-types/src/mount.rs b/src/libs/kata-types/src/mount.rs new file mode 100644 index 000000000000..0cccab574de8 --- /dev/null +++ b/src/libs/kata-types/src/mount.rs @@ -0,0 +1,713 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// Copyright (c) 2019-2021 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Context, Error, Result}; +use std::collections::hash_map::Entry; +use std::convert::TryFrom; +use std::{collections::HashMap, fs, path::PathBuf}; + +/// Prefix to mark a volume as Kata special. +pub const KATA_VOLUME_TYPE_PREFIX: &str = "kata:"; + +/// The Mount should be ignored by the host and handled by the guest. 
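// Illustrative sketch, not part of the patch itself: how the eother!,
// resolve_path! and validate_path! macros defined in lib.rs above are meant
// to be used by the config code. The field names and messages here are
// hypothetical; both path macros are no-ops for empty fields and otherwise
// canonicalize the path, mapping failures to io::Error values.
//
//     resolve_path!(cfg.kernel_path, "kernel path `{}` is invalid: {}")?;
//     validate_path!(cfg.firmware_path, "firmware path `{}` is invalid: {}")?;
//     return Err(eother!("unsupported hypervisor `{}`", name));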
+pub const KATA_GUEST_MOUNT_PREFIX: &str = "kata:guest-mount:";
+
+/// The sharedfs volume is mounted by guest OS before starting the kata-agent.
+pub const KATA_SHAREDFS_GUEST_PREMOUNT_TAG: &str = "kataShared";
+
+/// KATA_EPHEMERAL_VOLUME_TYPE creates a tmpfs backed volume for sharing files between containers.
+pub const KATA_EPHEMERAL_VOLUME_TYPE: &str = "ephemeral";
+
+/// KATA_HOST_DIR_VOLUME_TYPE is used for host empty dir
+pub const KATA_HOST_DIR_VOLUME_TYPE: &str = "kata:hostdir";
+
+/// KATA_MOUNT_INFO_FILE_NAME is used for the file that holds direct-volume mount info
+pub const KATA_MOUNT_INFO_FILE_NAME: &str = "mountInfo.json";
+
+/// Specify `fsgid` for a volume or mount, `fsgid=1`.
+pub const KATA_MOUNT_OPTION_FS_GID: &str = "fsgid";
+
+/// KATA_DIRECT_VOLUME_ROOT_PATH is the root path used for concatenating with the direct-volume mount info file path
+pub const KATA_DIRECT_VOLUME_ROOT_PATH: &str = "/run/kata-containers/shared/direct-volumes";
+
+/// SANDBOX_BIND_MOUNTS_DIR is for sandbox bindmounts
+pub const SANDBOX_BIND_MOUNTS_DIR: &str = "sandbox-mounts";
+
+/// SANDBOX_BIND_MOUNTS_RO is for sandbox bindmounts with readonly
+pub const SANDBOX_BIND_MOUNTS_RO: &str = ":ro";
+
+/// SANDBOX_BIND_MOUNTS_RW is for sandbox bindmounts with readwrite
+pub const SANDBOX_BIND_MOUNTS_RW: &str = ":rw";
+
+/// Directly assign a block volume to vm and mount it inside guest.
+pub const KATA_VIRTUAL_VOLUME_DIRECT_BLOCK: &str = "direct_block";
+/// Present a container image as a generic block device.
+pub const KATA_VIRTUAL_VOLUME_IMAGE_RAW_BLOCK: &str = "image_raw_block";
+/// Present each container image layer as a generic block device.
+pub const KATA_VIRTUAL_VOLUME_LAYER_RAW_BLOCK: &str = "layer_raw_block";
+/// Present a container image as a nydus block device.
+pub const KATA_VIRTUAL_VOLUME_IMAGE_NYDUS_BLOCK: &str = "image_nydus_block";
+/// Present each container image layer as a nydus block device.
+pub const KATA_VIRTUAL_VOLUME_LAYER_NYDUS_BLOCK: &str = "layer_nydus_block";
+/// Present a container image as a nydus filesystem.
+pub const KATA_VIRTUAL_VOLUME_IMAGE_NYDUS_FS: &str = "image_nydus_fs";
+/// Present each container image layer as a nydus filesystem.
+pub const KATA_VIRTUAL_VOLUME_LAYER_NYDUS_FS: &str = "layer_nydus_fs";
+/// Download and extract container image inside guest vm.
+pub const KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL: &str = "image_guest_pull";
+
+/// Information about a mount.
+#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
+pub struct Mount {
+    /// A device name, but can also be a file or directory name for bind mounts or a dummy.
+    /// Path values for bind mounts are either absolute or relative to the bundle. A mount is a
+    /// bind mount if it has either bind or rbind in the options.
+    pub source: String,
+    /// Destination of mount point: path inside container. This value MUST be an absolute path.
+    pub destination: PathBuf,
+    /// The type of filesystem for the mountpoint.
+    pub fs_type: String,
+    /// Mount options for the mountpoint.
+    pub options: Vec<String>,
+    /// Optional device id for the block device when:
+    /// - the source is a block device or a mountpoint for a block device
+    /// - block device direct assignment is enabled
+    pub device_id: Option<String>,
+    /// Intermediate path to mount the source on host side and then passthrough to vm by shared fs.
+    pub host_shared_fs_path: Option<PathBuf>,
+    /// Whether to mount the mountpoint in readonly mode
+    pub read_only: bool,
+}
+
+impl Mount {
+    /// Get size of mount options.
+    pub fn option_size(&self) -> usize {
+        self.options.iter().map(|v| v.len() + 1).sum()
+    }
+}
+
+/// DirectVolumeMountInfo contains the information needed by Kata
+/// to consume a host block device and mount it as a filesystem inside the guest VM.
+#[derive(Debug, Clone, Eq, PartialEq, Default, Serialize, Deserialize)]
+pub struct DirectVolumeMountInfo {
+    /// The type of the volume (ie. block)
+    pub volume_type: String,
+    /// The device backing the volume.
+    pub device: String,
+    /// The filesystem type to be mounted on the volume.
+    pub fs_type: String,
+    /// Additional metadata to pass to the agent regarding this volume.
+    pub metadata: HashMap<String, String>,
+    /// Additional mount options.
+    pub options: Vec<String>,
+}
+
+/// Nydus extra options
+#[derive(Debug, serde::Deserialize)]
+pub struct NydusExtraOptions {
+    /// source path
+    pub source: String,
+    /// nydus config
+    pub config: String,
+    /// snapshotter directory
+    #[serde(rename(deserialize = "snapshotdir"))]
+    pub snapshot_dir: String,
+    /// fs version
+    pub fs_version: String,
+}
+
+impl NydusExtraOptions {
+    /// Create Nydus extra options
+    pub fn new(mount: &Mount) -> Result<Self> {
+        let options: Vec<&str> = mount
+            .options
+            .iter()
+            .filter(|x| x.starts_with("extraoption="))
+            .map(|x| x.as_ref())
+            .collect();
+
+        if options.len() != 1 {
+            return Err(anyhow!(
+                "get_nydus_extra_options: Invalid nydus options: {:?}",
+                &mount.options
+            ));
+        }
+        let config_raw_data = options[0].trim_start_matches("extraoption=");
+        let extra_options_buf =
+            base64::decode(config_raw_data).context("decode the nydus's base64 extraoption")?;
+
+        serde_json::from_slice(&extra_options_buf).context("deserialize nydus's extraoption")
+    }
+}
+
+/// Configuration information for DmVerity device.
+#[derive(Debug, Clone, Eq, PartialEq, Default, Serialize, Deserialize)]
+pub struct DmVerityInfo {
+    /// Hash algorithm for dm-verity.
+    pub hashtype: String,
+    /// Root hash for device verification or activation.
+    pub hash: String,
+    /// Size of data device used in verification.
+    pub blocknum: u64,
+    /// Used block size for the data device.
+    pub blocksize: u64,
+    /// Used block size for the hash device.
+    pub hashsize: u64,
+    /// Offset of hash area/superblock on hash_device.
+    pub offset: u64,
+}
+
+/// Information about directly assigned volume.
+#[derive(Debug, Clone, Eq, PartialEq, Default, Serialize, Deserialize)]
+pub struct DirectAssignedVolume {
+    /// Meta information for directly assigned volume.
+    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
+    pub metadata: HashMap<String, String>,
+}
+
+/// Information about pulling image inside guest.
+#[derive(Debug, Clone, Eq, PartialEq, Default, Serialize, Deserialize)]
+pub struct ImagePullVolume {
+    /// Meta information for pulling image inside guest.
+    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
+    pub metadata: HashMap<String, String>,
+}
+
+/// Information about nydus image volume.
+#[derive(Debug, Clone, Eq, PartialEq, Default, Serialize, Deserialize)]
+pub struct NydusImageVolume {
+    /// Nydus configuration information.
+    #[serde(default, skip_serializing_if = "String::is_empty")]
+    pub config: String,
+
+    /// Nydus snapshot directory
+    #[serde(default, skip_serializing_if = "String::is_empty")]
+    pub snapshot_dir: String,
+}
+
+/// Kata virtual volume to encapsulate information for extra mount options and direct volumes.
+///
+/// It's very expensive to build direct communication channels to pass information:
+/// - between snapshotters and kata-runtime/kata-agent/image-rs
+/// - between CSI drivers and kata-runtime/kata-agent
+///
+/// So `KataVirtualVolume` is introduced to encapsulate extra mount options and direct volume
+/// information, so we can build a common infrastructure to handle them.
+/// `KataVirtualVolume` is a superset of `NydusExtraOptions` and `DirectVolumeMountInfo`.
+///
+/// Value of `volume_type` determines how to interpret other fields in the structure.
+///
+/// - `KATA_VIRTUAL_VOLUME_IGNORE`
+/// -- all other fields should be ignored/unused.
+///
+/// - `KATA_VIRTUAL_VOLUME_DIRECT_BLOCK`
+/// -- `source`: the directly assigned block device
+/// -- `fs_type`: filesystem type
+/// -- `options`: mount options
+/// -- `direct_volume`: additional metadata to pass to the agent regarding this volume.
+///
+/// - `KATA_VIRTUAL_VOLUME_IMAGE_RAW_BLOCK` or `KATA_VIRTUAL_VOLUME_LAYER_RAW_BLOCK`
+/// -- `source`: path to the raw block image for the container image or layer.
+/// -- `fs_type`: filesystem type
+/// -- `options`: mount options
+/// -- `dm_verity`: disk dm-verity information
+///
+/// - `KATA_VIRTUAL_VOLUME_IMAGE_NYDUS_BLOCK` or `KATA_VIRTUAL_VOLUME_LAYER_NYDUS_BLOCK`
+/// -- `source`: path to nydus meta blob
+/// -- `fs_type`: filesystem type
+/// -- `nydus_image`: configuration information for nydus image.
+/// -- `dm_verity`: disk dm-verity information
+///
+/// - `KATA_VIRTUAL_VOLUME_IMAGE_NYDUS_FS` or `KATA_VIRTUAL_VOLUME_LAYER_NYDUS_FS`
+/// -- `source`: path to nydus meta blob
+/// -- `fs_type`: filesystem type
+/// -- `nydus_image`: configuration information for nydus image.
+///
+/// - `KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL`
+/// -- `source`: image reference
+/// -- `image_pull`: metadata for image pulling
+#[derive(Debug, Clone, Eq, PartialEq, Default, Serialize, Deserialize)]
+pub struct KataVirtualVolume {
+    /// Type of virtual volume.
+    pub volume_type: String,
+    /// Source/device path for the virtual volume.
+    #[serde(default, skip_serializing_if = "String::is_empty")]
+    pub source: String,
+    /// Filesystem type.
+    #[serde(default, skip_serializing_if = "String::is_empty")]
+    pub fs_type: String,
+    /// Mount options.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub options: Vec<String>,
+
+    /// Information about directly assigned volume.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub direct_volume: Option<DirectAssignedVolume>,
+    /// Information about pulling image inside guest.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub image_pull: Option<ImagePullVolume>,
+    /// Information about nydus image volume.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub nydus_image: Option<NydusImageVolume>,
+    /// DmVerity: configuration information
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub dm_verity: Option<DmVerityInfo>,
+}
+
+impl KataVirtualVolume {
+    /// Create a new instance of `KataVirtualVolume` with specified type.
+    pub fn new(volume_type: String) -> Self {
+        Self {
+            volume_type,
+            ..Default::default()
+        }
+    }
+
+    /// Validate virtual volume object.
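+    ///
+    /// A minimal sketch of constructing and validating a direct block volume (the device path
+    /// below is only an example value):
+    ///
+    /// ```
+    /// use kata_types::mount::{KataVirtualVolume, KATA_VIRTUAL_VOLUME_DIRECT_BLOCK};
+    ///
+    /// let mut volume = KataVirtualVolume::new(KATA_VIRTUAL_VOLUME_DIRECT_BLOCK.to_string());
+    /// // Fails: `source` and `fs_type` are still empty.
+    /// assert!(volume.validate().is_err());
+    ///
+    /// volume.source = "/dev/vdb".to_string();
+    /// volume.fs_type = "ext4".to_string();
+    /// assert!(volume.validate().is_ok());
+    /// ```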
+    pub fn validate(&self) -> Result<()> {
+        match self.volume_type.as_str() {
+            KATA_VIRTUAL_VOLUME_DIRECT_BLOCK => {
+                if self.source.is_empty() {
+                    return Err(anyhow!(
+                        "missing source device for directly assigned block volume"
+                    ));
+                } else if self.fs_type.is_empty() {
+                    return Err(anyhow!(
+                        "missing filesystem for directly assigned block volume"
+                    ));
+                }
+            }
+            KATA_VIRTUAL_VOLUME_IMAGE_RAW_BLOCK | KATA_VIRTUAL_VOLUME_LAYER_RAW_BLOCK => {
+                if self.source.is_empty() {
+                    return Err(anyhow!("missing source device for raw block volume"));
+                } else if self.fs_type.is_empty() {
+                    return Err(anyhow!("missing filesystem for raw block volume"));
+                }
+            }
+            KATA_VIRTUAL_VOLUME_IMAGE_NYDUS_BLOCK | KATA_VIRTUAL_VOLUME_LAYER_NYDUS_BLOCK => {
+                if self.source.is_empty() {
+                    return Err(anyhow!("missing meta blob for nydus block volume"));
+                } else if self.fs_type.as_str() != "rafsv6" {
+                    return Err(anyhow!("invalid filesystem for nydus block volume"));
+                }
+                match self.nydus_image.as_ref() {
+                    None => {
+                        return Err(anyhow!(
+                            "missing nydus configuration info for nydus block volume"
+                        ))
+                    }
+                    Some(nydus) => {
+                        if nydus.config.is_empty() {
+                            return Err(anyhow!(
+                                "missing configuration info for nydus block volume"
+                            ));
+                        } else if nydus.snapshot_dir.is_empty() {
+                            return Err(anyhow!(
+                                "missing snapshot directory for nydus block volume"
+                            ));
+                        }
+                    }
+                }
+            }
+            KATA_VIRTUAL_VOLUME_IMAGE_NYDUS_FS | KATA_VIRTUAL_VOLUME_LAYER_NYDUS_FS => {
+                if self.source.is_empty() {
+                    return Err(anyhow!("missing meta blob for nydus fs volume"));
+                } else if self.fs_type.as_str() != "rafsv6" && self.fs_type.as_str() != "rafsv5" {
+                    return Err(anyhow!("invalid filesystem for nydus fs volume"));
+                }
+                match self.nydus_image.as_ref() {
+                    None => {
+                        return Err(anyhow!(
+                            "missing nydus configuration info for nydus fs volume"
+                        ))
+                    }
+                    Some(nydus) => {
+                        if nydus.config.is_empty() {
+                            return Err(anyhow!(
+                                "missing configuration info for nydus fs volume"
+                            ));
+                        } else if nydus.snapshot_dir.is_empty() {
+                            return Err(anyhow!(
+                                "missing snapshot directory for nydus fs volume"
+                            ));
+                        }
+                    }
+                }
+            }
+            KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL => {
+                if self.source.is_empty() {
+                    return Err(anyhow!("missing image reference for guest pulling volume"));
+                }
+            }
+            _ => {}
+        }
+
+        Ok(())
+    }
+
+    /// Serialize the virtual volume object to json.
+    pub fn to_json(&self) -> Result<String> {
+        Ok(serde_json::to_string(self)?)
+    }
+
+    /// Deserialize a virtual volume object from json string.
+    pub fn from_json(value: &str) -> Result<Self> {
+        let volume: KataVirtualVolume = serde_json::from_str(value)?;
+        volume.validate()?;
+        Ok(volume)
+    }
+
+    /// Serialize the virtual volume object to json and encode the string with base64.
+    pub fn to_base64(&self) -> Result<String> {
+        let json = self.to_json()?;
+        Ok(base64::encode(json))
+    }
+
+    /// Decode and deserialize a virtual volume object from base64 encoded json string.
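+    ///
+    /// A round-trip sketch together with [`Self::to_base64`] (the image reference is just an
+    /// example value):
+    ///
+    /// ```
+    /// use kata_types::mount::{KataVirtualVolume, KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL};
+    ///
+    /// let mut volume = KataVirtualVolume::new(KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL.to_string());
+    /// volume.source = "docker.io/library/busybox:latest".to_string();
+    ///
+    /// let encoded = volume.to_base64().unwrap();
+    /// let decoded = KataVirtualVolume::from_base64(&encoded).unwrap();
+    /// assert_eq!(decoded.volume_type, volume.volume_type);
+    /// assert_eq!(decoded.source, volume.source);
+    /// ```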
+    pub fn from_base64(value: &str) -> Result<Self> {
+        let json = base64::decode(value)?;
+        let volume: KataVirtualVolume = serde_json::from_slice(&json)?;
+        volume.validate()?;
+        Ok(volume)
+    }
+}
+
+impl TryFrom<&DirectVolumeMountInfo> for KataVirtualVolume {
+    type Error = Error;
+
+    fn try_from(value: &DirectVolumeMountInfo) -> std::result::Result<Self, Self::Error> {
+        let volume_type = match value.volume_type.as_str() {
+            "block" => KATA_VIRTUAL_VOLUME_DIRECT_BLOCK.to_string(),
+            _ => {
+                return Err(anyhow!(
+                    "unknown directly assigned volume type: {}",
+                    value.volume_type
+                ))
+            }
+        };
+
+        Ok(KataVirtualVolume {
+            volume_type,
+            source: value.device.clone(),
+            fs_type: value.fs_type.clone(),
+            options: value.options.clone(),
+            direct_volume: Some(DirectAssignedVolume {
+                metadata: value.metadata.clone(),
+            }),
+            ..Default::default()
+        })
+    }
+}
+
+impl TryFrom<&NydusExtraOptions> for KataVirtualVolume {
+    type Error = Error;
+
+    fn try_from(value: &NydusExtraOptions) -> std::result::Result<Self, Self::Error> {
+        let fs_type = match value.fs_version.as_str() {
+            "v6" => "rafsv6".to_string(),
+            "rafsv6" => "rafsv6".to_string(),
+            "v5" => "rafsv5".to_string(),
+            "rafsv5" => "rafsv5".to_string(),
+            _ => return Err(anyhow!("unknown RAFS version: {}", value.fs_version)),
+        };
+
+        Ok(KataVirtualVolume {
+            volume_type: KATA_VIRTUAL_VOLUME_IMAGE_NYDUS_FS.to_string(),
+            source: value.source.clone(),
+            fs_type,
+            options: vec![],
+            nydus_image: Some(NydusImageVolume {
+                config: value.config.clone(),
+                snapshot_dir: value.snapshot_dir.clone(),
+            }),
+            ..Default::default()
+        })
+    }
+}
+
+/// Trait object for storage device.
+pub trait StorageDevice: Send + Sync {
+    /// Path
+    fn path(&self) -> Option<&str>;
+
+    /// Clean up resources related to the storage device.
+    fn cleanup(&self) -> Result<()>;
+}
+
+/// Manager to manage registered storage device handlers.
+pub struct StorageHandlerManager<H> {
+    handlers: HashMap<String, H>,
+}
+
+impl<H> Default for StorageHandlerManager<H> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<H> StorageHandlerManager<H> {
+    /// Create a new instance of `StorageHandlerManager`.
+    pub fn new() -> Self {
+        Self {
+            handlers: HashMap::new(),
+        }
+    }
+
+    /// Register a storage device handler.
+    pub fn add_handler(&mut self, id: &str, handler: H) -> Result<()> {
+        match self.handlers.entry(id.to_string()) {
+            Entry::Occupied(_) => Err(anyhow!("storage handler for {} already exists", id)),
+            Entry::Vacant(entry) => {
+                entry.insert(handler);
+                Ok(())
+            }
+        }
+    }
+
+    /// Get storage handler with specified `id`.
+    pub fn handler(&self, id: &str) -> Option<&H> {
+        self.handlers.get(id)
+    }
+
+    /// Get names of registered handlers.
+    pub fn get_handlers(&self) -> Vec<String> {
+        self.handlers.keys().map(|v| v.to_string()).collect()
+    }
+}
+
+/// Join user provided volume path with kata direct-volume root path.
+///
+/// The `volume_path` is base64-encoded and then safely joined to the `prefix`
+pub fn join_path(prefix: &str, volume_path: &str) -> Result<PathBuf> {
+    if volume_path.is_empty() {
+        return Err(anyhow!("volume path must not be empty"));
+    }
+    let b64_encoded_path = base64::encode(volume_path.as_bytes());
+
+    Ok(safe_path::scoped_join(prefix, b64_encoded_path)?)
+}
+
+/// get DirectVolume mountInfo from mountinfo.json.
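+///
+/// An illustrative sketch; the volume path below is made up, and the call only succeeds when a
+/// `mountInfo.json` has been published under the direct-volume root, so it is marked `no_run`:
+///
+/// ```no_run
+/// use kata_types::mount::get_volume_mount_info;
+///
+/// let info = get_volume_mount_info("/mnt/vol001").unwrap();
+/// println!("device: {}, fstype: {}", info.device, info.fs_type);
+/// ```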
+pub fn get_volume_mount_info(volume_path: &str) -> Result<DirectVolumeMountInfo> {
+    let volume_path = join_path(KATA_DIRECT_VOLUME_ROOT_PATH, volume_path)?;
+    let mount_info_file_path = volume_path.join(KATA_MOUNT_INFO_FILE_NAME);
+    let mount_info_file = fs::read_to_string(mount_info_file_path)?;
+    let mount_info: DirectVolumeMountInfo = serde_json::from_str(&mount_info_file)?;
+
+    Ok(mount_info)
+}
+
+/// Check whether a mount type is a marker for Kata specific volume.
+pub fn is_kata_special_volume(ty: &str) -> bool {
+    ty.len() > KATA_VOLUME_TYPE_PREFIX.len() && ty.starts_with(KATA_VOLUME_TYPE_PREFIX)
+}
+
+/// Check whether a mount type is a marker for Kata guest mount volume.
+pub fn is_kata_guest_mount_volume(ty: &str) -> bool {
+    ty.len() > KATA_GUEST_MOUNT_PREFIX.len() && ty.starts_with(KATA_GUEST_MOUNT_PREFIX)
+}
+
+/// Check whether a mount type is a marker for Kata ephemeral volume.
+pub fn is_kata_ephemeral_volume(ty: &str) -> bool {
+    ty == KATA_EPHEMERAL_VOLUME_TYPE
+}
+
+/// Check whether a mount type is a marker for Kata hostdir volume.
+pub fn is_kata_host_dir_volume(ty: &str) -> bool {
+    ty == KATA_HOST_DIR_VOLUME_TYPE
+}
+
+/// sandbox bindmount format: /path/to/dir, or /path/to/dir[:ro|:rw];
+/// the real path is without the ":ro" or ":rw" suffix.
+pub fn split_bind_mounts(bindmount: &str) -> (&str, &str) {
+    let (real_path, mode) = if bindmount.ends_with(SANDBOX_BIND_MOUNTS_RO) {
+        (
+            bindmount.trim_end_matches(SANDBOX_BIND_MOUNTS_RO),
+            SANDBOX_BIND_MOUNTS_RO,
+        )
+    } else if bindmount.ends_with(SANDBOX_BIND_MOUNTS_RW) {
+        (
+            bindmount.trim_end_matches(SANDBOX_BIND_MOUNTS_RW),
+            SANDBOX_BIND_MOUNTS_RW,
+        )
+    } else {
+        // default bindmount format
+        (bindmount, "")
+    };
+
+    (real_path, mode)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    #[test]
+    fn test_is_kata_special_volume() {
+        assert!(is_kata_special_volume("kata:guest-mount:nfs"));
+        assert!(!is_kata_special_volume("kata:"));
+    }
+
+    #[test]
+    fn test_split_bind_mounts() {
+        let test01 = "xxx0:ro";
+        let test02 = "xxx2:rw";
+        let test03 = "xxx3:is";
+        let test04 = "xxx4";
+        assert_eq!(split_bind_mounts(test01), ("xxx0", ":ro"));
+        assert_eq!(split_bind_mounts(test02), ("xxx2", ":rw"));
+        assert_eq!(split_bind_mounts(test03), ("xxx3:is", ""));
+        assert_eq!(split_bind_mounts(test04), ("xxx4", ""));
+    }
+
+    #[test]
+    fn test_is_kata_guest_mount_volume() {
+        assert!(is_kata_guest_mount_volume("kata:guest-mount:nfs"));
+        assert!(!is_kata_guest_mount_volume("kata:guest-mount"));
+        assert!(!is_kata_guest_mount_volume("kata:guest-moun"));
+        assert!(!is_kata_guest_mount_volume("Kata:guest-mount:nfs"));
+    }
+
+    #[test]
+    fn test_get_nydus_extra_options_v5() {
+        let mut mount_info = Mount {
+            ..Default::default()
+        };
+        mount_info.options =
vec!["extraoption=eyJzb3VyY2UiOiIvdmFyL2xpYi9jb250YWluZXJkL2lvLmNvbnRhaW5lcmQuc25hcHNob3R0ZXIudjEubnlkdXMvc25hcHNob3RzLzkvZnMvaW1hZ2UvaW1hZ2UuYm9vdCIsImNvbmZpZyI6IntcImRldmljZVwiOntcImJhY2tlbmRcIjp7XCJ0eXBlXCI6XCJyZWdpc3RyeVwiLFwiY29uZmlnXCI6e1wicmVhZGFoZWFkXCI6ZmFsc2UsXCJob3N0XCI6XCJsb2NhbGhvc3Q6NTAwMFwiLFwicmVwb1wiOlwidWJ1bnR1LW55ZHVzXCIsXCJzY2hlbWVcIjpcImh0dHBcIixcInNraXBfdmVyaWZ5XCI6dHJ1ZSxcInByb3h5XCI6e1wiZmFsbGJhY2tcIjpmYWxzZX0sXCJ0aW1lb3V0XCI6NSxcImNvbm5lY3RfdGltZW91dFwiOjUsXCJyZXRyeV9saW1pdFwiOjJ9fSxcImNhY2hlXCI6e1widHlwZVwiOlwiYmxvYmNhY2hlXCIsXCJjb25maWdcIjp7XCJ3b3JrX2RpclwiOlwiL3Zhci9saWIvbnlkdXMvY2FjaGVcIixcImRpc2FibGVfaW5kZXhlZF9tYXBcIjpmYWxzZX19fSxcIm1vZGVcIjpcImRpcmVjdFwiLFwiZGlnZXN0X3ZhbGlkYXRlXCI6ZmFsc2UsXCJlbmFibGVfeGF0dHJcIjp0cnVlLFwiZnNfcHJlZmV0Y2hcIjp7XCJlbmFibGVcIjp0cnVlLFwicHJlZmV0Y2hfYWxsXCI6ZmFsc2UsXCJ0aHJlYWRzX2NvdW50XCI6NCxcIm1lcmdpbmdfc2l6ZVwiOjAsXCJiYW5kd2lkdGhfcmF0ZVwiOjB9LFwidHlwZVwiOlwiXCIsXCJpZFwiOlwiXCIsXCJkb21haW5faWRcIjpcIlwiLFwiY29uZmlnXCI6e1wiaWRcIjpcIlwiLFwiYmFja2VuZF90eXBlXCI6XCJcIixcImJhY2tlbmRfY29uZmlnXCI6e1wicmVhZGFoZWFkXCI6ZmFsc2UsXCJwcm94eVwiOntcImZhbGxiYWNrXCI6ZmFsc2V9fSxcImNhY2hlX3R5cGVcIjpcIlwiLFwiY2FjaGVfY29uZmlnXCI6e1wid29ya19kaXJcIjpcIlwifSxcIm1ldGFkYXRhX3BhdGhcIjpcIlwifX0iLCJzbmFwc2hvdGRpciI6Ii92YXIvbGliL2NvbnRhaW5lcmQvaW8uY29udGFpbmVyZC5zbmFwc2hvdHRlci52MS5ueWR1cy9zbmFwc2hvdHMvMjU3IiwiZnNfdmVyc2lvbiI6InY1In0=".to_string()]; + let extra_option_result = NydusExtraOptions::new(&mount_info); + assert!(extra_option_result.is_ok()); + let extra_option = extra_option_result.unwrap(); + assert_eq!(extra_option.source,"/var/lib/containerd/io.containerd.snapshotter.v1.nydus/snapshots/9/fs/image/image.boot"); + assert_eq!( + extra_option.snapshot_dir, + "/var/lib/containerd/io.containerd.snapshotter.v1.nydus/snapshots/257" + ); + assert_eq!(extra_option.fs_version, "v5"); + } + + #[test] + fn test_get_nydus_extra_options_v6() { + let mut mount_info = Mount { + ..Default::default() + }; + mount_info.options = vec!["extraoption=eyJzb3VyY2UiOiIvdmFyL2xpYi9jb250YWluZXJkL2lvLmNvbnRhaW5lcmQuc25hcHNob3R0ZXIudjEubnlkdXMvc25hcHNob3RzLzIwMS9mcy9pbWFnZS9pbWFnZS5ib290IiwiY29uZmlnIjoie1wiZGV2aWNlXCI6e1wiYmFja2VuZFwiOntcInR5cGVcIjpcInJlZ2lzdHJ5XCIsXCJjb25maWdcIjp7XCJyZWFkYWhlYWRcIjpmYWxzZSxcImhvc3RcIjpcImxvY2FsaG9zdDo1MDAwXCIsXCJyZXBvXCI6XCJ1YnVudHUtbnlkdXMtdjZcIixcInNjaGVtZVwiOlwiaHR0cFwiLFwic2tpcF92ZXJpZnlcIjp0cnVlLFwicHJveHlcIjp7XCJmYWxsYmFja1wiOmZhbHNlfSxcInRpbWVvdXRcIjo1LFwiY29ubmVjdF90aW1lb3V0XCI6NSxcInJldHJ5X2xpbWl0XCI6Mn19LFwiY2FjaGVcIjp7XCJ0eXBlXCI6XCJibG9iY2FjaGVcIixcImNvbmZpZ1wiOntcIndvcmtfZGlyXCI6XCIvdmFyL2xpYi9ueWR1cy9jYWNoZVwiLFwiZGlzYWJsZV9pbmRleGVkX21hcFwiOmZhbHNlfX19LFwibW9kZVwiOlwiZGlyZWN0XCIsXCJkaWdlc3RfdmFsaWRhdGVcIjpmYWxzZSxcImVuYWJsZV94YXR0clwiOnRydWUsXCJmc19wcmVmZXRjaFwiOntcImVuYWJsZVwiOnRydWUsXCJwcmVmZXRjaF9hbGxcIjpmYWxzZSxcInRocmVhZHNfY291bnRcIjo0LFwibWVyZ2luZ19zaXplXCI6MCxcImJhbmR3aWR0aF9yYXRlXCI6MH0sXCJ0eXBlXCI6XCJcIixcImlkXCI6XCJcIixcImRvbWFpbl9pZFwiOlwiXCIsXCJjb25maWdcIjp7XCJpZFwiOlwiXCIsXCJiYWNrZW5kX3R5cGVcIjpcIlwiLFwiYmFja2VuZF9jb25maWdcIjp7XCJyZWFkYWhlYWRcIjpmYWxzZSxcInByb3h5XCI6e1wiZmFsbGJhY2tcIjpmYWxzZX19LFwiY2FjaGVfdHlwZVwiOlwiXCIsXCJjYWNoZV9jb25maWdcIjp7XCJ3b3JrX2RpclwiOlwiXCJ9LFwibWV0YWRhdGFfcGF0aFwiOlwiXCJ9fSIsInNuYXBzaG90ZGlyIjoiL3Zhci9saWIvY29udGFpbmVyZC9pby5jb250YWluZXJkLnNuYXBzaG90dGVyLnYxLm55ZHVzL3NuYXBzaG90cy8yNjEiLCJmc192ZXJzaW9uIjoidjYifQ==".to_string()]; + let extra_option_result = NydusExtraOptions::new(&mount_info); + assert!(extra_option_result.is_ok()); + let extra_option = extra_option_result.unwrap(); + 
assert_eq!(extra_option.source,"/var/lib/containerd/io.containerd.snapshotter.v1.nydus/snapshots/201/fs/image/image.boot"); + assert_eq!( + extra_option.snapshot_dir, + "/var/lib/containerd/io.containerd.snapshotter.v1.nydus/snapshots/261" + ); + assert_eq!(extra_option.fs_version, "v6"); + } + + #[test] + fn test_kata_virtual_volume() { + let mut volume = KataVirtualVolume::new(KATA_VIRTUAL_VOLUME_DIRECT_BLOCK.to_string()); + assert_eq!( + volume.volume_type.as_str(), + KATA_VIRTUAL_VOLUME_DIRECT_BLOCK + ); + assert!(volume.fs_type.is_empty()); + + let value = serde_json::to_string(&volume).unwrap(); + assert_eq!(&value, "{\"volume_type\":\"direct_block\"}"); + + volume.source = "/tmp".to_string(); + volume.fs_type = "ext4".to_string(); + volume.options = vec!["rw".to_string()]; + volume.nydus_image = Some(NydusImageVolume { + config: "test".to_string(), + snapshot_dir: "/var/lib/nydus.dir".to_string(), + }); + let mut metadata = HashMap::new(); + metadata.insert("mode".to_string(), "rw".to_string()); + volume.direct_volume = Some(DirectAssignedVolume { metadata }); + + let value = serde_json::to_string(&volume).unwrap(); + let volume2: KataVirtualVolume = serde_json::from_str(&value).unwrap(); + assert_eq!(volume.volume_type, volume2.volume_type); + assert_eq!(volume.source, volume2.source); + assert_eq!(volume.fs_type, volume2.fs_type); + assert_eq!(volume.nydus_image, volume2.nydus_image); + assert_eq!(volume.direct_volume, volume2.direct_volume); + } + + #[test] + fn test_kata_virtual_volume_serde() { + let mut volume = KataVirtualVolume::new(KATA_VIRTUAL_VOLUME_DIRECT_BLOCK.to_string()); + volume.source = "/tmp".to_string(); + volume.fs_type = "ext4".to_string(); + volume.options = vec!["rw".to_string()]; + volume.nydus_image = Some(NydusImageVolume { + config: "test".to_string(), + snapshot_dir: "/var/lib/nydus.dir".to_string(), + }); + let mut metadata = HashMap::new(); + metadata.insert("mode".to_string(), "rw".to_string()); + volume.direct_volume = Some(DirectAssignedVolume { metadata }); + + let value = volume.to_base64().unwrap(); + let volume2: KataVirtualVolume = KataVirtualVolume::from_base64(value.as_str()).unwrap(); + assert_eq!(volume.volume_type, volume2.volume_type); + assert_eq!(volume.source, volume2.source); + assert_eq!(volume.fs_type, volume2.fs_type); + assert_eq!(volume.nydus_image, volume2.nydus_image); + assert_eq!(volume.direct_volume, volume2.direct_volume); + } + + #[test] + fn test_try_from_direct_volume() { + let mut metadata = HashMap::new(); + metadata.insert("mode".to_string(), "rw".to_string()); + let mut direct = DirectVolumeMountInfo { + volume_type: "unknown".to_string(), + device: "/dev/vda".to_string(), + fs_type: "ext4".to_string(), + metadata, + options: vec!["ro".to_string()], + }; + KataVirtualVolume::try_from(&direct).unwrap_err(); + + direct.volume_type = "block".to_string(); + let volume = KataVirtualVolume::try_from(&direct).unwrap(); + assert_eq!( + volume.volume_type.as_str(), + KATA_VIRTUAL_VOLUME_DIRECT_BLOCK + ); + assert_eq!(volume.source, direct.device); + assert_eq!(volume.fs_type, direct.fs_type); + assert_eq!( + volume.direct_volume.as_ref().unwrap().metadata, + direct.metadata + ); + assert_eq!(volume.options, direct.options); + } + + #[test] + fn test_try_from_nydus_extra_options() { + let mut nydus = NydusExtraOptions { + source: "/test/nydus".to_string(), + config: "test".to_string(), + snapshot_dir: "/var/lib/nydus".to_string(), + fs_version: "rafsvx".to_string(), + }; + KataVirtualVolume::try_from(&nydus).unwrap_err(); + + 
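+        // "rafsvx" is not a recognized RAFS version, so the conversion above is rejected.
+        // Switching to a supported version ("v6") maps to the "rafsv6" filesystem type.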
nydus.fs_version = "v6".to_string(); + let volume = KataVirtualVolume::try_from(&nydus).unwrap(); + assert_eq!( + volume.volume_type.as_str(), + KATA_VIRTUAL_VOLUME_IMAGE_NYDUS_FS + ); + assert_eq!(volume.nydus_image.as_ref().unwrap().config, nydus.config); + assert_eq!( + volume.nydus_image.as_ref().unwrap().snapshot_dir, + nydus.snapshot_dir + ); + assert_eq!(volume.fs_type.as_str(), "rafsv6") + } +} diff --git a/src/libs/kata-types/src/utils/mod.rs b/src/libs/kata-types/src/utils/mod.rs new file mode 100644 index 000000000000..abcb4c227715 --- /dev/null +++ b/src/libs/kata-types/src/utils/mod.rs @@ -0,0 +1,6 @@ +// Copyright (c) 2022 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub(crate) mod u32_set; diff --git a/src/libs/kata-types/src/utils/u32_set.rs b/src/libs/kata-types/src/utils/u32_set.rs new file mode 100644 index 000000000000..44c55a16395e --- /dev/null +++ b/src/libs/kata-types/src/utils/u32_set.rs @@ -0,0 +1,163 @@ +// Copyright (c) 2022 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::ops::Deref; +use std::slice::Iter; +use std::str::FromStr; + +use crate::Error; + +/// A set of unique `u32` IDs. +/// +/// The `U32Set` may be used to save CPUs parsed from a CPU list file or NUMA nodes parsed from +/// a NUMA node list file. +#[derive(Clone, Default, Debug)] +pub struct U32Set(Vec); + +impl U32Set { + /// Create a new instance of `U32Set`. + pub fn new() -> Self { + U32Set(vec![]) + } + + /// Add the `cpu` to the CPU set. + pub fn add(&mut self, cpu: u32) { + self.0.push(cpu); + self.0.sort_unstable(); + self.0.dedup(); + } + + /// Add new CPUs into the set. + pub fn extend(&mut self, cpus: &[u32]) { + self.0.extend_from_slice(cpus); + self.0.sort_unstable(); + self.0.dedup(); + } + + /// Returns true if the CPU set contains elements. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Get number of elements in the CPU set. + pub fn len(&self) -> usize { + self.0.len() + } + + /// Get an iterator over the CPU set. + pub fn iter(&self) -> Iter { + self.0.iter() + } +} + +impl From> for U32Set { + fn from(mut cpus: Vec) -> Self { + cpus.sort_unstable(); + cpus.dedup(); + U32Set(cpus) + } +} + +impl FromStr for U32Set { + type Err = Error; + + fn from_str(cpus_str: &str) -> Result { + if cpus_str.is_empty() { + return Ok(U32Set::new()); + } + + let mut cpus = Vec::new(); + for split_cpu in cpus_str.split(',') { + if !split_cpu.contains('-') { + if !split_cpu.is_empty() { + if let Ok(cpu_id) = split_cpu.parse::() { + cpus.push(cpu_id); + continue; + } + } + } else { + let fields: Vec<&str> = split_cpu.split('-').collect(); + if fields.len() == 2 { + if let Ok(start) = fields[0].parse::() { + if let Ok(end) = fields[1].parse::() { + if start < end { + for cpu in start..=end { + cpus.push(cpu); + } + continue; + } + } + } + } + } + + return Err(Error::InvalidList(cpus_str.to_string())); + } + + Ok(U32Set::from(cpus)) + } +} + +impl Deref for U32Set { + type Target = [u32]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +/// Test whether two CPU sets are equal. 
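+///
+/// Two sets compare equal when they hold the same IDs, regardless of the order or duplicates
+/// in the input, e.g. `U32Set::from(vec![1, 2, 3]) == U32Set::from(vec![3, 2, 1, 1])`, and
+/// `U32Set::from_str("1-3").unwrap()` equals `U32Set::from(vec![3, 2, 1])`.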
+impl PartialEq for U32Set { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_is_cpuset_equal() { + let cpuset1 = U32Set::from(vec![1, 2, 3]); + let cpuset2 = U32Set::from(vec![3, 2, 1]); + let cpuset3 = U32Set::from(vec![]); + let cpuset4 = U32Set::from(vec![3, 2, 4]); + let cpuset5 = U32Set::from(vec![1, 2, 3, 3, 2, 1]); + + assert_eq!(cpuset1.len(), 3); + assert!(cpuset3.is_empty()); + assert_eq!(cpuset5.len(), 3); + + assert_eq!(cpuset1, cpuset2); + assert_eq!(cpuset1, cpuset5); + assert_ne!(cpuset1, cpuset3); + assert_ne!(cpuset1, cpuset4); + } + + #[test] + fn test_cpuset_from_str() { + assert!(U32Set::from_str("").unwrap().is_empty()); + + let support_cpus1 = U32Set::from(vec![1, 2, 3]); + assert_eq!(support_cpus1, U32Set::from_str("1,2,3").unwrap()); + assert_eq!(support_cpus1, U32Set::from_str("1-2,3").unwrap()); + + let support_cpus2 = U32Set::from(vec![1, 3, 4, 6, 7, 8]); + assert_eq!(support_cpus2, U32Set::from_str("1,3,4,6,7,8").unwrap()); + assert_eq!(support_cpus2, U32Set::from_str("1,3-4,6-8").unwrap()); + + assert!(U32Set::from_str("1-2-3,3").is_err()); + assert!(U32Set::from_str("1-2,,3").is_err()); + assert!(U32Set::from_str("1-2.5,3").is_err()); + assert!(U32Set::from_str("1-1").is_err()); + assert!(U32Set::from_str("2-1").is_err()); + assert!(U32Set::from_str("0,,1").is_err()); + assert!(U32Set::from_str("-1").is_err()); + assert!(U32Set::from_str("1-").is_err()); + assert!(U32Set::from_str("-1--2").is_err()); + assert!(U32Set::from_str("999999999999999999999999999999999999999999999").is_err()); + } +} diff --git a/src/libs/kata-types/tests/test_config.rs b/src/libs/kata-types/tests/test_config.rs new file mode 100644 index 000000000000..d350df724bb9 --- /dev/null +++ b/src/libs/kata-types/tests/test_config.rs @@ -0,0 +1,493 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// +#[cfg(test)] +mod tests { + use kata_types::annotations::{ + Annotation, KATA_ANNO_CFG_AGENT_CONTAINER_PIPE_SIZE, KATA_ANNO_CFG_AGENT_TRACE, + KATA_ANNO_CFG_DISABLE_GUEST_SECCOMP, KATA_ANNO_CFG_ENABLE_PPROF, + KATA_ANNO_CFG_EXPERIMENTAL, KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_CACHE_NOFLUSH, + KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_DRIVER, KATA_ANNO_CFG_HYPERVISOR_CTLPATH, + KATA_ANNO_CFG_HYPERVISOR_DEFAULT_MEMORY, KATA_ANNO_CFG_HYPERVISOR_DEFAULT_VCPUS, + KATA_ANNO_CFG_HYPERVISOR_ENABLE_GUEST_SWAP, KATA_ANNO_CFG_HYPERVISOR_ENABLE_HUGEPAGES, + KATA_ANNO_CFG_HYPERVISOR_ENABLE_IO_THREADS, KATA_ANNO_CFG_HYPERVISOR_ENABLE_SWAP, + KATA_ANNO_CFG_HYPERVISOR_FILE_BACKED_MEM_ROOT_DIR, + KATA_ANNO_CFG_HYPERVISOR_GUEST_HOOK_PATH, KATA_ANNO_CFG_HYPERVISOR_JAILER_PATH, + KATA_ANNO_CFG_HYPERVISOR_KERNEL_PATH, KATA_ANNO_CFG_HYPERVISOR_MEMORY_PREALLOC, + KATA_ANNO_CFG_HYPERVISOR_MEMORY_SLOTS, KATA_ANNO_CFG_HYPERVISOR_PATH, + KATA_ANNO_CFG_HYPERVISOR_VHOSTUSER_STORE_PATH, KATA_ANNO_CFG_HYPERVISOR_VIRTIO_FS_DAEMON, + KATA_ANNO_CFG_HYPERVISOR_VIRTIO_FS_EXTRA_ARGS, KATA_ANNO_CFG_HYPERVISOR_VIRTIO_MEM, + KATA_ANNO_CFG_KERNEL_MODULES, KATA_ANNO_CFG_RUNTIME_NAME, + }; + use kata_types::config::KataConfig; + use kata_types::config::{QemuConfig, TomlConfig}; + use std::collections::HashMap; + use std::fs; + use std::path::Path; + #[test] + fn test_change_config_annotation() { + let content = include_str!("texture/configuration-anno-0.toml"); + let qemu = QemuConfig::new(); + qemu.register(); + + let config = TomlConfig::load(content).unwrap(); + KataConfig::set_active_config(Some(config), "qemu", "agent0"); 
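+        // The path-type annotations below point at relative directories (e.g.
+        // "./hypervisor_path"); they are created up front so that path validation during
+        // update_config_by_annotation can resolve them, and removed again at the end of the test.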
+ + std::process::Command::new("mkdir") + .arg("./hypervisor_path") + .output() + .expect("failed to execute process"); + std::process::Command::new("mkdir") + .arg("./store_path") + .output() + .expect("failed to execute process"); + std::process::Command::new("mkdir") + .arg("./test_hypervisor_hook_path") + .output() + .expect("failed to execute process"); + std::process::Command::new("mkdir") + .arg("./jvm") + .output() + .expect("failed to execute process"); + std::process::Command::new("mkdir") + .arg("./test_file_backend_mem_root") + .output() + .expect("failed to execute process"); + std::process::Command::new("mkdir") + .arg("./test_jailer_path") + .output() + .expect("failed to execute process"); + std::process::Command::new("mkdir") + .arg("./test_kernel_path") + .output() + .expect("failed to execute process"); + std::process::Command::new("mkdir") + .arg("./virtio_fs") + .output() + .expect("failed to execute process"); + let mut anno_hash = HashMap::new(); + anno_hash.insert( + KATA_ANNO_CFG_KERNEL_MODULES.to_string(), + "j465 aaa=1;r33w".to_string(), + ); + anno_hash.insert(KATA_ANNO_CFG_AGENT_TRACE.to_string(), "false".to_string()); + anno_hash.insert( + KATA_ANNO_CFG_AGENT_CONTAINER_PIPE_SIZE.to_string(), + "3".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_PATH.to_string(), + "./hypervisor_path".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_DRIVER.to_string(), + "device".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_CACHE_NOFLUSH.to_string(), + "false".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_VHOSTUSER_STORE_PATH.to_string(), + "./store_path".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_DISABLE_GUEST_SECCOMP.to_string(), + "true".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_GUEST_HOOK_PATH.to_string(), + "./test_hypervisor_hook_path".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_MEMORY_PREALLOC.to_string(), + "false".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_CTLPATH.to_string(), + "./jvm".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_DEFAULT_VCPUS.to_string(), + "12".to_string(), + ); + anno_hash.insert(KATA_ANNO_CFG_ENABLE_PPROF.to_string(), "false".to_string()); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_ENABLE_GUEST_SWAP.to_string(), + "false".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_DEFAULT_MEMORY.to_string(), + "100MiB".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_ENABLE_IO_THREADS.to_string(), + "false".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_ENABLE_IO_THREADS.to_string(), + "false".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_ENABLE_SWAP.to_string(), + "false".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_FILE_BACKED_MEM_ROOT_DIR.to_string(), + "./test_file_backend_mem_root".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_ENABLE_HUGEPAGES.to_string(), + "false".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_JAILER_PATH.to_string(), + "./test_jailer_path".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_KERNEL_PATH.to_string(), + "./test_kernel_path".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_MEMORY_SLOTS.to_string(), + "100".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_VIRTIO_FS_EXTRA_ARGS.to_string(), + "rr,dg,er".to_string(), + ); + anno_hash.insert( + 
KATA_ANNO_CFG_HYPERVISOR_VIRTIO_MEM.to_string(), + "false".to_string(), + ); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_VIRTIO_FS_DAEMON.to_string(), + "./virtio_fs".to_string(), + ); + anno_hash.insert(KATA_ANNO_CFG_EXPERIMENTAL.to_string(), "c,d,e".to_string()); + + let anno = Annotation::new(anno_hash); + let mut config = TomlConfig::load(content).unwrap(); + + assert!(anno.update_config_by_annotation(&mut config).is_ok()); + KataConfig::set_active_config(Some(config), "qemu", "agnet0"); + if let Some(ag) = KataConfig::get_default_config().get_agent() { + assert_eq!( + ag.kernel_modules[0], + "e1000e InterruptThrottleRate=3000,3000,3000 EEE=1" + ); + + assert_eq!(ag.kernel_modules[1], "i915_enabled_ppgtt=0"); + assert_eq!(ag.kernel_modules[2], "j465 aaa=1"); + assert_eq!(ag.kernel_modules[3], "r33w"); + assert!(!ag.enable_tracing); + assert_eq!(ag.container_pipe_size, 3); + } + if let Some(hv) = KataConfig::get_default_config().get_hypervisor() { + assert_eq!(hv.path, "./hypervisor_path".to_string()); + assert_eq!(hv.blockdev_info.block_device_driver, "device"); + assert!(!hv.blockdev_info.block_device_cache_noflush); + assert!(hv.blockdev_info.block_device_cache_set); + assert_eq!(hv.blockdev_info.vhost_user_store_path, "./store_path"); + assert_eq!( + hv.security_info.guest_hook_path, + "./test_hypervisor_hook_path" + ); + assert!(!hv.memory_info.enable_mem_prealloc); + assert_eq!(hv.ctlpath, "./jvm".to_string()); + assert_eq!(hv.cpu_info.default_vcpus, 12); + assert!(!hv.memory_info.enable_guest_swap); + assert_eq!(hv.memory_info.default_memory, 100); + assert!(!hv.enable_iothreads); + assert!(!hv.enable_iothreads); + assert!(!hv.memory_info.enable_swap); + assert_eq!( + hv.memory_info.file_mem_backend, + "./test_file_backend_mem_root" + ); + assert!(!hv.memory_info.enable_hugepages); + assert_eq!(hv.jailer_path, "./test_jailer_path".to_string()); + assert_eq!(hv.boot_info.kernel, "./test_kernel_path"); + assert_eq!(hv.memory_info.memory_slots, 100); + assert_eq!(hv.shared_fs.virtio_fs_extra_args[5], "rr"); + assert_eq!(hv.shared_fs.virtio_fs_extra_args[6], "dg"); + assert_eq!(hv.shared_fs.virtio_fs_extra_args[7], "er"); + assert!(!hv.memory_info.enable_virtio_mem); + assert_eq!(hv.shared_fs.virtio_fs_daemon, "./virtio_fs"); + } + + assert!( + KataConfig::get_active_config() + .get_config() + .runtime + .disable_guest_seccomp + ); + + assert!( + !KataConfig::get_active_config() + .get_config() + .runtime + .enable_pprof + ); + assert_eq!( + KataConfig::get_active_config() + .get_config() + .runtime + .experimental, + ["a", "b", "c", "d", "e"] + ); + std::process::Command::new("rmdir") + .arg("./hypervisor_path") + .output() + .expect("failed to execute process"); + std::process::Command::new("rmdir") + .arg("./test_hypervisor_hook_path") + .output() + .expect("failed to execute process"); + + std::process::Command::new("rmdir") + .arg("./test_file_backend_mem_root") + .output() + .expect("failed to execute process"); + + std::process::Command::new("rmdir") + .arg("./test_jailer_path") + .output() + .expect("failed to execute process"); + std::process::Command::new("rmdir") + .arg("./test_kernel_path") + .output() + .expect("failed to execute process"); + std::process::Command::new("rmdir") + .arg("./virtio_fs") + .output() + .expect("failed to execute process"); + std::process::Command::new("rmdir") + .arg("./jvm") + .output() + .expect("failed to execute process"); + std::process::Command::new("rmdir") + .arg("./store_path") + .output() + .expect("failed to execute process"); + 
} + + #[test] + fn test_fail_to_change_block_device_driver_because_not_enabled() { + let content = include_str!("texture/configuration-anno-1.toml"); + + let qemu = QemuConfig::new(); + qemu.register(); + + let config = TomlConfig::load(content).unwrap(); + KataConfig::set_active_config(Some(config), "qemu", "agent0"); + + let mut anno_hash = HashMap::new(); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_BLOCK_DEV_DRIVER.to_string(), + "fvfvfvfvf".to_string(), + ); + let anno = Annotation::new(anno_hash); + let mut config = TomlConfig::load(content).unwrap(); + + assert!(anno.update_config_by_annotation(&mut config).is_ok()); + if let Some(hv) = KataConfig::get_default_config().get_hypervisor() { + assert_eq!(hv.blockdev_info.block_device_driver, "virtio-blk"); + } + } + + #[test] + fn test_fail_to_change_enable_guest_swap_because_not_enabled() { + let content = include_str!("texture/configuration-anno-1.toml"); + + let qemu = QemuConfig::new(); + qemu.register(); + + let config = TomlConfig::load(content).unwrap(); + KataConfig::set_active_config(Some(config), "qemu", "agent0"); + + let mut anno_hash = HashMap::new(); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_ENABLE_GUEST_SWAP.to_string(), + "false".to_string(), + ); + let anno = Annotation::new(anno_hash); + let mut config = TomlConfig::load(content).unwrap(); + + assert!(anno.update_config_by_annotation(&mut config).is_ok()); + if let Some(hv) = KataConfig::get_default_config().get_hypervisor() { + assert!(hv.memory_info.enable_guest_swap) + } + } + + #[test] + fn test_fail_to_change_hypervisor_path_because_of_invalid_path() { + let content = include_str!("texture/configuration-anno-0.toml"); + + let qemu = QemuConfig::new(); + qemu.register(); + + let config = TomlConfig::load(content).unwrap(); + KataConfig::set_active_config(Some(config), "qemu", "agent0"); + + let mut anno_hash = HashMap::new(); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_PATH.to_string(), + "/usr/bin/nle".to_string(), + ); + let anno = Annotation::new(anno_hash); + + let path = env!("CARGO_MANIFEST_DIR"); + let path = Path::new(path).join("tests/texture/configuration-anno-0.toml"); + let content = fs::read_to_string(path).unwrap(); + let mut config = TomlConfig::load(&content).unwrap(); + assert!(anno.update_config_by_annotation(&mut config).is_err()); + } + + #[test] + fn test_fail_to_change_kernel_path_because_of_invalid_path() { + let path = env!("CARGO_MANIFEST_DIR"); + let path = Path::new(path).join("tests/texture/configuration-anno-0.toml"); + let content = fs::read_to_string(path).unwrap(); + + let qemu = QemuConfig::new(); + qemu.register(); + + let config = TomlConfig::load(&content).unwrap(); + KataConfig::set_active_config(Some(config), "qemu", "agent0"); + + let mut anno_hash = HashMap::new(); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_KERNEL_PATH.to_string(), + "/usr/bin/cdcd".to_string(), + ); + let anno = Annotation::new(anno_hash); + let mut config = TomlConfig::load(&content).unwrap(); + + assert!(anno.update_config_by_annotation(&mut config).is_err()); + } + + #[test] + fn test_fail_to_change_memory_slots_because_of_less_than_zero() { + let content = include_str!("texture/configuration-anno-0.toml"); + let config = TomlConfig::load(content).unwrap(); + KataConfig::set_active_config(Some(config), "qemu", "agent0"); + + let qemu = QemuConfig::new(); + qemu.register(); + + let mut anno_hash = HashMap::new(); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_MEMORY_SLOTS.to_string(), + "-1".to_string(), + ); + let anno = 
Annotation::new(anno_hash); + let mut config = TomlConfig::load(content).unwrap(); + + assert!(anno.update_config_by_annotation(&mut config).is_err()); + } + + #[test] + fn test_fail_to_change_default_memory_because_less_than_min_memory_size() { + let content = include_str!("texture/configuration-anno-0.toml"); + + let qemu = QemuConfig::new(); + qemu.register(); + + let config = TomlConfig::load(content).unwrap(); + KataConfig::set_active_config(Some(config), "qemu", "agent0"); + + let mut anno_hash = HashMap::new(); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_DEFAULT_MEMORY.to_string(), + "10".to_string(), + ); + let anno = Annotation::new(anno_hash); + let mut config = TomlConfig::load(content).unwrap(); + + assert!(anno.update_config_by_annotation(&mut config).is_err()); + } + + #[test] + fn test_fail_to_change_default_vcpus_becuase_more_than_max_cpu_size() { + let content = include_str!("texture/configuration-anno-0.toml"); + + let qemu = QemuConfig::new(); + qemu.register(); + + let config = TomlConfig::load(content).unwrap(); + KataConfig::set_active_config(Some(config), "qemu", "agent0"); + + let mut anno_hash = HashMap::new(); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_DEFAULT_VCPUS.to_string(), + "400".to_string(), + ); + let anno = Annotation::new(anno_hash); + let mut config = TomlConfig::load(content).unwrap(); + + assert!(anno.update_config_by_annotation(&mut config).is_err()); + } + + #[test] + fn test_fail_to_change_enable_guest_swap_because_invalid_input() { + let content = include_str!("texture/configuration-anno-0.toml"); + + let qemu = QemuConfig::new(); + qemu.register(); + + let config = TomlConfig::load(content).unwrap(); + KataConfig::set_active_config(Some(config), "qemu", "agent0"); + + let mut anno_hash = HashMap::new(); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_ENABLE_GUEST_SWAP.to_string(), + "false1".to_string(), + ); + let anno = Annotation::new(anno_hash); + let mut config = TomlConfig::load(content).unwrap(); + + assert!(anno.update_config_by_annotation(&mut config).is_err()); + } + + #[test] + fn test_fail_to_change_default_vcpus_becuase_invalid_input() { + let content = include_str!("texture/configuration-anno-0.toml"); + + let qemu = QemuConfig::new(); + qemu.register(); + + let config = TomlConfig::load(content).unwrap(); + KataConfig::set_active_config(Some(config), "qemu", "agent0"); + + let mut anno_hash = HashMap::new(); + anno_hash.insert( + KATA_ANNO_CFG_HYPERVISOR_DEFAULT_VCPUS.to_string(), + "ddc".to_string(), + ); + let anno = Annotation::new(anno_hash); + let mut config = TomlConfig::load(content).unwrap(); + + assert!(anno.update_config_by_annotation(&mut config).is_err()); + } + + #[test] + fn test_fail_to_change_runtime_name() { + let content = include_str!("texture/configuration-anno-0.toml"); + + let qemu = QemuConfig::new(); + qemu.register(); + + let config = TomlConfig::load(content).unwrap(); + KataConfig::set_active_config(Some(config), "qemu", "agent0"); + let mut anno_hash = HashMap::new(); + anno_hash.insert( + KATA_ANNO_CFG_RUNTIME_NAME.to_string(), + "other-container".to_string(), + ); + let anno = Annotation::new(anno_hash); + let mut config = TomlConfig::load(content).unwrap(); + assert!(anno.update_config_by_annotation(&mut config).is_err()); + } +} diff --git a/src/libs/kata-types/tests/texture/configuration-anno-0.toml b/src/libs/kata-types/tests/texture/configuration-anno-0.toml new file mode 100644 index 000000000000..807de57b69ae --- /dev/null +++ b/src/libs/kata-types/tests/texture/configuration-anno-0.toml 
@@ -0,0 +1,90 @@ +[hypervisor.qemu] +path = "/usr/bin/lsns" +valid_hypervisor_paths = ["/usr/bin/qemu*", "/opt/qemu?","/usr/bin/ls*","./hypervisor_path"] +valid_jailer_paths = ["/usr/lib/rust","./test_jailer_path"] +ctlpath = "/usr/bin/" +valid_ctlpaths = ["/usr/lib/jvm","usr/bin/qemu-io","./jvm"] +disable_nesting_checks = true +enable_iothreads = true +jailer_path = "/usr/local" +kernel = "/usr/bin/../bin/zcmp" +image = "/usr/bin/./tabs" +kernel_params = "ro" +firmware = "/etc/hostname" + +cpu_features="pmu=off,vmx=off" +default_vcpus = 2 +default_maxvcpus = 64 + +machine_type = "q35" +confidential_guest = true +rootless = true +enable_annotations = ["shared_fs","path", "ctlpath","jailer_path","enable_iothreads","default_memory","memory_slots","enable_mem_prealloc","enable_hugepages","file_mem_backend","enable_virtio_mem","enable_swap","enable_guest_swap","default_vcpus","virtio_fs_extra_args","block_device_driver","vhost_user_store_path","kernel","guest_hook_path","block_device_cache_noflush","virtio_fs_daemon"] +machine_accelerators="noapic" +default_bridges = 2 +default_memory = 128 +memory_slots = 128 +memory_offset = 0x100000 +enable_virtio_mem = true +disable_block_device_use = false +shared_fs = "virtio-fs" +virtio_fs_daemon = "/usr/bin/uptime" +valid_virtio_fs_daemon_paths = ["/usr/local/bin/virtiofsd*","./virtio_fs"] +virtio_fs_cache_size = 512 +virtio_fs_extra_args = ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +virtio_fs_cache = "always" +block_device_driver = "virtio-blk" +block_device_cache_set = true +block_device_cache_direct = true +block_device_cache_noflush = true +enable_mem_prealloc = true +enable_hugepages = true +enable_vhost_user_store = true +vhost_user_store_path = "/tmp" +valid_vhost_user_store_paths = ["/var/kata/vhost-user-store*", "/tmp/kata?","/var/tmp","./store_path"] +enable_iommu = true +enable_iommu_platform = true +file_mem_backend = "/dev/shm" +valid_file_mem_backends = ["/dev/shm","/dev/snd","./test_file_backend_mem_root"] +enable_swap = true +pflashes = ["/proc/mounts"] +enable_debug = true +msize_9p = 16384 +disable_image_nvdimm = true +hotplug_vfio_on_root_bus = true +pcie_root_port = 2 +disable_vhost_net = true +entropy_source= "/dev/urandom" +valid_entropy_sources = ["/dev/urandom", "/dev/random"] +guest_hook_path = "/usr/share" +rx_rate_limiter_max_rate = 10000 +tx_rate_limiter_max_rate = 10000 +guest_memory_dump_path="/var/crash/kata" +guest_memory_dump_paging = true +enable_guest_swap = true + +[agent.agent0] +enable_tracing = true +debug_console_enabled = true +debug = true +dial_timeout = 1 +kernel_modules = ["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1","i915_enabled_ppgtt=0"] +container_pipe_size = 2 +[runtime] +enable_debug = true +internetworking_model="macvtap" +disable_guest_seccomp=false +enable_tracing = true +jaeger_endpoint = "localhost:1234" +jaeger_user = "user" +jaeger_password = "pw" +disable_new_netns = true +sandbox_cgroup_only=true +sandbox_bind_mounts=["/proc/self"] +vfio_mode="vfio" +experimental=["a", "b"] +enable_pprof = true +hypervisor_name = "qemu" +agent_name = "agent0" + + diff --git a/src/libs/kata-types/tests/texture/configuration-anno-1.toml b/src/libs/kata-types/tests/texture/configuration-anno-1.toml new file mode 100644 index 000000000000..59c05c9b5914 --- /dev/null +++ b/src/libs/kata-types/tests/texture/configuration-anno-1.toml @@ -0,0 +1,88 @@ +[hypervisor.qemu] +path = "/usr/bin/lsns" +valid_hypervisor_paths = ["/usr/bin/qemu*", "/opt/qemu?","/usr/bin/lsns","./hypervisor_path"] 
+valid_jailer_paths = ["/usr/lib/rust"] +ctlpath = "/usr/bin" +disable_nesting_checks = true +enable_iothreads = true +jailer_path = "/usr/local" +kernel = "/usr/bin/../bin/uptime" +image = "/usr/bin/./lessecho" +kernel_params = "ro" +firmware = "/etc/hostname" + +cpu_features="pmu=off,vmx=off" +default_vcpus = 2 +default_maxvcpus = 64 + +machine_type = "q35" +confidential_guest = true +rootless = true +enable_annotations = ["path", "ctlpath","jailer_path"] +machine_accelerators="noapic" +default_bridges = 2 +default_memory = 128 +memory_slots = 128 +memory_offset = 0x100000 +enable_virtio_mem = true +disable_block_device_use = false +shared_fs = "virtio-fs" +virtio_fs_daemon = "/usr/bin/uptime" +valid_virtio_fs_daemon_paths = ["/usr/local/bin/virtiofsd*"] +virtio_fs_cache_size = 512 +virtio_fs_extra_args = ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +virtio_fs_cache = "always" +block_device_driver = "virtio-blk" +block_device_cache_set = true +block_device_cache_direct = true +block_device_cache_noflush = true +enable_mem_prealloc = true +enable_hugepages = true +enable_vhost_user_store = true +vhost_user_store_path = "/tmp" +valid_vhost_user_store_paths = ["/var/kata/vhost-user-store*", "/tmp/kata?"] +enable_iommu = true +enable_iommu_platform = true +file_mem_backend = "/dev/shm" +valid_file_mem_backends = ["/dev/shm"] +enable_swap = true +pflashes = ["/proc/mounts"] +enable_debug = true +msize_9p = 16384 +disable_image_nvdimm = true +hotplug_vfio_on_root_bus = true +pcie_root_port = 2 +disable_vhost_net = true +entropy_source= "/dev/urandom" +valid_entropy_sources = ["/dev/urandom", "/dev/random"] +guest_hook_path = "/usr/share/oci/hooks" +rx_rate_limiter_max_rate = 10000 +tx_rate_limiter_max_rate = 10000 +guest_memory_dump_path="/var/crash/kata" +guest_memory_dump_paging = true +enable_guest_swap = true + +[agent.agent0] +enable_tracing = true +debug_console_enabled = true +debug = true +dial_timeout = 1 +kernel_modules = ["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1","i915_enabled_ppgtt=0"] +container_pipe_size = 2 +[runtime] +enable_debug = true +internetworking_model="macvtap" +disable_guest_seccomp=true +enable_tracing = true +jaeger_endpoint = "localhost:1234" +jaeger_user = "user" +jaeger_password = "pw" +disable_new_netns = true +sandbox_cgroup_only=true +sandbox_bind_mounts=["/proc/self"] +vfio_mode="vfio" +experimental=["a", "b"] +enable_pprof = true +hypervisor_name = "qemu" +agent_name = "agent0" + diff --git a/src/libs/logging/Cargo.toml b/src/libs/logging/Cargo.toml index 36685c15a3ed..4d19d90c6754 100644 --- a/src/libs/logging/Cargo.toml +++ b/src/libs/logging/Cargo.toml @@ -3,6 +3,7 @@ name = "logging" version = "0.1.0" authors = ["The Kata Containers community "] edition = "2018" +license = "Apache-2.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -12,8 +13,9 @@ serde_json = "1.0.73" # - Dynamic keys required to allow HashMap keys to be slog::Serialized. # - The 'max_*' features allow changing the log level at runtime # (by stopping the compiler from removing log calls). 
-slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug"] } +slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug"] } slog-json = "2.4.0" +slog-term = "2.9.0" slog-async = "2.7.0" slog-scope = "4.4.0" diff --git a/src/libs/logging/Makefile b/src/libs/logging/Makefile deleted file mode 100644 index 74c917ab882f..000000000000 --- a/src/libs/logging/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2021 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -# It is not necessary to have a build target as this crate is built -# automatically by the consumers of it. -# -# However, it is essential that the crate be tested. -default: test - -# It is essential to run these tests using *both* build profiles. -# See the `test_logger_levels()` test for further information. -test: - @echo "INFO: testing log levels for development build" - @cargo test - @echo "INFO: testing log levels for release build" - @cargo test --release diff --git a/src/libs/logging/src/file_rotate.rs b/src/libs/logging/src/file_rotate.rs new file mode 100644 index 000000000000..3cc8f5715683 --- /dev/null +++ b/src/libs/logging/src/file_rotate.rs @@ -0,0 +1,315 @@ +// Copyright (c) 2020 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 or MIT +// +// Partial code are extracted from +// https://github.com/sile/sloggers/blob/153c00a59f7218c1d96f522fb7a95c80bb0d530c/src/file.rs +// with following license and copyright. +// The MIT License +// +// Copyright (c) 2017 Takeru Ohta +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +use std::fs::{self, File, OpenOptions}; +use std::io::{self, LineWriter, Result, Write}; +use std::path::{Path, PathBuf}; + +/// Default rotate size for logger files. +const DEFAULT_LOG_FILE_SIZE_TO_ROTATE: u64 = 10485760; + +/// Default number of log files to keep. +const DEFAULT_HISTORY_LOG_FILES: usize = 3; + +/// Writer with file rotation for log files. +/// +/// This is a modified version of `FileAppender` from +/// https://github.com/sile/sloggers/blob/153c00a59f7218c1d96f522fb7a95c80bb0d530c/src/file.rs#L190 +#[derive(Debug)] +pub struct FileRotator { + path: PathBuf, + file: Option>, + ignore_errors: bool, + rotate_size: u64, + rotate_keep: usize, + truncate: bool, + written_size: u64, + #[cfg(test)] + fail_rename: bool, +} + +impl FileRotator { + /// Create a new instance of [`FileRotator`] to write log file at `path`. 
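+    ///
+    /// The log file is opened lazily on the first write; rotation happens during `flush` once
+    /// at least the size set via `rotate_threshold` has been written, keeping up to
+    /// `rotate_count` rotated files.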
+ /// + /// It returns `std::io::Error` if the path is not a normal file or the parent directory does + /// not exist. + pub fn new>(path: P) -> Result { + let p = Path::new(path.as_ref()); + match p.metadata() { + Ok(md) => { + if !md.is_file() { + return Err(io::Error::new( + io::ErrorKind::Other, + format!("path '{}' is not a file", p.to_string_lossy()), + )); + } + } + Err(e) if e.kind() == io::ErrorKind::NotFound => {} + Err(e) => return Err(e), + } + if let Some(parent) = p.parent() { + if p.has_root() || !parent.as_os_str().is_empty() { + let md = parent.metadata()?; + if !md.is_dir() { + return Err(io::Error::new( + io::ErrorKind::Other, + format!("'{}' is not a directory", parent.to_string_lossy()), + )); + } + } + } + + Ok(FileRotator { + path: p.to_path_buf(), + file: None, + ignore_errors: false, + rotate_size: DEFAULT_LOG_FILE_SIZE_TO_ROTATE, + rotate_keep: DEFAULT_HISTORY_LOG_FILES, + truncate: false, + written_size: 0, + #[cfg(test)] + fail_rename: false, + }) + } + + /// Use "truncate" or "append" mode when opening the log file. + pub fn truncate_mode(&mut self, truncate: bool) -> &mut Self { + self.truncate = truncate; + self + } + + /// Set the threshold size to rotate log files. + pub fn rotate_threshold(&mut self, size: u64) -> &mut Self { + self.rotate_size = size; + self + } + + /// Set number of rotated log files to keep. + pub fn rotate_count(&mut self, count: usize) -> &mut Self { + self.rotate_keep = count; + self + } + + /// Ignore all errors and try best effort to log messages but without guarantee. + pub fn ignore_errors(&mut self, ignore_errors: bool) -> &mut Self { + self.ignore_errors = ignore_errors; + self + } + + /// Open the log file if + /// - it hasn't been opened yet. + /// - current log file has been rotated and needs to open a new log file. + fn reopen_if_needed(&mut self) -> Result<()> { + if self.file.is_none() || !self.path.exists() { + let file = OpenOptions::new() + .create(true) + .write(true) + .truncate(self.truncate) + .append(!self.truncate) + .open(&self.path)?; + match file.metadata() { + Ok(md) => self.written_size = md.len(), + Err(e) => { + if self.ignore_errors { + // Pretend as an empty file. + // It's better to permit over-sized log file instead of disabling rotation. + self.written_size = 0; + } else { + return Err(e); + } + } + } + self.file = Some(LineWriter::new(file)); + } + + Ok(()) + } + + /// Try to rotate log files. + /// + /// When failed to rotate the log files, we choose to ignore the error instead of possibly + /// panicking the whole program. This may cause over-sized log files, but that should be easy + /// to recover. + fn rotate(&mut self) -> Result<()> { + for i in (1..=self.rotate_keep).rev() { + let from = self.rotated_path(i); + let to = self.rotated_path(i + 1); + if from.exists() { + let _ = fs::rename(from, to); + } + } + + #[cfg(test)] + if !self.fail_rename && self.path.exists() { + let rotated_path = self.rotated_path(1); + let _ = fs::rename(&self.path, rotated_path); + } + #[cfg(not(test))] + if self.path.exists() { + let rotated_path = self.rotated_path(1); + let _ = fs::rename(&self.path, rotated_path); + } + + let delete_path = self.rotated_path(self.rotate_keep + 1); + if delete_path.exists() { + let _ = fs::remove_file(delete_path); + } + + // Reset the `written_size` so only try to rotate again when another `rotate_size` bytes + // of log messages have been written to the lo file. 
+ self.written_size = 0; + self.reopen_if_needed()?; + + Ok(()) + } + + fn rotated_path(&self, i: usize) -> PathBuf { + let mut path = self.path.clone().into_os_string(); + path.push(format!(".{}", i)); + PathBuf::from(path) + } +} + +impl Write for FileRotator { + fn write(&mut self, buf: &[u8]) -> Result { + if self.ignore_errors { + let _ = self.reopen_if_needed(); + if let Some(file) = self.file.as_mut() { + let _ = file.write_all(buf); + } + } else { + self.reopen_if_needed()?; + match self.file.as_mut() { + Some(file) => file.write_all(buf)?, + None => { + return Err(io::Error::new( + io::ErrorKind::Other, + format!("Cannot open file: {:?}", self.path), + )) + } + } + } + + self.written_size += buf.len() as u64; + Ok(buf.len()) + } + + fn flush(&mut self) -> Result<()> { + if let Some(f) = self.file.as_mut() { + if let Err(e) = f.flush() { + if !self.ignore_errors { + return Err(e); + } + } + } + if self.written_size >= self.rotate_size { + if let Err(e) = self.rotate() { + if !self.ignore_errors { + return Err(e); + } + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::os::unix::fs::MetadataExt; + + #[test] + fn test_rotator_valid_path() { + FileRotator::new("/proc/self").unwrap_err(); + FileRotator::new("/proc/self/__does_not_exist__/log.txt").unwrap_err(); + + let _ = FileRotator::new("log.txt").unwrap(); + } + + #[test] + fn test_rotator_rotate() { + let tmpdir = tempfile::tempdir().unwrap(); + let mut path = tmpdir.path().to_path_buf(); + path.push("log.txt"); + + let mut rotator = FileRotator::new(&path).unwrap(); + rotator.truncate_mode(false); + rotator.rotate_threshold(4); + rotator.rotate_count(1); + assert_eq!(rotator.rotate_size, 4); + assert_eq!(rotator.rotate_keep, 1); + assert!(!rotator.truncate); + + rotator.write_all("test".as_bytes()).unwrap(); + rotator.flush().unwrap(); + rotator.write_all("test1".as_bytes()).unwrap(); + rotator.flush().unwrap(); + rotator.write_all("t2".as_bytes()).unwrap(); + rotator.flush().unwrap(); + + let content = fs::read_to_string(path).unwrap(); + assert_eq!(content, "t2"); + + let mut path1 = tmpdir.path().to_path_buf(); + path1.push("log.txt.1"); + let content = fs::read_to_string(path1).unwrap(); + assert_eq!(content, "test1"); + + let mut path2 = tmpdir.path().to_path_buf(); + path2.push("log.txt.2"); + fs::read_to_string(path2).unwrap_err(); + } + + #[test] + fn test_rotator_rotate_fail() { + let tmpdir = tempfile::tempdir().unwrap(); + let mut path = tmpdir.path().to_path_buf(); + path.push("log.txt"); + + let mut rotator = FileRotator::new(&path).unwrap(); + rotator.truncate_mode(false); + rotator.rotate_threshold(1); + rotator.rotate_count(1); + rotator.fail_rename = true; + + rotator.write_all("test".as_bytes()).unwrap(); + rotator.flush().unwrap(); + let size1 = path.metadata().unwrap().size(); + + rotator.write_all("test1".as_bytes()).unwrap(); + rotator.flush().unwrap(); + let size2 = path.metadata().unwrap().size(); + assert!(size2 > size1); + + rotator.write_all("test2".as_bytes()).unwrap(); + rotator.flush().unwrap(); + let size3 = path.metadata().unwrap().size(); + assert!(size3 > size2); + } +} diff --git a/src/libs/logging/src/lib.rs b/src/libs/logging/src/lib.rs index 5d92acabfff9..b662174598c3 100644 --- a/src/libs/logging/src/lib.rs +++ b/src/libs/logging/src/lib.rs @@ -11,6 +11,23 @@ use std::process; use std::result; use std::sync::Mutex; +mod file_rotate; +mod log_writer; + +pub use file_rotate::FileRotator; +pub use log_writer::LogWriter; + +#[macro_export] +macro_rules! 
logger_with_subsystem { + ($name: ident, $subsystem: expr) => { + macro_rules! $name { + () => { + slog_scope::logger().new(slog::o!("subsystem" => $subsystem)) + }; + } + }; +} + const LOG_LEVELS: &[(&str, slog::Level)] = &[ ("trace", slog::Level::Trace), ("debug", slog::Level::Debug), @@ -22,6 +39,28 @@ const LOG_LEVELS: &[(&str, slog::Level)] = &[ const DEFAULT_SUBSYSTEM: &str = "root"; +// Creates a logger which prints output as human readable text to the terminal +pub fn create_term_logger(level: slog::Level) -> (slog::Logger, slog_async::AsyncGuard) { + let term_drain = slog_term::term_compact().fuse(); + + // Ensure only a unique set of key/value fields is logged + let unique_drain = UniqueDrain::new(term_drain).fuse(); + + // Allow runtime filtering of records by log level + let filter_drain = RuntimeLevelFilter::new(unique_drain, level).fuse(); + + // Ensure the logger is thread-safe + let (async_drain, guard) = slog_async::Async::new(filter_drain) + .thread_name("slog-async-logger".into()) + .build_with_guard(); + + // Add some "standard" fields + let logger = slog::Logger::root(async_drain.fuse(), o!("subsystem" => DEFAULT_SUBSYSTEM)); + + (logger, guard) +} + +// Creates a logger which prints output as JSON // XXX: 'writer' param used to make testing possible. pub fn create_logger( name: &str, @@ -482,6 +521,7 @@ mod tests { let error_closure = |logger: &Logger, msg: String| error!(logger, "{}", msg); let critical_closure = |logger: &Logger, msg: String| crit!(logger, "{}", msg); + #[allow(clippy::type_complexity)] struct TestData<'a> { slog_level: slog::Level, slog_level_tag: &'a str, @@ -528,13 +568,13 @@ mod tests { let msg = format!("test[{}]", i); // Create a writer for the logger drain to use - let writer = - NamedTempFile::new().expect(&format!("{:}: failed to create tempfile", msg)); + let writer = NamedTempFile::new() + .unwrap_or_else(|_| panic!("{:}: failed to create tempfile", msg)); // Used to check file contents before the temp file is unlinked let mut writer_ref = writer .reopen() - .expect(&format!("{:?}: failed to clone tempfile", msg)); + .unwrap_or_else(|_| panic!("{:?}: failed to clone tempfile", msg)); let (logger, logger_guard) = create_logger(name, source, d.slog_level, writer); @@ -548,52 +588,52 @@ mod tests { let mut contents = String::new(); writer_ref .read_to_string(&mut contents) - .expect(&format!("{:?}: failed to read tempfile contents", msg)); + .unwrap_or_else(|_| panic!("{:?}: failed to read tempfile contents", msg)); // Convert file to JSON let fields: Value = serde_json::from_str(&contents) - .expect(&format!("{:?}: failed to convert logfile to json", msg)); + .unwrap_or_else(|_| panic!("{:?}: failed to convert logfile to json", msg)); // Check the expected JSON fields let field_ts = fields .get("ts") - .expect(&format!("{:?}: failed to find timestamp field", msg)); + .unwrap_or_else(|| panic!("{:?}: failed to find timestamp field", msg)); assert_ne!(field_ts, "", "{}", msg); let field_version = fields .get("version") - .expect(&format!("{:?}: failed to find version field", msg)); + .unwrap_or_else(|| panic!("{:?}: failed to find version field", msg)); assert_eq!(field_version, env!("CARGO_PKG_VERSION"), "{}", msg); let field_pid = fields .get("pid") - .expect(&format!("{:?}: failed to find pid field", msg)); + .unwrap_or_else(|| panic!("{:?}: failed to find pid field", msg)); assert_ne!(field_pid, "", "{}", msg); let field_level = fields .get("level") - .expect(&format!("{:?}: failed to find level field", msg)); + .unwrap_or_else(|| 
panic!("{:?}: failed to find level field", msg)); assert_eq!(field_level, d.slog_level_tag, "{}", msg); let field_msg = fields .get("msg") - .expect(&format!("{:?}: failed to find msg field", msg)); + .unwrap_or_else(|| panic!("{:?}: failed to find msg field", msg)); assert_eq!(field_msg, &json!(d.msg), "{}", msg); let field_name = fields .get("name") - .expect(&format!("{:?}: failed to find name field", msg)); + .unwrap_or_else(|| panic!("{:?}: failed to find name field", msg)); assert_eq!(field_name, name, "{}", msg); let field_source = fields .get("source") - .expect(&format!("{:?}: failed to find source field", msg)); + .unwrap_or_else(|| panic!("{:?}: failed to find source field", msg)); assert_eq!(field_source, source, "{}", msg); let field_subsystem = fields .get("subsystem") - .expect(&format!("{:?}: failed to find subsystem field", msg)); + .unwrap_or_else(|| panic!("{:?}: failed to find subsystem field", msg)); // No explicit subsystem, so should be the default assert_eq!(field_subsystem, &json!(DEFAULT_SUBSYSTEM), "{}", msg); diff --git a/src/libs/logging/src/log_writer.rs b/src/libs/logging/src/log_writer.rs new file mode 100644 index 000000000000..53e6d541e06c --- /dev/null +++ b/src/libs/logging/src/log_writer.rs @@ -0,0 +1,66 @@ +// Copyright (c) 2020 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::io::{Result, Write}; + +use slog::{info, Logger}; + +/// Writer to convert each line written to it to a log record. +#[derive(Debug)] +pub struct LogWriter(Logger); + +impl LogWriter { + /// Create a new isntance of ['LogWriter']. + pub fn new(logger: Logger) -> Self { + LogWriter(logger) + } +} + +impl Write for LogWriter { + fn write(&mut self, buf: &[u8]) -> Result { + buf.split(|b| *b == b'\n').for_each(|it| { + if !it.is_empty() { + info!(self.0, "{}", String::from_utf8_lossy(it)) + } + }); + + Ok(buf.len()) + } + + fn flush(&mut self) -> Result<()> { + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{create_logger, FileRotator}; + use std::fs; + + #[test] + fn test_log_writer() { + let tmpdir = tempfile::tempdir().unwrap(); + let mut path = tmpdir.path().to_path_buf(); + path.push("log.txt"); + + let mut rotator = FileRotator::new(&path).unwrap(); + rotator.truncate_mode(false); + rotator.rotate_threshold(4); + rotator.rotate_count(1); + + let (logger, guard) = create_logger("test", "hi", slog::Level::Info, rotator); + let mut writer = LogWriter::new(logger); + + writer.write_all("test1\nblabla".as_bytes()).unwrap(); + writer.flush().unwrap(); + writer.write_all("test2".as_bytes()).unwrap(); + writer.flush().unwrap(); + drop(guard); + + let content = fs::read_to_string(path).unwrap(); + assert!(!content.is_empty()); + } +} diff --git a/src/libs/oci/Cargo.toml b/src/libs/oci/Cargo.toml index dde7b9915c02..8c08705a3dbe 100644 --- a/src/libs/oci/Cargo.toml +++ b/src/libs/oci/Cargo.toml @@ -3,6 +3,7 @@ name = "oci" version = "0.1.0" authors = ["The Kata Containers community "] edition = "2018" +license = "Apache-2.0" [dependencies] serde = "1.0.131" diff --git a/src/libs/oci/src/lib.rs b/src/libs/oci/src/lib.rs index 3998b166c1c0..d48ad404007a 100644 --- a/src/libs/oci/src/lib.rs +++ b/src/libs/oci/src/lib.rs @@ -14,6 +14,8 @@ use std::collections::HashMap; mod serialize; pub use serialize::{to_string, to_writer, Error, Result}; +pub const OCI_SPEC_CONFIG_FILE_NAME: &str = "config.json"; + #[allow(dead_code)] fn is_false(b: bool) -> bool { !b @@ -190,6 +192,24 @@ pub struct Hook { pub struct Hooks { #[serde(default, 
skip_serializing_if = "Vec::is_empty")] pub prestart: Vec, + #[serde( + rename = "createRuntime", + default, + skip_serializing_if = "Vec::is_empty" + )] + pub create_runtime: Vec, + #[serde( + rename = "createContainer", + default, + skip_serializing_if = "Vec::is_empty" + )] + pub create_container: Vec, + #[serde( + rename = "startContainer", + default, + skip_serializing_if = "Vec::is_empty" + )] + pub start_container: Vec, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub poststart: Vec, #[serde(default, skip_serializing_if = "Vec::is_empty")] @@ -829,6 +849,8 @@ pub struct State { #[cfg(test)] mod tests { + use std::vec; + use super::*; #[test] @@ -1019,6 +1041,11 @@ mod tests { "path": "/usr/bin/setup-network" } ], + "createRuntime": [ + { + "path": "/usr/local/bin/nerdctl" + } + ], "poststart": [ { "path": "/usr/bin/notify-start", @@ -1387,6 +1414,12 @@ mod tests { timeout: None, }, ], + create_runtime: vec![crate::Hook { + path: "/usr/local/bin/nerdctl".to_string(), + args: vec![], + env: vec![], + timeout: None, + }], poststart: vec![crate::Hook { path: "/usr/bin/notify-start".to_string(), args: vec![], @@ -1399,6 +1432,7 @@ mod tests { env: vec![], timeout: None, }], + ..Default::default() }), annotations: [ ("com.example.key1".to_string(), "value1".to_string()), diff --git a/src/libs/protocols/.gitignore b/src/libs/protocols/.gitignore index ce4964c4f0df..bc7e10bf3105 100644 --- a/src/libs/protocols/.gitignore +++ b/src/libs/protocols/.gitignore @@ -1,9 +1,6 @@ Cargo.lock -src/agent.rs -src/agent_ttrpc.rs -src/csi.rs -src/empty.rs -src/health.rs -src/health_ttrpc.rs -src/oci.rs -src/types.rs + +src/*.rs +!src/lib.rs +!src/trans.rs +!src/serde_config.rs diff --git a/src/libs/protocols/Cargo.toml b/src/libs/protocols/Cargo.toml index ae93e7fa191a..9c0033d17e97 100644 --- a/src/libs/protocols/Cargo.toml +++ b/src/libs/protocols/Cargo.toml @@ -3,17 +3,21 @@ name = "protocols" version = "0.1.0" authors = ["The Kata Containers community "] edition = "2018" +license = "Apache-2.0" [features] default = [] with-serde = [ "serde", "serde_json" ] +async = ["ttrpc/async", "async-trait"] [dependencies] -ttrpc = { version = "0.5.0", features = ["async"] } -async-trait = "0.1.42" -protobuf = { version = "=2.14.0", features = ["with-serde"] } +ttrpc = { version = "0.7.1" } +async-trait = { version = "0.1.42", optional = true } +protobuf = { version = "3.2.0" } serde = { version = "1.0.130", features = ["derive"], optional = true } serde_json = { version = "1.0.68", optional = true } +oci = { path = "../oci" } [build-dependencies] -ttrpc-codegen = "0.2.0" +ttrpc-codegen = "0.4.2" +protobuf = { version = "3.2.0" } diff --git a/src/libs/protocols/build.rs b/src/libs/protocols/build.rs index 4a43f36777d5..bc34c07a07a0 100644 --- a/src/libs/protocols/build.rs +++ b/src/libs/protocols/build.rs @@ -3,11 +3,50 @@ // SPDX-License-Identifier: Apache-2.0 // -use std::fs::File; +use std::fs::{self, File}; use std::io::{BufRead, BufReader, Read, Write}; use std::path::Path; use std::process::exit; -use ttrpc_codegen::{Codegen, Customize, ProtobufCustomize}; + +use protobuf::{ + descriptor::field_descriptor_proto::Type, + reflect::{EnumDescriptor, FieldDescriptor, MessageDescriptor, OneofDescriptor}, +}; +use ttrpc_codegen::{Codegen, Customize, ProtobufCustomize, ProtobufCustomizeCallback}; + +struct GenSerde; + +impl ProtobufCustomizeCallback for GenSerde { + fn message(&self, _message: &MessageDescriptor) -> ProtobufCustomize { + ProtobufCustomize::default().before("#[cfg_attr(feature = 
\"with-serde\", derive(::serde::Serialize, ::serde::Deserialize))]") + } + + fn enumeration(&self, _enum_type: &EnumDescriptor) -> ProtobufCustomize { + ProtobufCustomize::default().before("#[cfg_attr(feature = \"with-serde\", derive(::serde::Serialize, ::serde::Deserialize))]") + } + + fn oneof(&self, _oneof: &OneofDescriptor) -> ProtobufCustomize { + ProtobufCustomize::default().before("#[cfg_attr(feature = \"with-serde\", derive(::serde::Serialize, ::serde::Deserialize))]") + } + + fn field(&self, field: &FieldDescriptor) -> ProtobufCustomize { + if field.proto().type_() == Type::TYPE_ENUM { + ProtobufCustomize::default().before( + "#[cfg_attr(feature = \"with-serde\", serde(serialize_with = \"crate::serialize_enum_or_unknown\", deserialize_with = \"crate::deserialize_enum_or_unknown\"))]", + ) + } else if field.proto().type_() == Type::TYPE_MESSAGE && field.is_singular() { + ProtobufCustomize::default().before( + "#[cfg_attr(feature = \"with-serde\", serde(serialize_with = \"crate::serialize_message_field\", deserialize_with = \"crate::deserialize_message_field\"))]", + ) + } else { + ProtobufCustomize::default() + } + } + + fn special_field(&self, _message: &MessageDescriptor, _field: &str) -> ProtobufCustomize { + ProtobufCustomize::default().before("#[cfg_attr(feature = \"with-serde\", serde(skip))]") + } +} fn replace_text_in_file(file_name: &str, from: &str, to: &str) -> Result<(), std::io::Error> { let mut src = File::open(file_name)?; @@ -17,7 +56,7 @@ fn replace_text_in_file(file_name: &str, from: &str, to: &str) -> Result<(), std let new_contents = contents.replace(from, to); - let mut dst = File::create(&file_name)?; + let mut dst = File::create(file_name)?; dst.write_all(new_contents.as_bytes())?; Ok(()) @@ -67,7 +106,7 @@ fn handle_file(autogen_comment: &str, rust_filename: &str) -> Result<(), std::io let pattern = "//! Generated file from"; - if line.starts_with(&pattern) { + if line.starts_with(pattern) { new_contents.push(autogen_comment.into()); } @@ -76,31 +115,22 @@ fn handle_file(autogen_comment: &str, rust_filename: &str) -> Result<(), std::io // Although we've requested serde support via `Customize`, to // allow the `kata-agent-ctl` tool to partially deserialise structures // specified in JSON, we need this bit of additional magic. - if line.starts_with(&struct_pattern) { + if line.starts_with(struct_pattern) { new_contents.insert(new_contents.len() - 1, serde_default_code.trim().into()); } } let data = new_contents.join("\n"); - let mut dst = File::create(&rust_filename)?; + let mut dst = File::create(rust_filename)?; dst.write_all(data.as_bytes())?; Ok(()) } -fn real_main() -> Result<(), std::io::Error> { - let autogen_comment = format!("\n//! Generated by {:?} ({:?})", file!(), module_path!()); - - let protos = vec![ - "protos/agent.proto", - "protos/csi.proto", - "protos/google/protobuf/empty.proto", - "protos/health.proto", - "protos/oci.proto", - "protos/types.proto", - ]; +fn codegen(path: &str, protos: &[&str], async_all: bool) -> Result<(), std::io::Error> { + fs::create_dir_all(path).unwrap(); // Tell Cargo that if the .proto files changed, to rerun this build script. 
protos @@ -108,26 +138,28 @@ fn real_main() -> Result<(), std::io::Error> { .for_each(|p| println!("cargo:rerun-if-changed={}", &p)); let ttrpc_options = Customize { - async_server: true, + async_all, ..Default::default() }; - let protobuf_options = ProtobufCustomize { - serde_derive: Some(true), - ..Default::default() - }; + let protobuf_options = ProtobufCustomize::default() + .gen_mod_rs(false) + .generate_getter(true) + .generate_accessors(true); let out_dir = Path::new("src"); Codegen::new() .out_dir(out_dir) - .inputs(&protos) + .inputs(protos) .include("protos") .customize(ttrpc_options) .rust_protobuf() .rust_protobuf_customize(protobuf_options) + .rust_protobuf_customize_callback(GenSerde) .run()?; + let autogen_comment = format!("\n//! Generated by {:?} ({:?})", file!(), module_path!()); for file in protos.iter() { let proto_filename = Path::new(file).file_name().unwrap(); @@ -147,6 +179,33 @@ fn real_main() -> Result<(), std::io::Error> { handle_file(&autogen_comment, out_file_str)?; } + use_serde(protos, out_dir)?; + Ok(()) +} +fn real_main() -> Result<(), std::io::Error> { + codegen( + "src", + &[ + "protos/google/protobuf/empty.proto", + "protos/gogo/protobuf/gogoproto/gogo.proto", + "protos/oci.proto", + "protos/types.proto", + "protos/csi.proto", + ], + false, + )?; + + // generate async + #[cfg(feature = "async")] + { + codegen("src", &["protos/agent.proto", "protos/health.proto"], true)?; + + fs::rename("src/agent_ttrpc.rs", "src/agent_ttrpc_async.rs")?; + fs::rename("src/health_ttrpc.rs", "src/health_ttrpc_async.rs")?; + } + + codegen("src", &["protos/agent.proto", "protos/health.proto"], false)?; + // There is a message named 'Box' in oci.proto // so there is a struct named 'Box', we should replace Box to ::std::boxed::Box // to avoid the conflict. 
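For context on the comment above: the generated oci.rs contains a message type named `Box`, which shadows `std::boxed::Box` inside that module, so boxed receivers in the generated service code must use the fully qualified path. A minimal sketch of the conflict, with illustrative names only (not taken from the generated code):

#[allow(non_snake_case)]
pub struct Box {
    // Stands in for the generated `oci::Box` message, which shadows the prelude `Box`.
    pub Height: u32,
    pub Width: u32,
}

trait Service {
    // `self: Box<Self>` here would resolve to the struct above and fail to compile;
    // the fully qualified form keeps pointing at the standard library type.
    fn shutdown(self: ::std::boxed::Box<Self>);
}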
@@ -156,8 +215,6 @@ fn real_main() -> Result<(), std::io::Error> { "self: ::std::boxed::Box", )?; - use_serde(&protos, out_dir)?; - Ok(()) } diff --git a/src/libs/protocols/hack/update-generated-proto.sh b/src/libs/protocols/hack/update-generated-proto.sh index 67f25058aab8..a1905248df95 100755 --- a/src/libs/protocols/hack/update-generated-proto.sh +++ b/src/libs/protocols/hack/update-generated-proto.sh @@ -29,12 +29,12 @@ show_succeed_msg() { show_usage() { echo "====================================================================" echo "" - echo " USAGE: make PROTO_FILE= generate-protocols" + echo " USAGE: generate-protocols " echo "" - echo " Where PROTO_FILE may be:" + echo " Where the first argument could be:" echo " all: will compile all protocol buffer files" echo "" - echo " Or compile individually by using the exact proto file:" + echo " Or compile individually by using the exact proto file:" # iterate over proto files for file in "$@" @@ -47,19 +47,26 @@ show_usage() { } generate_go_sources() { - local cmd="protoc -I$GOPATH/src:$GOPATH/src/github.com/kata-containers/kata-containers/src/libs/protocols/protos \ ---gogottrpc_out=plugins=ttrpc+fieldpath,\ -import_path=github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc,\ -\ -Mgithub.com/kata-containers/kata-containers/src/libs/protocols/protos/csi.proto=github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc,\ -\ -Mgithub.com/kata-containers/kata-containers/src/libs/protocols/protos/types.proto=github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols,\ -\ -Mgithub.com/kata-containers/kata-containers/src/libs/protocols/protos/oci.proto=github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc,\ -\ -Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto,Mgoogle/protobuf/any.proto=github.com/gogo/protobuf/types,Mgoogle/protobuf/descriptor.proto=github.com/gogo/protobuf/protoc-gen-gogo/descriptor,Mgoogle/protobuf/duration.proto=github.com/gogo/protobuf/types,Mgoogle/protobuf/empty.proto=github.com/gogo/protobuf/types,Mgoogle/protobuf/field_mask.proto=github.com/gogo/protobuf/types,Mgoogle/protobuf/timestamp.proto=github.com/gogo/protobuf/types,Mgoogle/protobuf/wrappers.proto=github.com/gogo/protobuf/types,Mgoogle/rpc/status.proto=github.com/gogo/googleapis/google/rpc\ -:$GOPATH/src \ -$GOPATH/src/github.com/kata-containers/kata-containers/src/libs/protocols/protos/$1" + local proto_file="$1" + local dir_path="${proto_file%/*}" + local file_name="${proto_file##*/}" + + [ "$dir_path" == "$proto_file" ] && dir_path="." 
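+    # ${proto_file%/*} leaves the value unchanged when it contains no "/", so the
+    # check above falls back to "." for proto files passed without a directory part.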
+ + local root_path=$(realpath ../)/libs/protocols/protos + local cmd="protoc -I$GOPATH/src:${root_path} \ +--gogottrpc_out=plugins=ttrpc+fieldpath,paths=source_relative,\ +Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto,\ +Mgoogle/protobuf/any.proto=github.com/gogo/protobuf/types,\ +Mgoogle/protobuf/descriptor.proto=github.com/gogo/protobuf/protoc-gen-gogo/descriptor,\ +Mgoogle/protobuf/duration.proto=github.com/gogo/protobuf/types,\ +Mgoogle/protobuf/empty.proto=github.com/gogo/protobuf/types,\ +Mgoogle/protobuf/field_mask.proto=github.com/gogo/protobuf/types,\ +Mgoogle/protobuf/timestamp.proto=github.com/gogo/protobuf/types,\ +Mgoogle/protobuf/wrappers.proto=github.com/gogo/protobuf/types,\ +Mgoogle/rpc/status.proto=github.com/gogo/googleapis/google/rpc\ +:$(realpath ../)/runtime/virtcontainers/pkg/agent/protocols/$dir_path \ +${root_path}/$file_name" echo $cmd $cmd @@ -71,7 +78,7 @@ if [ "$(basename $(pwd))" != "agent" ]; then fi # Protocol buffer files required to generate golang/rust bindings. -proto_files_list=(agent.proto csi.proto health.proto oci.proto types.proto) +proto_files_list=(grpc/agent.proto grpc/csi.proto grpc/health.proto grpc/oci.proto types.proto) if [ "$1" = "" ]; then show_usage "${proto_files_list[@]}" @@ -85,6 +92,8 @@ which protoc which protoc-gen-gogottrpc [ $? -eq 0 ] || die "Please install protoc-gen-gogottrpc from https://github.com/containerd/ttrpc" +[[ -n "$GOPATH" ]] || die "GOPATH is not set. Please set it." + # do generate work target=$1 diff --git a/src/libs/protocols/protos/agent.proto b/src/libs/protocols/protos/agent.proto index 504cae83addc..039630d30661 100644 --- a/src/libs/protocols/protos/agent.proto +++ b/src/libs/protocols/protos/agent.proto @@ -34,9 +34,11 @@ service AgentService { rpc SignalProcess(SignalProcessRequest) returns (google.protobuf.Empty); rpc WaitProcess(WaitProcessRequest) returns (WaitProcessResponse); // wait & reap like waitpid(2) rpc UpdateContainer(UpdateContainerRequest) returns (google.protobuf.Empty); + rpc UpdateEphemeralMounts(UpdateEphemeralMountsRequest) returns (google.protobuf.Empty); rpc StatsContainer(StatsContainerRequest) returns (StatsContainerResponse); rpc PauseContainer(PauseContainerRequest) returns (google.protobuf.Empty); rpc ResumeContainer(ResumeContainerRequest) returns (google.protobuf.Empty); + rpc RemoveStaleVirtiofsShareMounts(RemoveStaleVirtiofsShareMountsRequest) returns (google.protobuf.Empty); // stdio rpc WriteStdin(WriteStreamRequest) returns (WriteStreamResponse); @@ -70,6 +72,7 @@ service AgentService { rpc AddSwap(AddSwapRequest) returns (google.protobuf.Empty); rpc GetVolumeStats(VolumeStatsRequest) returns (VolumeStatsResponse); rpc ResizeVolume(ResizeVolumeRequest) returns (google.protobuf.Empty); + rpc SetPolicy(SetPolicyRequest) returns (google.protobuf.Empty); } message CreateContainerRequest { @@ -300,6 +303,8 @@ message CreateSandboxRequest { message DestroySandboxRequest { } +message RemoveStaleVirtiofsShareMountsRequest {} + message Interfaces { repeated types.Interface Interfaces = 1; } @@ -316,6 +321,10 @@ message UpdateRoutesRequest { Routes routes = 1; } +message UpdateEphemeralMountsRequest { + repeated Storage storages = 1; +} + message ListInterfacesRequest { } @@ -358,7 +367,8 @@ message OnlineCPUMemRequest { // resources are connected asynchronously and the agent returns immediately. bool wait = 1; - // NbCpus specifies the number of CPUs that were added and the agent has to online. 
+ // NbCpus specifies the number of CPUs that should be onlined in the guest. + // Special value 0 means agent will skip this check. uint32 nb_cpus = 2; // CpuOnly specifies whether only online CPU or not. @@ -557,3 +567,7 @@ message ResizeVolumeRequest { string volume_guest_path = 1; uint64 size = 2; } + +message SetPolicyRequest { + string policy = 1; +} diff --git a/src/libs/protocols/protos/csi.proto b/src/libs/protocols/protos/csi.proto index e6da50c8b8d1..94a9603b12f9 100644 --- a/src/libs/protocols/protos/csi.proto +++ b/src/libs/protocols/protos/csi.proto @@ -7,12 +7,6 @@ syntax = "proto3"; option go_package = "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc"; package grpc; -import "gogo/protobuf/gogoproto/gogo.proto"; - -option (gogoproto.equal_all) = true; -option (gogoproto.populate_all) = true; -option (gogoproto.testgen_all) = true; -option (gogoproto.benchgen_all) = true; // This should be kept in sync with CSI NodeGetVolumeStatsResponse (https://github.com/container-storage-interface/spec/blob/v1.5.0/csi.proto) message VolumeStatsResponse { diff --git a/src/libs/protocols/protos/health.proto b/src/libs/protocols/protos/health.proto index e8a6601598c7..da72ff32e902 100644 --- a/src/libs/protocols/protos/health.proto +++ b/src/libs/protocols/protos/health.proto @@ -11,13 +11,6 @@ option go_package = "github.com/kata-containers/kata-containers/src/runtime/virt package grpc; -import "gogo/protobuf/gogoproto/gogo.proto"; - -option (gogoproto.equal_all) = true; -option (gogoproto.populate_all) = true; -option (gogoproto.testgen_all) = true; -option (gogoproto.benchgen_all) = true; - message CheckRequest { string service = 1; } diff --git a/src/libs/protocols/protos/oci.proto b/src/libs/protocols/protos/oci.proto index aa0db012386d..85c4ba5c3c13 100644 --- a/src/libs/protocols/protos/oci.proto +++ b/src/libs/protocols/protos/oci.proto @@ -9,12 +9,6 @@ syntax = "proto3"; option go_package = "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc"; package grpc; -import "gogo/protobuf/gogoproto/gogo.proto"; - -option (gogoproto.equal_all) = true; -option (gogoproto.populate_all) = true; -option (gogoproto.testgen_all) = true; -option (gogoproto.benchgen_all) = true; message Spec { // Version of the Open Container Initiative Runtime Specification with which the bundle complies. @@ -30,7 +24,7 @@ message Spec { string Hostname = 4; // Mounts configures additional mounts (on top of Root). - repeated Mount Mounts = 5 [(gogoproto.nullable) = false]; + repeated Mount Mounts = 5; // Hooks configures callbacks for container lifecycle events. Hooks Hooks = 6; @@ -55,7 +49,7 @@ message Process { Box ConsoleSize = 2; // User specifies user information for the process. - User User = 3 [(gogoproto.nullable) = false]; + User User = 3; // Args specifies the binary and arguments for the application to execute. repeated string Args = 4; @@ -71,7 +65,7 @@ message Process { LinuxCapabilities Capabilities = 7; // Rlimits specifies rlimit options to apply to the process. - repeated POSIXRlimit Rlimits = 8 [(gogoproto.nullable) = false]; + repeated POSIXRlimit Rlimits = 8; // NoNewPrivileges controls whether additional privileges could be gained by processes in the container. bool NoNewPrivileges = 9; @@ -159,13 +153,22 @@ message Root { message Hooks { // Prestart is a list of hooks to be run before the container process is executed. 
- repeated Hook Prestart = 1 [(gogoproto.nullable) = false]; + repeated Hook Prestart = 1; // Poststart is a list of hooks to be run after the container process is started. - repeated Hook Poststart = 2 [(gogoproto.nullable) = false]; + repeated Hook Poststart = 2; // Poststop is a list of hooks to be run after the container process exits. - repeated Hook Poststop = 3 [(gogoproto.nullable) = false]; + repeated Hook Poststop = 3; + + // Createruntime is a list of hooks to be run during the creation of runtime(sandbox). + repeated Hook CreateRuntime = 4; + + // CreateContainer is a list of hooks to be run after VM is started, and before container is created. + repeated Hook CreateContainer = 5; + + // StartContainer is a list of hooks to be run after container is created, but before it is started. + repeated Hook StartContainer = 6; } message Hook { @@ -177,10 +180,10 @@ message Hook { message Linux { // UIDMapping specifies user mappings for supporting user namespaces. - repeated LinuxIDMapping UIDMappings = 1 [(gogoproto.nullable) = false]; + repeated LinuxIDMapping UIDMappings = 1; // GIDMapping specifies group mappings for supporting user namespaces. - repeated LinuxIDMapping GIDMappings = 2 [(gogoproto.nullable) = false]; + repeated LinuxIDMapping GIDMappings = 2; // Sysctl are a set of key value pairs that are set for the container on start map Sysctl = 3; @@ -195,10 +198,10 @@ message Linux { string CgroupsPath = 5; // Namespaces contains the namespaces that are created and/or joined by the container - repeated LinuxNamespace Namespaces = 6 [(gogoproto.nullable) = false]; + repeated LinuxNamespace Namespaces = 6; // Devices are a list of device nodes that are created for the container - repeated LinuxDevice Devices = 7 [(gogoproto.nullable) = false]; + repeated LinuxDevice Devices = 7; // Seccomp specifies the seccomp security settings for the container. LinuxSeccomp Seccomp = 8; @@ -275,7 +278,7 @@ message LinuxDevice { message LinuxResources { // Devices configures the device whitelist. 
- repeated LinuxDeviceCgroup Devices = 1 [(gogoproto.nullable) = false]; + repeated LinuxDeviceCgroup Devices = 1; // Memory restriction configuration LinuxMemory Memory = 2; @@ -290,7 +293,7 @@ message LinuxResources { LinuxBlockIO BlockIO = 5; // Hugetlb limit (in bytes) - repeated LinuxHugepageLimit HugepageLimits = 6 [(gogoproto.nullable) = false]; + repeated LinuxHugepageLimit HugepageLimits = 6; // Network restriction configuration LinuxNetwork Network = 7; @@ -375,19 +378,19 @@ message LinuxBlockIO { uint32 LeafWeight = 2; // Weight per cgroup per device, can override BlkioWeight - repeated LinuxWeightDevice WeightDevice = 3 [(gogoproto.nullable) = false]; + repeated LinuxWeightDevice WeightDevice = 3; // IO read rate limit per cgroup per device, bytes per second - repeated LinuxThrottleDevice ThrottleReadBpsDevice = 4 [(gogoproto.nullable) = false]; + repeated LinuxThrottleDevice ThrottleReadBpsDevice = 4; // IO write rate limit per cgroup per device, bytes per second - repeated LinuxThrottleDevice ThrottleWriteBpsDevice = 5 [(gogoproto.nullable) = false]; + repeated LinuxThrottleDevice ThrottleWriteBpsDevice = 5; // IO read rate limit per cgroup per device, IO per second - repeated LinuxThrottleDevice ThrottleReadIOPSDevice = 6 [(gogoproto.nullable) = false]; + repeated LinuxThrottleDevice ThrottleReadIOPSDevice = 6; // IO write rate limit per cgroup per device, IO per second - repeated LinuxThrottleDevice ThrottleWriteIOPSDevice = 7 [(gogoproto.nullable) = false]; + repeated LinuxThrottleDevice ThrottleWriteIOPSDevice = 7; } message LinuxPids { @@ -417,7 +420,7 @@ message LinuxNetwork { uint32 ClassID = 1; // Set priority of network traffic for container - repeated LinuxInterfacePriority Priorities = 2 [(gogoproto.nullable) = false]; + repeated LinuxInterfacePriority Priorities = 2; } message LinuxHugepageLimit { @@ -440,7 +443,7 @@ message LinuxSeccomp { string DefaultAction = 1; repeated string Architectures = 2; repeated string Flags = 3; - repeated LinuxSyscall Syscalls = 4 [(gogoproto.nullable) = false]; + repeated LinuxSyscall Syscalls = 4; } message LinuxSeccompArg { @@ -456,7 +459,7 @@ message LinuxSyscall { oneof ErrnoRet { uint32 errnoret = 3; } - repeated LinuxSeccompArg Args = 4 [(gogoproto.nullable) = false]; + repeated LinuxSeccompArg Args = 4; } message LinuxIntelRdt { diff --git a/src/libs/protocols/src/lib.rs b/src/libs/protocols/src/lib.rs index 14298e52d925..33f75ca0ea8a 100644 --- a/src/libs/protocols/src/lib.rs +++ b/src/libs/protocols/src/lib.rs @@ -7,9 +7,23 @@ pub mod agent; pub mod agent_ttrpc; +#[cfg(feature = "async")] +pub mod agent_ttrpc_async; pub mod csi; pub mod empty; +mod gogo; pub mod health; pub mod health_ttrpc; +#[cfg(feature = "async")] +pub mod health_ttrpc_async; pub mod oci; +#[cfg(feature = "with-serde")] +mod serde_config; +pub mod trans; pub mod types; + +#[cfg(feature = "with-serde")] +pub use serde_config::{ + deserialize_enum_or_unknown, deserialize_message_field, serialize_enum_or_unknown, + serialize_message_field, +}; diff --git a/src/libs/protocols/src/serde_config.rs b/src/libs/protocols/src/serde_config.rs new file mode 100644 index 000000000000..064379cd16e8 --- /dev/null +++ b/src/libs/protocols/src/serde_config.rs @@ -0,0 +1,68 @@ +// Copyright (c) 2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use protobuf::{EnumOrUnknown, MessageField}; +use serde::{Deserialize, Serialize}; + +#[cfg(feature = "with-serde")] +pub fn serialize_enum_or_unknown( + e: &protobuf::EnumOrUnknown, + s: S, +) -> Result { + 
e.value().serialize(s) +} + +pub fn serialize_message_field( + e: &protobuf::MessageField, + s: S, +) -> Result { + if e.is_some() { + e.as_ref().unwrap().serialize(s) + } else { + s.serialize_unit() + } +} + +pub fn deserialize_enum_or_unknown<'de, E: Deserialize<'de>, D: serde::Deserializer<'de>>( + d: D, +) -> Result, D::Error> { + i32::deserialize(d).map(EnumOrUnknown::from_i32) +} + +pub fn deserialize_message_field<'de, E: Deserialize<'de>, D: serde::Deserializer<'de>>( + d: D, +) -> Result, D::Error> { + Option::deserialize(d).map(MessageField::from_option) +} + +#[cfg(test)] +mod tests { + use crate::agent::{ExecProcessRequest, StringUser}; + use crate::health::{health_check_response::ServingStatus, HealthCheckResponse}; + + #[test] + fn test_serde_for_enum_or_unknown() { + let mut hc = HealthCheckResponse::new(); + hc.set_status(ServingStatus::SERVING); + + let json = serde_json::to_string(&hc).unwrap(); + let from_json: HealthCheckResponse = serde_json::from_str(&json).unwrap(); + + assert_eq!(from_json, hc); + } + + #[test] + fn test_serde_for_message_field() { + let mut epr = ExecProcessRequest::new(); + let mut str_user = StringUser::new(); + str_user.uid = "Someone's id".to_string(); + epr.set_string_user(str_user); + + let json = serde_json::to_string(&epr).unwrap(); + let from_json: ExecProcessRequest = serde_json::from_str(&json).unwrap(); + + assert_eq!(from_json, epr); + } +} diff --git a/src/libs/protocols/src/trans.rs b/src/libs/protocols/src/trans.rs new file mode 100644 index 000000000000..db59b7c2aa44 --- /dev/null +++ b/src/libs/protocols/src/trans.rs @@ -0,0 +1,1073 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::collections::HashMap; +use std::convert::From; + +use oci::{ + Hook, Hooks, Linux, LinuxBlockIo, LinuxCapabilities, LinuxCpu, LinuxDevice, LinuxHugepageLimit, + LinuxIdMapping, LinuxIntelRdt, LinuxInterfacePriority, LinuxMemory, LinuxNamespace, + LinuxNetwork, LinuxPids, LinuxResources, LinuxSeccomp, LinuxSeccompArg, LinuxSyscall, + LinuxThrottleDevice, LinuxWeightDevice, Mount, PosixRlimit, Process, Root, Spec, User, +}; + +// translate from interface to ttprc tools +fn from_option>(from: Option) -> protobuf::MessageField { + match from { + Some(f) => protobuf::MessageField::from_option(Some(f.into())), + None => protobuf::MessageField::none(), + } +} + +fn from_vec>(from: Vec) -> Vec { + let mut to: Vec = vec![]; + for data in from { + to.push(data.into()); + } + to +} + +impl From for crate::oci::Box { + fn from(from: oci::Box) -> Self { + crate::oci::Box { + Height: from.height, + Width: from.width, + ..Default::default() + } + } +} + +impl From for crate::oci::User { + fn from(from: User) -> Self { + crate::oci::User { + UID: from.uid, + GID: from.gid, + AdditionalGids: from.additional_gids, + Username: from.username, + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxCapabilities { + fn from(from: LinuxCapabilities) -> Self { + crate::oci::LinuxCapabilities { + Bounding: from.bounding, + Effective: from.effective, + Inheritable: from.inheritable, + Permitted: from.permitted, + Ambient: from.ambient, + ..Default::default() + } + } +} + +impl From for crate::oci::POSIXRlimit { + fn from(from: PosixRlimit) -> Self { + crate::oci::POSIXRlimit { + Type: from.r#type, + Hard: from.hard, + Soft: from.soft, + ..Default::default() + } + } +} + +impl From for crate::oci::Process { + fn from(from: Process) -> Self { + crate::oci::Process { + Terminal: 
from.terminal, + ConsoleSize: from_option(from.console_size), + User: from_option(Some(from.user)), + Args: from.args, + Env: from.env, + Cwd: from.cwd, + Capabilities: from_option(from.capabilities), + Rlimits: from_vec(from.rlimits), + NoNewPrivileges: from.no_new_privileges, + ApparmorProfile: from.apparmor_profile, + OOMScoreAdj: from.oom_score_adj.map_or(0, |t| t as i64), + SelinuxLabel: from.selinux_label, + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxDeviceCgroup { + fn from(from: oci::LinuxDeviceCgroup) -> Self { + crate::oci::LinuxDeviceCgroup { + Allow: from.allow, + Type: from.r#type, + Major: from.major.map_or(0, |t| t), + Minor: from.minor.map_or(0, |t| t), + Access: from.access, + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxMemory { + fn from(from: LinuxMemory) -> Self { + crate::oci::LinuxMemory { + Limit: from.limit.map_or(0, |t| t), + Reservation: from.reservation.map_or(0, |t| t), + Swap: from.swap.map_or(0, |t| t), + Kernel: from.kernel.map_or(0, |t| t), + KernelTCP: from.kernel_tcp.map_or(0, |t| t), + Swappiness: from.swappiness.map_or(0, |t| t), + DisableOOMKiller: from.disable_oom_killer.map_or(false, |t| t), + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxCPU { + fn from(from: LinuxCpu) -> Self { + crate::oci::LinuxCPU { + Shares: from.shares.map_or(0, |t| t), + Quota: from.quota.map_or(0, |t| t), + Period: from.period.map_or(0, |t| t), + RealtimeRuntime: from.realtime_runtime.map_or(0, |t| t), + RealtimePeriod: from.realtime_period.map_or(0, |t| t), + Cpus: from.cpus, + Mems: from.mems, + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxPids { + fn from(from: LinuxPids) -> Self { + crate::oci::LinuxPids { + Limit: from.limit, + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxWeightDevice { + fn from(from: LinuxWeightDevice) -> Self { + crate::oci::LinuxWeightDevice { + // TODO : check + Major: 0, + Minor: 0, + Weight: from.weight.map_or(0, |t| t as u32), + LeafWeight: from.leaf_weight.map_or(0, |t| t as u32), + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxThrottleDevice { + fn from(from: LinuxThrottleDevice) -> Self { + crate::oci::LinuxThrottleDevice { + // TODO : check + Major: 0, + Minor: 0, + Rate: from.rate, + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxBlockIO { + fn from(from: LinuxBlockIo) -> Self { + crate::oci::LinuxBlockIO { + Weight: from.weight.map_or(0, |t| t as u32), + LeafWeight: from.leaf_weight.map_or(0, |t| t as u32), + WeightDevice: from_vec(from.weight_device), + ThrottleReadBpsDevice: from_vec(from.throttle_read_bps_device), + ThrottleWriteBpsDevice: from_vec(from.throttle_write_bps_device), + ThrottleReadIOPSDevice: from_vec(from.throttle_read_iops_device), + ThrottleWriteIOPSDevice: from_vec(from.throttle_write_iops_device), + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxHugepageLimit { + fn from(from: LinuxHugepageLimit) -> Self { + crate::oci::LinuxHugepageLimit { + Pagesize: from.page_size, + Limit: from.limit, + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxInterfacePriority { + fn from(from: LinuxInterfacePriority) -> Self { + crate::oci::LinuxInterfacePriority { + Name: from.name, + Priority: from.priority, + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxNetwork { + fn from(from: LinuxNetwork) -> Self { + crate::oci::LinuxNetwork { + ClassID: from.class_id.map_or(0, |t| t), + Priorities: from_vec(from.priorities), + ..Default::default() 
+ } + } +} + +impl From for crate::oci::LinuxResources { + fn from(from: LinuxResources) -> Self { + crate::oci::LinuxResources { + Devices: from_vec(from.devices), + Memory: from_option(from.memory), + CPU: from_option(from.cpu), + Pids: from_option(from.pids), + BlockIO: from_option(from.block_io), + HugepageLimits: from_vec(from.hugepage_limits), + Network: from_option(from.network), + ..Default::default() + } + } +} + +impl From for crate::oci::Root { + fn from(from: Root) -> Self { + crate::oci::Root { + Path: from.path, + Readonly: from.readonly, + ..Default::default() + } + } +} + +impl From for crate::oci::Mount { + fn from(from: Mount) -> Self { + crate::oci::Mount { + destination: from.destination, + source: from.source, + type_: from.r#type, + options: from.options, + ..Default::default() + } + } +} + +impl From for crate::oci::Hook { + fn from(from: Hook) -> Self { + let mut timeout: i64 = 0; + if let Some(v) = from.timeout { + timeout = v as i64; + } + crate::oci::Hook { + Path: from.path, + Args: from.args, + Env: from.env, + Timeout: timeout, + ..Default::default() + } + } +} + +impl From for crate::oci::Hooks { + fn from(from: Hooks) -> Self { + crate::oci::Hooks { + Prestart: from_vec(from.prestart), + CreateRuntime: from_vec(from.create_runtime), + CreateContainer: from_vec(from.create_container), + StartContainer: from_vec(from.start_container), + Poststart: from_vec(from.poststart), + Poststop: from_vec(from.poststop), + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxIDMapping { + fn from(from: LinuxIdMapping) -> Self { + crate::oci::LinuxIDMapping { + HostID: from.host_id, + ContainerID: from.container_id, + Size: from.size, + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxNamespace { + fn from(from: LinuxNamespace) -> Self { + crate::oci::LinuxNamespace { + Type: from.r#type, + Path: from.path, + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxDevice { + fn from(from: LinuxDevice) -> Self { + crate::oci::LinuxDevice { + Path: from.path, + Type: from.r#type, + Major: from.major, + Minor: from.minor, + FileMode: from.file_mode.map_or(0, |v| v), + UID: from.uid.map_or(0, |v| v), + GID: from.gid.map_or(0, |v| v), + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxSeccompArg { + fn from(from: LinuxSeccompArg) -> Self { + crate::oci::LinuxSeccompArg { + Index: from.index as u64, + Value: from.value, + ValueTwo: from.value_two, + Op: from.op, + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxSyscall { + fn from(from: LinuxSyscall) -> Self { + crate::oci::LinuxSyscall { + Names: from.names, + Action: from.action, + Args: from_vec(from.args), + ErrnoRet: Some(crate::oci::linux_syscall::ErrnoRet::Errnoret( + from.errno_ret, + )), + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxSeccomp { + fn from(from: LinuxSeccomp) -> Self { + crate::oci::LinuxSeccomp { + DefaultAction: from.default_action, + Architectures: from.architectures, + Syscalls: from_vec(from.syscalls), + Flags: from.flags, + ..Default::default() + } + } +} + +impl From for crate::oci::LinuxIntelRdt { + fn from(from: LinuxIntelRdt) -> Self { + crate::oci::LinuxIntelRdt { + L3CacheSchema: from.l3_cache_schema, + ..Default::default() + } + } +} + +impl From for crate::oci::Linux { + fn from(from: Linux) -> Self { + crate::oci::Linux { + UIDMappings: from_vec(from.uid_mappings), + GIDMappings: from_vec(from.gid_mappings), + Sysctl: from.sysctl, + Resources: from_option(from.resources), + CgroupsPath: 
from.cgroups_path, + Namespaces: from_vec(from.namespaces), + Devices: from_vec(from.devices), + Seccomp: from_option(from.seccomp), + RootfsPropagation: from.rootfs_propagation, + MaskedPaths: from.masked_paths, + ReadonlyPaths: from.readonly_paths, + MountLabel: from.mount_label, + IntelRdt: from_option(from.intel_rdt), + ..Default::default() + } + } +} + +impl From for crate::oci::Spec { + fn from(from: Spec) -> Self { + crate::oci::Spec { + Version: from.version, + Process: from_option(from.process), + Root: from_option(from.root), + Hostname: from.hostname, + Mounts: from_vec(from.mounts), + Hooks: from_option(from.hooks), + Annotations: from.annotations, + Linux: from_option(from.linux), + Solaris: Default::default(), + Windows: Default::default(), + ..Default::default() + } + } +} + +impl From for oci::Root { + fn from(from: crate::oci::Root) -> Self { + Self { + path: from.Path, + readonly: from.Readonly, + } + } +} + +impl From for oci::Mount { + fn from(mut from: crate::oci::Mount) -> Self { + let options = from.take_options().to_vec(); + Self { + r#type: from.take_type_(), + destination: from.take_destination(), + source: from.take_source(), + options, + } + } +} + +impl From for oci::LinuxIdMapping { + fn from(from: crate::oci::LinuxIDMapping) -> Self { + LinuxIdMapping { + container_id: from.ContainerID(), + host_id: from.HostID(), + size: from.Size(), + } + } +} + +impl From for oci::LinuxDeviceCgroup { + fn from(mut from: crate::oci::LinuxDeviceCgroup) -> Self { + let mut major = None; + if from.Major() > 0 { + major = Some(from.Major()); + } + + let mut minor = None; + if from.Minor() > 0 { + minor = Some(from.Minor()) + } + + oci::LinuxDeviceCgroup { + allow: from.Allow(), + r#type: from.take_Type(), + major, + minor, + access: from.take_Access(), + } + } +} + +impl From for oci::LinuxMemory { + fn from(from: crate::oci::LinuxMemory) -> Self { + let mut limit = None; + if from.Limit() > 0 { + limit = Some(from.Limit()); + } + + let mut reservation = None; + if from.Reservation() > 0 { + reservation = Some(from.Reservation()); + } + + let mut swap = None; + if from.Swap() > 0 { + swap = Some(from.Swap()); + } + + let mut kernel = None; + if from.Kernel() > 0 { + kernel = Some(from.Kernel()); + } + + let mut kernel_tcp = None; + if from.KernelTCP() > 0 { + kernel_tcp = Some(from.KernelTCP()); + } + + let mut swappiness = None; + if from.Swappiness() > 0 { + swappiness = Some(from.Swappiness()); + } + + let disable_oom_killer = Some(from.DisableOOMKiller()); + + oci::LinuxMemory { + limit, + reservation, + swap, + kernel, + kernel_tcp, + swappiness, + disable_oom_killer, + } + } +} + +impl From for oci::LinuxCpu { + fn from(mut from: crate::oci::LinuxCPU) -> Self { + let mut shares = None; + if from.Shares() > 0 { + shares = Some(from.Shares()); + } + + let mut quota = None; + if from.Quota() > 0 { + quota = Some(from.Quota()); + } + + let mut period = None; + if from.Period() > 0 { + period = Some(from.Period()); + } + + let mut realtime_runtime = None; + if from.RealtimeRuntime() > 0 { + realtime_runtime = Some(from.RealtimeRuntime()); + } + + let mut realtime_period = None; + if from.RealtimePeriod() > 0 { + realtime_period = Some(from.RealtimePeriod()); + } + + let cpus = from.take_Cpus(); + let mems = from.take_Mems(); + + oci::LinuxCpu { + shares, + quota, + period, + realtime_runtime, + realtime_period, + cpus, + mems, + } + } +} + +impl From for oci::LinuxPids { + fn from(from: crate::oci::LinuxPids) -> Self { + oci::LinuxPids { + limit: from.Limit(), + } + } +} + 
+impl From for oci::LinuxBlockIo { + fn from(from: crate::oci::LinuxBlockIO) -> Self { + let mut weight = None; + if from.Weight() > 0 { + weight = Some(from.Weight() as u16); + } + let mut leaf_weight = None; + if from.LeafWeight() > 0 { + leaf_weight = Some(from.LeafWeight() as u16); + } + let mut weight_device = Vec::new(); + for wd in from.WeightDevice() { + weight_device.push(wd.clone().into()); + } + + let mut throttle_read_bps_device = Vec::new(); + for td in from.ThrottleReadBpsDevice() { + throttle_read_bps_device.push(td.clone().into()); + } + + let mut throttle_write_bps_device = Vec::new(); + for td in from.ThrottleWriteBpsDevice() { + throttle_write_bps_device.push(td.clone().into()); + } + + let mut throttle_read_iops_device = Vec::new(); + for td in from.ThrottleReadIOPSDevice() { + throttle_read_iops_device.push(td.clone().into()); + } + + let mut throttle_write_iops_device = Vec::new(); + for td in from.ThrottleWriteIOPSDevice() { + throttle_write_iops_device.push(td.clone().into()); + } + + oci::LinuxBlockIo { + weight, + leaf_weight, + weight_device, + throttle_read_bps_device, + throttle_write_bps_device, + throttle_read_iops_device, + throttle_write_iops_device, + } + } +} + +impl From for oci::LinuxThrottleDevice { + fn from(from: crate::oci::LinuxThrottleDevice) -> Self { + oci::LinuxThrottleDevice { + blk: oci::LinuxBlockIoDevice { + major: from.Major, + minor: from.Minor, + }, + rate: from.Rate, + } + } +} + +impl From for oci::LinuxWeightDevice { + fn from(from: crate::oci::LinuxWeightDevice) -> Self { + oci::LinuxWeightDevice { + blk: oci::LinuxBlockIoDevice { + major: from.Major, + minor: from.Minor, + }, + weight: Some(from.Weight as u16), + leaf_weight: Some(from.LeafWeight as u16), + } + } +} + +impl From for oci::LinuxInterfacePriority { + fn from(mut from: crate::oci::LinuxInterfacePriority) -> Self { + oci::LinuxInterfacePriority { + name: from.take_Name(), + priority: from.Priority(), + } + } +} + +impl From for oci::LinuxNetwork { + fn from(mut from: crate::oci::LinuxNetwork) -> Self { + let mut class_id = None; + if from.ClassID() > 0 { + class_id = Some(from.ClassID()); + } + let mut priorities = Vec::new(); + for p in from.take_Priorities() { + priorities.push(p.into()) + } + + oci::LinuxNetwork { + class_id, + priorities, + } + } +} + +impl From for oci::LinuxHugepageLimit { + fn from(mut from: crate::oci::LinuxHugepageLimit) -> Self { + oci::LinuxHugepageLimit { + page_size: from.take_Pagesize(), + limit: from.Limit(), + } + } +} + +impl From for oci::LinuxResources { + fn from(mut from: crate::oci::LinuxResources) -> Self { + let mut devices = Vec::new(); + for d in from.take_Devices() { + devices.push(d.into()); + } + + let mut memory = None; + if from.has_Memory() { + memory = Some(from.take_Memory().into()); + } + + let mut cpu = None; + if from.has_CPU() { + cpu = Some(from.take_CPU().into()); + } + + let mut pids = None; + if from.has_Pids() { + pids = Some(from.Pids().clone().into()) + } + + let mut block_io = None; + if from.has_BlockIO() { + block_io = Some(from.BlockIO().clone().into()); + } + + let mut hugepage_limits = Vec::new(); + for hl in from.HugepageLimits() { + hugepage_limits.push(hl.clone().into()); + } + + let mut network = None; + if from.has_Network() { + network = Some(from.take_Network().into()); + } + + let rdma = HashMap::new(); + + LinuxResources { + devices, + memory, + cpu, + pids, + block_io, + hugepage_limits, + network, + rdma, + } + } +} + +impl From for oci::LinuxDevice { + fn from(mut from: 
crate::oci::LinuxDevice) -> Self { + oci::LinuxDevice { + path: from.take_Path(), + r#type: from.take_Type(), + major: from.Major(), + minor: from.Minor(), + file_mode: Some(from.FileMode()), + uid: Some(from.UID()), + gid: Some(from.GID()), + } + } +} + +impl From for oci::LinuxSeccompArg { + fn from(mut from: crate::oci::LinuxSeccompArg) -> Self { + oci::LinuxSeccompArg { + index: from.Index() as u32, + value: from.Value(), + value_two: from.ValueTwo(), + op: from.take_Op(), + } + } +} + +impl From for oci::LinuxSyscall { + fn from(mut from: crate::oci::LinuxSyscall) -> Self { + let mut args = Vec::new(); + for ag in from.take_Args() { + args.push(ag.into()); + } + oci::LinuxSyscall { + names: from.take_Names().to_vec(), + action: from.take_Action(), + args, + errno_ret: from.errnoret(), + } + } +} + +impl From for oci::LinuxSeccomp { + fn from(mut from: crate::oci::LinuxSeccomp) -> Self { + let mut syscalls = Vec::new(); + for s in from.take_Syscalls() { + syscalls.push(s.into()); + } + + oci::LinuxSeccomp { + default_action: from.take_DefaultAction(), + architectures: from.take_Architectures().to_vec(), + syscalls, + flags: from.take_Flags().to_vec(), + } + } +} + +impl From for oci::LinuxNamespace { + fn from(mut from: crate::oci::LinuxNamespace) -> Self { + oci::LinuxNamespace { + r#type: from.take_Type(), + path: from.take_Path(), + } + } +} + +impl From for oci::Linux { + fn from(mut from: crate::oci::Linux) -> Self { + let mut uid_mappings = Vec::new(); + for id_map in from.take_UIDMappings() { + uid_mappings.push(id_map.into()) + } + + let mut gid_mappings = Vec::new(); + for id_map in from.take_GIDMappings() { + gid_mappings.push(id_map.into()) + } + + let sysctl = from.Sysctl().clone(); + let mut resources = None; + if from.has_Resources() { + resources = Some(from.take_Resources().into()); + } + + let cgroups_path = from.take_CgroupsPath(); + let mut namespaces = Vec::new(); + for ns in from.take_Namespaces() { + namespaces.push(ns.into()) + } + + let mut devices = Vec::new(); + for d in from.take_Devices() { + devices.push(d.into()); + } + + let mut seccomp = None; + if from.has_Seccomp() { + seccomp = Some(from.take_Seccomp().into()); + } + + let rootfs_propagation = from.take_RootfsPropagation(); + let masked_paths = from.take_MaskedPaths().to_vec(); + + let readonly_paths = from.take_ReadonlyPaths().to_vec(); + + let mount_label = from.take_MountLabel(); + let intel_rdt = None; + + oci::Linux { + uid_mappings, + gid_mappings, + sysctl, + resources, + cgroups_path, + namespaces, + devices, + seccomp, + rootfs_propagation, + masked_paths, + readonly_paths, + mount_label, + intel_rdt, + } + } +} + +impl From for oci::PosixRlimit { + fn from(mut from: crate::oci::POSIXRlimit) -> Self { + oci::PosixRlimit { + r#type: from.take_Type(), + hard: from.Hard(), + soft: from.Soft(), + } + } +} + +impl From for oci::LinuxCapabilities { + fn from(mut from: crate::oci::LinuxCapabilities) -> Self { + oci::LinuxCapabilities { + bounding: from.take_Bounding().to_vec(), + effective: from.take_Effective().to_vec(), + inheritable: from.take_Inheritable().to_vec(), + permitted: from.take_Permitted().to_vec(), + ambient: from.take_Ambient().to_vec(), + } + } +} + +impl From for oci::User { + fn from(mut from: crate::oci::User) -> Self { + oci::User { + uid: from.UID(), + gid: from.GID(), + additional_gids: from.take_AdditionalGids().to_vec(), + username: from.take_Username(), + } + } +} + +impl From for oci::Box { + fn from(from: crate::oci::Box) -> Self { + oci::Box { + height: from.Height(), + 
width: from.Width(), + } + } +} + +impl From for oci::Process { + fn from(mut from: crate::oci::Process) -> Self { + let mut console_size = None; + if from.has_ConsoleSize() { + console_size = Some(from.take_ConsoleSize().into()); + } + + let user = from.take_User().into(); + let args = from.take_Args(); + let env = from.take_Env(); + let cwd = from.take_Cwd(); + let mut capabilities = None; + if from.has_Capabilities() { + capabilities = Some(from.take_Capabilities().into()); + } + let mut rlimits = Vec::new(); + for rl in from.take_Rlimits() { + rlimits.push(rl.into()); + } + let no_new_privileges = from.NoNewPrivileges(); + let apparmor_profile = from.take_ApparmorProfile(); + let mut oom_score_adj = None; + if from.OOMScoreAdj() != 0 { + oom_score_adj = Some(from.OOMScoreAdj() as i32); + } + let selinux_label = from.take_SelinuxLabel(); + + oci::Process { + terminal: from.Terminal, + console_size, + user, + args, + env, + cwd, + capabilities, + rlimits, + no_new_privileges, + apparmor_profile, + oom_score_adj, + selinux_label, + } + } +} + +impl From for oci::Hook { + fn from(mut from: crate::oci::Hook) -> Self { + let mut timeout = None; + if from.Timeout() > 0 { + timeout = Some(from.Timeout() as i32); + } + oci::Hook { + path: from.take_Path(), + args: from.take_Args().to_vec(), + env: from.take_Env().to_vec(), + timeout, + } + } +} + +impl From for oci::Hooks { + fn from(mut from: crate::oci::Hooks) -> Self { + let prestart = from.take_Prestart().into_iter().map(|i| i.into()).collect(); + let create_runtime = from + .take_CreateRuntime() + .into_iter() + .map(|i| i.into()) + .collect(); + let create_container = from + .take_CreateContainer() + .into_iter() + .map(|i| i.into()) + .collect(); + let start_container = from + .take_StartContainer() + .into_iter() + .map(|i| i.into()) + .collect(); + let poststart = from + .take_Poststart() + .into_iter() + .map(|i| i.into()) + .collect(); + let poststop = from.take_Poststop().into_iter().map(|i| i.into()).collect(); + + oci::Hooks { + prestart, + create_runtime, + create_container, + start_container, + poststart, + poststop, + } + } +} + +impl From for oci::Spec { + fn from(mut from: crate::oci::Spec) -> Self { + let mut process = None; + if from.has_Process() { + process = Some(from.take_Process().into()); + } + + let mut root = None; + if from.has_Root() { + root = Some(from.take_Root().into()); + } + + let mut mounts = Vec::new(); + for m in from.take_Mounts() { + mounts.push(m.into()) + } + + let mut hooks: Option = None; + if from.has_Hooks() { + hooks = Some(from.take_Hooks().into()); + } + + let annotations = from.take_Annotations(); + + let mut linux = None; + if from.has_Linux() { + linux = Some(from.take_Linux().into()); + } + + oci::Spec { + version: from.take_Version(), + process, + root, + hostname: from.take_Hostname(), + mounts, + hooks, + annotations, + linux, + solaris: None, + windows: None, + vm: None, + } + } +} + +#[cfg(test)] +mod tests { + use crate::trans::from_vec; + + #[derive(Clone)] + struct TestA { + pub from: String, + } + + #[derive(Clone)] + struct TestB { + pub to: String, + } + + impl From for TestB { + fn from(from: TestA) -> Self { + TestB { to: from.from } + } + } + + #[test] + fn test_from() { + let from = TestA { + from: "a".to_string(), + }; + let to: TestB = TestB::from(from.clone()); + + assert_eq!(from.from, to.to); + } + + #[test] + fn test_from_vec_len_0() { + let from: Vec = vec![]; + let to: Vec = from_vec(from.clone()); + assert_eq!(from.len(), to.len()); + } + + #[test] + fn 
test_from_vec_len_1() { + let from: Vec = vec![TestA { + from: "a".to_string(), + }]; + let to: Vec = from_vec(from.clone()); + + assert_eq!(from.len(), to.len()); + assert_eq!(from[0].from, to[0].to); + } +} diff --git a/src/libs/safe-path/src/pinned_path_buf.rs b/src/libs/safe-path/src/pinned_path_buf.rs index d1816f450dcb..15c80f4ce9e5 100644 --- a/src/libs/safe-path/src/pinned_path_buf.rs +++ b/src/libs/safe-path/src/pinned_path_buf.rs @@ -295,7 +295,7 @@ mod tests { barrier2.wait(); }); - let path = scoped_join(&root_path, "s").unwrap(); + let path = scoped_join(root_path, "s").unwrap(); let data = fs::read_to_string(&path).unwrap(); assert_eq!(&data, "a"); assert!(path.is_file()); @@ -306,7 +306,7 @@ mod tests { assert_eq!(&data, "b"); PinnedPathBuf::from_path(&path).unwrap_err(); - let pinned_path = PinnedPathBuf::new(&root_path, "s").unwrap(); + let pinned_path = PinnedPathBuf::new(root_path, "s").unwrap(); let data = fs::read_to_string(&pinned_path).unwrap(); assert_eq!(&data, "b"); diff --git a/src/libs/safe-path/src/scoped_dir_builder.rs b/src/libs/safe-path/src/scoped_dir_builder.rs index 1a4ba189f2e7..2d231c62f951 100644 --- a/src/libs/safe-path/src/scoped_dir_builder.rs +++ b/src/libs/safe-path/src/scoped_dir_builder.rs @@ -173,7 +173,7 @@ mod tests { fs::write(rootfs_path.join("txt"), "test").unwrap(); ScopedDirBuilder::new(rootfs_path.join("txt")).unwrap_err(); - let mut builder = ScopedDirBuilder::new(&rootfs_path).unwrap(); + let mut builder = ScopedDirBuilder::new(rootfs_path).unwrap(); // file with the same name already exists. builder @@ -268,7 +268,7 @@ mod tests { symlink(rootfs_dir.path().join("b"), rootfs_dir.path().join("a")).unwrap(); let rootfs_path = &rootfs_dir.path().join("a"); - let mut builder = ScopedDirBuilder::new(&rootfs_path).unwrap(); + let mut builder = ScopedDirBuilder::new(rootfs_path).unwrap(); builder.create_with_unscoped_path("/").unwrap_err(); builder .create_with_unscoped_path(rootfs_path.join("../__xxxx___xxx__")) @@ -278,13 +278,13 @@ mod tests { .unwrap_err(); // Return `AlreadyExist` when recursive is false - builder.create_with_unscoped_path(&rootfs_path).unwrap_err(); + builder.create_with_unscoped_path(rootfs_path).unwrap_err(); builder .create_with_unscoped_path(rootfs_path.join(".")) .unwrap_err(); builder.recursive(true); - builder.create_with_unscoped_path(&rootfs_path).unwrap(); + builder.create_with_unscoped_path(rootfs_path).unwrap(); builder .create_with_unscoped_path(rootfs_path.join(".")) .unwrap(); diff --git a/src/libs/safe-path/src/scoped_path_resolver.rs b/src/libs/safe-path/src/scoped_path_resolver.rs index 59b06bfe7059..d9815f52961b 100644 --- a/src/libs/safe-path/src/scoped_path_resolver.rs +++ b/src/libs/safe-path/src/scoped_path_resolver.rs @@ -245,7 +245,7 @@ mod tests { fn test_scoped_resolve_invalid() { scoped_resolve("./root_is_not_absolute_path", ".").unwrap_err(); scoped_resolve("C:", ".").unwrap_err(); - scoped_resolve(r#"\\server\test"#, ".").unwrap_err(); + scoped_resolve(r"\\server\test", ".").unwrap_err(); scoped_resolve(r#"http://localhost/test"#, ".").unwrap_err(); // Chinese Unicode characters scoped_resolve(r#"您好"#, ".").unwrap_err(); @@ -329,31 +329,31 @@ mod tests { let rootfs_path = &rootfs_dir.path(); assert_eq!( - scoped_join(&rootfs_path, "a").unwrap(), + scoped_join(rootfs_path, "a").unwrap(), rootfs_path.join("a") ); assert_eq!( - scoped_join(&rootfs_path, "./a").unwrap(), + scoped_join(rootfs_path, "./a").unwrap(), rootfs_path.join("a") ); assert_eq!( - scoped_join(&rootfs_path, 
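The `From` implementations in trans.rs above all follow one shape: take ownership of each field from the protobuf-generated `crate::oci` message via its `take_*` accessors and move it into the matching hand-written `oci` struct, with a `from_vec` helper mapping whole vectors. A minimal, self-contained sketch of that pattern, using hypothetical stand-in types `GeneratedMount` and `Mount` rather than the real generated code, and a `from_vec` written to mirror the behaviour exercised by the tests:

```rust
// Stand-in for a protobuf-generated message: owned fields plus take_* accessors.
struct GeneratedMount {
    destination: String,
    options: Vec<String>,
}

impl GeneratedMount {
    fn take_destination(&mut self) -> String {
        std::mem::take(&mut self.destination)
    }
    fn take_options(&mut self) -> Vec<String> {
        std::mem::take(&mut self.options)
    }
}

// Stand-in for the hand-written OCI struct the data is moved into.
#[derive(Debug)]
struct Mount {
    destination: String,
    options: Vec<String>,
}

impl From<GeneratedMount> for Mount {
    fn from(mut from: GeneratedMount) -> Self {
        Mount {
            destination: from.take_destination(),
            options: from.take_options(),
        }
    }
}

// Generic helper mirroring `from_vec` as used in the tests: convert every element.
fn from_vec<F, T: From<F>>(from: Vec<F>) -> Vec<T> {
    from.into_iter().map(|f| f.into()).collect()
}

fn main() {
    let generated = vec![GeneratedMount {
        destination: "/proc".to_string(),
        options: vec!["nosuid".to_string()],
    }];
    let mounts: Vec<Mount> = from_vec(generated);
    println!("{:?}", mounts);
}
```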
"././a").unwrap(), + scoped_join(rootfs_path, "././a").unwrap(), rootfs_path.join("a") ); assert_eq!( - scoped_join(&rootfs_path, "c/d/../../a").unwrap(), + scoped_join(rootfs_path, "c/d/../../a").unwrap(), rootfs_path.join("a") ); assert_eq!( - scoped_join(&rootfs_path, "c/d/../../../.././a").unwrap(), + scoped_join(rootfs_path, "c/d/../../../.././a").unwrap(), rootfs_path.join("a") ); assert_eq!( - scoped_join(&rootfs_path, "../../a").unwrap(), + scoped_join(rootfs_path, "../../a").unwrap(), rootfs_path.join("a") ); assert_eq!( - scoped_join(&rootfs_path, "./../a").unwrap(), + scoped_join(rootfs_path, "./../a").unwrap(), rootfs_path.join("a") ); } @@ -370,18 +370,18 @@ mod tests { fs::symlink("b/c", rootfs_dir.path().join("a")).unwrap(); let target = rootfs_path.join("b/c"); - assert_eq!(scoped_join(&rootfs_path, "a").unwrap(), target); - assert_eq!(scoped_join(&rootfs_path, "./a").unwrap(), target); - assert_eq!(scoped_join(&rootfs_path, "././a").unwrap(), target); - assert_eq!(scoped_join(&rootfs_path, "b/c/../../a").unwrap(), target); + assert_eq!(scoped_join(rootfs_path, "a").unwrap(), target); + assert_eq!(scoped_join(rootfs_path, "./a").unwrap(), target); + assert_eq!(scoped_join(rootfs_path, "././a").unwrap(), target); + assert_eq!(scoped_join(rootfs_path, "b/c/../../a").unwrap(), target); assert_eq!( - scoped_join(&rootfs_path, "b/c/../../../.././a").unwrap(), + scoped_join(rootfs_path, "b/c/../../../.././a").unwrap(), target ); - assert_eq!(scoped_join(&rootfs_path, "../../a").unwrap(), target); - assert_eq!(scoped_join(&rootfs_path, "./../a").unwrap(), target); - assert_eq!(scoped_join(&rootfs_path, "a/../../../a").unwrap(), target); - assert_eq!(scoped_join(&rootfs_path, "a/../../../b/c").unwrap(), target); + assert_eq!(scoped_join(rootfs_path, "../../a").unwrap(), target); + assert_eq!(scoped_join(rootfs_path, "./../a").unwrap(), target); + assert_eq!(scoped_join(rootfs_path, "a/../../../a").unwrap(), target); + assert_eq!(scoped_join(rootfs_path, "a/../../../b/c").unwrap(), target); } #[test] diff --git a/src/libs/shim-interface/Cargo.toml b/src/libs/shim-interface/Cargo.toml new file mode 100644 index 000000000000..360b4a42e4b2 --- /dev/null +++ b/src/libs/shim-interface/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "shim-interface" +version = "0.1.0" +description = "A library to provide service interface of Kata Containers" +keywords = ["kata", "container", "http"] +categories = ["services"] +authors = ["The Kata Containers community "] +repository = "https://github.com/kata-containers/kata-containers.git" +homepage = "https://katacontainers.io/" +readme = "README.md" +license = "Apache-2.0" +edition = "2018" + +[dependencies] +anyhow = "^1.0" +tokio = { version = "1.8.0", features = ["rt-multi-thread"] } +hyper = { version = "0.14.20", features = ["stream", "server", "http1"] } +hyperlocal = "0.8" +kata-types = { path = "../kata-types" } diff --git a/src/libs/shim-interface/src/lib.rs b/src/libs/shim-interface/src/lib.rs new file mode 100644 index 000000000000..add611d4ac03 --- /dev/null +++ b/src/libs/shim-interface/src/lib.rs @@ -0,0 +1,67 @@ +// Copyright (c) 2022 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! shim-interface is a common library for different components of Kata Containers +//! to make function call through services inside the runtime(runtime-rs runtime). +//! +//! Shim management: +//! Currently, inside the shim, there is a shim management server running as the shim +//! starts, working as a RESTful server. 
To make function call in runtime from another +//! binary, using the utilities provided in this library is one of the methods. +//! +//! You may construct clients by construct a MgmtClient and let is make specific +//! HTTP request to the server. The server inside shim will multiplex the request +//! to its corresponding handler and run certain methods. + +use std::path::Path; + +use anyhow::{anyhow, Result}; + +pub mod shim_mgmt; + +use kata_types::config::KATA_PATH; + +pub const SHIM_MGMT_SOCK_NAME: &str = "shim-monitor.sock"; + +// return sandbox's storage path +pub fn sb_storage_path() -> String { + String::from(KATA_PATH) +} + +// returns the address of the unix domain socket(UDS) for communication with shim +// management service using http +// normally returns "unix:///run/kata/{sid}/shim_monitor.sock" +pub fn mgmt_socket_addr(sid: &str) -> Result { + if sid.is_empty() { + return Err(anyhow!( + "Empty sandbox id for acquiring socket address for shim_mgmt" + )); + } + + let p = Path::new(&sb_storage_path()) + .join(sid) + .join(SHIM_MGMT_SOCK_NAME); + + if let Some(p) = p.to_str() { + Ok(format!("unix://{}", p)) + } else { + Err(anyhow!("Bad socket path")) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mgmt_socket_addr() { + let sid = "414123"; + let addr = mgmt_socket_addr(sid).unwrap(); + assert_eq!(addr, "unix:///run/kata/414123/shim-monitor.sock"); + + let sid = ""; + assert!(mgmt_socket_addr(sid).is_err()); + } +} diff --git a/src/libs/shim-interface/src/shim_mgmt/client.rs b/src/libs/shim-interface/src/shim_mgmt/client.rs new file mode 100644 index 000000000000..f112903c5cca --- /dev/null +++ b/src/libs/shim-interface/src/shim_mgmt/client.rs @@ -0,0 +1,100 @@ +#![allow(dead_code)] +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +// Defines the general client functions used by other components acting like +// clients. To be specific, a client first connect to the socket, then send +// request to destined URL, and finally handle the request(or not) + +use std::{path::Path, path::PathBuf, time::Duration}; + +use crate::mgmt_socket_addr; +use anyhow::{anyhow, Context, Result}; +use hyper::{Body, Client, Method, Request, Response}; +use hyperlocal::{UnixClientExt, UnixConnector, Uri}; + +/// Shim management client with timeout +pub struct MgmtClient { + /// The socket *file path* on host file system + sock_path: PathBuf, + + /// The http client connect to the long standing shim mgmt server + client: Client, + + /// Timeout value for each dial, usually 200ms will be enough + /// For heavier workload, you may want longer timeout + timeout: Option, +} + +impl MgmtClient { + /// Construct a new client connecting to shim mgmt server + pub fn new(sid: &str, timeout: Option) -> Result { + let unix_socket_path = mgmt_socket_addr(sid).context("Failed to get unix socket path")?; + let s_addr = unix_socket_path + .strip_prefix("unix:") + .context("failed to strip prefix")?; + let sock_path = Path::new("/").join(s_addr).as_path().to_owned(); + let client = Client::unix(); + Ok(Self { + sock_path, + client, + timeout, + }) + } + + /// The http GET method for client, return a raw response. Further handling should be done by caller. + /// Parameter uri should be like "/agent-url" etc. 
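A minimal sketch of how a caller might drive this client, assuming the shim management server for the given sandbox is already listening on `unix:///run/kata/<sid>/shim-monitor.sock`; `query_agent_url` is a hypothetical caller-side helper, and the snippet expects `hyper`, `anyhow`, and a Tokio runtime in the calling crate:

```rust
use std::time::Duration;

use anyhow::Result;
use shim_interface::shim_mgmt::client::MgmtClient;

// Ask the shim for the agent's socket address over the management socket.
async fn query_agent_url(sid: &str) -> Result<String> {
    // 200ms follows the guidance on the `timeout` field above; pass None
    // to wait with no deadline.
    let client = MgmtClient::new(sid, Some(Duration::from_millis(200)))?;
    let resp = client.get("/agent-url").await?;
    let body = hyper::body::to_bytes(resp.into_body()).await?;
    Ok(String::from_utf8(body.to_vec())?)
}
```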
+ pub async fn get(&self, uri: &str) -> Result> { + let url: hyper::Uri = Uri::new(&self.sock_path, uri).into(); + let req = Request::builder() + .method(Method::GET) + .uri(url) + .body(Body::empty())?; + self.send_request(req).await + } + + /// The HTTP Post method for client + pub async fn post( + &self, + uri: &str, + content_type: &str, + content: &str, + ) -> Result> { + let url: hyper::Uri = Uri::new(&self.sock_path, uri).into(); + + // build body from content + let body = Body::from(content.to_string()); + let req = Request::builder() + .method(Method::POST) + .uri(url) + .header("content-type", content_type) + .body(body)?; + self.send_request(req).await + } + + /// The http PUT method for client + pub async fn put(&self, uri: &str, data: Vec) -> Result> { + let url: hyper::Uri = Uri::new(&self.sock_path, uri).into(); + let req = Request::builder() + .method(Method::PUT) + .uri(url) + .body(Body::from(data))?; + self.send_request(req).await + } + + async fn send_request(&self, req: Request) -> Result> { + let msg = format!("Request ({:?}) to uri {:?}", req.method(), req.uri()); + let resp = self.client.request(req); + match self.timeout { + Some(timeout) => match tokio::time::timeout(timeout, resp).await { + Ok(result) => result.map_err(|e| anyhow!(e)), + Err(_) => Err(anyhow!("{:?} timeout after {:?}", msg, self.timeout)), + }, + // if client timeout is not set, request waits with no deadline + None => resp.await.context(format!("{:?} failed", msg)), + } + } +} diff --git a/src/libs/shim-interface/src/shim_mgmt/mod.rs b/src/libs/shim-interface/src/shim_mgmt/mod.rs new file mode 100644 index 000000000000..0f9abf562355 --- /dev/null +++ b/src/libs/shim-interface/src/shim_mgmt/mod.rs @@ -0,0 +1,24 @@ +// Copyright (c) 2022 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +/// The shim management client module +pub mod client; + +/// The key for direct volume path +pub const DIRECT_VOLUME_PATH_KEY: &str = "path"; +/// URL for stats direct volume +pub const DIRECT_VOLUME_STATS_URL: &str = "/direct-volume/stats"; +/// URL for resizing direct volume +pub const DIRECT_VOLUME_RESIZE_URL: &str = "/direct-volume/resize"; +/// URL for querying agent's socket +pub const AGENT_URL: &str = "/agent-url"; +/// URL for operation on guest iptable (ipv4) +pub const IP_TABLE_URL: &str = "/iptables"; +/// URL for operation on guest iptable (ipv6) +pub const IP6_TABLE_URL: &str = "/ip6tables"; +/// URL for querying metrics inside shim +pub const METRICS_URL: &str = "/metrics"; + +pub const ERR_NO_SHIM_SERVER: &str = "Failed to create shim management server"; diff --git a/src/libs/test-utils/Cargo.toml b/src/libs/test-utils/Cargo.toml new file mode 100644 index 000000000000..2bfb652e4268 --- /dev/null +++ b/src/libs/test-utils/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "test-utils" +version = "0.1.0" +description = "A library for test Rust code" +keywords = ["kata", "container", "test", "utils"] +categories = ["testing"] +authors = ["The Kata Containers community "] +repository = "https://github.com/kata-containers/kata-containers.git" +homepage = "https://katacontainers.io/" +readme = "README.md" +license = "Apache-2.0" +edition = "2018" + +[dependencies] +nix = "0.24.2" diff --git a/src/libs/test-utils/README.md b/src/libs/test-utils/README.md new file mode 100644 index 000000000000..a7bf669a3b34 --- /dev/null +++ b/src/libs/test-utils/README.md @@ -0,0 +1,8 @@ +Test Utilities +==================== + +A library to share test code for Rust. 
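For example, a test that must run as root and wants to compare a `Result` against an expected value could combine the macros defined in `src/lib.rs` below. This is a minimal sketch: `do_privileged_op` is a hypothetical function under test, and the calling crate needs `nix` as a dev-dependency (the skip macros reference it unqualified when they expand) plus, for this sketch, `anyhow`.

```rust
#[cfg(test)]
mod tests {
    use anyhow::{anyhow, Result};
    use test_utils::{assert_result, skip_if_not_root};

    // Hypothetical function under test.
    fn do_privileged_op(allowed: bool) -> Result<u32> {
        if allowed {
            Ok(0)
        } else {
            Err(anyhow!("permission denied"))
        }
    }

    #[test]
    fn test_privileged_op() {
        // Silently skip this test when the suite is not run as root.
        skip_if_not_root!();

        let expected: Result<u32> = Ok(0);
        assert_result!(expected, do_privileged_op(true), "privileged op should succeed");
    }
}
```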
+ +## License + +This code is licensed under [Apache-2.0](../../../LICENSE). diff --git a/src/libs/test-utils/src/lib.rs b/src/libs/test-utils/src/lib.rs new file mode 100644 index 000000000000..948701304c17 --- /dev/null +++ b/src/libs/test-utils/src/lib.rs @@ -0,0 +1,119 @@ +// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[derive(Debug, PartialEq)] +pub enum TestUserType { + RootOnly, + NonRootOnly, + Any, +} + +#[macro_export] +macro_rules! skip_if_root { + () => { + if nix::unistd::Uid::effective().is_root() { + println!("INFO: skipping {} which needs non-root", module_path!()); + return; + } + }; +} + +#[macro_export] +macro_rules! skip_if_not_root { + () => { + if !nix::unistd::Uid::effective().is_root() { + println!("INFO: skipping {} which needs root", module_path!()); + return; + } + }; +} + +#[macro_export] +macro_rules! skip_loop_if_root { + ($msg:expr) => { + if nix::unistd::Uid::effective().is_root() { + println!( + "INFO: skipping loop {} in {} which needs non-root", + $msg, + module_path!() + ); + continue; + } + }; +} + +#[macro_export] +macro_rules! skip_loop_if_not_root { + ($msg:expr) => { + if !nix::unistd::Uid::effective().is_root() { + println!( + "INFO: skipping loop {} in {} which needs root", + $msg, + module_path!() + ); + continue; + } + }; +} + +// Parameters: +// +// 1: expected Result +// 2: actual Result +// 3: string used to identify the test on error +#[macro_export] +macro_rules! assert_result { + ($expected_result:expr, $actual_result:expr, $msg:expr) => { + if $expected_result.is_ok() { + let expected_value = $expected_result.as_ref().unwrap(); + let actual_value = $actual_result.unwrap(); + assert!(*expected_value == actual_value, "{}", $msg); + } else { + assert!($actual_result.is_err(), "{}", $msg); + + let expected_error = $expected_result.as_ref().unwrap_err(); + let expected_error_msg = format!("{:?}", expected_error); + + let actual_error_msg = format!("{:?}", $actual_result.unwrap_err()); + + assert!(expected_error_msg == actual_error_msg, "{}", $msg); + } + }; +} + +#[macro_export] +macro_rules! skip_loop_by_user { + ($msg:expr, $user:expr) => { + if $user == TestUserType::RootOnly { + skip_loop_if_not_root!($msg); + } else if $user == TestUserType::NonRootOnly { + skip_loop_if_root!($msg); + } + }; +} + +#[cfg(test)] +mod tests { + use super::{skip_if_not_root, skip_if_root}; + + #[test] + fn test_skip_if_not_root() { + skip_if_not_root!(); + assert!( + nix::unistd::Uid::effective().is_root(), + "normal user should be skipped" + ) + } + + #[test] + fn test_skip_if_root() { + skip_if_root!(); + assert!( + !nix::unistd::Uid::effective().is_root(), + "root user should be skipped" + ) + } +} diff --git a/src/runtime-rs/.gitignore b/src/runtime-rs/.gitignore new file mode 100644 index 000000000000..0e5a39c11f19 --- /dev/null +++ b/src/runtime-rs/.gitignore @@ -0,0 +1,3 @@ +target +crates/shim/src/config.rs +/config/*.toml diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock new file mode 100644 index 000000000000..64c63efa11ee --- /dev/null +++ b/src/runtime-rs/Cargo.lock @@ -0,0 +1,4268 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "actix-macros" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb" +dependencies = [ + "quote", + "syn 2.0.27", +] + +[[package]] +name = "actix-rt" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15265b6b8e2347670eb363c47fc8c75208b4a4994b27192f345fcbe707804f3e" +dependencies = [ + "actix-macros", + "futures-core", + "tokio", +] + +[[package]] +name = "addr2line" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "agent" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "futures 0.1.31", + "kata-types", + "log", + "logging", + "nix 0.24.3", + "oci", + "protobuf 3.2.0", + "protocols", + "serde", + "serde_json", + "slog", + "slog-scope", + "tokio", + "tracing", + "ttrpc", + "url", +] + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom 0.2.10", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +dependencies = [ + "memchr", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" + +[[package]] +name = "api_client" +version = "0.1.0" +source = "git+https://github.com/cloud-hypervisor/cloud-hypervisor?tag=v27.0#2ba6a9bfcfd79629aecf77504fa554ab821d138e" +dependencies = [ + "vmm-sys-util 0.10.0", +] + +[[package]] +name = "arc-swap" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" + +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + +[[package]] +name = "async-channel" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +dependencies = [ + "concurrent-queue", + "event-listener", + "futures-core", +] + +[[package]] +name = 
"async-executor" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fa3dc5f2a8564f07759c008b9109dc0d39de92a88d5588b8a5036d286383afb" +dependencies = [ + "async-lock", + "async-task", + "concurrent-queue", + "fastrand 1.9.0", + "futures-lite", + "slab", +] + +[[package]] +name = "async-global-executor" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1b6f5d7df27bd294849f8eec66ecfc63d11814df7a4f5d74168a2394467b776" +dependencies = [ + "async-channel", + "async-executor", + "async-io", + "async-lock", + "blocking", + "futures-lite", + "once_cell", +] + +[[package]] +name = "async-io" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fc5b45d93ef0529756f812ca52e44c221b35341892d3dcc34132ac02f3dd2af" +dependencies = [ + "async-lock", + "autocfg", + "cfg-if 1.0.0", + "concurrent-queue", + "futures-lite", + "log", + "parking", + "polling", + "rustix 0.37.23", + "slab", + "socket2", + "waker-fn", +] + +[[package]] +name = "async-lock" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa24f727524730b077666307f2734b4a1a1c57acb79193127dcc8914d5242dd7" +dependencies = [ + "event-listener", +] + +[[package]] +name = "async-std" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62565bb4402e926b29953c785397c6dc0391b7b446e45008b0049eb43cec6f5d" +dependencies = [ + "async-channel", + "async-global-executor", + "async-io", + "async-lock", + "crossbeam-utils", + "futures-channel", + "futures-core", + "futures-io", + "futures-lite", + "gloo-timers", + "kv-log-macro", + "log", + "memchr", + "once_cell", + "pin-project-lite", + "pin-utils", + "slab", + "wasm-bindgen-futures", +] + +[[package]] +name = "async-task" +version = "4.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc7ab41815b3c653ccd2978ec3255c81349336702dfdf62ee6f7069b12a3aae" + +[[package]] +name = "async-trait" +version = "0.1.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6dde6e4ed435a4c1ee4e73592f5ba9da2151af10076cc04858746af9352d09" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.27", +] + +[[package]] +name = "atomic-waker" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1181e1e0d1fce796a03db1ae795d67167da795f9cf4a39c37589e85ef57f26d3" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "awaitgroup" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc17ab023b4091c10ff099f9deebaeeb59b5189df07e554c4fef042b70745d68" + +[[package]] +name = "backtrace" +version = "0.3.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" +dependencies = [ + "addr2line", + "cc", + "cfg-if 1.0.0", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "base64" +version = "0.13.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" + +[[package]] +name = "bitmask-enum" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78d456f91b4c1fdebf2698214e599fec3d7f8b46e3140fb254a9ea88c970ab0a" +dependencies = [ + "quote", + "syn 2.0.27", +] + +[[package]] +name = "blake3" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if 1.0.0", + "constant_time_eq", + "digest 0.10.7", +] + +[[package]] +name = "block-buffer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "generic-array", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "blocking" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77231a1c8f801696fc0123ec6150ce92cffb8e164a02afb9c8ddee0e9b65ad65" +dependencies = [ + "async-channel", + "async-lock", + "async-task", + "atomic-waker", + "fastrand 1.9.0", + "futures-lite", + "log", +] + +[[package]] +name = "bumpalo" +version = "3.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" + +[[package]] +name = "byte-unit" +version = "3.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "415301c9de11005d4b92193c0eb7ac7adc37e5a49e0ac9bed0a42343512744b8" + +[[package]] +name = "byte-unit" +version = "4.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da78b32057b8fdfc352504708feeba7216dcd65a2c9ab02978cbd288d1279b6c" +dependencies = [ + "serde", + "utf8-width", +] + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "bytes" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c" +dependencies = [ + "byteorder", + "iovec", +] + +[[package]] +name = "bytes" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" + +[[package]] +name = "caps" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "190baaad529bcfbde9e1a19022c42781bdb6ff9de25721abdb8fd98c0807730b" +dependencies = [ + "libc", + "thiserror", +] + +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +dependencies = [ + "jobserver", +] + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cgroups-rs" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b098e7c3a70d03c288fa0a96ccf13e770eb3d78c4cc0e1549b3c13215d5f965" +dependencies = [ + "libc", + "log", + "nix 0.25.1", + "regex", + "thiserror", +] + +[[package]] +name = "ch-config" +version = "0.1.0" +dependencies = [ + "anyhow", + "api_client", + "kata-types", + "nix 0.26.2", + "serde", + "serde_json", + "thiserror", + "tokio", +] + +[[package]] +name = "chrono" +version = "0.4.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "time 0.1.45", + "wasm-bindgen", + "winapi", +] + +[[package]] +name = "cmake" +version = "0.1.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +dependencies = [ + "cc", +] + +[[package]] +name = "common" +version = "0.1.0" +dependencies = [ + "agent", + "anyhow", + "async-trait", + "containerd-shim-protos", + "kata-sys-util", + "kata-types", + "lazy_static", + "nix 0.24.3", + "oci", + "persist", + "protobuf 3.2.0", + "serde_json", + "slog", + "slog-scope", + "strum", + "thiserror", + "tokio", + "ttrpc", +] + +[[package]] +name = "common-path" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2382f75942f4b3be3690fe4f86365e9c853c1587d6ee58212cebf6e2a9ccd101" + +[[package]] +name = "concurrent-queue" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62ec6771ecfa0762d24683ee5a32ad78487a3d3afdc0fb8cae19d2c5deb50b7c" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + +[[package]] +name = "containerd-shim-protos" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef45f1c71aa587d8f657c546d8da38ea04f113dd05da0ef993c4515fa25fbdd1" +dependencies = [ + "async-trait", + "protobuf 3.2.0", + "ttrpc", + "ttrpc-codegen", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" + +[[package]] +name = "cpufeatures" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +dependencies = [ + "libc", +] + +[[package]] +name = "cpuid-bool" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634" + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "darling" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "darling_macro" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" +dependencies = [ + "darling_core", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "dashmap" +version = "5.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6943ae99c34386c84a470c499d3414f66502a41340aa895406e0d2e4a207b91d" +dependencies = [ + "cfg-if 1.0.0", + "hashbrown 0.14.0", + "lock_api", + "once_cell", + "parking_lot_core 0.9.8", +] + +[[package]] +name = "dbs-address-space" +version = "0.3.0" +dependencies = [ + "arc-swap", + "lazy_static", + "libc", + "nix 0.23.2", + "thiserror", + "vm-memory", + "vmm-sys-util 0.11.1", +] + +[[package]] +name = "dbs-allocator" +version = "0.1.1" +dependencies = [ + "thiserror", +] + +[[package]] +name = "dbs-arch" +version = "0.2.3" +dependencies = [ + "kvm-bindings", + "kvm-ioctls", + "libc", + "memoffset 0.6.5", + "thiserror", + "vm-memory", + "vmm-sys-util 0.11.1", +] + +[[package]] +name = "dbs-boot" +version = "0.4.0" +dependencies = [ + "dbs-arch", + "kvm-bindings", + "kvm-ioctls", + "lazy_static", + "libc", + "thiserror", + "vm-fdt", + "vm-memory", +] + +[[package]] +name = "dbs-device" +version = "0.2.0" +dependencies = [ + "thiserror", +] + +[[package]] +name = "dbs-interrupt" +version = "0.2.2" +dependencies = [ + "dbs-arch", + "dbs-device", + "kvm-bindings", + "kvm-ioctls", + "libc", + "vmm-sys-util 0.11.1", +] + +[[package]] +name = "dbs-legacy-devices" +version = "0.1.1" +dependencies = [ + "dbs-device", + "dbs-utils", + "libc", + "log", + "serde", + "vm-superio", + "vmm-sys-util 0.11.1", +] + +[[package]] +name = "dbs-upcall" +version = "0.3.0" +dependencies = [ + "anyhow", + "dbs-utils", + "dbs-virtio-devices", + "log", + "thiserror", + "timerfd", +] + +[[package]] +name = "dbs-utils" +version = "0.2.1" +dependencies = [ + "anyhow", + "event-manager", + "libc", + "log", + "serde", + "thiserror", + "timerfd", + "vmm-sys-util 
0.11.1", +] + +[[package]] +name = "dbs-virtio-devices" +version = "0.3.1" +dependencies = [ + "byteorder", + "caps", + "dbs-device", + "dbs-interrupt", + "dbs-utils", + "epoll", + "fuse-backend-rs", + "io-uring", + "kvm-bindings", + "kvm-ioctls", + "libc", + "log", + "nix 0.24.3", + "nydus-api", + "nydus-rafs", + "nydus-storage", + "rlimit", + "sendfd", + "serde", + "serde_json", + "thiserror", + "threadpool", + "virtio-bindings", + "virtio-queue", + "vm-memory", + "vmm-sys-util 0.11.1", +] + +[[package]] +name = "derive-new" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer 0.10.4", + "crypto-common", + "subtle", +] + +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if 1.0.0", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + +[[package]] +name = "dlv-list" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257" + +[[package]] +name = "dragonball" +version = "0.1.0" +dependencies = [ + "anyhow", + "arc-swap", + "bytes 1.4.0", + "crossbeam-channel", + "dbs-address-space", + "dbs-allocator", + "dbs-arch", + "dbs-boot", + "dbs-device", + "dbs-interrupt", + "dbs-legacy-devices", + "dbs-upcall", + "dbs-utils", + "dbs-virtio-devices", + "fuse-backend-rs", + "kvm-bindings", + "kvm-ioctls", + "lazy_static", + "libc", + "linux-loader", + "log", + "nix 0.24.3", + "procfs 0.12.0", + "prometheus", + "seccompiler", + "serde", + "serde_derive", + "serde_json", + "slog", + "slog-scope", + "thiserror", + "virtio-queue", + "vm-memory", + "vmm-sys-util 0.11.1", +] + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "epoll" +version = "4.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20df693c700404f7e19d4d6fae6b15215d2913c27955d2b9d6f2c0f537511cd0" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + +[[package]] +name = "errno" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + +[[package]] +name = "event-manager" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "377fa591135fbe23396a18e2655a6d5481bf7c5823cdfa3cc81b01a229cbe640" +dependencies = [ + "libc", + "vmm-sys-util 0.11.1", +] + +[[package]] +name = "fail" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe5e43d0f78a42ad591453aedb1d7ae631ce7ee445c7643691055a9ed8d3b01c" +dependencies = [ + "log", + "once_cell", + "rand 0.8.5", +] + +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + +[[package]] +name = "fastrand" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" + +[[package]] +name = "filetime" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "redox_syscall 0.2.16", + "windows-sys 0.48.0", +] + +[[package]] +name = "fixedbitset" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" + +[[package]] +name = "flate2" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" +dependencies = [ + "crc32fast", + "libz-sys", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + +[[package]] +name = "fuse-backend-rs" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f85357722be4bf3d0b7548bedf7499686c77628c2c61cb99c6519463f7a9e5f0" +dependencies = [ + "arc-swap", + "bitflags 1.3.2", + "caps", + "core-foundation-sys", + "lazy_static", + "libc", + "log", + "mio", + "nix 0.24.3", + "virtio-queue", + "vm-memory", + "vmm-sys-util 0.11.1", +] + +[[package]] 
+name = "futures" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678" + +[[package]] +name = "futures" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" + +[[package]] +name = "futures-executor" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" + +[[package]] +name = "futures-lite" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" +dependencies = [ + "fastrand 1.9.0", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + +[[package]] +name = "futures-macro" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.27", +] + +[[package]] +name = "futures-sink" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" + +[[package]] +name = "futures-task" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" + +[[package]] +name = "futures-util" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "gimli" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "gloo-timers" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b995a66bb87bebce9a0f4a95aed01daca4872c050bfcb21653361c03bc35e5c" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "go-flag" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4a40c9ca507513f573aabaf6a8558173a1ac9aa1363d8de30c7f89b34f8d2b" +dependencies = [ + "cfg-if 0.1.10", +] + +[[package]] +name = "h2" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97ec8491ebaf99c8eaa73058b045fe58073cd6be7f596ac993ced0b0a0c01049" +dependencies = [ + "bytes 1.4.0", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + +[[package]] +name = "headers" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3e372db8e5c0d213e0cd0b9be18be2aca3d44cf2fe30a9d46a65581cd454584" +dependencies = [ + "base64", + "bitflags 1.3.2", + "bytes 1.4.0", + "headers-core", + "http", + "httpdate", + "mime", + "sha1", +] + +[[package]] +name = "headers-core" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" +dependencies = [ + "http", +] + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "hermit-abi" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "http" +version = 
"0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +dependencies = [ + "bytes 1.4.0", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +dependencies = [ + "bytes 1.4.0", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" + +[[package]] +name = "hyper" +version = "0.14.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" +dependencies = [ + "bytes 1.4.0", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyperlocal" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fafdf7b2b2de7c9784f76e02c0935e65a8117ec3b768644379983ab333ac98c" +dependencies = [ + "futures-util", + "hex", + "hyper", + "pin-project", + "tokio", +] + +[[package]] +name = "hypervisor" +version = "0.1.0" +dependencies = [ + "actix-rt", + "anyhow", + "async-trait", + "ch-config", + "crossbeam-channel", + "dbs-utils", + "dragonball", + "futures 0.3.28", + "go-flag", + "kata-sys-util", + "kata-types", + "lazy_static", + "libc", + "logging", + "nix 0.24.3", + "path-clean", + "persist", + "rand 0.8.5", + "rust-ini", + "safe-path 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "seccompiler", + "serde", + "serde_json", + "shim-interface", + "slog", + "slog-scope", + "tests_utils", + "thiserror", + "tokio", + "tracing", + "vmm-sys-util 0.11.1", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] 
+name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "io-lifetimes" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" +dependencies = [ + "hermit-abi 0.3.2", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "io-uring" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd1e1a01cfb924fd8c5c43b6827965db394f5a3a16c599ce03452266e1cf984c" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + +[[package]] +name = "iovec" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" +dependencies = [ + "libc", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "jobserver" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "kata-sys-util" +version = "0.1.0" +dependencies = [ + "anyhow", + "byteorder", + "cgroups-rs", + "chrono", + "common-path", + "fail", + "kata-types", + "lazy_static", + "libc", + "nix 0.24.3", + "oci", + "once_cell", + "rand 0.8.5", + "serde_json", + "slog", + "slog-scope", + "subprocess", + "thiserror", +] + +[[package]] +name = "kata-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "base64", + "bitmask-enum", + "byte-unit 3.1.4", + "glob", + "lazy_static", + "num_cpus", + "oci", + "regex", + "safe-path 0.1.0", + "serde", + "serde-enum-str", + "serde_json", + "slog", + "slog-scope", + "thiserror", + "toml 0.5.11", +] + +[[package]] +name = "kv-log-macro" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f" +dependencies = [ + "log", +] + +[[package]] +name = "kvm-bindings" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efe70e65a5b092161d17f5005b66e5eefe7a94a70c332e755036fc4af78c4e79" +dependencies = [ + "vmm-sys-util 0.11.1", +] + +[[package]] +name = "kvm-ioctls" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3a321cabd827642499c77e27314f388dd83a717a5ca716b86476fb947f73ae4" +dependencies = [ + "kvm-bindings", + "libc", + "vmm-sys-util 0.11.1", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.147" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" + +[[package]] +name = "libz-sys" +version = "1.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" +dependencies = [ + "cc", + "cmake", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-loader" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9259ddbfbb52cc918f6bbc60390004ddd0228cf1d85f402009ff2b3d95de83f" +dependencies = [ + "vm-memory", +] + +[[package]] +name = "linux-raw-sys" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" + +[[package]] +name = "linux-raw-sys" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" + +[[package]] +name = "linux-raw-sys" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0" + +[[package]] +name = "linux_container" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "common", + "kata-types", + "tokio", +] + +[[package]] +name = "lock_api" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +dependencies = [ + "value-bag", +] + +[[package]] +name = "logging" +version = "0.1.0" +dependencies = [ + "serde_json", + "slog", + "slog-async", + "slog-json", + "slog-scope", + "slog-term", +] + +[[package]] +name = "lz4" +version = "1.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" +dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +dependencies = [ + "libc", + "log", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys 0.48.0", +] + +[[package]] +name = "multimap" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" + +[[package]] +name = "netlink-packet-core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "345b8ab5bd4e71a2986663e88c56856699d060e78e152e6e9d7966fcd5491297" +dependencies = [ + "anyhow", + "byteorder", + "libc", + "netlink-packet-utils", +] + +[[package]] +name = "netlink-packet-route" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5dee5ed749373c298237fe694eb0a51887f4cc1a27370c8464bac4382348f1a" +dependencies = [ + "anyhow", + "bitflags 1.3.2", + "byteorder", + "libc", + "netlink-packet-core", + "netlink-packet-utils", +] + +[[package]] +name = "netlink-packet-utils" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ede8a08c71ad5a95cdd0e4e52facd37190977039a4704eb82a283f713747d34" +dependencies = [ + "anyhow", + "byteorder", + "paste", + "thiserror", +] + +[[package]] +name = "netlink-proto" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65b4b14489ab424703c092062176d52ba55485a89c076b4f9db05092b7223aa6" +dependencies = [ + "bytes 1.4.0", + "futures 0.3.28", + "log", + "netlink-packet-core", + "netlink-sys", + "thiserror", + "tokio", +] + +[[package]] +name = "netlink-sys" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6471bf08e7ac0135876a9581bf3217ef0333c191c128d34878079f42ee150411" +dependencies = [ + "bytes 1.4.0", + "futures 0.3.28", + "libc", + "log", + "tokio", +] + +[[package]] +name = "netns-rs" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23541694f1d7d18cd1a0da3a1352a6ea48b01cbb4a8e7a6e547963823fd5276e" +dependencies = [ + "nix 0.23.2", + "thiserror", +] + +[[package]] +name = "nix" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f3790c00a0150112de0f4cd161e3d7fc4b2d8a5542ffc35f099a2562aecb35c" +dependencies = [ + "bitflags 1.3.2", + "cc", + "cfg-if 1.0.0", + "libc", + "memoffset 0.6.5", +] + +[[package]] +name = "nix" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069" +dependencies = [ + "bitflags 1.3.2", + "cfg-if 1.0.0", + "libc", + "memoffset 0.6.5", +] + +[[package]] +name = "nix" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" +dependencies = [ + "autocfg", + "bitflags 1.3.2", + "cfg-if 1.0.0", + "libc", + "memoffset 0.6.5", + "pin-utils", +] + +[[package]] +name = "nix" +version = "0.26.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" +dependencies = [ + "bitflags 1.3.2", + "cfg-if 1.0.0", + "libc", + "memoffset 0.7.1", + "pin-utils", + "static_assertions", +] + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-traits" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.2", + "libc", +] + +[[package]] +name = "num_threads" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +dependencies = [ + "libc", +] + +[[package]] +name = "nydus-api" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64c62d8a36c10b654b87246a39861b2c05f68e96ab3b2f002f5a54f406d5e0e" +dependencies = [ + "libc", + "log", + "serde", + "serde_json", + "toml 0.5.11", +] + +[[package]] +name = "nydus-rafs" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adde865ef71c91c5f139c4c05ca5aedb6fbd53f530d646b13409ac5220b85467" +dependencies = [ + "anyhow", + "arc-swap", + "bitflags 1.3.2", + "fuse-backend-rs", + "lazy_static", + "libc", + "log", + "nix 0.24.3", + "nydus-api", + "nydus-storage", + "nydus-utils", + "serde", + "serde_json", + "vm-memory", +] + +[[package]] +name = "nydus-storage" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4023f15303dbbda47797d07e9acd2045862ce82c7e28cd66f70b09bda5584cbb" +dependencies = [ + "arc-swap", + "bitflags 1.3.2", + "fuse-backend-rs", + "hex", + "lazy_static", + "libc", + "log", + "nix 0.24.3", + "nydus-api", + "nydus-utils", + "serde", + "serde_json", + "tar", + "tokio", + "vm-memory", +] + +[[package]] +name = "nydus-utils" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1f7bcde0f3906cf49101f2d40e485b0155eee97e3358eefd4783448c4f69c96" +dependencies = [ + "blake3", + "flate2", + "httpdate", + "lazy_static", + "libc", + "libz-sys", + "log", + "lz4", + "lz4-sys", + "nix 0.24.3", + "nydus-api", + "openssl", + "serde", + "serde_json", + "sha2 0.10.7", + "tokio", + "zstd", +] + +[[package]] +name = "object" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" +dependencies = [ + "memchr", +] + +[[package]] +name = "oci" +version = "0.1.0" +dependencies = [ + "libc", + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" + +[[package]] +name = "openssl" +version = "0.10.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d" +dependencies = [ + "bitflags 1.3.2", + "cfg-if 1.0.0", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.27", +] + +[[package]] +name = "openssl-src" +version = "111.26.0+1.1.1u" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efc62c9f12b22b8f5208c23a7200a442b2e5999f8bdf80233852122b5a4f6f37" +dependencies = [ + "cc", +] + +[[package]] +name = "openssl-sys" +version = "0.9.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6" +dependencies = [ + "cc", + "libc", + "openssl-src", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "opentelemetry" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69d6c3d7288a106c0a363e4b0e8d308058d56902adefb16f4936f417ffef086e" +dependencies = [ + "opentelemetry_api", + "opentelemetry_sdk", +] + +[[package]] +name = "opentelemetry-http" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc79add46364183ece1a4542592ca593e6421c60807232f5b8f7a31703825d" +dependencies = [ + "async-trait", + "bytes 1.4.0", + "http", + "hyper", + "opentelemetry_api", + "tokio", +] + +[[package]] +name = "opentelemetry-jaeger" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e785d273968748578931e4dc3b4f5ec86b26e09d9e0d66b55adda7fce742f7a" +dependencies = [ + "async-trait", + "futures 0.3.28", + "futures-executor", + "headers", + "http", + "hyper", + "once_cell", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-semantic-conventions", + "thiserror", + "thrift", + "tokio", +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b02e0230abb0ab6636d18e2ba8fa02903ea63772281340ccac18e0af3ec9eeb" +dependencies = [ + "opentelemetry", +] + +[[package]] +name = "opentelemetry_api" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c24f96e21e7acc813c7a8394ee94978929db2bcc46cf6b5014fc612bf7760c22" +dependencies = [ + "fnv", + "futures-channel", + "futures-util", + "indexmap", + "js-sys", + "once_cell", + "pin-project-lite", + "thiserror", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ca41c4933371b61c2a2f214bf16931499af4ec90543604ec828f7a625c09113" +dependencies = [ + "async-trait", + "crossbeam-channel", + "dashmap", + "fnv", + "futures-channel", + "futures-executor", + "futures-util", + "once_cell", + "opentelemetry_api", + "percent-encoding", + "rand 0.8.5", + "thiserror", + "tokio", + "tokio-stream", +] + +[[package]] +name = "ordered-float" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" 
+dependencies = [ + "num-traits", +] + +[[package]] +name = "ordered-multimap" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccd746e37177e1711c20dd619a1620f34f5c8b569c53590a72dedd5344d8924a" +dependencies = [ + "dlv-list", + "hashbrown 0.12.3", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "parking" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14f2252c834a40ed9bb5422029649578e63aa341ac401f74e719dd1afda8394e" + +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core 0.8.6", +] + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core 0.9.8", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" +dependencies = [ + "cfg-if 1.0.0", + "instant", + "libc", + "redox_syscall 0.2.16", + "smallvec", + "winapi", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "redox_syscall 0.3.5", + "smallvec", + "windows-targets 0.48.1", +] + +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + +[[package]] +name = "path-clean" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef" + +[[package]] +name = "percent-encoding" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" + +[[package]] +name = "persist" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "kata-sys-util", + "kata-types", + "libc", + "safe-path 0.1.0", + "serde", + "serde_json", + "shim-interface", +] + +[[package]] +name = "petgraph" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "pin-project" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "030ad2bc4db10a8944cb0d837f158bdfec4d4a4873ab701a95046770d11f8842" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.27", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c40d25201921e5ff0c862a505c6557ea88568a4e3ace775ab55e93f2f4f9d57" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" + +[[package]] +name = "polling" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b2d323e8ca7996b3e23126511a523f7e62924d93ecd5ae73b333815b0eb3dce" +dependencies = [ + "autocfg", + "bitflags 1.3.2", + "cfg-if 1.0.0", + "concurrent-queue", + "libc", + "log", + "pin-project-lite", + "windows-sys 0.48.0", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "procfs" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0941606b9934e2d98a3677759a971756eb821f75764d0e0d26946d08e74d9104" +dependencies = [ + "bitflags 1.3.2", + "byteorder", + "chrono", + "flate2", + "hex", + "lazy_static", + "libc", +] + +[[package]] +name = "procfs" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1de8dacb0873f77e6aefc6d71e044761fcc68060290f5b1089fcdf84626bb69" +dependencies = [ + "bitflags 1.3.2", + "byteorder", + "hex", + "lazy_static", + "rustix 0.36.15", +] + +[[package]] +name = "prometheus" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "449811d15fbdf5ceb5c1144416066429cf82316e2ec8ce0c1f6f8a02e7bbcf8c" +dependencies = [ + "cfg-if 1.0.0", + "fnv", + "lazy_static", + "libc", + "memchr", + "parking_lot 0.12.1", + "procfs 0.14.2", + "protobuf 2.28.0", + "thiserror", +] + +[[package]] +name = "prost" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de5e2533f59d08fcf364fd374ebda0692a70bd6d7e66ef97f306f45c6c5d8020" +dependencies = [ + "bytes 1.4.0", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "355f634b43cdd80724ee7848f95770e7e70eefa6dcf14fea676216573b8fd603" +dependencies = [ + "bytes 1.4.0", + "heck 0.3.3", + "itertools", + "log", + "multimap", + "petgraph", + "prost", + "prost-types", + "tempfile", + "which", +] + +[[package]] +name = "prost-derive" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "600d2f334aa05acb02a755e217ef1ab6dea4d51b58b7846588b747edec04efba" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "prost-types" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "603bbd6394701d13f3f25aada59c7de9d35a6a5887cfc156181234a44002771b" +dependencies = [ + "bytes 1.4.0", + "prost", +] + +[[package]] +name = "protobuf" +version = "2.28.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + +[[package]] +name = "protobuf" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b55bad9126f378a853655831eb7363b7b01b81d19f8cb1218861086ca4a1a61e" +dependencies = [ + "once_cell", + "protobuf-support", + "thiserror", +] + +[[package]] +name = "protobuf-codegen" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "033460afb75cf755fcfc16dfaed20b86468082a2ea24e05ac35ab4a099a017d6" +dependencies = [ + "protobuf 2.28.0", +] + +[[package]] +name = "protobuf-codegen" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd418ac3c91caa4032d37cb80ff0d44e2ebe637b2fb243b6234bf89cdac4901" +dependencies = [ + "anyhow", + "once_cell", + "protobuf 3.2.0", + "protobuf-parse", + "regex", + "tempfile", + "thiserror", +] + +[[package]] +name = "protobuf-parse" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d39b14605eaa1f6a340aec7f320b34064feb26c93aec35d6a9a2272a8ddfa49" +dependencies = [ + "anyhow", + "indexmap", + "log", + "protobuf 3.2.0", + "protobuf-support", + "tempfile", + "thiserror", + "which", +] + +[[package]] +name = "protobuf-support" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d4d7b8601c814cfb36bcebb79f0e61e45e1e93640cf778837833bbed05c372" +dependencies = [ + "thiserror", +] + +[[package]] +name = "protocols" +version = "0.1.0" +dependencies = [ + "async-trait", + "oci", + "protobuf 3.2.0", + "ttrpc", + "ttrpc-codegen", +] + +[[package]] +name = "quote" +version = "1.0.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ac302d8f83c0c1974bf758f6b041c6c8ada916fbb44a609158ca8b064cc76c" +dependencies = [ + "libc", + "rand 0.4.6", +] + +[[package]] +name = "rand" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" +dependencies = [ + "fuchsia-cprng", + "libc", + "rand_core 0.3.1", + "rdrand", + "winapi", +] + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + 
"ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", +] + +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.10", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_users" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +dependencies = [ + "getrandom 0.2.10", + "redox_syscall 0.2.16", + "thiserror", +] + +[[package]] +name = "regex" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" + +[[package]] +name = "resource" +version = "0.1.0" +dependencies = [ + "actix-rt", + "agent", + "anyhow", + "async-trait", + "bitflags 1.3.2", + "byte-unit 4.0.19", + "cgroups-rs", + "futures 0.3.28", + "hex", + "hypervisor", + "kata-sys-util", + "kata-types", + "lazy_static", + "libc", + "logging", + "netlink-packet-route", + "netlink-sys", + "netns-rs", + "nix 0.24.3", + "oci", + "persist", + "rand 0.7.3", + "rtnetlink", + "scopeguard", + "serde", + "serde_json", + "slog", + "slog-scope", + "tempfile", + "test-utils", + "tests_utils", + "tokio", + "tracing", + "uuid", +] 
+ +[[package]] +name = "rlimit" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "347703a5ae47adf1e693144157be231dde38c72bd485925cae7407ad3e52480b" +dependencies = [ + "libc", +] + +[[package]] +name = "rtnetlink" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46f1cfa18f8cebe685373a2697915d7e0db3b4554918bba118385e0f71f258a7" +dependencies = [ + "futures 0.3.28", + "log", + "netlink-packet-route", + "netlink-proto", + "nix 0.24.3", + "thiserror", + "tokio", +] + +[[package]] +name = "runtimes" +version = "0.1.0" +dependencies = [ + "agent", + "anyhow", + "common", + "hyper", + "hyperlocal", + "hypervisor", + "kata-sys-util", + "kata-types", + "lazy_static", + "linux_container", + "logging", + "netns-rs", + "nix 0.25.1", + "oci", + "opentelemetry", + "opentelemetry-jaeger", + "persist", + "procfs 0.12.0", + "prometheus", + "resource", + "serde_json", + "shim-interface", + "slog", + "slog-scope", + "tokio", + "tracing", + "tracing-opentelemetry", + "tracing-subscriber", + "url", + "virt_container", + "wasm_container", +] + +[[package]] +name = "rust-ini" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6d5f2436026b4f6e79dc829837d467cc7e9a55ee40e750d716713540715a2df" +dependencies = [ + "cfg-if 1.0.0", + "ordered-multimap", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + +[[package]] +name = "rustix" +version = "0.36.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c37f1bd5ef1b5422177b7646cba67430579cfe2ace80f284fee876bca52ad941" +dependencies = [ + "bitflags 1.3.2", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys 0.1.4", + "windows-sys 0.45.0", +] + +[[package]] +name = "rustix" +version = "0.37.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06" +dependencies = [ + "bitflags 1.3.2", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys 0.3.8", + "windows-sys 0.48.0", +] + +[[package]] +name = "rustix" +version = "0.38.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5" +dependencies = [ + "bitflags 2.3.3", + "errno", + "libc", + "linux-raw-sys 0.4.3", + "windows-sys 0.48.0", +] + +[[package]] +name = "rustversion" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + +[[package]] +name = "safe-path" +version = "0.1.0" +dependencies = [ + "libc", +] + +[[package]] +name = "safe-path" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "980abdd3220aa19b67ca3ea07b173ca36383f18ae48cde696d90c8af39447ffb" +dependencies = [ + "libc", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "seccompiler" +version = "0.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01d1292a1131b22ccea49f30bd106f1238b5ddeec1a98d39268dcc31d540e68" +dependencies = [ + "libc", +] + +[[package]] +name = "sendfd" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "604b71b8fc267e13bb3023a2c901126c8f349393666a6d98ac1ae5729b701798" +dependencies = [ + "libc", +] + +[[package]] +name = "serde" +version = "1.0.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63ba2516aa6bf82e0b19ca8b50019d52df58455d3cf9bdaf6315225fdd0c560a" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde-attributes" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eb8ec7724e4e524b2492b510e66957fe1a2c76c26a6975ec80823f2439da685" +dependencies = [ + "darling_core", + "serde-rename-rule", + "syn 1.0.109", +] + +[[package]] +name = "serde-enum-str" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26416dc95fcd46b0e4b12a3758043a229a6914050aaec2e8191949753ed4e9aa" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "serde-attributes", + "syn 1.0.109", +] + +[[package]] +name = "serde-rename-rule" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "794e44574226fc701e3be5c651feb7939038fc67fb73f6f4dd5c4ba90fd3be70" + +[[package]] +name = "serde_derive" +version = "1.0.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "401797fe7833d72109fedec6bfcbe67c0eed9b99772f26eb8afd261f0abc6fd3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.27", +] + +[[package]] +name = "serde_json" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serial_test" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0bccbcf40c8938196944a3da0e133e031a33f4d6b72db3bda3cc556e361905d" +dependencies = [ + "lazy_static", + "parking_lot 0.11.2", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2acd6defeddb41eb60bb468f8825d0cfd0c2a76bc03bfd235b6a1dc4f6a1ad5" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "service" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "common", + "containerd-shim-protos", + "kata-types", + "logging", + "persist", + "runtimes", + "slog", + "slog-scope", + "tokio", + "tracing", + "ttrpc", +] + +[[package]] +name = "sha1" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" +dependencies = [ + "cfg-if 1.0.0", + "cpufeatures", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa827a14b29ab7f44778d14a88d3cb76e949c45083f7dbfa507d0cb699dc12de" +dependencies = [ + "block-buffer 0.9.0", + "cfg-if 1.0.0", + "cpuid-bool", + "digest 0.9.0", + "opaque-debug", +] + +[[package]] +name = "sha2" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" 
+dependencies = [ + "cfg-if 1.0.0", + "cpufeatures", + "digest 0.10.7", +] + +[[package]] +name = "sharded-slab" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shim" +version = "0.1.0" +dependencies = [ + "anyhow", + "backtrace", + "containerd-shim-protos", + "go-flag", + "kata-sys-util", + "kata-types", + "libc", + "log", + "logging", + "nix 0.24.3", + "oci", + "protobuf 3.2.0", + "rand 0.8.5", + "runtimes", + "serial_test", + "service", + "sha2 0.9.3", + "slog", + "slog-async", + "slog-scope", + "slog-stdlog", + "tempfile", + "tests_utils", + "thiserror", + "tokio", + "tracing", + "tracing-opentelemetry", + "unix_socket2", +] + +[[package]] +name = "shim-ctl" +version = "0.1.0" +dependencies = [ + "anyhow", + "common", + "logging", + "runtimes", + "tokio", +] + +[[package]] +name = "shim-interface" +version = "0.1.0" +dependencies = [ + "anyhow", + "hyper", + "hyperlocal", + "kata-types", + "tokio", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" +dependencies = [ + "autocfg", +] + +[[package]] +name = "slog" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8347046d4ebd943127157b94d63abb990fcf729dc4e9978927fdf4ac3c998d06" + +[[package]] +name = "slog-async" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "766c59b252e62a34651412870ff55d8c4e6d04df19b43eecb2703e417b097ffe" +dependencies = [ + "crossbeam-channel", + "slog", + "take_mut", + "thread_local", +] + +[[package]] +name = "slog-json" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e1e53f61af1e3c8b852eef0a9dee29008f55d6dd63794f3f12cef786cf0f219" +dependencies = [ + "serde", + "serde_json", + "slog", + "time 0.3.23", +] + +[[package]] +name = "slog-scope" +version = "4.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f95a4b4c3274cd2869549da82b57ccc930859bdbf5bcea0424bc5f140b3c786" +dependencies = [ + "arc-swap", + "lazy_static", + "slog", +] + +[[package]] +name = "slog-stdlog" +version = "4.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6706b2ace5bbae7291d3f8d2473e2bfab073ccd7d03670946197aec98471fa3e" +dependencies = [ + "log", + "slog", + "slog-scope", +] + +[[package]] +name = "slog-term" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87d29185c55b7b258b4f120eab00f48557d4d9bc814f41713f449d35b0f8977c" +dependencies = [ + "atty", + "slog", + "term", + "thread_local", + "time 0.3.23", +] + +[[package]] +name = "smallvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" + +[[package]] +name = "socket2" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" +dependencies = [ + "libc", + 
"winapi", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strum" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "rustversion", + "syn 1.0.109", +] + +[[package]] +name = "subprocess" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2e86926081dda636c546d8c5e641661049d7562a68f5488be4a1f7f66f6086" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "take_mut" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" + +[[package]] +name = "tar" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec96d2ffad078296368d46ff1cb309be1c23c513b4ab0e22a45de0185275ac96" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "tempfile" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5486094ee78b2e5038a6382ed7645bc084dc2ec433426ca4c3cb61e2007b8998" +dependencies = [ + "cfg-if 1.0.0", + "fastrand 2.0.0", + "redox_syscall 0.3.5", + "rustix 0.38.4", + "windows-sys 0.48.0", +] + +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + +[[package]] +name = "test-utils" +version = "0.1.0" +dependencies = [ + "nix 0.24.3", +] + +[[package]] +name = "tests_utils" +version = "0.1.0" +dependencies = [ + "anyhow", + "kata-types", + "rand 0.8.5", +] + +[[package]] +name = "thiserror" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.27", +] + +[[package]] +name = "thread_local" +version = "1.1.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if 1.0.0", + "once_cell", +] + +[[package]] +name = "threadpool" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" +dependencies = [ + "num_cpus", +] + +[[package]] +name = "thrift" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09678c4cdbb4eed72e18b7c2af1329c69825ed16fcbac62d083fc3e2b0590ff0" +dependencies = [ + "byteorder", + "integer-encoding", + "log", + "ordered-float", + "threadpool", +] + +[[package]] +name = "time" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" +dependencies = [ + "libc", + "wasi 0.10.0+wasi-snapshot-preview1", + "winapi", +] + +[[package]] +name = "time" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59e399c068f43a5d116fedaf73b203fa4f9c519f17e2b34f63221d3792f81446" +dependencies = [ + "itoa", + "libc", + "num_threads", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" + +[[package]] +name = "time-macros" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96ba15a897f3c86766b757e5ac7221554c6750054d74d5b28844fce5fb36a6c4" +dependencies = [ + "time-core", +] + +[[package]] +name = "timerfd" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3fd47d83ad0b5c7be2e8db0b9d712901ef6ce5afbcc6f676761004f5104ea2" +dependencies = [ + "rustix 0.37.23", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "532826ff75199d5833b9d2c5fe410f29235e25704ee5f0ef599fb51c21f4a4da" +dependencies = [ + "autocfg", + "backtrace", + "bytes 1.4.0", + "libc", + "mio", + "num_cpus", + "parking_lot 0.12.1", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-macros" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.27", +] + +[[package]] +name = "tokio-stream" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" +dependencies = [ + "bytes 1.4.0", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", + "tracing", +] + +[[package]] +name = "tokio-vsock" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b33556828911d16e24d8b5d336446b0bf6b4b9bfda52cbdc2fa35b7a2862ebc" +dependencies = [ + "bytes 0.4.12", + "futures 0.3.28", + "libc", + "tokio", + "vsock", +] + +[[package]] +name = "toml" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "758664fc71a3a69038656bee8b6be6477d2a6c315a6b81f7081f591bffa4111f" +dependencies = [ + "serde", +] + +[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +dependencies = [ + "cfg-if 1.0.0", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.27", +] + +[[package]] +name = "tracing-core" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922" +dependencies = [ + "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-opentelemetry" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21ebb87a95ea13271332df069020513ab70bdb5637ca42d6e492dc3bbbad48de" +dependencies = [ + "once_cell", + "opentelemetry", + "tracing", + "tracing-core", + "tracing-log", + "tracing-subscriber", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "try-lock" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" + +[[package]] +name = "ttrpc" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a35f22a2964bea14afee161665bb260b83cb48e665e0260ca06ec0e775c8b06c" +dependencies = [ + "async-trait", + "byteorder", + "futures 0.3.28", + "libc", + "log", + "nix 0.23.2", + "protobuf 3.2.0", + "protobuf-codegen 3.2.0", + "thiserror", + "tokio", + "tokio-vsock", +] + +[[package]] +name = "ttrpc-codegen" +version = "0.4.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94d7f7631d7a9ebed715a47cd4cb6072cbc7ae1d4ec01598971bbec0024340c2" +dependencies = [ + "protobuf 2.28.0", + "protobuf-codegen 3.2.0", + "protobuf-support", + "ttrpc-compiler", +] + +[[package]] +name = "ttrpc-compiler" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3cb5dbf1f0865a34fe3f722290fe776cacb16f50428610b779467b76ddf647" +dependencies = [ + "derive-new", + "prost", + "prost-build", + "prost-types", + "protobuf 2.28.0", + "protobuf-codegen 2.28.0", + "tempfile", +] + +[[package]] +name = "typenum" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" + +[[package]] +name = "unicode-bidi" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + +[[package]] +name = "unicode-ident" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + +[[package]] +name = "unix_socket2" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b57c6eace16c00eccb98a28e85db3370eab0685bdd5e13831d59e2bcb49a1d8a" +dependencies = [ + "libc", +] + +[[package]] +name = "url" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf8-width" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" + +[[package]] +name = "uuid" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cfec50b0842181ba6e713151b72f4ec84a6a7e2c9c8a8a3ffc37bb1cd16b231" +dependencies = [ + "rand 0.3.23", +] + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "value-bag" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d92ccd67fb88503048c01b59152a04effd0782d035a83a6d256ce6085f08f4a3" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "virt_container" +version = "0.1.0" +dependencies = [ + "agent", + "anyhow", + "async-std", + "async-trait", + "awaitgroup", + 
"common", + "containerd-shim-protos", + "futures 0.3.28", + "hypervisor", + "kata-sys-util", + "kata-types", + "lazy_static", + "libc", + "logging", + "nix 0.24.3", + "oci", + "persist", + "protobuf 3.2.0", + "resource", + "serde", + "serde_derive", + "serde_json", + "slog", + "slog-scope", + "tokio", + "toml 0.4.10", + "tracing", + "url", +] + +[[package]] +name = "virtio-bindings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff512178285488516ed85f15b5d0113a7cdb89e9e8a760b269ae4f02b84bd6b" + +[[package]] +name = "virtio-queue" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ba81e2bcc21c0d2fc5e6683e79367e26ad219197423a498df801d79d5ba77bd" +dependencies = [ + "log", + "virtio-bindings", + "vm-memory", + "vmm-sys-util 0.11.1", +] + +[[package]] +name = "vm-fdt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43fb5a6bd1a7d423ad72802801036719b7546cf847a103f8fe4575f5b0d45a6" + +[[package]] +name = "vm-memory" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "688a70366615b45575a424d9c665561c1b5ab2224d494f706b6a6812911a827c" +dependencies = [ + "arc-swap", + "libc", + "winapi", +] + +[[package]] +name = "vm-superio" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4b5231d334edbc03b22704caa1a022e4c07491d6df736593f26094df8b04a51" + +[[package]] +name = "vmm-sys-util" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08604d7be03eb26e33b3cee3ed4aef2bf550b305d1cca60e84da5d28d3790b62" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + +[[package]] +name = "vmm-sys-util" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd64fe09d8e880e600c324e7d664760a17f56e9672b7495a86381b49e4f72f46" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + +[[package]] +name = "vsock" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e32675ee2b3ce5df274c0ab52d19b28789632406277ca26bffee79a8e27dc133" +dependencies = [ + "libc", + "nix 0.23.2", +] + +[[package]] +name = "waker-fn" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +dependencies = [ + 
"cfg-if 1.0.0", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.27", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" +dependencies = [ + "cfg-if 1.0.0", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.27", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" + +[[package]] +name = "wasm_container" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "common", + "kata-types", + "tokio", +] + +[[package]] +name = "web-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "which" +version = "4.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" +dependencies = [ + "either", + "libc", + "once_cell", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.1", +] + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.1", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + 
+[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "xattr" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc" +dependencies = [ + "libc", +] + +[[package]] +name = "zstd" +version = "0.11.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "5.0.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.8+zstd.1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" +dependencies = [ + "cc", + "libc", + "pkg-config", +] diff --git a/src/runtime-rs/Cargo.toml b/src/runtime-rs/Cargo.toml new file mode 100644 index 000000000000..bbc401f64531 --- /dev/null +++ b/src/runtime-rs/Cargo.toml @@ -0,0 +1,6 @@ +[workspace] +members = [ + "crates/shim", + "crates/shim-ctl", +] + diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile new file mode 100644 index 000000000000..abc96a553251 --- /dev/null +++ b/src/runtime-rs/Makefile @@ -0,0 +1,528 @@ +# Copyright (c) 2019-2022 Alibaba Cloud +# Copyright (c) 2019-2022 Ant Group +# +# SPDX-License-Identifier: Apache-2.0 +# + +# To show variables or targets help on `make help` +# Use the following format: +# '##VAR VARIABLE_NAME: help about variable' +# '##TARGET TARGET_NAME: help about target' +PROJECT_TYPE = kata +PROJECT_NAME = Kata Containers +PROJECT_TAG = kata-containers +PROJECT_URL = https://github.com/kata-containers +PROJECT_COMPONENT = containerd-shim-kata-v2 +CONTAINERD_RUNTIME_NAME = io.containerd.kata.v2 + +include ../../utils.mk + +ifeq ($(ARCH), ppc64le) + override ARCH = powerpc64le +endif + +ARCH_DIR = arch +ARCH_FILE_SUFFIX = -options.mk +ARCH_FILE = $(ARCH_DIR)/$(ARCH)$(ARCH_FILE_SUFFIX) + +ifeq ($(ARCH), s390x) +default: + @echo "s390x is not currently supported" + exit 0 +test: + @echo "s390x is not currently supported" + exit 0 +install: + @echo "s390x is not currently supported" + exit 0 +else ifeq ($(ARCH), powerpc64le) +default: + @echo "PowerPC 64 LE is not currently supported" + exit 0 +test: + @echo "PowerPC 64 LE is not currently supported" + exit 0 +install: + @echo "PowerPC 64 LE is not currently supported" + exit 0 +else +##TARGET default: build code +default: runtime show-header +##TARGET test: run cargo tests +test: static-checks-build + @cargo test --all --target $(TRIPLE) $(EXTRA_RUSTFEATURES) -- --nocapture +install: install-runtime install-configs +endif + +ifeq (,$(realpath $(ARCH_FILE))) + $(error "ERROR: invalid architecture: '$(ARCH)'") +else + # Load architecture-dependent settings + include $(ARCH_FILE) +endif + +ifeq ($(PREFIX),) +PREFIX := /usr +EXEC_PREFIX := $(PREFIX)/local +##VAR BINDIR= is a directory for installing executable programs 
+BINDIR := $(EXEC_PREFIX)/bin +else +EXEC_PREFIX := $(PREFIX) +# when creating the kata-deploy image, the default installation path for go runtime is $(EXEC_PREFIX)/bin, so we put it here for multiple runtime +BINDIR := $(EXEC_PREFIX)/runtime-rs/bin/ +endif + +PREFIXDEPS := $(PREFIX) +LIBEXECDIR := $(PREFIXDEPS)/libexec +SHAREDIR := $(PREFIX)/share +DEFAULTSDIR := $(SHAREDIR)/defaults +PROJECT_DIR = $(PROJECT_TAG) +IMAGENAME = $(PROJECT_TAG).img +TARGET = $(PROJECT_COMPONENT) +SYSCONFDIR := /etc +CONFIG_FILE = configuration.toml +HYPERVISOR_DB = dragonball +HYPERVISOR_ACRN = acrn +HYPERVISOR_FC = firecracker +HYPERVISOR_QEMU = qemu +HYPERVISOR_CLH = cloud-hypervisor + + +DEFAULT_HYPERVISOR ?= $(HYPERVISOR_DB) + +##VAR HYPERVISOR= List of hypervisors this build system can generate configuration for. +HYPERVISORS := $(HYPERVISOR_DB) $(HYPERVISOR_ACRN) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVISOR_CLH) + +DBVALIDHYPERVISORPATHS := [] +PKGDATADIR := $(PREFIXDEPS)/share/$(PROJECT_DIR) +KERNELDIR := $(PKGDATADIR) +IMAGEPATH := $(PKGDATADIR)/$(IMAGENAME) + +ROOTFSTYPE_EXT4 := \"ext4\" +ROOTFSTYPE_XFS := \"xfs\" +ROOTFSTYPE_EROFS := \"erofs\" +DEFROOTFSTYPE := $(ROOTFSTYPE_EXT4) + +PKGLIBEXECDIR := $(LIBEXECDIR)/$(PROJECT_DIR) +FIRMWAREPATH := +FIRMWAREVOLUMEPATH := + +##VAR DEFVCPUS= Default number of vCPUs +DEFVCPUS := 1 +##VAR DEFMAXVCPUS= Default maximum number of vCPUs +DEFMAXVCPUS := 0 +##VAR DEFMEMSZ= Default memory size in MiB +DEFMEMSZ := 2048 +##VAR DEFMEMSLOTS= Default memory slots +# Cases to consider : +# - nvdimm rootfs image +# - preallocated memory +# - vm template memory +# - hugepage memory +DEFMEMSLOTS := 10 +##VAR DEFBRIDGES= Default number of bridges +DEFBRIDGES := 0 +DEFENABLEANNOTATIONS := [\"kernel_params\"] +DEFDISABLEGUESTSECCOMP := true +DEFDISABLEGUESTEMPTYDIR := false +##VAR DEFAULTEXPFEATURES=[features] Default experimental features enabled +DEFAULTEXPFEATURES := [] +DEFDISABLESELINUX := false +##VAR DEFENTROPYSOURCE=[entropy_source] Default entropy source +DEFENTROPYSOURCE := /dev/urandom +DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"] +DEFDISABLEBLOCK := false +DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs +DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs +DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/kata-qemu/virtiofsd +ifeq ($(ARCH),x86_64) +DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/virtiofsd +endif +DEFVALIDVIRTIOFSDAEMONPATHS := [\"$(DEFVIRTIOFSDAEMON)\"] +##VAR DEFVIRTIOFSCACHESIZE= Default DAX mapping cache size in MiB +# if value is 0, DAX is not enabled +DEFVIRTIOFSCACHESIZE ?= 0 +DEFVIRTIOFSCACHE ?= auto +DEFVIRTIOFSQUEUESIZE ?= 1024 +# Format example: +# [\"-o\", \"arg1=xxx,arg2\", \"-o\", \"hello world\", \"--arg3=yyy\"] +# +# see `virtiofsd -h` for possible options. +# Make sure you quote args. 
+DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\", \"-o\", \"announce_submounts\"] +DEFENABLEIOTHREADS := false +DEFENABLEVHOSTUSERSTORE := false +DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user +DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"] +DEFFILEMEMBACKEND := "" +DEFVALIDFILEMEMBACKENDS := [\"$(DEFFILEMEMBACKEND)\"] +DEFMSIZE9P := 8192 +DEFVFIOMODE := guest-kernel +##VAR DEFSANDBOXCGROUPONLY= Default cgroup model +DEFSANDBOXCGROUPONLY ?= false +DEFSTATICRESOURCEMGMT_DB ?= false +DEFBINDMOUNTS := [] +DEFDANCONF := /run/kata-containers/dans +SED = sed +CLI_DIR = cmd +SHIMV2 = containerd-shim-kata-v2 +SHIMV2_OUTPUT = $(CURDIR)/$(SHIMV2) +SHIMV2_DIR = $(CLI_DIR)/$(SHIMV2) +MONITOR = kata-monitor +MONITOR_OUTPUT = $(CURDIR)/$(MONITOR) +MONITOR_DIR = $(CLI_DIR)/kata-monitor +SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$') +VERSION := ${shell cat ./VERSION} + +# List of configuration files to build and install +CONFIGS = +CONFIG_PATHS = +SYSCONFIG_PATHS = +# List of hypervisors known for the current architecture +KNOWN_HYPERVISORS = +# List of hypervisors known for the current architecture +KNOWN_HYPERVISORS = + +CONFDIR := $(DEFAULTSDIR)/$(PROJECT_DIR) +SYSCONFDIR := $(SYSCONFDIR)/$(PROJECT_DIR) +##VAR CONFIG_PATH= Main configuration file location for stateless systems +CONFIG_PATH := $(abspath $(CONFDIR)/$(CONFIG_FILE)) +##VAR SYSCONFIG= Secondary configuration file location. Note that this takes precedence +# over CONFIG_PATH. +SYSCONFIG := $(abspath $(SYSCONFDIR)/$(CONFIG_FILE)) +SHAREDIR := $(SHAREDIR) + +ifneq (,$(DBCMD)) + KNOWN_HYPERVISORS += $(HYPERVISOR_DB) + CONFIG_FILE_DB = configuration-dragonball.toml + CONFIG_DB = config/$(CONFIG_FILE_DB) + CONFIG_DB_IN = $(CONFIG_DB).in + CONFIG_PATH_DB = $(abspath $(CONFDIR)/$(CONFIG_FILE_DB)) + CONFIG_PATHS += $(CONFIG_PATH_DB) + SYSCONFIG_DB = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_DB)) + SYSCONFIG_PATHS += $(SYSCONFIG_DB) + CONFIGS += $(CONFIG_DB) + # dragonball-specific options (all should be suffixed by "_DB") + VMROOTFSDRIVER_DB := virtio-blk-pci + DEFMAXVCPUS_DB := 1 + DEFBLOCKSTORAGEDRIVER_DB := virtio-blk-mmio + DEFNETWORKMODEL_DB := tcfilter + KERNELPARAMS = console=ttyS1 agent.log_vport=1025 + KERNELTYPE_DB = uncompressed + KERNEL_NAME_DB = $(call MAKE_KERNEL_NAME_DB,$(KERNELTYPE_DB)) + KERNELPATH_DB = $(KERNELDIR)/$(KERNEL_NAME_DB) + DEFSANDBOXCGROUPONLY = true + RUNTIMENAME := virt_container + PIPESIZE := 1 + DBSHAREDFS := inline-virtio-fs +endif + +ifeq ($(DEFAULT_HYPERVISOR),$(HYPERVISOR_DB)) + DEFAULT_HYPERVISOR_CONFIG = $(CONFIG_FILE_DB) +endif +# list of variables the user may wish to override +USER_VARS += ARCH +USER_VARS += BINDIR +USER_VARS += CONFIG_DB_IN +USER_VARS += CONFIG_PATH +USER_VARS += DESTDIR +USER_VARS += DEFAULT_HYPERVISOR +USER_VARS += DBCMD +USER_VARS += DBCTLCMD +USER_VARS += DBPATH +USER_VARS += DBVALIDHYPERVISORPATHS +USER_VARS += DBCTLPATH +USER_VARS += DBVALIDCTLPATHS +USER_VARS += SYSCONFIG +USER_VARS += IMAGENAME +USER_VARS += IMAGEPATH +USER_VARS += DEFROOTFSTYPE +USER_VARS += VMROOTFSDRIVER_DB +USER_VARS += MACHINETYPE +USER_VARS += KERNELDIR +USER_VARS += KERNELTYPE +USER_VARS += KERNELPATH_DB +USER_VARS += KERNELPATH +USER_VARS += KERNELVIRTIOFSPATH +USER_VARS += FIRMWAREPATH +USER_VARS += FIRMWAREVOLUMEPATH +USER_VARS += MACHINEACCELERATORS +USER_VARS += CPUFEATURES +USER_VARS += DEFMACHINETYPE_CLH +USER_VARS += KERNELPARAMS +USER_VARS += LIBEXECDIR +USER_VARS += LOCALSTATEDIR +USER_VARS += PKGDATADIR +USER_VARS += PKGLIBEXECDIR +USER_VARS += PKGRUNDIR +USER_VARS += 
PREFIX +USER_VARS += PROJECT_BUG_URL +USER_VARS += PROJECT_NAME +USER_VARS += PROJECT_ORG +USER_VARS += PROJECT_PREFIX +USER_VARS += PROJECT_TAG +USER_VARS += PROJECT_TYPE +USER_VARS += RUNTIME_NAME +USER_VARS += SHAREDIR +USER_VARS += SYSCONFDIR +USER_VARS += DEFVCPUS +USER_VARS += DEFMAXVCPUS +USER_VARS += DEFMAXVCPUS_ACRN +USER_VARS += DEFMAXVCPUS_DB +USER_VARS += DEFMEMSZ +USER_VARS += DEFMEMSLOTS +USER_VARS += DEFBRIDGES +USER_VARS += DEFNETWORKMODEL_DB +USER_VARS += DEFDISABLEGUESTEMPTYDIR +USER_VARS += DEFDISABLEGUESTSECCOMP +USER_VARS += DEFDISABLESELINUX +USER_VARS += DEFAULTEXPFEATURES +USER_VARS += DEFDISABLEBLOCK +USER_VARS += DEFBLOCKSTORAGEDRIVER_DB +USER_VARS += DEFSHAREDFS_CLH_VIRTIOFS +USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS +USER_VARS += DEFVIRTIOFSDAEMON +USER_VARS += DEFVALIDVIRTIOFSDAEMONPATHS +USER_VARS += DEFVIRTIOFSCACHESIZE +USER_VARS += DEFVIRTIOFSCACHE +USER_VARS += DEFVIRTIOFSQUEUESIZE +USER_VARS += DEFVIRTIOFSEXTRAARGS +USER_VARS += DEFENABLEANNOTATIONS +USER_VARS += DEFENABLEIOTHREADS +USER_VARS += DEFENABLEVHOSTUSERSTORE +USER_VARS += DEFVHOSTUSERSTOREPATH +USER_VARS += DEFVALIDVHOSTUSERSTOREPATHS +USER_VARS += DEFFILEMEMBACKEND +USER_VARS += DEFVALIDFILEMEMBACKENDS +USER_VARS += DEFMSIZE9P +USER_VARS += DEFENTROPYSOURCE +USER_VARS += DEFVALIDENTROPYSOURCES +USER_VARS += DEFSANDBOXCGROUPONLY +USER_VARS += DEFSTATICRESOURCEMGMT_DB +USER_VARS += DEFBINDMOUNTS +USER_VARS += DEFVFIOMODE +USER_VARS += BUILDFLAGS +USER_VARS += RUNTIMENAME +USER_VARS += HYPERVISOR_DB +USER_VARS += PIPESIZE +USER_VARS += DBSHAREDFS +USER_VARS += KATA_INSTALL_GROUP +USER_VARS += KATA_INSTALL_OWNER +USER_VARS += KATA_INSTALL_CFG_PERMS +USER_VARS += DEFDANCONF + +SOURCES := \ + $(shell find . 2>&1 | grep -E '.*\.rs$$') \ + Cargo.toml + +VERSION_FILE := ./VERSION +VERSION := $(shell grep -v ^\# $(VERSION_FILE)) +COMMIT_NO := $(shell git rev-parse HEAD 2>/dev/null || true) +COMMIT := $(if $(shell git status --porcelain --untracked-files=no 2>/dev/null || true),${COMMIT_NO}-dirty,${COMMIT_NO}) +COMMIT_MSG = $(if $(COMMIT),$(COMMIT),unknown) + +EXTRA_RUSTFEATURES := + +ifneq ($(EXTRA_RUSTFEATURES),) + override EXTRA_RUSTFEATURES := --features $(EXTRA_RUSTFEATURES) +endif + + +TARGET_PATH = target/$(TRIPLE)/$(BUILD_TYPE)/$(TARGET) + +##VAR DESTDIR= is a directory prepended to each installed target file +DESTDIR ?= / + +GENERATED_CODE = crates/shim/src/config.rs + +RUNTIME_NAME=$(TARGET) +RUNTIME_VERSION=$(VERSION) + +GENERATED_VARS = \ + VERSION \ + CONFIG_DB_IN \ + $(USER_VARS) + + +GENERATED_REPLACEMENTS= \ + PROJECT_NAME \ + RUNTIME_NAME \ + CONTAINERD_RUNTIME_NAME \ + RUNTIME_VERSION \ + BINDIR \ + COMMIT + +GENERATED_FILES := + +GENERATED_FILES += $(GENERATED_CODE) + +# Display name of command and it's version (or a message if not available). +# +# Arguments: +# +# 1: Name of command +define get_command_version +$(shell printf "%s: %s\\n" $(1) "$(or $(shell $(1) --version 2>/dev/null), (not available))") +endef + +define get_toolchain_version +$(shell printf "%s: %s\\n" "toolchain" "$(or $(shell rustup show active-toolchain 2>/dev/null), (unknown))") +endef + +# Install a configuration file +# params: +# $1 : file to install +# $2 : directory path where file will be installed +define INSTALL_FILE + install --mode 0644 -D $1 $(DESTDIR)$2/$(notdir $1); +endef + +# Returns the name of the kernel file to use based on the provided KERNELTYPE. 
+# $1 : KERNELTYPE (compressed or uncompressed) +define MAKE_KERNEL_NAME_DB +$(if $(findstring uncompressed,$1),vmlinux-dragonball-experimental.container,vmlinuz-dragonball-experimental.container) +endef + +.DEFAULT_GOAL := default + +GENERATED_FILES += $(CONFIGS) + +runtime: $(TARGET) + +static-checks-build: $(GENERATED_FILES) + +$(TARGET): $(GENERATED_FILES) $(TARGET_PATH) + +$(TARGET_PATH): $(SOURCES) | show-summary + @RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES) + +$(GENERATED_FILES): %: %.in + @sed \ + $(foreach r,$(GENERATED_REPLACEMENTS),-e 's|@$r@|$($r)|g') \ + $(foreach v,$(GENERATED_VARS),-e "s|@$v@|$($v)|g") \ + $< > $@ + +##TARGET optimize: optimized build +optimize: $(SOURCES) | show-summary show-header + @RUSTFLAGS="-C link-arg=-s $(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES) + +##TARGET clean: clean build +clean: + @cargo clean + @rm -f $(GENERATED_FILES) + @rm -f tarpaulin-report.html + @rm -f $(CONFIGS) + +vendor: + @cargo vendor + +##TARGET check: run test +check: $(GENERATED_FILES) standard_rust_check + +##TARGET run: build and run agent +run: + @cargo run --target $(TRIPLE) + +show-header: + @printf "%s - version %s (commit %s)\n\n" "$(TARGET)" "$(VERSION)" "$(COMMIT_MSG)" + +show-summary: show-header + @printf "• Project:\n" + @printf " name: $(PROJECT_NAME)\n" + @printf " url: $(PROJECT_URL)\n" + @printf " component: $(PROJECT_COMPONENT)\n" + @printf "\n" + @printf "• Target: $(TARGET)\n" + @printf "\n" + @printf "• Architecture: $(ARCH)\n" + @printf "\n" + @printf "• Rust:\n" + @printf " %s\n" "$(call get_command_version,cargo)" + @printf " %s\n" "$(call get_command_version,rustc)" + @printf " %s\n" "$(call get_command_version,rustup)" + @printf " %s\n" "$(call get_toolchain_version)" + @printf "\n" + @printf "• Hypervisors:\n" + @printf "\tDefault: $(DEFAULT_HYPERVISOR)\n" + @printf "\tKnown: $(sort $(HYPERVISORS))\n" + @printf "\tAvailable for this architecture: $(sort $(KNOWN_HYPERVISORS))\n" + @printf "\n" + @printf "• Summary:\n" + @printf "\n" + @printf "\tdestination install path (DESTDIR) : %s\n" $(abspath $(DESTDIR)) + @printf "\tbinary installation path (BINDIR) : %s\n" $(abspath $(BINDIR)) + @printf "\tbinaries to install :\n" + @printf \ + "$(foreach b,$(sort $(SHIMV2)),$(shell printf "\\t - $(shell readlink -m $(DESTDIR)/$(BINDIR)/$(b))\\\n"))" + @printf "\tconfigs to install (CONFIGS) :\n" + @printf \ + "$(foreach c,$(sort $(CONFIGS)),$(shell printf "\\t - $(c)\\\n"))" + @printf "\tinstall paths (CONFIG_PATHS) :\n" + @printf \ + "$(foreach c,$(sort $(CONFIG_PATHS)),$(shell printf "\\t - $(c)\\\n"))" + @printf "\talternate config paths (SYSCONFIG_PATHS) : %s\n" + @printf \ + "$(foreach c,$(sort $(SYSCONFIG_PATHS)),$(shell printf "\\t - $(c)\\\n"))" + @printf "\tdefault install path for $(DEFAULT_HYPERVISOR) (CONFIG_PATH) : %s\n" $(abspath $(CONFIG_PATH)) + @printf "\tdefault alternate config path (SYSCONFIG) : %s\n" $(abspath $(SYSCONFIG)) +ifneq (,$(findstring $(HYPERVISOR_QEMU),$(KNOWN_HYPERVISORS))) + @printf "\t$(HYPERVISOR_QEMU) hypervisor path (QEMUPATH) : %s\n" $(abspath $(QEMUPATH)) +endif +ifneq (,$(findstring $(HYPERVISOR_QEMU_VIRTIOFS),$(KNOWN_HYPERVISORS))) + @printf "\t$(HYPERVISOR_QEMU_VIRTIOFS) hypervisor path (QEMUVIRTIOFSPATH) : %s\n" $(abspath $(QEMUVIRTIOFSPATH)) +endif +ifneq (,$(findstring $(HYPERVISOR_CLH),$(KNOWN_HYPERVISORS))) + @printf "\t$(HYPERVISOR_CLH) hypervisor path (CLHPATH) : %s\n" $(abspath 
$(CLHPATH)) +endif +ifneq (,$(findstring $(HYPERVISOR_FC),$(KNOWN_HYPERVISORS))) + @printf "\t$(HYPERVISOR_FC) hypervisor path (FCPATH) : %s\n" $(abspath $(FCPATH)) +endif +ifneq (,$(findstring $(HYPERVISOR_ACRN),$(KNOWN_HYPERVISORS))) + @printf "\t$(HYPERVISOR_ACRN) hypervisor path (ACRNPATH) : %s\n" $(abspath $(ACRNPATH)) +endif + @printf "\tassets path (PKGDATADIR) : %s\n" $(abspath $(PKGDATADIR)) + @printf "\tshim path (PKGLIBEXECDIR) : %s\n" $(abspath $(PKGLIBEXECDIR)) + @printf "\n" +##TARGET help: Show help comments that start with `##VAR` and `##TARGET` in runtime-rs makefile +help: Makefile show-summary + @echo "========================== Help =============================" + @echo "Variables:" + @sed -n 's/^##VAR//p' $< | sort + @echo "" + @echo "Targets:" + @sed -n 's/^##TARGET//p' $< | sort + +TARPAULIN_ARGS:=-v --workspace +install-tarpaulin: + cargo install cargo-tarpaulin + +# Check if cargo tarpaulin is installed +HAS_TARPAULIN:= $(shell cargo --list | grep tarpaulin 2>/dev/null) +check_tarpaulin: +ifndef HAS_TARPAULIN + $(error "tarpaulin is not available please: run make install-tarpaulin ") +else + $(info OK: tarpaulin installed) +endif + +##TARGET codecov: Generate code coverage report +codecov: check_tarpaulin + cargo tarpaulin $(TARPAULIN_ARGS) + +##TARGET codecov-html: Generate code coverage html report +codecov-html: check_tarpaulin + cargo tarpaulin $(TARPAULIN_ARGS) -o Html + +install-runtime: runtime + install -D $(TARGET_PATH) $(DESTDIR)$(BINDIR)/$(notdir $(TARGET_PATH)) + +install-configs: $(CONFIGS) + $(foreach f,$(CONFIGS),$(call INSTALL_FILE,$f,$(dir $(CONFIG_PATH)))) \ + ln -sf $(DEFAULT_HYPERVISOR_CONFIG) $(DESTDIR)/$(CONFIG_PATH) + +.PHONY: \ + help \ + optimize \ + show-header \ + show-summary \ + vendor diff --git a/src/runtime-rs/README.md b/src/runtime-rs/README.md new file mode 100644 index 000000000000..973439975d1d --- /dev/null +++ b/src/runtime-rs/README.md @@ -0,0 +1,131 @@ +# runtime-rs + +## What's runtime-rs + +`runtime-rs` is a new component introduced in Kata Containers 3.0; it is a Rust version of the runtime (shim). It is similar to [runtime](../runtime), but there are many differences: + +- `runtime-rs` is written in Rust, and `runtime` is written in Go. +- `runtime` is the default shim in Kata Containers 3.0, while `runtime-rs` is still under heavy development. +- `runtime-rs` has a completely different architecture from `runtime`; see the [architecture overview](../../docs/design/architecture_3.0). + +**Note**: + +`runtime-rs` is still under heavy development; avoid using it in critical systems. + +## Architecture overview + +`runtime-rs` provides the following features: + +- Turn-key solution with built-in `Dragonball` sandbox, all components in one process +- Async I/O to reduce resource consumption +- Extensible framework for multiple services, runtimes and hypervisors +- Lifecycle management for sandbox and container associated resources + +See the [architecture overview](../../docs/design/architecture_3.0) +for details on the `runtime-rs` design. + +`runtime-rs` is a runtime written in Rust and is composed of several crates. + +This picture shows an overview of the crates under this directory and the relationships between them. + +![crates overview](docs/images/crate-overview.svg) + +Not all features have been implemented yet; for details, please check the [roadmap](../../docs/design/architecture_3.0/README.md#roadmap).
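To make the layering described above more concrete, here is a minimal, hypothetical sketch of how a sandbox runtime can be wired behind traits so that hypervisors and agents are swappable. The names (`SandboxHypervisor`, `GuestAgent`, `Sandbox`, `FakeHypervisor`, `FakeAgent`) are invented for illustration and are not the actual APIs of the crates listed in the next section; the real code is asynchronous (`tokio` plus `async-trait`) and far more complete.

```rust
use std::collections::HashMap;

/// Anything that can start a VM-based sandbox (e.g. a built-in Dragonball).
trait SandboxHypervisor {
    fn name(&self) -> &str;
    fn start_vm(&mut self) -> Result<(), String>;
}

/// A connection to the agent running inside the guest OS.
trait GuestAgent {
    fn create_container(&mut self, id: &str) -> Result<(), String>;
}

/// One in-process sandbox: a hypervisor plus an agent connection.
/// Supporting another hypervisor or agent only means adding another trait impl.
struct Sandbox {
    hypervisor: Box<dyn SandboxHypervisor>,
    agent: Box<dyn GuestAgent>,
    containers: HashMap<String, ()>,
}

impl Sandbox {
    fn start(&mut self) -> Result<(), String> {
        println!("starting VM with {}", self.hypervisor.name());
        self.hypervisor.start_vm()
    }

    fn create_container(&mut self, id: &str) -> Result<(), String> {
        // Ask the guest agent to create the container, then track it.
        self.agent.create_container(id)?;
        self.containers.insert(id.to_string(), ());
        Ok(())
    }
}

// Stub implementations so the sketch compiles and runs on its own.
struct FakeHypervisor;
impl SandboxHypervisor for FakeHypervisor {
    fn name(&self) -> &str {
        "fake-hypervisor"
    }
    fn start_vm(&mut self) -> Result<(), String> {
        Ok(())
    }
}

struct FakeAgent;
impl GuestAgent for FakeAgent {
    fn create_container(&mut self, id: &str) -> Result<(), String> {
        println!("agent: create container {}", id);
        Ok(())
    }
}

fn main() -> Result<(), String> {
    let mut sandbox = Sandbox {
        hypervisor: Box::new(FakeHypervisor),
        agent: Box::new(FakeAgent),
        containers: HashMap::new(),
    };
    sandbox.start()?;
    sandbox.create_container("c1")
}
```

In the real crates the same separation is expressed with async traits, so a single process can host the built-in `Dragonball` sandbox, the agent ttRPC client and the log forwarder on one `tokio` runtime.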
+ +## Crates + The `runtime-rs` directory contains, under its `crates` directory, the crates that compose `containerd-shim-kata-v2`. + +| Crate | Description | +|-|-| +| [`shim`](crates/shim)| containerd shimv2 implementation | +| [`service`](crates/service)| services for containers, includes task service | +| [`runtimes`](crates/runtimes)| container runtimes | +| [`resource`](crates/resource)| sandbox and container resources | +| [`hypervisor`](crates/hypervisor)| hypervisor that acts as a sandbox | +| [`agent`](crates/agent)| library used to communicate with the agent in the guest OS | +| [`persist`](crates/persist)| persist container state to disk | + +### shim + +`shim` is the entry point of the containerd shim process; it implements containerd shim's [binary protocol](https://github.com/containerd/containerd/tree/v1.6.8/runtime/v2#commands): + +- start: start a new shim process +- delete: delete an existing shim process +- run: run the ttRPC service in the shim + +containerd will launch a shim process, and the shim process will serve as a ttRPC server to provide the shim service through `TaskService` from the `service` crate. + +### service + +`runtime-rs` has an extensible framework that supports extension of services, runtimes, and hypervisors. + +Currently, only the containerd-compatible `TaskService` is implemented. + +`TaskService` has implemented the [containerd shim protocol](https://docs.rs/containerd-shim-protos/0.2.0/containerd_shim_protos/), +and interacts with runtimes through messages. + +### runtimes + +A runtime is a container runtime; the runtime handler handles messages from task services to manage containers. +The runtime handler and runtime instance deal with operations for sandboxes and containers. + +Currently, only `VirtContainer` has been implemented. + +### resource + +In `runtime-rs`, all networks/volumes/rootfs are abstracted as resources. + +Resources are classified into two types: + +- sandbox resources: network, share-fs +- container resources: rootfs, volume, cgroup + +[Here](../../docs/design/architecture_3.0/README.md#resource-manager) is a detailed description of the resources. + +### hypervisor + +For `VirtContainer`, there will be more hypervisors to choose from. + +Currently, only the built-in `Dragonball` has been implemented. + +### agent + +`agent` is used to communicate with the agent in the guest OS from the shim side. The only supported agent is `KataAgent`. + +### persist + +Persist defines traits and functions to help different components save state to disk and load state from disk. + +### helper libraries + +Some helper libraries are maintained in [the library directory](../libs) so that they can be shared with other Rust components. + +## Build and install + +See the +[build from the source section of the rust runtime installation guide](../../docs/install/kata-containers-3.0-rust-runtime-installation-guide.md#build-from-source-installation). + +## Configuration + +`runtime-rs` has the same [configuration as `runtime`](../runtime/README.md#configuration) with some [limitations](#limitations). + +## Logging + +See the +[debugging section of the developer guide](../../docs/Developer-Guide.md#troubleshoot-kata-containers). + +## Debugging + +See the +[debugging section of the developer guide](../../docs/Developer-Guide.md#troubleshoot-kata-containers). + +An [experimental alternative binary](crates/shim-ctl/README.md) is available that removes containerd dependencies and makes it easier to run the shim proper outside of the runtime's usual deployment environment (i.e.
on a developer machine). + +## Limitations + +For Kata Containers limitations, see the +[limitations file](../../docs/Limitations.md) +for further details. + +`runtime-rs` is under heavy development and doesn't support all the features of the Golang version [`runtime`](../runtime); check the [roadmap](../../docs/design/architecture_3.0/README.md#roadmap) for details. diff --git a/src/runtime-rs/VERSION b/src/runtime-rs/VERSION new file mode 120000 index 000000000000..558194c5a5a5 --- /dev/null +++ b/src/runtime-rs/VERSION @@ -0,0 +1 @@ +../../VERSION \ No newline at end of file diff --git a/src/runtime-rs/arch/aarch64-options.mk b/src/runtime-rs/arch/aarch64-options.mk new file mode 100644 index 000000000000..2e9e5759b73a --- /dev/null +++ b/src/runtime-rs/arch/aarch64-options.mk @@ -0,0 +1,15 @@ +# Copyright (c) 2019-2022 Alibaba Cloud +# Copyright (c) 2019-2022 Ant Group +# +# SPDX-License-Identifier: Apache-2.0 +# + +MACHINETYPE := +KERNELPARAMS := +MACHINEACCELERATORS := +CPUFEATURES := pmu=off + +QEMUCMD := qemu-system-aarch64 + +# dragonball binary name +DBCMD := dragonball diff --git a/src/runtime-rs/arch/powerpc64le-options.mk b/src/runtime-rs/arch/powerpc64le-options.mk new file mode 100644 index 000000000000..0a974680e0f1 --- /dev/null +++ b/src/runtime-rs/arch/powerpc64le-options.mk @@ -0,0 +1,15 @@ +# Copyright (c) 2019-2022 Alibaba Cloud +# Copyright (c) 2019-2022 Ant Group +# +# SPDX-License-Identifier: Apache-2.0 +# + +MACHINETYPE := pseries +KERNELPARAMS := +MACHINEACCELERATORS := "cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken,cap-large-decr=off,cap-ccf-assist=off" +CPUFEATURES := pmu=off + +QEMUCMD := qemu-system-ppc64 + +# dragonball binary name +DBCMD := dragonball diff --git a/src/runtime-rs/arch/s390x-options.mk b/src/runtime-rs/arch/s390x-options.mk new file mode 100644 index 000000000000..f6381eee22aa --- /dev/null +++ b/src/runtime-rs/arch/s390x-options.mk @@ -0,0 +1,15 @@ +# Copyright (c) 2019-2022 Alibaba Cloud +# Copyright (c) 2019-2022 Ant Group +# +# SPDX-License-Identifier: Apache-2.0 +# + +MACHINETYPE := +KERNELPARAMS := +MACHINEACCELERATORS := +CPUFEATURES := pmu=off + +QEMUCMD := qemu-system-s390x + +# dragonball binary name +DBCMD := dragonball diff --git a/src/runtime-rs/arch/x86_64-options.mk b/src/runtime-rs/arch/x86_64-options.mk new file mode 100644 index 000000000000..0e837f065781 --- /dev/null +++ b/src/runtime-rs/arch/x86_64-options.mk @@ -0,0 +1,15 @@ +# Copyright (c) 2019-2022 Alibaba Cloud +# Copyright (c) 2019-2022 Ant Group +# +# SPDX-License-Identifier: Apache-2.0 +# + +MACHINETYPE := q35 +KERNELPARAMS := +MACHINEACCELERATORS := +CPUFEATURES := pmu=off + +QEMUCMD := qemu-system-x86_64 + +# dragonball binary name +DBCMD := dragonball diff --git a/src/runtime-rs/config/configuration-dragonball.toml.in b/src/runtime-rs/config/configuration-dragonball.toml.in new file mode 100644 index 000000000000..f4b6bcfdbd06 --- /dev/null +++ b/src/runtime-rs/config/configuration-dragonball.toml.in @@ -0,0 +1,334 @@ +# Copyright (c) 2019-2022 Alibaba Cloud +# Copyright (c) 2019-2022 Ant Group +# +# SPDX-License-Identifier: Apache-2.0 +# + +# XXX: WARNING: this file is auto-generated.
+# XXX: +# XXX: Source file: "@CONFIG_DB_IN@" +# XXX: Project: +# XXX: Name: @PROJECT_NAME@ +# XXX: Type: @PROJECT_TYPE@ + +[hypervisor.dragonball] +path = "@DBPATH@" +ctlpath = "@DBCTLPATH@" +kernel = "@KERNELPATH_DB@" +image = "@IMAGEPATH@" + +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + + +# Block storage driver to be used when the VM rootfs is backed +# by a block device. This is virtio-blk-pci, virtio-blk-mmio or nvdimm +vm_rootfs_driver = "@VMROOTFSDRIVER_DB@" + +# List of valid annotation names for the hypervisor +# Each member of the list is a regular expression, which is the base name +# of the annotation, e.g. "path" for "io.katacontainers.config.hypervisor.path" +enable_annotations = @DEFENABLEANNOTATIONS@ + +# List of valid annotation values for the hypervisor +# Each member of the list is a path pattern as described by glob(3). +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DBVALIDHYPERVISORPATHS@ +valid_hypervisor_paths = @DBVALIDHYPERVISORPATHS@ + +# List of valid annotation values for ctlpath +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: +# valid_ctlpaths = + +# Optional space-separated list of options to pass to the guest kernel. +# For example, use `kernel_params = "vsyscall=emulate"` if you are having +# trouble running pre-2.15 glibc. +# +# WARNING: - any parameter specified here will take priority over the default +# parameter value of the same name used to start the virtual machine. +# Do not set values here unless you understand the impact of doing so as you +# may stop the virtual machine from booting. +# To see the list of default parameters, enable hypervisor debug, create a +# container and look for 'default-kernel-parameters' log entries. +kernel_params = "@KERNELPARAMS@" + +# Path to the firmware. +# If you want DB to use the default firmware, leave this option empty +firmware = "@FIRMWAREPATH@" + + +# Default number of vCPUs per SB/VM: +# unspecified or 0 --> will be set to 1 +# < 0 --> will be set to the actual number of physical cores +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores +default_vcpus = @DEFVCPUS@ + + +# Default maximum number of vCPUs per SB/VM: +# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# WARNING: Depending on the architecture, the maximum number of vCPUs supported by KVM is used when +# the actual number of physical cores is greater than it. +# WARNING: Be aware that this value impacts the virtual machine's memory footprint and the CPU +# hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs +# can be added to a SB/VM, but the memory footprint will be big. Another example, with +# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of +# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, +# unless you know what you are doing. +default_maxvcpus = @DEFMAXVCPUS_DB@ + +# Bridges can be used to hot plug devices.
+# Limitations: +# * Currently only pci bridges are supported +# * Up to 30 devices per bridge can be hot plugged. +# * Up to 5 PCI bridges can be cold plugged per VM. +# This limitation could be a bug in the kernel +# Default number of bridges per SB/VM: +# unspecified or 0 --> will be set to @DEFBRIDGES@ +# > 1 <= 5 --> will be set to the specified number +# > 5 --> will be set to 5 +default_bridges = @DEFBRIDGES@ + +# Default memory size in MiB for SB/VM. +# If unspecified then it will be set to @DEFMEMSZ@ MiB. +default_memory = @DEFMEMSZ@ + +# Block storage driver to be used for the hypervisor in case the container +# rootfs is backed by a block device. DB only supports virtio-blk. +block_device_driver = "@DEFBLOCKSTORAGEDRIVER_DB@" + +# This option changes the default hypervisor and kernel parameters +# to enable debug output where available. +# +# Default false +#enable_debug = true + +# Disable the customizations done in the runtime when it detects +# that it is running on top of a VMM. This will result in the runtime +# behaving as it would when running on bare metal. +# +#disable_nesting_checks = true + +# If the host doesn't support vhost_net, set to true. Thus we won't create vhost fds for nics. +# Default false +#disable_vhost_net = true + +# Path to OCI hook binaries in the *guest rootfs*. +# This does not affect host-side hooks which must instead be added to +# the OCI spec passed to the runtime. +# +# You can create a rootfs with hooks by customizing the osbuilder scripts: +# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder +# +# Hooks must be stored in a subdirectory of guest_hook_path according to their +# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}". +# The agent will scan these directories for executable files and add them, in +# lexicographical order, to the lifecycle of the guest container. +# Hooks are executed in the runtime namespace of the guest. See the official documentation: +# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks +# Warnings will be logged if any error is encountered while scanning for hooks, +# but it will not abort container execution. +#guest_hook_path = "/usr/share/oci/hooks" + +# Shared file system type: +# - inline-virtio-fs (default) +# - virtio-fs +# - virtio-9p +# - virtio-fs-nydus +# "inline-virtio-fs" is the same as "virtio-fs", but it runs in the same process +# as the shim and does not need an external virtiofsd process. +shared_fs = "@DBSHAREDFS@" + +# Default size of DAX cache in MiB +virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ + +# Extra args for the virtiofsd daemon +# +# Format example: +# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +# Examples: +# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"] +# +# see `virtiofsd -h` for possible options. +virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ + +# Cache mode: +# +# - never +# Metadata, data, and pathname lookup are not cached in guest. They are +# always fetched from host and any changes are immediately pushed to host. +# +# - auto +# Metadata and pathname lookup cache expires after a configured amount of +# time (default is 1 second). Data is cached while the file is open (close +# to open consistency). +# +# - always +# Metadata, data, and pathname lookup are cached in guest and never expire. +virtio_fs_cache = "@DEFVIRTIOFSCACHE@" + +# Enable huge pages for VM RAM, default false +# Enabling this will result in the VM memory +# being allocated using huge pages.
+# This is useful when you want to use vhost-user network +# stacks within the container. This will automatically +# result in memory preallocation +#enable_hugepages = true + +[agent.@PROJECT_TYPE@] +container_pipe_size=@PIPESIZE@ +# If enabled, make the agent display debug-level messages. +# (default: disabled) +#enable_debug = true + +# Enable agent tracing. +# +# If enabled, the agent will generate OpenTelemetry trace spans. +# +# Notes: +# +# - If the runtime also has tracing enabled, the agent spans will be +# associated with the appropriate runtime parent span. +# - If enabled, the runtime will wait for the container to shut down, +# increasing the container shutdown time slightly. +# +# (default: disabled) +#enable_tracing = true + +# Enable debug console. + +# If enabled, users can connect to the guest OS running inside the hypervisor +# through the "kata-runtime exec " command + +#debug_console_enabled = true + +# Agent connection dialing timeout value in seconds +# (default: 45) +dial_timeout = 45 + +[runtime] +# If enabled, the runtime will log additional debug messages to the +# system log +# (default: disabled) +#enable_debug = true + +# If enabled, it means that 1) if the runtime exits abnormally, +# the cleanup process will be skipped, and 2) the runtime will not exit +# even if the health check fails. +# This option is typically used to retain abnormal information for debugging. +# (default: false) +#keep_abnormal = true + +# Internetworking model +# Determines how the VM should be connected to +# the container network interface +# Options: +# +# - bridged (Deprecated) +# Uses a linux bridge to interconnect the container interface to +# the VM. Works for most cases except macvlan and ipvlan. +# ***NOTE: This feature has been deprecated with plans to remove this +# feature in the future. Please use other network models listed below. +# +# +# - macvtap +# Used when the Container network interface can be bridged using +# macvtap. +# +# - none +# Used with a customized network. Only creates a tap device. No veth pair. +# +# - tcfilter +# Uses tc filter rules to redirect traffic from the network interface +# provided by the plugin to a tap interface connected to the VM. +# +internetworking_model="@DEFNETWORKMODEL_DB@" + +name="@RUNTIMENAME@" +hypervisor_name="@HYPERVISOR_DB@" +agent_name="@PROJECT_TYPE@" + +# disable guest seccomp +# Determines whether container seccomp profiles are passed to the virtual +# machine and applied by the kata agent. If set to true, seccomp is not applied +# within the guest +# (default: true) +disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ + +# If enabled, the runtime will create opentracing.io traces and spans. +# (See https://www.jaegertracing.io/docs/getting-started). +# (default: disabled) +#enable_tracing = true + +# Set the full url to the Jaeger HTTP Thrift collector. +# The default if not set will be "http://localhost:14268/api/traces" +#jaeger_endpoint = "" + +# Sets the username to be used if basic auth is required for Jaeger. +#jaeger_user = "" + +# Sets the password to be used if basic auth is required for Jaeger. +#jaeger_password = "" + +# If enabled, the runtime will not create a network namespace for shim and hypervisor processes. +# This option may have some potential impacts on your host. It should only be used when you know what you're doing. +# `disable_new_netns` conflicts with `internetworking_model=bridged` and `internetworking_model=macvtap`. It works only +# with `internetworking_model=none`.
The tap device will be in the host network namespace and can connect to a bridge +# (like OVS) directly. +# (default: false) +#disable_new_netns = true + +# If enabled, the runtime will add all the kata processes inside one dedicated cgroup. +# The container cgroups in the host are not created, just one single cgroup per sandbox. +# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. +# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. +# The sandbox cgroup is constrained if there is no container type annotation. +# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType +sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ + +# Enabled experimental feature list, format: ["a", "b"]. +# Experimental features are features not stable enough for production, +# they may break compatibility, and are prepared for a big version bump. +# Supported experimental features: +# (default: []) +experimental=@DEFAULTEXPFEATURES@ + +# If enabled, users can run pprof tools with the shim v2 process through kata-monitor. +# (default: false) +# enable_pprof = true + +# If enabled, the runtime will attempt to determine the appropriate sandbox size (memory, CPU) before booting the virtual machine. In +# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful +# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug. +# Compatibility for determining appropriate sandbox (VM) size: +# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O +# does not yet support sandbox sizing annotations. +# - When running single containers using a tool like ctr, container sizing information will be available. +static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_DB@ + +# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro, rw) into the sandbox's shared path. +# This is only valid if filesystem sharing is utilized. The provided path(s) will be bind mounted into the shared fs directory. +# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` +# These will not be exposed to the container workloads, and are only provided for potential guest services. +# Three kinds of bind mount formats are supported: +# - "/path/to", default readonly mode. +# - "/path/to:ro", readonly mode. +# - "/path/to:rw", readwrite mode. +sandbox_bind_mounts=@DEFBINDMOUNTS@ + +# Base directory of the directly attachable network config. +# Network devices for VM-based containers are allowed to be placed in the +# host netns to eliminate as many hops as possible, which is what we +# call a "Directly Attachable Network". The config, set by special CNI +# plugins, is used to tell Kata Containers what devices are attached +# to the hypervisor.
+# (default: /run/kata-containers/dans) +dan_conf = "@DEFDANCONF@" diff --git a/src/runtime-rs/crates/agent/Cargo.toml b/src/runtime-rs/crates/agent/Cargo.toml new file mode 100644 index 000000000000..53f6c7290f82 --- /dev/null +++ b/src/runtime-rs/crates/agent/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "agent" +version = "0.1.0" +authors = ["The Kata Containers community "] +edition = "2018" +license = "Apache-2.0" + +[dev-dependencies] +futures = "0.1.27" + +[dependencies] +anyhow = "1.0.26" +async-trait = "0.1.48" +log = "0.4.14" +protobuf = "3.2.0" +serde = { version = "^1.0", features = ["derive"] } +serde_json = ">=1.0.9" +slog = "2.5.2" +slog-scope = "4.4.0" +ttrpc = { version = "0.7.1" } +tokio = { version = "1.28.1", features = ["fs", "rt"] } +tracing = "0.1.36" +url = "2.2.2" +nix = "0.24.2" + +kata-types = { path = "../../../libs/kata-types"} +logging = { path = "../../../libs/logging"} +oci = { path = "../../../libs/oci" } +protocols = { path = "../../../libs/protocols", features=["async"] } + +[features] +default = [] diff --git a/src/runtime-rs/crates/agent/src/kata/agent.rs b/src/runtime-rs/crates/agent/src/kata/agent.rs new file mode 100644 index 000000000000..ee3e97bf8513 --- /dev/null +++ b/src/runtime-rs/crates/agent/src/kata/agent.rs @@ -0,0 +1,126 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use tracing::instrument; +use ttrpc::context as ttrpc_ctx; + +use kata_types::config::Agent as AgentConfig; + +use crate::{kata::KataAgent, Agent, AgentManager, HealthService}; + +/// millisecond to nanosecond +const MILLISECOND_TO_NANOSECOND: i64 = 1_000_000; + +/// new ttrpc context with timeout +fn new_ttrpc_ctx(timeout: i64) -> ttrpc_ctx::Context { + ttrpc_ctx::with_timeout(timeout) +} + +#[async_trait] +impl AgentManager for KataAgent { + #[instrument] + async fn start(&self, address: &str) -> Result<()> { + info!(sl!(), "begin to connect agent {:?}", address); + self.set_socket_address(address) + .await + .context("set socket")?; + self.connect_agent_server() + .await + .context("connect agent server")?; + self.start_log_forwarder() + .await + .context("connect log forwarder")?; + Ok(()) + } + + async fn stop(&self) { + self.stop_log_forwarder().await; + } + + async fn agent_sock(&self) -> Result { + self.agent_sock().await + } + + async fn agent_config(&self) -> AgentConfig { + self.agent_config().await + } +} + +// implement for health service +macro_rules! impl_health_service { + ($($name: tt | $req: ty | $resp: ty),*) => { + #[async_trait] + impl HealthService for KataAgent { + $(async fn $name(&self, req: $req) -> Result<$resp> { + let r = req.into(); + let (client, timeout, _) = self.get_health_client().await.context("get health client")?; + let resp = client.$name(new_ttrpc_ctx(timeout * MILLISECOND_TO_NANOSECOND), &r).await?; + Ok(resp.into()) + })* + } + }; +} + +impl_health_service!( + check | crate::CheckRequest | crate::HealthCheckResponse, + version | crate::CheckRequest | crate::VersionCheckResponse +); + +macro_rules! 
impl_agent { + ($($name: tt | $req: ty | $resp: ty | $new_timeout: expr),*) => { + #[async_trait] + impl Agent for KataAgent { + #[instrument(skip(req))] + $(async fn $name(&self, req: $req) -> Result<$resp> { + let r = req.into(); + let (client, mut timeout, _) = self.get_agent_client().await.context("get client")?; + + // update new timeout + if let Some(v) = $new_timeout { + timeout = v; + } + + let resp = client.$name(new_ttrpc_ctx(timeout * MILLISECOND_TO_NANOSECOND), &r).await?; + Ok(resp.into()) + })* + } + }; +} + +impl_agent!( + create_container | crate::CreateContainerRequest | crate::Empty | None, + start_container | crate::ContainerID | crate::Empty | None, + remove_container | crate::RemoveContainerRequest | crate::Empty | None, + exec_process | crate::ExecProcessRequest | crate::Empty | None, + signal_process | crate::SignalProcessRequest | crate::Empty | None, + wait_process | crate::WaitProcessRequest | crate::WaitProcessResponse | Some(0), + update_container | crate::UpdateContainerRequest | crate::Empty | None, + stats_container | crate::ContainerID | crate::StatsContainerResponse | None, + pause_container | crate::ContainerID | crate::Empty | None, + resume_container | crate::ContainerID | crate::Empty | None, + write_stdin | crate::WriteStreamRequest | crate::WriteStreamResponse | Some(0), + read_stdout | crate::ReadStreamRequest | crate::ReadStreamResponse | Some(0), + read_stderr | crate::ReadStreamRequest | crate::ReadStreamResponse | Some(0), + close_stdin | crate::CloseStdinRequest | crate::Empty | None, + tty_win_resize | crate::TtyWinResizeRequest | crate::Empty | None, + update_interface | crate::UpdateInterfaceRequest | crate::Interface | None, + update_routes | crate::UpdateRoutesRequest | crate::Routes | None, + add_arp_neighbors | crate::AddArpNeighborRequest | crate::Empty | None, + list_interfaces | crate::Empty | crate::Interfaces | None, + list_routes | crate::Empty | crate::Routes | None, + create_sandbox | crate::CreateSandboxRequest | crate::Empty | None, + destroy_sandbox | crate::Empty | crate::Empty | None, + copy_file | crate::CopyFileRequest | crate::Empty | None, + get_oom_event | crate::Empty | crate::OomEventResponse | Some(0), + get_ip_tables | crate::GetIPTablesRequest | crate::GetIPTablesResponse | None, + set_ip_tables | crate::SetIPTablesRequest | crate::SetIPTablesResponse | None, + get_volume_stats | crate::VolumeStatsRequest | crate::VolumeStatsResponse | None, + resize_volume | crate::ResizeVolumeRequest | crate::Empty | None, + online_cpu_mem | crate::OnlineCPUMemRequest | crate::Empty | None, + get_metrics | crate::Empty | crate::MetricsResponse | None +); diff --git a/src/runtime-rs/crates/agent/src/kata/mod.rs b/src/runtime-rs/crates/agent/src/kata/mod.rs new file mode 100644 index 000000000000..ce3f2305ee7d --- /dev/null +++ b/src/runtime-rs/crates/agent/src/kata/mod.rs @@ -0,0 +1,157 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod agent; +mod trans; + +use std::{ + os::unix::io::{IntoRawFd, RawFd}, + sync::Arc, +}; + +use anyhow::{Context, Result}; +use kata_types::config::Agent as AgentConfig; +use protocols::{agent_ttrpc_async as agent_ttrpc, health_ttrpc_async as health_ttrpc}; +use tokio::sync::RwLock; +use ttrpc::asynchronous::Client; + +use crate::{log_forwarder::LogForwarder, sock}; + +// https://github.com/firecracker-microvm/firecracker/blob/master/docs/vsock.md +#[derive(Debug, Default)] +pub struct Vsock { + pub context_id: u64, 
+ pub port: u32, +} + +pub(crate) struct KataAgentInner { + /// TTRPC client + pub client: Option, + + /// Client fd + pub client_fd: RawFd, + + /// Unix domain socket address + pub socket_address: String, + + /// Agent config + config: AgentConfig, + + /// Log forwarder + log_forwarder: LogForwarder, +} + +impl std::fmt::Debug for KataAgentInner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("KataAgentInner") + .field("client_fd", &self.client_fd) + .field("socket_address", &self.socket_address) + .field("config", &self.config) + .finish() + } +} + +unsafe impl Send for KataAgent {} +unsafe impl Sync for KataAgent {} +#[derive(Debug)] +pub struct KataAgent { + pub(crate) inner: Arc>, +} + +impl KataAgent { + pub fn new(config: AgentConfig) -> Self { + KataAgent { + inner: Arc::new(RwLock::new(KataAgentInner { + client: None, + client_fd: -1, + socket_address: "".to_string(), + config, + log_forwarder: LogForwarder::new(), + })), + } + } + + pub async fn get_health_client(&self) -> Option<(health_ttrpc::HealthClient, i64, RawFd)> { + let inner = self.inner.read().await; + inner.client.as_ref().map(|c| { + ( + health_ttrpc::HealthClient::new(c.clone()), + inner.config.health_check_request_timeout_ms as i64, + inner.client_fd, + ) + }) + } + + pub async fn get_agent_client(&self) -> Option<(agent_ttrpc::AgentServiceClient, i64, RawFd)> { + let inner = self.inner.read().await; + inner.client.as_ref().map(|c| { + ( + agent_ttrpc::AgentServiceClient::new(c.clone()), + inner.config.request_timeout_ms as i64, + inner.client_fd, + ) + }) + } + + pub(crate) async fn set_socket_address(&self, address: &str) -> Result<()> { + let mut inner = self.inner.write().await; + inner.socket_address = address.to_string(); + Ok(()) + } + + pub(crate) async fn connect_agent_server(&self) -> Result<()> { + let mut inner = self.inner.write().await; + + let config = sock::ConnectConfig::new( + inner.config.dial_timeout_ms as u64, + inner.config.reconnect_timeout_ms as u64, + ); + let sock = + sock::new(&inner.socket_address, inner.config.server_port).context("new sock")?; + let stream = sock.connect(&config).await.context("connect")?; + let fd = stream.into_raw_fd(); + info!(sl!(), "get stream raw fd {:?}", fd); + let c = Client::new(fd); + inner.client = Some(c); + inner.client_fd = fd; + Ok(()) + } + + pub(crate) async fn start_log_forwarder(&self) -> Result<()> { + let mut inner = self.inner.write().await; + let config = sock::ConnectConfig::new( + inner.config.dial_timeout_ms as u64, + inner.config.reconnect_timeout_ms as u64, + ); + let address = inner.socket_address.clone(); + let port = inner.config.log_port; + inner + .log_forwarder + .start(&address, port, config) + .await + .context("start log forwarder")?; + Ok(()) + } + + pub(crate) async fn stop_log_forwarder(&self) { + let mut inner = self.inner.write().await; + inner.log_forwarder.stop(); + } + + pub(crate) async fn agent_sock(&self) -> Result { + let inner = self.inner.read().await; + Ok(format!( + "{}:{}", + inner.socket_address.clone(), + inner.config.server_port + )) + } + + pub(crate) async fn agent_config(&self) -> AgentConfig { + let inner = self.inner.read().await; + inner.config.clone() + } +} diff --git a/src/runtime-rs/crates/agent/src/kata/trans.rs b/src/runtime-rs/crates/agent/src/kata/trans.rs new file mode 100644 index 000000000000..55635c6c59e0 --- /dev/null +++ b/src/runtime-rs/crates/agent/src/kata/trans.rs @@ -0,0 +1,844 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 
2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::convert::Into; + +use protocols::{ + agent::{self, Metrics, OOMEvent}, + csi, empty, health, types, +}; + +use crate::{ + types::{ + ARPNeighbor, ARPNeighbors, AddArpNeighborRequest, AgentDetails, BlkioStats, + BlkioStatsEntry, CgroupStats, CheckRequest, CloseStdinRequest, ContainerID, + CopyFileRequest, CpuStats, CpuUsage, CreateContainerRequest, CreateSandboxRequest, Device, + Empty, ExecProcessRequest, FSGroup, FSGroupChangePolicy, GetIPTablesRequest, + GetIPTablesResponse, GuestDetailsResponse, HealthCheckResponse, HugetlbStats, IPAddress, + IPFamily, Interface, Interfaces, KernelModule, MemHotplugByProbeRequest, MemoryData, + MemoryStats, MetricsResponse, NetworkStats, OnlineCPUMemRequest, PidsStats, + ReadStreamRequest, ReadStreamResponse, RemoveContainerRequest, ReseedRandomDevRequest, + ResizeVolumeRequest, Route, Routes, SetGuestDateTimeRequest, SetIPTablesRequest, + SetIPTablesResponse, SignalProcessRequest, StatsContainerResponse, Storage, StringUser, + ThrottlingData, TtyWinResizeRequest, UpdateContainerRequest, UpdateInterfaceRequest, + UpdateRoutesRequest, VersionCheckResponse, VolumeStatsRequest, VolumeStatsResponse, + WaitProcessRequest, WriteStreamRequest, + }, + OomEventResponse, WaitProcessResponse, WriteStreamResponse, +}; + +fn trans_vec>(from: Vec) -> Vec { + from.into_iter().map(|f| f.into()).collect() +} + +fn from_option>(from: Option) -> protobuf::MessageField { + match from { + Some(f) => protobuf::MessageField::from_option(Some(T::from(f))), + None => protobuf::MessageField::none(), + } +} + +fn into_option, T: Sized>(from: protobuf::MessageField) -> Option { + from.into_option().map(|f| f.into()) +} + +fn into_hash_map, T>( + from: std::collections::HashMap, +) -> std::collections::HashMap { + let mut to: std::collections::HashMap = Default::default(); + + for (key, value) in from { + to.insert(key, value.into()); + } + + to +} + +impl From for Empty { + fn from(_: empty::Empty) -> Self { + Self {} + } +} + +impl From for agent::FSGroup { + fn from(from: FSGroup) -> Self { + let policy = match from.group_change_policy { + FSGroupChangePolicy::Always => types::FSGroupChangePolicy::Always, + FSGroupChangePolicy::OnRootMismatch => types::FSGroupChangePolicy::OnRootMismatch, + }; + + Self { + group_id: from.group_id, + group_change_policy: policy.into(), + ..Default::default() + } + } +} + +impl From for agent::StringUser { + fn from(from: StringUser) -> Self { + Self { + uid: from.uid, + gid: from.gid, + additionalGids: from.additional_gids, + ..Default::default() + } + } +} + +impl From for agent::Device { + fn from(from: Device) -> Self { + Self { + id: from.id, + type_: from.field_type, + vm_path: from.vm_path, + container_path: from.container_path, + options: trans_vec(from.options), + ..Default::default() + } + } +} + +impl From for agent::Storage { + fn from(from: Storage) -> Self { + Self { + driver: from.driver, + driver_options: trans_vec(from.driver_options), + source: from.source, + fstype: from.fs_type, + fs_group: from_option(from.fs_group), + options: trans_vec(from.options), + mount_point: from.mount_point, + ..Default::default() + } + } +} + +impl From for agent::KernelModule { + fn from(from: KernelModule) -> Self { + Self { + name: from.name, + parameters: trans_vec(from.parameters), + ..Default::default() + } + } +} + +impl From for types::IPFamily { + fn from(from: IPFamily) -> Self { + if from == IPFamily::V4 { + types::IPFamily::v4 + } else { + types::IPFamily::v6 
+ } + } +} + +impl From for IPFamily { + fn from(src: types::IPFamily) -> Self { + match src { + types::IPFamily::v4 => IPFamily::V4, + types::IPFamily::v6 => IPFamily::V6, + } + } +} + +impl From for types::IPAddress { + fn from(from: IPAddress) -> Self { + Self { + family: protobuf::EnumOrUnknown::new(from.family.into()), + address: from.address, + mask: from.mask, + ..Default::default() + } + } +} + +impl From for IPAddress { + fn from(src: types::IPAddress) -> Self { + Self { + family: src.family.unwrap().into(), + address: "".to_string(), + mask: "".to_string(), + } + } +} + +impl From for types::Interface { + fn from(from: Interface) -> Self { + Self { + device: from.device, + name: from.name, + IPAddresses: trans_vec(from.ip_addresses), + mtu: from.mtu, + hwAddr: from.hw_addr, + pciPath: from.pci_addr, + type_: from.field_type, + raw_flags: from.raw_flags, + ..Default::default() + } + } +} + +impl From for Interface { + fn from(src: types::Interface) -> Self { + Self { + device: src.device, + name: src.name, + ip_addresses: trans_vec(src.IPAddresses), + mtu: src.mtu, + hw_addr: src.hwAddr, + pci_addr: src.pciPath, + field_type: src.type_, + raw_flags: src.raw_flags, + } + } +} + +impl From for Interfaces { + fn from(src: agent::Interfaces) -> Self { + Self { + interfaces: trans_vec(src.Interfaces), + } + } +} + +impl From for types::Route { + fn from(from: Route) -> Self { + Self { + dest: from.dest, + gateway: from.gateway, + device: from.device, + source: from.source, + scope: from.scope, + family: protobuf::EnumOrUnknown::new(from.family.into()), + ..Default::default() + } + } +} + +impl From for Route { + fn from(src: types::Route) -> Self { + Self { + dest: src.dest, + gateway: src.gateway, + device: src.device, + source: src.source, + scope: src.scope, + family: src.family.unwrap().into(), + } + } +} + +impl From for agent::Routes { + fn from(from: Routes) -> Self { + Self { + Routes: trans_vec(from.routes), + ..Default::default() + } + } +} + +impl From for Routes { + fn from(src: agent::Routes) -> Self { + Self { + routes: trans_vec(src.Routes), + } + } +} + +impl From for agent::CreateContainerRequest { + fn from(from: CreateContainerRequest) -> Self { + Self { + container_id: from.process_id.container_id(), + exec_id: from.process_id.exec_id(), + string_user: from_option(from.string_user), + devices: trans_vec(from.devices), + storages: trans_vec(from.storages), + OCI: from_option(from.oci), + sandbox_pidns: from.sandbox_pidns, + ..Default::default() + } + } +} + +impl From for agent::RemoveContainerRequest { + fn from(from: RemoveContainerRequest) -> Self { + Self { + container_id: from.container_id, + timeout: from.timeout, + ..Default::default() + } + } +} + +impl From for agent::StartContainerRequest { + fn from(from: ContainerID) -> Self { + Self { + container_id: from.container_id, + ..Default::default() + } + } +} + +impl From for agent::StatsContainerRequest { + fn from(from: ContainerID) -> Self { + Self { + container_id: from.container_id, + ..Default::default() + } + } +} + +impl From for agent::PauseContainerRequest { + fn from(from: ContainerID) -> Self { + Self { + container_id: from.container_id, + ..Default::default() + } + } +} + +impl From for agent::ResumeContainerRequest { + fn from(from: ContainerID) -> Self { + Self { + container_id: from.container_id, + ..Default::default() + } + } +} + +impl From for agent::SignalProcessRequest { + fn from(from: SignalProcessRequest) -> Self { + Self { + container_id: from.process_id.container_id(), + exec_id: 
from.process_id.exec_id(), + signal: from.signal, + ..Default::default() + } + } +} + +impl From for agent::WaitProcessRequest { + fn from(from: WaitProcessRequest) -> Self { + Self { + container_id: from.process_id.container_id(), + exec_id: from.process_id.exec_id(), + ..Default::default() + } + } +} + +impl From for agent::UpdateContainerRequest { + fn from(from: UpdateContainerRequest) -> Self { + Self { + container_id: from.container_id, + resources: from_option(from.resources), + ..Default::default() + } + } +} + +impl From for agent::WriteStreamRequest { + fn from(from: WriteStreamRequest) -> Self { + Self { + container_id: from.process_id.container_id(), + exec_id: from.process_id.exec_id(), + data: from.data, + ..Default::default() + } + } +} + +impl From for WriteStreamResponse { + fn from(from: agent::WriteStreamResponse) -> Self { + Self { length: from.len } + } +} + +impl From for agent::GetIPTablesRequest { + fn from(from: GetIPTablesRequest) -> Self { + Self { + is_ipv6: from.is_ipv6, + ..Default::default() + } + } +} + +impl From for GetIPTablesResponse { + fn from(from: agent::GetIPTablesResponse) -> Self { + Self { + data: from.data().to_vec(), + } + } +} + +impl From for agent::SetIPTablesRequest { + fn from(from: SetIPTablesRequest) -> Self { + Self { + is_ipv6: from.is_ipv6, + data: from.data, + ..Default::default() + } + } +} + +impl From for SetIPTablesResponse { + fn from(from: agent::SetIPTablesResponse) -> Self { + Self { + data: from.data().to_vec(), + } + } +} + +impl From for agent::ExecProcessRequest { + fn from(from: ExecProcessRequest) -> Self { + Self { + container_id: from.process_id.container_id(), + exec_id: from.process_id.exec_id(), + string_user: from_option(from.string_user), + process: from_option(from.process), + ..Default::default() + } + } +} + +impl From for CpuUsage { + fn from(src: agent::CpuUsage) -> Self { + Self { + total_usage: src.total_usage, + percpu_usage: src.percpu_usage, + usage_in_kernelmode: src.usage_in_kernelmode, + usage_in_usermode: src.usage_in_usermode, + } + } +} + +impl From for ThrottlingData { + fn from(src: agent::ThrottlingData) -> Self { + Self { + periods: src.periods, + throttled_periods: src.throttled_periods, + throttled_time: src.throttled_time, + } + } +} + +impl From for CpuStats { + fn from(src: agent::CpuStats) -> Self { + Self { + cpu_usage: into_option(src.cpu_usage), + throttling_data: into_option(src.throttling_data), + } + } +} + +impl From for MemoryData { + fn from(src: agent::MemoryData) -> Self { + Self { + usage: src.usage, + max_usage: src.max_usage, + failcnt: src.failcnt, + limit: src.limit, + } + } +} + +impl From for MemoryStats { + fn from(src: agent::MemoryStats) -> Self { + Self { + cache: src.cache, + usage: into_option(src.usage), + swap_usage: into_option(src.swap_usage), + kernel_usage: into_option(src.kernel_usage), + use_hierarchy: src.use_hierarchy, + stats: into_hash_map(src.stats), + } + } +} + +impl From for PidsStats { + fn from(src: agent::PidsStats) -> Self { + Self { + current: src.current, + limit: src.limit, + } + } +} + +impl From for BlkioStatsEntry { + fn from(src: agent::BlkioStatsEntry) -> Self { + Self { + major: src.major, + minor: src.minor, + op: src.op, + value: src.value, + } + } +} + +impl From for BlkioStats { + fn from(src: agent::BlkioStats) -> Self { + Self { + io_service_bytes_recursive: trans_vec(src.io_service_bytes_recursive), + io_serviced_recursive: trans_vec(src.io_serviced_recursive), + io_queued_recursive: trans_vec(src.io_queued_recursive), + 
io_service_time_recursive: trans_vec(src.io_service_time_recursive), + io_wait_time_recursive: trans_vec(src.io_wait_time_recursive), + io_merged_recursive: trans_vec(src.io_merged_recursive), + io_time_recursive: trans_vec(src.io_time_recursive), + sectors_recursive: trans_vec(src.sectors_recursive), + } + } +} + +impl From for HugetlbStats { + fn from(src: agent::HugetlbStats) -> Self { + Self { + usage: src.usage, + max_usage: src.max_usage, + failcnt: src.failcnt, + } + } +} + +impl From for CgroupStats { + fn from(src: agent::CgroupStats) -> Self { + Self { + cpu_stats: into_option(src.cpu_stats), + memory_stats: into_option(src.memory_stats), + pids_stats: into_option(src.pids_stats), + blkio_stats: into_option(src.blkio_stats), + hugetlb_stats: into_hash_map(src.hugetlb_stats), + } + } +} + +impl From for NetworkStats { + fn from(src: agent::NetworkStats) -> Self { + Self { + name: src.name, + rx_bytes: src.rx_bytes, + rx_packets: src.rx_packets, + rx_errors: src.rx_errors, + rx_dropped: src.rx_dropped, + tx_bytes: src.tx_bytes, + tx_packets: src.tx_packets, + tx_errors: src.tx_errors, + tx_dropped: src.tx_dropped, + } + } +} + +// translate ttrpc::agent response to interface::agent response +impl From for StatsContainerResponse { + fn from(src: agent::StatsContainerResponse) -> Self { + Self { + cgroup_stats: into_option(src.cgroup_stats), + network_stats: trans_vec(src.network_stats), + } + } +} + +impl From for agent::ReadStreamRequest { + fn from(from: ReadStreamRequest) -> Self { + Self { + container_id: from.process_id.container_id(), + exec_id: from.process_id.exec_id(), + len: from.len, + ..Default::default() + } + } +} + +impl From for ReadStreamResponse { + fn from(from: agent::ReadStreamResponse) -> Self { + Self { data: from.data } + } +} + +impl From for agent::CloseStdinRequest { + fn from(from: CloseStdinRequest) -> Self { + Self { + container_id: from.process_id.container_id(), + exec_id: from.process_id.exec_id(), + ..Default::default() + } + } +} + +impl From for agent::TtyWinResizeRequest { + fn from(from: TtyWinResizeRequest) -> Self { + Self { + container_id: from.process_id.container_id(), + exec_id: from.process_id.exec_id(), + row: from.row, + column: from.column, + ..Default::default() + } + } +} + +impl From for agent::UpdateInterfaceRequest { + fn from(from: UpdateInterfaceRequest) -> Self { + Self { + interface: from_option(from.interface), + ..Default::default() + } + } +} + +impl From for agent::ListInterfacesRequest { + fn from(_: Empty) -> Self { + Self { + ..Default::default() + } + } +} + +impl From for agent::UpdateRoutesRequest { + fn from(from: UpdateRoutesRequest) -> Self { + Self { + routes: from_option(from.route), + ..Default::default() + } + } +} + +impl From for agent::ListRoutesRequest { + fn from(_: Empty) -> Self { + Self { + ..Default::default() + } + } +} + +impl From for types::ARPNeighbor { + fn from(from: ARPNeighbor) -> Self { + Self { + toIPAddress: from_option(from.to_ip_address), + device: from.device, + lladdr: from.ll_addr, + state: from.state, + flags: from.flags, + ..Default::default() + } + } +} + +impl From for agent::ARPNeighbors { + fn from(from: ARPNeighbors) -> Self { + Self { + ARPNeighbors: trans_vec(from.neighbors), + ..Default::default() + } + } +} + +impl From for agent::AddARPNeighborsRequest { + fn from(from: AddArpNeighborRequest) -> Self { + Self { + neighbors: from_option(from.neighbors), + ..Default::default() + } + } +} + +impl From for agent::CreateSandboxRequest { + fn from(from: CreateSandboxRequest) -> 
Self { + Self { + hostname: from.hostname, + dns: trans_vec(from.dns), + storages: trans_vec(from.storages), + sandbox_pidns: from.sandbox_pidns, + sandbox_id: from.sandbox_id, + guest_hook_path: from.guest_hook_path, + kernel_modules: trans_vec(from.kernel_modules), + ..Default::default() + } + } +} + +impl From for agent::DestroySandboxRequest { + fn from(_: Empty) -> Self { + Self { + ..Default::default() + } + } +} + +impl From for agent::OnlineCPUMemRequest { + fn from(from: OnlineCPUMemRequest) -> Self { + Self { + wait: from.wait, + nb_cpus: from.nb_cpus, + cpu_only: from.cpu_only, + ..Default::default() + } + } +} + +impl From for agent::ReseedRandomDevRequest { + fn from(from: ReseedRandomDevRequest) -> Self { + Self { + data: from.data, + ..Default::default() + } + } +} + +impl From for agent::MemHotplugByProbeRequest { + fn from(from: MemHotplugByProbeRequest) -> Self { + Self { + memHotplugProbeAddr: from.mem_hotplug_probe_addr, + ..Default::default() + } + } +} + +impl From for agent::SetGuestDateTimeRequest { + fn from(from: SetGuestDateTimeRequest) -> Self { + Self { + Sec: from.sec, + Usec: from.usec, + ..Default::default() + } + } +} + +impl From for AgentDetails { + fn from(src: agent::AgentDetails) -> Self { + Self { + version: src.version, + init_daemon: src.init_daemon, + device_handlers: trans_vec(src.device_handlers), + storage_handlers: trans_vec(src.storage_handlers), + supports_seccomp: src.supports_seccomp, + } + } +} + +impl From for GuestDetailsResponse { + fn from(src: agent::GuestDetailsResponse) -> Self { + Self { + mem_block_size_bytes: src.mem_block_size_bytes, + agent_details: into_option(src.agent_details), + support_mem_hotplug_probe: src.support_mem_hotplug_probe, + } + } +} + +impl From for agent::CopyFileRequest { + fn from(from: CopyFileRequest) -> Self { + Self { + path: from.path, + file_size: from.file_size, + file_mode: from.file_mode, + dir_mode: from.dir_mode, + uid: from.uid, + gid: from.gid, + offset: from.offset, + data: from.data, + ..Default::default() + } + } +} + +impl From for WaitProcessResponse { + fn from(from: agent::WaitProcessResponse) -> Self { + Self { + status: from.status, + } + } +} + +impl From for agent::GetMetricsRequest { + fn from(_: Empty) -> Self { + Self { + ..Default::default() + } + } +} + +impl From for agent::GetOOMEventRequest { + fn from(_: Empty) -> Self { + Self { + ..Default::default() + } + } +} + +impl From for health::CheckRequest { + fn from(from: CheckRequest) -> Self { + Self { + service: from.service, + ..Default::default() + } + } +} + +impl From for HealthCheckResponse { + fn from(from: health::HealthCheckResponse) -> Self { + Self { + status: from.status.value() as u32, + } + } +} + +impl From for VersionCheckResponse { + fn from(from: health::VersionCheckResponse) -> Self { + Self { + grpc_version: from.grpc_version, + agent_version: from.agent_version, + } + } +} + +impl From for MetricsResponse { + fn from(from: Metrics) -> Self { + Self { + metrics: from.metrics, + } + } +} + +impl From for OomEventResponse { + fn from(from: OOMEvent) -> Self { + Self { + container_id: from.container_id, + } + } +} + +impl From for agent::VolumeStatsRequest { + fn from(from: VolumeStatsRequest) -> Self { + Self { + volume_guest_path: from.volume_guest_path, + ..Default::default() + } + } +} + +impl From for VolumeStatsResponse { + fn from(from: csi::VolumeStatsResponse) -> Self { + let result: String = format!( + "Usage: {:?} Volume Condition: {:?}", + from.usage(), + from.volume_condition() + ); + Self { data: 
result } + } +} + +impl From for agent::ResizeVolumeRequest { + fn from(from: ResizeVolumeRequest) -> Self { + Self { + volume_guest_path: from.volume_guest_path, + size: from.size, + ..Default::default() + } + } +} diff --git a/src/runtime-rs/crates/agent/src/lib.rs b/src/runtime-rs/crates/agent/src/lib.rs new file mode 100644 index 000000000000..8dd71e8d7ba2 --- /dev/null +++ b/src/runtime-rs/crates/agent/src/lib.rs @@ -0,0 +1,96 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[macro_use] +extern crate slog; + +logging::logger_with_subsystem!(sl, "agent"); + +pub mod kata; +mod log_forwarder; +mod sock; +pub mod types; +pub use types::{ + ARPNeighbor, ARPNeighbors, AddArpNeighborRequest, BlkioStatsEntry, CheckRequest, + CloseStdinRequest, ContainerID, ContainerProcessID, CopyFileRequest, CreateContainerRequest, + CreateSandboxRequest, Empty, ExecProcessRequest, GetGuestDetailsRequest, GetIPTablesRequest, + GetIPTablesResponse, GuestDetailsResponse, HealthCheckResponse, IPAddress, IPFamily, Interface, + Interfaces, ListProcessesRequest, MemHotplugByProbeRequest, MetricsResponse, + OnlineCPUMemRequest, OomEventResponse, ReadStreamRequest, ReadStreamResponse, + RemoveContainerRequest, ReseedRandomDevRequest, ResizeVolumeRequest, Route, Routes, + SetGuestDateTimeRequest, SetIPTablesRequest, SetIPTablesResponse, SignalProcessRequest, + StatsContainerResponse, Storage, TtyWinResizeRequest, UpdateContainerRequest, + UpdateInterfaceRequest, UpdateRoutesRequest, VersionCheckResponse, VolumeStatsRequest, + VolumeStatsResponse, WaitProcessRequest, WaitProcessResponse, WriteStreamRequest, + WriteStreamResponse, +}; + +use anyhow::Result; +use async_trait::async_trait; + +use kata_types::config::Agent as AgentConfig; + +pub const AGENT_KATA: &str = "kata"; + +#[async_trait] +pub trait AgentManager: Send + Sync { + async fn start(&self, address: &str) -> Result<()>; + async fn stop(&self); + + async fn agent_sock(&self) -> Result; + async fn agent_config(&self) -> AgentConfig; +} + +#[async_trait] +pub trait HealthService: Send + Sync { + async fn check(&self, req: CheckRequest) -> Result; + async fn version(&self, req: CheckRequest) -> Result; +} + +#[async_trait] +pub trait Agent: AgentManager + HealthService + Send + Sync { + // sandbox + async fn create_sandbox(&self, req: CreateSandboxRequest) -> Result; + async fn destroy_sandbox(&self, req: Empty) -> Result; + async fn online_cpu_mem(&self, req: OnlineCPUMemRequest) -> Result; + + // network + async fn add_arp_neighbors(&self, req: AddArpNeighborRequest) -> Result; + async fn list_interfaces(&self, req: Empty) -> Result; + async fn list_routes(&self, req: Empty) -> Result; + async fn update_interface(&self, req: UpdateInterfaceRequest) -> Result; + async fn update_routes(&self, req: UpdateRoutesRequest) -> Result; + + // container + async fn create_container(&self, req: CreateContainerRequest) -> Result; + async fn pause_container(&self, req: ContainerID) -> Result; + async fn remove_container(&self, req: RemoveContainerRequest) -> Result; + async fn resume_container(&self, req: ContainerID) -> Result; + async fn start_container(&self, req: ContainerID) -> Result; + async fn stats_container(&self, req: ContainerID) -> Result; + async fn update_container(&self, req: UpdateContainerRequest) -> Result; + + // process + async fn exec_process(&self, req: ExecProcessRequest) -> Result; + async fn signal_process(&self, req: SignalProcessRequest) -> Result; + async 
fn wait_process(&self, req: WaitProcessRequest) -> Result; + + // io and tty + async fn close_stdin(&self, req: CloseStdinRequest) -> Result; + async fn read_stderr(&self, req: ReadStreamRequest) -> Result; + async fn read_stdout(&self, req: ReadStreamRequest) -> Result; + async fn tty_win_resize(&self, req: TtyWinResizeRequest) -> Result; + async fn write_stdin(&self, req: WriteStreamRequest) -> Result; + + // utils + async fn copy_file(&self, req: CopyFileRequest) -> Result; + async fn get_metrics(&self, req: Empty) -> Result; + async fn get_oom_event(&self, req: Empty) -> Result; + async fn get_ip_tables(&self, req: GetIPTablesRequest) -> Result; + async fn set_ip_tables(&self, req: SetIPTablesRequest) -> Result; + async fn get_volume_stats(&self, req: VolumeStatsRequest) -> Result; + async fn resize_volume(&self, req: ResizeVolumeRequest) -> Result; +} diff --git a/src/runtime-rs/crates/agent/src/log_forwarder.rs b/src/runtime-rs/crates/agent/src/log_forwarder.rs new file mode 100644 index 000000000000..221ddd6af178 --- /dev/null +++ b/src/runtime-rs/crates/agent/src/log_forwarder.rs @@ -0,0 +1,157 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::Result; +use tokio::io::{AsyncBufReadExt, BufReader}; + +use crate::sock; + +// https://github.com/slog-rs/slog/blob/master/src/lib.rs#L2082 +const LOG_LEVEL_TRACE: &str = "TRCE"; +const LOG_LEVEL_DEBUG: &str = "DEBG"; +const LOG_LEVEL_INFO: &str = "INFO"; +const LOG_LEVEL_WARNING: &str = "WARN"; +const LOG_LEVEL_ERROR: &str = "ERRO"; +const LOG_LEVEL_CRITICAL: &str = "CRIT"; + +pub(crate) struct LogForwarder { + task_handler: Option>, +} + +impl LogForwarder { + pub(crate) fn new() -> Self { + Self { task_handler: None } + } + + pub(crate) fn stop(&mut self) { + let task_handler = self.task_handler.take(); + if let Some(handler) = task_handler { + handler.abort(); + info!(sl!(), "abort log forwarder thread"); + } + } + + // start connect kata-agent log vsock and copy data to hypervisor's log stream + pub(crate) async fn start( + &mut self, + address: &str, + port: u32, + config: sock::ConnectConfig, + ) -> Result<()> { + let logger = sl!().clone(); + let address = address.to_string(); + let task_handler = tokio::spawn(async move { + loop { + info!(logger, "try to connect to get agent log"); + let sock = match sock::new(&address, port) { + Ok(sock) => sock, + Err(err) => { + error!( + sl!(), + "failed to new sock for address {:?} port {} error {:?}", + address, + port, + err + ); + return; + } + }; + + match sock.connect(&config).await { + Ok(stream) => { + let stream = BufReader::new(stream); + let mut lines = stream.lines(); + while let Ok(Some(l)) = lines.next_line().await { + match parse_agent_log_level(&l) { + LOG_LEVEL_TRACE => trace!(sl!(), "{}", l), + LOG_LEVEL_DEBUG => debug!(sl!(), "{}", l), + LOG_LEVEL_WARNING => warn!(sl!(), "{}", l), + LOG_LEVEL_ERROR => error!(sl!(), "{}", l), + LOG_LEVEL_CRITICAL => crit!(sl!(), "{}", l), + _ => info!(sl!(), "{}", l), + } + } + } + Err(err) => { + warn!(logger, "connect agent vsock failed: {:?}", err); + } + } + } + }); + self.task_handler = Some(task_handler); + Ok(()) + } +} + +pub fn parse_agent_log_level(s: &str) -> &str { + let v: serde_json::Result = serde_json::from_str(s); + match v { + Err(_err) => LOG_LEVEL_INFO, + Ok(val) => { + match &val["level"] { + serde_json::Value::String(s) => match s.as_str() { + LOG_LEVEL_TRACE => LOG_LEVEL_TRACE, + LOG_LEVEL_DEBUG => LOG_LEVEL_DEBUG, + 
LOG_LEVEL_WARNING => LOG_LEVEL_WARNING, + LOG_LEVEL_ERROR => LOG_LEVEL_ERROR, + LOG_LEVEL_CRITICAL => LOG_LEVEL_CRITICAL, + _ => LOG_LEVEL_INFO, // info or other values will return info, + }, + _ => LOG_LEVEL_INFO, // info or other values will return info, + } + } + } +} + +#[cfg(test)] +mod tests { + use super::parse_agent_log_level; + + #[test] + fn test_parse_agent_log_level() { + let cases = vec![ + // normal cases + ( + r#"{"msg":"child exited unexpectedly","level":"TRCE"}"#, + super::LOG_LEVEL_TRACE, + ), + ( + r#"{"msg":"child exited unexpectedly","level":"DEBG"}"#, + super::LOG_LEVEL_DEBUG, + ), + ( + r#"{"msg":"child exited unexpectedly","level":"INFO"}"#, + super::LOG_LEVEL_INFO, + ), + ( + r#"{"msg":"child exited unexpectedly","level":"WARN"}"#, + super::LOG_LEVEL_WARNING, + ), + ( + r#"{"msg":"child exited unexpectedly","level":"ERRO"}"#, + super::LOG_LEVEL_ERROR, + ), + ( + r#"{"msg":"child exited unexpectedly","level":"CRIT"}"#, + super::LOG_LEVEL_CRITICAL, + ), + ( + r#"{"msg":"child exited unexpectedly","level":"abc"}"#, + super::LOG_LEVEL_INFO, + ), + // exception cases + (r#"{"not a valid json struct"}"#, super::LOG_LEVEL_INFO), + ("not a valid json struct", super::LOG_LEVEL_INFO), + ]; + + for case in cases.iter() { + let s = case.0; + let result = parse_agent_log_level(s); + let excepted = case.1; + assert_eq!(result, excepted); + } + } +} diff --git a/src/runtime-rs/crates/agent/src/sock/hybrid_vsock.rs b/src/runtime-rs/crates/agent/src/sock/hybrid_vsock.rs new file mode 100644 index 000000000000..1b19a65b0ea9 --- /dev/null +++ b/src/runtime-rs/crates/agent/src/sock/hybrid_vsock.rs @@ -0,0 +1,78 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::os::unix::prelude::AsRawFd; + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use tokio::{ + io::{AsyncBufReadExt, AsyncWriteExt, BufReader}, + net::UnixStream, +}; + +use super::{ConnectConfig, Sock, Stream}; + +#[derive(Debug, PartialEq)] +pub struct HybridVsock { + uds: String, + port: u32, +} + +impl HybridVsock { + pub fn new(uds: &str, port: u32) -> Self { + Self { + uds: uds.to_string(), + port, + } + } +} + +#[async_trait] +impl Sock for HybridVsock { + async fn connect(&self, config: &ConnectConfig) -> Result { + let retry_times = config.reconnect_timeout_ms / config.dial_timeout_ms; + for i in 0..retry_times { + match connect_helper(&self.uds, self.port).await { + Ok(stream) => { + info!( + sl!(), + "connect success on {} current client fd {}", + i, + stream.as_raw_fd() + ); + return Ok(Stream::Unix(stream)); + } + Err(err) => { + debug!(sl!(), "connect on {} err : {:?}", i, err); + tokio::time::sleep(std::time::Duration::from_millis(config.dial_timeout_ms)) + .await; + continue; + } + } + } + Err(anyhow!("cannot connect to agent ttrpc server {:?}", config)) + } +} + +async fn connect_helper(uds: &str, port: u32) -> Result { + info!(sl!(), "connect uds {:?} port {}", &uds, port); + let mut stream = UnixStream::connect(&uds).await.context("connect")?; + stream + .write_all(format!("connect {}\n", port).as_bytes()) + .await + .context("write all")?; + let mut reads = BufReader::new(&mut stream); + let mut response = String::new(); + reads.read_line(&mut response).await.context("read line")?; + //info!(sl!(), "get socket resp: {}", response); + if !response.contains("OK") { + return Err(anyhow!( + "handshake error: malformed response code: {:?}", + response + )); + } + Ok(stream) +} diff --git 
a/src/runtime-rs/crates/agent/src/sock/mod.rs b/src/runtime-rs/crates/agent/src/sock/mod.rs new file mode 100644 index 000000000000..4e4d851a147d --- /dev/null +++ b/src/runtime-rs/crates/agent/src/sock/mod.rs @@ -0,0 +1,160 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod hybrid_vsock; +pub use hybrid_vsock::HybridVsock; +mod vsock; +pub use vsock::Vsock; + +use std::{ + pin::Pin, + task::{Context as TaskContext, Poll}, + { + os::unix::{io::IntoRawFd, prelude::RawFd}, + sync::Arc, + }, +}; + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use tokio::{ + io::{AsyncRead, ReadBuf}, + net::UnixStream, +}; +use url::Url; + +const VSOCK_SCHEME: &str = "vsock"; +const HYBRID_VSOCK_SCHEME: &str = "hvsock"; + +/// Socket stream +pub enum Stream { + // hvsock://:. Firecracker/Dragonball implements the virtio-vsock device + // model, and mediates communication between AF_UNIX sockets (on the host end) + // and AF_VSOCK sockets (on the guest end). + Unix(UnixStream), + // vsock://: + Vsock(UnixStream), +} + +impl Stream { + fn poll_read_priv( + &mut self, + cx: &mut TaskContext<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + // Safety: `UnixStream::read` correctly handles reads into uninitialized memory + match self { + Stream::Unix(stream) | Stream::Vsock(stream) => Pin::new(stream).poll_read(cx, buf), + } + } +} + +impl IntoRawFd for Stream { + fn into_raw_fd(self) -> RawFd { + match self { + Stream::Unix(stream) | Stream::Vsock(stream) => match stream.into_std() { + Ok(stream) => stream.into_raw_fd(), + Err(err) => { + error!(sl!(), "failed to into std unix stream {:?}", err); + -1 + } + }, + } + } +} + +impl AsyncRead for Stream { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut TaskContext<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + // we know this is safe because doesn't moved + let me = unsafe { self.get_unchecked_mut() }; + me.poll_read_priv(cx, buf) + } +} + +/// Connect config +#[derive(Debug)] +pub struct ConnectConfig { + dial_timeout_ms: u64, + reconnect_timeout_ms: u64, +} + +impl ConnectConfig { + pub fn new(dial_timeout_ms: u64, reconnect_timeout_ms: u64) -> Self { + Self { + dial_timeout_ms, + reconnect_timeout_ms, + } + } +} + +#[derive(Debug, PartialEq)] +enum SockType { + Vsock(Vsock), + HybridVsock(HybridVsock), +} + +#[async_trait] +pub trait Sock: Send + Sync { + async fn connect(&self, config: &ConnectConfig) -> Result; +} + +// Supported sock address formats are: +// - vsock://: +// - hvsock://:. Firecracker implements the virtio-vsock device +// model, and mediates communication between AF_UNIX sockets (on the host end) +// and AF_VSOCK sockets (on the guest end). +pub fn new(address: &str, port: u32) -> Result> { + match parse(address, port).context("parse url")? 
{ + SockType::Vsock(sock) => Ok(Arc::new(sock)), + SockType::HybridVsock(sock) => Ok(Arc::new(sock)), + } +} + +fn parse(address: &str, port: u32) -> Result { + let url = Url::parse(address).context("parse url")?; + match url.scheme() { + VSOCK_SCHEME => { + let vsock_cid = url + .host_str() + .unwrap_or_default() + .parse::() + .context("parse vsock cid")?; + Ok(SockType::Vsock(Vsock::new(vsock_cid, port))) + } + HYBRID_VSOCK_SCHEME => { + let path: Vec<&str> = url.path().split(':').collect(); + if path.len() != 1 { + return Err(anyhow!("invalid path {:?}", path)); + } + let uds = path[0]; + Ok(SockType::HybridVsock(HybridVsock::new(uds, port))) + } + _ => Err(anyhow!("Unsupported scheme")), + } +} + +#[cfg(test)] +mod test { + use super::{hybrid_vsock::HybridVsock, parse, vsock::Vsock, SockType}; + + #[test] + fn test_parse_url() { + // check vsock + let vsock = parse("vsock://123", 456).unwrap(); + assert_eq!(vsock, SockType::Vsock(Vsock::new(123, 456))); + + // check hybrid vsock + let hvsock = parse("hvsock:///tmp/test.hvsock", 456).unwrap(); + assert_eq!( + hvsock, + SockType::HybridVsock(HybridVsock::new("/tmp/test.hvsock", 456)) + ); + } +} diff --git a/src/runtime-rs/crates/agent/src/sock/vsock.rs b/src/runtime-rs/crates/agent/src/sock/vsock.rs new file mode 100644 index 000000000000..1fbac463d0fc --- /dev/null +++ b/src/runtime-rs/crates/agent/src/sock/vsock.rs @@ -0,0 +1,79 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + os::unix::prelude::{AsRawFd, FromRawFd}, + time::Duration, +}; + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use nix::sys::socket::{connect, socket, AddressFamily, SockFlag, SockType, VsockAddr}; +use tokio::net::UnixStream; + +use super::{ConnectConfig, Sock, Stream}; + +#[derive(Debug, PartialEq)] +pub struct Vsock { + vsock_cid: u32, + port: u32, +} + +impl Vsock { + pub fn new(vsock_cid: u32, port: u32) -> Self { + Self { vsock_cid, port } + } +} + +#[async_trait] +impl Sock for Vsock { + async fn connect(&self, config: &ConnectConfig) -> Result { + let retry_times = config.reconnect_timeout_ms / config.dial_timeout_ms; + let sock_addr = VsockAddr::new(self.vsock_cid, self.port); + let connect_once = || { + // Create socket fd + let socket = socket( + AddressFamily::Vsock, + SockType::Stream, + SockFlag::empty(), + None, + ) + .context("failed to create vsock socket")?; + + // Wrap the socket fd in a UnixStream, so that it is closed when + // anything fails. + // We MUST NOT reuse a vsock socket which has failed a connection + // attempt before, since a ECONNRESET error marks the whole socket as + // broken and non-reusable. + let socket = unsafe { std::os::unix::net::UnixStream::from_raw_fd(socket) }; + + // Connect the socket to vsock server. + connect(socket.as_raw_fd(), &sock_addr) + .with_context(|| format!("failed to connect to {}", sock_addr))?; + + // Finally, convert the std UnixSocket to tokio's UnixSocket. 
+ UnixStream::from_std(socket).context("from_std") + }; + + for i in 0..retry_times { + match connect_once() { + Ok(stream) => { + info!( + sl!(), + "connect vsock success on {} current client fd {}", + i, + stream.as_raw_fd() + ); + return Ok(Stream::Vsock(stream)); + } + Err(_) => { + tokio::time::sleep(Duration::from_millis(config.dial_timeout_ms)).await; + } + } + } + Err(anyhow!("cannot connect to agent ttrpc server {:?}", config)) + } +} diff --git a/src/runtime-rs/crates/agent/src/types.rs b/src/runtime-rs/crates/agent/src/types.rs new file mode 100644 index 000000000000..325d84588dd3 --- /dev/null +++ b/src/runtime-rs/crates/agent/src/types.rs @@ -0,0 +1,626 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Result}; +use std::convert::TryFrom; + +use serde::{Deserialize, Serialize}; + +pub const DEFAULT_REMOVE_CONTAINER_REQUEST_TIMEOUT: u32 = 10; + +#[derive(PartialEq, Clone, Default)] +pub struct Empty {} + +impl Empty { + pub fn new() -> Self { + Self::default() + } +} + +#[derive(Default, Debug, Clone, PartialEq)] +pub enum FSGroupChangePolicy { + #[default] + Always = 0, + OnRootMismatch = 1, +} + +#[derive(Debug, PartialEq, Clone, Default)] +pub struct FSGroup { + pub group_id: u32, + pub group_change_policy: FSGroupChangePolicy, +} + +#[derive(PartialEq, Clone, Default)] +pub struct StringUser { + pub uid: String, + pub gid: String, + pub additional_gids: Vec, +} + +#[derive(PartialEq, Clone, Default)] +pub struct Device { + pub id: String, + pub field_type: String, + pub vm_path: String, + pub container_path: String, + pub options: Vec, +} + +#[derive(Debug, PartialEq, Clone, Default)] +pub struct Storage { + pub driver: String, + pub driver_options: Vec, + pub source: String, + pub fs_type: String, + pub fs_group: Option, + pub options: Vec, + pub mount_point: String, +} + +#[derive(Deserialize, Default, Clone, PartialEq, Eq, Debug, Hash)] +pub enum IPFamily { + #[default] + V4 = 0, + V6 = 1, +} + +#[derive(Deserialize, Debug, PartialEq, Clone, Default)] +pub struct IPAddress { + pub family: IPFamily, + pub address: String, + pub mask: String, +} + +#[derive(Deserialize, Debug, PartialEq, Clone, Default)] +pub struct Interface { + pub device: String, + pub name: String, + pub ip_addresses: Vec, + pub mtu: u64, + pub hw_addr: String, + #[serde(default)] + pub pci_addr: String, + #[serde(default)] + pub field_type: String, + #[serde(default)] + pub raw_flags: u32, +} + +#[derive(PartialEq, Clone, Default)] +pub struct Interfaces { + pub interfaces: Vec, +} + +#[derive(Deserialize, Debug, PartialEq, Clone, Default)] +pub struct Route { + pub dest: String, + pub gateway: String, + pub device: String, + pub source: String, + pub scope: u32, + pub family: IPFamily, +} + +#[derive(Deserialize, Debug, PartialEq, Clone, Default)] +pub struct Routes { + pub routes: Vec, +} + +#[derive(PartialEq, Clone, Default)] +pub struct CreateContainerRequest { + pub process_id: ContainerProcessID, + pub string_user: Option, + pub devices: Vec, + pub storages: Vec, + pub oci: Option, + pub sandbox_pidns: bool, + pub rootfs_mounts: Vec, +} + +#[derive(PartialEq, Clone, Default)] +pub struct ContainerID { + pub container_id: String, +} + +impl ContainerID { + pub fn new(id: &str) -> Self { + Self { + container_id: id.to_string(), + } + } +} + +#[derive(PartialEq, Clone, Default)] +pub struct ContainerProcessID { + pub container_id: ContainerID, + pub exec_id: String, +} + +impl 
ContainerProcessID { + pub fn new(container_id: &str, exec_id: &str) -> Self { + Self { + container_id: ContainerID::new(container_id), + exec_id: exec_id.to_string(), + } + } + + pub fn container_id(&self) -> String { + self.container_id.container_id.clone() + } + + pub fn exec_id(&self) -> String { + self.exec_id.clone() + } +} + +#[derive(PartialEq, Clone, Debug)] +pub struct RemoveContainerRequest { + pub container_id: String, + pub timeout: u32, +} + +impl RemoveContainerRequest { + pub fn new(id: &str, timeout: u32) -> Self { + Self { + container_id: id.to_string(), + timeout, + } + } +} + +impl std::default::Default for RemoveContainerRequest { + fn default() -> Self { + Self { + container_id: "".to_string(), + timeout: DEFAULT_REMOVE_CONTAINER_REQUEST_TIMEOUT, + } + } +} + +#[derive(PartialEq, Clone, Default)] +pub struct SignalProcessRequest { + pub process_id: ContainerProcessID, + pub signal: u32, +} + +#[derive(PartialEq, Clone, Default)] +pub struct WaitProcessRequest { + pub process_id: ContainerProcessID, +} + +#[derive(PartialEq, Clone, Default)] +pub struct ListProcessesRequest { + pub container_id: String, + pub format: String, + pub args: Vec, +} + +#[derive(PartialEq, Clone, Default)] +pub struct UpdateContainerRequest { + pub container_id: String, + pub resources: Option, + pub mounts: Vec, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct GetIPTablesRequest { + pub is_ipv6: bool, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct GetIPTablesResponse { + pub data: Vec, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct SetIPTablesRequest { + pub is_ipv6: bool, + pub data: Vec, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct SetIPTablesResponse { + pub data: Vec, +} + +#[derive(PartialEq, Clone, Default)] +pub struct WriteStreamRequest { + pub process_id: ContainerProcessID, + pub data: Vec, +} + +#[derive(PartialEq, Clone, Default)] +pub struct WriteStreamResponse { + pub length: u32, +} + +#[derive(PartialEq, Clone, Default)] +pub struct ExecProcessRequest { + pub process_id: ContainerProcessID, + pub string_user: Option, + pub process: Option, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct CpuUsage { + pub total_usage: u64, + pub percpu_usage: ::std::vec::Vec, + pub usage_in_kernelmode: u64, + pub usage_in_usermode: u64, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct ThrottlingData { + pub periods: u64, + pub throttled_periods: u64, + pub throttled_time: u64, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct LoadData { + pub one: String, + pub five: String, + pub fifteen: String, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct CpuStats { + pub cpu_usage: Option, + pub throttling_data: Option, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct MemoryData { + pub usage: u64, + pub max_usage: u64, + pub failcnt: u64, + pub limit: u64, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct MemoryStats { + pub cache: u64, + pub usage: Option, + pub swap_usage: Option, + pub kernel_usage: Option, + pub use_hierarchy: bool, + pub stats: ::std::collections::HashMap, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct PidsStats { + pub current: u64, + pub limit: u64, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct BlkioStatsEntry { + pub major: u64, + pub minor: u64, + pub op: String, + pub value: u64, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct BlkioStats { + pub io_service_bytes_recursive: Vec, + pub 
io_serviced_recursive: Vec, + pub io_queued_recursive: Vec, + pub io_service_time_recursive: Vec, + pub io_wait_time_recursive: Vec, + pub io_merged_recursive: Vec, + pub io_time_recursive: Vec, + pub sectors_recursive: Vec, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct HugetlbStats { + pub usage: u64, + pub max_usage: u64, + pub failcnt: u64, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct CgroupStats { + pub cpu_stats: Option, + pub memory_stats: Option, + pub pids_stats: Option, + pub blkio_stats: Option, + pub hugetlb_stats: ::std::collections::HashMap, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct NetworkStats { + pub name: String, + pub rx_bytes: u64, + pub rx_packets: u64, + pub rx_errors: u64, + pub rx_dropped: u64, + pub tx_bytes: u64, + pub tx_packets: u64, + pub tx_errors: u64, + pub tx_dropped: u64, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct StatsContainerResponse { + pub cgroup_stats: Option, + pub network_stats: Vec, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct WaitProcessResponse { + pub status: i32, +} + +#[derive(PartialEq, Clone, Default)] +pub struct ReadStreamRequest { + pub process_id: ContainerProcessID, + pub len: u32, +} + +#[derive(PartialEq, Clone, Default)] +pub struct ReadStreamResponse { + pub data: Vec, +} + +#[derive(PartialEq, Clone, Default)] +pub struct CloseStdinRequest { + pub process_id: ContainerProcessID, +} + +#[derive(PartialEq, Clone, Default)] +pub struct TtyWinResizeRequest { + pub process_id: ContainerProcessID, + pub row: u32, + pub column: u32, +} + +#[derive(Debug, PartialEq, Clone, Default)] +pub struct UpdateInterfaceRequest { + pub interface: Option, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct UpdateRoutesRequest { + pub route: Option, +} + +#[derive(Deserialize, PartialEq, Clone, Default, Debug)] +pub struct ARPNeighbor { + pub to_ip_address: Option, + pub device: String, + pub ll_addr: String, + pub state: i32, + pub flags: i32, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct ARPNeighbors { + pub neighbors: Vec, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct AddArpNeighborRequest { + pub neighbors: Option, +} + +#[derive(PartialEq, Clone, Default)] +pub struct KernelModule { + pub name: String, + pub parameters: Vec, +} + +impl KernelModule { + pub fn set_kernel_modules(modules: Vec) -> Result> { + let mut kernel_modules = Vec::new(); + for module_string in modules { + if module_string.is_empty() { + continue; + } + let kernel_module = Self::try_from(module_string)?; + kernel_modules.push(kernel_module); + } + Ok(kernel_modules) + } +} + +impl TryFrom for KernelModule { + type Error = anyhow::Error; + // input string: " ModuleName Param1 Param2 ... 
" + // NOTICE: " ModuleName Param1="spaces in here" " => KernelModule { name: ModuleName, parameters: Param1="spaces in here" } + fn try_from(str: String) -> Result { + let split: Vec<&str> = str.split(' ').collect(); + let mut name = String::new(); + let mut parameters = Vec::new(); + + let mut flag = false; + for (index, info) in split.iter().enumerate() { + if index == 0 { + name = info.to_string(); + } else if flag { + // a former param's string contains \" + if let Some(former_param) = parameters.pop() { + let cur_param = format!("{} {}", former_param, info); + parameters.push(cur_param); + } + } else { + parameters.push(info.to_string()); + } + + if info.contains('\"') { + flag = !flag; + } + } + + if flag { + return Err(anyhow!("\" not match")); + } + + Ok(KernelModule { name, parameters }) + } +} + +#[derive(PartialEq, Clone, Default)] +pub struct CreateSandboxRequest { + pub hostname: String, + pub dns: Vec, + pub storages: Vec, + pub sandbox_pidns: bool, + pub sandbox_id: String, + pub guest_hook_path: String, + pub kernel_modules: Vec, +} + +#[derive(PartialEq, Clone, Default)] +pub struct OnlineCPUMemRequest { + pub wait: bool, + pub nb_cpus: u32, + pub cpu_only: bool, +} + +#[derive(PartialEq, Clone, Default)] +pub struct ReseedRandomDevRequest { + pub data: ::std::vec::Vec, +} + +#[derive(PartialEq, Clone, Default)] +pub struct GetGuestDetailsRequest { + pub mem_block_size: bool, + pub mem_hotplug_probe: bool, +} + +#[derive(PartialEq, Clone, Default)] +pub struct MemHotplugByProbeRequest { + pub mem_hotplug_probe_addr: ::std::vec::Vec, +} + +#[derive(PartialEq, Clone, Default)] +pub struct SetGuestDateTimeRequest { + pub sec: i64, + pub usec: i64, +} + +#[derive(PartialEq, Clone, Default)] +pub struct AgentDetails { + pub version: String, + pub init_daemon: bool, + pub device_handlers: Vec, + pub storage_handlers: Vec, + pub supports_seccomp: bool, +} + +#[derive(PartialEq, Clone, Default)] +pub struct GuestDetailsResponse { + pub mem_block_size_bytes: u64, + pub agent_details: Option, + pub support_mem_hotplug_probe: bool, +} + +#[derive(PartialEq, Clone, Default)] +pub struct CopyFileRequest { + pub path: String, + pub file_size: i64, + pub file_mode: u32, + pub dir_mode: u32, + pub uid: i32, + pub gid: i32, + pub offset: i64, + pub data: ::std::vec::Vec, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct CheckRequest { + pub service: String, +} + +impl CheckRequest { + pub fn new(service: &str) -> Self { + Self { + service: service.to_string(), + } + } +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct HealthCheckResponse { + pub status: u32, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct VersionCheckResponse { + pub grpc_version: String, + pub agent_version: String, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct MetricsResponse { + pub metrics: String, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct OomEventResponse { + pub container_id: String, +} + +// ResizeVolumeRequest is also the common struct for serialization and deserialization with json +// between shim-client HTTP calls to the shim-mgmt-server +#[derive(Serialize, Deserialize, PartialEq, Clone, Default, Debug)] +pub struct ResizeVolumeRequest { + pub volume_guest_path: String, + pub size: u64, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct VolumeStatsRequest { + pub volume_guest_path: String, +} + +#[derive(PartialEq, Clone, Default, Debug)] +pub struct VolumeStatsResponse { + pub data: String, +} + +#[cfg(test)] +mod test { + use 
std::convert::TryFrom; + + use super::KernelModule; + + #[test] + fn test_new_kernel_module() { + let kernel_module_str1 = "ModuleName Param1 Param2"; + let kernel_module1 = KernelModule::try_from(kernel_module_str1.to_string()).unwrap(); + assert!(kernel_module1.name == "ModuleName"); + assert!(kernel_module1.parameters[0] == "Param1"); + assert!(kernel_module1.parameters[1] == "Param2"); + + let kernel_module_str2 = "ModuleName Param1=\"spaces in here\""; + let kernel_module2 = KernelModule::try_from(kernel_module_str2.to_string()).unwrap(); + assert!(kernel_module2.name == "ModuleName"); + assert!(kernel_module2.parameters[0] == "Param1=\"spaces in here\""); + + // exception case + let kernel_module_str3 = "ModuleName \"Param1"; + let kernel_module3 = KernelModule::try_from(kernel_module_str3.to_string()); + assert!(kernel_module3.is_err()); + } + + #[test] + fn test_kernel_modules() { + let kernel_module_str1 = "ModuleName1 Param1 Param2".to_string(); + let kernel_module_str2 = "".to_string(); + let kernel_module_str3 = "ModuleName2".to_string(); + let kernel_modules_str = vec![kernel_module_str1, kernel_module_str2, kernel_module_str3]; + + let kernel_modules = KernelModule::set_kernel_modules(kernel_modules_str).unwrap(); + assert!(kernel_modules.len() == 2); + assert!(kernel_modules[0].name == "ModuleName1"); + assert!(kernel_modules[0].parameters.len() == 2); + assert!(kernel_modules[1].name == "ModuleName2"); + assert!(kernel_modules[1].parameters.is_empty()); + } +} diff --git a/src/runtime-rs/crates/hypervisor/Cargo.toml b/src/runtime-rs/crates/hypervisor/Cargo.toml new file mode 100644 index 000000000000..f8e5497f5dbb --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/Cargo.toml @@ -0,0 +1,52 @@ +[package] +name = "hypervisor" +version = "0.1.0" +authors = ["The Kata Containers community "] +edition = "2018" +license = "Apache-2.0" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +actix-rt = "2.7.0" +anyhow = "^1.0" +async-trait = "0.1.48" +dbs-utils = { path = "../../../dragonball/src/dbs_utils" } +go-flag = "0.1.0" +libc = ">=0.2.39" +nix = "0.24.2" +persist = { path = "../persist" } +rust-ini = "0.18.0" +seccompiler = "0.2.0" +serde = { version = "1.0.138", features = ["derive"] } +serde_json = ">=1.0.9" +slog = "2.5.2" +slog-scope = "4.4.0" +thiserror = "1.0" +tokio = { version = "1.28.1", features = ["sync", "fs"] } +vmm-sys-util = "0.11.0" +rand = "0.8.4" +path-clean = "1.0.1" +lazy_static = "1.4" +tracing = "0.1.36" + +kata-sys-util = { path = "../../../libs/kata-sys-util" } +kata-types = { path = "../../../libs/kata-types" } +logging = { path = "../../../libs/logging" } +shim-interface = { path = "../../../libs/shim-interface" } + +dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs", "dbs-upcall"] } + +ch-config = { path = "ch-config", optional = true } +tests_utils = { path = "../../tests/utils" } + +futures = "0.3.25" +safe-path = "0.1.0" +crossbeam-channel = "0.5.6" + +[features] +default = [] + +# Feature is not yet complete, so not enabled by default. +# See https://github.com/kata-containers/kata-containers/issues/6264. 
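+# It can be enabled explicitly when building this crate, e.g. with
+# `cargo build --features cloud-hypervisor` (standard Cargo feature syntax).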
+cloud-hypervisor = ["ch-config"]
diff --git a/src/runtime-rs/crates/hypervisor/README.md b/src/runtime-rs/crates/hypervisor/README.md
new file mode 100644
index 000000000000..dcaa4e57e65e
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/README.md
@@ -0,0 +1,116 @@
+# Multi-vmm support for runtime-rs
+
+## 0. Status
+
+External hypervisor support is currently being developed.
+
+See [the main tracking issue](https://github.com/kata-containers/kata-containers/issues/4634)
+for further details.
+
+### Cloud Hypervisor
+
+A basic implementation currently exists for Cloud Hypervisor. However,
+since it is not yet fully functional, the feature is disabled by
+default. When the implementation matures, the feature will be enabled
+by default.
+
+> **Note:**
+>
+> To enable the feature, follow the instructions on https://github.com/kata-containers/kata-containers/pull/6201.
+
+See the [Cloud Hypervisor tracking issue](https://github.com/kata-containers/kata-containers/issues/6263)
+for further details.
+
+The following are some key points for supporting multiple VMMs in the Rust runtime.
+
+## 1. Hypervisor Config
+
+The diagram below gives an overview of the hypervisor config.
+
+![hypervisor config](../../docs/images/hypervisor-config.svg)
+
+The VMM's config info is loaded when the runtime instance is initialized; the following functions are the important ones to focus on.
+
+### `VirtContainer::init()`
+
+This function initializes the runtime handler. It registers the plugins into `HYPERVISOR_PLUGINS`. Different plugins are needed for different hypervisors.
+```rust
+#[async_trait]
+impl RuntimeHandler for VirtContainer {
+    fn init() -> Result<()> {
+        // register
+        let dragonball_config = Arc::new(DragonballConfig::new());
+        register_hypervisor_plugin("dragonball", dragonball_config);
+        Ok(())
+    }
+}
+```
+
+[This is the plugin for QEMU; plugins for other VMMs are not yet supported.](../../../libs/kata-types/src/config/hypervisor/qemu.rs)
+The QEMU plugin defines the methods that adjust and validate the hypervisor config; those methods can be modified if needed.
+
+After that, when the TOML config is loaded, the plugins are called to adjust and validate the config file.
+```rust
+async fn try_init(&mut self, spec: &oci::Spec) -> Result<()> {
+    ...
+    let config = load_config(spec).context("load config")?;
+    ...
+}
+```
+
+### `new_instance`
+
+This function creates a `runtime_instance`, which includes the operations for containers and the sandbox. At the same time, a hypervisor instance is created here and its hypervisor config is set.
+```rust
+async fn new_hypervisor(toml_config: &TomlConfig) -> Result<Arc<dyn Hypervisor>> {
+    let hypervisor_name = &toml_config.runtime.hypervisor_name;
+    let hypervisor_config = toml_config
+        .hypervisor
+        .get(hypervisor_name)
+        .ok_or_else(|| anyhow!("failed to get hypervisor for {}", &hypervisor_name))
+        .context("get hypervisor")?;
+
+    // TODO: support other hypervisor
+    match hypervisor_name.as_str() {
+        HYPERVISOR_DRAGONBALL => {
+            let mut hypervisor = Dragonball::new();
+            hypervisor
+                .set_hypervisor_config(hypervisor_config.clone())
+                .await;
+            Ok(Arc::new(hypervisor))
+        }
+        _ => Err(anyhow!("Unsupported hypervisor {}", &hypervisor_name)),
+    }
+}
+```
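+Because `new_hypervisor()` returns an `Arc<dyn Hypervisor>`, callers only deal
+with the `Hypervisor` trait described in the next section and never need to
+know which VMM was selected. The following is a minimal, hypothetical usage
+sketch; the function name, sandbox id and timeout value are illustrative and
+not taken from the code base:
+```rust
+use std::sync::Arc;
+
+use anyhow::Result;
+
+// Boot a VM through the trait object, regardless of the underlying VMM.
+async fn boot_vm(hypervisor: Arc<dyn Hypervisor>) -> Result<()> {
+    // Prepare the VM; no network namespace is passed in this sketch.
+    hypervisor.prepare_vm("sandbox-1", None).await?;
+    // Start the VM with an illustrative timeout value.
+    hypervisor.start_vm(10_000).await?;
+    Ok(())
+}
+```
+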
+## 2. Hypervisor Trait
+
+[To support multiple VMMs, the `Hypervisor` trait needs to be implemented.](./src/lib.rs)
+```rust
+pub trait Hypervisor: Send + Sync {
+    // vm manager
+    async fn prepare_vm(&self, id: &str, netns: Option<String>) -> Result<()>;
+    async fn start_vm(&self, timeout: i32) -> Result<()>;
+    async fn stop_vm(&self) -> Result<()>;
+    async fn pause_vm(&self) -> Result<()>;
+    async fn save_vm(&self) -> Result<()>;
+    async fn resume_vm(&self) -> Result<()>;
+
+    // device manager
+    async fn add_device(&self, device: device::Device) -> Result<()>;
+    async fn remove_device(&self, device: device::Device) -> Result<()>;
+
+    // utils
+    async fn get_agent_socket(&self) -> Result<String>;
+    async fn disconnect(&self);
+    async fn hypervisor_config(&self) -> HypervisorConfig;
+    async fn get_thread_ids(&self) -> Result<VcpuThreadIds>;
+    async fn get_pids(&self) -> Result<Vec<u32>>;
+    async fn cleanup(&self) -> Result<()>;
+    async fn check(&self) -> Result<()>;
+    async fn get_jailer_root(&self) -> Result<String>;
+    async fn save_state(&self) -> Result<HypervisorState>;
+}
+```
+
+In the current design, a VM is started in the following steps.
+
+![vmm start](../../docs/images/vm-start.svg)
diff --git a/src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml b/src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml
new file mode 100644
index 000000000000..0e0b45e59437
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml
@@ -0,0 +1,26 @@
+# Copyright (c) 2022-2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+[package]
+name = "ch-config"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+anyhow = "1.0.68"
+serde = { version = "1.0.145", features = ["rc", "derive"] }
+serde_json = "1.0.91"
+tokio = { version = "1.28.1", features = ["sync", "rt"] }
+
+# Cloud Hypervisor public HTTP API functions
+# Note that the version specified is not necessarily the version of CH
+# being used. This version is used to pin the CH config structure
+# which is relatively static.
+api_client = { git = "https://github.com/cloud-hypervisor/cloud-hypervisor", crate = "api_client", tag = "v27.0" }
+
+kata-types = { path = "../../../../libs/kata-types"}
+nix = "0.26.2"
+thiserror = "1.0.38"
diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs
new file mode 100644
index 000000000000..c1c8685c798f
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs
@@ -0,0 +1,124 @@
+// Copyright (c) 2022-2023 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::{DeviceConfig, DiskConfig, FsConfig, VmConfig};
+use anyhow::{anyhow, Result};
+use api_client::simple_api_full_command_and_response;
+
+use std::os::unix::net::UnixStream;
+use tokio::task;
+
+pub async fn cloud_hypervisor_vmm_ping(mut socket: UnixStream) -> Result<Option<String>> {
+    task::spawn_blocking(move || -> Result<Option<String>> {
+        let response = simple_api_full_command_and_response(&mut socket, "GET", "vmm.ping", None)
+            .map_err(|e| anyhow!(e))?;
+
+        Ok(response)
+    })
+    .await?
+}
+
+pub async fn cloud_hypervisor_vmm_shutdown(mut socket: UnixStream) -> Result<Option<String>> {
+    task::spawn_blocking(move || -> Result<Option<String>> {
+        let response =
+            simple_api_full_command_and_response(&mut socket, "PUT", "vmm.shutdown", None)
+                .map_err(|e| anyhow!(e))?;
+
+        Ok(response)
+    })
+    .await?
+} + +pub async fn cloud_hypervisor_vm_create( + mut socket: UnixStream, + cfg: VmConfig, +) -> Result> { + let serialised = serde_json::to_string_pretty(&cfg)?; + + task::spawn_blocking(move || -> Result> { + let data = Some(serialised.as_str()); + + let response = simple_api_full_command_and_response(&mut socket, "PUT", "vm.create", data) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +pub async fn cloud_hypervisor_vm_start(mut socket: UnixStream) -> Result> { + task::spawn_blocking(move || -> Result> { + let response = simple_api_full_command_and_response(&mut socket, "PUT", "vm.boot", None) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +#[allow(dead_code)] +pub async fn cloud_hypervisor_vm_stop(mut socket: UnixStream) -> Result> { + task::spawn_blocking(move || -> Result> { + let response = + simple_api_full_command_and_response(&mut socket, "PUT", "vm.shutdown", None) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +pub async fn cloud_hypervisor_vm_blockdev_add( + mut socket: UnixStream, + blk_config: DiskConfig, +) -> Result> { + task::spawn_blocking(move || -> Result> { + let response = simple_api_full_command_and_response( + &mut socket, + "PUT", + "vm.add-disk", + Some(&serde_json::to_string(&blk_config)?), + ) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +#[allow(dead_code)] +pub async fn cloud_hypervisor_vm_device_add(mut socket: UnixStream) -> Result> { + let device_config = DeviceConfig::default(); + + task::spawn_blocking(move || -> Result> { + let response = simple_api_full_command_and_response( + &mut socket, + "PUT", + "vm.add-device", + Some(&serde_json::to_string(&device_config)?), + ) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +pub async fn cloud_hypervisor_vm_fs_add( + mut socket: UnixStream, + fs_config: FsConfig, +) -> Result> { + task::spawn_blocking(move || -> Result> { + let response = simple_api_full_command_and_response( + &mut socket, + "PUT", + "vm.add-fs", + Some(&serde_json::to_string(&fs_config)?), + ) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? 
+} diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/convert.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/convert.rs new file mode 100644 index 000000000000..638a333560c0 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/convert.rs @@ -0,0 +1,1903 @@ +// Copyright (c) 2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use crate::net_util::MAC_ADDR_LEN; +use crate::NamedHypervisorConfig; +use crate::VmConfig; +use crate::{ + ConsoleConfig, ConsoleOutputMode, CpuFeatures, CpuTopology, CpusConfig, DiskConfig, MacAddr, + MemoryConfig, PayloadConfig, PlatformConfig, PmemConfig, RngConfig, VsockConfig, +}; +use anyhow::{anyhow, Context, Result}; +use kata_types::config::default::DEFAULT_CH_ENTROPY_SOURCE; +use kata_types::config::hypervisor::{CpuInfo, MachineInfo, MemoryInfo}; +use kata_types::config::BootInfo; +use std::convert::TryFrom; +use std::fmt::Display; +use std::path::PathBuf; + +use crate::errors::*; + +// 1 MiB +const MIB: u64 = 1024 * 1024; + +const PMEM_ALIGN_BYTES: u64 = 2 * MIB; + +const DEFAULT_CH_MAX_PHYS_BITS: u8 = 46; + +const DEFAULT_VSOCK_CID: u64 = 3; + +impl TryFrom for VmConfig { + type Error = VmConfigError; + + fn try_from(n: NamedHypervisorConfig) -> Result { + let kernel_params = if n.kernel_params.is_empty() { + None + } else { + Some(n.kernel_params) + }; + + let cfg = n.cfg; + + let debug = cfg.debug_info.enable_debug; + let confidential_guest = cfg.security_info.confidential_guest; + + let tdx_enabled = n.tdx_enabled; + + let vsock_socket_path = if n.vsock_socket_path.is_empty() { + return Err(VmConfigError::EmptyVsockSocketPath); + } else { + n.vsock_socket_path + }; + + let sandbox_path = if n.sandbox_path.is_empty() { + return Err(VmConfigError::EmptySandboxPath); + } else { + n.sandbox_path + }; + + let fs = n.shared_fs_devices; + let net = n.network_devices; + + let cpus = CpusConfig::try_from(cfg.cpu_info).map_err(VmConfigError::CPUError)?; + + let rng = RngConfig::from(cfg.machine_info); + + // Note how CH handles the different image types: + // + // - A standard image is specified in PmemConfig. + // - An initrd/initramfs is specified in PayloadConfig. + // - A confidential guest image is specified by a DiskConfig. + // - If TDX is enabled, the firmware (`td-shim` [1]) must be + // specified in PayloadConfig. + // - A confidential guest initrd is specified by a PayloadConfig with + // firmware. 
+ // + // [1] - https://github.com/confidential-containers/td-shim + let boot_info = cfg.boot_info; + + let use_initrd = !boot_info.initrd.is_empty(); + let use_image = !boot_info.image.is_empty(); + + if use_initrd && use_image { + return Err(VmConfigError::MultipleBootFiles); + } + + if !use_initrd && !use_image { + return Err(VmConfigError::NoBootFile); + } + + let pmem = if use_initrd || confidential_guest { + None + } else { + let pmem = PmemConfig::try_from(&boot_info).map_err(VmConfigError::PmemError)?; + + Some(vec![pmem]) + }; + + let payload = Some( + PayloadConfig::try_from((boot_info.clone(), kernel_params, tdx_enabled)) + .map_err(VmConfigError::PayloadError)?, + ); + + let disks = if confidential_guest && use_image { + let disk = DiskConfig::try_from(boot_info).map_err(VmConfigError::DiskError)?; + + Some(vec![disk]) + } else { + None + }; + + let serial = get_serial_cfg(debug, confidential_guest); + let console = get_console_cfg(debug, confidential_guest); + + let memory = MemoryConfig::try_from((cfg.memory_info, confidential_guest)) + .map_err(VmConfigError::MemoryError)?; + + std::fs::create_dir_all(sandbox_path.clone()) + .map_err(|e| VmConfigError::SandboxError(sandbox_path, e.to_string()))?; + + let vsock = VsockConfig::try_from((vsock_socket_path, DEFAULT_VSOCK_CID)) + .map_err(VmConfigError::VsockError)?; + + let platform = get_platform_cfg(tdx_enabled); + + let cfg = VmConfig { + cpus, + memory, + serial, + console, + payload, + fs, + net, + pmem, + disks, + vsock: Some(vsock), + rng, + platform, + + ..Default::default() + }; + + Ok(cfg) + } +} + +impl TryFrom<(String, u64)> for VsockConfig { + type Error = VsockConfigError; + + fn try_from(args: (String, u64)) -> Result { + let vsock_socket_path = args.0; + let cid = args.1; + + let path = if vsock_socket_path.is_empty() { + return Err(VsockConfigError::NoVsockSocketPath); + } else { + vsock_socket_path + }; + + let cfg = VsockConfig { + cid, + socket: PathBuf::from(path), + + ..Default::default() + }; + + Ok(cfg) + } +} + +impl TryFrom<(MemoryInfo, bool)> for MemoryConfig { + type Error = MemoryConfigError; + + fn try_from(args: (MemoryInfo, bool)) -> Result { + let mem = args.0; + let confidential_guest = args.1; + + if mem.default_memory == 0 { + return Err(MemoryConfigError::NoDefaultMemory); + } + + let sysinfo = nix::sys::sysinfo::sysinfo().map_err(MemoryConfigError::SysInfoFail)?; + + let max_mem_bytes = sysinfo.ram_total(); + + let mem_bytes: u64 = MIB + .checked_mul(mem.default_memory as u64) + .ok_or(()) + .map_err(|_| MemoryConfigError::BadDefaultMemSize(mem.default_memory))?; + + if mem_bytes > max_mem_bytes { + return Err(MemoryConfigError::DefaultMemSizeTooBig); + } + + let hotplug_size = if confidential_guest { + None + } else { + // The amount of memory that can be hot-plugged is the total less the + // amount allocated at VM start. + let hotplug_size_bytes = max_mem_bytes + .checked_sub(mem_bytes) + .ok_or(()) + .map_err(|_| MemoryConfigError::BadMemSizeForHotplug(max_mem_bytes))?; + + let aligned_hotplug_size_bytes = + checked_next_multiple_of(hotplug_size_bytes, PMEM_ALIGN_BYTES) + .ok_or(()) + .map_err(|_| MemoryConfigError::BadPmemAlign(hotplug_size_bytes))?; + + Some(aligned_hotplug_size_bytes) + }; + + let cfg = MemoryConfig { + size: mem_bytes, + + // Required + shared: true, + + hotplug_size, + + ..Default::default() + }; + + Ok(cfg) + } +} + +// Return the next multiple of 'multiple' starting from the specified value +// (aka align value to multiple). 
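+// For example, with PMEM_ALIGN_BYTES = 2 MiB, a hotplug size of 3 MiB is
+// rounded up to 4 MiB (illustrative values, not taken from a real config).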
+// +// This is a temporary solution until checked_next_multiple_of() integer +// method is available in the rust language. +// +// See: https://github.com/rust-lang/rust/issues/88581 +fn checked_next_multiple_of(value: u64, multiple: u64) -> Option { + match value.checked_rem(multiple) { + None => Some(value), + Some(r) => value.checked_add(multiple - r), + } +} + +impl TryFrom for CpusConfig { + type Error = CpusConfigError; + + fn try_from(cpu: CpuInfo) -> Result { + let boot_vcpus = + u8::try_from(cpu.default_vcpus).map_err(CpusConfigError::BootVCPUsTooBig)?; + + let max_vcpus = + u8::try_from(cpu.default_maxvcpus).map_err(CpusConfigError::MaxVCPUsTooBig)?; + + let topology = CpuTopology { + cores_per_die: max_vcpus, + threads_per_core: 1, + dies_per_package: 1, + packages: 1, + }; + + let max_phys_bits = DEFAULT_CH_MAX_PHYS_BITS; + + let features = CpuFeatures::from(cpu.cpu_features); + + let cfg = CpusConfig { + boot_vcpus, + max_vcpus, + max_phys_bits, + topology: Some(topology), + features, + + ..Default::default() + }; + + Ok(cfg) + } +} + +impl From for CpuFeatures { + #[cfg(target_arch = "x86_64")] + fn from(s: String) -> Self { + let amx = s.split(',').any(|x| x == "amx"); + + CpuFeatures { amx } + } + + #[cfg(not(target_arch = "x86_64"))] + fn from(_s: String) -> Self { + CpuFeatures::default() + } +} + +// - The 2nd tuple element is the space separated final kernel parameters list. +// It is made up of both the CH specific kernel parameters and the user +// specified parameters from BootInfo. +// +// The kernel params cannot be created only from BootInfo since that contains +// the user-specified kernel parameters only. +// +// - The 3rd tuple element determines if TDX is enabled. +// +impl TryFrom<(BootInfo, Option, bool)> for PayloadConfig { + type Error = PayloadConfigError; + + fn try_from(args: (BootInfo, Option, bool)) -> Result { + let boot_info = args.0; + let cmdline = args.1; + let tdx_enabled = args.2; + + // The kernel is always specified here, + // not in the top level VmConfig.kernel. 
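+        //
+        // For illustration only (the paths are made up), a non-TDX initrd boot
+        // is expected to yield:
+        //
+        //   PayloadConfig {
+        //       kernel:    Some(PathBuf::from("/path/to/vmlinux")),
+        //       initramfs: Some(PathBuf::from("/path/to/initrd.img")),
+        //       cmdline:   <the kernel parameters, if any>,
+        //       firmware:  None,
+        //   }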
+ let kernel = if boot_info.kernel.is_empty() { + return Err(PayloadConfigError::NoKernel); + } else { + PathBuf::from(boot_info.kernel) + }; + + let initramfs = if boot_info.initrd.is_empty() { + None + } else { + Some(PathBuf::from(boot_info.initrd)) + }; + + let firmware = if tdx_enabled { + if boot_info.firmware.is_empty() { + return Err(PayloadConfigError::TDXFirmwareMissing); + } else { + Some(PathBuf::from(boot_info.firmware)) + } + } else if boot_info.firmware.is_empty() { + None + } else { + Some(PathBuf::from(boot_info.firmware)) + }; + + let payload = PayloadConfig { + kernel: Some(kernel), + initramfs, + cmdline, + firmware, + }; + + Ok(payload) + } +} + +impl TryFrom for DiskConfig { + type Error = DiskConfigError; + + fn try_from(boot_info: BootInfo) -> Result { + let path = if boot_info.image.is_empty() { + return Err(DiskConfigError::MissingPath); + } else { + PathBuf::from(boot_info.image) + }; + + let disk = DiskConfig { + path: Some(path), + readonly: true, + + ..Default::default() + }; + + Ok(disk) + } +} + +impl From for RngConfig { + fn from(m: MachineInfo) -> Self { + let entropy_source = if !m.entropy_source.is_empty() { + m.entropy_source + } else { + DEFAULT_CH_ENTROPY_SOURCE.to_string() + }; + + RngConfig { + src: PathBuf::from(entropy_source), + + ..Default::default() + } + } +} + +impl TryFrom<&BootInfo> for PmemConfig { + type Error = PmemConfigError; + + fn try_from(b: &BootInfo) -> Result { + let file = if b.image.is_empty() { + return Err(PmemConfigError::MissingImage); + } else { + b.image.clone() + }; + + let cfg = PmemConfig { + file: PathBuf::from(file), + discard_writes: true, + + ..Default::default() + }; + + Ok(cfg) + } +} + +fn get_serial_cfg(debug: bool, confidential_guest: bool) -> ConsoleConfig { + let mode = if confidential_guest { + ConsoleOutputMode::Off + } else if debug { + ConsoleOutputMode::Tty + } else { + ConsoleOutputMode::Off + }; + + ConsoleConfig { + file: None, + mode, + iommu: false, + } +} + +fn get_console_cfg(debug: bool, confidential_guest: bool) -> ConsoleConfig { + let mode = if confidential_guest { + if debug { + ConsoleOutputMode::Tty + } else { + ConsoleOutputMode::Off + } + } else { + ConsoleOutputMode::Off + }; + + ConsoleConfig { + file: None, + mode, + iommu: false, + } +} + +fn get_platform_cfg(tdx_enabled: bool) -> Option { + if tdx_enabled { + let platform = PlatformConfig { + tdx: true, + + ..Default::default() + }; + + Some(platform) + } else { + None + } +} + +#[allow(dead_code)] +fn parse_mac(s: &S) -> Result +where + S: AsRef + ?Sized + Display, +{ + let v: Vec<&str> = s.as_ref().split(':').collect(); + let mut bytes = [0u8; MAC_ADDR_LEN]; + + if v.len() != MAC_ADDR_LEN { + return Err(anyhow!( + "invalid MAC {} (length {}, expected {})", + s, + v.len(), + MAC_ADDR_LEN + )); + } + + for i in 0..MAC_ADDR_LEN { + if v[i].len() != 2 { + return Err(anyhow!( + "invalid MAC {} (segment {} length {}, expected {})", + s, + i, + v.len(), + 2 + )); + } + + bytes[i] = + u8::from_str_radix(v[i], 16).context(format!("failed to parse MAC address: {}", s))?; + } + + Ok(MacAddr { bytes }) +} + +#[cfg(test)] +mod tests { + use super::*; + use kata_types::config::hypervisor::{Hypervisor as HypervisorConfig, SecurityInfo}; + + // Generate a valid generic memory info object and a valid CH specific + // memory config object. 
+ fn make_memory_objects( + default_memory_mib: u32, + usable_max_mem_bytes: u64, + confidential_guest: bool, + ) -> (MemoryInfo, MemoryConfig) { + let mem_info = MemoryInfo { + default_memory: default_memory_mib, + + ..Default::default() + }; + + let hotplug_size = if confidential_guest { + None + } else { + checked_next_multiple_of( + usable_max_mem_bytes - (default_memory_mib as u64 * MIB), + PMEM_ALIGN_BYTES, + ) + }; + + let mem_cfg = MemoryConfig { + size: default_memory_mib as u64 * MIB, + shared: true, + hotplug_size, + + ..Default::default() + }; + + (mem_info, mem_cfg) + } + + // The "default" sent to CH but without "cores_per_die" + // to allow the tests to set that value explicitly. + fn make_bare_topology() -> CpuTopology { + CpuTopology { + threads_per_core: 1, + dies_per_package: 1, + packages: 1, + + ..Default::default() + } + } + + fn make_cpu_objects(cpu_default: u8, cpu_max: u8) -> (CpuInfo, CpusConfig) { + let cpu_info = CpuInfo { + default_vcpus: cpu_default as i32, + default_maxvcpus: cpu_max as u32, + + ..Default::default() + }; + + let cpus_config = CpusConfig { + boot_vcpus: cpu_default, + max_vcpus: cpu_max, + topology: Some(CpuTopology { + cores_per_die: cpu_max, + + ..make_bare_topology() + }), + max_phys_bits: DEFAULT_CH_MAX_PHYS_BITS, + + ..Default::default() + }; + + (cpu_info, cpus_config) + } + + fn make_bootinfo_pmemconfig_objects(image: &str) -> (BootInfo, PmemConfig) { + let boot_info = BootInfo { + image: image.to_string(), + + ..Default::default() + }; + + let pmem_config = PmemConfig { + file: PathBuf::from(image), + discard_writes: true, + + ..Default::default() + }; + + (boot_info, pmem_config) + } + + fn make_bootinfo_diskconfig_objects(path: &str) -> (BootInfo, DiskConfig) { + let boot_info = BootInfo { + image: path.to_string(), + + ..Default::default() + }; + + let disk_config = DiskConfig { + path: Some(PathBuf::from(path)), + readonly: true, + + ..Default::default() + }; + + (boot_info, disk_config) + } + + // Create BootInfo and PayloadConfig objects for non-TDX scenarios. 
+ fn make_bootinfo_payloadconfig_objects( + kernel: &str, + initramfs: &str, + firmware: Option<&str>, + cmdline: Option, + ) -> (BootInfo, PayloadConfig) { + let boot_info = if let Some(firmware) = firmware { + BootInfo { + kernel: kernel.into(), + initrd: initramfs.into(), + firmware: firmware.into(), + + ..Default::default() + } + } else { + BootInfo { + kernel: kernel.into(), + initrd: initramfs.into(), + + ..Default::default() + } + }; + + let payload_firmware = firmware.map(PathBuf::from); + + let payload_config = PayloadConfig { + kernel: Some(PathBuf::from(kernel)), + initramfs: Some(PathBuf::from(initramfs)), + firmware: payload_firmware, + cmdline, + }; + + (boot_info, payload_config) + } + + fn make_machineinfo_rngconfig_objects(entropy_source: &str) -> (MachineInfo, RngConfig) { + let machine_info = MachineInfo { + entropy_source: entropy_source.to_string(), + + ..Default::default() + }; + + let rng_config = RngConfig { + src: PathBuf::from(entropy_source.to_string()), + + ..Default::default() + }; + + (machine_info, rng_config) + } + + #[test] + fn test_get_serial_cfg() { + #[derive(Debug)] + struct TestData { + debug: bool, + confidential_guest: bool, + result: ConsoleConfig, + } + + let tests = &[ + TestData { + debug: false, + confidential_guest: false, + result: ConsoleConfig { + file: None, + mode: ConsoleOutputMode::Off, + iommu: false, + }, + }, + TestData { + debug: true, + confidential_guest: false, + result: ConsoleConfig { + file: None, + mode: ConsoleOutputMode::Tty, + iommu: false, + }, + }, + TestData { + debug: false, + confidential_guest: true, + result: ConsoleConfig { + file: None, + mode: ConsoleOutputMode::Off, + iommu: false, + }, + }, + TestData { + debug: true, + confidential_guest: true, + result: ConsoleConfig { + file: None, + mode: ConsoleOutputMode::Off, + iommu: false, + }, + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = get_serial_cfg(d.debug, d.confidential_guest); + + let msg = format!("{}: actual result: {:?}", msg, result); + + if std::env::var("DEBUG").is_ok() { + eprintln!("DEBUG: {}", msg); + } + + assert_eq!(result.file, d.result.file, "{}", msg); + assert_eq!(result.iommu, d.result.iommu, "{}", msg); + assert_eq!(result.mode, d.result.mode, "{}", msg); + } + } + + #[test] + fn test_get_console_cfg() { + #[derive(Debug)] + struct TestData { + debug: bool, + confidential_guest: bool, + result: ConsoleConfig, + } + + let tests = &[ + TestData { + debug: false, + confidential_guest: false, + result: ConsoleConfig { + file: None, + mode: ConsoleOutputMode::Off, + iommu: false, + }, + }, + TestData { + debug: true, + confidential_guest: false, + result: ConsoleConfig { + file: None, + mode: ConsoleOutputMode::Off, + iommu: false, + }, + }, + TestData { + debug: false, + confidential_guest: true, + result: ConsoleConfig { + file: None, + mode: ConsoleOutputMode::Off, + iommu: false, + }, + }, + TestData { + debug: true, + confidential_guest: true, + result: ConsoleConfig { + file: None, + mode: ConsoleOutputMode::Tty, + iommu: false, + }, + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = get_console_cfg(d.debug, d.confidential_guest); + + let msg = format!("{}: actual result: {:?}", msg, result); + + if std::env::var("DEBUG").is_ok() { + eprintln!("DEBUG: {}", msg); + } + + assert_eq!(result, d.result, "{}", msg); + } + } + + #[test] + fn test_get_platform_cfg() { + #[derive(Debug)] + struct TestData { + 
tdx_enabled: bool, + result: Option, + } + + let tests = &[ + TestData { + tdx_enabled: false, + result: None, + }, + TestData { + tdx_enabled: true, + result: Some(PlatformConfig { + tdx: true, + + ..Default::default() + }), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = get_platform_cfg(d.tdx_enabled); + + let msg = format!("{}: actual result: {:?}", msg, result); + + if std::env::var("DEBUG").is_ok() { + eprintln!("DEBUG: {}", msg); + } + + assert_eq!(result, d.result, "{}", msg); + } + } + + #[test] + fn test_bootinfo_to_pmemconfig() { + #[derive(Debug)] + struct TestData { + boot_info: BootInfo, + result: Result, + } + + let image = "/an/image"; + + let (boot_info_with_image, pmem_config) = make_bootinfo_pmemconfig_objects(image); + + let tests = &[ + TestData { + boot_info: BootInfo::default(), + result: Err(PmemConfigError::MissingImage), + }, + TestData { + boot_info: boot_info_with_image, + result: Ok(pmem_config), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = PmemConfig::try_from(&d.boot_info); + + let msg = format!("{}: actual result: {:?}", msg, result); + + if std::env::var("DEBUG").is_ok() { + eprintln!("DEBUG: {}", msg); + } + + if d.result.is_err() { + assert!(result.is_err(), "{}", msg); + + assert_eq!( + &result.unwrap_err(), + d.result.as_ref().unwrap_err(), + "{}", + msg + ); + + continue; + } + + assert!(result.is_ok(), "{}", msg); + assert_eq!(&result.unwrap(), d.result.as_ref().unwrap(), "{}", msg); + } + } + + #[test] + fn test_machineinfo_to_rngconfig() { + #[derive(Debug)] + struct TestData { + machine_info: MachineInfo, + result: RngConfig, + } + + let entropy_source = "/dev/foo"; + + let (machine_info, rng_config) = make_machineinfo_rngconfig_objects(entropy_source); + + let tests = &[ + TestData { + machine_info: MachineInfo::default(), + result: RngConfig { + src: PathBuf::from(DEFAULT_CH_ENTROPY_SOURCE.to_string()), + + ..Default::default() + }, + }, + TestData { + machine_info: MachineInfo { + entropy_source: DEFAULT_CH_ENTROPY_SOURCE.to_string(), + + ..Default::default() + }, + result: RngConfig { + src: PathBuf::from(DEFAULT_CH_ENTROPY_SOURCE.to_string()), + + ..Default::default() + }, + }, + TestData { + machine_info: MachineInfo { + entropy_source: entropy_source.to_string(), + + ..Default::default() + }, + result: RngConfig { + src: PathBuf::from(entropy_source.to_string()), + + ..Default::default() + }, + }, + TestData { + machine_info, + result: rng_config, + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = RngConfig::from(d.machine_info.clone()); + + let msg = format!("{}: actual result: {:?}", msg, result); + + if std::env::var("DEBUG").is_ok() { + eprintln!("DEBUG: {}", msg); + } + + assert_eq!(result, d.result, "{}", msg); + } + } + + #[test] + fn test_string_to_cpufeatures() { + #[derive(Debug)] + struct TestData<'a> { + s: &'a str, + result: CpuFeatures, + } + + let tests = &[ + TestData { + s: "", + result: CpuFeatures::default(), + }, + #[cfg(target_arch = "x86_64")] + TestData { + s: "amx", + result: CpuFeatures { amx: true }, + }, + #[cfg(target_arch = "x86_64")] + TestData { + s: "amxyz", + result: CpuFeatures { amx: false }, + }, + #[cfg(target_arch = "x86_64")] + TestData { + s: "aamx", + result: CpuFeatures { amx: false }, + }, + #[cfg(not(target_arch = "x86_64"))] + TestData { + s: "amx", + result: CpuFeatures::default(), + }, + ]; + 
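+        // The cases above rely on exact token matching: only a comma-separated
+        // "amx" entry enables the flag (see `impl From<String> for CpuFeatures`),
+        // which is why "amxyz" and "aamx" are expected to leave amx disabled.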
+ for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = CpuFeatures::from(d.s.to_string()); + + let msg = format!("{}: actual result: {:?}", msg, result); + + if std::env::var("DEBUG").is_ok() { + eprintln!("DEBUG: {}", msg); + } + + assert_eq!(result, d.result, "{}", msg); + } + } + + #[test] + fn test_bootinfo_to_diskconfig() { + #[derive(Debug)] + struct TestData { + boot_info: BootInfo, + result: Result, + } + + let path = "/some/where"; + + let (boot_info, disk_config) = make_bootinfo_diskconfig_objects(path); + + let tests = &[ + TestData { + boot_info: BootInfo::default(), + result: Err(DiskConfigError::MissingPath), + }, + TestData { + boot_info, + result: Ok(disk_config), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = DiskConfig::try_from(d.boot_info.clone()); + + let msg = format!("{}: actual result: {:?}", msg, result); + + if std::env::var("DEBUG").is_ok() { + eprintln!("DEBUG: {}", msg); + } + + assert_eq!(result, d.result, "{}", msg); + } + } + + #[test] + fn test_cpuinfo_to_cpusconfig() { + #[derive(Debug)] + struct TestData { + cpu_info: CpuInfo, + result: Result, + } + + let topology = make_bare_topology(); + + let u8_max = std::u8::MAX; + + let (cpu_info, cpus_config) = make_cpu_objects(7, u8_max); + + let tests = &[ + TestData { + cpu_info: CpuInfo::default(), + result: Ok(CpusConfig { + boot_vcpus: 0, + max_vcpus: 0, + topology: Some(CpuTopology { + cores_per_die: 0, + + ..topology + }), + max_phys_bits: DEFAULT_CH_MAX_PHYS_BITS, + + ..Default::default() + }), + }, + TestData { + cpu_info: CpuInfo { + default_vcpus: u8_max as i32, + + ..Default::default() + }, + result: Ok(CpusConfig { + boot_vcpus: u8_max, + max_vcpus: 0, + topology: Some(topology.clone()), + max_phys_bits: DEFAULT_CH_MAX_PHYS_BITS, + + ..Default::default() + }), + }, + TestData { + cpu_info: CpuInfo { + default_vcpus: u8_max as i32 + 1, + + ..Default::default() + }, + result: Err(CpusConfigError::BootVCPUsTooBig( + u8::try_from(u8_max as i32 + 1).unwrap_err(), + )), + }, + TestData { + cpu_info: CpuInfo { + default_maxvcpus: u8_max as u32 + 1, + + ..Default::default() + }, + result: Err(CpusConfigError::MaxVCPUsTooBig( + u8::try_from(u8_max as u32 + 1).unwrap_err(), + )), + }, + TestData { + cpu_info: CpuInfo { + default_vcpus: u8_max as i32, + default_maxvcpus: u8_max as u32, + + ..Default::default() + }, + result: Ok(CpusConfig { + boot_vcpus: u8_max, + max_vcpus: u8_max, + topology: Some(CpuTopology { + cores_per_die: u8_max, + + ..topology + }), + max_phys_bits: DEFAULT_CH_MAX_PHYS_BITS, + + ..Default::default() + }), + }, + TestData { + cpu_info: CpuInfo { + default_vcpus: (u8_max - 1) as i32, + default_maxvcpus: u8_max as u32, + + ..Default::default() + }, + result: Ok(CpusConfig { + boot_vcpus: (u8_max - 1), + max_vcpus: u8_max, + topology: Some(CpuTopology { + cores_per_die: u8_max, + + ..topology + }), + max_phys_bits: DEFAULT_CH_MAX_PHYS_BITS, + + ..Default::default() + }), + }, + TestData { + cpu_info, + result: Ok(cpus_config), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = CpusConfig::try_from(d.cpu_info.clone()); + + let msg = format!("{}: actual result: {:?}", msg, result); + + if std::env::var("DEBUG").is_ok() { + eprintln!("DEBUG: {}", msg); + } + + if d.result.is_err() { + assert!(result.is_err(), "{}", msg); + + assert_eq!( + &result.unwrap_err(), + d.result.as_ref().unwrap_err(), + "{}", + msg + 
); + continue; + } + + assert!(result.is_ok(), "{}", msg); + assert_eq!(&result.unwrap(), d.result.as_ref().unwrap(), "{}", msg); + } + } + + #[test] + fn test_bootinfo_to_payloadconfig() { + #[derive(Debug)] + struct TestData { + boot_info: BootInfo, + cmdline: Option, + tdx: bool, + result: Result, + } + + let cmdline = "debug foo a=b c=d"; + let kernel = "kernel"; + let firmware = "firmware"; + let initramfs = "initramfs"; + + let (boot_info_with_initrd, payload_config_with_initrd) = + make_bootinfo_payloadconfig_objects( + kernel, + initramfs, + Some(firmware), + Some(cmdline.to_string()), + ); + + let boot_info_without_initrd = BootInfo { + kernel: kernel.into(), + firmware: firmware.into(), + + ..Default::default() + }; + + let payload_config_without_initrd = PayloadConfig { + kernel: Some(PathBuf::from(kernel)), + firmware: Some(PathBuf::from(firmware)), + cmdline: Some(cmdline.into()), + + ..Default::default() + }; + + let tests = &[ + TestData { + boot_info: BootInfo::default(), + cmdline: None, + tdx: false, + result: Err(PayloadConfigError::NoKernel), + }, + TestData { + boot_info: BootInfo { + kernel: kernel.into(), + kernel_params: String::new(), + initrd: initramfs.into(), + + ..Default::default() + }, + cmdline: None, + tdx: false, + result: Ok(PayloadConfig { + kernel: Some(PathBuf::from(kernel)), + cmdline: None, + initramfs: Some(PathBuf::from(initramfs)), + + ..Default::default() + }), + }, + TestData { + boot_info: BootInfo { + kernel: kernel.into(), + kernel_params: cmdline.to_string(), + initrd: initramfs.into(), + + ..Default::default() + }, + cmdline: Some(cmdline.to_string()), + tdx: false, + result: Ok(PayloadConfig { + kernel: Some(PathBuf::from(kernel)), + initramfs: Some(PathBuf::from(initramfs)), + cmdline: Some(cmdline.to_string()), + + ..Default::default() + }), + }, + TestData { + boot_info: BootInfo { + kernel: kernel.into(), + initrd: initramfs.into(), + + ..Default::default() + }, + cmdline: None, + tdx: true, + result: Err(PayloadConfigError::TDXFirmwareMissing), + }, + TestData { + boot_info: boot_info_with_initrd, + cmdline: Some(cmdline.to_string()), + tdx: true, + result: Ok(payload_config_with_initrd), + }, + TestData { + boot_info: boot_info_without_initrd, + cmdline: Some(cmdline.to_string()), + tdx: true, + result: Ok(payload_config_without_initrd), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = PayloadConfig::try_from((d.boot_info.clone(), d.cmdline.clone(), d.tdx)); + + let msg = format!("{}: actual result: {:?}", msg, result); + + if std::env::var("DEBUG").is_ok() { + eprintln!("DEBUG: {}", msg); + } + + if d.result.is_err() { + assert!(result.is_err(), "{}", msg); + + assert_eq!( + &result.unwrap_err(), + d.result.as_ref().unwrap_err(), + "{}", + msg + ); + continue; + } + + assert!(result.is_ok(), "{}", msg); + assert_eq!(&result.unwrap(), d.result.as_ref().unwrap(), "{}", msg); + } + } + + #[test] + fn test_memoryinfo_to_memoryconfig() { + #[derive(Debug)] + struct TestData { + mem_info: MemoryInfo, + confidential_guest: bool, + result: Result, + } + + let sysinfo = nix::sys::sysinfo::sysinfo().unwrap(); + + let actual_max_mem_bytes = sysinfo.ram_total(); + + // Calculate the available MiB value + let max_mem_mib = actual_max_mem_bytes.checked_div(MIB).unwrap(); + + // Undo the operation to get back to the usable amount of max memory + // bytes. 
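+        //
+        // Rounding down to a whole number of MiB first means that a MiB value
+        // passed back in as `default_memory` converts to exactly
+        // `usable_max_mem_bytes`, keeping the expected results below stable.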
+ let usable_max_mem_bytes = MIB.checked_mul(max_mem_mib).unwrap(); + + let (mem_info_std, mem_cfg_std) = make_memory_objects(79, usable_max_mem_bytes, false); + let (mem_info_confidential_guest, mem_cfg_confidential_guest) = + make_memory_objects(79, usable_max_mem_bytes, true); + + let tests = &[ + TestData { + mem_info: MemoryInfo::default(), + confidential_guest: false, + result: Err(MemoryConfigError::NoDefaultMemory), + }, + TestData { + mem_info: MemoryInfo { + default_memory: 17, + + ..Default::default() + }, + confidential_guest: true, + result: Ok(MemoryConfig { + size: (17 * MIB), + shared: true, + hotplug_size: None, + + ..Default::default() + }), + }, + TestData { + mem_info: MemoryInfo { + default_memory: max_mem_mib as u32, + + ..Default::default() + }, + confidential_guest: true, + result: Ok(MemoryConfig { + size: usable_max_mem_bytes, + shared: true, + hotplug_size: None, + + ..Default::default() + }), + }, + TestData { + mem_info: MemoryInfo { + default_memory: (max_mem_mib + 1) as u32, + + ..Default::default() + }, + confidential_guest: true, + result: Err(MemoryConfigError::DefaultMemSizeTooBig), + }, + TestData { + mem_info: MemoryInfo { + default_memory: 1024, + + ..Default::default() + }, + confidential_guest: false, + result: Ok(MemoryConfig { + size: 1024_u64 * MIB, + shared: true, + hotplug_size: checked_next_multiple_of( + usable_max_mem_bytes - (1024 * MIB), + PMEM_ALIGN_BYTES, + ), + + ..Default::default() + }), + }, + TestData { + mem_info: mem_info_std, + confidential_guest: false, + result: Ok(mem_cfg_std), + }, + TestData { + mem_info: mem_info_confidential_guest, + confidential_guest: true, + result: Ok(mem_cfg_confidential_guest), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = MemoryConfig::try_from((d.mem_info.clone(), d.confidential_guest)); + + let msg = format!("{}: actual result: {:?}", msg, result); + + if std::env::var("DEBUG").is_ok() { + eprintln!("DEBUG: {}", msg); + } + + if d.result.is_err() { + assert!(result.is_err(), "{}", msg); + + assert_eq!( + &result.unwrap_err(), + d.result.as_ref().unwrap_err(), + "{}", + msg + ); + continue; + } + + assert!(result.is_ok(), "{}", msg); + assert_eq!(&result.unwrap(), d.result.as_ref().unwrap(), "{}", msg); + } + } + + #[test] + fn test_vsock_config() { + #[derive(Debug)] + struct TestData<'a> { + vsock_socket_path: &'a str, + cid: u64, + result: Result, + } + + let tests = &[ + TestData { + vsock_socket_path: "", + cid: 0, + result: Err(VsockConfigError::NoVsockSocketPath), + }, + TestData { + vsock_socket_path: "vsock_socket_path", + cid: DEFAULT_VSOCK_CID, + result: Ok(VsockConfig { + socket: PathBuf::from("vsock_socket_path"), + cid: DEFAULT_VSOCK_CID, + + ..Default::default() + }), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = VsockConfig::try_from((d.vsock_socket_path.to_string(), d.cid)); + + let msg = format!("{}: actual result: {:?}", msg, result); + + if std::env::var("DEBUG").is_ok() { + eprintln!("DEBUG: {}", msg); + } + + if d.result.is_err() { + assert!(result.is_err(), "{}", msg); + + assert_eq!( + &result.unwrap_err(), + d.result.as_ref().unwrap_err(), + "{}", + msg + ); + continue; + } + + assert!(result.is_ok(), "{}", msg); + assert_eq!(&result.unwrap(), d.result.as_ref().unwrap(), "{}", msg); + } + } + + #[test] + fn test_named_hypervisor_config_to_vmconfig() { + #[derive(Debug)] + struct TestData { + cfg: NamedHypervisorConfig, + result: 
Result, + } + + let u8_max = std::u8::MAX; + let sysinfo = nix::sys::sysinfo::sysinfo().unwrap(); + + let actual_max_mem_bytes = sysinfo.ram_total(); + + // Calculate the available MiB value + let max_mem_mib = actual_max_mem_bytes.checked_div(MIB).unwrap(); + + // Undo the operation to get back to the usable amount of max memory + // bytes. + let usable_max_mem_bytes = MIB.checked_mul(max_mem_mib).unwrap(); + + let image = "image"; + let initramfs = "initramfs"; + let kernel = "kernel"; + let firmware = "firmware"; + + let entropy_source = "entropy_source"; + let sandbox_path = "sandbox_path"; + let vsock_socket_path = "vsock_socket_path"; + + let valid_vsock = + VsockConfig::try_from((vsock_socket_path.to_string(), DEFAULT_VSOCK_CID)).unwrap(); + + let (cpu_info, cpus_config) = make_cpu_objects(7, u8_max); + + let (memory_info_std, mem_config_std) = + make_memory_objects(79, usable_max_mem_bytes, false); + + let (memory_info_confidential_guest, mem_config_confidential_guest) = + make_memory_objects(79, usable_max_mem_bytes, true); + + let (_, pmem_config_with_image) = make_bootinfo_pmemconfig_objects(image); + let (machine_info, rng_config) = make_machineinfo_rngconfig_objects(entropy_source); + + let payload_firmware = None; + + let (boot_info_with_initrd, payload_config_with_initrd) = + make_bootinfo_payloadconfig_objects(kernel, initramfs, payload_firmware, None); + + let (boot_info_confidential_guest_image, disk_config_confidential_guest_image) = + make_bootinfo_diskconfig_objects(image); + + let boot_info_confidential_guest_initrd = BootInfo { + kernel: kernel.to_string(), + initrd: initramfs.to_string(), + + ..Default::default() + }; + + let boot_info_tdx_image = BootInfo { + kernel: kernel.to_string(), + image: image.to_string(), + firmware: firmware.to_string(), + + ..Default::default() + }; + + let boot_info_tdx_initrd = BootInfo { + kernel: kernel.to_string(), + initrd: initramfs.to_string(), + firmware: firmware.to_string(), + + ..Default::default() + }; + + let payload_config_confidential_guest_initrd = PayloadConfig { + kernel: Some(PathBuf::from(kernel)), + initramfs: Some(PathBuf::from(initramfs)), + + ..Default::default() + }; + + // XXX: Note that the image is defined in a DiskConfig! 
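+        // i.e. for a TDX image boot the expected VmConfig pairs this
+        // PayloadConfig (firmware + kernel) with a read-only DiskConfig holding
+        // the image, mirroring the branch in VmConfig::try_from() above.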
+ let payload_config_tdx_for_image = PayloadConfig { + firmware: Some(PathBuf::from(firmware)), + kernel: Some(PathBuf::from(kernel)), + + ..Default::default() + }; + + let payload_config_tdx_initrd = PayloadConfig { + firmware: Some(PathBuf::from(firmware)), + initramfs: Some(PathBuf::from(initramfs)), + kernel: Some(PathBuf::from(kernel)), + + ..Default::default() + }; + + //------------------------------ + + let hypervisor_cfg_with_image_and_kernel = HypervisorConfig { + cpu_info: cpu_info.clone(), + memory_info: memory_info_std.clone(), + boot_info: BootInfo { + image: image.to_string(), + kernel: kernel.to_string(), + + ..Default::default() + }, + machine_info: machine_info.clone(), + + ..Default::default() + }; + + let hypervisor_cfg_with_initrd = HypervisorConfig { + cpu_info: cpu_info.clone(), + memory_info: memory_info_std, + boot_info: boot_info_with_initrd, + machine_info: machine_info.clone(), + + ..Default::default() + }; + + let security_info_confidential_guest = SecurityInfo { + confidential_guest: true, + + ..Default::default() + }; + + let hypervisor_cfg_confidential_guest_image = HypervisorConfig { + cpu_info: cpu_info.clone(), + memory_info: memory_info_confidential_guest.clone(), + boot_info: BootInfo { + kernel: kernel.to_string(), + + ..boot_info_confidential_guest_image + }, + machine_info: machine_info.clone(), + security_info: security_info_confidential_guest.clone(), + + ..Default::default() + }; + + let hypervisor_cfg_confidential_guest_initrd = HypervisorConfig { + cpu_info: cpu_info.clone(), + memory_info: memory_info_confidential_guest.clone(), + boot_info: boot_info_confidential_guest_initrd, + machine_info: machine_info.clone(), + security_info: security_info_confidential_guest.clone(), + + ..Default::default() + }; + + let hypervisor_cfg_tdx_image = HypervisorConfig { + cpu_info: cpu_info.clone(), + memory_info: memory_info_confidential_guest.clone(), + boot_info: boot_info_tdx_image, + machine_info: machine_info.clone(), + security_info: security_info_confidential_guest.clone(), + + ..Default::default() + }; + + let hypervisor_cfg_tdx_initrd = HypervisorConfig { + cpu_info, + memory_info: memory_info_confidential_guest, + boot_info: boot_info_tdx_initrd, + machine_info, + security_info: security_info_confidential_guest, + + ..Default::default() + }; + + //------------------------------ + + let vmconfig_with_image_and_kernel = VmConfig { + cpus: cpus_config.clone(), + memory: mem_config_std.clone(), + rng: rng_config.clone(), + vsock: Some(valid_vsock.clone()), + + // rootfs image specific + pmem: Some(vec![pmem_config_with_image]), + + payload: Some(PayloadConfig { + kernel: Some(PathBuf::from(kernel)), + + ..Default::default() + }), + + ..Default::default() + }; + + let vmconfig_with_initrd = VmConfig { + cpus: cpus_config.clone(), + memory: mem_config_std, + rng: rng_config.clone(), + vsock: Some(valid_vsock.clone()), + + // initrd/initramfs specific + payload: Some(payload_config_with_initrd), + + ..Default::default() + }; + + let vmconfig_confidential_guest_image = VmConfig { + cpus: cpus_config.clone(), + memory: mem_config_confidential_guest.clone(), + rng: rng_config.clone(), + vsock: Some(valid_vsock.clone()), + + // Confidential guest image specific + disks: Some(vec![disk_config_confidential_guest_image.clone()]), + + payload: Some(PayloadConfig { + kernel: Some(PathBuf::from(kernel)), + + ..Default::default() + }), + + ..Default::default() + }; + + let vmconfig_confidential_guest_initrd = VmConfig { + cpus: cpus_config.clone(), + memory: 
mem_config_confidential_guest.clone(), + rng: rng_config.clone(), + vsock: Some(valid_vsock.clone()), + + // Confidential guest initrd specific + payload: Some(payload_config_confidential_guest_initrd), + + ..Default::default() + }; + + let platform_config_tdx = get_platform_cfg(true); + + let vmconfig_tdx_image = VmConfig { + cpus: cpus_config.clone(), + memory: mem_config_confidential_guest.clone(), + rng: rng_config.clone(), + vsock: Some(valid_vsock.clone()), + platform: platform_config_tdx.clone(), + + // TDX specific + payload: Some(payload_config_tdx_for_image), + + // Confidential guest + TDX specific + disks: Some(vec![disk_config_confidential_guest_image]), + + ..Default::default() + }; + + let vmconfig_tdx_initrd = VmConfig { + cpus: cpus_config, + memory: mem_config_confidential_guest, + rng: rng_config, + vsock: Some(valid_vsock), + platform: platform_config_tdx, + + // Confidential guest + TDX specific + payload: Some(payload_config_tdx_initrd), + + ..Default::default() + }; + + //------------------------------ + + let named_hypervisor_cfg_with_image_and_kernel = NamedHypervisorConfig { + sandbox_path: sandbox_path.into(), + vsock_socket_path: vsock_socket_path.into(), + + cfg: hypervisor_cfg_with_image_and_kernel, + + ..Default::default() + }; + + let named_hypervisor_cfg_with_initrd = NamedHypervisorConfig { + sandbox_path: sandbox_path.into(), + vsock_socket_path: vsock_socket_path.into(), + + cfg: hypervisor_cfg_with_initrd, + + ..Default::default() + }; + + let named_hypervisor_cfg_confidential_guest_image = NamedHypervisorConfig { + sandbox_path: sandbox_path.into(), + vsock_socket_path: vsock_socket_path.into(), + + cfg: hypervisor_cfg_confidential_guest_image, + + ..Default::default() + }; + + let named_hypervisor_cfg_confidential_guest_initrd = NamedHypervisorConfig { + sandbox_path: sandbox_path.into(), + vsock_socket_path: vsock_socket_path.into(), + + cfg: hypervisor_cfg_confidential_guest_initrd, + + ..Default::default() + }; + + let named_hypervisor_cfg_tdx_image = NamedHypervisorConfig { + sandbox_path: sandbox_path.into(), + vsock_socket_path: vsock_socket_path.into(), + + cfg: hypervisor_cfg_tdx_image, + + tdx_enabled: true, + + ..Default::default() + }; + + let named_hypervisor_cfg_tdx_initrd = NamedHypervisorConfig { + sandbox_path: sandbox_path.into(), + vsock_socket_path: vsock_socket_path.into(), + + cfg: hypervisor_cfg_tdx_initrd, + + tdx_enabled: true, + + ..Default::default() + }; + + //------------------------------ + + let tests = &[ + TestData { + cfg: NamedHypervisorConfig::default(), + result: Err(VmConfigError::EmptyVsockSocketPath), + }, + TestData { + cfg: NamedHypervisorConfig { + vsock_socket_path: "vsock_socket_path".into(), + + ..Default::default() + }, + result: Err(VmConfigError::EmptySandboxPath), + }, + TestData { + cfg: NamedHypervisorConfig { + sandbox_path: "sandbox_path".into(), + + ..Default::default() + }, + result: Err(VmConfigError::EmptyVsockSocketPath), + }, + TestData { + cfg: NamedHypervisorConfig { + sandbox_path: "sandbox_path".into(), + vsock_socket_path: "vsock_socket_path".into(), + cfg: HypervisorConfig::default(), + + ..Default::default() + }, + result: Err(VmConfigError::NoBootFile), + }, + TestData { + cfg: NamedHypervisorConfig { + sandbox_path: "sandbox_path".into(), + vsock_socket_path: "vsock_socket_path".into(), + cfg: HypervisorConfig { + boot_info: BootInfo { + initrd: "initrd".into(), + image: "image".into(), + + ..Default::default() + }, + + ..Default::default() + }, + + ..Default::default() + }, + result: 
Err(VmConfigError::MultipleBootFiles), + }, + TestData { + cfg: named_hypervisor_cfg_with_image_and_kernel, + result: Ok(vmconfig_with_image_and_kernel), + }, + TestData { + cfg: named_hypervisor_cfg_with_initrd, + result: Ok(vmconfig_with_initrd), + }, + TestData { + cfg: named_hypervisor_cfg_confidential_guest_image, + result: Ok(vmconfig_confidential_guest_image), + }, + TestData { + cfg: named_hypervisor_cfg_confidential_guest_initrd, + result: Ok(vmconfig_confidential_guest_initrd), + }, + TestData { + cfg: named_hypervisor_cfg_tdx_image, + result: Ok(vmconfig_tdx_image), + }, + TestData { + cfg: named_hypervisor_cfg_tdx_initrd, + result: Ok(vmconfig_tdx_initrd), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = VmConfig::try_from(d.cfg.clone()); + + let msg = format!("{}: actual result: {:?}", msg, result); + + if std::env::var("DEBUG").is_ok() { + eprintln!("DEBUG: {}", msg); + } + + if d.result.is_err() { + assert!(result.is_err(), "{}", msg); + + assert_eq!( + &result.unwrap_err(), + d.result.as_ref().unwrap_err(), + "{}", + msg + ); + continue; + } + + assert!(result.is_ok(), "{}", msg); + assert_eq!(&result.unwrap(), d.result.as_ref().unwrap(), "{}", msg); + } + } +} diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/errors.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/errors.rs new file mode 100644 index 000000000000..7e062f5e6e2d --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/errors.rs @@ -0,0 +1,107 @@ +// Copyright (c) 2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use std::convert::TryFrom; +use thiserror::Error; + +#[derive(Error, Debug, PartialEq)] +pub enum VmConfigError { + #[error("empty sandbox path")] + EmptySandboxPath, + + #[error("empty VSOCK socket path")] + EmptyVsockSocketPath, + + #[error("cannot specify image and initrd")] + MultipleBootFiles, + + #[error("missing boot image (no rootfs image or initrd)")] + NoBootFile, + + #[error("CPU config error: {0}")] + CPUError(CpusConfigError), + + #[error("Pmem config error: {0}")] + PmemError(PmemConfigError), + + #[error("Payload config error: {0}")] + PayloadError(PayloadConfigError), + + #[error("Disk config error: {0}")] + DiskError(DiskConfigError), + + #[error("Memory config error: {0}")] + MemoryError(MemoryConfigError), + + // The 2nd arg is actually a std::io::Error but that doesn't implement + // PartialEq, so we convert it to a String. 
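+    //
+    // Callers stringify the I/O error themselves, as convert.rs does:
+    //
+    //   std::fs::create_dir_all(sandbox_path.clone())
+    //       .map_err(|e| VmConfigError::SandboxError(sandbox_path, e.to_string()))?;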
+ #[error("Failed to create sandbox path ({0}: {1}")] + SandboxError(String, String), + + #[error("VSOCK config error: {0}")] + VsockError(VsockConfigError), +} + +#[derive(Error, Debug, PartialEq)] +pub enum PmemConfigError { + #[error("Need rootfs image for PmemConfig")] + MissingImage, +} + +#[derive(Error, Debug, PartialEq)] +pub enum DiskConfigError { + #[error("Need path for DiskConfig")] + MissingPath, + + #[error("Found unexpected path for DiskConfig with TDX: {0}")] + UnexpectedPathForTDX(String), +} + +#[derive(Error, Debug, PartialEq)] +pub enum CpusConfigError { + #[error("Too many boot vCPUs specified: {0}")] + BootVCPUsTooBig(>::Error), + + #[error("Too many max vCPUs specified: {0}")] + MaxVCPUsTooBig(>::Error), +} + +#[derive(Error, Debug, PartialEq)] +pub enum PayloadConfigError { + #[error("No kernel specified")] + NoKernel, + + #[error("No initrd/initramfs specified")] + NoInitrd, + + #[error("Need firmware for TDX")] + TDXFirmwareMissing, +} + +#[derive(Error, Debug, PartialEq)] +pub enum MemoryConfigError { + #[error("No default memory specified")] + NoDefaultMemory, + + #[error("Default memory size > available RAM")] + DefaultMemSizeTooBig, + + #[error("Cannot convert default memory to bytes: {0}")] + BadDefaultMemSize(u32), + + #[error("Cannot calculate hotplug memory size from default memory: {0}")] + BadMemSizeForHotplug(u64), + + #[error("Cannot align hotplug memory size from pmem: {0}")] + BadPmemAlign(u64), + + #[error("Failed to query system memory information: {0}")] + SysInfoFail(#[source] nix::errno::Errno), +} + +#[derive(Error, Debug, PartialEq)] +pub enum VsockConfigError { + #[error("Missing VSOCK socket path")] + NoVsockSocketPath, +} diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs new file mode 100644 index 000000000000..fd5b228f502e --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs @@ -0,0 +1,496 @@ +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; +use std::net::Ipv4Addr; +use std::path::PathBuf; + +pub mod ch_api; +pub mod convert; +pub mod net_util; +mod virtio_devices; + +use crate::virtio_devices::RateLimiterConfig; +use kata_types::config::hypervisor::Hypervisor as HypervisorConfig; +pub use net_util::MacAddr; + +pub const MAX_NUM_PCI_SEGMENTS: u16 = 16; + +mod errors; + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct BalloonConfig { + pub size: u64, + /// Option to deflate the balloon in case the guest is out of memory. + #[serde(default)] + pub deflate_on_oom: bool, + /// Option to enable free page reporting from the guest. 
+ #[serde(default)] + pub free_page_reporting: bool, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] +pub struct CmdlineConfig { + pub args: String, +} + +impl CmdlineConfig { + fn is_empty(&self) -> bool { + self.args.is_empty() + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct ConsoleConfig { + //#[serde(default = "default_consoleconfig_file")] + pub file: Option, + pub mode: ConsoleOutputMode, + #[serde(default)] + pub iommu: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub enum ConsoleOutputMode { + #[default] + Off, + Pty, + Tty, + File, + Null, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct CpuAffinity { + pub vcpu: u8, + pub host_cpus: Vec, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct CpusConfig { + pub boot_vcpus: u8, + pub max_vcpus: u8, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub topology: Option, + #[serde(default)] + pub kvm_hyperv: bool, + #[serde(skip_serializing_if = "u8_is_zero")] + pub max_phys_bits: u8, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub affinity: Option>, + #[serde(default)] + pub features: CpuFeatures, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] +pub struct CpuFeatures { + #[cfg(target_arch = "x86_64")] + #[serde(default)] + pub amx: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct CpuTopology { + pub threads_per_core: u8, + pub cores_per_die: u8, + pub dies_per_package: u8, + pub packages: u8, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct DeviceConfig { + pub path: PathBuf, + #[serde(default)] + pub iommu: bool, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct DiskConfig { + pub path: Option, + #[serde(default)] + pub readonly: bool, + #[serde(default)] + pub direct: bool, + #[serde(default)] + pub iommu: bool, + //#[serde(default = "default_diskconfig_num_queues")] + pub num_queues: usize, + //#[serde(default = "default_diskconfig_queue_size")] + pub queue_size: u16, + #[serde(default)] + pub vhost_user: bool, + pub vhost_socket: Option, + #[serde(default)] + pub rate_limiter_config: Option, + #[serde(default)] + pub id: Option, + // For testing use only. Not exposed in API. 
+ #[serde(default)] + pub disable_io_uring: bool, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct FsConfig { + pub tag: String, + pub socket: PathBuf, + //#[serde(default = "default_fsconfig_num_queues")] + pub num_queues: usize, + //#[serde(default = "default_fsconfig_queue_size")] + pub queue_size: u16, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub enum HotplugMethod { + #[default] + Acpi, + VirtioMem, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct InitramfsConfig { + pub path: PathBuf, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct KernelConfig { + pub path: PathBuf, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct MemoryConfig { + pub size: u64, + #[serde(default)] + pub mergeable: bool, + #[serde(default)] + pub hotplug_method: HotplugMethod, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub hotplug_size: Option, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub hotplugged_size: Option, + #[serde(default)] + pub shared: bool, + #[serde(default)] + pub hugepages: bool, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub hugepage_size: Option, + #[serde(default)] + pub prefault: bool, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub zones: Option>, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct MemoryZoneConfig { + pub id: String, + pub size: u64, + #[serde(default)] + pub file: Option, + #[serde(default)] + pub shared: bool, + #[serde(default)] + pub hugepages: bool, + #[serde(default)] + pub hugepage_size: Option, + #[serde(default)] + pub host_numa_node: Option, + #[serde(default)] + pub hotplug_size: Option, + #[serde(default)] + pub hotplugged_size: Option, + #[serde(default)] + pub prefault: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct NetConfig { + //#[serde(default = "default_netconfig_tap")] + #[serde(skip_serializing_if = "Option::is_none")] + pub tap: Option, + //#[serde(default = "default_netconfig_ip")] + pub ip: Ipv4Addr, + //#[serde(default = "default_netconfig_mask")] + pub mask: Ipv4Addr, + //#[serde(default = "default_netconfig_mac")] + pub mac: MacAddr, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub host_mac: Option, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub mtu: Option, + #[serde(default)] + pub iommu: bool, + //#[serde(default = "default_netconfig_num_queues")] + #[serde(skip_serializing_if = "usize_is_zero")] + pub num_queues: usize, + //#[serde(default = "default_netconfig_queue_size")] + #[serde(skip_serializing_if = "u16_is_zero")] + pub queue_size: u16, + #[serde(default)] + pub vhost_user: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub vhost_socket: Option, + #[serde(default)] + pub vhost_mode: VhostMode, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub fds: Option>, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub rate_limiter_config: Option, + #[serde(default)] + #[serde(skip_serializing_if = 
"u16_is_zero")] + pub pci_segment: u16, +} + +impl Default for NetConfig { + fn default() -> Self { + NetConfig { + tap: None, + ip: Ipv4Addr::new(0, 0, 0, 0), + mask: Ipv4Addr::new(0, 0, 0, 0), + mac: MacAddr::default(), + host_mac: None, + mtu: None, + iommu: false, + num_queues: 0, + queue_size: 0, + vhost_user: false, + vhost_socket: None, + vhost_mode: VhostMode::default(), + id: None, + fds: None, + rate_limiter_config: None, + pci_segment: 0, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct NumaConfig { + #[serde(default)] + pub guest_numa_id: u32, + #[serde(default)] + pub cpus: Option>, + #[serde(default)] + pub distances: Option>, + #[serde(default)] + pub memory_zones: Option>, + #[cfg(target_arch = "x86_64")] + #[serde(default)] + pub sgx_epc_sections: Option>, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct NumaDistance { + #[serde(default)] + pub destination: u32, + #[serde(default)] + pub distance: u8, +} + +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct PayloadConfig { + #[serde(default)] + pub firmware: Option, + #[serde(default)] + pub kernel: Option, + #[serde(default)] + pub cmdline: Option, + #[serde(default)] + pub initramfs: Option, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct PlatformConfig { + //#[serde(default = "default_platformconfig_num_pci_segments")] + pub num_pci_segments: u16, + #[serde(default)] + pub iommu_segments: Option>, + #[serde(default)] + pub serial_number: Option, + #[serde(default)] + pub uuid: Option, + #[serde(default)] + pub oem_strings: Option>, + #[serde(default)] + pub tdx: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct PmemConfig { + pub file: PathBuf, + #[serde(default)] + pub size: Option, + #[serde(default)] + pub iommu: bool, + #[serde(default)] + pub discard_writes: bool, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct RngConfig { + pub src: PathBuf, + #[serde(default)] + pub iommu: bool, +} + +#[cfg(target_arch = "x86_64")] +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct SgxEpcConfig { + pub id: String, + #[serde(default)] + pub size: u64, + #[serde(default)] + pub prefault: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct UserDeviceConfig { + pub socket: PathBuf, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct VdpaConfig { + pub path: PathBuf, + //#[serde(default = "default_vdpaconfig_num_queues")] + pub num_queues: usize, + #[serde(default)] + pub iommu: bool, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub enum VhostMode { + #[default] + Client, + Server, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct VmConfig { + #[serde(default)] + pub cpus: CpusConfig, + #[serde(default)] + pub memory: MemoryConfig, + #[serde(skip_serializing_if = "Option::is_none")] + pub kernel: Option, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub initramfs: Option, + #[serde(default)] + #[serde(skip_serializing_if = 
"CmdlineConfig::is_empty")] + pub cmdline: CmdlineConfig, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub payload: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub disks: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub net: Option>, + #[serde(default)] + pub rng: RngConfig, + #[serde(skip_serializing_if = "Option::is_none")] + pub balloon: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub fs: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub pmem: Option>, + pub serial: ConsoleConfig, + pub console: ConsoleConfig, + #[serde(skip_serializing_if = "Option::is_none")] + pub devices: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub user_devices: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub vdpa: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub vsock: Option, + #[serde(default)] + pub iommu: bool, + #[cfg(target_arch = "x86_64")] + #[serde(skip_serializing_if = "Option::is_none")] + pub sgx_epc: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub numa: Option>, + #[serde(default)] + pub watchdog: bool, + #[cfg(feature = "guest_debug")] + pub gdb: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub platform: Option, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct VsockConfig { + pub cid: u64, + pub socket: PathBuf, + #[serde(default)] + pub iommu: bool, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +//-------------------------------------------------------------------- +// For serde serialization + +#[allow(clippy::trivially_copy_pass_by_ref)] +fn u8_is_zero(v: &u8) -> bool { + *v == 0 +} + +#[allow(clippy::trivially_copy_pass_by_ref)] +fn usize_is_zero(v: &usize) -> bool { + *v == 0 +} + +#[allow(clippy::trivially_copy_pass_by_ref)] +fn u16_is_zero(v: &u16) -> bool { + *v == 0 +} + +// Type used to simplify conversion from a generic Hypervisor config +// to a CH specific VmConfig. +#[derive(Debug, Clone, Default)] +pub struct NamedHypervisorConfig { + pub kernel_params: String, + pub sandbox_path: String, + pub vsock_socket_path: String, + pub cfg: HypervisorConfig, + pub tdx_enabled: bool, + + pub shared_fs_devices: Option>, + pub network_devices: Option>, +} diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/net_util.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/net_util.rs new file mode 100644 index 000000000000..00a0794628ad --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/net_util.rs @@ -0,0 +1,32 @@ +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize, Serializer}; +use std::fmt; + +pub const MAC_ADDR_LEN: usize = 6; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize, Default)] +pub struct MacAddr { + pub bytes: [u8; MAC_ADDR_LEN], +} + +// Note: Implements ToString automatically. +impl fmt::Display for MacAddr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let b = &self.bytes; + write!( + f, + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + b[0], b[1], b[2], b[3], b[4], b[5] + ) + } +} + +// Requried to remove the `bytes` member from the serialized JSON! 
+impl Serialize for MacAddr {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        self.to_string().serialize(serializer)
+    }
+}
diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/virtio_devices.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/virtio_devices.rs
new file mode 100644
index 000000000000..02bf04bf9606
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/ch-config/src/virtio_devices.rs
@@ -0,0 +1,19 @@
+// Copyright (c) 2022-2023 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+
+use serde::{Deserialize, Serialize};
+
+#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
+pub struct TokenBucketConfig {
+    pub size: u64,
+    pub one_time_burst: Option<u64>,
+    pub refill_time: u64,
+}
+
+#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
+#[serde(deny_unknown_fields)]
+pub struct RateLimiterConfig {
+    pub bandwidth: Option<TokenBucketConfig>,
+    pub ops: Option<TokenBucketConfig>,
+}
diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner.rs
new file mode 100644
index 000000000000..6be9df282611
--- /dev/null
+++ b/src/runtime-rs/crates/hypervisor/src/ch/inner.rs
@@ -0,0 +1,145 @@
+// Copyright (c) 2019-2022 Alibaba Cloud
+// Copyright (c) 2022 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+
+use super::HypervisorState;
+use crate::device::DeviceType;
+use crate::VmmState;
+use anyhow::Result;
+use async_trait::async_trait;
+use kata_types::capabilities::{Capabilities, CapabilityBits};
+use kata_types::config::hypervisor::Hypervisor as HypervisorConfig;
+use kata_types::config::hypervisor::HYPERVISOR_NAME_CH;
+use persist::sandbox_persist::Persist;
+use std::os::unix::net::UnixStream;
+use tokio::process::Child;
+use tokio::sync::watch::{channel, Receiver, Sender};
+use tokio::task::JoinHandle;
+
+#[derive(Debug)]
+pub struct CloudHypervisorInner {
+    pub(crate) state: VmmState,
+    pub(crate) id: String,
+
+    pub(crate) api_socket: Option<UnixStream>,
+    pub(crate) extra_args: Option<Vec<String>>,
+
+    pub(crate) config: Option<HypervisorConfig>,
+
+    pub(crate) process: Option<Child>,
+    pub(crate) pid: Option<u32>,
+
+    pub(crate) timeout_secs: i32,
+
+    pub(crate) netns: Option<String>,
+
+    // Sandbox-specific directory
+    pub(crate) vm_path: String,
+
+    // Hypervisor runtime directory
+    pub(crate) run_dir: String,
+
+    // Subdirectory of vm_path.
+ pub(crate) jailer_root: String, + + /// List of devices that will be added to the VM once it boots + pub(crate) pending_devices: Vec, + + pub(crate) _capabilities: Capabilities, + + pub(crate) shutdown_tx: Option>, + pub(crate) shutdown_rx: Option>, + pub(crate) tasks: Option>>>, +} + +const CH_DEFAULT_TIMEOUT_SECS: u32 = 10; + +impl CloudHypervisorInner { + pub fn new() -> Self { + let mut capabilities = Capabilities::new(); + capabilities.set( + CapabilityBits::BlockDeviceSupport + | CapabilityBits::BlockDeviceHotplugSupport + | CapabilityBits::FsSharingSupport, + ); + + let (tx, rx) = channel(true); + + Self { + api_socket: None, + extra_args: None, + + process: None, + pid: None, + + config: None, + state: VmmState::NotReady, + timeout_secs: CH_DEFAULT_TIMEOUT_SECS as i32, + id: String::default(), + jailer_root: String::default(), + vm_path: String::default(), + run_dir: String::default(), + netns: None, + pending_devices: vec![], + _capabilities: capabilities, + shutdown_tx: Some(tx), + shutdown_rx: Some(rx), + tasks: None, + } + } + + pub fn set_hypervisor_config(&mut self, config: HypervisorConfig) { + self.config = Some(config); + } + + pub fn hypervisor_config(&self) -> HypervisorConfig { + self.config.clone().unwrap_or_default() + } +} + +impl Default for CloudHypervisorInner { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl Persist for CloudHypervisorInner { + type State = HypervisorState; + type ConstructorArgs = (); + + // Return a state object that will be saved by the caller. + async fn save(&self) -> Result { + Ok(HypervisorState { + hypervisor_type: HYPERVISOR_NAME_CH.to_string(), + id: self.id.clone(), + vm_path: self.vm_path.clone(), + jailed: false, + jailer_root: String::default(), + netns: None, + config: self.hypervisor_config(), + run_dir: self.run_dir.clone(), + cached_block_devices: Default::default(), + ..Default::default() + }) + } + + // Set the hypervisor state to the specified state + async fn restore( + _hypervisor_args: Self::ConstructorArgs, + hypervisor_state: Self::State, + ) -> Result { + let ch = Self { + config: Some(hypervisor_state.config), + state: VmmState::NotReady, + id: hypervisor_state.id, + vm_path: hypervisor_state.vm_path, + run_dir: hypervisor_state.run_dir, + + ..Default::default() + }; + + Ok(ch) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs new file mode 100644 index 000000000000..a6f4a9d02132 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs @@ -0,0 +1,290 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// Copyright (c) 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use super::inner::CloudHypervisorInner; +use crate::device::DeviceType; +use crate::BlockConfig; +use crate::HybridVsockConfig; +use crate::NetworkConfig; +use crate::ShareFsDeviceConfig; +use crate::VmmState; +use anyhow::{anyhow, Context, Result}; +use ch_config::ch_api::{cloud_hypervisor_vm_blockdev_add, cloud_hypervisor_vm_fs_add}; +use ch_config::DiskConfig; +use ch_config::{net_util::MacAddr, FsConfig, NetConfig}; +use safe_path::scoped_join; +use std::convert::TryFrom; +use std::path::PathBuf; + +const VIRTIO_FS: &str = "virtio-fs"; +const DEFAULT_DISK_QUEUES: usize = 1; +const DEFAULT_DISK_QUEUE_SIZE: u16 = 1024; + +impl CloudHypervisorInner { + pub(crate) async fn add_device(&mut self, device: DeviceType) -> Result<()> { + if self.state != VmmState::VmRunning { + 
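+            // The VMM is not running yet, so queue the request; queued devices
+            // are replayed later by handle_pending_devices_after_boot() or
+            // drained by get_shared_devices() when building the VmConfig.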
self.pending_devices.insert(0, device); + + return Ok(()); + } + + self.handle_add_device(device).await?; + + Ok(()) + } + + async fn handle_add_device(&mut self, device: DeviceType) -> Result<()> { + match device { + DeviceType::ShareFs(sharefs) => self.handle_share_fs_device(sharefs.config).await, + DeviceType::HybridVsock(hvsock) => self.handle_hvsock_device(&hvsock.config).await, + DeviceType::Block(block) => self.handle_block_device(block.config).await, + _ => Err(anyhow!("unhandled device: {:?}", device)), + } + } + + /// Add the device that were requested to be added before the VMM was + /// started. + #[allow(dead_code)] + pub(crate) async fn handle_pending_devices_after_boot(&mut self) -> Result<()> { + if self.state != VmmState::VmRunning { + return Err(anyhow!( + "cannot handle pending devices with VMM state {:?}", + self.state + )); + } + + while let Some(dev) = self.pending_devices.pop() { + self.add_device(dev).await.context("add_device")?; + } + + Ok(()) + } + + pub(crate) async fn remove_device(&mut self, _device: DeviceType) -> Result<()> { + Ok(()) + } + + async fn handle_share_fs_device(&mut self, cfg: ShareFsDeviceConfig) -> Result<()> { + if cfg.fs_type != VIRTIO_FS { + return Err(anyhow!("cannot handle share fs type: {:?}", cfg.fs_type)); + } + + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; + + let num_queues: usize = if cfg.queue_num > 0 { + cfg.queue_num as usize + } else { + 1 + }; + + let queue_size: u16 = if cfg.queue_num > 0 { + u16::try_from(cfg.queue_size)? + } else { + 1024 + }; + + let socket_path = if cfg.sock_path.starts_with('/') { + PathBuf::from(cfg.sock_path) + } else { + scoped_join(&self.vm_path, cfg.sock_path)? + }; + + let fs_config = FsConfig { + tag: cfg.mount_tag, + socket: socket_path, + num_queues, + queue_size, + ..Default::default() + }; + + let response = cloud_hypervisor_vm_fs_add( + socket.try_clone().context("failed to clone socket")?, + fs_config, + ) + .await?; + + if let Some(detail) = response { + debug!(sl!(), "fs add response: {:?}", detail); + } + + Ok(()) + } + + async fn handle_hvsock_device(&mut self, _cfg: &HybridVsockConfig) -> Result<()> { + Ok(()) + } + + async fn handle_block_device(&mut self, cfg: BlockConfig) -> Result<()> { + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; + + let num_queues: usize = DEFAULT_DISK_QUEUES; + let queue_size: u16 = DEFAULT_DISK_QUEUE_SIZE; + + let block_config = DiskConfig { + path: Some(cfg.path_on_host.as_str().into()), + readonly: cfg.is_readonly, + num_queues, + queue_size, + ..Default::default() + }; + + let response = cloud_hypervisor_vm_blockdev_add( + socket.try_clone().context("failed to clone socket")?, + block_config, + ) + .await?; + + if let Some(detail) = response { + debug!(sl!(), "blockdev add response: {:?}", detail); + } + + Ok(()) + } + + pub(crate) async fn get_shared_devices( + &mut self, + ) -> Result<(Option>, Option>)> { + let mut shared_fs_devices = Vec::::new(); + let mut network_devices = Vec::::new(); + + while let Some(dev) = self.pending_devices.pop() { + match dev { + DeviceType::ShareFs(dev) => { + let settings = ShareFsSettings::new(dev.config, self.vm_path.clone()); + + let fs_cfg = FsConfig::try_from(settings)?; + + shared_fs_devices.push(fs_cfg); + } + DeviceType::Network(net_device) => { + let net_config = NetConfig::try_from(net_device.config)?; + network_devices.push(net_config); + } + _ => continue, + } + } + + Ok((Some(shared_fs_devices), 
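// A minimal sketch of the pending-device queue used by add_device() above:
// requests made before the VMM is running are pushed to the front of a Vec
// and popped from the back once the VM boots, so they are replayed in the
// order they arrived.
fn main() {
    let mut pending: Vec<&str> = Vec::new();

    for dev in ["sharefs", "network", "block"] {
        // add_device() before boot: insert at the front.
        pending.insert(0, dev);
    }

    // handle_pending_devices_after_boot(): pop from the back => FIFO order.
    let mut replayed = Vec::new();
    while let Some(dev) = pending.pop() {
        replayed.push(dev);
    }
    assert_eq!(replayed, ["sharefs", "network", "block"]);
}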
Some(network_devices))) + } +} + +impl TryFrom for NetConfig { + type Error = anyhow::Error; + + fn try_from(cfg: NetworkConfig) -> Result { + if let Some(mac) = cfg.guest_mac { + let net_config = NetConfig { + tap: Some(cfg.host_dev_name.clone()), + id: Some(cfg.virt_iface_name.clone()), + num_queues: cfg.queue_num, + queue_size: cfg.queue_size as u16, + mac: MacAddr { bytes: mac.0 }, + ..Default::default() + }; + + return Ok(net_config); + } + + Err(anyhow!("Missing mac address for network device")) + } +} +#[derive(Debug)] +pub struct ShareFsSettings { + cfg: ShareFsDeviceConfig, + vm_path: String, +} + +impl ShareFsSettings { + pub fn new(cfg: ShareFsDeviceConfig, vm_path: String) -> Self { + ShareFsSettings { cfg, vm_path } + } +} + +impl TryFrom for FsConfig { + type Error = anyhow::Error; + + fn try_from(settings: ShareFsSettings) -> Result { + let cfg = settings.cfg; + let vm_path = settings.vm_path; + + let num_queues: usize = if cfg.queue_num > 0 { + cfg.queue_num as usize + } else { + DEFAULT_DISK_QUEUES + }; + + let queue_size: u16 = if cfg.queue_num > 0 { + u16::try_from(cfg.queue_size)? + } else { + DEFAULT_DISK_QUEUE_SIZE + }; + + let socket_path = if cfg.sock_path.starts_with('/') { + PathBuf::from(cfg.sock_path) + } else { + PathBuf::from(vm_path).join(cfg.sock_path) + }; + + let fs_cfg = FsConfig { + tag: cfg.mount_tag, + socket: socket_path, + num_queues, + queue_size, + ..Default::default() + }; + + Ok(fs_cfg) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::Address; + + #[test] + fn test_networkconfig_to_netconfig() { + let mut cfg = NetworkConfig { + host_dev_name: String::from("tap0"), + virt_iface_name: String::from("eth0"), + queue_size: 256, + queue_num: 2, + guest_mac: None, + index: 1, + }; + + let net = NetConfig::try_from(cfg.clone()); + assert_eq!( + net.unwrap_err().to_string(), + "Missing mac address for network device" + ); + + let v: [u8; 6] = [10, 11, 128, 3, 4, 5]; + let mac_address = Address(v); + cfg.guest_mac = Some(mac_address.clone()); + + let expected = NetConfig { + tap: Some(cfg.host_dev_name.clone()), + id: Some(cfg.virt_iface_name.clone()), + num_queues: cfg.queue_num, + queue_size: cfg.queue_size as u16, + mac: MacAddr { bytes: v }, + ..Default::default() + }; + + let net = NetConfig::try_from(cfg); + assert!(net.is_ok()); + assert_eq!(net.unwrap(), expected); + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs new file mode 100644 index 000000000000..bd34fd0be457 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs @@ -0,0 +1,585 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use super::inner::CloudHypervisorInner; +use crate::ch::utils::get_api_socket_path; +use crate::ch::utils::get_vsock_path; +use crate::kernel_param::KernelParams; +use crate::utils::{get_jailer_root, get_sandbox_path}; +use crate::VM_ROOTFS_DRIVER_PMEM; +use crate::{VcpuThreadIds, VmmState}; +use anyhow::{anyhow, Context, Result}; +use ch_config::ch_api::{ + cloud_hypervisor_vm_create, cloud_hypervisor_vm_start, cloud_hypervisor_vmm_ping, + cloud_hypervisor_vmm_shutdown, +}; +use ch_config::{NamedHypervisorConfig, VmConfig}; +use core::future::poll_fn; +use futures::executor::block_on; +use futures::future::join_all; +use kata_types::capabilities::{Capabilities, CapabilityBits}; +use kata_types::config::default::DEFAULT_CH_ROOTFS_TYPE; +use 
std::convert::TryFrom; +use std::fs::create_dir_all; +use std::os::unix::net::UnixStream; +use std::path::Path; +use std::process::Stdio; +use tokio::io::AsyncBufReadExt; +use tokio::io::BufReader; +use tokio::process::{Child, Command}; +use tokio::sync::watch::Receiver; +use tokio::task; +use tokio::task::JoinHandle; +use tokio::time::Duration; + +const CH_NAME: &str = "cloud-hypervisor"; + +/// Number of milliseconds to wait before retrying a CH operation. +const CH_POLL_TIME_MS: u64 = 50; + +impl CloudHypervisorInner { + async fn start_hypervisor(&mut self, timeout_secs: i32) -> Result<()> { + self.cloud_hypervisor_launch(timeout_secs) + .await + .context("launch failed")?; + + self.cloud_hypervisor_setup_comms() + .await + .context("comms setup failed")?; + + self.cloud_hypervisor_check_running() + .await + .context("hypervisor running check failed")?; + + self.state = VmmState::VmmServerReady; + + Ok(()) + } + + async fn get_kernel_params(&self) -> Result { + let cfg = self + .config + .as_ref() + .ok_or("no hypervisor config for CH") + .map_err(|e| anyhow!(e))?; + + let enable_debug = cfg.debug_info.enable_debug; + + let confidential_guest = cfg.security_info.confidential_guest; + + // Note that the configuration option hypervisor.block_device_driver is not used. + let rootfs_driver = VM_ROOTFS_DRIVER_PMEM; + + let rootfs_type = match cfg.boot_info.rootfs_type.is_empty() { + true => DEFAULT_CH_ROOTFS_TYPE, + false => &cfg.boot_info.rootfs_type, + }; + + // Start by adding the default set of kernel parameters. + let mut params = KernelParams::new(enable_debug); + + let mut rootfs_param = KernelParams::new_rootfs_kernel_params(rootfs_driver, rootfs_type)?; + + let mut extra_params = if enable_debug { + if confidential_guest { + KernelParams::from_string("console=hvc0") + } else { + KernelParams::from_string("console=ttyS0,115200n8") + } + } else { + KernelParams::from_string("quiet") + }; + + params.append(&mut extra_params); + + // Add the rootfs device + params.append(&mut rootfs_param); + + // Finally, add the user-specified options at the end + // (so they will take priority). 
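// A minimal sketch of the ordering that get_kernel_params() relies on:
// parameters are accumulated in a Vec and rendered in insertion order, so the
// user-specified options appended last end up last on the command line and,
// per the comment above, are intended to take priority. The parameter values
// below are illustrative only.
fn main() {
    let mut params: Vec<String> = Vec::new();

    // Defaults and the rootfs device first ...
    params.push("quiet".into());
    params.push("root=/dev/pmem0p1".into());
    params.push("agent.log=info".into());

    // ... then whatever the user configured, so it appears last.
    params.extend("agent.log=debug".split_whitespace().map(String::from));

    assert_eq!(
        params.join(" "),
        "quiet root=/dev/pmem0p1 agent.log=info agent.log=debug"
    );
}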
+ params.append(&mut KernelParams::from_string(&cfg.boot_info.kernel_params)); + + let kernel_params = params.to_string()?; + + Ok(kernel_params) + } + + async fn boot_vm(&mut self) -> Result<()> { + let (shared_fs_devices, network_devices) = self.get_shared_devices().await?; + + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; + + let sandbox_path = get_sandbox_path(&self.id); + + std::fs::create_dir_all(sandbox_path.clone()).context("failed to create sandbox path")?; + + let vsock_socket_path = get_vsock_path(&self.id)?; + + let hypervisor_config = self + .config + .as_ref() + .ok_or("no hypervisor config for CH") + .map_err(|e| anyhow!(e))?; + + debug!( + sl!(), + "generic Hypervisor configuration: {:?}", hypervisor_config + ); + + let kernel_params = self.get_kernel_params().await?; + + // FIXME: See: + // + // - https://github.com/kata-containers/kata-containers/issues/6383 + // - https://github.com/kata-containers/kata-containers/pull/6257 + let tdx_enabled = false; + + let named_cfg = NamedHypervisorConfig { + kernel_params, + sandbox_path, + vsock_socket_path, + cfg: hypervisor_config.clone(), + tdx_enabled, + shared_fs_devices, + network_devices, + }; + + let cfg = VmConfig::try_from(named_cfg)?; + + debug!(sl!(), "CH specific VmConfig configuration: {:?}", cfg); + + let response = + cloud_hypervisor_vm_create(socket.try_clone().context("failed to clone socket")?, cfg) + .await?; + + if let Some(detail) = response { + debug!(sl!(), "vm boot response: {:?}", detail); + } + + let response = + cloud_hypervisor_vm_start(socket.try_clone().context("failed to clone socket")?) + .await?; + + if let Some(detail) = response { + debug!(sl!(), "vm start response: {:?}", detail); + } + + self.state = VmmState::VmRunning; + + Ok(()) + } + + async fn cloud_hypervisor_setup_comms(&mut self) -> Result<()> { + let api_socket_path = get_api_socket_path(&self.id)?; + + // The hypervisor has just been spawned, but may not yet have created + // the API socket, so repeatedly try to connect for up to + // timeout_secs. + let join_handle: JoinHandle> = + task::spawn_blocking(move || -> Result { + let api_socket: UnixStream; + + loop { + let result = UnixStream::connect(api_socket_path.clone()); + + if let Ok(result) = result { + api_socket = result; + break; + } + + std::thread::sleep(Duration::from_millis(CH_POLL_TIME_MS)); + } + + Ok(api_socket) + }); + + let timeout_msg = format!( + "API socket connect timed out after {} seconds", + self.timeout_secs + ); + + let result = + tokio::time::timeout(Duration::from_secs(self.timeout_secs as u64), join_handle) + .await + .context(timeout_msg)?; + + let result = result?; + + let api_socket = result?; + + self.api_socket = Some(api_socket); + + Ok(()) + } + + async fn cloud_hypervisor_check_running(&mut self) -> Result<()> { + let timeout_secs = self.timeout_secs; + + let timeout_msg = format!( + "API socket connect timed out after {} seconds", + timeout_secs + ); + + let join_handle = self.cloud_hypervisor_ping_until_ready(CH_POLL_TIME_MS); + + tokio::time::timeout(Duration::new(timeout_secs as u64, 0), join_handle) + .await + .context(timeout_msg)? 
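// A minimal sketch (paths and timeouts illustrative) of the pattern used by
// cloud_hypervisor_setup_comms() above: retry a blocking UnixStream::connect()
// on the blocking thread pool until the hypervisor creates its API socket,
// while bounding the whole wait with tokio::time::timeout().
use anyhow::{Context, Result};
use std::os::unix::net::UnixStream;
use std::time::Duration;
use tokio::task;

async fn connect_with_timeout(path: String, timeout_secs: u64) -> Result<UnixStream> {
    // The blocking connect/retry loop runs off the async executor ...
    let join = task::spawn_blocking(move || loop {
        match UnixStream::connect(&path) {
            Ok(stream) => return stream,
            Err(_) => std::thread::sleep(Duration::from_millis(50)),
        }
    });

    // ... while the async caller waits at most timeout_secs for it.
    let stream = tokio::time::timeout(Duration::from_secs(timeout_secs), join)
        .await
        .context("API socket connect timed out")??;

    Ok(stream)
}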
+ } + + async fn cloud_hypervisor_ensure_not_launched(&self) -> Result<()> { + if let Some(child) = &self.process { + return Err(anyhow!( + "{} already running with PID {}", + CH_NAME, + child.id().unwrap_or(0) + )); + } + + Ok(()) + } + + async fn cloud_hypervisor_launch(&mut self, _timeout_secs: i32) -> Result<()> { + self.cloud_hypervisor_ensure_not_launched().await?; + + let cfg = self + .config + .as_ref() + .ok_or("no hypervisor config for CH") + .map_err(|e| anyhow!(e))?; + + let debug = cfg.debug_info.enable_debug; + + let disable_seccomp = true; + + let api_socket_path = get_api_socket_path(&self.id)?; + + let _ = std::fs::remove_file(api_socket_path.clone()); + + let binary_path = self + .config + .as_ref() + .ok_or("no hypervisor config for CH") + .map_err(|e| anyhow!(e))? + .path + .to_string(); + + let path = Path::new(&binary_path).canonicalize()?; + + let mut cmd = Command::new(path); + + cmd.current_dir("/"); + + cmd.stdin(Stdio::null()); + cmd.stdout(Stdio::piped()); + cmd.stderr(Stdio::piped()); + + cmd.env("RUST_BACKTRACE", "full"); + + cmd.args(["--api-socket", &api_socket_path]); + + if let Some(extra_args) = &self.extra_args { + cmd.args(extra_args); + } + + if debug { + cmd.arg("-v"); + } + + if disable_seccomp { + cmd.args(["--seccomp", "false"]); + } + + let child = cmd.spawn().context(format!("{} spawn failed", CH_NAME))?; + + // Save process PID + self.pid = child.id(); + + let shutdown = self + .shutdown_rx + .as_ref() + .ok_or("no receiver channel") + .map_err(|e| anyhow!(e))? + .clone(); + + let ch_outputlogger_task = tokio::spawn(cloud_hypervisor_log_output(child, shutdown)); + + let tasks = vec![ch_outputlogger_task]; + + self.tasks = Some(tasks); + + Ok(()) + } + + async fn cloud_hypervisor_shutdown(&mut self) -> Result<()> { + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; + + let response = + cloud_hypervisor_vmm_shutdown(socket.try_clone().context("shutdown failed")?).await?; + + if let Some(detail) = response { + debug!(sl!(), "shutdown response: {:?}", detail); + } + + // Trigger a controlled shutdown + self.shutdown_tx + .as_mut() + .ok_or("no shutdown channel") + .map_err(|e| anyhow!(e))? + .send(true) + .map_err(|e| anyhow!(e).context("failed to request shutdown"))?; + + let tasks = self + .tasks + .take() + .ok_or("no tasks") + .map_err(|e| anyhow!(e))?; + + let results = join_all(tasks).await; + + let mut wait_errors: Vec = vec![]; + + for result in results { + if let Err(e) = result { + eprintln!("wait task error: {:#?}", e); + + wait_errors.push(e); + } + } + + if wait_errors.is_empty() { + Ok(()) + } else { + Err(anyhow!("wait all tasks failed: {:#?}", wait_errors)) + } + } + + #[allow(dead_code)] + async fn cloud_hypervisor_wait(&mut self) -> Result<()> { + let mut child = self + .process + .take() + .ok_or(format!("{} not running", CH_NAME)) + .map_err(|e| anyhow!(e))?; + + let _pid = child + .id() + .ok_or(format!("{} missing PID", CH_NAME)) + .map_err(|e| anyhow!(e))?; + + // Note that this kills _and_ waits for the process! + child.kill().await?; + + Ok(()) + } + + async fn cloud_hypervisor_ping_until_ready(&mut self, _poll_time_ms: u64) -> Result<()> { + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; + + loop { + let response = + cloud_hypervisor_vmm_ping(socket.try_clone().context("failed to clone socket")?) 
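// A minimal sketch (task bodies illustrative) of the controlled shutdown
// performed by cloud_hypervisor_shutdown() above: helper tasks hold a
// watch::Receiver, the owner flips the value to request shutdown, and
// join_all() then reaps every task, collecting any JoinError.
use anyhow::{anyhow, Result};
use futures::future::join_all;
use tokio::sync::watch;

#[tokio::main]
async fn main() -> Result<()> {
    let (tx, rx) = watch::channel(false);

    let mut tasks = Vec::new();
    for i in 0..2 {
        let mut shutdown = rx.clone();
        tasks.push(tokio::spawn(async move {
            // Block here until shutdown is requested.
            let _ = shutdown.changed().await;
            println!("task {} exiting", i);
        }));
    }

    // Request shutdown, then wait for every helper task to finish.
    tx.send(true)
        .map_err(|e| anyhow!("failed to request shutdown: {:?}", e))?;
    for result in join_all(tasks).await {
        result?;
    }

    Ok(())
}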
+ .await + .context("ping failed"); + + if let Ok(response) = response { + if let Some(detail) = response { + debug!(sl!(), "ping response: {:?}", detail); + } + break; + } + + tokio::time::sleep(Duration::from_millis(CH_POLL_TIME_MS)).await; + } + + Ok(()) + } + + pub(crate) async fn prepare_vm(&mut self, id: &str, netns: Option) -> Result<()> { + self.id = id.to_string(); + self.state = VmmState::NotReady; + + self.setup_environment().await?; + + self.netns = netns; + + Ok(()) + } + + async fn setup_environment(&mut self) -> Result<()> { + // run_dir and vm_path are the same (shared) + self.run_dir = get_sandbox_path(&self.id); + self.vm_path = self.run_dir.to_string(); + + create_dir_all(&self.run_dir) + .with_context(|| anyhow!("failed to create sandbox directory {}", self.run_dir))?; + + if !self.jailer_root.is_empty() { + create_dir_all(self.jailer_root.as_str()) + .map_err(|e| anyhow!("Failed to create dir {} err : {:?}", self.jailer_root, e))?; + } + + Ok(()) + } + + pub(crate) async fn start_vm(&mut self, timeout_secs: i32) -> Result<()> { + self.timeout_secs = timeout_secs; + self.start_hypervisor(self.timeout_secs).await?; + + self.boot_vm().await?; + + Ok(()) + } + + pub(crate) fn stop_vm(&mut self) -> Result<()> { + block_on(self.cloud_hypervisor_shutdown())?; + + Ok(()) + } + + pub(crate) fn pause_vm(&self) -> Result<()> { + Ok(()) + } + + pub(crate) fn resume_vm(&self) -> Result<()> { + Ok(()) + } + + pub(crate) async fn save_vm(&self) -> Result<()> { + Ok(()) + } + + pub(crate) async fn get_agent_socket(&self) -> Result { + const HYBRID_VSOCK_SCHEME: &str = "hvsock"; + + let vsock_path = get_vsock_path(&self.id)?; + + let uri = format!("{}://{}", HYBRID_VSOCK_SCHEME, vsock_path); + + Ok(uri) + } + + pub(crate) async fn disconnect(&mut self) { + self.state = VmmState::NotReady; + } + + pub(crate) async fn get_thread_ids(&self) -> Result { + Ok(VcpuThreadIds::default()) + } + + pub(crate) async fn cleanup(&self) -> Result<()> { + Ok(()) + } + + pub(crate) async fn resize_vcpu(&self, old_vcpu: u32, new_vcpu: u32) -> Result<(u32, u32)> { + Ok((old_vcpu, new_vcpu)) + } + + pub(crate) async fn get_pids(&self) -> Result> { + Ok(Vec::::new()) + } + + pub(crate) async fn get_vmm_master_tid(&self) -> Result { + if let Some(pid) = self.pid { + Ok(pid) + } else { + Err(anyhow!("could not get vmm master tid")) + } + } + + pub(crate) async fn get_ns_path(&self) -> Result { + if let Some(pid) = self.pid { + let ns_path = format!("/proc/{}/ns", pid); + Ok(ns_path) + } else { + Err(anyhow!("could not get ns path")) + } + } + + pub(crate) async fn check(&self) -> Result<()> { + Ok(()) + } + + pub(crate) async fn get_jailer_root(&self) -> Result { + let root_path = get_jailer_root(&self.id); + + std::fs::create_dir_all(&root_path)?; + + Ok(root_path) + } + + pub(crate) async fn capabilities(&self) -> Result { + let mut caps = Capabilities::default(); + caps.set(CapabilityBits::FsSharingSupport); + Ok(caps) + } + + pub(crate) async fn get_hypervisor_metrics(&self) -> Result { + todo!() + } +} + +// Log all output from the CH process until a shutdown signal is received. +// When that happens, stop logging and wait for the child process to finish +// before returning. 
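// A minimal sketch of the forwarding loop that cloud_hypervisor_log_output()
// just below implements, written with Lines::next_line() (which is
// cancellation safe) instead of poll_fn(); the child command and log sink
// here are illustrative.
use anyhow::{anyhow, Result};
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command;
use tokio::sync::watch;

async fn forward_stdout(mut shutdown: watch::Receiver<bool>) -> Result<()> {
    let mut child = Command::new("sh")
        .args(["-c", "while true; do echo tick; sleep 1; done"])
        .stdout(std::process::Stdio::piped())
        .spawn()?;

    let stdout = child.stdout.take().ok_or_else(|| anyhow!("no stdout"))?;
    let mut lines = BufReader::new(stdout).lines();

    loop {
        tokio::select! {
            // Stop forwarding as soon as shutdown is requested ...
            _ = shutdown.changed() => break,
            // ... otherwise keep draining the child's output.
            line = lines.next_line() => match line? {
                Some(line) => println!("child: {}", line),
                None => break, // EOF
            },
        }
    }

    // Kill _and_ reap the child, as the real implementation does.
    child.kill().await?;

    Ok(())
}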
+async fn cloud_hypervisor_log_output(mut child: Child, mut shutdown: Receiver) -> Result<()> { + let stdout = child + .stdout + .as_mut() + .ok_or("failed to get child stdout") + .map_err(|e| anyhow!(e))?; + + let stdout_reader = BufReader::new(stdout); + let mut stdout_lines = stdout_reader.lines(); + + let stderr = child + .stderr + .as_mut() + .ok_or("failed to get child stderr") + .map_err(|e| anyhow!(e))?; + + let stderr_reader = BufReader::new(stderr); + let mut stderr_lines = stderr_reader.lines(); + + loop { + tokio::select! { + _ = shutdown.changed() => { + info!(sl!(), "got shutdown request"); + break; + }, + stderr_line = poll_fn(|cx| Pin::new(&mut stderr_lines).poll_next_line(cx)) => { + if let Ok(line) = stderr_line { + let line = line.ok_or("missing stderr line").map_err(|e| anyhow!(e))?; + + info!(sl!(), "{:?}", line; "stream" => "stderr"); + } + }, + stdout_line = poll_fn(|cx| Pin::new(&mut stdout_lines).poll_next_line(cx)) => { + if let Ok(line) = stdout_line { + let line = line.ok_or("missing stdout line").map_err(|e| anyhow!(e))?; + + info!(sl!(), "{:?}", line; "stream" => "stdout"); + } + }, + }; + } + + // Note that this kills _and_ waits for the process! + child.kill().await?; + + Ok(()) +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/mod.rs b/src/runtime-rs/crates/hypervisor/src/ch/mod.rs new file mode 100644 index 000000000000..72373978ce90 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/mod.rs @@ -0,0 +1,181 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use super::HypervisorState; +use crate::device::DeviceType; +use crate::{Hypervisor, VcpuThreadIds}; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use kata_types::capabilities::Capabilities; +use kata_types::config::hypervisor::Hypervisor as HypervisorConfig; +use persist::sandbox_persist::Persist; +use std::sync::Arc; +use tokio::sync::RwLock; + +// Convenience macro to obtain the scope logger +#[macro_export] +macro_rules! 
sl { + () => { + slog_scope::logger().new(o!("subsystem" => "cloud-hypervisor")) + }; + } + +mod inner; +mod inner_device; +mod inner_hypervisor; +mod utils; + +use inner::CloudHypervisorInner; + +#[derive(Debug, Default, Clone)] +pub struct CloudHypervisor { + inner: Arc>, +} + +impl CloudHypervisor { + pub fn new() -> Self { + Self { + inner: Arc::new(RwLock::new(CloudHypervisorInner::new())), + } + } + + pub async fn set_hypervisor_config(&mut self, config: HypervisorConfig) { + let mut inner = self.inner.write().await; + inner.set_hypervisor_config(config) + } +} + +#[async_trait] +impl Hypervisor for CloudHypervisor { + async fn prepare_vm(&self, id: &str, netns: Option) -> Result<()> { + let mut inner = self.inner.write().await; + inner.prepare_vm(id, netns).await + } + + async fn start_vm(&self, timeout: i32) -> Result<()> { + let mut inner = self.inner.write().await; + inner.start_vm(timeout).await + } + + async fn stop_vm(&self) -> Result<()> { + let mut inner = self.inner.write().await; + inner.stop_vm() + } + + async fn pause_vm(&self) -> Result<()> { + let inner = self.inner.write().await; + inner.pause_vm() + } + + async fn resume_vm(&self) -> Result<()> { + let inner = self.inner.write().await; + inner.resume_vm() + } + + async fn save_vm(&self) -> Result<()> { + let inner = self.inner.write().await; + inner.save_vm().await + } + + async fn add_device(&self, device: DeviceType) -> Result<()> { + let mut inner = self.inner.write().await; + inner.add_device(device).await + } + + async fn remove_device(&self, device: DeviceType) -> Result<()> { + let mut inner = self.inner.write().await; + inner.remove_device(device).await + } + + async fn get_agent_socket(&self) -> Result { + let inner = self.inner.write().await; + inner.get_agent_socket().await + } + + async fn disconnect(&self) { + let mut inner = self.inner.write().await; + inner.disconnect().await + } + + async fn hypervisor_config(&self) -> HypervisorConfig { + let inner = self.inner.write().await; + inner.hypervisor_config() + } + + async fn get_thread_ids(&self) -> Result { + let inner = self.inner.read().await; + inner.get_thread_ids().await + } + + async fn cleanup(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.cleanup().await + } + + async fn resize_vcpu(&self, old_vcpu: u32, new_vcpu: u32) -> Result<(u32, u32)> { + let inner = self.inner.read().await; + inner.resize_vcpu(old_vcpu, new_vcpu).await + } + + async fn get_pids(&self) -> Result> { + let inner = self.inner.read().await; + inner.get_pids().await + } + + async fn get_vmm_master_tid(&self) -> Result { + let inner = self.inner.read().await; + inner.get_vmm_master_tid().await + } + + async fn get_ns_path(&self) -> Result { + let inner = self.inner.read().await; + inner.get_ns_path().await + } + + async fn check(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.check().await + } + + async fn get_jailer_root(&self) -> Result { + let inner = self.inner.read().await; + inner.get_jailer_root().await + } + + async fn save_state(&self) -> Result { + self.save().await + } + + async fn capabilities(&self) -> Result { + let inner = self.inner.read().await; + inner.capabilities().await + } + + async fn get_hypervisor_metrics(&self) -> Result { + let inner = self.inner.read().await; + inner.get_hypervisor_metrics().await + } +} + +#[async_trait] +impl Persist for CloudHypervisor { + type State = HypervisorState; + type ConstructorArgs = (); + + async fn save(&self) -> Result { + let inner = self.inner.read().await; + 
inner.save().await.context("save CH hypervisor state") + } + + async fn restore( + hypervisor_args: Self::ConstructorArgs, + hypervisor_state: Self::State, + ) -> Result { + let inner = CloudHypervisorInner::restore(hypervisor_args, hypervisor_state).await?; + Ok(Self { + inner: Arc::new(RwLock::new(inner)), + }) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/utils.rs b/src/runtime-rs/crates/hypervisor/src/ch/utils.rs new file mode 100644 index 000000000000..05291bb2efd0 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/utils.rs @@ -0,0 +1,36 @@ +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use anyhow::Result; + +use crate::utils::get_sandbox_path; + +// The socket used to connect to CH. This is used for CH API communications. +const CH_API_SOCKET_NAME: &str = "ch-api.sock"; + +// The socket that allows runtime-rs to connect direct through to the Kata +// Containers agent running inside the CH hosted VM. +const CH_VM_SOCKET_NAME: &str = "ch-vm.sock"; + +// Return the path for a _hypothetical_ API socket path: +// the path does *not* exist yet, and for this reason safe-path cannot be +// used. +pub fn get_api_socket_path(id: &str) -> Result { + let sandbox_path = get_sandbox_path(id); + + let path = [&sandbox_path, CH_API_SOCKET_NAME].join("/"); + + Ok(path) +} + +// Return the path for a _hypothetical_ sandbox specific VSOCK socket path: +// the path does *not* exist yet, and for this reason safe-path cannot be +// used. +pub fn get_vsock_path(id: &str) -> Result { + let sandbox_path = get_sandbox_path(id); + + let path = [&sandbox_path, CH_VM_SOCKET_NAME].join("/"); + + Ok(path) +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs b/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs new file mode 100644 index 000000000000..1c1ed0b7185b --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/device/device_manager.rs @@ -0,0 +1,539 @@ +// Copyright (c) 2019-2023 Alibaba Cloud +// Copyright (c) 2019-2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{collections::HashMap, sync::Arc}; + +use anyhow::{anyhow, Context, Result}; +use kata_sys_util::rand::RandomBytes; +use tokio::sync::{Mutex, RwLock}; + +use crate::{ + vhost_user_blk::VhostUserBlkDevice, BlockConfig, BlockDevice, HybridVsockDevice, Hypervisor, + NetworkDevice, VfioDevice, VhostUserConfig, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE, + KATA_NVDIMM_DEV_TYPE, VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI, VIRTIO_PMEM, +}; + +use super::{ + util::{get_host_path, get_virt_drive_name, DEVICE_TYPE_BLOCK}, + Device, DeviceConfig, DeviceType, +}; + +pub type ArcMutexDevice = Arc>; + +macro_rules! declare_index { + ($self:ident, $index:ident, $released_index:ident) => {{ + let current_index = if let Some(index) = $self.$released_index.pop() { + index + } else { + $self.$index + }; + $self.$index += 1; + Ok(current_index) + }}; +} + +macro_rules! release_index { + ($self:ident, $index:ident, $released_index:ident) => {{ + $self.$released_index.push($index); + $self.$released_index.sort_by(|a, b| b.cmp(a)); + }}; +} + +/// block_index and released_block_index are used to search an available block index +/// in Sandbox. +/// pmem_index and released_pmem_index are used to search an available pmem index +/// in Sandbox. +/// +/// @pmem_index generally default is 0 for ; +/// @block_index generally default is 0 for ; +/// @released_pmem_index for pmem devices removed and indexes will released at the same time. 
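// A simplified sketch of the recycle-before-increment bookkeeping behind the
// declare_index!/release_index! macros above: freed indices are handed out
// again (smallest first) before the monotonic counter advances, keeping
// device indices dense.
#[derive(Default)]
struct IndexPool {
    next: u64,
    released: Vec<u64>,
}

impl IndexPool {
    fn declare(&mut self) -> u64 {
        if let Some(idx) = self.released.pop() {
            idx
        } else {
            let idx = self.next;
            self.next += 1;
            idx
        }
    }

    fn release(&mut self, idx: u64) {
        self.released.push(idx);
        // Sort descending so pop() returns the smallest released index first.
        self.released.sort_by(|a, b| b.cmp(a));
    }
}

fn main() {
    let mut pool = IndexPool::default();
    assert_eq!(pool.declare(), 0);
    assert_eq!(pool.declare(), 1);
    pool.release(0);
    assert_eq!(pool.declare(), 0); // recycled before 2 is handed out
}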
+/// @released_block_index for blk devices removed and indexes will released at the same time. +#[derive(Clone, Debug, Default)] +struct SharedInfo { + pmem_index: u64, + block_index: u64, + released_pmem_index: Vec, + released_block_index: Vec, +} + +impl SharedInfo { + async fn new() -> Self { + SharedInfo { + pmem_index: 0, + block_index: 0, + released_pmem_index: vec![], + released_block_index: vec![], + } + } + + fn declare_device_index(&mut self, is_pmem: bool) -> Result { + if is_pmem { + declare_index!(self, pmem_index, released_pmem_index) + } else { + declare_index!(self, block_index, released_block_index) + } + } + + fn release_device_index(&mut self, index: u64, is_pmem: bool) { + if is_pmem { + release_index!(self, index, released_pmem_index); + } else { + release_index!(self, index, released_block_index); + } + } +} + +// Device manager will manage the lifecycle of sandbox device +#[derive(Debug)] +pub struct DeviceManager { + devices: HashMap, + hypervisor: Arc, + shared_info: SharedInfo, +} + +impl DeviceManager { + pub async fn new(hypervisor: Arc) -> Result { + let devices = HashMap::::new(); + Ok(DeviceManager { + devices, + hypervisor, + shared_info: SharedInfo::new().await, + }) + } + + async fn get_block_driver(&self) -> String { + self.hypervisor + .hypervisor_config() + .await + .blockdev_info + .block_device_driver + } + + async fn try_add_device(&mut self, device_id: &str) -> Result<()> { + // find the device + let device = self + .devices + .get(device_id) + .context("failed to find device")?; + let mut device_guard = device.lock().await; + // attach device + let result = device_guard.attach(self.hypervisor.as_ref()).await; + // handle attach error + if let Err(e) = result { + match device_guard.get_device_info().await { + DeviceType::Block(device) => { + self.shared_info.release_device_index( + device.config.index, + device.config.driver_option == *KATA_NVDIMM_DEV_TYPE, + ); + } + DeviceType::Vfio(device) => { + // safe here: + // Only when vfio dev_type is `b`, virt_path MUST be Some(X), + // and needs do release_device_index. otherwise, let it go. + if device.config.dev_type == DEVICE_TYPE_BLOCK { + self.shared_info + .release_device_index(device.config.virt_path.unwrap().0, false); + } + } + DeviceType::VhostUserBlk(device) => { + self.shared_info + .release_device_index(device.config.index, false); + } + _ => { + debug!(sl!(), "no need to do release device index."); + } + } + + drop(device_guard); + self.devices.remove(device_id); + + return Err(e); + } + + Ok(()) + } + + pub async fn try_remove_device(&mut self, device_id: &str) -> Result<()> { + if let Some(dev) = self.devices.get(device_id) { + let mut device_guard = dev.lock().await; + let result = match device_guard.detach(self.hypervisor.as_ref()).await { + Ok(index) => { + if let Some(i) = index { + // release the declared device index + let is_pmem = + if let DeviceType::Block(blk) = device_guard.get_device_info().await { + blk.config.driver_option == *KATA_NVDIMM_DEV_TYPE + } else { + false + }; + self.shared_info.release_device_index(i, is_pmem); + } + Ok(()) + } + Err(e) => Err(e), + }; + + // if detach success, remove it from device manager + if result.is_ok() { + drop(device_guard); + self.devices.remove(device_id); + } + + return result; + } + + Err(anyhow!( + "device with specified ID hasn't been created. 
{}", + device_id + )) + } + + async fn get_device_info(&self, device_id: &str) -> Result { + if let Some(dev) = self.devices.get(device_id) { + return Ok(dev.lock().await.get_device_info().await); + } + + Err(anyhow!( + "device with specified ID hasn't been created. {}", + device_id + )) + } + + async fn find_device(&self, host_path: String) -> Option { + for (device_id, dev) in &self.devices { + match dev.lock().await.get_device_info().await { + DeviceType::Block(device) => { + if device.config.path_on_host == host_path { + return Some(device_id.to_string()); + } + } + DeviceType::Vfio(device) => { + if device.config.host_path == host_path { + return Some(device_id.to_string()); + } + } + DeviceType::VhostUserBlk(device) => { + if device.config.socket_path == host_path { + return Some(device_id.to_string()); + } + } + DeviceType::Network(device) => { + if device.config.host_dev_name == host_path { + return Some(device_id.to_string()); + } + } + _ => { + // TODO: support find other device type + continue; + } + } + } + + None + } + + fn get_dev_virt_path( + &mut self, + dev_type: &str, + is_pmem: bool, + ) -> Result> { + let virt_path = if dev_type == DEVICE_TYPE_BLOCK { + let current_index = self.shared_info.declare_device_index(is_pmem)?; + let drive_name = if is_pmem { + format!("pmem{}", current_index) + } else { + get_virt_drive_name(current_index as i32)? + }; + let virt_path_name = format!("/dev/{}", drive_name); + Some((current_index, virt_path_name)) + } else { + // only dev_type is block, otherwise, it's None. + None + }; + + Ok(virt_path) + } + + async fn new_device(&mut self, device_config: &DeviceConfig) -> Result { + // device ID must be generated by manager instead of device itself + // in case of ID collision + let device_id = self.new_device_id()?; + let dev: ArcMutexDevice = match device_config { + DeviceConfig::BlockCfg(config) => { + // try to find the device, if found and just return id. + if let Some(device_matched_id) = self.find_device(config.path_on_host.clone()).await + { + return Ok(device_matched_id); + } + + self.create_block_device(config, device_id.clone()) + .await + .context("failed to create device")? + } + DeviceConfig::VfioCfg(config) => { + let mut vfio_dev_config = config.clone(); + let dev_host_path = vfio_dev_config.host_path.clone(); + if let Some(device_matched_id) = self.find_device(dev_host_path).await { + return Ok(device_matched_id); + } + let virt_path = self.get_dev_virt_path(vfio_dev_config.dev_type.as_str(), false)?; + vfio_dev_config.virt_path = virt_path; + + Arc::new(Mutex::new(VfioDevice::new( + device_id.clone(), + &vfio_dev_config, + ))) + } + DeviceConfig::VhostUserBlkCfg(config) => { + // try to find the device, found and just return id. + if let Some(dev_id_matched) = self.find_device(config.socket_path.clone()).await { + info!( + sl!(), + "vhost blk device with path:{:?} found. just return device id: {:?}", + config.socket_path.clone(), + dev_id_matched + ); + + return Ok(dev_id_matched); + } + + self.create_vhost_blk_device(config, device_id.clone()) + .await + .context("failed to create vhost blk device")? + } + DeviceConfig::NetworkCfg(config) => { + // try to find the device, found and just return id. + if let Some(dev_id_matched) = self.find_device(config.host_dev_name.clone()).await { + info!( + sl!(), + "network device with path:{:?} found. 
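// A sketch of the conventional index -> "/dev/vdX" naming consumed by
// get_dev_virt_path() above. The exact helper (get_virt_drive_name, imported
// from the device util module) is not shown in this hunk, so the base-26
// scheme below is an assumption for illustration: vda..vdz, then vdaa, vdab, ...
use anyhow::{anyhow, Result};

fn virt_drive_name(index: i32) -> Result<String> {
    if index < 0 {
        return Err(anyhow!("index cannot be negative"));
    }

    let mut suffix = String::new();
    let mut i = index;
    loop {
        suffix.insert(0, (b'a' + (i % 26) as u8) as char);
        i = i / 26 - 1;
        if i < 0 {
            break;
        }
    }

    Ok(format!("vd{}", suffix))
}

fn main() -> Result<()> {
    assert_eq!(virt_drive_name(0)?, "vda");
    assert_eq!(virt_drive_name(25)?, "vdz");
    assert_eq!(virt_drive_name(26)?, "vdaa");
    Ok(())
}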
return network device id: {:?}", + config.host_dev_name.clone(), + dev_id_matched + ); + + return Ok(dev_id_matched); + } + + Arc::new(Mutex::new(NetworkDevice::new(device_id.clone(), config))) + } + DeviceConfig::HybridVsockCfg(hvconfig) => { + // No need to do find device for hybrid vsock device. + Arc::new(Mutex::new(HybridVsockDevice::new(&device_id, hvconfig))) + } + _ => { + return Err(anyhow!("invliad device type")); + } + }; + + // register device to devices + self.devices.insert(device_id.clone(), dev.clone()); + + Ok(device_id) + } + + async fn create_vhost_blk_device( + &mut self, + config: &VhostUserConfig, + device_id: String, + ) -> Result { + // TODO virtio-scsi + let mut vhu_blk_config = config.clone(); + + match vhu_blk_config.driver_option.as_str() { + // convert the block driver to kata type + VIRTIO_BLOCK_MMIO => { + vhu_blk_config.driver_option = KATA_MMIO_BLK_DEV_TYPE.to_string(); + } + VIRTIO_BLOCK_PCI => { + vhu_blk_config.driver_option = KATA_BLK_DEV_TYPE.to_string(); + } + _ => { + return Err(anyhow!( + "unsupported driver type {}", + vhu_blk_config.driver_option + )); + } + }; + + // generate block device index and virt path + // safe here, Block device always has virt_path. + if let Some(virt_path) = self.get_dev_virt_path(DEVICE_TYPE_BLOCK, false)? { + vhu_blk_config.index = virt_path.0; + vhu_blk_config.virt_path = virt_path.1; + } + + Ok(Arc::new(Mutex::new(VhostUserBlkDevice::new( + device_id, + vhu_blk_config, + )))) + } + + async fn create_block_device( + &mut self, + config: &BlockConfig, + device_id: String, + ) -> Result { + let mut block_config = config.clone(); + let mut is_pmem = false; + + match block_config.driver_option.as_str() { + // convert the block driver to kata type + VIRTIO_BLOCK_MMIO => { + block_config.driver_option = KATA_MMIO_BLK_DEV_TYPE.to_string(); + } + VIRTIO_BLOCK_PCI => { + block_config.driver_option = KATA_BLK_DEV_TYPE.to_string(); + } + VIRTIO_PMEM => { + block_config.driver_option = KATA_NVDIMM_DEV_TYPE.to_string(); + is_pmem = true; + } + _ => { + return Err(anyhow!( + "unsupported driver type {}", + block_config.driver_option + )); + } + }; + + // generate virt path + if let Some(virt_path) = self.get_dev_virt_path(DEVICE_TYPE_BLOCK, is_pmem)? { + block_config.index = virt_path.0; + block_config.virt_path = virt_path.1; + } + + // if the path on host is empty, we need to get device host path from the device major and minor number + // Otherwise, it might be rawfile based block device, the host path is already passed from the runtime, + // so we don't need to do anything here. + if block_config.path_on_host.is_empty() { + block_config.path_on_host = + get_host_path(DEVICE_TYPE_BLOCK, config.major, config.minor) + .context("failed to get host path")?; + } + + Ok(Arc::new(Mutex::new(BlockDevice::new( + device_id, + block_config, + )))) + } + + // device ID must be generated by device manager instead of device itself + // in case of ID collision + fn new_device_id(&self) -> Result { + for _ in 0..5 { + let rand_bytes = RandomBytes::new(8); + let id = format!("{:x}", rand_bytes); + + // check collision in devices + if self.devices.get(&id).is_none() { + return Ok(id); + } + } + + Err(anyhow!("ID are exhausted")) + } +} + +// Many scenarios have similar steps when adding devices. so to reduce duplicated code, +// we should create a common method abstracted and use it in various scenarios. 
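// A minimal sketch of the collision-checked ID generation that
// new_device_id() above performs; the random source and ID width here are
// illustrative (the real code formats kata_sys_util::rand::RandomBytes).
use anyhow::{anyhow, Result};
use std::collections::HashMap;

fn new_device_id(
    devices: &HashMap<String, ()>,
    mut gen_id: impl FnMut() -> String,
) -> Result<String> {
    for _ in 0..5 {
        let id = gen_id();
        // Only hand out an ID that is not already registered.
        if !devices.contains_key(&id) {
            return Ok(id);
        }
    }

    Err(anyhow!("IDs are exhausted"))
}

fn main() -> Result<()> {
    let devices = HashMap::from([("deadbeef".to_string(), ())]);
    let mut counter = 0u64;
    let id = new_device_id(&devices, || {
        counter += 1;
        format!("{:016x}", counter)
    })?;
    assert_ne!(id, "deadbeef");
    Ok(())
}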
+// do_handle_device: +// (1) new_device with DeviceConfig and return device_id; +// (2) try_add_device with device_id and do really add device; +// (3) return device info of device's info; +pub async fn do_handle_device( + d: &RwLock, + dev_info: &DeviceConfig, +) -> Result { + let device_id = d + .write() + .await + .new_device(dev_info) + .await + .context("failed to create deviec")?; + + d.write() + .await + .try_add_device(&device_id) + .await + .context("failed to add deivce")?; + + let device_info = d + .read() + .await + .get_device_info(&device_id) + .await + .context("failed to get device info")?; + + Ok(device_info) +} + +pub async fn get_block_driver(d: &RwLock) -> String { + d.read().await.get_block_driver().await +} + +#[cfg(test)] +mod tests { + use super::DeviceManager; + use crate::{ + device::{device_manager::get_block_driver, DeviceConfig, DeviceType}, + qemu::Qemu, + BlockConfig, KATA_BLK_DEV_TYPE, + }; + use anyhow::{anyhow, Context, Result}; + use std::sync::Arc; + use tests_utils::load_test_config; + use tokio::sync::RwLock; + + async fn new_device_manager() -> Result>> { + let hypervisor_name: &str = "qemu"; + let toml_config = load_test_config(hypervisor_name.to_owned())?; + let hypervisor_config = toml_config + .hypervisor + .get(hypervisor_name) + .ok_or_else(|| anyhow!("failed to get hypervisor for {}", &hypervisor_name))?; + + let mut hypervisor = Qemu::new(); + hypervisor + .set_hypervisor_config(hypervisor_config.clone()) + .await; + + let dm = Arc::new(RwLock::new( + DeviceManager::new(Arc::new(hypervisor)) + .await + .context("device manager")?, + )); + + Ok(dm) + } + + #[actix_rt::test] + async fn test_new_block_device() { + let dm = new_device_manager().await; + assert!(dm.is_ok()); + + let d = dm.unwrap(); + let block_driver = get_block_driver(&d).await; + let dev_info = DeviceConfig::BlockCfg(BlockConfig { + path_on_host: "/dev/dddzzz".to_string(), + driver_option: block_driver, + ..Default::default() + }); + let new_device_result = d.write().await.new_device(&dev_info).await; + assert!(new_device_result.is_ok()); + + let device_id = new_device_result.unwrap(); + let devices_info_result = d.read().await.get_device_info(&device_id).await; + assert!(devices_info_result.is_ok()); + + let device_info = devices_info_result.unwrap(); + if let DeviceType::Block(device) = device_info { + assert_eq!(device.config.driver_option, KATA_BLK_DEV_TYPE); + } else { + assert_eq!(1, 0) + } + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/mod.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/mod.rs new file mode 100644 index 000000000000..acc6c4b6ed2c --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/mod.rs @@ -0,0 +1,218 @@ +// Copyright (c) 2019-2023 Alibaba Cloud +// Copyright (c) 2019-2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod vfio; +mod vhost_user; +mod virtio_blk; +mod virtio_fs; +mod virtio_net; +mod virtio_vsock; + +pub use vfio::{ + bind_device_to_host, bind_device_to_vfio, get_host_guest_map, get_vfio_device, HostDevice, + VfioBusMode, VfioConfig, VfioDevice, +}; +pub use virtio_blk::{ + BlockConfig, BlockDevice, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE, KATA_NVDIMM_DEV_TYPE, + VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI, VIRTIO_PMEM, +}; +pub use virtio_fs::{ + ShareFsDevice, ShareFsDeviceConfig, ShareFsMountConfig, ShareFsMountDevice, ShareFsMountType, + ShareFsOperation, +}; +pub use virtio_net::{Address, NetworkConfig, NetworkDevice}; +pub use virtio_vsock::{ + HybridVsockConfig, 
HybridVsockDevice, VsockConfig, VsockDevice, DEFAULT_GUEST_VSOCK_CID, +}; + +pub mod vhost_user_blk; +pub use vhost_user::{VhostUserConfig, VhostUserDevice, VhostUserType}; + +use anyhow::{anyhow, Context, Result}; + +// Tips: +// The Re-write `PciSlot` and `PciPath` with rust that it origins from `pcipath.go`: +// + +// The PCI spec reserves 5 bits for slot number (a.k.a. device +// number), giving slots 0..31 +const PCI_SLOT_BITS: u32 = 5; +const MAX_PCI_SLOTS: u32 = (1 << PCI_SLOT_BITS) - 1; + +// A PciSlot describes where a PCI device sits on a single bus +// +// This encapsulates the PCI slot number a.k.a device number, which is +// limited to a 5 bit value [0x00..0x1f] by the PCI specification +// +// To support multifunction device's, It's needed to extend +// this to include the PCI 3-bit function number as well. +#[derive(Clone, Debug, Default, PartialEq)] +pub struct PciSlot(pub u8); + +impl PciSlot { + pub fn convert_from_string(s: &str) -> Result { + if s.is_empty() || s.len() > 2 { + return Err(anyhow!("string given is invalid.")); + } + + let base = 16; + let n = u64::from_str_radix(s, base).context("convert string to number failed")?; + if n >> PCI_SLOT_BITS > 0 { + return Err(anyhow!( + "number {:?} exceeds MAX:{:?}, failed.", + n, + MAX_PCI_SLOTS + )); + } + + Ok(PciSlot(n as u8)) + } + + pub fn convert_from_u32(v: u32) -> Result { + if v > MAX_PCI_SLOTS { + return Err(anyhow!("value {:?} exceeds MAX: {:?}", v, MAX_PCI_SLOTS)); + } + + Ok(PciSlot(v as u8)) + } + + pub fn convert_to_string(&self) -> String { + format!("{:02x}", self.0) + } +} + +// A PciPath describes where a PCI sits in a PCI hierarchy. +// +// Consists of a list of PCI slots, giving the slot of each bridge +// that must be traversed from the PCI root to reach the device, +// followed by the slot of the device itself. +// +// When formatted into a string is written as "xx/.../yy/zz". Here, +// zz is the slot of the device on its PCI bridge, yy is the slot of +// the bridge on its parent bridge and so forth until xx is the slot +// of the "most upstream" bridge on the root bus. +// +// If a device is directly connected to the root bus, which used in +// lightweight hypervisors, such as dragonball/firecracker/clh, and +// its PciPath.slots will contains only one PciSlot. 
+#[derive(Clone, Debug, Default, PartialEq)] +pub struct PciPath { + // list of PCI slots + slots: Vec, +} + +impl PciPath { + // method to format the PciPath into a string + pub fn convert_to_string(&self) -> String { + self.slots + .iter() + .map(|pci_slot| format!("{:02x}", pci_slot.0)) + .collect::>() + .join("/") + } + + // method to parse a PciPath from a string + pub fn convert_from_string(path: &str) -> Result { + if path.is_empty() { + return Err(anyhow!("path given is empty.")); + } + + let mut pci_slots: Vec = Vec::new(); + let slots: Vec<&str> = path.split('/').collect(); + for slot in slots { + match PciSlot::convert_from_string(slot) { + Ok(s) => pci_slots.push(s), + Err(e) => return Err(anyhow!("slot is invalid with: {:?}", e)), + } + } + + Ok(PciPath { slots: pci_slots }) + } + + pub fn from_pci_slots(slots: Vec) -> Option { + if slots.is_empty() { + return None; + } + + Some(PciPath { slots }) + } + + // device_slot to get the slot of the device on its PCI bridge + pub fn get_device_slot(&self) -> Option { + self.slots.last().cloned() + } + + // root_slot to get the slot of the "most upstream" bridge on the root bus + pub fn get_root_slot(&self) -> Option { + self.slots.first().cloned() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pci_slot() { + // min + let pci_slot_01 = PciSlot::convert_from_string("00"); + assert!(pci_slot_01.is_ok()); + // max + let pci_slot_02 = PciSlot::convert_from_string("1f"); + assert!(pci_slot_02.is_ok()); + + // exceed + let pci_slot_03 = PciSlot::convert_from_string("20"); + assert!(pci_slot_03.is_err()); + + // valid number + let pci_slot_04 = PciSlot::convert_from_u32(1_u32); + assert!(pci_slot_04.is_ok()); + assert_eq!(pci_slot_04.as_ref().unwrap().0, 1_u8); + let pci_slot_str = pci_slot_04.as_ref().unwrap().convert_to_string(); + assert_eq!(pci_slot_str, format!("{:02x}", pci_slot_04.unwrap().0)); + + // max number + let pci_slot_05 = PciSlot::convert_from_u32(31_u32); + assert!(pci_slot_05.is_ok()); + assert_eq!(pci_slot_05.unwrap().0, 31_u8); + + // exceed and error + let pci_slot_06 = PciSlot::convert_from_u32(32_u32); + assert!(pci_slot_06.is_err()); + } + + #[test] + fn test_pci_patch() { + let pci_path_0 = PciPath::convert_from_string("01/0a/05"); + assert!(pci_path_0.is_ok()); + let pci_path_unwrap = pci_path_0.unwrap(); + assert_eq!(pci_path_unwrap.slots[0].0, 1); + assert_eq!(pci_path_unwrap.slots[1].0, 10); + assert_eq!(pci_path_unwrap.slots[2].0, 5); + + let pci_path_01 = PciPath::from_pci_slots(vec![PciSlot(1), PciSlot(10), PciSlot(5)]); + assert!(pci_path_01.is_some()); + let pci_path = pci_path_01.unwrap(); + let pci_path_02 = pci_path.convert_to_string(); + assert_eq!(pci_path_02, "01/0a/05".to_string()); + + let dev_slot = pci_path.get_device_slot(); + assert!(dev_slot.is_some()); + assert_eq!(dev_slot.unwrap().0, 5); + + let root_slot = pci_path.get_root_slot(); + assert!(root_slot.is_some()); + assert_eq!(root_slot.unwrap().0, 1); + } + + #[test] + fn test_get_host_guest_map() { + // test unwrap is fine, no panic occurs. 
+ let hg_map = get_host_guest_map("".to_owned()); + assert!(hg_map.is_none()); + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs new file mode 100644 index 000000000000..132535dbdc94 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs @@ -0,0 +1,778 @@ +// Copyright (c) 2022-2023 Alibaba Cloud +// Copyright (c) 2022-2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + collections::HashMap, + fs, + path::{Path, PathBuf}, + process::Command, + sync::{ + atomic::{AtomicU8, Ordering}, + Arc, RwLock, + }, +}; + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use lazy_static::lazy_static; +use path_clean::PathClean; + +use crate::{ + device::{hypervisor, Device, DeviceType}, + PciPath, PciSlot, +}; +use kata_sys_util::fs::get_base_name; + +pub const SYS_BUS_PCI_DRIVER_PROBE: &str = "/sys/bus/pci/drivers_probe"; +pub const SYS_BUS_PCI_DEVICES: &str = "/sys/bus/pci/devices"; +pub const SYS_KERN_IOMMU_GROUPS: &str = "/sys/kernel/iommu_groups"; +pub const VFIO_PCI_DRIVER: &str = "vfio-pci"; +pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk"; +pub const DRIVER_VFIO_PCI_TYPE: &str = "vfio-pci"; +pub const MAX_DEV_ID_SIZE: usize = 31; + +const VFIO_PCI_DRIVER_NEW_ID: &str = "/sys/bus/pci/drivers/vfio-pci/new_id"; +const VFIO_PCI_DRIVER_UNBIND: &str = "/sys/bus/pci/drivers/vfio-pci/unbind"; +const SYS_CLASS_IOMMU: &str = "/sys/class/iommu"; +const INTEL_IOMMU_PREFIX: &str = "dmar"; +const AMD_IOMMU_PREFIX: &str = "ivhd"; +const ARM_IOMMU_PREFIX: &str = "smmu"; + +lazy_static! { + static ref GUEST_DEVICE_ID: Arc = Arc::new(AtomicU8::new(0_u8)); + static ref HOST_GUEST_MAP: Arc>> = + Arc::new(RwLock::new(HashMap::new())); +} + +// map host/guest bdf and the mapping saved into `HOST_GUEST_MAP`, +// and return PciPath. +pub fn generate_guest_pci_path(bdf: String) -> Result { + let hg_map = HOST_GUEST_MAP.clone(); + let current_id = GUEST_DEVICE_ID.clone(); + + current_id.fetch_add(1, Ordering::SeqCst); + let slot = current_id.load(Ordering::SeqCst); + + // In some Hypervisors, dragonball, cloud-hypervisor or firecracker, + // the device is directly connected to the bus without intermediary bus. + // FIXME: Qemu's pci path needs to be implemented; + let host_bdf = normalize_device_bdf(bdf.as_str()); + let guest_bdf = format!("0000:00:{:02x}.0", slot); + + // safe, just do unwrap as `HOST_GUEST_MAP` is always valid. + hg_map.write().unwrap().insert(host_bdf, guest_bdf); + + Ok(PciPath { + slots: vec![PciSlot::convert_from_u32(slot.into()).context("pci slot convert failed.")?], + }) +} + +// get host/guest mapping for info +pub fn get_host_guest_map(host_bdf: String) -> Option { + // safe, just do unwrap as `HOST_GUEST_MAP` is always valid. + HOST_GUEST_MAP.read().unwrap().get(&host_bdf).cloned() +} + +pub fn do_check_iommu_on() -> Result { + let element = std::fs::read_dir(SYS_CLASS_IOMMU)? + .filter_map(|e| e.ok()) + .last(); + + if element.is_none() { + return Err(anyhow!("iommu is not enabled")); + } + + // safe here, the result of map is always be Some(true) or Some(false). 
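// A minimal sketch of the host-BDF -> guest-BDF bookkeeping performed by
// generate_guest_pci_path() above: an atomic counter hands out guest slots
// starting at 1 and a shared map records the mapping for later lookups (the
// real function additionally returns the slot wrapped in a PciPath).
use std::collections::HashMap;
use std::sync::atomic::{AtomicU8, Ordering};
use std::sync::RwLock;

struct GuestPciAllocator {
    next_slot: AtomicU8,
    host_guest: RwLock<HashMap<String, String>>,
}

impl GuestPciAllocator {
    fn new() -> Self {
        Self {
            next_slot: AtomicU8::new(0),
            host_guest: RwLock::new(HashMap::new()),
        }
    }

    fn allocate(&self, host_bdf: &str) -> String {
        let slot = self.next_slot.fetch_add(1, Ordering::SeqCst) + 1;
        // Devices sit directly on the guest root bus: 0000:00:<slot>.0
        let guest_bdf = format!("0000:00:{:02x}.0", slot);
        self.host_guest
            .write()
            .unwrap()
            .insert(host_bdf.to_string(), guest_bdf.clone());
        guest_bdf
    }

    fn lookup(&self, host_bdf: &str) -> Option<String> {
        self.host_guest.read().unwrap().get(host_bdf).cloned()
    }
}

fn main() {
    let alloc = GuestPciAllocator::new();
    assert_eq!(alloc.allocate("0000:3b:00.0"), "0000:00:01.0");
    assert_eq!(
        alloc.lookup("0000:3b:00.0").as_deref(),
        Some("0000:00:01.0")
    );
}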
+ Ok(element + .map(|e| { + let x = e.file_name().to_string_lossy().into_owned(); + x.starts_with(INTEL_IOMMU_PREFIX) + || x.starts_with(AMD_IOMMU_PREFIX) + || x.starts_with(ARM_IOMMU_PREFIX) + }) + .unwrap()) +} + +fn override_driver(bdf: &str, driver: &str) -> Result<()> { + let driver_override = format!("/sys/bus/pci/devices/{}/driver_override", bdf); + fs::write(&driver_override, driver) + .with_context(|| format!("echo {} > {}", driver, &driver_override))?; + info!(sl!(), "echo {} > {}", driver, driver_override); + Ok(()) +} + +#[derive(Clone, Debug, Default, PartialEq)] +pub enum VfioBusMode { + #[default] + MMIO, + PCI, +} + +impl VfioBusMode { + pub fn new(mode: &str) -> Self { + match mode { + "mmio" => VfioBusMode::MMIO, + _ => VfioBusMode::PCI, + } + } + + pub fn to_string(mode: VfioBusMode) -> String { + match mode { + VfioBusMode::MMIO => "mmio".to_owned(), + _ => "pci".to_owned(), + } + } + + // driver_type used for kata-agent + // (1) vfio-pci for add device handler, + // (2) mmioblk for add storage handler, + pub fn driver_type(mode: &str) -> &str { + match mode { + "b" => DRIVER_MMIO_BLK_TYPE, + _ => DRIVER_VFIO_PCI_TYPE, + } + } +} + +#[derive(Clone, Debug, Default)] +pub enum VfioDeviceType { + /// error type of VFIO device + Error, + + /// normal VFIO device type + #[default] + Normal, + + /// mediated VFIO device type + Mediated, +} + +// DeviceVendor represents a PCI device's device id and vendor id +// DeviceVendor: (device, vendor) +#[derive(Clone, Debug)] +pub struct DeviceVendor(String, String); + +impl DeviceVendor { + pub fn get_device_vendor(&self) -> Result<(u32, u32)> { + // default value is 0 when vendor_id or device_id is empty + if self.0.is_empty() || self.1.is_empty() { + return Ok((0, 0)); + } + + let do_convert = |id: &String| { + u32::from_str_radix( + id.trim_start_matches("0x") + .trim_matches(char::is_whitespace), + 16, + ) + .with_context(|| anyhow!("invalid id {:?}", id)) + }; + + let device = do_convert(&self.0).context("convert device failed")?; + let vendor = do_convert(&self.1).context("convert vendor failed")?; + + Ok((device, vendor)) + } + + pub fn get_device_vendor_id(&self) -> Result { + let (device, vendor) = self + .get_device_vendor() + .context("get device and vendor failed")?; + + Ok(((device & 0xffff) << 16) | (vendor & 0xffff)) + } +} + +// HostDevice represents a VFIO drive used to hotplug +#[derive(Clone, Debug, Default)] +pub struct HostDevice { + /// unique identifier of the device + pub hostdev_id: String, + + /// Sysfs path for mdev bus type device + pub sysfs_path: String, + + /// PCI device information (BDF): "bus:slot:function" + pub bus_slot_func: String, + + /// device_vendor: device id and vendor id + pub device_vendor: Option, + + /// type of vfio device + pub vfio_type: VfioDeviceType, + + /// guest PCI path of device + pub guest_pci_path: Option, + + /// vfio_vendor for vendor's some special cases. 
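// A minimal sketch of what DeviceVendor above does with the sysfs `device`
// and `vendor` attributes: strip the "0x" prefix, parse the hex value, and
// pack both 16-bit IDs into one u32 (device in the high half, vendor in the
// low half). The example values are illustrative.
use anyhow::{Context, Result};

fn parse_sysfs_hex(id: &str) -> Result<u32> {
    u32::from_str_radix(id.trim().trim_start_matches("0x"), 16)
        .with_context(|| format!("invalid id {:?}", id))
}

fn device_vendor_id(device: &str, vendor: &str) -> Result<u32> {
    let device = parse_sysfs_hex(device)?;
    let vendor = parse_sysfs_hex(vendor)?;

    Ok(((device & 0xffff) << 16) | (vendor & 0xffff))
}

fn main() -> Result<()> {
    assert_eq!(device_vendor_id("0x10d3\n", "0x8086\n")?, 0x10d3_8086);
    Ok(())
}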
+ #[cfg(feature = "enable-vendor")] + pub vfio_vendor: VfioVendor, +} + +// VfioConfig represents a VFIO drive used for hotplugging +#[derive(Clone, Debug, Default)] +pub struct VfioConfig { + /// usually host path will be /dev/vfio/N + pub host_path: String, + + /// device as block or char + pub dev_type: String, + + /// hostdev_prefix for devices, such as: + /// (1) phisycial endpoint: "physical_nic_" + /// (2) vfio mdev: "vfio_mdev_" + /// (3) vfio pci: "vfio_device_" + /// (4) vfio volume: "vfio_vol_" + /// (5) vfio nvme: "vfio_nvme_" + pub hostdev_prefix: String, + + /// device in guest which it appears inside the VM, + /// outside of the container mount namespace + /// virt_path: Option<(index, virt_path_name)> + pub virt_path: Option<(u64, String)>, +} + +#[derive(Clone, Debug, Default)] +pub struct VfioDevice { + pub device_id: String, + pub attach_count: u64, + + /// Bus Mode, PCI or MMIO + pub bus_mode: VfioBusMode, + /// driver type + pub driver_type: String, + + /// vfio config from business + pub config: VfioConfig, + + // host device with multi-funtions + pub devices: Vec, + // options for vfio pci handler in kata-agent + pub device_options: Vec, +} + +impl VfioDevice { + // new with VfioConfig + pub fn new(device_id: String, dev_info: &VfioConfig) -> Self { + // devices and device_options are in a 1-1 mapping, used in + // vfio-pci handler for kata-agent. + let devices: Vec = Vec::with_capacity(MAX_DEV_ID_SIZE); + let device_options: Vec = Vec::with_capacity(MAX_DEV_ID_SIZE); + + // get bus mode and driver type based on the device type + let dev_type = dev_info.dev_type.as_str(); + let driver_type = VfioBusMode::driver_type(dev_type).to_owned(); + + Self { + device_id, + attach_count: 0, + bus_mode: VfioBusMode::PCI, + driver_type, + config: dev_info.clone(), + devices, + device_options, + } + } + + fn get_host_path(&self) -> String { + self.config.host_path.clone() + } + + fn get_vfio_prefix(&self) -> String { + self.config.hostdev_prefix.clone() + } + + // nornaml VFIO BDF: 0000:04:00.0 + // mediated VFIO BDF: 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 + fn get_vfio_device_type(&self, device_sys_path: String) -> Result { + let mut tokens: Vec<&str> = device_sys_path.as_str().split(':').collect(); + let vfio_type = match tokens.len() { + 3 => VfioDeviceType::Normal, + _ => { + tokens = device_sys_path.split('-').collect(); + if tokens.len() == 5 { + VfioDeviceType::Mediated + } else { + VfioDeviceType::Error + } + } + }; + + Ok(vfio_type) + } + + // get_sysfs_device returns the sysfsdev of mediated device + // expected input string format is absolute path to the sysfs dev node + // eg. /sys/kernel/iommu_groups/0/devices/f79944e4-5a3d-11e8-99ce-479cbab002e4 + fn get_sysfs_device(&self, sysfs_dev_path: PathBuf) -> Result { + let mut buf = + fs::canonicalize(sysfs_dev_path.clone()).context("sysfs device path not exist")?; + let mut resolved = false; + + // resolve symbolic links until there's no more to resolve + while buf.symlink_metadata()?.file_type().is_symlink() { + let link = fs::read_link(&buf)?; + buf.pop(); + buf.push(link); + resolved = true; + } + + // If a symbolic link was resolved, the resulting path may be relative to the original path + if resolved { + // If the original path is relative and the resolved path is not, the resolved path + // should be returned as absolute. 
+ if sysfs_dev_path.is_relative() && buf.is_absolute() { + buf = fs::canonicalize(&buf)?; + } + } + + Ok(buf.clean().display().to_string()) + } + + // vfio device details: (device BDF, device SysfsDev, vfio Device Type) + fn get_vfio_device_details( + &self, + dev_file_name: String, + iommu_dev_path: PathBuf, + ) -> Result<(Option, String, VfioDeviceType)> { + let vfio_type = self.get_vfio_device_type(dev_file_name.clone())?; + match vfio_type { + VfioDeviceType::Normal => { + let dev_bdf = get_device_bdf(dev_file_name.clone()); + let dev_sys = [SYS_BUS_PCI_DEVICES, dev_file_name.as_str()].join("/"); + Ok((dev_bdf, dev_sys, vfio_type)) + } + VfioDeviceType::Mediated => { + // sysfsdev eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4 + let sysfs_dev = Path::new(&iommu_dev_path).join(dev_file_name); + let dev_sys = self + .get_sysfs_device(sysfs_dev) + .context("get sysfs device failed")?; + + let dev_bdf = if let Some(dev_s) = get_mediated_device_bdf(dev_sys.clone()) { + get_device_bdf(dev_s) + } else { + None + }; + + Ok((dev_bdf, dev_sys, vfio_type)) + } + _ => Err(anyhow!("unsupported vfio type : {:?}", vfio_type)), + } + } + + // read vendor and deviceor from /sys/bus/pci/devices/BDF/X + fn get_vfio_device_vendor(&self, bdf: &str) -> Result { + let device = + get_device_property(bdf, "device").context("get device from syspath failed")?; + let vendor = + get_device_property(bdf, "vendor").context("get vendor from syspath failed")?; + + Ok(DeviceVendor(device, vendor)) + } + + async fn set_vfio_config( + &mut self, + iommu_devs_path: PathBuf, + device_name: &str, + ) -> Result { + let vfio_dev_details = self + .get_vfio_device_details(device_name.to_owned(), iommu_devs_path) + .context("get vfio device details failed")?; + + // It's safe as BDF really exists. + let dev_bdf = vfio_dev_details.0.unwrap(); + let dev_vendor = self + .get_vfio_device_vendor(&dev_bdf) + .context("get property device and vendor failed")?; + + let mut vfio_dev = HostDevice { + bus_slot_func: dev_bdf.clone(), + device_vendor: Some(dev_vendor), + sysfs_path: vfio_dev_details.1, + vfio_type: vfio_dev_details.2, + ..Default::default() + }; + + // when vfio pci, kata-agent handles with device_options, and its + // format: "DDDD:BB:DD.F=" + // DDDD:BB:DD.F is the device's PCI address on host + // is the device's PCI path in the guest + if self.bus_mode == VfioBusMode::PCI { + let pci_path = + generate_guest_pci_path(dev_bdf.clone()).context("generate pci path failed")?; + vfio_dev.guest_pci_path = Some(pci_path.clone()); + self.device_options + .push(format!("0000:{}={}", dev_bdf, pci_path.convert_to_string())); + } + + Ok(vfio_dev) + } + + // filter Host or PCI Bridges that are in the same IOMMU group as the + // passed-through devices. One CANNOT pass-through a PCI bridge or Host + // bridge. 
Class 0x0604 is PCI bridge, 0x0600 is Host bridge + fn filter_bridge_device(&self, bdf: &str, bitmask: u64) -> Option { + let device_class = match get_device_property(bdf, "class") { + Ok(dev_class) => dev_class, + Err(_) => "".to_string(), + }; + + if device_class.is_empty() { + return None; + } + + match device_class.parse::() { + Ok(cid_u32) => { + // class code is 16 bits, remove the two trailing zeros + let class_code = u64::from(cid_u32) >> 8; + if class_code & bitmask == bitmask { + Some(class_code) + } else { + None + } + } + _ => None, + } + } +} + +#[async_trait] +impl Device for VfioDevice { + async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> { + // host path: /dev/vfio/X + let host_path = self.get_host_path(); + // vfio group: X + let vfio_group = get_base_name(host_path.clone())? + .into_string() + .map_err(|e| anyhow!("failed to get base name {:?}", e))?; + + // /sys/kernel/iommu_groups/X/devices + let iommu_devs_path = Path::new(SYS_KERN_IOMMU_GROUPS) + .join(vfio_group.as_str()) + .join("devices"); + + // /sys/kernel/iommu_groups/X/devices + // DDDD:BB:DD.F0 DDDD:BB:DD.F1 + let iommu_devices = fs::read_dir(iommu_devs_path.clone())? + .filter_map(|e| { + let x = e.ok()?.file_name().to_string_lossy().into_owned(); + Some(x) + }) + .collect::>(); + if iommu_devices.len() > 1 { + warn!(sl!(), "vfio device {} with multi-function", host_path); + } + + // pass all devices in iommu group, and use index to identify device. + for (index, device) in iommu_devices.iter().enumerate() { + // filter host or PCI bridge + if self.filter_bridge_device(device, 0x0600).is_some() { + continue; + } + + let mut hostdev: HostDevice = self + .set_vfio_config(iommu_devs_path.clone(), device) + .await + .context("set vfio config failed")?; + let dev_prefix = self.get_vfio_prefix(); + hostdev.hostdev_id = make_device_nameid(&dev_prefix, index, MAX_DEV_ID_SIZE); + + self.devices.push(hostdev); + } + + if self + .increase_attach_count() + .await + .context("failed to increase attach count")? + { + return Err(anyhow!("attach count increased failed as some reason.")); + } + + // do add device for vfio deivce + if let Err(e) = h.add_device(DeviceType::Vfio(self.clone())).await { + self.decrease_attach_count().await?; + + return Err(e); + } + + Ok(()) + } + + async fn detach(&mut self, h: &dyn hypervisor) -> Result> { + if self + .decrease_attach_count() + .await + .context("failed to decrease attach count")? + { + return Ok(None); + } + + if let Err(e) = h.remove_device(DeviceType::Vfio(self.clone())).await { + self.increase_attach_count().await?; + return Err(e); + } + + // only virt_path is Some, there's a device index + let device_index = if let Some(virt_path) = self.config.virt_path.clone() { + Some(virt_path.0) + } else { + None + }; + + Ok(device_index) + } + + async fn increase_attach_count(&mut self) -> Result { + match self.attach_count { + 0 => { + // do real attach + self.attach_count += 1; + Ok(false) + } + std::u64::MAX => Err(anyhow!("device was attached too many times")), + _ => { + self.attach_count += 1; + Ok(true) + } + } + } + + async fn decrease_attach_count(&mut self) -> Result { + match self.attach_count { + 0 => Err(anyhow!("detaching a device that wasn't attached")), + 1 => { + // do real wrok + self.attach_count -= 1; + Ok(false) + } + _ => { + self.attach_count -= 1; + Ok(true) + } + } + } + + async fn get_device_info(&self) -> DeviceType { + DeviceType::Vfio(self.clone()) + } +} + +// binds the device to vfio driver after unbinding from host. 
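Before the host-side bind helpers, note that the increase_attach_count()/decrease_attach_count() pair above is a small reference-counting contract shared by the hot-pluggable devices in this crate. A standalone sketch of that contract (hypothetical type; anyhow assumed for errors):

use anyhow::{bail, Result};

struct AttachCount(u64);

impl AttachCount {
    // Ok(false) only on the 0 -> 1 transition, i.e. the caller must do the
    // real hotplug; Ok(true) means the device is already attached and this
    // call is only a reference-count bump.
    fn increase(&mut self) -> Result<bool> {
        match self.0 {
            n if n == u64::MAX => bail!("device was attached too many times"),
            n => {
                self.0 = n + 1;
                Ok(n != 0)
            }
        }
    }

    // Ok(false) only on the 1 -> 0 transition, i.e. the caller must do the
    // real unplug; Ok(true) means other users still hold the device.
    fn decrease(&mut self) -> Result<bool> {
        match self.0 {
            0 => bail!("detaching a device that wasn't attached"),
            n => {
                self.0 = n - 1;
                Ok(n != 1)
            }
        }
    }
}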
+// Will be called by a network interface or a generic pcie device. +pub fn bind_device_to_vfio(bdf: &str, host_driver: &str, _vendor_device_id: &str) -> Result<()> { + // modprobe vfio-pci + if !Path::new(VFIO_PCI_DRIVER_NEW_ID).exists() { + Command::new("modprobe") + .arg(VFIO_PCI_DRIVER) + .output() + .expect("Failed to run modprobe vfio-pci"); + } + + // Arm does not need cmdline to open iommu, just set it through bios. + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + // check intel_iommu=on + let cmdline = fs::read_to_string("/proc/cmdline").unwrap(); + if cmdline.contains("iommu=off") || !cmdline.contains("iommu=") { + return Err(anyhow!("iommu isn't set on kernel cmdline")); + } + } + + if !do_check_iommu_on().context("check iommu on failed")? { + return Err(anyhow!("IOMMU not enabled yet.")); + } + + // if it's already bound to vfio + if is_equal_driver(bdf, VFIO_PCI_DRIVER) { + info!(sl!(), "bdf : {} was already bound to vfio-pci", bdf); + return Ok(()); + } + + info!(sl!(), "host driver : {}", host_driver); + override_driver(bdf, VFIO_PCI_DRIVER).context("override driver")?; + + let unbind_path = format!("/sys/bus/pci/devices/{}/driver/unbind", bdf); + // echo bdf > /sys/bus/pci/drivers/virtio-pci/unbind" + fs::write(&unbind_path, bdf) + .with_context(|| format!("Failed to echo {} > {}", bdf, &unbind_path))?; + + info!(sl!(), "{} is unbound from {}", bdf, host_driver); + + // echo bdf > /sys/bus/pci/drivers_probe + fs::write(SYS_BUS_PCI_DRIVER_PROBE, bdf) + .with_context(|| format!("Failed to echo {} > {}", bdf, SYS_BUS_PCI_DRIVER_PROBE))?; + + info!(sl!(), "echo {} > /sys/bus/pci/drivers_probe", bdf); + + Ok(()) +} + +pub fn is_equal_driver(bdf: &str, host_driver: &str) -> bool { + let sys_pci_devices_path = Path::new(SYS_BUS_PCI_DEVICES); + let driver_file = sys_pci_devices_path.join(bdf).join("driver"); + + if driver_file.exists() { + let driver_path = fs::read_link(driver_file).unwrap_or_default(); + let driver_name = driver_path + .file_name() + .map_or(String::new(), |v| v.to_str().unwrap().to_owned()); + return driver_name.eq(host_driver); + } + + false +} + +// bind_device_to_host binds the device to the host driver after unbinding from vfio-pci. +pub fn bind_device_to_host(bdf: &str, host_driver: &str, _vendor_device_id: &str) -> Result<()> { + // Unbind from vfio-pci driver to the original host driver + info!(sl!(), "bind {} to {}", bdf, host_driver); + + // if it's already bound to host_driver + if is_equal_driver(bdf, host_driver) { + info!( + sl!(), + "bdf {} was already unbound to host driver {}", bdf, host_driver + ); + return Ok(()); + } + + override_driver(bdf, host_driver).context("override driver")?; + + // echo bdf > /sys/bus/pci/drivers/vfio-pci/unbind" + std::fs::write(VFIO_PCI_DRIVER_UNBIND, bdf) + .with_context(|| format!("echo {}> {}", bdf, VFIO_PCI_DRIVER_UNBIND))?; + info!(sl!(), "echo {} > {}", bdf, VFIO_PCI_DRIVER_UNBIND); + + // echo bdf > /sys/bus/pci/drivers_probe + std::fs::write(SYS_BUS_PCI_DRIVER_PROBE, bdf) + .with_context(|| format!("echo {} > {}", bdf, SYS_BUS_PCI_DRIVER_PROBE))?; + info!(sl!(), "echo {} > {}", bdf, SYS_BUS_PCI_DRIVER_PROBE); + + Ok(()) +} + +// get_vfio_device_bdf returns the BDF of pci device +// expected format :. eg. 
02:10.0 +fn get_device_bdf(dev_sys_str: String) -> Option { + let dev_sys = dev_sys_str; + if !dev_sys.starts_with("0000:") { + return Some(dev_sys); + } + + let parts: Vec<&str> = dev_sys.as_str().splitn(2, ':').collect(); + if parts.len() < 2 { + return None; + } + + parts.get(1).copied().map(|bdf| bdf.to_owned()) +} + +// expected format ::. eg. 0000:02:10.0 +fn normalize_device_bdf(bdf: &str) -> String { + if !bdf.starts_with("0000") { + format!("0000:{}", bdf) + } else { + bdf.to_string() + } +} + +// make_device_nameid: generate a ID for the hypervisor commandline +fn make_device_nameid(name_type: &str, id: usize, max_len: usize) -> String { + let name_id = format!("{}_{}", name_type, id); + + if name_id.len() > max_len { + name_id[0..max_len].to_string() + } else { + name_id + } +} + +// get_mediated_device_bdf returns the MDEV BDF +// expected input string /sys/devices/pci0000:d7/BDF0/BDF1/.../MDEVBDF/UUID +fn get_mediated_device_bdf(dev_sys_str: String) -> Option { + let dev_sys = dev_sys_str; + let parts: Vec<&str> = dev_sys.as_str().split('/').collect(); + if parts.len() < 4 { + return None; + } + + parts + .get(parts.len() - 2) + .copied() + .map(|bdf| bdf.to_owned()) +} + +// dev_sys_path: /sys/bus/pci/devices/DDDD:BB:DD.F +// cfg_path: : /sys/bus/pci/devices/DDDD:BB:DD.F/xxx +fn get_device_property(bdf: &str, property: &str) -> Result { + let device_name = normalize_device_bdf(bdf); + + let dev_sys_path = Path::new(SYS_BUS_PCI_DEVICES).join(device_name); + let cfg_path = fs::read_to_string(dev_sys_path.join(property)).with_context(|| { + format!( + "failed to read {}", + dev_sys_path.join(property).to_str().unwrap() + ) + })?; + + Ok(cfg_path.as_str().trim_end_matches('\n').to_string()) +} + +pub fn get_vfio_iommu_group(bdf: String) -> Result { + // /sys/bus/pci/devices/DDDD:BB:DD.F/iommu_group + let dbdf = normalize_device_bdf(bdf.as_str()); + let iommugrp_path = Path::new(SYS_BUS_PCI_DEVICES) + .join(dbdf.as_str()) + .join("iommu_group"); + if !iommugrp_path.exists() { + warn!( + sl!(), + "IOMMU group path: {:?} not found, do bind device to vfio first.", iommugrp_path + ); + return Err(anyhow!("please do bind device to vfio")); + } + + // iommu group symlink: ../../../../../../kernel/iommu_groups/X + let iommugrp_symlink = fs::read_link(&iommugrp_path) + .map_err(|e| anyhow!("read iommu group symlink failed {:?}", e))?; + + // get base name from iommu group symlink: X + let iommu_group = get_base_name(iommugrp_symlink)? + .into_string() + .map_err(|e| anyhow!("failed to get iommu group {:?}", e))?; + + // we'd better verify the path to ensure it dose exist. 
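+    // (i.e. confirm that /sys/kernel/iommu_groups/<group>/devices/<BDF> is present
+    //  before reporting /dev/vfio/<group> back to the caller)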
+ if !Path::new(SYS_KERN_IOMMU_GROUPS) + .join(&iommu_group) + .join("devices") + .join(dbdf.as_str()) + .exists() + { + return Err(anyhow!( + "device dbdf {:?} dosn't exist in {}/{}/devices.", + dbdf.as_str(), + SYS_KERN_IOMMU_GROUPS, + iommu_group + )); + } + + Ok(format!("/dev/vfio/{}", iommu_group)) +} + +pub fn get_vfio_device(device: String) -> Result { + // support both /dev/vfio/X and BDF or BDF + let mut vfio_device = device; + + let bdf_vec: Vec<&str> = vfio_device.as_str().split(&[':', '.'][..]).collect(); + if bdf_vec.len() >= 3 && bdf_vec.len() < 5 { + // DDDD:BB:DD.F -> /dev/vfio/X + vfio_device = + get_vfio_iommu_group(vfio_device.clone()).context("get vfio iommu group failed")?; + } + + Ok(vfio_device) +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user.rs new file mode 100644 index 000000000000..53258821c5f2 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user.rs @@ -0,0 +1,73 @@ +// Copyright (c) 2022-2023 Alibaba Cloud +// Copyright (c) 2022-2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[derive(Debug, Clone)] +pub enum VhostUserType { + /// Blk - represents a block vhostuser device type + /// "vhost-user-blk-pci" + Blk(String), + + /// SCSI - represents SCSI based vhost-user type + /// "vhost-user-scsi-pci" + SCSI(String), + + /// Net - represents Net based vhost-user type + /// "virtio-net-pci" + Net(String), + + /// FS - represents a virtio-fs vhostuser device type + /// "vhost-user-fs-pci" + FS(String), +} + +impl Default for VhostUserType { + fn default() -> Self { + VhostUserType::Blk("vhost-user-blk-pci".to_owned()) + } +} + +#[derive(Debug, Clone, Default)] +/// VhostUserConfig represents data shared by most vhost-user devices +pub struct VhostUserConfig { + /// device id + pub dev_id: String, + /// socket path + pub socket_path: String, + /// mac_address is only meaningful for vhost user net device + pub mac_address: String, + + /// vhost-user-fs is only meaningful for vhost-user-fs device + pub tag: String, + /// vhost-user-fs cache mode + pub cache_mode: String, + /// vhost-user-fs cache size in MB + pub cache_size: u32, + + /// vhost user device type + pub device_type: VhostUserType, + /// guest block driver + pub driver_option: String, + /// pci_addr is the PCI address used to identify the slot at which the drive is attached. + pub pci_addr: Option, + + /// Block index of the device if assigned + /// type u64 is not OK + pub index: u64, + + /// Virtio queue size. 
Size: byte + pub queue_size: u32, + /// Block device multi-queue + pub num_queues: usize, + + /// device path in guest + pub virt_path: String, +} + +#[derive(Debug, Clone, Default)] +pub struct VhostUserDevice { + pub device_id: String, + pub config: VhostUserConfig, +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs new file mode 100644 index 000000000000..0912f89f1ab9 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vhost_user_blk.rs @@ -0,0 +1,122 @@ +// Copyright (c) 2023 Alibaba Cloud +// Copyright (c) 2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; + +use super::VhostUserConfig; +use crate::{ + device::{Device, DeviceType}, + Hypervisor as hypervisor, +}; + +#[derive(Debug, Clone, Default)] +pub struct VhostUserBlkDevice { + pub device_id: String, + + /// If set to true, the drive is opened in read-only mode. Otherwise, the + /// drive is opened as read-write. + pub is_readonly: bool, + + /// Don't close `path_on_host` file when dropping the device. + pub no_drop: bool, + + /// driver type for block device + pub driver_option: String, + + pub attach_count: u64, + pub config: VhostUserConfig, +} + +impl VhostUserBlkDevice { + // new creates a new VhostUserBlkDevice + pub fn new(device_id: String, config: VhostUserConfig) -> Self { + VhostUserBlkDevice { + device_id, + attach_count: 0, + config, + ..Default::default() + } + } +} + +#[async_trait] +impl Device for VhostUserBlkDevice { + async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> { + // increase attach count, skip attach the device if the device is already attached + if self + .increase_attach_count() + .await + .context("failed to increase attach count")? + { + return Ok(()); + } + + if let Err(e) = h.add_device(DeviceType::VhostUserBlk(self.clone())).await { + self.decrease_attach_count().await?; + + return Err(e); + } + + return Ok(()); + } + + async fn detach(&mut self, h: &dyn hypervisor) -> Result> { + // get the count of device detached, and detach once it reaches 0 + if self + .decrease_attach_count() + .await + .context("failed to decrease attach count")? 
+ { + return Ok(None); + } + + if let Err(e) = h + .remove_device(DeviceType::VhostUserBlk(self.clone())) + .await + { + self.increase_attach_count().await?; + + return Err(e); + } + + Ok(Some(self.config.index)) + } + + async fn get_device_info(&self) -> DeviceType { + DeviceType::VhostUserBlk(self.clone()) + } + + async fn increase_attach_count(&mut self) -> Result { + match self.attach_count { + 0 => { + // do real attach + self.attach_count += 1; + Ok(false) + } + std::u64::MAX => Err(anyhow!("device was attached too many times")), + _ => { + self.attach_count += 1; + Ok(true) + } + } + } + + async fn decrease_attach_count(&mut self) -> Result { + match self.attach_count { + 0 => Err(anyhow!("detaching a device that wasn't attached")), + 1 => { + // do real wrok + self.attach_count -= 1; + Ok(false) + } + _ => { + self.attach_count -= 1; + Ok(true) + } + } + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs new file mode 100644 index 000000000000..4f6b44ef4b40 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs @@ -0,0 +1,137 @@ +// Copyright (c) 2022-2023 Alibaba Cloud +// Copyright (c) 2022-2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use crate::device::Device; +use crate::device::DeviceType; +use crate::Hypervisor as hypervisor; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; + +/// VIRTIO_BLOCK_PCI indicates block driver is virtio-pci based +pub const VIRTIO_BLOCK_PCI: &str = "virtio-blk-pci"; +pub const VIRTIO_BLOCK_MMIO: &str = "virtio-blk-mmio"; +pub const VIRTIO_PMEM: &str = "virtio-pmem"; +pub const KATA_MMIO_BLK_DEV_TYPE: &str = "mmioblk"; +pub const KATA_BLK_DEV_TYPE: &str = "blk"; +pub const KATA_NVDIMM_DEV_TYPE: &str = "nvdimm"; + +#[derive(Debug, Clone, Default)] +pub struct BlockConfig { + /// Path of the drive. + pub path_on_host: String, + + /// If set to true, the drive is opened in read-only mode. Otherwise, the + /// drive is opened as read-write. + pub is_readonly: bool, + + /// Don't close `path_on_host` file when dropping the device. + pub no_drop: bool, + + /// device index + pub index: u64, + + /// driver type for block device + pub driver_option: String, + + /// device path in guest + pub virt_path: String, + + /// device attach count + pub attach_count: u64, + + /// device major number + pub major: i64, + + /// device minor number + pub minor: i64, +} + +#[derive(Debug, Clone, Default)] +pub struct BlockDevice { + pub device_id: String, + pub attach_count: u64, + pub config: BlockConfig, +} + +impl BlockDevice { + // new creates a new VirtioBlkDevice + pub fn new(device_id: String, config: BlockConfig) -> Self { + BlockDevice { + device_id, + attach_count: 0, + config, + } + } +} + +#[async_trait] +impl Device for BlockDevice { + async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> { + // increase attach count, skip attach the device if the device is already attached + if self + .increase_attach_count() + .await + .context("failed to increase attach count")? + { + return Ok(()); + } + if let Err(e) = h.add_device(DeviceType::Block(self.clone())).await { + self.decrease_attach_count().await?; + return Err(e); + } + return Ok(()); + } + + async fn detach(&mut self, h: &dyn hypervisor) -> Result> { + // get the count of device detached, skip detach once it reaches the 0 + if self + .decrease_attach_count() + .await + .context("failed to decrease attach count")? 
+ { + return Ok(None); + } + if let Err(e) = h.remove_device(DeviceType::Block(self.clone())).await { + self.increase_attach_count().await?; + return Err(e); + } + Ok(Some(self.config.index)) + } + + async fn get_device_info(&self) -> DeviceType { + DeviceType::Block(self.clone()) + } + + async fn increase_attach_count(&mut self) -> Result { + match self.attach_count { + 0 => { + // do real attach + self.attach_count += 1; + Ok(false) + } + std::u64::MAX => Err(anyhow!("device was attached too many times")), + _ => { + self.attach_count += 1; + Ok(true) + } + } + } + + async fn decrease_attach_count(&mut self) -> Result { + match self.attach_count { + 0 => Err(anyhow!("detaching a device that wasn't attached")), + 1 => { + // do real wrok + self.attach_count -= 1; + Ok(false) + } + _ => { + self.attach_count -= 1; + Ok(true) + } + } + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_fs.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_fs.rs new file mode 100644 index 000000000000..0a97845e71d0 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_fs.rs @@ -0,0 +1,78 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[derive(Copy, Clone, Debug)] +pub enum ShareFsOperation { + Mount, + Umount, + Update, +} + +#[derive(Debug, Clone)] +pub enum ShareFsMountType { + PASSTHROUGH, + RAFS, +} + +/// ShareFsMountConfig: share fs mount config +#[derive(Debug, Clone)] +pub struct ShareFsMountConfig { + /// source: the passthrough fs exported dir or rafs meta file of rafs + pub source: String, + + /// fstype: specifies the type of this sub-fs, could be passthrough-fs or rafs + pub fstype: ShareFsMountType, + + /// mount_point: the mount point inside guest + pub mount_point: String, + + /// config: the rafs backend config file + pub config: Option, + + /// tag: is the tag used inside the kata guest. + pub tag: String, + + /// op: the operation to take, e.g. mount, umount or update + pub op: ShareFsOperation, + + /// prefetch_list_path: path to file that contains file lists that should be prefetched by rafs + pub prefetch_list_path: Option, +} + +#[derive(Debug, Clone)] +pub struct ShareFsMountDevice { + pub config: ShareFsMountConfig, +} + +/// ShareFsDeviceConfig: share fs device config +#[derive(Debug, Clone)] +pub struct ShareFsDeviceConfig { + /// fs_type: virtiofs or inline-virtiofs + pub fs_type: String, + + /// socket_path: socket path for virtiofs + pub sock_path: String, + + /// mount_tag: a label used as a hint to the guest. + pub mount_tag: String, + + /// host_path: the host filesystem path for this volume. + pub host_path: String, + + /// queue_size: queue size + pub queue_size: u64, + + /// queue_num: queue number + pub queue_num: u64, + + /// options: virtiofs device's config options. 
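+    /// (for the inline virtio-fs backend these are handed to the dragonball
+    ///  option parser, see parse_inline_virtiofs_args)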
+ pub options: Vec, +} + +#[derive(Debug, Clone)] +pub struct ShareFsDevice { + pub config: ShareFsDeviceConfig, +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs new file mode 100644 index 000000000000..3d19625b9f44 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs @@ -0,0 +1,104 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fmt; + +use anyhow::{Context, Result}; +use async_trait::async_trait; + +use crate::{ + device::{Device, DeviceType}, + Hypervisor as hypervisor, +}; + +#[derive(Clone)] +pub struct Address(pub [u8; 6]); + +impl fmt::Debug for Address { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let b = self.0; + write!( + f, + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + b[0], b[1], b[2], b[3], b[4], b[5] + ) + } +} + +#[derive(Clone, Debug, Default)] +pub struct NetworkConfig { + /// for detach, now it's default value 0. + pub index: u64, + + /// Host level path for the guest network interface. + pub host_dev_name: String, + + /// Guest iface name for the guest network interface. + pub virt_iface_name: String, + + /// Guest MAC address. + pub guest_mac: Option
, + + /// Virtio queue size + pub queue_size: usize, + + /// Virtio queue num + pub queue_num: usize, +} + +#[derive(Clone, Debug, Default)] +pub struct NetworkDevice { + /// Unique identifier of the device + pub device_id: String, + + /// Network Device config info + pub config: NetworkConfig, +} + +impl NetworkDevice { + // new creates a NetworkDevice + pub fn new(device_id: String, config: &NetworkConfig) -> Self { + Self { + device_id, + config: config.clone(), + } + } +} + +#[async_trait] +impl Device for NetworkDevice { + async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> { + h.add_device(DeviceType::Network(self.clone())) + .await + .context("add network device.")?; + + return Ok(()); + } + + async fn detach(&mut self, h: &dyn hypervisor) -> Result> { + h.remove_device(DeviceType::Network(self.clone())) + .await + .context("remove network device.")?; + + Ok(Some(self.config.index)) + } + + async fn get_device_info(&self) -> DeviceType { + DeviceType::Network(self.clone()) + } + + async fn increase_attach_count(&mut self) -> Result { + // network devices will not be attached multiple times, Just return Ok(false) + + Ok(false) + } + + async fn decrease_attach_count(&mut self) -> Result { + // network devices will not be detached multiple times, Just return Ok(false) + + Ok(false) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_vsock.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_vsock.rs new file mode 100644 index 000000000000..98f48e29a98b --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_vsock.rs @@ -0,0 +1,161 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{Context, Result}; +use rand::Rng; +use std::os::unix::prelude::AsRawFd; +use tokio::fs::{File, OpenOptions}; + +use async_trait::async_trait; + +use crate::{ + device::{Device, DeviceType}, + Hypervisor as hypervisor, +}; + +// This is the first usable vsock context ID. All the vsocks +// can use the same ID, since it's only used in the guest. +pub const DEFAULT_GUEST_VSOCK_CID: u32 = 0x3; + +#[derive(Clone, Debug, Default)] +pub struct HybridVsockConfig { + /// A 32-bit Context Identifier (CID) used to identify the guest. 
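+    /// (CIDs 0-2 are reserved by vsock(7), so DEFAULT_GUEST_VSOCK_CID above is the
+    ///  first value a guest may use)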
+ pub guest_cid: u32, + + /// unix domain socket path + pub uds_path: String, +} + +#[derive(Clone, Debug, Default)] +pub struct HybridVsockDevice { + /// Unique identifier of the device + pub id: String, + + /// config information for HybridVsockDevice + pub config: HybridVsockConfig, +} + +impl HybridVsockDevice { + pub fn new(device_id: &String, config: &HybridVsockConfig) -> Self { + Self { + id: format!("vsock-{}", device_id), + config: config.clone(), + } + } +} + +#[async_trait] +impl Device for HybridVsockDevice { + async fn attach(&mut self, h: &dyn hypervisor) -> Result<()> { + h.add_device(DeviceType::HybridVsock(self.clone())) + .await + .context("add hybrid vsock device.")?; + + return Ok(()); + } + + async fn detach(&mut self, _h: &dyn hypervisor) -> Result> { + // no need to do detach, just return Ok(None) + Ok(None) + } + + async fn get_device_info(&self) -> DeviceType { + DeviceType::HybridVsock(self.clone()) + } + + async fn increase_attach_count(&mut self) -> Result { + // hybrid vsock devices will not be attached multiple times, Just return Ok(false) + + Ok(false) + } + + async fn decrease_attach_count(&mut self) -> Result { + // hybrid vsock devices will not be detached multiple times, Just return Ok(false) + + Ok(false) + } +} + +#[derive(Debug)] +pub struct VsockConfig { + /// A 32-bit Context Identifier (CID) used to identify the guest. + pub guest_cid: u32, + + /// Vhost vsock fd. Hold to ensure CID is not used by other VM. + pub vhost_fd: File, +} + +#[derive(Debug)] +pub struct VsockDevice { + /// Unique identifier of the device + pub id: String, + + /// config information for VsockDevice + pub config: VsockConfig, +} + +const VHOST_VSOCK_DEVICE: &str = "/dev/vhost-vsock"; + +// From +// Generate a wrapper function for VHOST_VSOCK_SET_GUEST_CID ioctl. +// It set guest CID for vsock fd, and return error if CID is already +// in use. +const VHOST_VIRTIO_IOCTL: u8 = 0xAF; +const VHOST_VSOCK_SET_GUEST_CID: u8 = 0x60; +nix::ioctl_write_ptr!( + vhost_vsock_set_guest_cid, + VHOST_VIRTIO_IOCTL, + VHOST_VSOCK_SET_GUEST_CID, + u64 +); + +const CID_RETRY_COUNT: u32 = 50; + +impl VsockDevice { + pub async fn new(id: String) -> Result { + let vhost_fd = OpenOptions::new() + .read(true) + .write(true) + .open(VHOST_VSOCK_DEVICE) + .await + .context(format!( + "failed to open {}, try to run modprobe vhost_vsock.", + VHOST_VSOCK_DEVICE + ))?; + let mut rng = rand::thread_rng(); + + // Try 50 times to find a context ID that is not in use. + for _ in 0..CID_RETRY_COUNT { + // First usable CID above VMADDR_CID_HOST (see vsock(7)) + let first_usable_cid = 3; + let rand_cid = rng.gen_range(first_usable_cid..=(u32::MAX)); + let guest_cid = + unsafe { vhost_vsock_set_guest_cid(vhost_fd.as_raw_fd(), &(rand_cid as u64)) }; + match guest_cid { + Ok(_) => { + return Ok(VsockDevice { + id, + config: VsockConfig { + guest_cid: rand_cid, + vhost_fd, + }, + }); + } + Err(nix::Error::EADDRINUSE) => { + // The CID is already in use. Try another one. 
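+                    // (EADDRINUSE from the VHOST_VSOCK_SET_GUEST_CID ioctl means
+                    //  another VM on this host already owns that CID)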
+ } + Err(err) => { + return Err(err).context("failed to set guest CID"); + } + } + } + + anyhow::bail!( + "failed to find a free vsock context ID after {} attempts", + CID_RETRY_COUNT + ); + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/mod.rs b/src/runtime-rs/crates/hypervisor/src/device/mod.rs new file mode 100644 index 000000000000..8154920574a7 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/device/mod.rs @@ -0,0 +1,70 @@ +// Copyright (c) 2019-2023 Alibaba Cloud +// Copyright (c) 2019-2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fmt; + +use crate::device::driver::vhost_user_blk::VhostUserBlkDevice; +use crate::{ + BlockConfig, BlockDevice, HybridVsockConfig, HybridVsockDevice, Hypervisor as hypervisor, + NetworkConfig, NetworkDevice, ShareFsDevice, ShareFsDeviceConfig, ShareFsMountConfig, + ShareFsMountDevice, VfioConfig, VfioDevice, VhostUserConfig, VsockConfig, VsockDevice, +}; +use anyhow::Result; +use async_trait::async_trait; + +pub mod device_manager; +pub mod driver; +pub mod util; + +#[derive(Debug)] +pub enum DeviceConfig { + BlockCfg(BlockConfig), + VhostUserBlkCfg(VhostUserConfig), + NetworkCfg(NetworkConfig), + ShareFsCfg(ShareFsDeviceConfig), + VfioCfg(VfioConfig), + ShareFsMountCfg(ShareFsMountConfig), + VsockCfg(VsockConfig), + HybridVsockCfg(HybridVsockConfig), +} + +#[derive(Debug)] +pub enum DeviceType { + Block(BlockDevice), + VhostUserBlk(VhostUserBlkDevice), + Vfio(VfioDevice), + Network(NetworkDevice), + ShareFs(ShareFsDevice), + ShareFsMount(ShareFsMountDevice), + HybridVsock(HybridVsockDevice), + Vsock(VsockDevice), +} + +impl fmt::Display for DeviceType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +#[async_trait] +pub trait Device: std::fmt::Debug + Send + Sync { + // attach is to plug device into VM + async fn attach(&mut self, h: &dyn hypervisor) -> Result<()>; + // detach is to unplug device from VM + async fn detach(&mut self, h: &dyn hypervisor) -> Result>; + // get_device_info returns device config + async fn get_device_info(&self) -> DeviceType; + // increase_attach_count is used to increase the attach count for a device + // return values: + // * true: no need to do real attach when current attach count is zero, skip following actions. + // * err error: error while do increase attach count + async fn increase_attach_count(&mut self) -> Result; + // decrease_attach_count is used to decrease the attach count for a device + // return values: + // * false: no need to do real dettach when current attach count is not zero, skip following actions. + // * err error: error while do decrease attach count + async fn decrease_attach_count(&mut self) -> Result; +} diff --git a/src/runtime-rs/crates/hypervisor/src/device/util.rs b/src/runtime-rs/crates/hypervisor/src/device/util.rs new file mode 100644 index 000000000000..5d999d8f6c64 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/device/util.rs @@ -0,0 +1,91 @@ +// Copyright (c) 2019-2023 Alibaba Cloud +// Copyright (c) 2019-2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Result}; +use ini::Ini; + +const SYS_DEV_PREFIX: &str = "/sys/dev"; + +pub const DEVICE_TYPE_BLOCK: &str = "b"; +pub const DEVICE_TYPE_CHAR: &str = "c"; + +// get_host_path is used to fetch the host path for the device. +// The path passed in the spec refers to the path that should appear inside the container. 
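+// (On a typical host, for example, type "b" with major:minor 8:0 corresponds to
+// /dev/sda, and type "c" with 1:3 corresponds to /dev/null.)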
+// We need to find the actual device path on the host based on the major-minor numbers of the device. +pub fn get_host_path(dev_type: &str, major: i64, minor: i64) -> Result { + let path_comp = match dev_type { + "c" | "u" => "char", + "b" => "block", + // for device type p will return an empty string + _ => return Ok(String::new()), + }; + let format = format!("{}:{}", major, minor); + let sys_dev_path = std::path::Path::new(SYS_DEV_PREFIX) + .join(path_comp) + .join(format) + .join("uevent"); + std::fs::metadata(&sys_dev_path)?; + let conf = Ini::load_from_file(&sys_dev_path)?; + let dev_name = conf + .section::(None) + .ok_or_else(|| anyhow!("has no section"))? + .get("DEVNAME") + .ok_or_else(|| anyhow!("has no DEVNAME"))?; + Ok(format!("/dev/{}", dev_name)) +} + +// get_virt_drive_name returns the disk name format for virtio-blk +// Reference: https://github.com/torvalds/linux/blob/master/drivers/block/virtio_blk.c @c0aa3e0916d7e531e69b02e426f7162dfb1c6c0 +pub(crate) fn get_virt_drive_name(mut index: i32) -> Result { + if index < 0 { + return Err(anyhow!("Index cannot be negative")); + } + + // Prefix used for virtio-block devices + const PREFIX: &str = "vd"; + + // Refer to DISK_NAME_LEN: https://github.com/torvalds/linux/blob/08c521a2011ff492490aa9ed6cc574be4235ce2b/include/linux/genhd.h#L61 + let disk_name_len = 32usize; + let base = 26i32; + + let suff_len = disk_name_len - PREFIX.len(); + let mut disk_letters = vec![0u8; suff_len]; + + let mut i = 0usize; + while i < suff_len && index >= 0 { + let letter: u8 = b'a' + (index % base) as u8; + disk_letters[i] = letter; + index = (index / base) - 1; + i += 1; + } + if index >= 0 { + return Err(anyhow!("Index not supported")); + } + disk_letters.truncate(i); + disk_letters.reverse(); + Ok(String::from(PREFIX) + std::str::from_utf8(&disk_letters)?) 
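+    // Worked example: index 27 yields 27 % 26 = 1 ('b'), then 27 / 26 - 1 = 0 ('a');
+    // the collected letters are reversed, so the caller gets "vdab".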
+} + +#[cfg(test)] +mod tests { + use crate::device::util::get_virt_drive_name; + + #[actix_rt::test] + async fn test_get_virt_drive_name() { + for &(input, output) in [ + (0i32, "vda"), + (25, "vdz"), + (27, "vdab"), + (704, "vdaac"), + (18277, "vdzzz"), + ] + .iter() + { + let out = get_virt_drive_name(input).unwrap(); + assert_eq!(&out, output); + } + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs new file mode 100644 index 000000000000..68d4d7fbe209 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs @@ -0,0 +1,391 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use super::vmm_instance::VmmInstance; +use crate::{ + device::DeviceType, hypervisor_persist::HypervisorState, kernel_param::KernelParams, VmmState, + DEV_HUGEPAGES, HUGETLBFS, HUGE_SHMEM, HYPERVISOR_DRAGONBALL, SHMEM, +}; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use dragonball::{ + api::v1::{BootSourceConfig, VcpuResizeInfo}, + vm::VmConfigInfo, +}; + +use kata_sys_util::mount; +use kata_types::{ + capabilities::{Capabilities, CapabilityBits}, + config::{ + hypervisor::{HugePageType, Hypervisor as HypervisorConfig}, + KATA_PATH, + }, +}; +use nix::mount::MsFlags; +use persist::sandbox_persist::Persist; +use std::{collections::HashSet, fs::create_dir_all}; + +const DRAGONBALL_KERNEL: &str = "vmlinux"; +const DRAGONBALL_ROOT_FS: &str = "rootfs"; + +#[derive(Debug)] +pub struct DragonballInner { + /// sandbox id + pub(crate) id: String, + + /// vm path + pub(crate) vm_path: String, + + /// jailed flag + pub(crate) jailed: bool, + + /// chroot base for the jailer + pub(crate) jailer_root: String, + + /// netns + pub(crate) netns: Option, + + /// hypervisor config + pub(crate) config: HypervisorConfig, + + /// vmm state + pub(crate) state: VmmState, + + /// vmm instance + pub(crate) vmm_instance: VmmInstance, + + /// hypervisor run dir + pub(crate) run_dir: String, + + /// pending device + pub(crate) pending_devices: Vec, + + /// cached block device + pub(crate) cached_block_devices: HashSet, + + /// dragonball capabilities + pub(crate) capabilities: Capabilities, +} + +impl DragonballInner { + pub fn new() -> DragonballInner { + let mut capabilities = Capabilities::new(); + capabilities.set( + CapabilityBits::BlockDeviceSupport + | CapabilityBits::BlockDeviceHotplugSupport + | CapabilityBits::FsSharingSupport, + ); + DragonballInner { + id: "".to_string(), + vm_path: "".to_string(), + jailer_root: "".to_string(), + netns: None, + config: Default::default(), + pending_devices: vec![], + state: VmmState::NotReady, + jailed: false, + vmm_instance: VmmInstance::new(""), + run_dir: "".to_string(), + cached_block_devices: Default::default(), + capabilities, + } + } + + pub(crate) async fn cold_start_vm(&mut self, timeout: i32) -> Result<()> { + info!(sl!(), "start sandbox cold"); + + self.set_vm_base_config().context("set vm base config")?; + + // get rootfs driver + let rootfs_driver = self.config.blockdev_info.block_device_driver.clone(); + + // get kernel params + let mut kernel_params = KernelParams::new(self.config.debug_info.enable_debug); + kernel_params.append(&mut KernelParams::new_rootfs_kernel_params( + &rootfs_driver, + &self.config.boot_info.rootfs_type, + )?); + kernel_params.append(&mut KernelParams::from_string( + &self.config.boot_info.kernel_params, + )); + info!(sl!(), "prepared 
kernel_params={:?}", kernel_params); + + // set boot source + let kernel_path = self.config.boot_info.kernel.clone(); + self.set_boot_source( + &kernel_path, + &kernel_params + .to_string() + .context("kernel params to string")?, + ) + .context("set_boot_source")?; + + // add pending devices + while let Some(dev) = self.pending_devices.pop() { + self.add_device(dev).await.context("add_device")?; + } + + // start vmm and wait ready + self.start_vmm_instance().context("start vmm instance")?; + self.wait_vmm_ready(timeout).context("wait vmm")?; + + Ok(()) + } + + pub(crate) fn run_vmm_server(&mut self) -> Result<()> { + if !self.config.jailer_path.is_empty() { + self.jailed = true; + } + + // create jailer root + create_dir_all(self.jailer_root.as_str()) + .map_err(|e| anyhow!("Failed to create dir {} err : {:?}", self.jailer_root, e))?; + + // create run dir + self.run_dir = [KATA_PATH, self.id.as_str()].join("/"); + create_dir_all(self.run_dir.as_str()) + .with_context(|| format!("failed to create dir {}", self.run_dir.as_str()))?; + + // run vmm server + self.vmm_instance + .run_vmm_server(&self.id, self.netns.clone()) + .context("run vmm server")?; + self.state = VmmState::VmmServerReady; + + Ok(()) + } + + pub(crate) fn cleanup_resource(&self) { + if self.jailed { + self.umount_jail_resource(DRAGONBALL_KERNEL).ok(); + self.umount_jail_resource(DRAGONBALL_ROOT_FS).ok(); + for id in &self.cached_block_devices { + self.umount_jail_resource(id.as_str()).ok(); + } + } + + std::fs::remove_dir_all(&self.vm_path) + .map_err(|err| { + error!(sl!(), "failed to remove dir all for {}", &self.vm_path); + err + }) + .ok(); + } + + fn set_vm_base_config(&mut self) -> Result<()> { + let serial_path = [&self.run_dir, "console.sock"].join("/"); + let (mem_type, mem_file_path) = if self.config.memory_info.enable_hugepages { + match self.config.memory_info.hugepage_type { + HugePageType::THP => (String::from(HUGE_SHMEM), String::from("")), + HugePageType::Hugetlbfs => (String::from(HUGETLBFS), String::from(DEV_HUGEPAGES)), + } + } else { + (String::from(SHMEM), String::from("")) + }; + let vm_config = VmConfigInfo { + serial_path: Some(serial_path), + mem_size_mib: self.config.memory_info.default_memory as usize, + vcpu_count: self.config.cpu_info.default_vcpus as u8, + max_vcpu_count: self.config.cpu_info.default_maxvcpus as u8, + mem_type, + mem_file_path, + ..Default::default() + }; + info!(sl!(), "vm config: {:?}", vm_config); + + self.vmm_instance + .set_vm_configuration(vm_config) + .context("set vm configuration") + } + + pub(crate) fn umount_jail_resource(&self, jailed_path: &str) -> Result<()> { + let path = [self.jailer_root.as_str(), jailed_path].join("/"); + nix::mount::umount2(path.as_str(), nix::mount::MntFlags::MNT_DETACH) + .with_context(|| format!("umount path {}", &path)) + } + + pub(crate) fn get_resource(&self, src: &str, dst: &str) -> Result { + if self.jailed { + self.jail_resource(src, dst) + } else { + Ok(src.to_string()) + } + } + + fn jail_resource(&self, src: &str, dst: &str) -> Result { + info!(sl!(), "jail resource: src {} dst {}", src, dst); + if src.is_empty() || dst.is_empty() { + return Err(anyhow!("invalid param src {} dst {}", src, dst)); + } + + let jailed_location = [self.jailer_root.as_str(), dst].join("/"); + mount::bind_mount_unchecked(src, jailed_location.as_str(), false, MsFlags::MS_SLAVE) + .context("bind_mount")?; + + let mut abs_path = String::from("/"); + abs_path.push_str(dst); + Ok(abs_path) + } + + fn set_boot_source(&mut self, kernel_path: &str, 
kernel_params: &str) -> Result<()> { + info!( + sl!(), + "kernel path {} kernel params {}", kernel_path, kernel_params + ); + + let mut boot_cfg = BootSourceConfig { + kernel_path: self + .get_resource(kernel_path, DRAGONBALL_KERNEL) + .context("get resource")?, + ..Default::default() + }; + + if !kernel_params.is_empty() { + boot_cfg.boot_args = Some(kernel_params.to_string()); + } + + self.vmm_instance + .put_boot_source(boot_cfg) + .context("put boot source") + } + + fn start_vmm_instance(&mut self) -> Result<()> { + info!(sl!(), "Starting VM"); + self.vmm_instance + .instance_start() + .context("Failed to start vmm")?; + self.state = VmmState::VmRunning; + Ok(()) + } + + // wait_vmm_ready will wait for timeout seconds for the VMM to be up and running. + // This does not mean that the VM is up and running. It only indicates that the VMM is up and + // running and able to handle commands to setup and launch a VM + fn wait_vmm_ready(&mut self, timeout: i32) -> Result<()> { + if timeout < 0 { + return Err(anyhow!("Invalid param timeout {}", timeout)); + } + + let time_start = std::time::Instant::now(); + loop { + match self.vmm_instance.is_running() { + Ok(_) => return Ok(()), + Err(err) => { + let time_now = std::time::Instant::now(); + if time_now.duration_since(time_start).as_millis() > timeout as u128 { + return Err(anyhow!( + "waiting vmm ready timeout {} err: {:?}", + timeout, + err + )); + } + std::thread::sleep(std::time::Duration::from_millis(10)); + } + } + } + } + + // check if resizing info is valid + // the error in this function is not ok to be tolerated, the container boot will fail + fn precheck_resize_vcpus(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)> { + // old_vcpus > 0, safe for conversion + let current_vcpus = old_vcpus; + + // a non-zero positive is required + if new_vcpus == 0 { + return Err(anyhow!("resize vcpu error: 0 vcpu resizing is invalid")); + } + + // cannot exceed maximum value + if new_vcpus > self.config.cpu_info.default_maxvcpus { + warn!( + sl!(), + "Cannot allocate more vcpus than the max allowed number of vcpus. 
The maximum allowed amount of vcpus will be used instead."); + return Ok((current_vcpus, self.config.cpu_info.default_maxvcpus)); + } + + Ok((current_vcpus, new_vcpus)) + } + + // do the check before resizing, returns Result<(old, new)> + pub(crate) async fn resize_vcpu(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)> { + if old_vcpus == new_vcpus { + info!( + sl!(), + "resize_vcpu: no need to resize vcpus because old_vcpus is equal to new_vcpus" + ); + return Ok((new_vcpus, new_vcpus)); + } + + let (old_vcpus, new_vcpus) = self.precheck_resize_vcpus(old_vcpus, new_vcpus)?; + info!( + sl!(), + "check_resize_vcpus passed, passing new_vcpus = {:?} to vmm", new_vcpus + ); + + let cpu_resize_info = VcpuResizeInfo { + vcpu_count: Some(new_vcpus as u8), + }; + self.vmm_instance + .resize_vcpu(&cpu_resize_info) + .context(format!( + "failed to do_resize_vcpus on new_vcpus={:?}", + new_vcpus + ))?; + Ok((old_vcpus, new_vcpus)) + } + + pub fn set_hypervisor_config(&mut self, config: HypervisorConfig) { + self.config = config; + } + + pub fn hypervisor_config(&self) -> HypervisorConfig { + self.config.clone() + } +} + +#[async_trait] +impl Persist for DragonballInner { + type State = HypervisorState; + type ConstructorArgs = (); + + /// Save a state of hypervisor + async fn save(&self) -> Result { + Ok(HypervisorState { + hypervisor_type: HYPERVISOR_DRAGONBALL.to_string(), + id: self.id.clone(), + vm_path: self.vm_path.clone(), + jailed: self.jailed, + jailer_root: self.jailer_root.clone(), + netns: self.netns.clone(), + config: self.hypervisor_config(), + run_dir: self.run_dir.clone(), + cached_block_devices: self.cached_block_devices.clone(), + ..Default::default() + }) + } + + /// Restore hypervisor + async fn restore( + _hypervisor_args: Self::ConstructorArgs, + hypervisor_state: Self::State, + ) -> Result { + Ok(DragonballInner { + id: hypervisor_state.id, + vm_path: hypervisor_state.vm_path, + jailed: hypervisor_state.jailed, + jailer_root: hypervisor_state.jailer_root, + netns: hypervisor_state.netns, + config: hypervisor_state.config, + state: VmmState::NotReady, + vmm_instance: VmmInstance::new(""), + run_dir: hypervisor_state.run_dir, + pending_devices: vec![], + cached_block_devices: hypervisor_state.cached_block_devices, + capabilities: Capabilities::new(), + }) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs new file mode 100644 index 000000000000..3e2ea3e86060 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs @@ -0,0 +1,432 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::path::PathBuf; + +use anyhow::{anyhow, Context, Result}; +use dbs_utils::net::MacAddr; +use dragonball::{ + api::v1::{ + BlockDeviceConfigInfo, FsDeviceConfigInfo, FsMountConfigInfo, VirtioNetDeviceConfigInfo, + VsockDeviceConfigInfo, + }, + device_manager::blk_dev_mgr::BlockDeviceType, +}; + +use super::DragonballInner; +use crate::{ + device::DeviceType, HybridVsockConfig, NetworkConfig, ShareFsDeviceConfig, ShareFsMountConfig, + ShareFsMountType, ShareFsOperation, VfioBusMode, VfioDevice, VmmState, JAILER_ROOT, +}; + +const MB_TO_B: u32 = 1024 * 1024; +const DEFAULT_VIRTIO_FS_NUM_QUEUES: i32 = 1; +const DEFAULT_VIRTIO_FS_QUEUE_SIZE: i32 = 1024; + +const VIRTIO_FS: &str = "virtio-fs"; +const INLINE_VIRTIO_FS: &str = "inline-virtio-fs"; + +pub(crate) fn drive_index_to_id(index: 
u64) -> String { + format!("drive_{}", index) +} + +impl DragonballInner { + pub(crate) async fn add_device(&mut self, device: DeviceType) -> Result<()> { + if self.state == VmmState::NotReady { + info!(sl!(), "VMM not ready, queueing device {}", device); + + // add the pending device by reverse order, thus the + // start_vm would pop the devices in an right order + // to add the devices. + self.pending_devices.insert(0, device); + return Ok(()); + } + + info!(sl!(), "dragonball add device {:?}", &device); + match device { + DeviceType::Network(network) => self + .add_net_device(&network.config) + .context("add net device"), + DeviceType::Vfio(hostdev) => self.add_vfio_device(&hostdev).context("add vfio device"), + DeviceType::Block(block) => self + .add_block_device( + block.config.path_on_host.as_str(), + block.device_id.as_str(), + block.config.is_readonly, + block.config.no_drop, + ) + .context("add block device"), + DeviceType::VhostUserBlk(block) => self + .add_block_device( + block.config.socket_path.as_str(), + block.device_id.as_str(), + block.is_readonly, + block.no_drop, + ) + .context("add vhost user based block device"), + DeviceType::HybridVsock(hvsock) => self.add_hvsock(&hvsock.config).context("add vsock"), + DeviceType::ShareFs(sharefs) => self + .add_share_fs_device(&sharefs.config) + .context("add share fs device"), + DeviceType::ShareFsMount(sharefs_mount) => self + .add_share_fs_mount(&sharefs_mount.config) + .context("add share fs mount"), + DeviceType::Vsock(_) => { + todo!() + } + } + } + + pub(crate) async fn remove_device(&mut self, device: DeviceType) -> Result<()> { + info!(sl!(), "remove device {} ", device); + + match device { + DeviceType::Network(network) => { + // Dragonball doesn't support remove network device, just print message. + info!( + sl!(), + "dragonball remove network device: {:?}.", network.config.virt_iface_name + ); + + Ok(()) + } + DeviceType::Block(block) => { + let drive_id = drive_index_to_id(block.config.index); + self.remove_block_drive(drive_id.as_str()) + .context("remove block drive") + } + DeviceType::Vfio(hostdev) => { + let primary_device = hostdev.devices.first().unwrap().clone(); + let hostdev_id = primary_device.hostdev_id; + + self.remove_vfio_device(hostdev_id) + } + _ => Err(anyhow!("unsupported device {:?}", device)), + } + } + + fn add_vfio_device(&mut self, device: &VfioDevice) -> Result<()> { + let vfio_device = device.clone(); + + // FIXME: + // A device with multi-funtions, or a IOMMU group with one more + // devices, the Primary device is selected to be passed to VM. + // And the the first one is Primary device. + // safe here, devices is not empty. + let primary_device = vfio_device.devices.first().unwrap().clone(); + + let vendor_device_id = if let Some(vd) = primary_device.device_vendor { + vd.get_device_vendor_id()? + } else { + 0 + }; + + let guest_dev_id = if let Some(pci_path) = primary_device.guest_pci_path { + // safe here, dragonball's pci device directly connects to root bus. + // usually, it has been assigned in vfio device manager. + pci_path.get_device_slot().unwrap().0 + } else { + 0 + }; + + let bus_mode = VfioBusMode::to_string(vfio_device.bus_mode); + + info!(sl!(), "Mock for dragonball insert host device."); + info!( + sl!(), + " Mock for dragonball insert host device. 
+ host device id: {:?}, + bus_slot_func: {:?}, + bus mod: {:?}, + guest device id: {:?}, + vendor/device id: {:?}", + primary_device.hostdev_id, + primary_device.bus_slot_func, + bus_mode, + guest_dev_id, + vendor_device_id, + ); + + // FIXME: + // interface implementation to be done when dragonball supports + // self.vmm_instance.insert_host_device(host_cfg)?; + + Ok(()) + } + + fn remove_vfio_device(&mut self, hostdev_id: String) -> Result<()> { + info!( + sl!(), + "Mock for dragonball remove host_device with hostdev id {:?}", hostdev_id + ); + // FIXME: + // interface implementation to be done when dragonball supports + // self.vmm_instance.remove_host_device(hostdev_id)?; + + Ok(()) + } + + fn add_block_device( + &mut self, + path: &str, + id: &str, + read_only: bool, + no_drop: bool, + ) -> Result<()> { + let jailed_drive = self.get_resource(path, id).context("get resource")?; + self.cached_block_devices.insert(id.to_string()); + + let blk_cfg = BlockDeviceConfigInfo { + drive_id: id.to_string(), + device_type: BlockDeviceType::get_type(path), + path_on_host: PathBuf::from(jailed_drive), + is_direct: self.config.blockdev_info.block_device_cache_direct, + no_drop, + is_read_only: read_only, + ..Default::default() + }; + self.vmm_instance + .insert_block_device(blk_cfg) + .context("insert block device") + } + + fn remove_block_drive(&mut self, id: &str) -> Result<()> { + self.vmm_instance + .remove_block_device(id) + .context("remove block device")?; + + if self.cached_block_devices.contains(id) && self.jailed { + self.umount_jail_resource(id) + .context("umount jail resource")?; + self.cached_block_devices.remove(id); + } + Ok(()) + } + + fn add_net_device(&mut self, config: &NetworkConfig) -> Result<()> { + let iface_cfg = VirtioNetDeviceConfigInfo { + iface_id: config.virt_iface_name.clone(), + host_dev_name: config.host_dev_name.clone(), + guest_mac: match &config.guest_mac { + Some(mac) => MacAddr::from_bytes(&mac.0).ok(), + None => None, + }, + num_queues: config.queue_num, + queue_size: config.queue_size as u16, + ..Default::default() + }; + + info!( + sl!(), + "add {} endpoint to {}", iface_cfg.host_dev_name, iface_cfg.iface_id + ); + + self.vmm_instance + .insert_network_device(iface_cfg) + .context("insert network device") + } + + fn add_hvsock(&mut self, config: &HybridVsockConfig) -> Result<()> { + let vsock_cfg = VsockDeviceConfigInfo { + id: String::from(JAILER_ROOT), + guest_cid: config.guest_cid, + uds_path: Some(config.uds_path.clone()), + ..Default::default() + }; + debug!(sl!(), "HybridVsock configure: {:?}", &vsock_cfg); + + self.vmm_instance + .insert_vsock(vsock_cfg) + .context("insert vsock") + } + + fn parse_inline_virtiofs_args( + &self, + fs_cfg: &mut FsDeviceConfigInfo, + options: &mut Vec, + ) -> Result<()> { + let mut debug = false; + let mut opt_list = String::new(); + + fs_cfg.mode = String::from("virtio"); + fs_cfg.cache_policy = self.config.shared_fs.virtio_fs_cache.clone(); + fs_cfg.fuse_killpriv_v2 = true; + + info!( + sl!(), + "args: {:?}", &self.config.shared_fs.virtio_fs_extra_args + ); + let mut args = self.config.shared_fs.virtio_fs_extra_args.clone(); + let _ = go_flag::parse_args_with_warnings::(&args, None, |flags| { + flags.add_flag("d", &mut debug); + flags.add_flag("thread-pool-size", &mut fs_cfg.thread_pool_size); + flags.add_flag("drop-sys-resource", &mut fs_cfg.drop_sys_resource); + flags.add_flag("o", &mut opt_list); + }) + .with_context(|| format!("parse args: {:?}", args))?; + + // more options parsed for inline virtio-fs' custom 
config + args.append(options); + + if debug { + warn!( + sl!(), + "Inline virtiofs \"-d\" option not implemented, ignore" + ); + } + + // Parse comma separated option list + if !opt_list.is_empty() { + let args: Vec<&str> = opt_list.split(',').collect(); + for arg in args { + match arg { + "cache=none" => fs_cfg.cache_policy = String::from("none"), + "cache=auto" => fs_cfg.cache_policy = String::from("auto"), + "cache=always" => fs_cfg.cache_policy = String::from("always"), + "no_open" => fs_cfg.no_open = true, + "open" => fs_cfg.no_open = false, + "writeback_cache" => fs_cfg.writeback_cache = true, + "no_writeback_cache" => fs_cfg.writeback_cache = false, + "writeback" => fs_cfg.writeback_cache = true, + "no_writeback" => fs_cfg.writeback_cache = false, + "xattr" => fs_cfg.xattr = true, + "no_xattr" => fs_cfg.xattr = false, + "cache_symlinks" => {} // inline virtiofs always cache symlinks + "no_readdir" => fs_cfg.no_readdir = true, + "trace" => warn!( + sl!(), + "Inline virtiofs \"-o trace\" option not supported yet, ignored." + ), + _ => warn!(sl!(), "Inline virtiofs unsupported option: {}", arg), + } + } + } + + debug!(sl!(), "Inline virtiofs config {:?}", fs_cfg); + Ok(()) + } + + fn add_share_fs_device(&self, config: &ShareFsDeviceConfig) -> Result<()> { + let mut fs_cfg = FsDeviceConfigInfo { + sock_path: config.sock_path.clone(), + tag: config.mount_tag.clone(), + num_queues: if config.queue_num > 0 { + config.queue_num as usize + } else { + DEFAULT_VIRTIO_FS_NUM_QUEUES as usize + }, + queue_size: if config.queue_size > 0 { + config.queue_size as u16 + } else { + DEFAULT_VIRTIO_FS_QUEUE_SIZE as u16 + }, + cache_size: (self.config.shared_fs.virtio_fs_cache_size as u64) + .saturating_mul(MB_TO_B as u64), + xattr: true, + ..Default::default() + }; + + let mut options = config.options.clone(); + self.do_add_fs_device(&config.fs_type, &mut fs_cfg, &mut options) + } + + fn do_add_fs_device( + &self, + fs_type: &str, + fs_cfg: &mut FsDeviceConfigInfo, + options: &mut Vec, + ) -> Result<()> { + match fs_type { + VIRTIO_FS => { + fs_cfg.mode = String::from("vhostuser"); + } + INLINE_VIRTIO_FS => { + // All parameters starting with --patch-fs do not need to be processed, these are the parameters required by patch fs + options.retain(|x| !x.starts_with("--patch-fs")); + self.parse_inline_virtiofs_args(fs_cfg, options)?; + } + _ => { + return Err(anyhow!( + "hypervisor isn't configured with shared_fs supported" + )); + } + } + self.vmm_instance + .insert_fs(fs_cfg) + .map_err(|e| anyhow!("insert {} fs error. 
{:?}", fs_cfg.mode, e)) + } + + fn add_share_fs_mount(&mut self, config: &ShareFsMountConfig) -> Result<()> { + let ops = match config.op { + ShareFsOperation::Mount => "mount", + ShareFsOperation::Umount => "umount", + ShareFsOperation::Update => "update", + }; + + let fstype = match config.fstype { + ShareFsMountType::PASSTHROUGH => "passthroughfs", + ShareFsMountType::RAFS => "rafs", + }; + + let cfg = FsMountConfigInfo { + ops: ops.to_string(), + fstype: Some(fstype.to_string()), + source: Some(config.source.clone()), + mountpoint: config.mount_point.clone(), + config: config.config.clone(), + tag: config.tag.clone(), + prefetch_list_path: config.prefetch_list_path.clone(), + dax_threshold_size_kb: None, + }; + + self.vmm_instance.patch_fs(&cfg, config.op).map_err(|e| { + anyhow!( + "{:?} {} at {} error: {:?}", + config.op, + fstype, + config.mount_point.clone(), + e + ) + }) + } +} + +#[cfg(test)] +mod tests { + use dragonball::api::v1::FsDeviceConfigInfo; + + use crate::dragonball::DragonballInner; + + #[test] + fn test_parse_inline_virtiofs_args() { + let mut dragonball = DragonballInner::new(); + let mut fs_cfg = FsDeviceConfigInfo::default(); + + // no_open and writeback_cache is the default, so test open and no_writeback_cache. "-d" + // and "trace" are ignored for now, but should not return error. + dragonball.config.shared_fs.virtio_fs_extra_args = vec![ + "-o".to_string(), + "open,no_writeback_cache,xattr,trace".to_string(), + "--thread-pool-size=128".to_string(), + "--drop-sys-resource".to_string(), + "-d".to_string(), + ]; + + let mut options: Vec = Vec::new(); + dragonball.config.shared_fs.virtio_fs_cache = "auto".to_string(); + dragonball + .parse_inline_virtiofs_args(&mut fs_cfg, &mut options) + .unwrap(); + + assert!(!fs_cfg.no_open); + assert!(fs_cfg.xattr); + assert!(fs_cfg.fuse_killpriv_v2); + assert!(!fs_cfg.writeback_cache); + assert_eq!(fs_cfg.cache_policy, "auto".to_string()); + assert!(fs_cfg.drop_sys_resource); + assert!(fs_cfg.thread_pool_size == 128); + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner_hypervisor.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_hypervisor.rs new file mode 100644 index 000000000000..5de9ceb0018c --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_hypervisor.rs @@ -0,0 +1,144 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + collections::{HashMap, HashSet}, + iter::FromIterator, +}; + +use anyhow::{Context, Ok, Result}; +use kata_types::capabilities::Capabilities; + +use super::inner::DragonballInner; +use crate::{ + utils::{self, get_hvsock_path, get_jailer_root, get_sandbox_path}, + VcpuThreadIds, VmmState, +}; + +impl DragonballInner { + pub(crate) async fn prepare_vm(&mut self, id: &str, netns: Option) -> Result<()> { + self.id = id.to_string(); + self.state = VmmState::NotReady; + + self.vm_path = get_sandbox_path(id); + self.jailer_root = get_jailer_root(id); + self.netns = netns; + + Ok(()) + } + + // start_vm will start the hypervisor for the given sandbox. 
+ // In the context of dragonball, this will start the hypervisor + pub(crate) async fn start_vm(&mut self, timeout: i32) -> Result<()> { + self.run_vmm_server().context("start vmm server")?; + self.cold_start_vm(timeout).await.map_err(|error| { + error!(sl!(), "start micro vm error {:?}", error); + if let Err(err) = self.stop_vm() { + error!(sl!(), "failed to call end err : {:?}", err); + } + error + })?; + + Ok(()) + } + + pub(crate) fn stop_vm(&mut self) -> Result<()> { + info!(sl!(), "Stopping dragonball VM"); + self.vmm_instance.stop().context("stop")?; + Ok(()) + } + + pub(crate) fn pause_vm(&self) -> Result<()> { + info!(sl!(), "do pause vm"); + self.vmm_instance.pause().context("pause vm")?; + Ok(()) + } + + pub(crate) fn resume_vm(&self) -> Result<()> { + info!(sl!(), "do resume vm"); + self.vmm_instance.resume().context("resume vm")?; + Ok(()) + } + + pub(crate) async fn save_vm(&self) -> Result<()> { + todo!() + } + + pub(crate) async fn get_agent_socket(&self) -> Result { + const HYBRID_VSOCK_SCHEME: &str = "hvsock"; + Ok(format!( + "{}://{}", + HYBRID_VSOCK_SCHEME, + get_hvsock_path(&self.id), + )) + } + + pub(crate) async fn get_hypervisor_metrics(&self) -> Result { + info!(sl!(), "get hypervisor metrics"); + self.vmm_instance.get_hypervisor_metrics() + } + + pub(crate) async fn disconnect(&mut self) { + self.state = VmmState::NotReady; + } + + pub(crate) async fn get_thread_ids(&self) -> Result { + let mut vcpu_thread_ids: VcpuThreadIds = VcpuThreadIds { + vcpus: HashMap::new(), + }; + + for tid in self.vmm_instance.get_vcpu_tids() { + vcpu_thread_ids.vcpus.insert(tid.0 as u32, tid.1); + } + info!(sl!(), "get thread ids {:?}", vcpu_thread_ids); + Ok(vcpu_thread_ids) + } + + pub(crate) async fn cleanup(&self) -> Result<()> { + self.cleanup_resource(); + Ok(()) + } + + pub(crate) async fn get_pids(&self) -> Result> { + let mut pids = HashSet::new(); + // get shim thread ids + pids.insert(self.vmm_instance.pid()); + + for tid in utils::get_child_threads(self.vmm_instance.pid()) { + pids.insert(tid); + } + + // remove vcpus + for tid in self.vmm_instance.get_vcpu_tids() { + pids.remove(&tid.1); + } + + info!(sl!(), "get pids {:?}", pids); + Ok(Vec::from_iter(pids.into_iter())) + } + + pub(crate) async fn get_vmm_master_tid(&self) -> Result { + let master_tid = self.vmm_instance.get_vmm_master_tid(); + Ok(master_tid) + } + + pub(crate) async fn get_ns_path(&self) -> Result { + let ns_path = self.vmm_instance.get_ns_path(); + Ok(ns_path) + } + + pub(crate) async fn check(&self) -> Result<()> { + Ok(()) + } + + pub(crate) async fn get_jailer_root(&self) -> Result { + Ok(self.jailer_root.clone()) + } + + pub(crate) async fn capabilities(&self) -> Result { + Ok(self.capabilities.clone()) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs new file mode 100644 index 000000000000..3b98f38a2382 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs @@ -0,0 +1,189 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod inner; +mod inner_device; +mod inner_hypervisor; +use super::HypervisorState; +use inner::DragonballInner; +use persist::sandbox_persist::Persist; +pub mod vmm_instance; + +use std::sync::Arc; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use kata_types::capabilities::Capabilities; +use kata_types::config::hypervisor::Hypervisor as HypervisorConfig; +use tokio::sync::RwLock; 
+use tracing::instrument; + +use crate::{DeviceType, Hypervisor, VcpuThreadIds}; + +pub struct Dragonball { + inner: Arc>, +} + +impl std::fmt::Debug for Dragonball { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Dragonball").finish() + } +} + +impl Default for Dragonball { + fn default() -> Self { + Self::new() + } +} + +impl Dragonball { + pub fn new() -> Self { + Self { + inner: Arc::new(RwLock::new(DragonballInner::new())), + } + } + + pub async fn set_hypervisor_config(&mut self, config: HypervisorConfig) { + let mut inner = self.inner.write().await; + inner.set_hypervisor_config(config) + } +} + +#[async_trait] +impl Hypervisor for Dragonball { + #[instrument] + async fn prepare_vm(&self, id: &str, netns: Option) -> Result<()> { + let mut inner = self.inner.write().await; + inner.prepare_vm(id, netns).await + } + + #[instrument] + async fn start_vm(&self, timeout: i32) -> Result<()> { + let mut inner = self.inner.write().await; + inner.start_vm(timeout).await + } + + async fn stop_vm(&self) -> Result<()> { + let mut inner = self.inner.write().await; + inner.stop_vm() + } + + async fn pause_vm(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.pause_vm() + } + + async fn resume_vm(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.resume_vm() + } + + async fn save_vm(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.save_vm().await + } + + // returns Result<(old_vcpus, new_vcpus)> + async fn resize_vcpu(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)> { + let inner = self.inner.read().await; + inner.resize_vcpu(old_vcpus, new_vcpus).await + } + + async fn add_device(&self, device: DeviceType) -> Result<()> { + let mut inner = self.inner.write().await; + inner.add_device(device).await + } + + async fn remove_device(&self, device: DeviceType) -> Result<()> { + let mut inner = self.inner.write().await; + inner.remove_device(device).await + } + + async fn get_agent_socket(&self) -> Result { + let inner = self.inner.read().await; + inner.get_agent_socket().await + } + + async fn disconnect(&self) { + let mut inner = self.inner.write().await; + inner.disconnect().await + } + + async fn hypervisor_config(&self) -> HypervisorConfig { + let inner = self.inner.read().await; + inner.hypervisor_config() + } + + async fn get_thread_ids(&self) -> Result { + let inner = self.inner.read().await; + inner.get_thread_ids().await + } + + async fn cleanup(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.cleanup().await + } + + async fn get_pids(&self) -> Result> { + let inner = self.inner.read().await; + inner.get_pids().await + } + + async fn get_vmm_master_tid(&self) -> Result { + let inner = self.inner.read().await; + inner.get_vmm_master_tid().await + } + + async fn get_ns_path(&self) -> Result { + let inner = self.inner.read().await; + inner.get_ns_path().await + } + + async fn check(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.check().await + } + + async fn get_jailer_root(&self) -> Result { + let inner = self.inner.read().await; + inner.get_jailer_root().await + } + + async fn save_state(&self) -> Result { + self.save().await + } + + async fn capabilities(&self) -> Result { + let inner = self.inner.read().await; + inner.capabilities().await + } + + async fn get_hypervisor_metrics(&self) -> Result { + let inner = self.inner.read().await; + inner.get_hypervisor_metrics().await + } +} + +#[async_trait] +impl Persist for Dragonball { + type State 
= HypervisorState; + type ConstructorArgs = (); + /// Save a state of the component. + async fn save(&self) -> Result { + let inner = self.inner.read().await; + inner.save().await.context("save hypervisor state") + } + /// Restore a component from a specified state. + async fn restore( + hypervisor_args: Self::ConstructorArgs, + hypervisor_state: Self::State, + ) -> Result { + let inner = DragonballInner::restore(hypervisor_args, hypervisor_state).await?; + Ok(Self { + inner: Arc::new(RwLock::new(inner)), + }) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs new file mode 100644 index 000000000000..68dfe410418e --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs @@ -0,0 +1,366 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + fs::{File, OpenOptions}, + os::unix::{io::IntoRawFd, prelude::AsRawFd}, + sync::{Arc, Mutex, RwLock}, + thread, +}; + +use anyhow::{anyhow, Context, Result}; +use crossbeam_channel::{unbounded, Receiver, Sender}; +use dragonball::{ + api::v1::{ + BlockDeviceConfigInfo, BootSourceConfig, FsDeviceConfigInfo, FsMountConfigInfo, + InstanceInfo, InstanceState, VcpuResizeInfo, VirtioNetDeviceConfigInfo, VmmAction, + VmmActionError, VmmData, VmmRequest, VmmResponse, VmmService, VsockDeviceConfigInfo, + }, + vm::VmConfigInfo, + Vmm, +}; +use nix::sched::{setns, CloneFlags}; +use seccompiler::BpfProgram; +use vmm_sys_util::eventfd::EventFd; + +use crate::ShareFsOperation; + +pub enum Request { + Sync(VmmAction), +} + +const DRAGONBALL_VERSION: &str = env!("CARGO_PKG_VERSION"); +const REQUEST_RETRY: u32 = 500; +const KVM_DEVICE: &str = "/dev/kvm"; + +#[derive(Debug)] +pub struct VmmInstance { + /// VMM instance info directly accessible from runtime + vmm_shared_info: Arc>, + to_vmm: Option>, + from_vmm: Option>, + to_vmm_fd: EventFd, + seccomp: BpfProgram, + vmm_thread: Option>>, +} + +impl VmmInstance { + pub fn new(id: &str) -> Self { + let vmm_shared_info = Arc::new(RwLock::new(InstanceInfo::new( + String::from(id), + DRAGONBALL_VERSION.to_string(), + ))); + + let to_vmm_fd = EventFd::new(libc::EFD_NONBLOCK) + .unwrap_or_else(|_| panic!("Failed to create eventfd for vmm {}", id)); + + VmmInstance { + vmm_shared_info, + to_vmm: None, + from_vmm: None, + to_vmm_fd, + seccomp: vec![], + vmm_thread: None, + } + } + + pub fn get_shared_info(&self) -> Arc> { + self.vmm_shared_info.clone() + } + + fn set_instance_id(&mut self, id: &str) { + let share_info_lock = self.vmm_shared_info.clone(); + share_info_lock.write().unwrap().id = String::from(id); + } + + pub fn get_vmm_master_tid(&self) -> u32 { + let info = self.vmm_shared_info.clone(); + let result = info.read().unwrap().master_tid; + result + } + + pub fn get_ns_path(&self) -> String { + let info_binding = self.vmm_shared_info.clone(); + let info = info_binding.read().unwrap(); + let result = format!("/proc/{}/task/{}/ns", info.pid, info.master_tid); + result + } + + pub fn get_vcpu_tids(&self) -> Vec<(u8, u32)> { + let info = self.vmm_shared_info.clone(); + let result = info.read().unwrap().tids.clone(); + result + } + + pub fn run_vmm_server(&mut self, id: &str, netns: Option) -> Result<()> { + let kvm = OpenOptions::new().read(true).write(true).open(KVM_DEVICE)?; + + let (to_vmm, from_runtime) = unbounded(); + let (to_runtime, from_vmm) = unbounded(); + + self.set_instance_id(id); + + let 
vmm_service = VmmService::new(from_runtime, to_runtime); + + self.to_vmm = Some(to_vmm); + self.from_vmm = Some(from_vmm); + + let api_event_fd2 = self.to_vmm_fd.try_clone().expect("Failed to dup eventfd"); + let vmm = Vmm::new( + self.vmm_shared_info.clone(), + api_event_fd2, + self.seccomp.clone(), + self.seccomp.clone(), + Some(kvm.into_raw_fd()), + ) + .expect("Failed to start vmm"); + let vmm_shared_info = self.get_shared_info(); + + self.vmm_thread = Some( + thread::Builder::new() + .name("vmm_master".to_owned()) + .spawn(move || { + || -> Result { + debug!(sl!(), "run vmm thread start"); + let cur_tid = nix::unistd::gettid().as_raw() as u32; + vmm_shared_info.write().unwrap().master_tid = cur_tid; + + if let Some(netns_path) = netns { + info!(sl!(), "set netns for vmm master {}", &netns_path); + let netns_fd = File::open(&netns_path) + .with_context(|| format!("open netns path {}", &netns_path))?; + setns(netns_fd.as_raw_fd(), CloneFlags::CLONE_NEWNET) + .context("set netns ")?; + } + let exit_code = + Vmm::run_vmm_event_loop(Arc::new(Mutex::new(vmm)), vmm_service); + debug!(sl!(), "run vmm thread exited: {}", exit_code); + Ok(exit_code) + }() + .map_err(|e| { + error!(sl!(), "run vmm thread err. {:?}", e); + e + }) + }) + .expect("Failed to start vmm event loop"), + ); + + Ok(()) + } + + pub fn put_boot_source(&self, boot_source_cfg: BootSourceConfig) -> Result<()> { + self.handle_request(Request::Sync(VmmAction::ConfigureBootSource( + boot_source_cfg, + ))) + .context("Failed to configure boot source")?; + Ok(()) + } + + pub fn instance_start(&self) -> Result<()> { + self.handle_request(Request::Sync(VmmAction::StartMicroVm)) + .context("Failed to start MicroVm")?; + Ok(()) + } + + pub fn is_uninitialized(&self) -> bool { + let share_info = self + .vmm_shared_info + .read() + .expect("Failed to read share_info due to poisoned lock"); + matches!(share_info.state, InstanceState::Uninitialized) + } + + pub fn is_running(&self) -> Result<()> { + let share_info_lock = self.vmm_shared_info.clone(); + let share_info = share_info_lock + .read() + .expect("Failed to read share_info due to poisoned lock"); + if let InstanceState::Running = share_info.state { + return Ok(()); + } + Err(anyhow!("vmm is not running")) + } + + pub fn get_machine_info(&self) -> Result> { + if let Ok(VmmData::MachineConfiguration(vm_config)) = + self.handle_request(Request::Sync(VmmAction::GetVmConfiguration)) + { + return Ok(vm_config); + } + Err(anyhow!("Failed to get machine info")) + } + + pub fn insert_block_device(&self, device_cfg: BlockDeviceConfigInfo) -> Result<()> { + self.handle_request_with_retry(Request::Sync(VmmAction::InsertBlockDevice( + device_cfg.clone(), + ))) + .with_context(|| format!("Failed to insert block device {:?}", device_cfg))?; + Ok(()) + } + + pub fn remove_block_device(&self, id: &str) -> Result<()> { + info!(sl!(), "remove block device {}", id); + self.handle_request(Request::Sync(VmmAction::RemoveBlockDevice(id.to_string()))) + .with_context(|| format!("Failed to remove block device {:?}", id))?; + Ok(()) + } + + pub fn set_vm_configuration(&self, vm_config: VmConfigInfo) -> Result<()> { + self.handle_request(Request::Sync(VmmAction::SetVmConfiguration( + vm_config.clone(), + ))) + .with_context(|| format!("Failed to set vm configuration {:?}", vm_config))?; + Ok(()) + } + + pub fn insert_network_device(&self, net_cfg: VirtioNetDeviceConfigInfo) -> Result<()> { + self.handle_request_with_retry(Request::Sync(VmmAction::InsertNetworkDevice( + net_cfg.clone(), + ))) + 
.with_context(|| format!("Failed to insert network device {:?}", net_cfg))?; + Ok(()) + } + + pub fn insert_vsock(&self, vsock_cfg: VsockDeviceConfigInfo) -> Result<()> { + self.handle_request(Request::Sync(VmmAction::InsertVsockDevice( + vsock_cfg.clone(), + ))) + .with_context(|| format!("Failed to insert vsock device {:?}", vsock_cfg))?; + Ok(()) + } + + pub fn insert_fs(&self, fs_cfg: &FsDeviceConfigInfo) -> Result<()> { + self.handle_request(Request::Sync(VmmAction::InsertFsDevice(fs_cfg.clone()))) + .with_context(|| format!("Failed to insert {} fs device {:?}", fs_cfg.mode, fs_cfg))?; + Ok(()) + } + + pub fn patch_fs(&self, cfg: &FsMountConfigInfo, op: ShareFsOperation) -> Result<()> { + self.handle_request(Request::Sync(VmmAction::ManipulateFsBackendFs(cfg.clone()))) + .with_context(|| { + format!( + "Failed to {:?} backend {:?} at {} mount config {:?}", + op, cfg.fstype, cfg.mountpoint, cfg + ) + })?; + Ok(()) + } + + pub fn resize_vcpu(&self, cfg: &VcpuResizeInfo) -> Result<()> { + self.handle_request(Request::Sync(VmmAction::ResizeVcpu(cfg.clone()))) + .with_context(|| format!("Failed to resize_vm(hotplug vcpu), cfg: {:?}", cfg))?; + Ok(()) + } + + pub fn pause(&self) -> Result<()> { + todo!() + } + + pub fn resume(&self) -> Result<()> { + todo!() + } + + pub fn pid(&self) -> u32 { + std::process::id() + } + + pub fn get_hypervisor_metrics(&self) -> Result { + if let Ok(VmmData::HypervisorMetrics(metrics)) = + self.handle_request(Request::Sync(VmmAction::GetHypervisorMetrics)) + { + return Ok(metrics); + } + Err(anyhow!("Failed to get hypervisor metrics")) + } + + pub fn stop(&mut self) -> Result<()> { + self.handle_request(Request::Sync(VmmAction::ShutdownMicroVm)) + .map_err(|e| { + warn!(sl!(), "Failed to shutdown MicroVM. {}", e); + e + }) + .ok(); + // vmm is not running, join thread will be hang. + if self.is_uninitialized() || self.vmm_thread.is_none() { + debug!(sl!(), "vmm-master thread is uninitialized or has exited."); + return Ok(()); + } + debug!(sl!(), "join vmm-master thread exit."); + + // vmm_thread must be exited, otherwise there will be other sync issues. + // unwrap is safe, if vmm_thread is None, impossible run to here. 
+ self.vmm_thread.take().unwrap().join().ok(); + info!(sl!(), "vmm-master thread join succeed."); + Ok(()) + } + + fn send_request(&self, vmm_action: VmmAction) -> Result { + if let Some(ref to_vmm) = self.to_vmm { + to_vmm + .send(Box::new(vmm_action.clone())) + .with_context(|| format!("Failed to send {:?} via channel ", vmm_action))?; + } else { + return Err(anyhow!("to_vmm is None")); + } + + //notify vmm action + if let Err(e) = self.to_vmm_fd.write(1) { + return Err(anyhow!("failed to notify vmm: {}", e)); + } + + if let Some(from_vmm) = self.from_vmm.as_ref() { + match from_vmm.recv() { + Err(e) => Err(anyhow!("vmm recv err: {}", e)), + Ok(vmm_outcome) => Ok(vmm_outcome), + } + } else { + Err(anyhow!("from_vmm is None")) + } + } + + fn handle_request(&self, req: Request) -> Result { + let Request::Sync(vmm_action) = req; + match self.send_request(vmm_action) { + Ok(vmm_outcome) => match *vmm_outcome { + Ok(vmm_data) => Ok(vmm_data), + Err(vmm_action_error) => Err(anyhow!("vmm action error: {:?}", vmm_action_error)), + }, + Err(e) => Err(e), + } + } + + fn handle_request_with_retry(&self, req: Request) -> Result { + let Request::Sync(vmm_action) = req; + for count in 0..REQUEST_RETRY { + match self.send_request(vmm_action.clone()) { + Ok(vmm_outcome) => match *vmm_outcome { + Ok(vmm_data) => { + info!( + sl!(), + "success to send {:?} after retry {}", &vmm_action, count + ); + return Ok(vmm_data); + } + Err(vmm_action_error) => { + if let VmmActionError::UpcallServerNotReady = vmm_action_error { + std::thread::sleep(std::time::Duration::from_millis(10)); + continue; + } else { + return Err(vmm_action_error.into()); + } + } + }, + Err(err) => { + return Err(err); + } + } + } + Err(anyhow::anyhow!( + "After {} attempts, it still doesn't work.", + REQUEST_RETRY + )) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/hypervisor_persist.rs b/src/runtime-rs/crates/hypervisor/src/hypervisor_persist.rs new file mode 100644 index 000000000000..ea870f3420ac --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/hypervisor_persist.rs @@ -0,0 +1,36 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use crate::HypervisorConfig; +use serde::{Deserialize, Serialize}; +use std::collections::HashSet; + +#[derive(Serialize, Deserialize, Default, Clone, Debug)] +pub struct HypervisorState { + // Type of hypervisor, E.g. dragonball/qemu/firecracker/acrn. 
+ pub hypervisor_type: String, + pub pid: Option, + pub uuid: String, + // clh sepcific: refer to 'virtcontainers/clh.go:CloudHypervisorState' + pub api_socket: String, + /// sandbox id + pub id: String, + /// vm path + pub vm_path: String, + /// jailed flag + pub jailed: bool, + /// chroot base for the jailer + pub jailer_root: String, + /// netns + pub netns: Option, + /// hypervisor config + pub config: HypervisorConfig, + /// hypervisor run dir + pub run_dir: String, + /// cached block device + pub cached_block_devices: HashSet, + pub virtiofs_daemon_pid: i32, +} diff --git a/src/runtime-rs/crates/hypervisor/src/kernel_param.rs b/src/runtime-rs/crates/hypervisor/src/kernel_param.rs new file mode 100644 index 000000000000..9521ebac7e5d --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/kernel_param.rs @@ -0,0 +1,343 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Result}; + +use crate::{ + VM_ROOTFS_DRIVER_BLK, VM_ROOTFS_DRIVER_MMIO, VM_ROOTFS_DRIVER_PMEM, VM_ROOTFS_FILESYSTEM_EROFS, + VM_ROOTFS_FILESYSTEM_EXT4, VM_ROOTFS_FILESYSTEM_XFS, VM_ROOTFS_ROOT_BLK, VM_ROOTFS_ROOT_PMEM, +}; +use kata_types::config::LOG_VPORT_OPTION; + +// Port where the agent will send the logs. Logs are sent through the vsock in cases +// where the hypervisor has no console.sock, i.e dragonball +const VSOCK_LOGS_PORT: &str = "1025"; + +const KERNEL_KV_DELIMITER: &str = "="; +const KERNEL_PARAM_DELIMITER: &str = " "; + +#[derive(Debug, Clone, PartialEq)] +pub struct Param { + pub key: String, + pub value: String, +} + +impl Param { + pub fn new(key: &str, value: &str) -> Self { + Param { + key: key.to_owned(), + value: value.to_owned(), + } + } + + pub fn to_string(&self) -> Result { + if self.key.is_empty() && self.value.is_empty() { + Err(anyhow!("Empty key and value")) + } else if self.key.is_empty() { + Err(anyhow!("Empty key")) + } else if self.value.is_empty() { + Ok(self.key.to_string()) + } else { + Ok(format!("{}{}{}", self.key, KERNEL_KV_DELIMITER, self.value)) + } + } +} + +#[derive(Debug, PartialEq)] +pub(crate) struct KernelParams { + params: Vec, +} + +impl KernelParams { + pub(crate) fn new(debug: bool) -> Self { + // default kernel params + let mut params = vec![ + Param::new("reboot", "k"), + Param::new("panic", "1"), + Param::new("systemd.unit", "kata-containers.target"), + Param::new("systemd.mask", "systemd-networkd.service"), + ]; + + if debug { + params.push(Param::new(LOG_VPORT_OPTION, VSOCK_LOGS_PORT)); + } + + Self { params } + } + + pub(crate) fn new_rootfs_kernel_params(rootfs_driver: &str, rootfs_type: &str) -> Result { + let mut params = vec![]; + + match rootfs_driver { + VM_ROOTFS_DRIVER_PMEM => { + params.push(Param::new("root", VM_ROOTFS_ROOT_PMEM)); + match rootfs_type { + VM_ROOTFS_FILESYSTEM_EXT4 | VM_ROOTFS_FILESYSTEM_XFS => { + params.push(Param::new( + "rootflags", + "dax,data=ordered,errors=remount-ro ro", + )); + } + VM_ROOTFS_FILESYSTEM_EROFS => { + params.push(Param::new("rootflags", "dax ro")); + } + _ => { + return Err(anyhow!("Unsupported rootfs type {}", rootfs_type)); + } + } + } + VM_ROOTFS_DRIVER_BLK | VM_ROOTFS_DRIVER_MMIO => { + params.push(Param::new("root", VM_ROOTFS_ROOT_BLK)); + match rootfs_type { + VM_ROOTFS_FILESYSTEM_EXT4 | VM_ROOTFS_FILESYSTEM_XFS => { + params.push(Param::new("rootflags", "data=ordered,errors=remount-ro ro")); + } + VM_ROOTFS_FILESYSTEM_EROFS => { + params.push(Param::new("rootflags", "ro")); + } + _ => { + return 
Err(anyhow!("Unsupported rootfs type {}", rootfs_type)); + } + } + } + _ => { + return Err(anyhow!("Unsupported rootfs driver {}", rootfs_driver)); + } + } + + params.push(Param::new("rootfstype", rootfs_type)); + + Ok(Self { params }) + } + + pub(crate) fn append(&mut self, params: &mut KernelParams) { + self.params.append(&mut params.params); + } + + pub(crate) fn from_string(params_string: &str) -> Self { + let mut params = vec![]; + + let parameters_vec: Vec<&str> = params_string.split(KERNEL_PARAM_DELIMITER).collect(); + + for param in parameters_vec.iter() { + if param.is_empty() { + continue; + } + + let ps: Vec<&str> = param.splitn::<_>(2, KERNEL_KV_DELIMITER).collect(); + + if ps.len() == 2 { + params.push(Param { + key: String::from(ps[0]), + value: String::from(ps[1]), + }); + } else { + params.push(Param { + key: String::from(ps[0]), + value: String::from(""), + }); + } + } + + Self { params } + } + + pub(crate) fn to_string(&self) -> Result { + let mut parameters: Vec = Vec::new(); + + for param in &self.params { + parameters.push(param.to_string()?); + } + + Ok(parameters.join(KERNEL_PARAM_DELIMITER)) + } +} + +#[cfg(test)] +mod tests { + use anyhow::Result; + + use super::*; + + use crate::{ + VM_ROOTFS_DRIVER_BLK, VM_ROOTFS_DRIVER_PMEM, VM_ROOTFS_FILESYSTEM_EROFS, + VM_ROOTFS_FILESYSTEM_EXT4, VM_ROOTFS_FILESYSTEM_XFS, VM_ROOTFS_ROOT_BLK, + VM_ROOTFS_ROOT_PMEM, + }; + + #[test] + fn test_params() { + let param1 = Param::new("", ""); + let param2 = Param::new("", "foo"); + let param3 = Param::new("foo", ""); + + assert!(param1.to_string().is_err()); + assert!(param2.to_string().is_err()); + assert_eq!(param3.to_string().unwrap(), String::from("foo")); + + let param4 = Param::new("foo", "bar"); + assert_eq!(param4.to_string().unwrap(), String::from("foo=bar")); + } + + #[test] + fn test_kernel_params() -> Result<()> { + let expect_params_string = "k1=v1 k2=v2 k3=v3".to_string(); + let expect_params = KernelParams { + params: vec![ + Param::new("k1", "v1"), + Param::new("k2", "v2"), + Param::new("k3", "v3"), + ], + }; + + // check kernel params from string + let kernel_params = KernelParams::from_string(&expect_params_string); + assert_eq!(kernel_params, expect_params); + + // check kernel params to string + let kernel_params_string = expect_params.to_string()?; + assert_eq!(kernel_params_string, expect_params_string); + + Ok(()) + } + + #[derive(Debug)] + struct TestData<'a> { + rootfs_driver: &'a str, + rootfs_type: &'a str, + expect_params: KernelParams, + result: Result<()>, + } + + #[test] + fn test_rootfs_kernel_params() { + let tests = &[ + // EXT4 + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_PMEM, + rootfs_type: VM_ROOTFS_FILESYSTEM_EXT4, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_PMEM), + Param::new("rootflags", "dax,data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Ok(()), + }, + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: VM_ROOTFS_FILESYSTEM_EXT4, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Ok(()), + }, + // XFS + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_PMEM, + rootfs_type: VM_ROOTFS_FILESYSTEM_XFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_PMEM), + Param::new("rootflags", 
"dax,data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_XFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: VM_ROOTFS_FILESYSTEM_XFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_XFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + // EROFS + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_PMEM, + rootfs_type: VM_ROOTFS_FILESYSTEM_EROFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_PMEM), + Param::new("rootflags", "dax ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EROFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: VM_ROOTFS_FILESYSTEM_EROFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EROFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + // Unsupported rootfs driver + TestData { + rootfs_driver: "foo", + rootfs_type: VM_ROOTFS_FILESYSTEM_EXT4, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Err(anyhow!("Unsupported rootfs driver foo")), + }, + // Unsupported rootfs type + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: "foo", + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Err(anyhow!("Unsupported rootfs type foo")), + }, + ]; + + for (i, t) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, t); + let result = KernelParams::new_rootfs_kernel_params(t.rootfs_driver, t.rootfs_type); + let msg = format!("{}, result: {:?}", msg, result); + if t.result.is_ok() { + assert!(result.is_ok(), "{}", msg); + assert_eq!(t.expect_params, result.unwrap()); + } else { + let expected_error = format!("{}", t.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } + } + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/lib.rs b/src/runtime-rs/crates/hypervisor/src/lib.rs new file mode 100644 index 000000000000..9e6184b74b2a --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/lib.rs @@ -0,0 +1,105 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[macro_use] +extern crate slog; + +logging::logger_with_subsystem!(sl, "hypervisor"); + +pub mod device; +pub mod hypervisor_persist; +pub use device::driver::*; +use device::DeviceType; +pub mod dragonball; +mod kernel_param; +pub mod qemu; +pub use kernel_param::Param; +pub mod utils; +use std::collections::HashMap; + +#[cfg(feature = "cloud-hypervisor")] +pub mod ch; + +use anyhow::Result; +use async_trait::async_trait; +use hypervisor_persist::HypervisorState; +use kata_types::capabilities::Capabilities; +use kata_types::config::hypervisor::Hypervisor as HypervisorConfig; + +pub use kata_types::config::hypervisor::HYPERVISOR_NAME_CH; + +// Config which driver to use as vm root dev +const 
VM_ROOTFS_DRIVER_BLK: &str = "virtio-blk-pci"; +const VM_ROOTFS_DRIVER_PMEM: &str = "virtio-pmem"; +const VM_ROOTFS_DRIVER_MMIO: &str = "virtio-blk-mmio"; + +//Configure the root corresponding to the driver +const VM_ROOTFS_ROOT_BLK: &str = "/dev/vda1"; +const VM_ROOTFS_ROOT_PMEM: &str = "/dev/pmem0p1"; + +// Config which filesystem to use as rootfs type +const VM_ROOTFS_FILESYSTEM_EXT4: &str = "ext4"; +const VM_ROOTFS_FILESYSTEM_XFS: &str = "xfs"; +const VM_ROOTFS_FILESYSTEM_EROFS: &str = "erofs"; + +// before using hugepages for VM, we need to mount hugetlbfs +// /dev/hugepages will be the mount point +// mkdir -p /dev/hugepages +// mount -t hugetlbfs none /dev/hugepages +const DEV_HUGEPAGES: &str = "/dev/hugepages"; +pub const HUGETLBFS: &str = "hugetlbfs"; +const SHMEM: &str = "shmem"; +const HUGE_SHMEM: &str = "hugeshmem"; + +pub const HYPERVISOR_DRAGONBALL: &str = "dragonball"; +pub const HYPERVISOR_QEMU: &str = "qemu"; + +pub const DEFAULT_HYBRID_VSOCK_NAME: &str = "kata.hvsock"; +pub const JAILER_ROOT: &str = "root"; + +#[derive(PartialEq, Debug, Clone)] +pub(crate) enum VmmState { + NotReady, + VmmServerReady, + VmRunning, +} + +// vcpu mapping from vcpu number to thread number +#[derive(Debug, Default)] +pub struct VcpuThreadIds { + pub vcpus: HashMap, +} + +#[async_trait] +pub trait Hypervisor: std::fmt::Debug + Send + Sync { + // vm manager + async fn prepare_vm(&self, id: &str, netns: Option) -> Result<()>; + async fn start_vm(&self, timeout: i32) -> Result<()>; + async fn stop_vm(&self) -> Result<()>; + async fn pause_vm(&self) -> Result<()>; + async fn save_vm(&self) -> Result<()>; + async fn resume_vm(&self) -> Result<()>; + async fn resize_vcpu(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)>; // returns (old_vcpus, new_vcpus) + + // device manager + async fn add_device(&self, device: DeviceType) -> Result<()>; + async fn remove_device(&self, device: DeviceType) -> Result<()>; + + // utils + async fn get_agent_socket(&self) -> Result; + async fn disconnect(&self); + async fn hypervisor_config(&self) -> HypervisorConfig; + async fn get_thread_ids(&self) -> Result; + async fn get_pids(&self) -> Result>; + async fn get_vmm_master_tid(&self) -> Result; + async fn get_ns_path(&self) -> Result; + async fn cleanup(&self) -> Result<()>; + async fn check(&self) -> Result<()>; + async fn get_jailer_root(&self) -> Result; + async fn save_state(&self) -> Result; + async fn capabilities(&self) -> Result; + async fn get_hypervisor_metrics(&self) -> Result; +} diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs b/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs new file mode 100644 index 000000000000..6f59d933917b --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs @@ -0,0 +1,158 @@ +// Copyright (c) 2022 Red Hat +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::Result; + +use crate::{HypervisorConfig, VcpuThreadIds}; +use kata_types::capabilities::{Capabilities, CapabilityBits}; + +const VSOCK_SCHEME: &str = "vsock"; +const VSOCK_AGENT_CID: u32 = 3; +const VSOCK_AGENT_PORT: u32 = 1024; +#[derive(Debug)] +pub struct QemuInner { + config: HypervisorConfig, +} + +impl QemuInner { + pub fn new() -> QemuInner { + QemuInner { + config: Default::default(), + } + } + + pub(crate) async fn prepare_vm(&mut self, _id: &str, _netns: Option) -> Result<()> { + info!(sl!(), "Preparing QEMU VM"); + Ok(()) + } + + pub(crate) async fn start_vm(&mut self, _timeout: i32) -> Result<()> { + info!(sl!(), "Starting QEMU VM"); + + let mut command = 
std::process::Command::new(&self.config.path); + + command + .arg("-kernel") + .arg(&self.config.boot_info.kernel) + .arg("-m") + .arg(format!("{}M", &self.config.memory_info.default_memory)) + .arg("-initrd") + .arg(&self.config.boot_info.initrd) + .arg("-vga") + .arg("none") + .arg("-nodefaults") + .arg("-nographic"); + + command.spawn()?; + + Ok(()) + } + + pub(crate) fn stop_vm(&mut self) -> Result<()> { + info!(sl!(), "Stopping QEMU VM"); + todo!() + } + + pub(crate) fn pause_vm(&self) -> Result<()> { + info!(sl!(), "Pausing QEMU VM"); + todo!() + } + + pub(crate) fn resume_vm(&self) -> Result<()> { + info!(sl!(), "Resuming QEMU VM"); + todo!() + } + + pub(crate) async fn save_vm(&self) -> Result<()> { + todo!() + } + + /// TODO: using a single hardcoded CID is clearly not adequate in the long + /// run. Use the recently added VsockConfig infrastructure to fix this. + pub(crate) async fn get_agent_socket(&self) -> Result { + info!(sl!(), "QemuInner::get_agent_socket()"); + Ok(format!( + "{}://{}:{}", + VSOCK_SCHEME, VSOCK_AGENT_CID, VSOCK_AGENT_PORT + )) + } + + pub(crate) async fn disconnect(&mut self) { + info!(sl!(), "QemuInner::disconnect()"); + todo!() + } + + pub(crate) async fn get_thread_ids(&self) -> Result { + info!(sl!(), "QemuInner::get_thread_ids()"); + todo!() + } + + pub(crate) async fn get_vmm_master_tid(&self) -> Result { + info!(sl!(), "QemuInner::get_vmm_master_tid()"); + todo!() + } + + pub(crate) async fn get_ns_path(&self) -> Result { + info!(sl!(), "QemuInner::get_ns_path()"); + todo!() + } + + pub(crate) async fn cleanup(&self) -> Result<()> { + info!(sl!(), "QemuInner::cleanup()"); + todo!() + } + + pub(crate) async fn resize_vcpu(&self, _old_vcpus: u32, _new_vcpus: u32) -> Result<(u32, u32)> { + info!(sl!(), "QemuInner::resize_vcpu()"); + todo!() + } + + pub(crate) async fn get_pids(&self) -> Result> { + info!(sl!(), "QemuInner::get_pids()"); + todo!() + } + + pub(crate) async fn check(&self) -> Result<()> { + todo!() + } + + pub(crate) async fn get_jailer_root(&self) -> Result { + todo!() + } + + pub(crate) async fn capabilities(&self) -> Result { + let mut caps = Capabilities::default(); + caps.set(CapabilityBits::FsSharingSupport); + Ok(caps) + } + + pub fn set_hypervisor_config(&mut self, config: HypervisorConfig) { + self.config = config; + } + + pub fn hypervisor_config(&self) -> HypervisorConfig { + info!(sl!(), "QemuInner::hypervisor_config()"); + self.config.clone() + } + + pub(crate) async fn get_hypervisor_metrics(&self) -> Result { + todo!() + } +} + +use crate::device::DeviceType; + +// device manager part of Hypervisor +impl QemuInner { + pub(crate) async fn add_device(&mut self, device: DeviceType) -> Result<()> { + info!(sl!(), "QemuInner::add_device() {}", device); + todo!() + } + + pub(crate) async fn remove_device(&mut self, device: DeviceType) -> Result<()> { + info!(sl!(), "QemuInner::remove_device() {} ", device); + todo!() + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs b/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs new file mode 100644 index 000000000000..1221af26b8b3 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs @@ -0,0 +1,155 @@ +// Copyright (c) 2022 Red Hat +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod inner; + +use crate::device::DeviceType; +use crate::hypervisor_persist::HypervisorState; +use crate::Hypervisor; +use crate::{HypervisorConfig, VcpuThreadIds}; +use inner::QemuInner; +use kata_types::capabilities::Capabilities; + +use anyhow::Result; +use 
async_trait::async_trait; + +use std::sync::Arc; +use tokio::sync::RwLock; + +#[derive(Debug)] +pub struct Qemu { + inner: Arc>, +} + +impl Default for Qemu { + fn default() -> Self { + Self::new() + } +} + +impl Qemu { + pub fn new() -> Self { + Self { + inner: Arc::new(RwLock::new(QemuInner::new())), + } + } + + pub async fn set_hypervisor_config(&mut self, config: HypervisorConfig) { + let mut inner = self.inner.write().await; + inner.set_hypervisor_config(config) + } +} + +#[async_trait] +impl Hypervisor for Qemu { + async fn prepare_vm(&self, id: &str, netns: Option) -> Result<()> { + let mut inner = self.inner.write().await; + inner.prepare_vm(id, netns).await + } + + async fn start_vm(&self, timeout: i32) -> Result<()> { + let mut inner = self.inner.write().await; + inner.start_vm(timeout).await + } + + async fn stop_vm(&self) -> Result<()> { + let mut inner = self.inner.write().await; + inner.stop_vm() + } + + async fn pause_vm(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.pause_vm() + } + + async fn resume_vm(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.resume_vm() + } + + async fn save_vm(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.save_vm().await + } + + async fn add_device(&self, device: DeviceType) -> Result<()> { + let mut inner = self.inner.write().await; + inner.add_device(device).await + } + + async fn remove_device(&self, device: DeviceType) -> Result<()> { + let mut inner = self.inner.write().await; + inner.remove_device(device).await + } + + async fn get_agent_socket(&self) -> Result { + let inner = self.inner.read().await; + inner.get_agent_socket().await + } + + async fn disconnect(&self) { + let mut inner = self.inner.write().await; + inner.disconnect().await + } + + async fn hypervisor_config(&self) -> HypervisorConfig { + let inner = self.inner.read().await; + inner.hypervisor_config() + } + + async fn get_thread_ids(&self) -> Result { + let inner = self.inner.read().await; + inner.get_thread_ids().await + } + + async fn get_vmm_master_tid(&self) -> Result { + let inner = self.inner.read().await; + inner.get_vmm_master_tid().await + } + + async fn get_ns_path(&self) -> Result { + let inner = self.inner.read().await; + inner.get_ns_path().await + } + + async fn cleanup(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.cleanup().await + } + + async fn resize_vcpu(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)> { + let inner = self.inner.read().await; + inner.resize_vcpu(old_vcpus, new_vcpus).await + } + + async fn get_pids(&self) -> Result> { + let inner = self.inner.read().await; + inner.get_pids().await + } + + async fn check(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.check().await + } + + async fn get_jailer_root(&self) -> Result { + let inner = self.inner.read().await; + inner.get_jailer_root().await + } + + async fn save_state(&self) -> Result { + todo!() + } + + async fn capabilities(&self) -> Result { + let inner = self.inner.read().await; + inner.capabilities().await + } + + async fn get_hypervisor_metrics(&self) -> Result { + let inner = self.inner.read().await; + inner.get_hypervisor_metrics().await + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/utils.rs b/src/runtime-rs/crates/hypervisor/src/utils.rs new file mode 100644 index 000000000000..d2078c2d2f48 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/utils.rs @@ -0,0 +1,49 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group 
+// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::collections::HashSet; + +use kata_types::config::KATA_PATH; + +use crate::{DEFAULT_HYBRID_VSOCK_NAME, JAILER_ROOT}; + +pub fn get_child_threads(pid: u32) -> HashSet { + let mut result = HashSet::new(); + let path_name = format!("/proc/{}/task", pid); + let path = std::path::Path::new(path_name.as_str()); + if path.is_dir() { + if let Ok(dir) = path.read_dir() { + for entity in dir { + if let Ok(entity) = entity.as_ref() { + let file_name = entity.file_name(); + let file_name = file_name.to_str().unwrap_or_default(); + if let Ok(tid) = file_name.parse::() { + result.insert(tid); + } + } + } + } + } + result +} + +// Return the path for a _hypothetical_ sandbox: the path does *not* exist +// yet, and for this reason safe-path cannot be used. +pub fn get_sandbox_path(sid: &str) -> String { + [KATA_PATH, sid].join("/") +} + +pub fn get_hvsock_path(sid: &str) -> String { + let jailer_root_path = get_jailer_root(sid); + + [jailer_root_path, DEFAULT_HYBRID_VSOCK_NAME.to_owned()].join("/") +} + +pub fn get_jailer_root(sid: &str) -> String { + let sandbox_path = get_sandbox_path(sid); + + [&sandbox_path, JAILER_ROOT].join("/") +} diff --git a/src/runtime-rs/crates/persist/Cargo.toml b/src/runtime-rs/crates/persist/Cargo.toml new file mode 100644 index 000000000000..245480247316 --- /dev/null +++ b/src/runtime-rs/crates/persist/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "persist" +version = "0.1.0" +authors = ["The Kata Containers community "] +edition = "2018" +license = "Apache-2.0" + +[dependencies] +async-trait = "0.1.48" +anyhow = "^1.0" +kata-sys-util = { path = "../../../libs/kata-sys-util"} +kata-types = { path = "../../../libs/kata-types" } +shim-interface = { path = "../../../libs/shim-interface" } +libc = "0.2" +serde = { version = "1.0.138", features = ["derive"] } +serde_json = "1.0.82" +safe-path = { path = "../../../libs/safe-path"} diff --git a/src/runtime-rs/crates/persist/src/lib.rs b/src/runtime-rs/crates/persist/src/lib.rs new file mode 100644 index 000000000000..0c6510a0dc94 --- /dev/null +++ b/src/runtime-rs/crates/persist/src/lib.rs @@ -0,0 +1,85 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub mod sandbox_persist; +use anyhow::{anyhow, Context, Ok, Result}; +use kata_types::config::KATA_PATH; +use serde::de; +use std::{fs::File, io::BufReader}; + +pub const PERSIST_FILE: &str = "state.json"; +use kata_sys_util::validate::verify_id; +use safe_path::scoped_join; + +pub fn to_disk(value: &T, sid: &str) -> Result<()> { + verify_id(sid).context("failed to verify sid")?; + let mut path = scoped_join(KATA_PATH, sid)?; + if path.exists() { + path.push(PERSIST_FILE); + let f = File::create(path) + .context("failed to create the file") + .context("failed to join the path")?; + let j = serde_json::to_value(value).context("failed to convert to the json value")?; + serde_json::to_writer_pretty(f, &j)?; + return Ok(()); + } + Err(anyhow!("invalid sid {}", sid)) +} + +pub fn from_disk(sid: &str) -> Result +where + T: de::DeserializeOwned, +{ + verify_id(sid).context("failed to verify sid")?; + let mut path = scoped_join(KATA_PATH, sid)?; + if path.exists() { + path.push(PERSIST_FILE); + let file = File::open(path).context("failed to open the file")?; + let reader = BufReader::new(file); + return serde_json::from_reader(reader).map_err(|e| anyhow!(e.to_string())); + } + Err(anyhow!("invalid sid {}", sid)) +} + +#[cfg(test)] +mod tests { + 
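+    // The round trip below depends on to_disk()/from_disk() using
+    // <KATA_PATH>/<sid>/state.json (PERSIST_FILE), so the sandbox directory
+    // must exist before to_disk() is called.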
use crate::{from_disk, to_disk, KATA_PATH}; + use serde::{Deserialize, Serialize}; + use std::fs::DirBuilder; + use std::{fs, result::Result::Ok}; + #[test] + fn test_to_from_disk() { + #[derive(Serialize, Deserialize, Debug)] + struct Kata { + name: String, + key: u8, + } + let data = Kata { + name: "kata".to_string(), + key: 1, + }; + // invalid sid + assert!(to_disk(&data, "..3").is_err()); + assert!(to_disk(&data, "../../../3").is_err()); + assert!(to_disk(&data, "a/b/c").is_err()); + assert!(to_disk(&data, ".#cdscd.").is_err()); + + let sid = "aadede"; + let sandbox_dir = [KATA_PATH, sid].join("/"); + if DirBuilder::new() + .recursive(true) + .create(&sandbox_dir) + .is_ok() + { + assert!(to_disk(&data, sid).is_ok()); + if let Ok(result) = from_disk::(sid) { + assert_eq!(result.name, data.name); + assert_eq!(result.key, data.key); + } + assert!(fs::remove_dir_all(&sandbox_dir).is_ok()); + } + } +} diff --git a/src/runtime-rs/crates/persist/src/sandbox_persist.rs b/src/runtime-rs/crates/persist/src/sandbox_persist.rs new file mode 100644 index 000000000000..9ffdf15acc8a --- /dev/null +++ b/src/runtime-rs/crates/persist/src/sandbox_persist.rs @@ -0,0 +1,25 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::Result; +use async_trait::async_trait; + +#[async_trait] +pub trait Persist +where + Self: Sized, +{ + /// The type of the object representing the state of the component. + type State; + /// The type of the object holding the constructor arguments. + type ConstructorArgs; + + /// Save a state of the component. + async fn save(&self) -> Result; + + /// Restore a component from a specified state. + async fn restore(constructor_args: Self::ConstructorArgs, state: Self::State) -> Result; +} diff --git a/src/runtime-rs/crates/resource/Cargo.toml b/src/runtime-rs/crates/resource/Cargo.toml new file mode 100644 index 000000000000..78d203d4a839 --- /dev/null +++ b/src/runtime-rs/crates/resource/Cargo.toml @@ -0,0 +1,47 @@ +[package] +name = "resource" +version = "0.1.0" +authors = ["The Kata Containers community "] +edition = "2018" +license = "Apache-2.0" + +[dev-dependencies] +test-utils = { path = "../../../libs/test-utils" } +tempfile = "3.2.0" + +[dependencies] +anyhow = "^1.0" +actix-rt = "2.7.0" +async-trait = "0.1.48" +bitflags = "1.2.1" +byte-unit = "4.0.14" +cgroups-rs = "0.3.2" +futures = "0.3.11" +hex = "0.4.3" +lazy_static = "1.4.0" +libc = ">=0.2.39" +netns-rs = "0.1.0" +netlink-sys = "0.8.3" +netlink-packet-route = "0.13.0" +nix = "0.24.2" +rand = "^0.7.2" +rtnetlink = "0.11.0" +scopeguard = "1.0.0" +serde = { version = "1.0.138", features = ["derive"] } +serde_json = "1.0.82" +slog = "2.5.2" +slog-scope = "4.4.0" +tokio = { version = "1.28.1", features = ["process"] } +tracing = "0.1.36" +uuid = { version = "0.4", features = ["v4"] } + +agent = { path = "../agent" } +hypervisor = { path = "../hypervisor" } +kata-types = { path = "../../../libs/kata-types" } +kata-sys-util = { path = "../../../libs/kata-sys-util" } +logging = { path = "../../../libs/logging" } +oci = { path = "../../../libs/oci" } +persist = { path = "../persist"} +tests_utils = { path = "../../tests/utils" } + +[features] diff --git a/src/runtime-rs/crates/resource/src/cgroups/cgroup_persist.rs b/src/runtime-rs/crates/resource/src/cgroups/cgroup_persist.rs new file mode 100644 index 000000000000..be15610b7cd5 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/cgroups/cgroup_persist.rs @@ -0,0 +1,13 @@ +// 
Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Default)] +pub struct CgroupState { + pub path: Option, + pub overhead_path: Option, + pub sandbox_cgroup_only: bool, +} diff --git a/src/runtime-rs/crates/resource/src/cgroups/mod.rs b/src/runtime-rs/crates/resource/src/cgroups/mod.rs new file mode 100644 index 000000000000..831e30c0ffc8 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/cgroups/mod.rs @@ -0,0 +1,304 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub mod cgroup_persist; +mod utils; + +use std::{ + collections::{HashMap, HashSet}, + error::Error, + io, + iter::FromIterator, + sync::Arc, +}; + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use cgroup_persist::CgroupState; +use cgroups_rs::{cgroup_builder::CgroupBuilder, Cgroup, CgroupPid, CpuResources, Resources}; +use hypervisor::Hypervisor; +use kata_sys_util::spec::load_oci_spec; +use kata_types::config::TomlConfig; +use oci::LinuxResources; +use persist::sandbox_persist::Persist; +use tokio::sync::RwLock; + +use crate::ResourceUpdateOp; + +const OS_ERROR_NO_SUCH_PROCESS: i32 = 3; + +pub struct CgroupArgs { + pub sid: String, + pub config: TomlConfig, +} + +pub struct CgroupConfig { + pub path: String, + pub overhead_path: String, + pub sandbox_cgroup_only: bool, +} + +impl CgroupConfig { + fn new(sid: &str, toml_config: &TomlConfig) -> Result { + let overhead_path = utils::gen_overhead_path(sid); + let spec = load_oci_spec()?; + let path = spec + .linux + // The trim of '/' is important, because cgroup_path is a relative path. + .map(|linux| linux.cgroups_path.trim_start_matches('/').to_string()) + .unwrap_or_default(); + + Ok(Self { + path, + overhead_path, + sandbox_cgroup_only: toml_config.runtime.sandbox_cgroup_only, + }) + } +} + +pub struct CgroupsResource { + resources: Arc>>, + cgroup_manager: Cgroup, + overhead_cgroup_manager: Option, + cgroup_config: CgroupConfig, +} + +impl CgroupsResource { + pub fn new(sid: &str, toml_config: &TomlConfig) -> Result { + let config = CgroupConfig::new(sid, toml_config)?; + + // Create the sandbox cgroups manager (cgroups on Linux). + // Depending on the sandbox_cgroup_only value, this cgroup + // will either hold all the pod threads (sandbox_cgroup_only is true) + // or only the virtual CPU ones (sandbox_cgroup_only is false). + let hier = cgroups_rs::hierarchies::auto(); + let cgroup_manager = CgroupBuilder::new(&config.path).build(hier)?; + + // The shim configuration is requesting that we do not put all threads + // into the sandbox resource controller. + // We're creating an overhead controller, with no constraints. Everything but + // the vCPU threads will eventually make it there. + let overhead_cgroup_manager = if !config.sandbox_cgroup_only { + let hier = cgroups_rs::hierarchies::auto(); + Some(CgroupBuilder::new(&config.overhead_path).build(hier)?) + } else { + None + }; + + // Add the runtime to the VMM sandbox resource controller + + // By adding the runtime process to either the sandbox or overhead controller, we are making + // sure that any child process of the runtime (i.e. *all* processes serving a Kata pod) + // will initially live in this controller. Depending on the sandbox_cgroup_only settings, we will + // then move the vCPU threads between resource controllers. 
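+        // The kernel treats a pid of 0 as "the process doing the write", so
+        // add_task_by_tgid(CgroupPid { pid: 0 }) places the shim itself (and
+        // therefore every process it later spawns) into the chosen controller.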
+ let pid = CgroupPid { pid: 0 }; + if let Some(manager) = overhead_cgroup_manager.as_ref() { + manager.add_task_by_tgid(pid).context("add task by tgid")?; + } else { + cgroup_manager + .add_task_by_tgid(pid) + .context("add task by tgid with sandbox only")?; + } + + Ok(Self { + cgroup_manager, + resources: Arc::new(RwLock::new(HashMap::new())), + overhead_cgroup_manager, + cgroup_config: config, + }) + } + + /// delete will move the running processes in the cgroup_manager and + /// overhead_cgroup_manager to the parent and then delete the cgroups. + pub async fn delete(&self) -> Result<()> { + for cg_pid in self.cgroup_manager.tasks() { + // For now, we can't guarantee that the thread in cgroup_manager does still + // exist. Once it exit, we should ignore that error returned by remove_task + // to let it go. + if let Err(error) = self.cgroup_manager.remove_task(cg_pid) { + match error.source() { + Some(err) => match err.downcast_ref::() { + Some(e) => { + if e.raw_os_error() != Some(OS_ERROR_NO_SUCH_PROCESS) { + return Err(error.into()); + } + } + None => return Err(error.into()), + }, + None => return Err(error.into()), + } + } + } + + self.cgroup_manager + .delete() + .context("delete cgroup manager")?; + + if let Some(overhead) = self.overhead_cgroup_manager.as_ref() { + for cg_pid in overhead.tasks() { + overhead.remove_task(cg_pid)?; + } + overhead.delete().context("delete overhead")?; + } + + Ok(()) + } + + pub async fn update_cgroups( + &self, + cid: &str, + linux_resources: Option<&LinuxResources>, + op: ResourceUpdateOp, + h: &dyn Hypervisor, + ) -> Result<()> { + let new_resources = self.calc_resource(linux_resources); + let old_resources = self.update_resources(cid, new_resources.clone(), op).await; + + if let Some(old_resource) = old_resources.clone() { + if old_resource == new_resources { + return Ok(()); + } + } + + match self.do_update_cgroups(h).await { + Err(e) => { + // if update failed, we should roll back the records in resources + let mut resources = self.resources.write().await; + match op { + ResourceUpdateOp::Add => { + resources.remove(cid); + } + ResourceUpdateOp::Update | ResourceUpdateOp::Del => { + if let Some(old_resource) = old_resources { + resources.insert(cid.to_owned(), old_resource); + } + } + } + Err(e) + } + Ok(()) => Ok(()), + } + } + + async fn update_resources( + &self, + cid: &str, + new_resource: Resources, + op: ResourceUpdateOp, + ) -> Option { + let mut resources = self.resources.write().await; + match op { + ResourceUpdateOp::Add | ResourceUpdateOp::Update => { + resources.insert(cid.to_owned(), new_resource.clone()) + } + ResourceUpdateOp::Del => resources.remove(cid), + } + } + + async fn do_update_cgroups(&self, h: &dyn Hypervisor) -> Result<()> { + let merged_resources = self.merge_resources().await; + self.cgroup_manager + .apply(&merged_resources) + .map_err(|e| anyhow!(e))?; + + if self.overhead_cgroup_manager.is_some() { + // If we have an overhead controller, new vCPU threads would start there, + // as being children of the VMM PID. + // We need to constrain them by moving them into the sandbox controller. + self.constrain_hypervisor(h).await? + } + + Ok(()) + } + + /// constrain_hypervisor will place the VMM and vCPU threads into resource controllers (cgroups on Linux). + async fn constrain_hypervisor(&self, h: &dyn Hypervisor) -> Result<()> { + let tids = h.get_thread_ids().await?; + let tids = tids.vcpus.values(); + + // All vCPU threads move to the sandbox controller. 
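+        // These tids come from Hypervisor::get_thread_ids(), which maps each
+        // vCPU index to its host thread id, so only the vCPU threads (not the
+        // VMM I/O threads) are constrained here.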
+ for tid in tids { + self.cgroup_manager + .add_task(CgroupPid { pid: *tid as u64 })? + } + + Ok(()) + } + + async fn merge_resources(&self) -> Resources { + let resources = self.resources.read().await; + + let mut cpu_list: HashSet = HashSet::new(); + let mut mem_list: HashSet = HashSet::new(); + + resources.values().for_each(|r| { + if let Some(cpus) = &r.cpu.cpus { + cpu_list.insert(cpus.clone()); + } + if let Some(mems) = &r.cpu.mems { + mem_list.insert(mems.clone()); + } + }); + + let cpu_resource = CpuResources { + cpus: Some(Vec::from_iter(cpu_list.into_iter()).join(",")), + mems: Some(Vec::from_iter(mem_list.into_iter()).join(",")), + ..Default::default() + }; + + Resources { + cpu: cpu_resource, + ..Default::default() + } + } + + fn calc_cpu_resources(&self, linux_resources: Option<&LinuxResources>) -> CpuResources { + let cpu = || -> Option { linux_resources.as_ref()?.cpu.clone() }(); + + CpuResources { + cpus: cpu.clone().map(|cpu| cpu.cpus), + mems: cpu.map(|cpu| cpu.mems), + ..Default::default() + } + } + + fn calc_resource(&self, linux_resources: Option<&LinuxResources>) -> Resources { + Resources { + cpu: self.calc_cpu_resources(linux_resources), + ..Default::default() + } + } +} + +#[async_trait] +impl Persist for CgroupsResource { + type State = CgroupState; + type ConstructorArgs = CgroupArgs; + /// Save a state of the component. + async fn save(&self) -> Result { + Ok(CgroupState { + path: Some(self.cgroup_config.path.clone()), + overhead_path: Some(self.cgroup_config.overhead_path.clone()), + sandbox_cgroup_only: self.cgroup_config.sandbox_cgroup_only, + }) + } + /// Restore a component from a specified state. + async fn restore( + cgroup_args: Self::ConstructorArgs, + cgroup_state: Self::State, + ) -> Result { + let hier = cgroups_rs::hierarchies::auto(); + let config = CgroupConfig::new(&cgroup_args.sid, &cgroup_args.config)?; + let path = cgroup_state.path.unwrap_or_default(); + let cgroup_manager = Cgroup::load(hier, path.as_str()); + Ok(Self { + cgroup_manager, + resources: Arc::new(RwLock::new(HashMap::new())), + overhead_cgroup_manager: None, + cgroup_config: config, + }) + } +} diff --git a/src/runtime-rs/crates/resource/src/cgroups/utils.rs b/src/runtime-rs/crates/resource/src/cgroups/utils.rs new file mode 100644 index 000000000000..7a2d630982c0 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/cgroups/utils.rs @@ -0,0 +1,16 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +// When the Kata overhead threads (I/O, VMM, etc) are not +// placed in the sandbox resource controller (A cgroup on Linux), +// they are moved to a specific, unconstrained resource controller. +// On Linux, assuming the cgroup mount point is at /sys/fs/cgroup/, +// on a cgroup v1 system, the Kata overhead memory cgroup will be at +// /sys/fs/cgroup/memory/kata_overhead/$CGPATH where $CGPATH is +// defined by the orchestrator. 
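+// For example (illustrative value only), gen_overhead_path("abc") returns
+// "kata_overhead/abc"; CgroupConfig::new() passes the sandbox id in as the
+// path component.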
+pub(crate) fn gen_overhead_path(path: &str) -> String { + format!("kata_overhead/{}", path.trim_start_matches('/')) +} diff --git a/src/runtime-rs/crates/resource/src/cpu_mem/cpu.rs b/src/runtime-rs/crates/resource/src/cpu_mem/cpu.rs new file mode 100644 index 000000000000..661e1a5e3aca --- /dev/null +++ b/src/runtime-rs/crates/resource/src/cpu_mem/cpu.rs @@ -0,0 +1,188 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + cmp, + collections::{HashMap, HashSet}, + convert::TryFrom, + sync::Arc, +}; + +use agent::{Agent, OnlineCPUMemRequest}; +use anyhow::{Context, Ok, Result}; +use hypervisor::Hypervisor; +use kata_types::{config::TomlConfig, cpu::LinuxContainerCpuResources}; +use oci::LinuxCpu; +use tokio::sync::RwLock; + +use crate::ResourceUpdateOp; + +#[derive(Default, Debug, Clone)] +pub struct CpuResource { + /// Current number of vCPUs + pub(crate) current_vcpu: Arc>, + + /// Default number of vCPUs + pub(crate) default_vcpu: u32, + + /// CpuResource of each container + pub(crate) container_cpu_resources: Arc>>, +} + +impl CpuResource { + pub fn new(config: Arc) -> Result { + let hypervisor_name = config.runtime.hypervisor_name.clone(); + let hypervisor_config = config + .hypervisor + .get(&hypervisor_name) + .context(format!("failed to get hypervisor {}", hypervisor_name))?; + Ok(Self { + current_vcpu: Arc::new(RwLock::new(hypervisor_config.cpu_info.default_vcpus as u32)), + default_vcpu: hypervisor_config.cpu_info.default_vcpus as u32, + container_cpu_resources: Arc::new(RwLock::new(HashMap::new())), + }) + } + + pub(crate) async fn update_cpu_resources( + &self, + cid: &str, + linux_cpus: Option<&LinuxCpu>, + op: ResourceUpdateOp, + hypervisor: &dyn Hypervisor, + agent: &dyn Agent, + ) -> Result<()> { + self.update_container_cpu_resources(cid, linux_cpus, op) + .await + .context("update container cpu resources")?; + let vcpu_required = self + .calc_cpu_resources() + .await + .context("calculate vcpus required")?; + + if vcpu_required == self.current_vcpu().await { + return Ok(()); + } + + let curr_vcpus = self + .do_update_cpu_resources(vcpu_required, op, hypervisor, agent) + .await?; + self.update_current_vcpu(curr_vcpus).await; + Ok(()) + } + + async fn current_vcpu(&self) -> u32 { + let current_vcpu = self.current_vcpu.read().await; + *current_vcpu + } + + async fn update_current_vcpu(&self, new_vcpus: u32) { + let mut current_vcpu = self.current_vcpu.write().await; + *current_vcpu = new_vcpus; + } + + // update container_cpu_resources field + async fn update_container_cpu_resources( + &self, + cid: &str, + linux_cpus: Option<&LinuxCpu>, + op: ResourceUpdateOp, + ) -> Result<()> { + if let Some(cpu) = linux_cpus { + let container_resource = LinuxContainerCpuResources::try_from(cpu)?; + let mut resources = self.container_cpu_resources.write().await; + match op { + ResourceUpdateOp::Add => { + resources.insert(cid.to_owned(), container_resource); + } + ResourceUpdateOp::Update => { + let resource = resources.insert(cid.to_owned(), container_resource.clone()); + if let Some(old_container_resource) = resource { + // the priority of cpu-quota is higher than cpuset when determine the number of vcpus. + // we should better ignore the resource update when update cpu only by cpuset if cpu-quota + // has been set previously. 
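+                        // Illustrative (hypothetical) values: a container created with
+                        // quota=200_000 and period=100_000 (2 vCPUs) and later updated
+                        // with only a cpuset arrives here with the quota unset (i.e.
+                        // negative), so the previously recorded quota-based resource
+                        // is kept below instead of being replaced by the cpuset-only
+                        // one.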
+ if old_container_resource.quota() > 0 && container_resource.quota() < 0 { + resources.insert(cid.to_owned(), old_container_resource); + } + } + } + ResourceUpdateOp::Del => { + resources.remove(cid); + } + } + } + + Ok(()) + } + + // calculates the total required vcpus by adding each container's requirements within the pod + async fn calc_cpu_resources(&self) -> Result { + let mut total_vcpu = 0; + let mut cpuset_vcpu: HashSet = HashSet::new(); + + let resources = self.container_cpu_resources.read().await; + for (_, cpu_resource) in resources.iter() { + let vcpu = cpu_resource.get_vcpus().unwrap_or(0) as u32; + cpuset_vcpu.extend(cpu_resource.cpuset().iter()); + total_vcpu += vcpu; + } + + // contrained only by cpuset + if total_vcpu == 0 && !cpuset_vcpu.is_empty() { + info!(sl!(), "(from cpuset)get vcpus # {:?}", cpuset_vcpu); + return Ok(cpuset_vcpu.len() as u32); + } + + info!( + sl!(), + "(from cfs_quota&cfs_period)get vcpus count {}", total_vcpu + ); + Ok(total_vcpu) + } + + // do hotplug and hot-unplug the vcpu + async fn do_update_cpu_resources( + &self, + new_vcpus: u32, + op: ResourceUpdateOp, + hypervisor: &dyn Hypervisor, + agent: &dyn Agent, + ) -> Result { + let old_vcpus = self.current_vcpu().await; + + // when adding vcpus, ignore old_vcpus > new_vcpus + // when deleting vcpus, ignore old_vcpus < new_vcpus + if (op == ResourceUpdateOp::Add && old_vcpus > new_vcpus) + || (op == ResourceUpdateOp::Del && old_vcpus < new_vcpus) + { + return Ok(old_vcpus); + } + + // do not reduce computing power + // the number of vcpus would not be lower than the default size + let new_vcpus = cmp::max(new_vcpus, self.default_vcpu); + + let (old, new) = hypervisor + .resize_vcpu(old_vcpus, new_vcpus) + .await + .context("resize vcpus")?; + + if old < new { + let add = new - old; + info!(sl!(), "request to onlineCpuMem with {:?} cpus", add); + + agent + .online_cpu_mem(OnlineCPUMemRequest { + wait: false, + nb_cpus: new, + cpu_only: true, + }) + .await + .context("online vcpus")?; + } + + Ok(new) + } +} diff --git a/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs new file mode 100644 index 000000000000..bb3af793d033 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs @@ -0,0 +1,295 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// Copyright (c) 2019-2021 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::convert::TryFrom; + +use anyhow::{Context, Result}; + +use kata_types::{ + annotations::Annotation, config::TomlConfig, container::ContainerType, + cpu::LinuxContainerCpuResources, k8s::container_type, +}; + +// initial resource that InitialSizeManager needs, this is the spec for the +// sandbox/container's workload +#[derive(Clone, Copy, Debug)] +struct InitialSize { + vcpu: u32, + mem_mb: u32, +} + +// generate initial resource(vcpu and memory in MiB) from spec's information +impl TryFrom<&oci::Spec> for InitialSize { + type Error = anyhow::Error; + fn try_from(spec: &oci::Spec) -> Result { + let mut vcpu: u32 = 0; + let mut mem_mb: u32 = 0; + match container_type(spec) { + // podsandbox, from annotation + ContainerType::PodSandbox => { + let annotation = Annotation::new(spec.annotations.clone()); + let (period, quota, memory) = + get_sizing_info(annotation).context("failed to get sizing info")?; + let cpu = oci::LinuxCpu { + period: Some(period), + quota: Some(quota), + ..Default::default() + }; + // although it may not be actually a linux container, we are only using the 
calculation inside + // LinuxContainerCpuResources::try_from to generate our vcpu number + if let Ok(cpu_resource) = LinuxContainerCpuResources::try_from(&cpu) { + vcpu = get_nr_vcpu(&cpu_resource); + } + mem_mb = convert_memory_to_mb(memory); + } + // single container, from container spec + _ => { + if let Some(linux) = &spec.linux { + if let Some(resource) = &linux.resources { + if let Some(cpu) = &resource.cpu { + if let Ok(cpu_resource) = LinuxContainerCpuResources::try_from(cpu) { + vcpu = get_nr_vcpu(&cpu_resource); + } + } + if let Some(mem) = &resource.memory { + let memory = mem.limit.unwrap_or(0); + mem_mb = convert_memory_to_mb(memory); + } + } + } + } + } + info!( + sl!(), + "(from PodSandbox's annotation / SingleContainer's spec) initial size: vcpu={}, mem_mb={}", vcpu, mem_mb + ); + Ok(Self { vcpu, mem_mb }) + } +} + +// InitialSizeManager is responsible for initial vcpu/mem management +// +// inital vcpu/mem management sizing information is optionally provided, either by +// upper layer runtime (containerd / crio) or by the container spec itself (when it +// is a standalone single container such as the one started with *docker run*) +// +// the sizing information uses three values, cpu quota, cpu period and memory limit, +// and with above values it calculates the # vcpus and memory for the workload +// +// if the workload # of vcpus and memory is invalid for vmms, we still use default +// value in toml_config +#[derive(Clone, Copy, Debug)] +pub struct InitialSizeManager { + resource: InitialSize, +} + +impl InitialSizeManager { + pub fn new(spec: &oci::Spec) -> Result { + Ok(Self { + resource: InitialSize::try_from(spec).context("failed to construct static resource")?, + }) + } + + pub fn setup_config(&self, config: &mut TomlConfig) -> Result<()> { + // update this data to the hypervisor config for later use by hypervisor + let hypervisor_name = &config.runtime.hypervisor_name; + let hv = config + .hypervisor + .get_mut(hypervisor_name) + .context("failed to get hypervisor config")?; + + if self.resource.vcpu > 0 { + hv.cpu_info.default_vcpus = self.resource.vcpu as i32 + } + if self.resource.mem_mb > 0 { + // since the memory overhead introduced by kata-agent and system components + // will really affect the amount of memory the user can use, so we choose to + // plus the default_memory here, instead of overriding it. + // (if we override the default_memory here, and user apllications still + // use memory as they orignally expected, it would be easy to OOM.) + hv.memory_info.default_memory += self.resource.mem_mb; + } + Ok(()) + } +} + +fn get_nr_vcpu(resource: &LinuxContainerCpuResources) -> u32 { + if let Some(v) = resource.get_vcpus() { + v as u32 + } else { + 0 + } +} + +fn convert_memory_to_mb(memory_in_byte: i64) -> u32 { + if memory_in_byte < 0 { + 0 + } else { + (memory_in_byte / 1024 / 1024) as u32 + } +} + +// from the upper layer runtime's annotation (e.g. 
crio, k8s), get the *cpu quota, +// cpu period and memory limit* for a sandbox/container +fn get_sizing_info(annotation: Annotation) -> Result<(u64, i64, i64)> { + // since we are *adding* our result to the config, a value of 0 will cause no change + // and if the annotation is not assigned (but static resource management is), we will + // log a *warning* to fill that with zero value + let period = annotation.get_sandbox_cpu_period(); + let quota = annotation.get_sandbox_cpu_quota(); + let memory = annotation.get_sandbox_mem(); + Ok((period, quota, memory)) +} + +#[cfg(test)] +mod tests { + use super::*; + use kata_types::annotations::cri_containerd; + use std::collections::HashMap; + + #[derive(Clone)] + struct InputData { + period: Option, + quota: Option, + memory: Option, + } + + #[derive(Clone)] + struct TestData<'a> { + desc: &'a str, + input: InputData, + result: InitialSize, + } + + fn get_test_data() -> Vec> { + [ + TestData { + desc: "no resource limit", + input: InputData { + period: None, + quota: None, + memory: None, + }, + result: InitialSize { vcpu: 0, mem_mb: 0 }, + }, + TestData { + desc: "normal resource limit", + // data below should result in 2200 mCPU(round up to 3 vcpus) and 512 MiB of memory + input: InputData { + period: Some(100_000), + quota: Some(220_000), + memory: Some(1024 * 1024 * 512), + }, + result: InitialSize { + vcpu: 3, + mem_mb: 512, + }, + }, + ] + .to_vec() + } + + #[test] + fn test_initial_size_sandbox() { + let tests = get_test_data(); + + // run tests + for (i, d) in tests.iter().enumerate() { + let spec = oci::Spec { + annotations: HashMap::from([ + ( + cri_containerd::CONTAINER_TYPE_LABEL_KEY.to_string(), + cri_containerd::SANDBOX.to_string(), + ), + ( + cri_containerd::SANDBOX_CPU_PERIOD_KEY.to_string(), + d.input.period.map_or(String::new(), |v| format!("{}", v)), + ), // CPU period + ( + cri_containerd::SANDBOX_CPU_QUOTA_KEY.to_string(), + d.input.quota.map_or(String::new(), |v| format!("{}", v)), + ), // CPU quota + ( + cri_containerd::SANDBOX_MEM_KEY.to_string(), + d.input.memory.map_or(String::new(), |v| format!("{}", v)), + ), // memory in bytes + ]), + ..Default::default() + }; + + let initial_size = InitialSize::try_from(&spec); + assert!( + initial_size.is_ok(), + "test[{}]: {:?} should be ok", + i, + d.desc + ); + + let initial_size = initial_size.unwrap(); + assert_eq!( + initial_size.vcpu, d.result.vcpu, + "test[{}]: {:?} vcpu should be {}", + i, d.desc, d.result.vcpu, + ); + assert_eq!( + initial_size.mem_mb, d.result.mem_mb, + "test[{}]: {:?} memory should be {}", + i, d.desc, d.result.mem_mb, + ); + } + } + + #[test] + fn test_initial_size_container() { + let tests = get_test_data(); + + // run tests + for (i, d) in tests.iter().enumerate() { + let spec = oci::Spec { + annotations: HashMap::from([( + cri_containerd::CONTAINER_TYPE_LABEL_KEY.to_string(), + cri_containerd::CONTAINER.to_string(), + )]), + linux: Some(oci::Linux { + resources: Some(oci::LinuxResources { + cpu: Some(oci::LinuxCpu { + period: d.input.period, + quota: d.input.quota, + ..Default::default() + }), + memory: Some(oci::LinuxMemory { + limit: d.input.memory, + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }; + + let initial_size = InitialSize::try_from(&spec); + assert!( + initial_size.is_ok(), + "test[{}]: {:?} should be ok", + i, + d.desc + ); + + let initial_size = initial_size.unwrap(); + assert_eq!( + initial_size.vcpu, d.result.vcpu, + "test[{}]: {:?} vcpu should be {}", + i, d.desc, 
d.result.vcpu, + ); + assert_eq!( + initial_size.mem_mb, d.result.mem_mb, + "test[{}]: {:?} memory should be {}", + i, d.desc, d.result.mem_mb, + ); + } + } +} diff --git a/src/runtime-rs/crates/resource/src/cpu_mem/mod.rs b/src/runtime-rs/crates/resource/src/cpu_mem/mod.rs new file mode 100644 index 000000000000..f2984cd1cb8c --- /dev/null +++ b/src/runtime-rs/crates/resource/src/cpu_mem/mod.rs @@ -0,0 +1,8 @@ +// Copyright (c) 2019-2023 Alibaba Cloud +// Copyright (c) 2019-2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub mod cpu; +pub mod initial_size; diff --git a/src/runtime-rs/crates/resource/src/lib.rs b/src/runtime-rs/crates/resource/src/lib.rs new file mode 100644 index 000000000000..f7df9b687b13 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/lib.rs @@ -0,0 +1,43 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[macro_use] +extern crate lazy_static; + +#[macro_use] +extern crate slog; + +logging::logger_with_subsystem!(sl, "resource"); + +pub mod cgroups; +pub mod manager; +mod manager_inner; +pub mod network; +pub mod resource_persist; +use hypervisor::{BlockConfig, HybridVsockConfig}; +use network::NetworkConfig; +pub mod rootfs; +pub mod share_fs; +pub mod volume; +pub use manager::ResourceManager; +pub mod cpu_mem; + +use kata_types::config::hypervisor::SharedFsInfo; + +#[derive(Debug)] +pub enum ResourceConfig { + Network(NetworkConfig), + ShareFs(SharedFsInfo), + VmRootfs(BlockConfig), + HybridVsock(HybridVsockConfig), +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ResourceUpdateOp { + Add, + Del, + Update, +} diff --git a/src/runtime-rs/crates/resource/src/manager.rs b/src/runtime-rs/crates/resource/src/manager.rs new file mode 100644 index 000000000000..a96e16b8ddaf --- /dev/null +++ b/src/runtime-rs/crates/resource/src/manager.rs @@ -0,0 +1,159 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::sync::Arc; + +use agent::types::Device; +use agent::{Agent, Storage}; +use anyhow::Result; +use async_trait::async_trait; +use hypervisor::device::device_manager::DeviceManager; +use hypervisor::Hypervisor; +use kata_types::config::TomlConfig; +use kata_types::mount::Mount; +use oci::{Linux, LinuxResources}; +use persist::sandbox_persist::Persist; +use tokio::sync::RwLock; +use tracing::instrument; + +use crate::network::NetworkConfig; +use crate::resource_persist::ResourceState; +use crate::ResourceUpdateOp; +use crate::{manager_inner::ResourceManagerInner, rootfs::Rootfs, volume::Volume, ResourceConfig}; + +pub struct ManagerArgs { + pub sid: String, + pub agent: Arc, + pub hypervisor: Arc, + pub config: TomlConfig, +} + +pub struct ResourceManager { + inner: Arc>, +} + +impl std::fmt::Debug for ResourceManager { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ResourceManager").finish() + } +} + +impl ResourceManager { + pub async fn new( + sid: &str, + agent: Arc, + hypervisor: Arc, + toml_config: Arc, + ) -> Result { + Ok(Self { + inner: Arc::new(RwLock::new( + ResourceManagerInner::new(sid, agent, hypervisor, toml_config).await?, + )), + }) + } + + pub async fn config(&self) -> Arc { + let inner = self.inner.read().await; + inner.config() + } + + pub async fn get_device_manager(&self) -> Arc> { + let inner = self.inner.read().await; + inner.get_device_manager() + } + + #[instrument] + pub async fn 
prepare_before_start_vm(&self, device_configs: Vec) -> Result<()> { + let mut inner = self.inner.write().await; + inner.prepare_before_start_vm(device_configs).await + } + + pub async fn handle_network(&self, network_config: NetworkConfig) -> Result<()> { + let mut inner = self.inner.write().await; + inner.handle_network(network_config).await + } + + #[instrument] + pub async fn setup_after_start_vm(&self) -> Result<()> { + let mut inner = self.inner.write().await; + inner.setup_after_start_vm().await + } + + pub async fn get_storage_for_sandbox(&self) -> Result> { + let inner = self.inner.read().await; + inner.get_storage_for_sandbox().await + } + + pub async fn handler_rootfs( + &self, + cid: &str, + root: &oci::Root, + bundle_path: &str, + rootfs_mounts: &[Mount], + ) -> Result> { + let inner = self.inner.read().await; + inner + .handler_rootfs(cid, root, bundle_path, rootfs_mounts) + .await + } + + pub async fn handler_volumes( + &self, + cid: &str, + spec: &oci::Spec, + ) -> Result>> { + let inner = self.inner.read().await; + inner.handler_volumes(cid, spec).await + } + + pub async fn handler_devices(&self, cid: &str, linux: &Linux) -> Result> { + let inner = self.inner.read().await; + inner.handler_devices(cid, linux).await + } + + pub async fn dump(&self) { + let inner = self.inner.read().await; + inner.dump().await + } + + pub async fn update_linux_resource( + &self, + cid: &str, + linux_resources: Option<&LinuxResources>, + op: ResourceUpdateOp, + ) -> Result> { + let inner = self.inner.read().await; + inner.update_linux_resource(cid, linux_resources, op).await + } + + pub async fn cleanup(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.cleanup().await + } +} + +#[async_trait] +impl Persist for ResourceManager { + type State = ResourceState; + type ConstructorArgs = ManagerArgs; + + /// Save a state of ResourceManager + async fn save(&self) -> Result { + let inner = self.inner.read().await; + inner.save().await + } + + /// Restore ResourceManager + async fn restore( + resource_args: Self::ConstructorArgs, + resource_state: Self::State, + ) -> Result { + let inner = ResourceManagerInner::restore(resource_args, resource_state).await?; + Ok(Self { + inner: Arc::new(RwLock::new(inner)), + }) + } +} diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs new file mode 100644 index 000000000000..1f51ca9c0f3b --- /dev/null +++ b/src/runtime-rs/crates/resource/src/manager_inner.rs @@ -0,0 +1,504 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{sync::Arc, thread}; + +use agent::{types::Device, Agent, Storage}; +use anyhow::{anyhow, Context, Ok, Result}; +use async_trait::async_trait; +use hypervisor::{ + device::{ + device_manager::{do_handle_device, get_block_driver, DeviceManager}, + util::{get_host_path, DEVICE_TYPE_CHAR}, + DeviceConfig, DeviceType, + }, + BlockConfig, Hypervisor, VfioConfig, +}; +use kata_types::config::TomlConfig; +use kata_types::mount::Mount; +use oci::{Linux, LinuxCpu, LinuxResources}; +use persist::sandbox_persist::Persist; +use tokio::{runtime, sync::RwLock}; + +use crate::{ + cgroups::{CgroupArgs, CgroupsResource}, + cpu_mem::cpu::CpuResource, + manager::ManagerArgs, + network::{self, Network, NetworkConfig}, + resource_persist::ResourceState, + rootfs::{RootFsResource, Rootfs}, + share_fs::{self, sandbox_bind_mounts::SandboxBindMounts, ShareFs}, + volume::{Volume, VolumeResource}, + 
ResourceConfig, ResourceUpdateOp, +}; + +pub(crate) struct ResourceManagerInner { + sid: String, + toml_config: Arc, + agent: Arc, + hypervisor: Arc, + device_manager: Arc>, + network: Option>, + share_fs: Option>, + + pub rootfs_resource: RootFsResource, + pub volume_resource: VolumeResource, + pub cgroups_resource: CgroupsResource, + pub cpu_resource: CpuResource, +} + +impl ResourceManagerInner { + pub(crate) async fn new( + sid: &str, + agent: Arc, + hypervisor: Arc, + toml_config: Arc, + ) -> Result { + // create device manager + let dev_manager = DeviceManager::new(hypervisor.clone()) + .await + .context("failed to create device manager")?; + + let cgroups_resource = CgroupsResource::new(sid, &toml_config)?; + let cpu_resource = CpuResource::new(toml_config.clone())?; + Ok(Self { + sid: sid.to_string(), + toml_config, + agent, + hypervisor, + device_manager: Arc::new(RwLock::new(dev_manager)), + network: None, + share_fs: None, + rootfs_resource: RootFsResource::new(), + volume_resource: VolumeResource::new(), + cgroups_resource, + cpu_resource, + }) + } + + pub fn config(&self) -> Arc { + self.toml_config.clone() + } + + pub fn get_device_manager(&self) -> Arc> { + self.device_manager.clone() + } + + pub async fn prepare_before_start_vm( + &mut self, + device_configs: Vec, + ) -> Result<()> { + for dc in device_configs { + match dc { + ResourceConfig::ShareFs(c) => { + self.share_fs = if self + .hypervisor + .capabilities() + .await? + .is_fs_sharing_supported() + { + let share_fs = share_fs::new(&self.sid, &c).context("new share fs")?; + share_fs + .setup_device_before_start_vm(self.hypervisor.as_ref()) + .await + .context("setup share fs device before start vm")?; + + // setup sandbox bind mounts: setup = true + self.handle_sandbox_bindmounts(true) + .await + .context("failed setup sandbox bindmounts")?; + + Some(share_fs) + } else { + None + }; + } + ResourceConfig::Network(c) => { + self.handle_network(c) + .await + .context("failed to handle network")?; + } + ResourceConfig::VmRootfs(r) => { + do_handle_device(&self.device_manager, &DeviceConfig::BlockCfg(r)) + .await + .context("do handle device failed.")?; + } + ResourceConfig::HybridVsock(hv) => { + do_handle_device(&self.device_manager, &DeviceConfig::HybridVsockCfg(hv)) + .await + .context("do handle hybrid-vsock device failed.")?; + } + }; + } + + Ok(()) + } + + pub async fn handle_network(&mut self, network_config: NetworkConfig) -> Result<()> { + // 1. When using Rust asynchronous programming, we use .await to + // allow other task to run instead of waiting for the completion of the current task. + // 2. Also, when handling the pod network, we need to set the shim threads + // into the network namespace to perform those operations. + // However, as the increase of the I/O intensive tasks, two issues could be caused by the two points above: + // a. When the future is blocked, the current thread (which is in the pod netns) + // might be take over by other tasks. After the future is finished, the thread take over + // the current task might not be in the pod netns. But the current task still need to run in pod netns + // b. When finish setting up the network, the current thread will be set back to the host namespace. + // In Rust Async, if the current thread is taken over by other task, the netns is dropped on another thread, + // but it is not in netns. So, the previous thread would still remain in the pod netns. 
+ // The solution is to block the future on the current thread, it is enabled by spawn an os thread, create a + // tokio runtime, and block the task on it. + let device_manager = self.device_manager.clone(); + let network = thread::spawn(move || -> Result> { + let rt = runtime::Builder::new_current_thread().enable_io().build()?; + let d = rt + .block_on(network::new(&network_config, device_manager)) + .context("new network")?; + rt.block_on(d.setup()).context("setup network")?; + Ok(d) + }) + .join() + .map_err(|e| anyhow!("{:?}", e)) + .context("Couldn't join on the associated thread")? + .context("failed to set up network")?; + self.network = Some(network); + Ok(()) + } + + async fn handle_interfaces(&self, network: &dyn Network) -> Result<()> { + for i in network.interfaces().await.context("get interfaces")? { + // update interface + info!(sl!(), "update interface {:?}", i); + self.agent + .update_interface(agent::UpdateInterfaceRequest { interface: Some(i) }) + .await + .context("update interface")?; + } + + Ok(()) + } + + async fn handle_neighbours(&self, network: &dyn Network) -> Result<()> { + let neighbors = network.neighs().await.context("neighs")?; + if !neighbors.is_empty() { + info!(sl!(), "update neighbors {:?}", neighbors); + self.agent + .add_arp_neighbors(agent::AddArpNeighborRequest { + neighbors: Some(agent::ARPNeighbors { neighbors }), + }) + .await + .context("update neighbors")?; + } + Ok(()) + } + + async fn handle_routes(&self, network: &dyn Network) -> Result<()> { + let routes = network.routes().await.context("routes")?; + if !routes.is_empty() { + info!(sl!(), "update routes {:?}", routes); + self.agent + .update_routes(agent::UpdateRoutesRequest { + route: Some(agent::Routes { routes }), + }) + .await + .context("update routes")?; + } + Ok(()) + } + + pub async fn setup_after_start_vm(&mut self) -> Result<()> { + if let Some(share_fs) = self.share_fs.as_ref() { + share_fs + .setup_device_after_start_vm(self.hypervisor.as_ref()) + .await + .context("setup share fs device after start vm")?; + } + + if let Some(network) = self.network.as_ref() { + let network = network.as_ref(); + self.handle_interfaces(network) + .await + .context("handle interfaces")?; + self.handle_neighbours(network) + .await + .context("handle neighbors")?; + self.handle_routes(network).await.context("handle routes")?; + } + Ok(()) + } + + pub async fn get_storage_for_sandbox(&self) -> Result> { + let mut storages = vec![]; + if let Some(d) = self.share_fs.as_ref() { + let mut s = d.get_storages().await.context("get storage")?; + storages.append(&mut s); + } + Ok(storages) + } + + pub async fn handler_rootfs( + &self, + cid: &str, + root: &oci::Root, + bundle_path: &str, + rootfs_mounts: &[Mount], + ) -> Result> { + self.rootfs_resource + .handler_rootfs( + &self.share_fs, + self.device_manager.as_ref(), + self.hypervisor.as_ref(), + &self.sid, + cid, + root, + bundle_path, + rootfs_mounts, + ) + .await + } + + pub async fn handler_volumes( + &self, + cid: &str, + spec: &oci::Spec, + ) -> Result>> { + self.volume_resource + .handler_volumes( + &self.share_fs, + cid, + spec, + self.device_manager.as_ref(), + &self.sid, + self.agent.clone(), + ) + .await + } + + pub async fn handler_devices(&self, _cid: &str, linux: &Linux) -> Result> { + let mut devices = vec![]; + for d in linux.devices.iter() { + match d.r#type.as_str() { + "b" => { + let block_driver = get_block_driver(&self.device_manager).await; + let dev_info = DeviceConfig::BlockCfg(BlockConfig { + major: d.major, + minor: d.minor, + 
driver_option: block_driver, + ..Default::default() + }); + + let device_info = do_handle_device(&self.device_manager, &dev_info) + .await + .context("do handle device")?; + + // create block device for kata agent, + // if driver is virtio-blk-pci, the id will be pci address. + if let DeviceType::Block(device) = device_info { + let agent_device = Device { + id: device.config.virt_path.clone(), + container_path: d.path.clone(), + field_type: device.config.driver_option, + vm_path: device.config.virt_path, + ..Default::default() + }; + devices.push(agent_device); + } + } + "c" => { + let host_path = get_host_path(DEVICE_TYPE_CHAR, d.major, d.minor) + .context("get host path failed")?; + // First of all, filter vfio devices. + if !host_path.starts_with("/dev/vfio") { + continue; + } + + let dev_info = DeviceConfig::VfioCfg(VfioConfig { + host_path, + dev_type: "c".to_string(), + hostdev_prefix: "vfio_device".to_owned(), + ..Default::default() + }); + + let device_info = do_handle_device(&self.device_manager.clone(), &dev_info) + .await + .context("do handle device")?; + + // vfio mode: vfio-pci and vfio-pci-gk for x86_64 + // - vfio-pci, devices appear as VFIO character devices under /dev/vfio in container. + // - vfio-pci-gk, devices are managed by whatever driver in Guest kernel. + let vfio_mode = match self.toml_config.runtime.vfio_mode.as_str() { + "vfio" => "vfio-pci".to_string(), + _ => "vfio-pci-gk".to_string(), + }; + + // create agent device + if let DeviceType::Vfio(device) = device_info { + let agent_device = Device { + id: device.device_id, // just for kata-agent + container_path: d.path.clone(), + field_type: vfio_mode, + options: device.device_options, + ..Default::default() + }; + devices.push(agent_device); + } + } + _ => { + // TODO enable other devices type + continue; + } + } + } + Ok(devices) + } + + async fn handle_sandbox_bindmounts(&self, setup: bool) -> Result<()> { + let bindmounts = self.toml_config.runtime.sandbox_bind_mounts.clone(); + if bindmounts.is_empty() { + info!(sl!(), "sandbox bindmounts empty, just skip it."); + return Ok(()); + } + + let sb_bindmnt = SandboxBindMounts::new(self.sid.clone(), bindmounts)?; + + if setup { + sb_bindmnt.setup_sandbox_bind_mounts() + } else { + sb_bindmnt.cleanup_sandbox_bind_mounts() + } + } + + pub async fn cleanup(&self) -> Result<()> { + // clean up cgroup + self.cgroups_resource + .delete() + .await + .context("delete cgroup")?; + + // cleanup sandbox bind mounts: setup = false + self.handle_sandbox_bindmounts(false) + .await + .context("failed to cleanup sandbox bindmounts")?; + + // clean up share fs mount + if let Some(share_fs) = &self.share_fs { + share_fs + .get_share_fs_mount() + .cleanup(&self.sid) + .await + .context("failed to cleanup host path")?; + } + // TODO cleanup other resources + Ok(()) + } + + pub async fn dump(&self) { + self.rootfs_resource.dump().await; + self.volume_resource.dump().await; + } + + pub async fn update_linux_resource( + &self, + cid: &str, + linux_resources: Option<&LinuxResources>, + op: ResourceUpdateOp, + ) -> Result> { + let linux_cpus = || -> Option<&LinuxCpu> { linux_resources.as_ref()?.cpu.as_ref() }(); + + // if static_sandbox_resource_mgmt, we will not have to update sandbox's cpu or mem resource + if !self.toml_config.runtime.static_sandbox_resource_mgmt { + self.cpu_resource + .update_cpu_resources( + cid, + linux_cpus, + op, + self.hypervisor.as_ref(), + self.agent.as_ref(), + ) + .await?; + } + + // we should firstly update the vcpus and mems, and then update the host 
cgroups + self.cgroups_resource + .update_cgroups(cid, linux_resources, op, self.hypervisor.as_ref()) + .await?; + + // update the linux resources for agent + self.agent_linux_resources(linux_resources) + } + + fn agent_linux_resources( + &self, + linux_resources: Option<&LinuxResources>, + ) -> Result> { + let mut resources = match linux_resources { + Some(linux_resources) => linux_resources.clone(), + None => { + return Ok(None); + } + }; + + // clear the cpuset + // for example, if there are only 5 vcpus now, and the cpuset in LinuxResources is 0-2,6, guest os will report + // error when creating the container. so we choose to clear the cpuset here. + if let Some(cpu) = &mut resources.cpu { + cpu.cpus = String::new(); + } + + Ok(Some(resources)) + } +} + +#[async_trait] +impl Persist for ResourceManagerInner { + type State = ResourceState; + type ConstructorArgs = ManagerArgs; + + /// Save a state of ResourceManagerInner + async fn save(&self) -> Result { + let mut endpoint_state = vec![]; + if let Some(network) = &self.network { + if let Some(ens) = network.save().await { + endpoint_state = ens; + } + } + let cgroup_state = self.cgroups_resource.save().await?; + Ok(ResourceState { + endpoint: endpoint_state, + cgroup_state: Some(cgroup_state), + }) + } + + /// Restore ResourceManagerInner + async fn restore( + resource_args: Self::ConstructorArgs, + resource_state: Self::State, + ) -> Result { + let args = CgroupArgs { + sid: resource_args.sid.clone(), + config: resource_args.config, + }; + Ok(Self { + sid: resource_args.sid, + agent: resource_args.agent, + hypervisor: resource_args.hypervisor.clone(), + device_manager: Arc::new(RwLock::new( + DeviceManager::new(resource_args.hypervisor).await?, + )), + network: None, + share_fs: None, + rootfs_resource: RootFsResource::new(), + volume_resource: VolumeResource::new(), + cgroups_resource: CgroupsResource::restore( + args, + resource_state.cgroup_state.unwrap_or_default(), + ) + .await?, + toml_config: Arc::new(TomlConfig::default()), + cpu_resource: CpuResource::default(), + }) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/dan.rs b/src/runtime-rs/crates/resource/src/network/dan.rs new file mode 100644 index 000000000000..d59875bca1c4 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/dan.rs @@ -0,0 +1,406 @@ +// Copyright (c) 2019-2023 Alibaba Cloud +// Copyright (c) 2019-2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! Directly Attachable Network (DAN) is a type of network that runs in the host +//! netns. It supports host-tap, vhost-user (DPDK), etc. +//! The device information is retrieved from a JSON file, the type of which is +//! `Vec`. +//! In this module, `IPAddress`, `Interface`, etc., are duplicated mostly from +//! `agent::IPAddress`, `agent::Interface`, and so on. They can't be referenced +//! directly because the former represents the structure of the JSON file written +//! by CNI plugins. They might have some slight differences, and may be revised in +//! the future. 
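+//!
+//! A minimal, purely illustrative config (hypothetical netns path, tap name and
+//! address) in the shape deserialized by `DanConfig` below:
+//! {
+//!   "netns": "/var/run/netns/dan-example",
+//!   "devices": [{
+//!     "name": "eth0",
+//!     "device": {"type": "host-tap", "tap_name": "tap0_kata"},
+//!     "network_info": {"interface": {"ip_addresses": ["10.0.0.2/24"]}}
+//!   }]
+//! }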
+ +use std::net::IpAddr; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::Arc; + +use agent::IPFamily; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use hypervisor::device::device_manager::DeviceManager; +use hypervisor::Hypervisor; +use kata_types::config::TomlConfig; +use scopeguard::defer; +use serde::{Deserialize, Serialize}; +use tokio::fs; +use tokio::sync::RwLock; + +use super::network_entity::NetworkEntity; +use super::utils::address::{ip_family_from_ip_addr, parse_ip_cidr}; +use super::{EndpointState, NetnsGuard, Network}; +use crate::network::endpoint::TapEndpoint; +use crate::network::network_info::network_info_from_dan::NetworkInfoFromDan; +use crate::network::utils::generate_private_mac_addr; + +/// Directly attachable network +pub struct Dan { + inner: Arc>, +} + +pub struct DanInner { + netns: Option, + entity_list: Vec, +} + +impl Dan { + pub async fn new( + config: &DanNetworkConfig, + dev_mgr: Arc>, + ) -> Result { + Ok(Self { + inner: Arc::new(RwLock::new(DanInner::new(config, &dev_mgr).await?)), + }) + } +} + +impl DanInner { + /// DanInner initialization deserializes DAN devices from a file writen + /// by CNI plugins. Respective endpoint and network_info are retrieved + /// from the devices, and compose NetworkEntity. + async fn new(config: &DanNetworkConfig, dev_mgr: &Arc>) -> Result { + let json_str = fs::read_to_string(&config.dan_conf_path) + .await + .context("Read DAN config from file")?; + let config: DanConfig = serde_json::from_str(&json_str).context("Invalid DAN config")?; + info!(sl!(), "Dan config is loaded = {:?}", config); + + let (connection, handle, _) = rtnetlink::new_connection().context("New connection")?; + let thread_handler = tokio::spawn(connection); + defer!({ + thread_handler.abort(); + }); + + let mut entity_list = Vec::with_capacity(config.devices.len()); + for (idx, device) in config.devices.iter().enumerate() { + let name = format!("eth{}", idx); + let endpoint = match &device.device { + // TODO: Support VhostUserNet protocol + Device::VhostUser { + path, + queue_num: _, + queue_size: _, + } => { + warn!(sl!(), "A DAN device whose type is \"vhost-user\" and socket path is {} is ignored.", path); + continue; + } + Device::HostTap { + tap_name, + queue_num, + queue_size, + } => Arc::new( + TapEndpoint::new( + &handle, + idx as u32, + &name, + tap_name, + &device.guest_mac, + *queue_num, + *queue_size, + dev_mgr, + ) + .await + .with_context(|| format!("New a {} tap endpoint", tap_name))?, + ), + }; + + let network_info = Arc::new( + NetworkInfoFromDan::new(device) + .await + .context("Network info from DAN")?, + ); + + entity_list.push(NetworkEntity { + endpoint, + network_info, + }) + } + + Ok(Self { + netns: config.netns, + entity_list, + }) + } +} + +#[async_trait] +impl Network for Dan { + async fn setup(&self) -> Result<()> { + let inner = self.inner.read().await; + let _netns_guard; + if let Some(netns) = inner.netns.as_ref() { + _netns_guard = NetnsGuard::new(netns).context("New netns guard")?; + } + for e in inner.entity_list.iter() { + e.endpoint.attach().await.context("Attach")?; + } + Ok(()) + } + + async fn interfaces(&self) -> Result> { + let inner = self.inner.read().await; + let mut interfaces = vec![]; + for e in inner.entity_list.iter() { + interfaces.push(e.network_info.interface().await.context("Interface")?); + } + Ok(interfaces) + } + + async fn routes(&self) -> Result> { + let inner = self.inner.read().await; + let mut routes = vec![]; + for e in inner.entity_list.iter() { + let 
mut list = e.network_info.routes().await.context("Routes")?; + routes.append(&mut list); + } + Ok(routes) + } + + async fn neighs(&self) -> Result> { + let inner = self.inner.read().await; + let mut neighs = vec![]; + for e in &inner.entity_list { + let mut list = e.network_info.neighs().await.context("Neighs")?; + neighs.append(&mut list); + } + Ok(neighs) + } + + async fn save(&self) -> Option> { + let inner = self.inner.read().await; + let mut ep_states = vec![]; + for e in &inner.entity_list { + if let Some(state) = e.endpoint.save().await { + ep_states.push(state); + } + } + Some(ep_states) + } + + async fn remove(&self, h: &dyn Hypervisor) -> Result<()> { + let inner = self.inner.read().await; + let _netns_guard; + if let Some(netns) = inner.netns.as_ref() { + _netns_guard = NetnsGuard::new(netns).context("New netns guard")?; + } + for e in inner.entity_list.iter() { + e.endpoint.detach(h).await.context("Detach")?; + } + Ok(()) + } +} + +/// Directly attachable network config +#[derive(Debug)] +pub struct DanNetworkConfig { + pub dan_conf_path: PathBuf, +} + +/// Directly attachable network config written by CNI plugins +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct DanConfig { + netns: Option, + devices: Vec, +} + +/// Directly attachable network device +/// This struct is serilized from a file containing devices information, +/// sent from CNI plugins. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub(crate) struct DanDevice { + // Name of device (interface name on the guest) + pub(crate) name: String, + // Mac address of interface on the guest, if it is not specified, a + // private address is generated as default. + #[serde(default = "generate_private_mac_addr")] + pub(crate) guest_mac: String, + // Device + pub(crate) device: Device, + // Network info + pub(crate) network_info: NetworkInfo, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type")] +pub(crate) enum Device { + #[serde(rename = "vhost-user")] + VhostUser { + // Vhost-user socket path + path: String, + #[serde(default)] + queue_num: usize, + #[serde(default)] + queue_size: usize, + }, + #[serde(rename = "host-tap")] + HostTap { + tap_name: String, + #[serde(default)] + queue_num: usize, + #[serde(default)] + queue_size: usize, + }, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub(crate) struct NetworkInfo { + pub(crate) interface: Interface, + #[serde(default)] + pub(crate) routes: Vec, + #[serde(default)] + pub(crate) neighbors: Vec, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub(crate) struct Interface { + // IP addresses in the format of CIDR + pub ip_addresses: Vec, + #[serde(default = "default_mtu")] + pub mtu: u64, + #[serde(default)] + // Link type + pub ntype: String, + #[serde(default)] + pub flags: u32, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub(crate) struct Route { + #[serde(default)] + // Destination(CIDR), an empty string denotes no destination + pub dest: String, + #[serde(default)] + // Gateway(IP Address), an empty string denotes no gateway + pub gateway: String, + // Source(IP Address), an empty string denotes no gateway + #[serde(default)] + pub source: String, + // Scope + #[serde(default)] + pub scope: u32, +} + +impl Route { + pub(crate) fn ip_family(&self) -> Result { + if !self.dest.is_empty() { + return Ok(ip_family_from_ip_addr( + &parse_ip_cidr(&self.dest) + .context("Parse ip addr from dest")? 
+ .0, + )); + } + + if !self.gateway.is_empty() { + return Ok(ip_family_from_ip_addr( + &IpAddr::from_str(&self.gateway).context("Parse ip addr from gateway")?, + )); + } + + if !self.source.is_empty() { + return Ok(ip_family_from_ip_addr( + &IpAddr::from_str(&self.source).context("Parse ip addr from source")?, + )); + } + + Err(anyhow!("Failed to retrieve IP family from {:?}", self)) + } +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub(crate) struct ARPNeighbor { + // IP address in the format of CIDR + pub ip_address: Option, + #[serde(default)] + pub hardware_addr: String, + #[serde(default)] + pub state: u32, + #[serde(default)] + pub flags: u32, +} + +fn default_mtu() -> u64 { + 1500 +} + +/// Path of DAN config, the file contains an array of DanDevices. +#[inline] +pub fn dan_config_path(config: &TomlConfig, sandbox_id: &str) -> PathBuf { + PathBuf::from(config.runtime.dan_conf.as_str()).join(format!("{}.json", sandbox_id)) +} + +#[cfg(test)] +mod tests { + use crate::network::dan::{ARPNeighbor, DanDevice, Device, Interface, NetworkInfo, Route}; + + #[test] + fn test_dan_json() { + let json_str = r#"{ + "name": "eth0", + "guest_mac": "xx:xx:xx:xx:xx", + "device": { + "type": "vhost-user", + "path": "/tmp/test", + "queue_num": 1, + "queue_size": 1 + }, + "network_info": { + "interface": { + "ip_addresses": ["192.168.0.1/24"], + "mtu": 1500, + "ntype": "tuntap", + "flags": 0 + }, + "routes": [{ + "dest": "172.18.0.0/16", + "source": "172.18.0.1", + "gateway": "172.18.31.1", + "scope": 0, + "flags": 0 + }], + "neighbors": [{ + "ip_address": "192.168.0.3/16", + "device": "", + "state": 0, + "flags": 0, + "hardware_addr": "xx:xx:xx:xx:xx" + }] + } + }"#; + let dev_from_json: DanDevice = serde_json::from_str(json_str).unwrap(); + let dev = DanDevice { + name: "eth0".to_owned(), + guest_mac: "xx:xx:xx:xx:xx".to_owned(), + device: Device::VhostUser { + path: "/tmp/test".to_owned(), + queue_num: 1, + queue_size: 1, + }, + network_info: NetworkInfo { + interface: Interface { + ip_addresses: vec!["192.168.0.1/24".to_owned()], + mtu: 1500, + ntype: "tuntap".to_owned(), + flags: 0, + }, + routes: vec![Route { + dest: "172.18.0.0/16".to_owned(), + source: "172.18.0.1".to_owned(), + gateway: "172.18.31.1".to_owned(), + scope: 0, + }], + neighbors: vec![ARPNeighbor { + ip_address: Some("192.168.0.3/16".to_owned()), + hardware_addr: "xx:xx:xx:xx:xx".to_owned(), + state: 0, + flags: 0, + }], + }, + }; + + assert_eq!(dev_from_json, dev); + } +} diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/endpoint_persist.rs b/src/runtime-rs/crates/resource/src/network/endpoint/endpoint_persist.rs new file mode 100644 index 000000000000..b637b2afe67f --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/endpoint/endpoint_persist.rs @@ -0,0 +1,56 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Clone, Default)] +pub struct PhysicalEndpointState { + pub bdf: String, + pub driver: String, + pub vendor_id: String, + pub device_id: String, + pub hard_addr: String, +} + +#[derive(Serialize, Deserialize, Clone, Default)] +pub struct MacvlanEndpointState { + pub if_name: String, + pub network_qos: bool, +} + +#[derive(Serialize, Deserialize, Clone, Default)] +pub struct VlanEndpointState { + pub if_name: String, + pub network_qos: bool, +} + +#[derive(Serialize, Deserialize, Clone, Default)] +pub struct VethEndpointState { 
+ pub if_name: String, + pub network_qos: bool, +} + +#[derive(Serialize, Deserialize, Clone, Default)] +pub struct IpVlanEndpointState { + pub if_name: String, + pub network_qos: bool, +} + +#[derive(Serialize, Deserialize, Clone, Default)] +pub struct TapEndpointState { + pub if_name: String, +} + +#[derive(Serialize, Deserialize, Clone, Default)] +pub struct EndpointState { + pub physical_endpoint: Option, + pub veth_endpoint: Option, + pub ipvlan_endpoint: Option, + pub macvlan_endpoint: Option, + pub vlan_endpoint: Option, + pub tap_endpoint: Option, + // TODO : other endpoint +} diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/endpoints_test.rs b/src/runtime-rs/crates/resource/src/network/endpoint/endpoints_test.rs new file mode 100644 index 000000000000..7bfb429621e6 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/endpoint/endpoints_test.rs @@ -0,0 +1,397 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use anyhow::{anyhow, Context, Result}; + use netlink_packet_route::MACVLAN_MODE_PRIVATE; + use scopeguard::defer; + use tests_utils::load_test_config; + use tokio::sync::RwLock; + + use crate::network::{ + endpoint::{IPVlanEndpoint, MacVlanEndpoint, VlanEndpoint}, + network_model::{ + self, + tc_filter_model::{fetch_index, TcFilterModel}, + NetworkModelType, TC_FILTER_NET_MODEL_STR, + }, + network_pair::{NetworkInterface, NetworkPair, TapInterface}, + utils::link::net_test_utils::delete_link, + }; + use hypervisor::{device::device_manager::DeviceManager, qemu::Qemu}; + + async fn get_device_manager() -> Result>> { + let hypervisor_name: &str = "qemu"; + let toml_config = load_test_config(hypervisor_name.to_owned())?; + let hypervisor_config = toml_config + .hypervisor + .get(hypervisor_name) + .ok_or_else(|| anyhow!("failed to get hypervisor for {}", &hypervisor_name))?; + + let mut hypervisor = Qemu::new(); + hypervisor + .set_hypervisor_config(hypervisor_config.clone()) + .await; + + let dm = Arc::new(RwLock::new( + DeviceManager::new(Arc::new(hypervisor)) + .await + .context("device manager")?, + )); + + Ok(dm) + } + + // this unit test tests the integrity of MacVlanEndpoint::new() + #[actix_rt::test] + async fn test_vlan_construction() { + let idx = 8193; + let mac_addr = String::from("02:78:CA:FE:00:04"); + let manual_vlan_iface_name = format!("eth{}", idx); + let tap_iface_name = format!("tap{}_kata", idx); // create by NetworkPair::new() + let dummy_name = format!("dummy{}", idx); + let vlanid = 123; + + let dm = get_device_manager().await; + assert!(dm.is_ok()); + let d = dm.unwrap(); + + if let Ok((conn, handle, _)) = + rtnetlink::new_connection().context("failed to create netlink connection") + { + let thread_handler = tokio::spawn(conn); + defer!({ + thread_handler.abort(); + }); + + if let Ok(()) = handle + .link() + .add() + .dummy(dummy_name.clone()) + .execute() + .await + .context("failed to create dummy link") + { + let dummy_index = fetch_index(&handle, dummy_name.clone().as_str()) + .await + .expect("failed to get the index of dummy link"); + + // since IPVlanEndpoint::new() needs an EXISTING virt_iface (which is created + // by containerd normally), we have to manually create a virt_iface. 
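+            // In this test the stand-in is a VLAN sub-interface created on top of the
+            // dummy link above; both it and the generated kata tap are deleted again
+            // at the end of the test.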
+ if let Ok(()) = handle + .link() + .add() + .vlan(manual_vlan_iface_name.clone(), dummy_index, vlanid) + .execute() + .await + .context("failed to create manual veth pair") + { + if let Ok(mut result) = VlanEndpoint::new(&d, &handle, "", idx, 5) + .await + .context("failed to create new ipvlan endpoint") + { + let manual = VlanEndpoint { + d, + net_pair: NetworkPair { + tap: TapInterface { + id: String::from("uniqueTestID_kata"), + name: format!("br{}_kata", idx), + tap_iface: NetworkInterface { + name: tap_iface_name.clone(), + ..Default::default() + }, + }, + virt_iface: NetworkInterface { + name: manual_vlan_iface_name.clone(), + hard_addr: mac_addr.clone(), + ..Default::default() + }, + model: Arc::new(TcFilterModel::new().unwrap()), // impossible to panic + network_qos: false, + }, + }; + + result.net_pair.tap.id = String::from("uniqueTestID_kata"); + result.net_pair.tap.tap_iface.hard_addr = String::from(""); + result.net_pair.virt_iface.hard_addr = mac_addr.clone(); + + // check the integrity by compare all variables + assert_eq!(manual.net_pair.tap.id, result.net_pair.tap.id); + assert_eq!(manual.net_pair.tap.name, result.net_pair.tap.name); + assert_eq!( + manual.net_pair.tap.tap_iface.name, + result.net_pair.tap.tap_iface.name + ); + assert_eq!( + manual.net_pair.tap.tap_iface.hard_addr, + result.net_pair.tap.tap_iface.hard_addr + ); + assert_eq!( + manual.net_pair.tap.tap_iface.addrs, + result.net_pair.tap.tap_iface.addrs + ); + assert_eq!( + manual.net_pair.virt_iface.name, + result.net_pair.virt_iface.name + ); + assert_eq!( + manual.net_pair.virt_iface.hard_addr, + result.net_pair.virt_iface.hard_addr + ); + // using match branch to avoid deriving PartialEq trait + match manual.net_pair.model.model_type() { + NetworkModelType::TcFilter => {} // ok + _ => unreachable!(), + } + match result.net_pair.model.model_type() { + NetworkModelType::TcFilter => {} + _ => unreachable!(), + } + assert_eq!(manual.net_pair.network_qos, result.net_pair.network_qos); + } + assert!(delete_link(&handle, manual_vlan_iface_name.as_str()) + .await + .is_ok()); + assert!(delete_link(&handle, tap_iface_name.as_str()).await.is_ok()); + assert!(handle.link().del(dummy_index).execute().await.is_ok()); + } + } + } + } + + // this unit test tests the integrity of VlanEndpoint::new() + #[actix_rt::test] + async fn test_macvlan_construction() { + let idx = 8194; + let mac_addr = String::from("02:25:CA:FE:00:04"); + let manual_macvlan_iface_name = format!("eth{}", idx); + let tap_iface_name = format!("tap{}_kata", idx); // create by NetworkPair::new() + let model_str = TC_FILTER_NET_MODEL_STR; + let dummy_name = format!("dummy{}", idx); + let dm = get_device_manager().await; + assert!(dm.is_ok()); + let d = dm.unwrap(); + + if let Ok((conn, handle, _)) = + rtnetlink::new_connection().context("failed to create netlink connection") + { + let thread_handler = tokio::spawn(conn); + defer!({ + thread_handler.abort(); + }); + + if let Ok(()) = handle + .link() + .add() + .dummy(dummy_name.clone()) + .execute() + .await + .context("failed to create dummy link") + { + let dummy_index = fetch_index(&handle, dummy_name.clone().as_str()) + .await + .expect("failed to get the index of dummy link"); + + // the mode here does not matter, could be any of available modes + if let Ok(()) = handle + .link() + .add() + .macvlan( + manual_macvlan_iface_name.clone(), + dummy_index, + MACVLAN_MODE_PRIVATE, + ) + .execute() + .await + .context("failed to create manual macvlan pair") + { + // model here does not matter, could be 
any of supported models + if let Ok(mut result) = MacVlanEndpoint::new( + &d, + &handle, + manual_macvlan_iface_name.clone().as_str(), + idx, + model_str, + 5, + ) + .await + .context("failed to create new macvlan endpoint") + { + let manual = MacVlanEndpoint { + d, + net_pair: NetworkPair { + tap: TapInterface { + id: String::from("uniqueTestID_kata"), + name: format!("br{}_kata", idx), + tap_iface: NetworkInterface { + name: tap_iface_name.clone(), + ..Default::default() + }, + }, + virt_iface: NetworkInterface { + name: manual_macvlan_iface_name.clone(), + hard_addr: mac_addr.clone(), + ..Default::default() + }, + model: network_model::new(model_str) + .expect("failed to create new network model"), + network_qos: false, + }, + }; + + result.net_pair.tap.id = String::from("uniqueTestID_kata"); + result.net_pair.tap.tap_iface.hard_addr = String::from(""); + result.net_pair.virt_iface.hard_addr = mac_addr.clone(); + + // check the integrity by compare all variables + assert_eq!(manual.net_pair.tap.id, result.net_pair.tap.id); + assert_eq!(manual.net_pair.tap.name, result.net_pair.tap.name); + assert_eq!( + manual.net_pair.tap.tap_iface.name, + result.net_pair.tap.tap_iface.name + ); + assert_eq!( + manual.net_pair.tap.tap_iface.hard_addr, + result.net_pair.tap.tap_iface.hard_addr + ); + assert_eq!( + manual.net_pair.tap.tap_iface.addrs, + result.net_pair.tap.tap_iface.addrs + ); + assert_eq!( + manual.net_pair.virt_iface.name, + result.net_pair.virt_iface.name + ); + assert_eq!( + manual.net_pair.virt_iface.hard_addr, + result.net_pair.virt_iface.hard_addr + ); + // using match branch to avoid deriving PartialEq trait + // TcFilter model is hard-coded "model_str" variable + match manual.net_pair.model.model_type() { + NetworkModelType::TcFilter => {} // ok + _ => unreachable!(), + } + match result.net_pair.model.model_type() { + NetworkModelType::TcFilter => {} + _ => unreachable!(), + } + assert_eq!(manual.net_pair.network_qos, result.net_pair.network_qos); + } + // delete the manually created links + assert!(delete_link(&handle, manual_macvlan_iface_name.as_str()) + .await + .is_ok()); + assert!(delete_link(&handle, tap_iface_name.as_str()).await.is_ok()); + assert!(handle.link().del(dummy_index).execute().await.is_ok()); + } + } + } + } + + // this unit test tests the integrity of IPVlanEndpoint::new() + #[actix_rt::test] + async fn test_ipvlan_construction() { + let idx = 8192; + let mac_addr = String::from("02:00:CA:FE:00:04"); + let manual_virt_iface_name = format!("eth{}", idx); + let tap_iface_name = format!("tap{}_kata", idx); // create by kata + let dm = get_device_manager().await; + assert!(dm.is_ok()); + let d = dm.unwrap(); + + if let Ok((conn, handle, _)) = + rtnetlink::new_connection().context("failed to create netlink connection") + { + let thread_handler = tokio::spawn(conn); + defer!({ + thread_handler.abort(); + }); + + // since IPVlanEndpoint::new() needs an EXISTING virt_iface (which is created + // by containerd normally), we have to manually create a virt_iface. 
+ if let Ok(()) = handle + .link() + .add() + .veth("foo".to_string(), manual_virt_iface_name.clone()) + .execute() + .await + .context("failed to create manual veth pair") + { + if let Ok(mut result) = IPVlanEndpoint::new(&d, &handle, "", idx, 5) + .await + .context("failed to create new ipvlan endpoint") + { + let manual = IPVlanEndpoint { + d, + net_pair: NetworkPair { + tap: TapInterface { + id: String::from("uniqueTestID_kata"), + name: format!("br{}_kata", idx), + tap_iface: NetworkInterface { + name: tap_iface_name.clone(), + ..Default::default() + }, + }, + virt_iface: NetworkInterface { + name: manual_virt_iface_name.clone(), + hard_addr: mac_addr.clone(), + ..Default::default() + }, + model: Arc::new(TcFilterModel::new().unwrap()), // impossible to panic + network_qos: false, + }, + }; + + result.net_pair.tap.id = String::from("uniqueTestID_kata"); + result.net_pair.tap.tap_iface.hard_addr = String::from(""); + result.net_pair.virt_iface.hard_addr = mac_addr.clone(); + + // check the integrity by compare all variables + assert_eq!(manual.net_pair.tap.id, result.net_pair.tap.id); + assert_eq!(manual.net_pair.tap.name, result.net_pair.tap.name); + assert_eq!( + manual.net_pair.tap.tap_iface.name, + result.net_pair.tap.tap_iface.name + ); + assert_eq!( + manual.net_pair.tap.tap_iface.hard_addr, + result.net_pair.tap.tap_iface.hard_addr + ); + assert_eq!( + manual.net_pair.tap.tap_iface.addrs, + result.net_pair.tap.tap_iface.addrs + ); + assert_eq!( + manual.net_pair.virt_iface.name, + result.net_pair.virt_iface.name + ); + assert_eq!( + manual.net_pair.virt_iface.hard_addr, + result.net_pair.virt_iface.hard_addr + ); + // using match branch to avoid deriving PartialEq trait + match manual.net_pair.model.model_type() { + NetworkModelType::TcFilter => {} // ok + _ => unreachable!(), + } + match result.net_pair.model.model_type() { + NetworkModelType::TcFilter => {} + _ => unreachable!(), + } + assert_eq!(manual.net_pair.network_qos, result.net_pair.network_qos); + } + assert!(delete_link(&handle, manual_virt_iface_name.as_str()) + .await + .is_ok()); + assert!(delete_link(&handle, tap_iface_name.as_str()).await.is_ok()); + } + } + } +} diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/ipvlan_endpoint.rs b/src/runtime-rs/crates/resource/src/network/endpoint/ipvlan_endpoint.rs new file mode 100644 index 000000000000..7039275e866d --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/endpoint/ipvlan_endpoint.rs @@ -0,0 +1,127 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + io::{self, Error}, + sync::Arc, +}; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use tokio::sync::RwLock; + +use hypervisor::{ + device::{ + device_manager::{do_handle_device, DeviceManager}, + driver::NetworkConfig, + DeviceConfig, DeviceType, + }, + Hypervisor, NetworkDevice, +}; + +use super::{ + endpoint_persist::{EndpointState, IpVlanEndpointState}, + Endpoint, +}; +use crate::network::{network_model::TC_FILTER_NET_MODEL_STR, utils, NetworkPair}; + +// IPVlanEndpoint is the endpoint bridged to VM +#[derive(Debug)] +pub struct IPVlanEndpoint { + pub(crate) net_pair: NetworkPair, + pub(crate) d: Arc>, +} + +impl IPVlanEndpoint { + pub async fn new( + d: &Arc>, + handle: &rtnetlink::Handle, + name: &str, + idx: u32, + queues: usize, + ) -> Result { + // tc filter network model is the only for ipvlan + let net_pair = NetworkPair::new(handle, idx, name, TC_FILTER_NET_MODEL_STR, 
queues) + .await + .context("error creating new NetworkPair")?; + + Ok(IPVlanEndpoint { + net_pair, + d: d.clone(), + }) + } + + fn get_network_config(&self) -> Result { + let iface = &self.net_pair.tap.tap_iface; + let guest_mac = utils::parse_mac(&iface.hard_addr).ok_or_else(|| { + Error::new( + io::ErrorKind::InvalidData, + format!("hard_addr {}", &iface.hard_addr), + ) + })?; + + Ok(NetworkConfig { + host_dev_name: iface.name.clone(), + virt_iface_name: self.net_pair.virt_iface.name.clone(), + guest_mac: Some(guest_mac), + ..Default::default() + }) + } +} + +#[async_trait] +impl Endpoint for IPVlanEndpoint { + async fn name(&self) -> String { + self.net_pair.virt_iface.name.clone() + } + + async fn hardware_addr(&self) -> String { + self.net_pair.tap.tap_iface.hard_addr.clone() + } + + async fn attach(&self) -> Result<()> { + self.net_pair + .add_network_model() + .await + .context("error adding network model")?; + + let config = self.get_network_config().context("get network config")?; + do_handle_device(&self.d, &DeviceConfig::NetworkCfg(config)) + .await + .context("do handle network IPVlan endpoint device failed.")?; + + Ok(()) + } + + async fn detach(&self, h: &dyn Hypervisor) -> Result<()> { + self.net_pair + .del_network_model() + .await + .context("error deleting network model")?; + let config = self + .get_network_config() + .context("error getting network config")?; + + h.remove_device(DeviceType::Network(NetworkDevice { + config, + ..Default::default() + })) + .await + .context("remove IPVlan endpoint device by hypervisor failed.")?; + + Ok(()) + } + + async fn save(&self) -> Option { + Some(EndpointState { + ipvlan_endpoint: Some(IpVlanEndpointState { + if_name: self.net_pair.virt_iface.name.clone(), + network_qos: self.net_pair.network_qos, + }), + ..Default::default() + }) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/macvlan_endpoint.rs b/src/runtime-rs/crates/resource/src/network/endpoint/macvlan_endpoint.rs new file mode 100644 index 000000000000..ad390973fbd7 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/endpoint/macvlan_endpoint.rs @@ -0,0 +1,124 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + io::{self, Error}, + sync::Arc, +}; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use tokio::sync::RwLock; + +use hypervisor::{ + device::{ + device_manager::{do_handle_device, DeviceManager}, + driver::NetworkConfig, + DeviceConfig, DeviceType, + }, + Hypervisor, NetworkDevice, +}; + +use super::{ + endpoint_persist::{EndpointState, MacvlanEndpointState}, + Endpoint, +}; +use crate::network::{utils, NetworkPair}; + +#[derive(Debug)] +pub struct MacVlanEndpoint { + pub(crate) net_pair: NetworkPair, + pub(crate) d: Arc>, +} + +impl MacVlanEndpoint { + pub async fn new( + d: &Arc>, + handle: &rtnetlink::Handle, + name: &str, + idx: u32, + model: &str, + queues: usize, + ) -> Result { + let net_pair = NetworkPair::new(handle, idx, name, model, queues) + .await + .context("error creating new networkInterfacePair")?; + + Ok(MacVlanEndpoint { + net_pair, + d: d.clone(), + }) + } + + fn get_network_config(&self) -> Result { + let iface = &self.net_pair.tap.tap_iface; + let guest_mac = utils::parse_mac(&iface.hard_addr).ok_or_else(|| { + Error::new( + io::ErrorKind::InvalidData, + format!("hard_addr {}", &iface.hard_addr), + ) + })?; + + Ok(NetworkConfig { + host_dev_name: iface.name.clone(), + virt_iface_name: 
self.net_pair.virt_iface.name.clone(), + guest_mac: Some(guest_mac), + ..Default::default() + }) + } +} + +#[async_trait] +impl Endpoint for MacVlanEndpoint { + async fn name(&self) -> String { + self.net_pair.virt_iface.name.clone() + } + + async fn hardware_addr(&self) -> String { + self.net_pair.tap.tap_iface.hard_addr.clone() + } + + async fn attach(&self) -> Result<()> { + self.net_pair + .add_network_model() + .await + .context("add network model")?; + + let config = self.get_network_config().context("get network config")?; + do_handle_device(&self.d, &DeviceConfig::NetworkCfg(config)) + .await + .context("do handle network MacVlan endpoint device failed.")?; + + Ok(()) + } + + async fn detach(&self, h: &dyn Hypervisor) -> Result<()> { + self.net_pair + .del_network_model() + .await + .context("del network model")?; + + let config = self.get_network_config().context("get network config")?; + h.remove_device(DeviceType::Network(NetworkDevice { + config, + ..Default::default() + })) + .await + .context("remove MacVlan endpoint device by hypervisor failed.")?; + + Ok(()) + } + + async fn save(&self) -> Option { + Some(EndpointState { + macvlan_endpoint: Some(MacvlanEndpointState { + if_name: self.net_pair.virt_iface.name.clone(), + network_qos: self.net_pair.network_qos, + }), + ..Default::default() + }) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs b/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs new file mode 100644 index 000000000000..1c15f67e0355 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/endpoint/mod.rs @@ -0,0 +1,35 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod physical_endpoint; +pub use physical_endpoint::PhysicalEndpoint; +mod veth_endpoint; +pub use veth_endpoint::VethEndpoint; +mod ipvlan_endpoint; +pub use ipvlan_endpoint::IPVlanEndpoint; +mod vlan_endpoint; +pub use vlan_endpoint::VlanEndpoint; +mod macvlan_endpoint; +pub use macvlan_endpoint::MacVlanEndpoint; +pub mod endpoint_persist; +mod endpoints_test; +mod tap_endpoint; +pub use tap_endpoint::TapEndpoint; + +use anyhow::Result; +use async_trait::async_trait; +use hypervisor::Hypervisor; + +use super::EndpointState; + +#[async_trait] +pub trait Endpoint: std::fmt::Debug + Send + Sync { + async fn name(&self) -> String; + async fn hardware_addr(&self) -> String; + async fn attach(&self) -> Result<()>; + async fn detach(&self, hypervisor: &dyn Hypervisor) -> Result<()>; + async fn save(&self) -> Option; +} diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs b/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs new file mode 100644 index 000000000000..9bb1dbbcf9b6 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/endpoint/physical_endpoint.rs @@ -0,0 +1,162 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::path::Path; +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use hypervisor::device::device_manager::{do_handle_device, DeviceManager}; +use hypervisor::device::DeviceConfig; +use hypervisor::{device::driver, Hypervisor}; +use hypervisor::{get_vfio_device, VfioConfig}; +use tokio::sync::RwLock; + +use super::endpoint_persist::{EndpointState, PhysicalEndpointState}; +use super::Endpoint; +use crate::network::utils::{self, link}; +pub const SYS_PCI_DEVICES_PATH: 
&str = "/sys/bus/pci/devices"; + +#[derive(Debug)] +pub struct VendorDevice { + vendor_id: String, + device_id: String, +} + +impl VendorDevice { + pub fn new(vendor_id: &str, device_id: &str) -> Result { + if vendor_id.is_empty() || device_id.is_empty() { + return Err(anyhow!( + "invalid parameters vendor_id {} device_id {}", + vendor_id, + device_id + )); + } + Ok(Self { + vendor_id: vendor_id.to_string(), + device_id: device_id.to_string(), + }) + } + + pub fn vendor_device_id(&self) -> String { + format!("{}_{}", &self.vendor_id, &self.device_id) + } +} + +#[derive(Debug)] +pub struct PhysicalEndpoint { + iface_name: String, + hard_addr: String, + bdf: String, + driver: String, + vendor_device_id: VendorDevice, + d: Arc>, +} + +impl PhysicalEndpoint { + pub fn new(name: &str, hardware_addr: &[u8], d: Arc>) -> Result { + let driver_info = link::get_driver_info(name).context("get driver info")?; + let bdf = driver_info.bus_info; + let sys_pci_devices_path = Path::new(SYS_PCI_DEVICES_PATH); + // get driver by following symlink /sys/bus/pci/devices/$bdf/driver + let driver_path = sys_pci_devices_path.join(&bdf).join("driver"); + let link = driver_path.read_link().context("read link")?; + let driver = link + .file_name() + .map_or(String::new(), |v| v.to_str().unwrap().to_owned()); + + // get vendor and device id from pci space (sys/bus/pci/devices/$bdf) + let iface_device_path = sys_pci_devices_path.join(&bdf).join("device"); + let device_id = std::fs::read_to_string(&iface_device_path) + .with_context(|| format!("read device path {:?}", &iface_device_path))?; + + let iface_vendor_path = sys_pci_devices_path.join(&bdf).join("vendor"); + let vendor_id = std::fs::read_to_string(&iface_vendor_path) + .with_context(|| format!("read vendor path {:?}", &iface_vendor_path))?; + + Ok(Self { + iface_name: name.to_string(), + hard_addr: utils::get_mac_addr(hardware_addr).context("get mac addr")?, + vendor_device_id: VendorDevice::new(&vendor_id, &device_id) + .context("new vendor device")?, + driver, + bdf, + d, + }) + } +} + +#[async_trait] +impl Endpoint for PhysicalEndpoint { + async fn name(&self) -> String { + self.iface_name.clone() + } + + async fn hardware_addr(&self) -> String { + self.hard_addr.clone() + } + + async fn attach(&self) -> Result<()> { + // bind physical interface from host driver and bind to vfio + driver::bind_device_to_vfio( + &self.bdf, + &self.driver, + &self.vendor_device_id.vendor_device_id(), + ) + .with_context(|| format!("bind physical endpoint from {} to vfio", &self.driver))?; + + let vfio_device = get_vfio_device(self.bdf.clone()).context("get vfio device failed.")?; + let vfio_dev_config = &mut VfioConfig { + host_path: vfio_device.clone(), + dev_type: "pci".to_string(), + hostdev_prefix: "physical_nic_".to_owned(), + ..Default::default() + }; + + // create and insert VFIO device into Kata VM + do_handle_device(&self.d, &DeviceConfig::VfioCfg(vfio_dev_config.clone())) + .await + .context("do handle device failed.")?; + + Ok(()) + } + + // detach for physical endpoint unbinds the physical network interface from vfio-pci + // and binds it back to the saved host driver. + async fn detach(&self, _hypervisor: &dyn Hypervisor) -> Result<()> { + // bind back the physical network interface to host. + // we need to do this even if a new network namespace has not + // been created by virt-containers. + + // we do not need to enter the network namespace to bind back the + // physical interface to host driver. 
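+        // Informal sketch only: under typical Linux sysfs driver-binding semantics
+        // (an assumption here, not something this file defines), handing the device
+        // back to its saved host driver amounts to roughly:
+        //   echo "$bdf" > /sys/bus/pci/drivers/vfio-pci/unbind
+        //   echo "$bdf" > /sys/bus/pci/drivers/$host_driver/bind
+        // The actual mechanics are delegated to driver::bind_device_to_host() below.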
+ driver::bind_device_to_host( + &self.bdf, + &self.driver, + &self.vendor_device_id.vendor_device_id(), + ) + .with_context(|| { + format!( + "bind physical endpoint device from vfio to {}", + &self.driver + ) + })?; + Ok(()) + } + + async fn save(&self) -> Option { + Some(EndpointState { + physical_endpoint: Some(PhysicalEndpointState { + bdf: self.bdf.clone(), + driver: self.driver.clone(), + vendor_id: self.vendor_device_id.vendor_id.clone(), + device_id: self.vendor_device_id.device_id.clone(), + hard_addr: self.hard_addr.clone(), + }), + ..Default::default() + }) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/tap_endpoint.rs b/src/runtime-rs/crates/resource/src/network/endpoint/tap_endpoint.rs new file mode 100644 index 000000000000..e22a91d92208 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/endpoint/tap_endpoint.rs @@ -0,0 +1,124 @@ +// Copyright (c) 2019-2023 Alibaba Cloud +// Copyright (c) 2019-2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::sync::Arc; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use hypervisor::device::device_manager::{do_handle_device, DeviceManager}; +use hypervisor::device::{DeviceConfig, DeviceType}; +use hypervisor::{Hypervisor, NetworkConfig, NetworkDevice}; +use tokio::sync::RwLock; + +use super::endpoint_persist::TapEndpointState; +use super::Endpoint; +use crate::network::network_pair::{get_link_by_name, NetworkInterface}; +use crate::network::{utils, EndpointState}; + +/// TapEndpoint is used to attach to the hypervisor directly +#[derive(Debug)] +pub struct TapEndpoint { + // Index + #[allow(dead_code)] + index: u32, + // Name of virt interface + name: String, + // Hardware address of virt interface + guest_mac: String, + // Tap interface on the host + tap_iface: NetworkInterface, + // Device manager + dev_mgr: Arc>, + // Virtio queue num + queue_num: usize, + // Virtio queue size + queue_size: usize, +} + +impl TapEndpoint { + #[allow(clippy::too_many_arguments)] + pub async fn new( + handle: &rtnetlink::Handle, + index: u32, + name: &str, + tap_name: &str, + guest_mac: &str, + queue_num: usize, + queue_size: usize, + dev_mgr: &Arc>, + ) -> Result { + let tap_link = get_link_by_name(handle, tap_name) + .await + .context("get link by name")?; + let tap_hard_addr = + utils::get_mac_addr(&tap_link.attrs().hardware_addr).context("Get mac addr of tap")?; + + Ok(TapEndpoint { + index, + name: name.to_owned(), + guest_mac: guest_mac.to_owned(), + tap_iface: NetworkInterface { + name: tap_name.to_owned(), + hard_addr: tap_hard_addr, + ..Default::default() + }, + dev_mgr: dev_mgr.clone(), + queue_num, + queue_size, + }) + } + + fn get_network_config(&self) -> Result { + let guest_mac = utils::parse_mac(&self.guest_mac).context("Parse mac address")?; + Ok(NetworkConfig { + host_dev_name: self.tap_iface.name.clone(), + virt_iface_name: self.name.clone(), + guest_mac: Some(guest_mac), + queue_num: self.queue_num, + queue_size: self.queue_size, + ..Default::default() + }) + } +} + +#[async_trait] +impl Endpoint for TapEndpoint { + async fn name(&self) -> String { + self.name.clone() + } + + async fn hardware_addr(&self) -> String { + self.guest_mac.clone() + } + + async fn attach(&self) -> Result<()> { + let config = self.get_network_config().context("Get network config")?; + do_handle_device(&self.dev_mgr, &DeviceConfig::NetworkCfg(config)) + .await + .context("Handle device")?; + Ok(()) + } + + async fn detach(&self, h: &dyn Hypervisor) -> Result<()> { + let config = 
self.get_network_config().context("Get network config")?; + h.remove_device(DeviceType::Network(NetworkDevice { + config, + ..Default::default() + })) + .await + .context("Remove device")?; + Ok(()) + } + + async fn save(&self) -> Option { + Some(EndpointState { + tap_endpoint: Some(TapEndpointState { + if_name: self.name.clone(), + }), + ..Default::default() + }) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/veth_endpoint.rs b/src/runtime-rs/crates/resource/src/network/endpoint/veth_endpoint.rs new file mode 100644 index 000000000000..b24b5cf31ca6 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/endpoint/veth_endpoint.rs @@ -0,0 +1,124 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + io::{self, Error}, + sync::Arc, +}; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use tokio::sync::RwLock; + +use hypervisor::{ + device::{ + device_manager::{do_handle_device, DeviceManager}, + driver::NetworkConfig, + DeviceConfig, DeviceType, + }, + Hypervisor, NetworkDevice, +}; + +use super::{ + endpoint_persist::{EndpointState, VethEndpointState}, + Endpoint, +}; +use crate::network::{utils, NetworkPair}; + +#[derive(Debug)] +pub struct VethEndpoint { + pub(crate) net_pair: NetworkPair, + pub(crate) d: Arc>, +} + +impl VethEndpoint { + pub async fn new( + d: &Arc>, + handle: &rtnetlink::Handle, + name: &str, + idx: u32, + model: &str, + queues: usize, + ) -> Result { + let net_pair = NetworkPair::new(handle, idx, name, model, queues) + .await + .context("new network interface pair failed.")?; + + Ok(VethEndpoint { + net_pair, + d: d.clone(), + }) + } + + fn get_network_config(&self) -> Result { + let iface = &self.net_pair.tap.tap_iface; + let guest_mac = utils::parse_mac(&iface.hard_addr).ok_or_else(|| { + Error::new( + io::ErrorKind::InvalidData, + format!("hard_addr {}", &iface.hard_addr), + ) + })?; + + Ok(NetworkConfig { + host_dev_name: iface.name.clone(), + virt_iface_name: self.net_pair.virt_iface.name.clone(), + guest_mac: Some(guest_mac), + ..Default::default() + }) + } +} + +#[async_trait] +impl Endpoint for VethEndpoint { + async fn name(&self) -> String { + self.net_pair.virt_iface.name.clone() + } + + async fn hardware_addr(&self) -> String { + self.net_pair.tap.tap_iface.hard_addr.clone() + } + + async fn attach(&self) -> Result<()> { + self.net_pair + .add_network_model() + .await + .context("add network model")?; + + let config = self.get_network_config().context("get network config")?; + do_handle_device(&self.d, &DeviceConfig::NetworkCfg(config)) + .await + .context("do handle network Veth endpoint device failed.")?; + + Ok(()) + } + + async fn detach(&self, h: &dyn Hypervisor) -> Result<()> { + self.net_pair + .del_network_model() + .await + .context("del network model failed.")?; + + let config = self.get_network_config().context("get network config")?; + h.remove_device(DeviceType::Network(NetworkDevice { + config, + ..Default::default() + })) + .await + .context("remove Veth endpoint device by hypervisor failed.")?; + + Ok(()) + } + + async fn save(&self) -> Option { + Some(EndpointState { + veth_endpoint: Some(VethEndpointState { + if_name: self.net_pair.virt_iface.name.clone(), + network_qos: self.net_pair.network_qos, + }), + ..Default::default() + }) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/endpoint/vlan_endpoint.rs b/src/runtime-rs/crates/resource/src/network/endpoint/vlan_endpoint.rs new file mode 
100644 index 000000000000..bfc852d39b1b --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/endpoint/vlan_endpoint.rs @@ -0,0 +1,125 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + io::{self, Error}, + sync::Arc, +}; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use tokio::sync::RwLock; + +use hypervisor::{ + device::{ + device_manager::{do_handle_device, DeviceManager}, + driver::NetworkConfig, + DeviceConfig, DeviceType, + }, + Hypervisor, NetworkDevice, +}; + +use super::{ + endpoint_persist::{EndpointState, VlanEndpointState}, + Endpoint, +}; +use crate::network::{network_model::TC_FILTER_NET_MODEL_STR, utils, NetworkPair}; + +#[derive(Debug)] +pub struct VlanEndpoint { + pub(crate) net_pair: NetworkPair, + pub(crate) d: Arc>, +} + +impl VlanEndpoint { + pub async fn new( + d: &Arc>, + handle: &rtnetlink::Handle, + name: &str, + idx: u32, + queues: usize, + ) -> Result { + let net_pair = NetworkPair::new(handle, idx, name, TC_FILTER_NET_MODEL_STR, queues) + .await + .context("new network interface pair failed.")?; + + Ok(VlanEndpoint { + net_pair, + d: d.clone(), + }) + } + + fn get_network_config(&self) -> Result { + let iface = &self.net_pair.tap.tap_iface; + let guest_mac = utils::parse_mac(&iface.hard_addr).ok_or_else(|| { + Error::new( + io::ErrorKind::InvalidData, + format!("hard_addr {}", &iface.hard_addr), + ) + })?; + + Ok(NetworkConfig { + host_dev_name: iface.name.clone(), + virt_iface_name: self.net_pair.virt_iface.name.clone(), + guest_mac: Some(guest_mac), + ..Default::default() + }) + } +} + +#[async_trait] +impl Endpoint for VlanEndpoint { + async fn name(&self) -> String { + self.net_pair.virt_iface.name.clone() + } + + async fn hardware_addr(&self) -> String { + self.net_pair.tap.tap_iface.hard_addr.clone() + } + + async fn attach(&self) -> Result<()> { + self.net_pair + .add_network_model() + .await + .context("add network model failed.")?; + + let config = self.get_network_config().context("get network config")?; + do_handle_device(&self.d, &DeviceConfig::NetworkCfg(config)) + .await + .context("do handle network Vlan endpoint device failed.")?; + + Ok(()) + } + + async fn detach(&self, h: &dyn Hypervisor) -> Result<()> { + self.net_pair + .del_network_model() + .await + .context("delete network model failed.")?; + + let config = self + .get_network_config() + .context("get network config failed.")?; + h.remove_device(DeviceType::Network(NetworkDevice { + config, + ..Default::default() + })) + .await + .context("remove Vlan endpoint device by hypervisor failed.")?; + + Ok(()) + } + + async fn save(&self) -> Option { + Some(EndpointState { + vlan_endpoint: Some(VlanEndpointState { + if_name: self.net_pair.virt_iface.name.clone(), + network_qos: self.net_pair.network_qos, + }), + ..Default::default() + }) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/mod.rs b/src/runtime-rs/crates/resource/src/network/mod.rs new file mode 100644 index 000000000000..5a85ee08958e --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/mod.rs @@ -0,0 +1,64 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::sync::Arc; + +mod dan; +mod endpoint; +pub use dan::{dan_config_path, Dan, DanNetworkConfig}; +pub use endpoint::endpoint_persist::EndpointState; +pub use endpoint::Endpoint; +mod network_entity; +mod network_info; +pub use 
network_info::NetworkInfo; +mod network_model; +pub use network_model::NetworkModel; +mod network_with_netns; +pub use network_with_netns::NetworkWithNetNsConfig; +use network_with_netns::NetworkWithNetns; +mod network_pair; +use network_pair::NetworkPair; +mod utils; +use tokio::sync::RwLock; +pub use utils::netns::{generate_netns_name, NetnsGuard}; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use hypervisor::{device::device_manager::DeviceManager, Hypervisor}; + +#[derive(Debug)] +pub enum NetworkConfig { + NetNs(NetworkWithNetNsConfig), + Dan(DanNetworkConfig), +} + +#[async_trait] +pub trait Network: Send + Sync { + async fn setup(&self) -> Result<()>; + async fn interfaces(&self) -> Result>; + async fn routes(&self) -> Result>; + async fn neighs(&self) -> Result>; + async fn save(&self) -> Option>; + async fn remove(&self, h: &dyn Hypervisor) -> Result<()>; +} + +pub async fn new( + config: &NetworkConfig, + d: Arc>, +) -> Result> { + match config { + NetworkConfig::NetNs(c) => Ok(Arc::new( + NetworkWithNetns::new(c, d) + .await + .context("new network with netns")?, + )), + NetworkConfig::Dan(c) => Ok(Arc::new( + Dan::new(c, d) + .await + .context("New directly attachable network")?, + )), + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_entity.rs b/src/runtime-rs/crates/resource/src/network/network_entity.rs new file mode 100644 index 000000000000..5182dfe4b0ac --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_entity.rs @@ -0,0 +1,24 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::sync::Arc; + +use super::{Endpoint, NetworkInfo}; + +#[derive(Debug)] +pub(crate) struct NetworkEntity { + pub(crate) endpoint: Arc, + pub(crate) network_info: Arc, +} + +impl NetworkEntity { + pub fn new(endpoint: Arc, network_info: Arc) -> Self { + Self { + endpoint, + network_info, + } + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_info/mod.rs b/src/runtime-rs/crates/resource/src/network/network_info/mod.rs new file mode 100644 index 000000000000..a0e896bb3aff --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_info/mod.rs @@ -0,0 +1,19 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub(crate) mod network_info_from_dan; +pub(crate) mod network_info_from_link; + +use agent::{ARPNeighbor, Interface, Route}; +use anyhow::Result; +use async_trait::async_trait; + +#[async_trait] +pub trait NetworkInfo: std::fmt::Debug + Send + Sync { + async fn interface(&self) -> Result; + async fn routes(&self) -> Result>; + async fn neighs(&self) -> Result>; +} diff --git a/src/runtime-rs/crates/resource/src/network/network_info/network_info_from_dan.rs b/src/runtime-rs/crates/resource/src/network/network_info/network_info_from_dan.rs new file mode 100644 index 000000000000..5ca06d340ccf --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_info/network_info_from_dan.rs @@ -0,0 +1,213 @@ +// Copyright (c) 2019-2023 Alibaba Cloud +// Copyright (c) 2019-2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use agent::{ARPNeighbor, IPAddress, Interface, Route}; +use anyhow::Result; +use async_trait::async_trait; +use netlink_packet_route::IFF_NOARP; + +use super::NetworkInfo; +use crate::network::dan::DanDevice; +use crate::network::utils::address::{ip_family_from_ip_addr, parse_ip_cidr}; + +/// 
NetworkInfoFromDan is responsible for converting network info in JSON +/// to agent's network info. +#[derive(Debug)] +pub(crate) struct NetworkInfoFromDan { + interface: Interface, + routes: Vec, + neighs: Vec, +} + +impl NetworkInfoFromDan { + pub async fn new(dan_device: &DanDevice) -> Result { + let ip_addresses = dan_device + .network_info + .interface + .ip_addresses + .iter() + .filter_map(|addr| { + let (ipaddr, mask) = match parse_ip_cidr(addr) { + Ok(ip_cidr) => (ip_cidr.0, ip_cidr.1), + Err(_) => return None, + }; + // Skip if it is a loopback address + if ipaddr.is_loopback() { + return None; + } + + Some(IPAddress { + family: ip_family_from_ip_addr(&ipaddr), + address: ipaddr.to_string(), + mask: format!("{}", mask), + }) + }) + .collect(); + + let interface = Interface { + device: dan_device.name.clone(), + name: dan_device.name.clone(), + ip_addresses, + mtu: dan_device.network_info.interface.mtu, + hw_addr: dan_device.guest_mac.clone(), + pci_addr: String::default(), + field_type: dan_device.network_info.interface.ntype.clone(), + raw_flags: dan_device.network_info.interface.flags & IFF_NOARP, + }; + + let routes = dan_device + .network_info + .routes + .iter() + .filter_map(|route| { + let family = match route.ip_family() { + Ok(family) => family, + Err(_) => return None, + }; + Some(Route { + dest: route.dest.clone(), + gateway: route.gateway.clone(), + device: dan_device.name.clone(), + source: route.source.clone(), + scope: route.scope, + family, + }) + }) + .collect(); + + let neighs = dan_device + .network_info + .neighbors + .iter() + .map(|neigh| { + let to_ip_address = neigh.ip_address.as_ref().and_then(|ip_address| { + parse_ip_cidr(ip_address) + .ok() + .map(|(ipaddr, mask)| IPAddress { + family: ip_family_from_ip_addr(&ipaddr), + address: ipaddr.to_string(), + mask: format!("{}", mask), + }) + }); + + ARPNeighbor { + to_ip_address, + device: dan_device.name.clone(), + ll_addr: neigh.hardware_addr.clone(), + state: neigh.state as i32, + flags: neigh.flags as i32, + } + }) + .collect(); + + Ok(Self { + interface, + routes, + neighs, + }) + } +} + +#[async_trait] +impl NetworkInfo for NetworkInfoFromDan { + async fn interface(&self) -> Result { + Ok(self.interface.clone()) + } + + async fn routes(&self) -> Result> { + Ok(self.routes.clone()) + } + + async fn neighs(&self) -> Result> { + Ok(self.neighs.clone()) + } +} + +#[cfg(test)] +mod tests { + use agent::{ARPNeighbor, IPAddress, IPFamily, Interface, Route}; + + use super::NetworkInfoFromDan; + use crate::network::dan::{ + ARPNeighbor as DanARPNeighbor, DanDevice, Device, Interface as DanInterface, + NetworkInfo as DanNetworkInfo, Route as DanRoute, + }; + use crate::network::NetworkInfo; + + #[tokio::test] + async fn test_network_info_from_dan() { + let dan_device = DanDevice { + name: "eth0".to_owned(), + guest_mac: "xx:xx:xx:xx:xx".to_owned(), + device: Device::HostTap { + tap_name: "tap0".to_owned(), + queue_num: 0, + queue_size: 0, + }, + network_info: DanNetworkInfo { + interface: DanInterface { + ip_addresses: vec!["192.168.0.1/24".to_owned()], + mtu: 1500, + ntype: "tuntap".to_owned(), + flags: 0, + }, + routes: vec![DanRoute { + dest: "172.18.0.0/16".to_owned(), + source: "172.18.0.1".to_owned(), + gateway: "172.18.31.1".to_owned(), + scope: 0, + }], + neighbors: vec![DanARPNeighbor { + ip_address: Some("192.168.0.3/16".to_owned()), + hardware_addr: "yy:yy:yy:yy:yy".to_owned(), + state: 0, + flags: 0, + }], + }, + }; + + let network_info = NetworkInfoFromDan::new(&dan_device).await.unwrap(); + + let 
interface = Interface { + device: "eth0".to_owned(), + name: "eth0".to_owned(), + ip_addresses: vec![IPAddress { + family: IPFamily::V4, + address: "192.168.0.1".to_owned(), + mask: "24".to_owned(), + }], + mtu: 1500, + hw_addr: "xx:xx:xx:xx:xx".to_owned(), + pci_addr: String::default(), + field_type: "tuntap".to_owned(), + raw_flags: 0, + }; + assert_eq!(interface, network_info.interface().await.unwrap()); + + let routes = vec![Route { + dest: "172.18.0.0/16".to_owned(), + gateway: "172.18.31.1".to_owned(), + device: "eth0".to_owned(), + source: "172.18.0.1".to_owned(), + scope: 0, + family: IPFamily::V4, + }]; + assert_eq!(routes, network_info.routes().await.unwrap()); + + let neighbors = vec![ARPNeighbor { + to_ip_address: Some(IPAddress { + family: IPFamily::V4, + address: "192.168.0.3".to_owned(), + mask: "16".to_owned(), + }), + device: "eth0".to_owned(), + ll_addr: "yy:yy:yy:yy:yy".to_owned(), + state: 0, + flags: 0, + }]; + assert_eq!(neighbors, network_info.neighs().await.unwrap()); + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_info/network_info_from_link.rs b/src/runtime-rs/crates/resource/src/network/network_info/network_info_from_link.rs new file mode 100644 index 000000000000..d6dc0a82b615 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_info/network_info_from_link.rs @@ -0,0 +1,230 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::convert::TryFrom; + +use agent::{ARPNeighbor, IPAddress, IPFamily, Interface, Route}; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use futures::stream::TryStreamExt; +use netlink_packet_route::{ + self, neighbour::NeighbourMessage, nlas::neighbour::Nla, route::RouteMessage, +}; + +use super::NetworkInfo; +use crate::network::utils::{ + address::{parse_ip, Address}, + link::{self, LinkAttrs}, +}; + +#[derive(Debug)] +pub(crate) struct NetworkInfoFromLink { + interface: Interface, + neighs: Vec, + routes: Vec, +} + +impl NetworkInfoFromLink { + pub async fn new( + handle: &rtnetlink::Handle, + link: &dyn link::Link, + addrs: Vec, + hw_addr: &str, + ) -> Result { + let attrs = link.attrs(); + let name = &attrs.name; + + Ok(Self { + interface: Interface { + device: name.clone(), + name: name.clone(), + ip_addresses: addrs.clone(), + mtu: attrs.mtu as u64, + hw_addr: hw_addr.to_string(), + pci_addr: Default::default(), + field_type: link.r#type().to_string(), + raw_flags: attrs.flags & libc::IFF_NOARP as u32, + }, + neighs: handle_neighbors(handle, attrs) + .await + .context("handle neighbours")?, + routes: handle_routes(handle, attrs) + .await + .context("handle routes")?, + }) + } +} + +pub async fn handle_addresses( + handle: &rtnetlink::Handle, + attrs: &LinkAttrs, +) -> Result> { + let mut addr_msg_list = handle + .address() + .get() + .set_link_index_filter(attrs.index) + .execute(); + + let mut addresses = vec![]; + while let Some(addr_msg) = addr_msg_list + .try_next() + .await + .context("try next address msg")? 
+ { + let family = addr_msg.header.family as i32; + if family != libc::AF_INET && family != libc::AF_INET6 { + warn!(sl!(), "unsupported ip family {}", family); + continue; + } + let a = Address::try_from(addr_msg).context("get addr from msg")?; + if a.addr.is_loopback() { + continue; + } + + addresses.push(IPAddress { + family: if a.addr.is_ipv4() { + IPFamily::V4 + } else { + IPFamily::V6 + }, + address: a.addr.to_string(), + mask: a.perfix_len.to_string(), + }); + } + Ok(addresses) +} + +fn generate_neigh(name: &str, n: &NeighbourMessage) -> Result { + let mut neigh = ARPNeighbor { + device: name.to_string(), + state: n.header.state as i32, + ..Default::default() + }; + for nla in &n.nlas { + match nla { + Nla::Destination(addr) => { + let dest = parse_ip(addr, n.header.family).context("parse ip")?; + let addr = Some(IPAddress { + family: if dest.is_ipv4() { + IPFamily::V4 + } else { + IPFamily::V6 + }, + address: dest.to_string(), + mask: "".to_string(), + }); + neigh.to_ip_address = addr; + } + Nla::LinkLocalAddress(addr) => { + if addr.len() < 6 { + continue; + } + let lladdr = format!( + "{:<02x}:{:<02x}:{:<02x}:{:<02x}:{:<02x}:{:<02x}", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5] + ); + neigh.ll_addr = lladdr; + } + _ => { + // skip the unused Nla + } + } + } + + Ok(neigh) +} + +async fn handle_neighbors( + handle: &rtnetlink::Handle, + attrs: &LinkAttrs, +) -> Result> { + let name = &attrs.name; + let mut neighs = vec![]; + let mut neigh_msg_list = handle.neighbours().get().execute(); + while let Some(neigh) = neigh_msg_list + .try_next() + .await + .context("try next neigh msg")? + { + // get neigh filter with index + if neigh.header.ifindex == attrs.index { + neighs.push(generate_neigh(name, &neigh).context("generate neigh")?) + } + } + Ok(neighs) +} + +fn generate_route(name: &str, route: &RouteMessage) -> Result> { + if route.header.protocol == libc::RTPROT_KERNEL { + return Ok(None); + } + + Ok(Some(Route { + dest: route + .destination_prefix() + .map(|(addr, prefix)| format!("{}/{}", addr, prefix)) + .unwrap_or_default(), + gateway: route.gateway().map(|v| v.to_string()).unwrap_or_default(), + device: name.to_string(), + source: route + .source_prefix() + .map(|(addr, _)| addr.to_string()) + .unwrap_or_default(), + scope: route.header.scope as u32, + family: if route.header.address_family == libc::AF_INET as u8 { + IPFamily::V4 + } else { + IPFamily::V6 + }, + })) +} + +async fn get_route_from_msg( + routes: &mut Vec, + handle: &rtnetlink::Handle, + attrs: &LinkAttrs, + ip_version: rtnetlink::IpVersion, +) -> Result<()> { + let name = &attrs.name; + let mut route_msg_list = handle.route().get(ip_version).execute(); + while let Some(route) = route_msg_list.try_next().await? { + // get route filter with index + if let Some(index) = route.output_interface() { + if index == attrs.index { + if let Some(route) = generate_route(name, &route).context("generate route")? 
{ + routes.push(route); + } + } + } + } + Ok(()) +} + +async fn handle_routes(handle: &rtnetlink::Handle, attrs: &LinkAttrs) -> Result> { + let mut routes = vec![]; + get_route_from_msg(&mut routes, handle, attrs, rtnetlink::IpVersion::V4) + .await + .context("get ip v4 route")?; + get_route_from_msg(&mut routes, handle, attrs, rtnetlink::IpVersion::V6) + .await + .context("get ip v6 route")?; + Ok(routes) +} + +#[async_trait] +impl NetworkInfo for NetworkInfoFromLink { + async fn interface(&self) -> Result { + Ok(self.interface.clone()) + } + + async fn routes(&self) -> Result> { + Ok(self.routes.clone()) + } + + async fn neighs(&self) -> Result> { + Ok(self.neighs.clone()) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_model/mod.rs b/src/runtime-rs/crates/resource/src/network/network_model/mod.rs new file mode 100644 index 000000000000..d96abf3da640 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_model/mod.rs @@ -0,0 +1,40 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub mod none_model; +pub mod tc_filter_model; +pub mod test_network_model; +use std::sync::Arc; + +use anyhow::{Context, Result}; +use async_trait::async_trait; + +use super::NetworkPair; + +pub(crate) const TC_FILTER_NET_MODEL_STR: &str = "tcfilter"; + +pub enum NetworkModelType { + NoneModel, + TcFilter, +} + +#[async_trait] +pub trait NetworkModel: std::fmt::Debug + Send + Sync { + fn model_type(&self) -> NetworkModelType; + async fn add(&self, net_pair: &NetworkPair) -> Result<()>; + async fn del(&self, net_pair: &NetworkPair) -> Result<()>; +} + +pub fn new(model: &str) -> Result> { + match model { + TC_FILTER_NET_MODEL_STR => Ok(Arc::new( + tc_filter_model::TcFilterModel::new().context("new tc filter model")?, + )), + _ => Ok(Arc::new( + none_model::NoneModel::new().context("new none model")?, + )), + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_model/none_model.rs b/src/runtime-rs/crates/resource/src/network/network_model/none_model.rs new file mode 100644 index 000000000000..f68b4d3e223f --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_model/none_model.rs @@ -0,0 +1,35 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::Result; +use async_trait::async_trait; + +use super::{NetworkModel, NetworkModelType}; +use crate::network::NetworkPair; + +#[derive(Debug)] +pub(crate) struct NoneModel {} + +impl NoneModel { + pub fn new() -> Result { + Ok(Self {}) + } +} + +#[async_trait] +impl NetworkModel for NoneModel { + fn model_type(&self) -> NetworkModelType { + NetworkModelType::NoneModel + } + + async fn add(&self, _pair: &NetworkPair) -> Result<()> { + Ok(()) + } + + async fn del(&self, _pair: &NetworkPair) -> Result<()> { + Ok(()) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_model/tc_filter_model.rs b/src/runtime-rs/crates/resource/src/network/network_model/tc_filter_model.rs new file mode 100644 index 000000000000..ff689b9b845b --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_model/tc_filter_model.rs @@ -0,0 +1,104 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use rtnetlink::Handle; +use scopeguard::defer; + +use super::{NetworkModel, NetworkModelType}; 
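+// The tc-filter model mirrors every frame between the host tap and the
+// container's veth end: add() installs an ingress qdisc on both devices and a
+// mirred redirect filter in each direction. As a hedged, informal sketch
+// (assuming standard iproute2 semantics; these commands are not executed by
+// this code), the netlink operations below correspond roughly to:
+//   tc qdisc  add dev <tap>  ingress
+//   tc qdisc  add dev <veth> ingress
+//   tc filter add dev <tap>  parent ffff: protocol all \
+//       u32 match u8 0 0 action mirred egress redirect dev <veth>
+//   tc filter add dev <veth> parent ffff: protocol all \
+//       u32 match u8 0 0 action mirred egress redirect dev <tap>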
+use crate::network::NetworkPair; + +#[derive(Debug)] +pub(crate) struct TcFilterModel {} + +impl TcFilterModel { + pub fn new() -> Result { + Ok(Self {}) + } +} + +#[async_trait] +impl NetworkModel for TcFilterModel { + fn model_type(&self) -> NetworkModelType { + NetworkModelType::TcFilter + } + + async fn add(&self, pair: &NetworkPair) -> Result<()> { + let (connection, handle, _) = rtnetlink::new_connection().context("new connection")?; + let thread_handler = tokio::spawn(connection); + + defer!({ + thread_handler.abort(); + }); + + let tap_index = fetch_index(&handle, pair.tap.tap_iface.name.as_str()) + .await + .context("fetch tap by index")?; + let virt_index = fetch_index(&handle, pair.virt_iface.name.as_str()) + .await + .context("fetch virt by index")?; + + handle + .qdisc() + .add(tap_index as i32) + .ingress() + .execute() + .await + .context("add tap ingress")?; + + handle + .qdisc() + .add(virt_index as i32) + .ingress() + .execute() + .await + .context("add virt ingress")?; + + handle + .traffic_filter(tap_index as i32) + .add() + .parent(0xffff0000) + // get protocol with network byte order + .protocol(0x0003_u16.to_be()) + .redirect(virt_index) + .execute() + .await + .context("add redirect for tap")?; + + handle + .traffic_filter(virt_index as i32) + .add() + .parent(0xffff0000) + // get protocol with network byte order + .protocol(0x0003_u16.to_be()) + .redirect(tap_index) + .execute() + .await + .context("add redirect for virt")?; + + Ok(()) + } + + async fn del(&self, pair: &NetworkPair) -> Result<()> { + let (connection, handle, _) = rtnetlink::new_connection().context("new connection")?; + let thread_handler = tokio::spawn(connection); + defer!({ + thread_handler.abort(); + }); + let virt_index = fetch_index(&handle, &pair.virt_iface.name).await?; + handle.qdisc().del(virt_index as i32).execute().await?; + Ok(()) + } +} + +pub async fn fetch_index(handle: &Handle, name: &str) -> Result { + let link = crate::network::network_pair::get_link_by_name(handle, name) + .await + .context("get link by name")?; + let base = link.attrs(); + Ok(base.index) +} diff --git a/src/runtime-rs/crates/resource/src/network/network_model/test_network_model.rs b/src/runtime-rs/crates/resource/src/network/network_model/test_network_model.rs new file mode 100644 index 000000000000..bd1bb628f289 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_model/test_network_model.rs @@ -0,0 +1,39 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[cfg(test)] +mod tests { + use crate::network::{ + network_model::{tc_filter_model::fetch_index, TC_FILTER_NET_MODEL_STR}, + network_pair::NetworkPair, + }; + use anyhow::Context; + use scopeguard::defer; + #[actix_rt::test] + async fn test_tc_redirect_network() { + if let Ok((connection, handle, _)) = rtnetlink::new_connection().context("new connection") { + let thread_handler = tokio::spawn(connection); + defer!({ + thread_handler.abort(); + }); + + handle + .link() + .add() + .veth("foo".to_string(), "bar".to_string()); + + if let Ok(net_pair) = + NetworkPair::new(&handle, 1, "bar", TC_FILTER_NET_MODEL_STR, 2).await + { + if let Ok(index) = fetch_index(&handle, "bar").await { + assert!(net_pair.add_network_model().await.is_ok()); + assert!(net_pair.del_network_model().await.is_ok()); + assert!(handle.link().del(index).execute().await.is_ok()); + } + } + } + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_pair.rs 
b/src/runtime-rs/crates/resource/src/network/network_pair.rs new file mode 100644 index 000000000000..bfb0623fe6ea --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_pair.rs @@ -0,0 +1,262 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{convert::TryFrom, sync::Arc, usize}; + +use anyhow::{anyhow, Context, Result}; +use futures::stream::TryStreamExt; + +use super::{ + network_model, + utils::{self, address::Address, link}, +}; + +const TAP_SUFFIX: &str = "_kata"; + +#[derive(Default, Copy, Clone, Debug, PartialEq, Eq)] +pub struct NetInterworkingModel(u32); + +#[derive(Default, Debug, Clone)] +pub struct NetworkInterface { + pub name: String, + pub hard_addr: String, + pub addrs: Vec
, +} + +#[derive(Default, Debug)] +pub struct TapInterface { + pub id: String, + pub name: String, + pub tap_iface: NetworkInterface, +} +#[derive(Debug)] +pub struct NetworkPair { + pub tap: TapInterface, + pub virt_iface: NetworkInterface, + pub model: Arc, + pub network_qos: bool, +} + +impl NetworkPair { + pub(crate) async fn new( + handle: &rtnetlink::Handle, + idx: u32, + name: &str, + model: &str, + queues: usize, + ) -> Result { + let unique_id = kata_sys_util::rand::UUID::new(); + let model = network_model::new(model).context("new network model")?; + let tap_iface_name = format!("tap{}{}", idx, TAP_SUFFIX); + let virt_iface_name = format!("eth{}", idx); + let tap_link = create_link(handle, &tap_iface_name, queues) + .await + .context("create link")?; + + let virt_link = get_link_by_name(handle, virt_iface_name.clone().as_str()) + .await + .context("get link by name")?; + + let mut virt_addr_msg_list = handle + .address() + .get() + .set_link_index_filter(virt_link.attrs().index) + .execute(); + + let mut virt_address = vec![]; + while let Some(addr_msg) = virt_addr_msg_list.try_next().await? { + let addr = Address::try_from(addr_msg).context("get address from msg")?; + virt_address.push(addr); + } + + // Save the veth MAC address to the TAP so that it can later be used + // to build the hypervisor command line. This MAC address has to be + // the one inside the VM in order to avoid any firewall issues. The + // bridge created by the network plugin on the host actually expects + // to see traffic from this MAC address and not another one. + let tap_hard_addr = + utils::get_mac_addr(&virt_link.attrs().hardware_addr).context("get mac addr")?; + + // Save the TAP Mac address to the virt_iface so that it can later updated + // the guest's gateway IP's mac as this TAP device. This MAC address has + // to be inside the VM in order to the network reach to the gateway. 
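+        // To summarize the swap performed by the two assignments around this point
+        // (purely descriptive, no new behaviour): tap.tap_iface.hard_addr ends up
+        // holding the veth end's original MAC (the address the guest must see),
+        // while virt_iface.hard_addr records the newly created tap device's MAC.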
+ let virt_hard_addr = + utils::get_mac_addr(&tap_link.attrs().hardware_addr).context("get mac addr")?; + + handle + .link() + .set(tap_link.attrs().index) + .mtu(virt_link.attrs().mtu) + .execute() + .await + .context("set link mtu")?; + + handle + .link() + .set(tap_link.attrs().index) + .up() + .execute() + .await + .context("set link up")?; + + let mut net_pair = NetworkPair { + tap: TapInterface { + id: String::from(&unique_id), + name: format!("br{}{}", idx, TAP_SUFFIX), + tap_iface: NetworkInterface { + name: tap_iface_name, + hard_addr: tap_hard_addr, + ..Default::default() + }, + }, + virt_iface: NetworkInterface { + name: virt_iface_name, + hard_addr: virt_hard_addr, + addrs: virt_address, + }, + model, + network_qos: false, + }; + + if !name.is_empty() { + net_pair.virt_iface.name = String::from(name); + } + + Ok(net_pair) + } + + pub(crate) async fn add_network_model(&self) -> Result<()> { + let model = self.model.clone(); + model.add(self).await.context("add")?; + Ok(()) + } + + pub(crate) async fn del_network_model(&self) -> Result<()> { + let model = self.model.clone(); + model.del(self).await.context("del")?; + Ok(()) + } +} + +pub async fn create_link( + handle: &rtnetlink::Handle, + name: &str, + queues: usize, +) -> Result> { + link::create_link(name, link::LinkType::Tap, queues)?; + + let link = get_link_by_name(handle, name) + .await + .context("get link by name")?; + + let base = link.attrs(); + if base.master_index != 0 { + handle + .link() + .set(base.index) + .master(base.master_index) + .execute() + .await + .context("set index")?; + } + Ok(link) +} + +pub async fn get_link_by_name( + handle: &rtnetlink::Handle, + name: &str, +) -> Result> { + let mut link_msg_list = handle.link().get().match_name(name.to_string()).execute(); + let msg = if let Some(msg) = link_msg_list.try_next().await? 
{ + msg + } else { + return Err(anyhow!("failed to find link by name {}", name)); + }; + + Ok(link::get_link_from_message(msg)) +} + +#[cfg(test)] +mod tests { + use scopeguard::defer; + + use super::*; + use crate::network::network_model::TC_FILTER_NET_MODEL_STR; + use test_utils::skip_if_not_root; + use utils::link::net_test_utils::delete_link; + + // this ut tests create_link() and get_link_by_name() + #[actix_rt::test] + async fn test_utils() { + skip_if_not_root!(); + + if let Ok((conn, handle, _)) = + rtnetlink::new_connection().context("failed to create netlink connection") + { + let thread_handler = tokio::spawn(conn); + defer!({ + thread_handler.abort(); + }); + + assert!(create_link(&handle, "kata_test_1", 2).await.is_ok()); + assert!(create_link(&handle, "kata_test_2", 3).await.is_ok()); + assert!(create_link(&handle, "kata_test_3", 4).await.is_ok()); + + assert!(get_link_by_name(&handle, "kata_test_1").await.is_ok()); + assert!(get_link_by_name(&handle, "kata_test_2").await.is_ok()); + assert!(get_link_by_name(&handle, "kata_test_3").await.is_ok()); + + assert!(delete_link(&handle, "kata_test_1").await.is_ok()); + assert!(delete_link(&handle, "kata_test_2").await.is_ok()); + assert!(delete_link(&handle, "kata_test_3").await.is_ok()); + + assert!(get_link_by_name(&handle, "kata_test_1").await.is_err()); + assert!(get_link_by_name(&handle, "kata_test_2").await.is_err()); + assert!(get_link_by_name(&handle, "kata_test_3").await.is_err()); + } + } + + #[actix_rt::test] + async fn test_network_pair() { + let idx = 123456; + let virt_iface_name = format!("eth{}", idx); + let tap_name = format!("tap{}{}", idx, TAP_SUFFIX); + let queues = 2; + let model = TC_FILTER_NET_MODEL_STR; + + skip_if_not_root!(); + + if let Ok((conn, handle, _)) = + rtnetlink::new_connection().context("failed to create netlink connection") + { + let thread_handler = tokio::spawn(conn); + defer!({ + thread_handler.abort(); + }); + // the network pair has not been created + assert!(get_link_by_name(&handle, virt_iface_name.as_str()) + .await + .is_err()); + + // mock containerd to create one end of the network pair + assert!(create_link(&handle, virt_iface_name.as_str(), queues) + .await + .is_ok()); + + if let Ok(_pair) = NetworkPair::new(&handle, idx, "", model, queues).await { + // the pair is created, we can find the two ends of network pair + assert!(get_link_by_name(&handle, virt_iface_name.as_str()) + .await + .is_ok()); + assert!(get_link_by_name(&handle, tap_name.as_str()).await.is_ok()); + + //delete the link created in test + assert!(delete_link(&handle, virt_iface_name.as_str()).await.is_ok()); + assert!(delete_link(&handle, tap_name.as_str()).await.is_ok()); + } + } + } +} diff --git a/src/runtime-rs/crates/resource/src/network/network_with_netns.rs b/src/runtime-rs/crates/resource/src/network/network_with_netns.rs new file mode 100644 index 000000000000..81c8c3939a1b --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/network_with_netns.rs @@ -0,0 +1,304 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + fs, + sync::{ + atomic::{AtomicU32, Ordering}, + Arc, + }, +}; + +use super::endpoint::endpoint_persist::EndpointState; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use futures::stream::TryStreamExt; +use hypervisor::{device::device_manager::DeviceManager, Hypervisor}; +use netns_rs::get_from_path; +use scopeguard::defer; +use tokio::sync::RwLock; + +use super::{ + 
endpoint::{ + Endpoint, IPVlanEndpoint, MacVlanEndpoint, PhysicalEndpoint, VethEndpoint, VlanEndpoint, + }, + network_entity::NetworkEntity, + network_info::network_info_from_link::{handle_addresses, NetworkInfoFromLink}, + utils::{link, netns}, + Network, +}; +use crate::network::NetworkInfo; + +#[derive(Debug)] +pub struct NetworkWithNetNsConfig { + pub network_model: String, + pub netns_path: String, + pub queues: usize, + pub network_created: bool, +} + +struct NetworkWithNetnsInner { + netns_path: String, + entity_list: Vec, + network_created: bool, +} + +impl NetworkWithNetnsInner { + async fn new(config: &NetworkWithNetNsConfig, d: Arc>) -> Result { + let entity_list = if config.netns_path.is_empty() { + warn!(sl!(), "Skip to scan network for empty netns"); + vec![] + } else if config.network_model.as_str() == "none" { + warn!( + sl!(), + "Skip to scan network from netns due to the none network model" + ); + vec![] + } else { + // get endpoint + get_entity_from_netns(config, d) + .await + .context("get entity from netns")? + }; + Ok(Self { + netns_path: config.netns_path.to_string(), + entity_list, + network_created: config.network_created, + }) + } +} + +pub(crate) struct NetworkWithNetns { + inner: Arc>, +} + +impl NetworkWithNetns { + pub(crate) async fn new( + config: &NetworkWithNetNsConfig, + d: Arc>, + ) -> Result { + Ok(Self { + inner: Arc::new(RwLock::new(NetworkWithNetnsInner::new(config, d).await?)), + }) + } +} + +#[async_trait] +impl Network for NetworkWithNetns { + async fn setup(&self) -> Result<()> { + let inner = self.inner.read().await; + let _netns_guard = netns::NetnsGuard::new(&inner.netns_path).context("net netns guard")?; + for e in &inner.entity_list { + e.endpoint.attach().await.context("attach")?; + } + Ok(()) + } + + async fn interfaces(&self) -> Result> { + let inner = self.inner.read().await; + let mut interfaces = vec![]; + for e in &inner.entity_list { + interfaces.push(e.network_info.interface().await.context("interface")?); + } + Ok(interfaces) + } + + async fn routes(&self) -> Result> { + let inner = self.inner.read().await; + let mut routes = vec![]; + for e in &inner.entity_list { + let mut list = e.network_info.routes().await.context("routes")?; + routes.append(&mut list); + } + Ok(routes) + } + + async fn neighs(&self) -> Result> { + let inner = self.inner.read().await; + let mut neighs = vec![]; + for e in &inner.entity_list { + let mut list = e.network_info.neighs().await.context("neighs")?; + neighs.append(&mut list); + } + Ok(neighs) + } + + async fn save(&self) -> Option> { + let inner = self.inner.read().await; + let mut endpoint = vec![]; + for e in &inner.entity_list { + if let Some(state) = e.endpoint.save().await { + endpoint.push(state); + } + } + Some(endpoint) + } + + async fn remove(&self, h: &dyn Hypervisor) -> Result<()> { + let inner = self.inner.read().await; + // The network namespace would have been deleted at this point + // if it has not been created by virtcontainers. 
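+        // Purely descriptive: only a namespace created by this runtime itself
+        // (network_created == true) is torn down here; a netns supplied by the
+        // CNI plugin or containerd is left untouched, and the early return below
+        // skips both endpoint detach and netns removal.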
+ if !inner.network_created { + return Ok(()); + } + { + let _netns_guard = + netns::NetnsGuard::new(&inner.netns_path).context("net netns guard")?; + for e in &inner.entity_list { + e.endpoint.detach(h).await.context("detach")?; + } + } + let netns = get_from_path(inner.netns_path.clone())?; + netns.remove()?; + fs::remove_dir_all(inner.netns_path.clone()).context("failed to remove netns path")?; + Ok(()) + } +} + +async fn get_entity_from_netns( + config: &NetworkWithNetNsConfig, + d: Arc>, +) -> Result> { + info!( + sl!(), + "get network entity from config {:?} tid {:?}", + config, + nix::unistd::gettid() + ); + let mut entity_list = vec![]; + let _netns_guard = netns::NetnsGuard::new(&config.netns_path) + .context("net netns guard") + .unwrap(); + let (connection, handle, _) = rtnetlink::new_connection().context("new connection")?; + let thread_handler = tokio::spawn(connection); + defer!({ + thread_handler.abort(); + }); + + let mut links = handle.link().get().execute(); + + let idx = AtomicU32::new(0); + while let Some(link) = links.try_next().await? { + let link = link::get_link_from_message(link); + let attrs = link.attrs(); + + if (attrs.flags & libc::IFF_LOOPBACK as u32) != 0 { + continue; + } + + let ip_addresses = handle_addresses(&handle, attrs) + .await + .context("handle addresses")?; + // Ignore unconfigured network interfaces. These are either base tunnel devices that are not namespaced + // like gre0, gretap0, sit0, ipip0, tunl0 or incorrectly setup interfaces. + if ip_addresses.is_empty() { + continue; + } + + let idx = idx.fetch_add(1, Ordering::Relaxed); + let (endpoint, network_info) = + create_endpoint(&handle, link.as_ref(), ip_addresses, idx, config, d.clone()) + .await + .context("create endpoint")?; + + entity_list.push(NetworkEntity::new(endpoint, network_info)); + } + + Ok(entity_list) +} + +async fn create_endpoint( + handle: &rtnetlink::Handle, + link: &dyn link::Link, + addrs: Vec, + idx: u32, + config: &NetworkWithNetNsConfig, + d: Arc>, +) -> Result<(Arc, Arc)> { + let _netns_guard = netns::NetnsGuard::new(&config.netns_path) + .context("net netns guard") + .unwrap(); + let attrs = link.attrs(); + let link_type = link.r#type(); + let endpoint: Arc = if is_physical_iface(&attrs.name)? 
{ + info!( + sl!(), + "physical network interface found: {} {:?}", + &attrs.name, + nix::unistd::gettid() + ); + let t = PhysicalEndpoint::new(&attrs.name, &attrs.hardware_addr, d) + .context("new physical endpoint")?; + Arc::new(t) + } else { + info!( + sl!(), + "{} network interface found: {}", &link_type, &attrs.name + ); + match link_type { + "veth" => { + let ret = VethEndpoint::new( + &d, + handle, + &attrs.name, + idx, + &config.network_model, + config.queues, + ) + .await + .context("veth endpoint")?; + Arc::new(ret) + } + "vlan" => { + let ret = VlanEndpoint::new(&d, handle, &attrs.name, idx, config.queues) + .await + .context("vlan endpoint")?; + Arc::new(ret) + } + "ipvlan" => { + let ret = IPVlanEndpoint::new(&d, handle, &attrs.name, idx, config.queues) + .await + .context("ipvlan endpoint")?; + Arc::new(ret) + } + "macvlan" => { + let ret = MacVlanEndpoint::new( + &d, + handle, + &attrs.name, + idx, + &config.network_model, + config.queues, + ) + .await + .context("macvlan endpoint")?; + Arc::new(ret) + } + _ => return Err(anyhow!("unsupported link type: {}", link_type)), + } + }; + + let network_info = Arc::new( + NetworkInfoFromLink::new(handle, link, addrs, &endpoint.hardware_addr().await) + .await + .context("network info from link")?, + ); + + info!(sl!(), "network info {:?}", network_info); + + Ok((endpoint, network_info)) +} + +fn is_physical_iface(name: &str) -> Result { + if name == "lo" { + return Ok(false); + } + let driver_info = link::get_driver_info(name)?; + if driver_info.bus_info.split(':').count() != 3 { + return Ok(false); + } + Ok(true) +} diff --git a/src/runtime-rs/crates/resource/src/network/utils/address.rs b/src/runtime-rs/crates/resource/src/network/utils/address.rs new file mode 100644 index 000000000000..a9a4c75002a3 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/address.rs @@ -0,0 +1,172 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::convert::TryFrom; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; +use std::str::FromStr; + +use agent::IPFamily; +use anyhow::{anyhow, Context, Result}; +use netlink_packet_route::nlas::address::Nla; +use netlink_packet_route::{AddressMessage, AF_INET, AF_INET6}; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Address { + pub addr: IpAddr, + pub label: String, + pub flags: u32, + pub scope: u8, + pub perfix_len: u8, + pub peer: IpAddr, + pub broadcast: IpAddr, + pub prefered_lft: u32, + pub valid_ltf: u32, +} + +impl TryFrom for Address { + type Error = anyhow::Error; + fn try_from(msg: AddressMessage) -> Result { + let AddressMessage { header, nlas } = msg; + let mut addr = Address { + addr: IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), + peer: IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), + broadcast: IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), + label: String::default(), + flags: 0, + scope: header.scope, + perfix_len: header.prefix_len, + prefered_lft: 0, + valid_ltf: 0, + }; + + for nla in nlas.into_iter() { + match nla { + Nla::Address(a) => { + addr.addr = parse_ip(&a, header.family)?; + } + Nla::Broadcast(b) => { + addr.broadcast = parse_ip(&b, header.family)?; + } + Nla::Label(l) => { + addr.label = l; + } + Nla::Flags(f) => { + addr.flags = f; + } + Nla::CacheInfo(_c) => {} + _ => {} + } + } + + Ok(addr) + } +} + +pub(crate) fn parse_ip(ip: &[u8], family: u8) -> Result { + let support_len = if family as u16 == AF_INET { 4 } else { 16 }; + if ip.len() != support_len { + return Err(anyhow!( + "invalid ip 
addresses {:?} support {}", + &ip, + support_len + )); + } + match family as u16 { + AF_INET => Ok(IpAddr::V4(Ipv4Addr::new(ip[0], ip[1], ip[2], ip[3]))), + AF_INET6 => { + let mut octets = [0u8; 16]; + octets.copy_from_slice(&ip[..16]); + Ok(IpAddr::V6(Ipv6Addr::from(octets))) + } + _ => Err(anyhow!("unknown IP network family {}", family)), + } +} + +pub(crate) fn parse_ip_cidr(ip: &str) -> Result<(IpAddr, u8)> { + let items: Vec<&str> = ip.split('/').collect(); + if items.len() != 2 { + return Err(anyhow!(format!( + "{} is a bad IP address in format of CIDR", + ip + ))); + } + let ipaddr = IpAddr::from_str(items[0]).context("Parse IP address from string")?; + let mask = u8::from_str(items[1]).context("Parse mask")?; + if ipaddr.is_ipv4() && mask > 32 { + return Err(anyhow!(format!( + "The mask of IPv4 address should be less than or equal to 32, but we got {}.", + mask + ))); + } + if mask > 128 { + return Err(anyhow!(format!( + "The mask should be less than or equal to 128, but we got {}.", + mask + ))); + } + Ok((ipaddr, mask)) +} + +/// Retrieve IP Family defined at agent crate from IpAddr. +#[inline] +pub(crate) fn ip_family_from_ip_addr(ip_addr: &IpAddr) -> IPFamily { + if ip_addr.is_ipv4() { + IPFamily::V4 + } else { + IPFamily::V6 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_ip() { + let test_ipv4 = [10, 25, 64, 128]; + let ipv4 = parse_ip(test_ipv4.as_slice(), AF_INET as u8).unwrap(); + let expected_ipv4 = IpAddr::V4(Ipv4Addr::new(10, 25, 64, 128)); + assert_eq!(ipv4, expected_ipv4); + + let test_ipv6 = [0, 2, 4, 0, 0, 2, 4, 0, 0, 2, 4, 0, 0, 2, 4, 0]; + let ipv6 = parse_ip(test_ipv6.as_slice(), AF_INET6 as u8).unwrap(); + // two u8 => one u16, (0u8, 2u8 => 0x0002), (4u8, 0u8 => 0x0400) + let expected_ipv6 = IpAddr::V6(Ipv6Addr::new( + 0x0002, 0x0400, 0x0002, 0x0400, 0x0002, 0x0400, 0x0002, 0x0400, + )); + assert_eq!(ipv6, expected_ipv6); + + let fail_ipv4 = [10, 22, 33, 44, 55]; + assert!(parse_ip(fail_ipv4.as_slice(), AF_INET as u8).is_err()); + + let fail_ipv6 = [1, 2, 3, 4, 5, 6, 7, 8, 2, 3]; + assert!(parse_ip(fail_ipv6.as_slice(), AF_INET6 as u8).is_err()); + } + + #[test] + fn test_parse_ip_cidr() { + let test_cases = [ + ("127.0.0.1/32", ("127.0.0.1", 32u8)), + ("2001:4860:4860::8888/32", ("2001:4860:4860::8888", 32u8)), + ("2001:4860:4860::8888/128", ("2001:4860:4860::8888", 128u8)), + ]; + for tc in test_cases.iter() { + let (ipaddr, mask) = parse_ip_cidr(tc.0).unwrap(); + assert_eq!(ipaddr.to_string(), tc.1 .0); + assert_eq!(mask, tc.1 .1); + } + let test_cases = [ + "127.0.0.1/33", + "2001:4860:4860::8888/129", + "2001:4860:4860::8888/300", + "127.0.0.1/33/1", + "127.0.0.1", + ]; + for tc in test_cases.iter() { + assert!(parse_ip_cidr(tc).is_err()); + } + } +} diff --git a/src/runtime-rs/crates/resource/src/network/utils/link/create.rs b/src/runtime-rs/crates/resource/src/network/utils/link/create.rs new file mode 100644 index 000000000000..10c7c79427d6 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/link/create.rs @@ -0,0 +1,189 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + fs::{File, OpenOptions}, + os::unix::io::AsRawFd, + path::Path, + {io, mem}, +}; + +use anyhow::{Context, Result}; +use nix::ioctl_write_ptr; + +use super::macros::{get_name, set_name}; + +type IfName = [u8; libc::IFNAMSIZ]; + +#[derive(Copy, Clone, Debug)] +#[repr(C)] +struct CreateLinkMap { + pub mem_start: libc::c_ulong, + pub mem_end: 
libc::c_ulong, + pub base_addr: libc::c_ushort, + pub irq: libc::c_uchar, + pub dma: libc::c_uchar, + pub port: libc::c_uchar, +} + +#[repr(C)] +union CreateLinkIfru { + pub ifr_addr: libc::sockaddr, + pub ifr_dst_addr: libc::sockaddr, + pub ifr_broad_addr: libc::sockaddr, + pub ifr_netmask: libc::sockaddr, + pub ifr_hw_addr: libc::sockaddr, + pub ifr_flags: libc::c_short, + pub ifr_if_index: libc::c_int, + pub ifr_metric: libc::c_int, + pub ifr_mtu: libc::c_int, + pub ifr_map: CreateLinkMap, + pub ifr_slave: IfName, + pub ifr_new_name: IfName, + pub ifr_data: *mut libc::c_char, +} + +#[repr(C)] +struct CreateLinkReq { + pub ifr_name: IfName, + pub ifr_ifru: CreateLinkIfru, +} + +impl CreateLinkReq { + pub fn from_name(name: &str) -> io::Result { + let mut req: CreateLinkReq = unsafe { mem::zeroed() }; + req.set_name(name)?; + Ok(req) + } + + pub fn set_name(&mut self, name: &str) -> io::Result<()> { + set_name!(self.ifr_name, name) + } + + pub fn get_name(&self) -> io::Result { + get_name!(self.ifr_name) + } + + pub unsafe fn set_raw_flags(&mut self, raw_flags: libc::c_short) { + self.ifr_ifru.ifr_flags = raw_flags; + } +} + +const DEVICE_PATH: &str = "/dev/net/tun"; + +ioctl_write_ptr!(tun_set_iff, b'T', 202, libc::c_int); +ioctl_write_ptr!(tun_set_persist, b'T', 203, libc::c_int); + +#[derive(Clone, Copy, Debug)] +pub enum LinkType { + #[allow(dead_code)] + Tun, + Tap, +} + +pub fn create_link(name: &str, link_type: LinkType, queues: usize) -> Result<()> { + let mut flags = libc::IFF_VNET_HDR; + flags |= match link_type { + LinkType::Tun => libc::IFF_TUN, + LinkType::Tap => libc::IFF_TAP, + }; + + let queues = if queues == 0 { 1 } else { queues }; + if queues > 1 { + flags |= libc::IFF_MULTI_QUEUE | libc::IFF_NO_PI; + } else { + flags |= libc::IFF_ONE_QUEUE; + }; + + // create first queue + let mut files = vec![]; + let (file, result_name) = create_queue(name, flags)?; + unsafe { + tun_set_persist(file.as_raw_fd(), &1).context("tun set persist")?; + } + files.push(file); + + // create other queues + if queues > 1 { + for _ in 0..queues - 1 { + files.push(create_queue(&result_name, flags)?.0); + } + } + + info!(sl!(), "create link with fds {:?}", files); + Ok(()) +} + +fn create_queue(name: &str, flags: libc::c_int) -> Result<(File, String)> { + let path = Path::new(DEVICE_PATH); + let file = OpenOptions::new().read(true).write(true).open(path)?; + let mut req = CreateLinkReq::from_name(name)?; + unsafe { + req.set_raw_flags(flags as libc::c_short); + tun_set_iff(file.as_raw_fd(), &mut req as *mut _ as *mut _).context("tun set iff")?; + }; + Ok((file, req.get_name()?)) +} + +#[cfg(test)] +pub mod net_test_utils { + use crate::network::network_model::tc_filter_model::fetch_index; + + // remove a link by its name + #[allow(dead_code)] + pub async fn delete_link( + handle: &rtnetlink::Handle, + name: &str, + ) -> Result<(), rtnetlink::Error> { + let link_index = fetch_index(handle, name) + .await + .expect("failed to fetch index"); + // the ifindex of a link will not change during its lifetime, so the index + // remains the same between the query above and the deletion below + handle.link().del(link_index).execute().await + } +} + +#[cfg(test)] +mod tests { + use scopeguard::defer; + use test_utils::skip_if_not_root; + + use crate::network::{ + network_pair::get_link_by_name, utils::link::create::net_test_utils::delete_link, + }; + + use super::*; + + #[actix_rt::test] + async fn test_create_link() { + let name_tun = "___test_tun"; + let name_tap = "___test_tap"; + + // tests should be 
taken under root + skip_if_not_root!(); + + if let Ok((conn, handle, _)) = + rtnetlink::new_connection().context("failed to create netlink connection") + { + let thread_handler = tokio::spawn(conn); + defer!({ + thread_handler.abort(); + }); + + assert!(create_link(name_tun, LinkType::Tun, 2).is_ok()); + assert!(create_link(name_tap, LinkType::Tap, 2).is_ok()); + assert!(get_link_by_name(&handle, name_tap).await.is_ok()); + assert!(get_link_by_name(&handle, name_tun).await.is_ok()); + assert!(delete_link(&handle, name_tun).await.is_ok()); + assert!(delete_link(&handle, name_tap).await.is_ok()); + + // link does not present + assert!(get_link_by_name(&handle, name_tun).await.is_err()); + assert!(get_link_by_name(&handle, name_tap).await.is_err()); + } + } +} diff --git a/src/runtime-rs/crates/resource/src/network/utils/link/driver_info.rs b/src/runtime-rs/crates/resource/src/network/utils/link/driver_info.rs new file mode 100644 index 000000000000..a7269d013ad6 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/link/driver_info.rs @@ -0,0 +1,102 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{io, mem}; + +use anyhow::{Context, Result}; +use nix::sys::socket::{socket, AddressFamily, SockFlag, SockType}; +use scopeguard::defer; + +use super::macros::{get_name, set_name}; + +/// FW version length +const ETHTOOL_FW_VERSION_LEN: usize = 32; + +/// bus info length +const ETHTOOL_BUS_INFO_LEN: usize = 32; + +/// erom version length +const ETHTOOL_EROM_VERSION_LEN: usize = 32; + +/// driver info +const ETHTOOL_DRIVER_INFO: u32 = 0x00000003; + +/// Ethtool interface define 0x8946 +const IOCTL_ETHTOOL_INTERFACE: u32 = 0x8946; + +nix::ioctl_readwrite_bad!(ioctl_ethtool, IOCTL_ETHTOOL_INTERFACE, DeviceInfoReq); + +#[repr(C)] +pub union DeviceInfoIfru { + pub ifr_addr: libc::sockaddr, + pub ifr_data: *mut libc::c_char, +} + +type IfName = [u8; libc::IFNAMSIZ]; + +#[repr(C)] +pub struct DeviceInfoReq { + pub ifr_name: IfName, + pub ifr_ifru: DeviceInfoIfru, +} + +impl DeviceInfoReq { + pub fn from_name(name: &str) -> io::Result { + let mut req: DeviceInfoReq = unsafe { mem::zeroed() }; + req.set_name(name)?; + Ok(req) + } + + pub fn set_name(&mut self, name: &str) -> io::Result<()> { + set_name!(self.ifr_name, name) + } +} + +#[repr(C)] +#[derive(Debug, Clone)] +struct Driver { + pub cmd: u32, + pub driver: [u8; 32], + pub version: [u8; 32], + pub fw_version: [u8; ETHTOOL_FW_VERSION_LEN], + pub bus_info: [u8; ETHTOOL_BUS_INFO_LEN], + pub erom_version: [u8; ETHTOOL_EROM_VERSION_LEN], + pub reserved2: [u8; 12], + pub n_priv_flags: u32, + pub n_stats: u32, + pub test_info_len: u32, + pub eedump_len: u32, + pub regdump_len: u32, +} + +#[derive(Debug, Clone)] +pub struct DriverInfo { + pub driver: String, + pub bus_info: String, +} + +pub fn get_driver_info(name: &str) -> Result { + let mut req = DeviceInfoReq::from_name(name).context(format!("ifreq from name {}", name))?; + let mut ereq: Driver = unsafe { mem::zeroed() }; + ereq.cmd = ETHTOOL_DRIVER_INFO; + req.ifr_ifru.ifr_data = &mut ereq as *mut _ as *mut _; + + let fd = socket( + AddressFamily::Inet, + SockType::Datagram, + SockFlag::empty(), + None, + ) + .context("new socket")?; + defer!({ + let _ = nix::unistd::close(fd); + }); + unsafe { ioctl_ethtool(fd, &mut req).context("ioctl ethtool")? 
}; + Ok(DriverInfo { + driver: get_name!(ereq.driver).context("get driver name")?, + bus_info: get_name!(ereq.bus_info).context("get bus info name")?, + }) +} diff --git a/src/runtime-rs/crates/resource/src/network/utils/link/macros.rs b/src/runtime-rs/crates/resource/src/network/utils/link/macros.rs new file mode 100644 index 000000000000..128a76bb29a7 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/link/macros.rs @@ -0,0 +1,48 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +macro_rules! set_name { + ($name_field:expr, $name_str:expr) => {{ + let name_c = &::std::ffi::CString::new($name_str.to_owned()).map_err(|_| { + ::std::io::Error::new( + ::std::io::ErrorKind::InvalidInput, + "malformed interface name", + ) + })?; + let name_slice = name_c.as_bytes_with_nul(); + if name_slice.len() > libc::IFNAMSIZ { + return Err(io::Error::new(::std::io::ErrorKind::InvalidInput, "").into()); + } + $name_field[..name_slice.len()].clone_from_slice(name_slice); + + Ok(()) + }}; +} + +macro_rules! get_name { + ($name_field:expr) => {{ + let nul_pos = match $name_field.iter().position(|x| *x == 0) { + Some(p) => p, + None => { + return Err(::std::io::Error::new( + ::std::io::ErrorKind::InvalidData, + "malformed interface name", + ) + .into()) + } + }; + + std::ffi::CString::new(&$name_field[..nul_pos]) + .unwrap() + .into_string() + .map_err(|_| { + std::io::Error::new(std::io::ErrorKind::InvalidData, "malformed interface name") + }) + }}; +} + +pub(crate) use get_name; +pub(crate) use set_name; diff --git a/src/runtime-rs/crates/resource/src/network/utils/link/manager.rs b/src/runtime-rs/crates/resource/src/network/utils/link/manager.rs new file mode 100644 index 000000000000..f628ec03f4ec --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/link/manager.rs @@ -0,0 +1,229 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use netlink_packet_route::{ + link::nlas::{Info, InfoBridge, InfoData, InfoKind, Nla}, + LinkMessage, +}; + +use super::{Link, LinkAttrs}; + +#[allow(clippy::box_default)] +pub fn get_link_from_message(mut msg: LinkMessage) -> Box { + let mut base = LinkAttrs { + index: msg.header.index, + flags: msg.header.flags, + link_layer_type: msg.header.link_layer_type, + ..Default::default() + }; + if msg.header.flags & libc::IFF_PROMISC as u32 != 0 { + base.promisc = 1; + } + let mut link: Option> = None; + while let Some(attr) = msg.nlas.pop() { + match attr { + Nla::Info(infos) => { + link = Some(link_info(infos)); + } + Nla::Address(a) => { + base.hardware_addr = a; + } + Nla::IfName(i) => { + base.name = i; + } + Nla::Mtu(m) => { + base.mtu = m; + } + Nla::Link(l) => { + base.parent_index = l; + } + Nla::Master(m) => { + base.master_index = m; + } + Nla::TxQueueLen(t) => { + base.txq_len = t; + } + Nla::IfAlias(a) => { + base.alias = a; + } + Nla::Stats(_s) => {} + Nla::Stats64(_s) => {} + Nla::Xdp(_x) => {} + Nla::ProtoInfo(_) => {} + Nla::OperState(_) => {} + Nla::NetnsId(n) => { + base.net_ns_id = n; + } + Nla::GsoMaxSize(i) => { + base.gso_max_size = i; + } + Nla::GsoMaxSegs(e) => { + base.gso_max_seqs = e; + } + Nla::VfInfoList(_) => {} + Nla::NumTxQueues(t) => { + base.num_tx_queues = t; + } + Nla::NumRxQueues(r) => { + base.num_rx_queues = r; + } + Nla::Group(g) => { + base.group = g; + } + _ => { + // skip unused attr + } + } + } + + let mut ret = link.unwrap_or_else(|| 
Box::new(Device::default())); + ret.set_attrs(base); + ret +} + +#[allow(clippy::box_default)] +fn link_info(mut infos: Vec) -> Box { + let mut link: Option> = None; + while let Some(info) = infos.pop() { + match info { + Info::Kind(kind) => match kind { + InfoKind::Tun => { + if link.is_none() { + link = Some(Box::new(Tuntap::default())); + } + } + InfoKind::Veth => { + if link.is_none() { + link = Some(Box::new(Veth::default())); + } + } + InfoKind::IpVlan => { + if link.is_none() { + link = Some(Box::new(IpVlan::default())); + } + } + InfoKind::MacVlan => { + if link.is_none() { + link = Some(Box::new(MacVlan::default())); + } + } + InfoKind::Vlan => { + if link.is_none() { + link = Some(Box::new(Vlan::default())); + } + } + InfoKind::Bridge => { + if link.is_none() { + link = Some(Box::new(Bridge::default())); + } + } + _ => { + if link.is_none() { + link = Some(Box::new(Device::default())); + } + } + }, + Info::Data(data) => match data { + InfoData::Tun(_) => { + link = Some(Box::new(Tuntap::default())); + } + InfoData::Veth(_) => { + link = Some(Box::new(Veth::default())); + } + InfoData::IpVlan(_) => { + link = Some(Box::new(IpVlan::default())); + } + InfoData::MacVlan(_) => { + link = Some(Box::new(MacVlan::default())); + } + InfoData::Vlan(_) => { + link = Some(Box::new(Vlan::default())); + } + InfoData::Bridge(ibs) => { + link = Some(Box::new(parse_bridge(ibs))); + } + _ => { + link = Some(Box::new(Device::default())); + } + }, + Info::SlaveKind(_sk) => { + if link.is_none() { + link = Some(Box::new(Device::default())); + } + } + Info::SlaveData(_sd) => { + link = Some(Box::new(Device::default())); + } + _ => { + link = Some(Box::new(Device::default())); + } + } + } + link.unwrap() +} + +fn parse_bridge(mut ibs: Vec) -> Bridge { + let mut bridge = Bridge::default(); + while let Some(ib) = ibs.pop() { + match ib { + InfoBridge::HelloTime(ht) => { + bridge.hello_time = ht; + } + InfoBridge::MulticastSnooping(m) => { + bridge.multicast_snooping = m == 1; + } + InfoBridge::VlanFiltering(v) => { + bridge.vlan_filtering = v == 1; + } + _ => {} + } + } + bridge +} + +macro_rules! impl_network_dev { + ($r_type: literal , $r_struct: ty) => { + impl Link for $r_struct { + fn attrs(&self) -> &LinkAttrs { + self.attrs.as_ref().unwrap() + } + fn set_attrs(&mut self, attr: LinkAttrs) { + self.attrs = Some(attr); + } + fn r#type(&self) -> &'static str { + $r_type + } + } + }; +} + +macro_rules! 
define_and_impl_network_dev { + ($r_type: literal , $r_struct: tt) => { + #[derive(Debug, PartialEq, Eq, Clone, Default)] + pub struct $r_struct { + attrs: Option, + } + + impl_network_dev!($r_type, $r_struct); + }; +} + +define_and_impl_network_dev!("device", Device); +define_and_impl_network_dev!("tuntap", Tuntap); +define_and_impl_network_dev!("veth", Veth); +define_and_impl_network_dev!("ipvlan", IpVlan); +define_and_impl_network_dev!("macvlan", MacVlan); +define_and_impl_network_dev!("vlan", Vlan); + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct Bridge { + attrs: Option, + pub multicast_snooping: bool, + pub hello_time: u32, + pub vlan_filtering: bool, +} + +impl_network_dev!("bridge", Bridge); diff --git a/src/runtime-rs/crates/resource/src/network/utils/link/mod.rs b/src/runtime-rs/crates/resource/src/network/utils/link/mod.rs new file mode 100644 index 000000000000..aa5c2631b1a7 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/link/mod.rs @@ -0,0 +1,148 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod create; +pub use create::{create_link, LinkType}; +mod driver_info; +pub use driver_info::{get_driver_info, DriverInfo}; +mod macros; +mod manager; +pub use manager::get_link_from_message; + +use std::os::unix::io::RawFd; + +use netlink_packet_route::link::nlas::State; + +#[cfg(test)] +pub use create::net_test_utils; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum Namespace { + NetNsPid(u32), + #[allow(dead_code)] + NetNsFd(RawFd), +} +impl Default for Namespace { + fn default() -> Self { + Self::NetNsPid(0) + } +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum LinkStatistics { + #[allow(dead_code)] + Stats(LinkStatistics32), + Stats64(LinkStatistics64), +} +impl Default for LinkStatistics { + fn default() -> Self { + Self::Stats64(LinkStatistics64::default()) + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct LinkStatistics32 { + pub rx_packets: u32, + pub tx_packets: u32, + pub rx_bytes: u32, + pub tx_bytes: u32, + pub rx_errors: u32, + pub tx_errors: u32, + pub rx_dropped: u32, + pub tx_dropped: u32, + pub multicast: u32, + pub collisions: u32, + pub rx_length_errors: u32, + pub rx_over_errors: u32, + pub rx_crc_errors: u32, + pub rx_frame_errors: u32, + pub rx_fifo_errors: u32, + pub rx_missed_errors: u32, + pub tx_aborted_errors: u32, + pub tx_carrier_errors: u32, + pub tx_fifo_errors: u32, + pub tx_heartbeat_errors: u32, + pub tx_window_errors: u32, + pub rx_compressed: u32, + pub tx_compressed: u32, +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct LinkStatistics64 { + pub rx_packets: u64, + pub tx_packets: u64, + pub rx_bytes: u64, + pub tx_bytes: u64, + pub rx_errors: u64, + pub tx_errors: u64, + pub rx_dropped: u64, + pub tx_dropped: u64, + pub multicast: u64, + pub collisions: u64, + pub rx_length_errors: u64, + pub rx_over_errors: u64, + pub rx_crc_errors: u64, + pub rx_frame_errors: u64, + pub rx_fifo_errors: u64, + pub rx_missed_errors: u64, + pub tx_aborted_errors: u64, + pub tx_carrier_errors: u64, + pub tx_fifo_errors: u64, + pub tx_heartbeat_errors: u64, + pub tx_window_errors: u64, + pub rx_compressed: u64, + pub tx_compressed: u64, +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct LinkXdp { + pub fd: RawFd, + pub attached: bool, + pub flags: u32, + pub prog_id: u32, +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct OperState(State); +impl Default for OperState { + fn default() 
-> Self { + Self(State::Unknown) + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub struct LinkAttrs { + pub index: u32, + pub mtu: u32, + pub txq_len: u32, + + pub name: String, + pub hardware_addr: Vec, + pub flags: u32, + pub parent_index: u32, + pub master_index: u32, + pub namespace: Namespace, + pub alias: String, + pub statistics: LinkStatistics, + pub promisc: u32, + pub xdp: LinkXdp, + pub link_layer_type: u16, + pub proto_info: Vec, + pub oper_state: OperState, + pub net_ns_id: i32, + pub num_tx_queues: u32, + pub num_rx_queues: u32, + pub gso_max_size: u32, + pub gso_max_seqs: u32, + pub vfs: Vec, + pub group: u32, +} + +pub trait Link: Send + Sync { + fn attrs(&self) -> &LinkAttrs; + fn set_attrs(&mut self, attr: LinkAttrs); + fn r#type(&self) -> &str; +} diff --git a/src/runtime-rs/crates/resource/src/network/utils/mod.rs b/src/runtime-rs/crates/resource/src/network/utils/mod.rs new file mode 100644 index 000000000000..39a34d6876f4 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/mod.rs @@ -0,0 +1,89 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +pub(crate) mod address; +pub(crate) mod link; +pub(crate) mod netns; + +use anyhow::{anyhow, Result}; +use rand::rngs::OsRng; +use rand::RngCore; + +pub(crate) fn parse_mac(s: &str) -> Option { + let v: Vec<_> = s.split(':').collect(); + if v.len() != 6 { + return None; + } + let mut bytes = [0u8; 6]; + for i in 0..6 { + bytes[i] = u8::from_str_radix(v[i], 16).ok()?; + } + + Some(hypervisor::Address(bytes)) +} + +pub(crate) fn get_mac_addr(b: &[u8]) -> Result { + if b.len() != 6 { + Err(anyhow!("invalid mac address {:?}", b)) + } else { + Ok(format!( + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + b[0], b[1], b[2], b[3], b[4], b[5] + )) + } +} + +/// Generate a private mac address. +/// The range of private mac addressess is +/// x2-xx-xx-xx-xx-xx, x6-xx-xx-xx-xx-xx, xA-xx-xx-xx-xx-xx, xE-xx-xx-xx-xx-xx. 
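+// Illustrative check of the first-octet masking used below (input values are hypothetical):
+//   (0x7fu8 | 2) & 0xfe == 0x7e   // bit 1 set => locally administered, bit 0 clear => unicast
+//   (0x10u8 | 2) & 0xfe == 0x12   // so the first octet always ends in 2, 6, a or e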
+pub(crate) fn generate_private_mac_addr() -> String { + let mut addr: [u8; 6] = [0, 0, 0, 0, 0, 0]; + OsRng.fill_bytes(&mut addr); + addr[0] = (addr[0] | 2) & 0xfe; + // This is a safty unwrap since the len of addr is 6 + get_mac_addr(&addr).unwrap() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_mac_addr() { + // length is not 6 + let fail_slice = vec![1, 2, 3]; + assert!(get_mac_addr(&fail_slice).is_err()); + + let expected_slice = vec![10, 11, 128, 3, 4, 5]; + let expected_mac = String::from("0a:0b:80:03:04:05"); + let res = get_mac_addr(&expected_slice); + assert!(res.is_ok()); + assert_eq!(expected_mac, res.unwrap()); + } + + #[test] + fn test_parse_mac() { + // length is not 6 + let fail = "1:2:3"; + assert!(parse_mac(fail).is_none()); + + let v = [10, 11, 128, 3, 4, 5]; + let expected_addr = hypervisor::Address(v); + let addr = parse_mac("0a:0b:80:03:04:05"); + assert!(addr.is_some()); + assert_eq!(expected_addr.0, addr.unwrap().0); + } + + #[test] + fn test_generate_private_mac_addr() { + let addr1 = generate_private_mac_addr(); + let addr2 = generate_private_mac_addr(); + assert_ne!(addr1, addr2); + let ch1 = addr1.chars().nth(1).unwrap(); + let is_private = ch1 == '2' || ch1 == '6' || ch1 == 'a' || ch1 == 'e'; + assert!(is_private) + } +} diff --git a/src/runtime-rs/crates/resource/src/network/utils/netns.rs b/src/runtime-rs/crates/resource/src/network/utils/netns.rs new file mode 100644 index 000000000000..f2dc2ae6f64f --- /dev/null +++ b/src/runtime-rs/crates/resource/src/network/utils/netns.rs @@ -0,0 +1,95 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{fs::File, os::unix::io::AsRawFd}; + +use anyhow::{Context, Result}; +use nix::sched::{setns, CloneFlags}; +use nix::unistd::{getpid, gettid}; +use rand::Rng; + +pub struct NetnsGuard { + old_netns: Option, +} + +impl NetnsGuard { + pub fn new(new_netns_path: &str) -> Result { + let old_netns = if !new_netns_path.is_empty() { + let current_netns_path = format!("/proc/{}/task/{}/ns/{}", getpid(), gettid(), "net"); + let old_netns = File::open(¤t_netns_path) + .with_context(|| format!("open current netns path {}", ¤t_netns_path))?; + let new_netns = File::open(new_netns_path) + .with_context(|| format!("open new netns path {}", &new_netns_path))?; + setns(new_netns.as_raw_fd(), CloneFlags::CLONE_NEWNET) + .with_context(|| "set netns to new netns")?; + info!( + sl!(), + "set netns from old {:?} to new {:?} tid {}", + old_netns, + new_netns, + gettid().to_string() + ); + Some(old_netns) + } else { + warn!(sl!(), "skip to set netns for empty netns path"); + None + }; + Ok(Self { old_netns }) + } +} + +impl Drop for NetnsGuard { + fn drop(&mut self) { + if let Some(old_netns) = self.old_netns.as_ref() { + let old_netns_fd = old_netns.as_raw_fd(); + setns(old_netns_fd, CloneFlags::CLONE_NEWNET).unwrap(); + info!(sl!(), "set netns to old {:?}", old_netns_fd); + } + } +} + +// generate the network namespace name +pub fn generate_netns_name() -> String { + let mut rng = rand::thread_rng(); + let random_bytes: [u8; 16] = rng.gen(); + format!( + "cnitest-{}-{}-{}-{}-{}", + hex::encode(&random_bytes[..4]), + hex::encode(&random_bytes[4..6]), + hex::encode(&random_bytes[6..8]), + hex::encode(&random_bytes[8..10]), + hex::encode(&random_bytes[10..]) + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use test_utils::skip_if_not_root; + + #[test] + fn test_new_netns_guard() { + // test run under root + 
skip_if_not_root!(); + + let new_netns_path = "/proc/1/task/1/ns/net"; // systemd, always exists + let netns_guard = NetnsGuard::new(new_netns_path).unwrap(); + drop(netns_guard); + + let empty_path = ""; + assert!(NetnsGuard::new(empty_path).unwrap().old_netns.is_none()); + } + + #[test] + fn test_generate_netns_name() { + let name1 = generate_netns_name(); + let name2 = generate_netns_name(); + let name3 = generate_netns_name(); + assert_ne!(name1, name2); + assert_ne!(name2, name3); + assert_ne!(name1, name3); + } +} diff --git a/src/runtime-rs/crates/resource/src/resource_persist.rs b/src/runtime-rs/crates/resource/src/resource_persist.rs new file mode 100644 index 000000000000..dd2a39c4abf6 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/resource_persist.rs @@ -0,0 +1,15 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use crate::network::EndpointState; +use serde::{Deserialize, Serialize}; + +use crate::cgroups::cgroup_persist::CgroupState; +#[derive(Serialize, Deserialize, Default)] +pub struct ResourceState { + pub endpoint: Vec, + pub cgroup_state: Option, +} diff --git a/src/runtime-rs/crates/resource/src/rootfs/block_rootfs.rs b/src/runtime-rs/crates/resource/src/rootfs/block_rootfs.rs new file mode 100644 index 000000000000..33b0ff2168d5 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/rootfs/block_rootfs.rs @@ -0,0 +1,126 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use super::{Rootfs, ROOTFS}; +use crate::share_fs::{do_get_guest_path, do_get_host_path}; +use agent::Storage; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use hypervisor::{ + device::{ + device_manager::{do_handle_device, get_block_driver, DeviceManager}, + DeviceConfig, DeviceType, + }, + BlockConfig, +}; +use kata_types::mount::Mount; +use nix::sys::stat::{self, SFlag}; +use std::fs; +use tokio::sync::RwLock; + +pub(crate) struct BlockRootfs { + guest_path: String, + device_id: String, + mount: oci::Mount, + storage: Option, +} + +impl BlockRootfs { + pub async fn new( + d: &RwLock, + sid: &str, + cid: &str, + dev_id: u64, + rootfs: &Mount, + ) -> Result { + let container_path = do_get_guest_path(ROOTFS, cid, false, false); + let host_path = do_get_host_path(ROOTFS, sid, cid, false, false); + // Create rootfs dir on host to make sure mount point in guest exists, as readonly dir is + // shared to guest via virtiofs, and guest is unable to create rootfs dir. 
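+ // Illustration (path layout is indicative, not exact): host_path lives under the sandbox's
+ // shared directory on the host (KATA_HOST_SHARED_DIR), while container_path is the matching
+ // mount point inside the guest; only the host side can be created from the runtime here.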
+ fs::create_dir_all(&host_path) + .map_err(|e| anyhow!("failed to create rootfs dir {}: {:?}", host_path, e))?; + + let block_driver = get_block_driver(d).await; + + let block_device_config = &mut BlockConfig { + major: stat::major(dev_id) as i64, + minor: stat::minor(dev_id) as i64, + driver_option: block_driver, + ..Default::default() + }; + + // create and insert block device into Kata VM + let device_info = do_handle_device(d, &DeviceConfig::BlockCfg(block_device_config.clone())) + .await + .context("do handle device failed.")?; + + let mut storage = Storage { + fs_type: rootfs.fs_type.clone(), + mount_point: container_path.clone(), + options: rootfs.options.clone(), + ..Default::default() + }; + + let mut device_id: String = "".to_owned(); + if let DeviceType::Block(device) = device_info { + storage.driver = device.config.driver_option; + storage.source = device.config.virt_path; + device_id = device.device_id; + } + + Ok(Self { + guest_path: container_path.clone(), + device_id, + mount: oci::Mount { + ..Default::default() + }, + storage: Some(storage), + }) + } +} + +#[async_trait] +impl Rootfs for BlockRootfs { + async fn get_guest_rootfs_path(&self) -> Result { + Ok(self.guest_path.clone()) + } + + async fn get_rootfs_mount(&self) -> Result> { + Ok(vec![self.mount.clone()]) + } + + async fn get_storage(&self) -> Option { + self.storage.clone() + } + + async fn get_device_id(&self) -> Result> { + Ok(Some(self.device_id.clone())) + } + + async fn cleanup(&self, device_manager: &RwLock) -> Result<()> { + device_manager + .write() + .await + .try_remove_device(&self.device_id) + .await + } +} + +pub(crate) fn is_block_rootfs(file: &str) -> Option { + if file.is_empty() { + return None; + } + match stat::stat(file) { + Ok(fstat) => { + if SFlag::from_bits_truncate(fstat.st_mode) == SFlag::S_IFBLK { + let dev_id = fstat.st_rdev; + return Some(dev_id); + } + } + Err(_) => return None, + }; + None +} diff --git a/src/runtime-rs/crates/resource/src/rootfs/mod.rs b/src/runtime-rs/crates/resource/src/rootfs/mod.rs new file mode 100644 index 000000000000..a924b021bd6c --- /dev/null +++ b/src/runtime-rs/crates/resource/src/rootfs/mod.rs @@ -0,0 +1,151 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod nydus_rootfs; +mod share_fs_rootfs; +use agent::Storage; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use kata_types::mount::Mount; +mod block_rootfs; +use hypervisor::{device::device_manager::DeviceManager, Hypervisor}; +use std::{sync::Arc, vec::Vec}; +use tokio::sync::RwLock; + +use crate::share_fs::ShareFs; + +use self::{block_rootfs::is_block_rootfs, nydus_rootfs::NYDUS_ROOTFS_TYPE}; + +const ROOTFS: &str = "rootfs"; +const HYBRID_ROOTFS_LOWER_DIR: &str = "rootfs_lower"; +const TYPE_OVERLAY_FS: &str = "overlay"; +#[async_trait] +pub trait Rootfs: Send + Sync { + async fn get_guest_rootfs_path(&self) -> Result; + async fn get_rootfs_mount(&self) -> Result>; + async fn get_storage(&self) -> Option; + async fn cleanup(&self, device_manager: &RwLock) -> Result<()>; + async fn get_device_id(&self) -> Result>; +} + +#[derive(Default)] +struct RootFsResourceInner { + rootfs: Vec>, +} + +pub struct RootFsResource { + inner: Arc>, +} + +impl Default for RootFsResource { + fn default() -> Self { + Self::new() + } +} + +impl RootFsResource { + pub fn new() -> Self { + Self { + inner: Arc::new(RwLock::new(RootFsResourceInner::default())), + } + } + + #[allow(clippy::too_many_arguments)] + 
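+ // Dispatch overview (descriptive note): an empty `rootfs_mounts` falls back to a shared-fs
+ // rootfs built from `root.path`; a single layer is routed to a block-device, nydus, or
+ // shared-fs rootfs depending on its source and fs_type; any other layer count is rejected.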
pub async fn handler_rootfs( + &self, + share_fs: &Option>, + device_manager: &RwLock, + h: &dyn Hypervisor, + sid: &str, + cid: &str, + root: &oci::Root, + bundle_path: &str, + rootfs_mounts: &[Mount], + ) -> Result> { + match rootfs_mounts { + // if rootfs_mounts is empty + mounts_vec if mounts_vec.is_empty() => { + if let Some(share_fs) = share_fs { + // handle share fs rootfs + Ok(Arc::new( + share_fs_rootfs::ShareFsRootfs::new( + share_fs, + cid, + root.path.as_str(), + None, + ) + .await + .context("new share fs rootfs")?, + )) + } else { + Err(anyhow!("share fs is unavailable")) + } + } + mounts_vec if is_single_layer_rootfs(mounts_vec) => { + // Safe as single_layer_rootfs must have one layer + let layer = &mounts_vec[0]; + let mut inner = self.inner.write().await; + let rootfs = if let Some(dev_id) = is_block_rootfs(&layer.source) { + // handle block rootfs + info!(sl!(), "block device: {}", dev_id); + let block_rootfs: Arc = Arc::new( + block_rootfs::BlockRootfs::new(device_manager, sid, cid, dev_id, layer) + .await + .context("new block rootfs")?, + ); + Ok(block_rootfs) + } else if let Some(share_fs) = share_fs { + // handle nydus rootfs + let share_rootfs: Arc = if layer.fs_type == NYDUS_ROOTFS_TYPE { + Arc::new( + nydus_rootfs::NydusRootfs::new(share_fs, h, sid, cid, layer) + .await + .context("new nydus rootfs")?, + ) + } + // handle sharefs rootfs + else { + Arc::new( + share_fs_rootfs::ShareFsRootfs::new( + share_fs, + cid, + bundle_path, + Some(layer), + ) + .await + .context("new share fs rootfs")?, + ) + }; + Ok(share_rootfs) + } else { + Err(anyhow!("unsupported rootfs {:?}", &layer)) + }?; + inner.rootfs.push(rootfs.clone()); + Ok(rootfs) + } + _ => Err(anyhow!( + "unsupported rootfs mounts count {}", + rootfs_mounts.len() + )), + } + } + + pub async fn dump(&self) { + let inner = self.inner.read().await; + for r in &inner.rootfs { + info!( + sl!(), + "rootfs {:?}: count {}", + r.get_guest_rootfs_path().await, + Arc::strong_count(r) + ); + } + } +} + +fn is_single_layer_rootfs(rootfs_mounts: &[Mount]) -> bool { + rootfs_mounts.len() == 1 +} diff --git a/src/runtime-rs/crates/resource/src/rootfs/nydus_rootfs.rs b/src/runtime-rs/crates/resource/src/rootfs/nydus_rootfs.rs new file mode 100644 index 000000000000..96f29e8d6729 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/rootfs/nydus_rootfs.rs @@ -0,0 +1,227 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// +use std::{fs, path::Path, sync::Arc}; + +use super::{Rootfs, TYPE_OVERLAY_FS}; +use crate::{ + rootfs::{HYBRID_ROOTFS_LOWER_DIR, ROOTFS}, + share_fs::{ + do_get_guest_path, do_get_guest_share_path, get_host_rw_shared_path, rafs_mount, ShareFs, + ShareFsRootfsConfig, PASSTHROUGH_FS_DIR, + }, +}; +use agent::Storage; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use hypervisor::{device::device_manager::DeviceManager, Hypervisor}; +use kata_types::mount::{Mount, NydusExtraOptions}; +use tokio::sync::RwLock; + +// Used for nydus rootfs +pub(crate) const NYDUS_ROOTFS_TYPE: &str = "fuse.nydus-overlayfs"; +// Used for Nydus v5 rootfs version +const NYDUS_ROOTFS_V5: &str = "v5"; +// Used for Nydus v6 rootfs version +const NYDUS_ROOTFS_V6: &str = "v6"; + +const SNAPSHOT_DIR: &str = "snapshotdir"; +const KATA_OVERLAY_DEV_TYPE: &str = "overlayfs"; +// nydus prefetch file list name +const NYDUS_PREFETCH_FILE_LIST: &str = "prefetch_file.list"; + +pub(crate) struct NydusRootfs { + guest_path: String, + rootfs: 
Storage, +} + +impl NydusRootfs { + pub async fn new( + share_fs: &Arc, + h: &dyn Hypervisor, + sid: &str, + cid: &str, + rootfs: &Mount, + ) -> Result { + let prefetch_list_path = + get_nydus_prefetch_files(h.hypervisor_config().await.prefetch_list_path).await; + + let share_fs_mount = share_fs.get_share_fs_mount(); + let extra_options = + NydusExtraOptions::new(rootfs).context("failed to parse nydus extra options")?; + info!(sl!(), "extra_option {:?}", &extra_options); + let rafs_meta = &extra_options.source; + let (rootfs_storage, rootfs_guest_path) = match extra_options.fs_version.as_str() { + // both nydus v5 and v6 can be handled by the builtin nydus in dragonball by using the rafs mode. + // nydus v6 could also be handled by the guest kernel as well, but some kernel patch is not support in the upstream community. We will add an option to let runtime-rs handle nydus v6 in the guest kernel optionally once the patch is ready + // see this issue (https://github.com/kata-containers/kata-containers/issues/5143) + NYDUS_ROOTFS_V5 | NYDUS_ROOTFS_V6 => { + // rafs mount the metadata of nydus rootfs + let rafs_mnt = do_get_guest_share_path(HYBRID_ROOTFS_LOWER_DIR, cid, true); + rafs_mount( + h, + rafs_meta.to_string(), + rafs_mnt, + extra_options.config.clone(), + prefetch_list_path, + ) + .await + .context("failed to do rafs mount")?; + // create rootfs under the share directory + let container_share_dir = get_host_rw_shared_path(sid) + .join(PASSTHROUGH_FS_DIR) + .join(cid); + let rootfs_dir = container_share_dir.join(ROOTFS); + fs::create_dir_all(rootfs_dir).context("failed to create directory")?; + // mount point inside the guest + let rootfs_guest_path = do_get_guest_path(ROOTFS, cid, false, false); + // bind mount the snapshot dir under the share directory + share_fs_mount + .share_rootfs(&ShareFsRootfsConfig { + cid: cid.to_string(), + source: extra_options.snapshot_dir.clone(), + target: SNAPSHOT_DIR.to_string(), + readonly: true, + is_rafs: false, + }) + .await + .context("share nydus rootfs")?; + let mut options: Vec = Vec::new(); + options.push( + "lowerdir=".to_string() + + &do_get_guest_path(HYBRID_ROOTFS_LOWER_DIR, cid, false, true), + ); + options.push( + "workdir=".to_string() + + &do_get_guest_path( + format!("{}/{}", SNAPSHOT_DIR, "work").as_str(), + cid, + false, + false, + ), + ); + options.push( + "upperdir=".to_string() + + &do_get_guest_path( + format!("{}/{}", SNAPSHOT_DIR, "fs").as_str(), + cid, + false, + false, + ), + ); + options.push("index=off".to_string()); + Ok(( + Storage { + driver: KATA_OVERLAY_DEV_TYPE.to_string(), + source: TYPE_OVERLAY_FS.to_string(), + fs_type: TYPE_OVERLAY_FS.to_string(), + options, + mount_point: rootfs_guest_path.clone(), + ..Default::default() + }, + rootfs_guest_path, + )) + } + _ => { + let errstr: &str = "new_nydus_rootfs: invalid nydus rootfs type"; + error!(sl!(), "{}", errstr); + Err(anyhow!(errstr)) + } + }?; + Ok(NydusRootfs { + guest_path: rootfs_guest_path, + rootfs: rootfs_storage, + }) + } +} + +#[async_trait] +impl Rootfs for NydusRootfs { + async fn get_guest_rootfs_path(&self) -> Result { + Ok(self.guest_path.clone()) + } + + async fn get_rootfs_mount(&self) -> Result> { + Ok(vec![]) + } + + async fn get_storage(&self) -> Option { + Some(self.rootfs.clone()) + } + + async fn get_device_id(&self) -> Result> { + Ok(None) + } + + async fn cleanup(&self, _device_manager: &RwLock) -> Result<()> { + // TODO: Clean up NydusRootfs after the container is killed + warn!(sl!(), "Cleaning up NydusRootfs is still unimplemented."); 
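+ // Sketch of what a full cleanup would likely involve (assumption, not implemented here):
+ // unmounting the rafs metadata mount and the bind-mounted snapshot directory that new()
+ // sets up, then removing the rootfs directory created under the host share.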
+ Ok(()) + } +} + +// Check prefetch files list path, and if invalid, discard it directly. +// As the result of caller `rafs_mount`, it returns `Option`. +async fn get_nydus_prefetch_files(nydus_prefetch_path: String) -> Option { + // nydus_prefetch_path is an annotation and pod with it will indicate + // that prefetch_files will be included. + if nydus_prefetch_path.is_empty() { + info!(sl!(), "nydus prefetch files path not set, just skip it."); + + return None; + } + + // Ensure the string ends with "/prefetch_files.list" + if !nydus_prefetch_path.ends_with(format!("/{}", NYDUS_PREFETCH_FILE_LIST).as_str()) { + info!( + sl!(), + "nydus prefetch file path no {:?} file exist.", NYDUS_PREFETCH_FILE_LIST + ); + + return None; + } + + // ensure the prefetch_list_path is a regular file. + let prefetch_list_path = Path::new(nydus_prefetch_path.as_str()); + if !prefetch_list_path.is_file() { + info!( + sl!(), + "nydus prefetch list file {:?} not a regular file", &prefetch_list_path + ); + + return None; + } + + return Some(prefetch_list_path.display().to_string()); +} + +#[cfg(test)] +mod tests { + use super::*; + use std::{fs::File, path::PathBuf}; + use tempfile::tempdir; + + #[tokio::test] + async fn test_get_nydus_prefetch_files() { + let temp_dir = tempdir().unwrap(); + let prefetch_list_path01 = temp_dir.path().join("nydus_prefetch_files"); + // /tmp_dir/nydus_prefetch_files/ + std::fs::create_dir_all(prefetch_list_path01.clone()).unwrap(); + // /tmp_dir/nydus_prefetch_files/prefetch_file.list + let prefetch_list_path02 = prefetch_list_path01 + .as_path() + .join(NYDUS_PREFETCH_FILE_LIST); + let file = File::create(prefetch_list_path02.clone()); + assert!(file.is_ok()); + + let prefetch_file = + get_nydus_prefetch_files(prefetch_list_path02.as_path().display().to_string()).await; + assert!(prefetch_file.is_some()); + assert_eq!(PathBuf::from(prefetch_file.unwrap()), prefetch_list_path02); + + drop(file); + temp_dir.close().unwrap_or_default(); + } +} diff --git a/src/runtime-rs/crates/resource/src/rootfs/share_fs_rootfs.rs b/src/runtime-rs/crates/resource/src/rootfs/share_fs_rootfs.rs new file mode 100644 index 000000000000..385c058d3d20 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/rootfs/share_fs_rootfs.rs @@ -0,0 +1,96 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::sync::Arc; + +use agent::Storage; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use hypervisor::device::device_manager::DeviceManager; +use kata_sys_util::mount::{umount_timeout, Mounter}; +use kata_types::mount::Mount; +use tokio::sync::RwLock; + +use super::{Rootfs, ROOTFS}; +use crate::share_fs::{ShareFs, ShareFsRootfsConfig}; + +pub(crate) struct ShareFsRootfs { + guest_path: String, + share_fs: Arc, + config: ShareFsRootfsConfig, +} + +impl ShareFsRootfs { + pub async fn new( + share_fs: &Arc, + cid: &str, + bundle_path: &str, + rootfs: Option<&Mount>, + ) -> Result { + let bundle_rootfs = if let Some(rootfs) = rootfs { + let bundle_rootfs = format!("{}/{}", bundle_path, ROOTFS); + rootfs.mount(&bundle_rootfs).context(format!( + "mount rootfs from {:?} to {}", + &rootfs, &bundle_rootfs + ))?; + bundle_rootfs + } else { + bundle_path.to_string() + }; + + let share_fs_mount = share_fs.get_share_fs_mount(); + let config = ShareFsRootfsConfig { + cid: cid.to_string(), + source: bundle_rootfs.to_string(), + target: ROOTFS.to_string(), + readonly: false, + is_rafs: false, + }; + + let mount_result = 
share_fs_mount + .share_rootfs(&config) + .await + .context("share rootfs")?; + + Ok(ShareFsRootfs { + guest_path: mount_result.guest_path, + share_fs: Arc::clone(share_fs), + config, + }) + } +} + +#[async_trait] +impl Rootfs for ShareFsRootfs { + async fn get_guest_rootfs_path(&self) -> Result { + Ok(self.guest_path.clone()) + } + + async fn get_rootfs_mount(&self) -> Result> { + todo!() + } + + async fn get_storage(&self) -> Option { + None + } + + async fn get_device_id(&self) -> Result> { + Ok(None) + } + + async fn cleanup(&self, _device_manager: &RwLock) -> Result<()> { + // Umount the mount point shared to guest + let share_fs_mount = self.share_fs.get_share_fs_mount(); + share_fs_mount + .umount_rootfs(&self.config) + .await + .context("umount shared rootfs")?; + + // Umount the bundle rootfs + umount_timeout(&self.config.source, 0).context("umount bundle rootfs")?; + Ok(()) + } +} diff --git a/src/runtime-rs/crates/resource/src/share_fs/mod.rs b/src/runtime-rs/crates/resource/src/share_fs/mod.rs new file mode 100644 index 000000000000..4d70a6c7b409 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/share_fs/mod.rs @@ -0,0 +1,153 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod share_virtio_fs; +pub use share_virtio_fs::rafs_mount; +mod share_virtio_fs_inline; +use share_virtio_fs_inline::ShareVirtioFsInline; +mod share_virtio_fs_standalone; +use share_virtio_fs_standalone::ShareVirtioFsStandalone; +mod utils; +use tokio::sync::Mutex; +pub use utils::{ + do_get_guest_path, do_get_guest_share_path, do_get_host_path, get_host_rw_shared_path, +}; +mod virtio_fs_share_mount; +use virtio_fs_share_mount::VirtiofsShareMount; +pub use virtio_fs_share_mount::EPHEMERAL_PATH; +pub mod sandbox_bind_mounts; + +use std::{collections::HashMap, fmt::Debug, path::PathBuf, sync::Arc}; + +use agent::Storage; +use anyhow::{anyhow, Context, Ok, Result}; +use async_trait::async_trait; +use hypervisor::Hypervisor; +use kata_types::config::hypervisor::SharedFsInfo; + +const VIRTIO_FS: &str = "virtio-fs"; +const _VIRTIO_FS_NYDUS: &str = "virtio-fs-nydus"; +const INLINE_VIRTIO_FS: &str = "inline-virtio-fs"; + +const KATA_HOST_SHARED_DIR: &str = "/run/kata-containers/shared/sandboxes/"; + +/// share fs (for example virtio-fs) mount path in the guest +const KATA_GUEST_SHARE_DIR: &str = "/run/kata-containers/shared/containers/"; + +pub(crate) const DEFAULT_KATA_GUEST_SANDBOX_DIR: &str = "/run/kata-containers/sandbox/"; + +pub const PASSTHROUGH_FS_DIR: &str = "passthrough"; +const RAFS_DIR: &str = "rafs"; + +#[async_trait] +pub trait ShareFs: Send + Sync { + fn get_share_fs_mount(&self) -> Arc; + async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()>; + async fn setup_device_after_start_vm(&self, h: &dyn Hypervisor) -> Result<()>; + async fn get_storages(&self) -> Result>; + fn mounted_info_set(&self) -> Arc>>; +} + +#[derive(Debug, Clone)] +pub struct ShareFsRootfsConfig { + // TODO: for nydus v5/v6 need to update ShareFsMount + pub cid: String, + pub source: String, + pub target: String, + pub readonly: bool, + pub is_rafs: bool, +} + +#[derive(Debug)] +pub struct ShareFsVolumeConfig { + pub cid: String, + pub source: String, + pub target: String, + pub readonly: bool, + pub mount_options: Vec, + pub mount: oci::Mount, + pub is_rafs: bool, +} + +pub struct ShareFsMountResult { + pub guest_path: String, + pub storages: Vec, +} + +/// Save mounted info for sandbox-level shared files. 
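+// Illustrative ref-counting behaviour (values and path are hypothetical):
+//   let mut info = MountedInfo::new(PathBuf::from("/tmp/example-volume"), true); // ro_ref_count == 1
+//   info.rw_ref_count += 1;    // a second container mounts the same path read-write
+//   assert!(!info.readonly()); // the shared mount must now stay writable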
+#[derive(Clone, Debug)] +pub struct MountedInfo { + // Guest path + pub guest_path: PathBuf, + // Ref count of containers that uses this volume with read only permission + pub ro_ref_count: usize, + // Ref count of containers that uses this volume with read write permission + pub rw_ref_count: usize, +} + +impl MountedInfo { + pub fn new(guest_path: PathBuf, readonly: bool) -> Self { + Self { + guest_path, + ro_ref_count: readonly.into(), + rw_ref_count: (!readonly).into(), + } + } + + /// Check if the mount has read only permission + pub fn readonly(&self) -> bool { + self.rw_ref_count == 0 + } + + /// Ref count for all permissions + pub fn ref_count(&self) -> usize { + self.ro_ref_count + self.rw_ref_count + } + + // File/dir name in the form of "sandbox--" + pub fn file_name(&self) -> Result { + match self.guest_path.file_name() { + Some(file_name) => match file_name.to_str() { + Some(file_name) => Ok(file_name.to_owned()), + None => Err(anyhow!("failed to get string from {:?}", file_name)), + }, + None => Err(anyhow!( + "failed to get file name from the guest_path {:?}", + self.guest_path + )), + } + } +} + +#[async_trait] +pub trait ShareFsMount: Send + Sync { + async fn share_rootfs(&self, config: &ShareFsRootfsConfig) -> Result; + async fn share_volume(&self, config: &ShareFsVolumeConfig) -> Result; + /// Upgrade to readwrite permission + async fn upgrade_to_rw(&self, file_name: &str) -> Result<()>; + /// Downgrade to readonly permission + async fn downgrade_to_ro(&self, file_name: &str) -> Result<()>; + /// Umount the volume + async fn umount_volume(&self, file_name: &str) -> Result<()>; + /// Umount the rootfs + async fn umount_rootfs(&self, config: &ShareFsRootfsConfig) -> Result<()>; + /// Clean up share fs mount + async fn cleanup(&self, sid: &str) -> Result<()>; +} + +pub fn new(id: &str, config: &SharedFsInfo) -> Result> { + let shared_fs = config.shared_fs.clone(); + let shared_fs = shared_fs.unwrap_or_default(); + match shared_fs.as_str() { + INLINE_VIRTIO_FS => Ok(Arc::new( + ShareVirtioFsInline::new(id, config).context("new inline virtio fs")?, + )), + VIRTIO_FS => Ok(Arc::new( + ShareVirtioFsStandalone::new(id, config).context("new standalone virtio fs")?, + )), + _ => Err(anyhow!("unsupported shred fs {:?}", &shared_fs)), + } +} diff --git a/src/runtime-rs/crates/resource/src/share_fs/sandbox_bind_mounts.rs b/src/runtime-rs/crates/resource/src/share_fs/sandbox_bind_mounts.rs new file mode 100644 index 000000000000..8c166573d0ca --- /dev/null +++ b/src/runtime-rs/crates/resource/src/share_fs/sandbox_bind_mounts.rs @@ -0,0 +1,157 @@ +// Copyright (c) 2023 Alibaba Cloud +// Copyright (c) 2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// +// Note: +// sandbox_bind_mounts supports kinds of mount patterns, for example: +// (1) "/path/to", with default readonly mode. +// (2) "/path/to:ro", same as (1). +// (3) "/path/to:rw", with readwrite mode. 
+// +// sandbox_bind_mounts: ["/path/to", "/path/to:rw", "/mnt/to:ro"] +// + +use std::{ + collections::HashMap, + fs, + path::{Path, PathBuf}, +}; + +use anyhow::{anyhow, Context, Result}; + +use super::utils::{do_get_host_path, mkdir_with_permissions}; +use kata_sys_util::{fs::get_base_name, mount}; +use kata_types::mount::{SANDBOX_BIND_MOUNTS_DIR, SANDBOX_BIND_MOUNTS_RO, SANDBOX_BIND_MOUNTS_RW}; +use nix::mount::MsFlags; + +#[derive(Clone, Default, Debug)] +pub struct SandboxBindMounts { + sid: String, + host_mounts_path: PathBuf, + sandbox_bindmounts: Vec, +} + +impl SandboxBindMounts { + pub fn new(sid: String, sandbox_bindmounts: Vec) -> Result { + // /run/kata-containers/shared/sandboxes//rw/passthrough/sandbox-mounts + let bindmounts_path = + do_get_host_path(SANDBOX_BIND_MOUNTS_DIR, sid.as_str(), "", true, false); + let host_mounts_path = PathBuf::from(bindmounts_path); + + Ok(SandboxBindMounts { + sid, + host_mounts_path, + sandbox_bindmounts, + }) + } + + fn parse_sandbox_bind_mounts<'a>(&self, bindmnt_src: &'a str) -> Result<(&'a str, &'a str)> { + // get the bindmount's r/w mode + let bindmount_mode = if bindmnt_src.ends_with(SANDBOX_BIND_MOUNTS_RW) { + SANDBOX_BIND_MOUNTS_RW + } else { + SANDBOX_BIND_MOUNTS_RO + }; + + // get the true bindmount from the string + let bindmount = bindmnt_src.trim_end_matches(bindmount_mode); + + Ok((bindmount_mode, bindmount)) + } + + pub fn setup_sandbox_bind_mounts(&self) -> Result<()> { + let mut mounted_list: Vec = Vec::new(); + let mut mounted_map: HashMap = HashMap::new(); + for src in &self.sandbox_bindmounts { + let (bindmount_mode, bindmount) = self + .parse_sandbox_bind_mounts(src) + .context("parse sandbox bind mounts failed")?; + + // get the basename of the canonicalized mount path mnt_name: dirX + let mnt_name = get_base_name(bindmount)? + .into_string() + .map_err(|e| anyhow!("failed to get base name {:?}", e))?; + + // if repeated mounted, do umount it and return error + if mounted_map.insert(mnt_name.clone(), true).is_some() { + for p in &mounted_list { + nix::mount::umount(p) + .context("mounted_map insert one repeated mounted, do umount it")?; + } + + return Err(anyhow!( + "sandbox-bindmounts: path {} is already specified.", + bindmount + )); + } + + // mount_dest: /run/kata-containers/shared/sandboxes//rw/passthrough/sandbox-mounts/dirX + let mount_dest = self.host_mounts_path.clone().join(mnt_name.as_str()); + mkdir_with_permissions(self.host_mounts_path.clone().to_path_buf(), 0o750).context( + format!( + "create host mounts path {:?}", + self.host_mounts_path.clone() + ), + )?; + + info!( + sl!(), + "sandbox-bindmounts mount_src: {:?} => mount_dest: {:?}", bindmount, &mount_dest + ); + + // mount -o bind,ro host_shared mount_dest + // host_shared: ${bindmount} + mount::bind_mount_unchecked(Path::new(bindmount), &mount_dest, true, MsFlags::MS_SLAVE) + .map_err(|e| { + for p in &mounted_list { + nix::mount::umount(p).unwrap_or_else(|x| { + format!("do umount failed: {:?}", x); + }); + } + e + })?; + + // default sandbox bind mounts mode is ro. 
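+ // Illustration: "/path/to" and "/path/to:ro" both take this branch, so the directory
+ // bind-mounted read-write above is additionally remounted read-only through the
+ // sandbox's ro-side view of the same passthrough path.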
+ if bindmount_mode == SANDBOX_BIND_MOUNTS_RO { + info!(sl!(), "sandbox readonly bind mount."); + // dest_ro: /run/kata-containers/shared/sandboxes//ro/passthrough/sandbox-mounts + let mount_dest_ro = + do_get_host_path(SANDBOX_BIND_MOUNTS_DIR, &self.sid, "", true, true); + let sandbox_bindmounts_ro = [mount_dest_ro, mnt_name.clone()].join("/"); + + mount::bind_remount(sandbox_bindmounts_ro, true) + .context("remount ro directory with ro permission")?; + } + + mounted_list.push(mount_dest); + } + + Ok(()) + } + + pub fn cleanup_sandbox_bind_mounts(&self) -> Result<()> { + for src in &self.sandbox_bindmounts { + let parsed_mnts = self + .parse_sandbox_bind_mounts(src) + .context("parse sandbox bind mounts")?; + + let mnt_name = get_base_name(parsed_mnts.1)? + .into_string() + .map_err(|e| anyhow!("failed to convert to string{:?}", e))?; + + // /run/kata-containers/shared/sandboxes//passthrough/rw/sandbox-mounts/dir + let mnt_dest = self.host_mounts_path.join(mnt_name.as_str()); + mount::umount_timeout(mnt_dest, 0).context("umount bindmount failed")?; + } + + if fs::metadata(self.host_mounts_path.clone())?.is_dir() { + fs::remove_dir_all(self.host_mounts_path.clone()).context(format!( + "remove sandbox bindmount point {:?}.", + self.host_mounts_path.clone() + ))?; + } + + Ok(()) + } +} diff --git a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs.rs b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs.rs new file mode 100644 index 000000000000..a1a1ba25c4bb --- /dev/null +++ b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs.rs @@ -0,0 +1,123 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::path::Path; + +use anyhow::{Context, Result}; +use hypervisor::{ + device::{ + driver::{ + ShareFsDevice, ShareFsMountConfig, ShareFsMountDevice, ShareFsMountType, + ShareFsOperation, + }, + DeviceType, + }, + Hypervisor, ShareFsDeviceConfig, +}; +use kata_sys_util::mount; +use nix::mount::MsFlags; + +use super::{utils, PASSTHROUGH_FS_DIR}; + +pub(crate) const MOUNT_GUEST_TAG: &str = "kataShared"; + +pub(crate) const FS_TYPE_VIRTIO_FS: &str = "virtiofs"; +pub(crate) const KATA_VIRTIO_FS_DEV_TYPE: &str = "virtio-fs"; + +const VIRTIO_FS_SOCKET: &str = "virtiofsd.sock"; + +pub(crate) fn generate_sock_path(root: &str) -> String { + let socket_path = Path::new(root).join(VIRTIO_FS_SOCKET); + socket_path.to_str().unwrap().to_string() +} + +pub(crate) async fn prepare_virtiofs( + h: &dyn Hypervisor, + fs_type: &str, + id: &str, + root: &str, +) -> Result<()> { + let host_ro_dest = utils::get_host_ro_shared_path(id); + utils::ensure_dir_exist(&host_ro_dest)?; + + let host_rw_dest = utils::get_host_rw_shared_path(id); + utils::ensure_dir_exist(&host_rw_dest)?; + + mount::bind_mount_unchecked(&host_rw_dest, &host_ro_dest, true, MsFlags::MS_SLAVE) + .context("bind mount shared_fs directory")?; + + let share_fs_device = ShareFsDevice { + config: ShareFsDeviceConfig { + sock_path: generate_sock_path(root), + mount_tag: String::from(MOUNT_GUEST_TAG), + host_path: String::from(host_ro_dest.to_str().unwrap()), + fs_type: fs_type.to_string(), + queue_size: 0, + queue_num: 0, + options: vec![], + }, + }; + h.add_device(DeviceType::ShareFs(share_fs_device)) + .await + .context("add device")?; + Ok(()) +} + +pub(crate) async fn setup_inline_virtiofs(id: &str, h: &dyn Hypervisor) -> Result<()> { + // - source is the absolute path of PASSTHROUGH_FS_DIR on host, e.g. 
+ // /run/kata-containers/shared/sandboxes//passthrough + // - mount point is the path relative to KATA_GUEST_SHARE_DIR in guest + let mnt = format!("/{}", PASSTHROUGH_FS_DIR); + + let rw_source = utils::get_host_rw_shared_path(id).join(PASSTHROUGH_FS_DIR); + utils::ensure_dir_exist(&rw_source).context("ensure directory exist")?; + + let ro_source = utils::get_host_ro_shared_path(id).join(PASSTHROUGH_FS_DIR); + let source = String::from(ro_source.to_str().unwrap()); + + let virtio_fs = ShareFsMountDevice { + config: ShareFsMountConfig { + source: source.clone(), + fstype: ShareFsMountType::PASSTHROUGH, + mount_point: mnt, + config: None, + tag: String::from(MOUNT_GUEST_TAG), + op: ShareFsOperation::Mount, + prefetch_list_path: None, + }, + }; + h.add_device(DeviceType::ShareFsMount(virtio_fs)) + .await + .with_context(|| format!("fail to attach passthrough fs {:?}", source)) +} + +pub async fn rafs_mount( + h: &dyn Hypervisor, + rafs_meta: String, + rafs_mnt: String, + config_content: String, + prefetch_list_path: Option, +) -> Result<()> { + info!( + sl!(), + "Attaching rafs meta file {} to virtio-fs device, rafs mount point {}", rafs_meta, rafs_mnt + ); + let virtio_fs = ShareFsMountDevice { + config: ShareFsMountConfig { + source: rafs_meta.clone(), + fstype: ShareFsMountType::RAFS, + mount_point: rafs_mnt, + config: Some(config_content), + tag: String::from(MOUNT_GUEST_TAG), + op: ShareFsOperation::Mount, + prefetch_list_path, + }, + }; + h.add_device(DeviceType::ShareFsMount(virtio_fs)) + .await + .with_context(|| format!("fail to attach rafs {:?}", rafs_meta))?; + Ok(()) +} diff --git a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_inline.rs b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_inline.rs new file mode 100644 index 000000000000..5dddefbfdd7c --- /dev/null +++ b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_inline.rs @@ -0,0 +1,89 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::collections::HashMap; + +use agent::Storage; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use hypervisor::Hypervisor; +use kata_types::config::hypervisor::SharedFsInfo; +use tokio::sync::Mutex; + +use super::{ + share_virtio_fs::{ + prepare_virtiofs, setup_inline_virtiofs, FS_TYPE_VIRTIO_FS, KATA_VIRTIO_FS_DEV_TYPE, + MOUNT_GUEST_TAG, + }, + ShareFs, *, +}; + +lazy_static! 
{ + pub(crate) static ref SHARED_DIR_VIRTIO_FS_OPTIONS: Vec:: = vec![String::from("nodev")]; +} + +#[derive(Debug, Clone)] +pub struct ShareVirtioFsInlineConfig { + pub id: String, +} + +pub struct ShareVirtioFsInline { + config: ShareVirtioFsInlineConfig, + share_fs_mount: Arc, + mounted_info_set: Arc>>, +} + +impl ShareVirtioFsInline { + pub(crate) fn new(id: &str, _config: &SharedFsInfo) -> Result { + Ok(Self { + config: ShareVirtioFsInlineConfig { id: id.to_string() }, + share_fs_mount: Arc::new(VirtiofsShareMount::new(id)), + mounted_info_set: Arc::new(Mutex::new(HashMap::new())), + }) + } +} + +#[async_trait] +impl ShareFs for ShareVirtioFsInline { + fn get_share_fs_mount(&self) -> Arc { + self.share_fs_mount.clone() + } + + async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()> { + prepare_virtiofs(h, INLINE_VIRTIO_FS, &self.config.id, "") + .await + .context("prepare virtiofs")?; + Ok(()) + } + + async fn setup_device_after_start_vm(&self, h: &dyn Hypervisor) -> Result<()> { + setup_inline_virtiofs(&self.config.id, h) + .await + .context("setup inline virtiofs")?; + Ok(()) + } + async fn get_storages(&self) -> Result> { + // setup storage + let mut storages: Vec = Vec::new(); + + let shared_volume: Storage = Storage { + driver: String::from(KATA_VIRTIO_FS_DEV_TYPE), + driver_options: Vec::new(), + source: String::from(MOUNT_GUEST_TAG), + fs_type: String::from(FS_TYPE_VIRTIO_FS), + fs_group: None, + options: SHARED_DIR_VIRTIO_FS_OPTIONS.clone(), + mount_point: String::from(KATA_GUEST_SHARE_DIR), + }; + + storages.push(shared_volume); + Ok(storages) + } + + fn mounted_info_set(&self) -> Arc>> { + self.mounted_info_set.clone() + } +} diff --git a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs new file mode 100644 index 000000000000..db421ada36ae --- /dev/null +++ b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs @@ -0,0 +1,207 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{collections::HashMap, process::Stdio, sync::Arc}; + +use crate::share_fs::share_virtio_fs::{ + prepare_virtiofs, FS_TYPE_VIRTIO_FS, KATA_VIRTIO_FS_DEV_TYPE, MOUNT_GUEST_TAG, +}; +use crate::share_fs::{KATA_GUEST_SHARE_DIR, VIRTIO_FS}; +use agent::Storage; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use hypervisor::Hypervisor; +use kata_types::config::hypervisor::SharedFsInfo; +use tokio::{ + io::{AsyncBufReadExt, BufReader}, + process::{Child, Command}, + sync::{ + mpsc::{channel, Receiver, Sender}, + Mutex, RwLock, + }, +}; + +use super::{ + share_virtio_fs::generate_sock_path, utils::ensure_dir_exist, utils::get_host_ro_shared_path, + virtio_fs_share_mount::VirtiofsShareMount, MountedInfo, ShareFs, ShareFsMount, +}; + +#[derive(Debug, Clone)] +pub struct ShareVirtioFsStandaloneConfig { + id: String, + + // virtio_fs_daemon is the virtio-fs vhost-user daemon path + pub virtio_fs_daemon: String, + // virtio_fs_cache cache mode for fs version cache + pub virtio_fs_cache: String, + // virtio_fs_extra_args passes options to virtiofsd daemon + pub virtio_fs_extra_args: Vec, +} + +#[derive(Default, Debug)] +struct ShareVirtioFsStandaloneInner { + pid: Option, +} + +pub(crate) struct ShareVirtioFsStandalone { + inner: Arc>, + config: ShareVirtioFsStandaloneConfig, + share_fs_mount: Arc, + mounted_info_set: Arc>>, +} + +impl ShareVirtioFsStandalone { 
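    // Note on the standalone flow implemented by the methods below (a sketch of the
    // effective behaviour; exact values come from the hypervisor config): virtiofsd_args()
    // assembles a command line roughly equivalent to
    //   <virtio_fs_daemon> --socket-path <jailer_root>/virtiofsd.sock \
    //       --shared-dir /run/kata-containers/shared/sandboxes/<sid>/ro \
    //       --cache <virtio_fs_cache> --sandbox none --seccomp none [extra args]
    // and setup_virtiofsd() spawns that daemon, then waits until it logs
    // "Waiting for vhost-user socket connection" on stderr before treating the start
    // as successful.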
+ pub(crate) fn new(id: &str, config: &SharedFsInfo) -> Result { + Ok(Self { + inner: Arc::new(RwLock::new(ShareVirtioFsStandaloneInner::default())), + config: ShareVirtioFsStandaloneConfig { + id: id.to_string(), + virtio_fs_daemon: config.virtio_fs_daemon.clone(), + virtio_fs_cache: config.virtio_fs_cache.clone(), + virtio_fs_extra_args: config.virtio_fs_extra_args.clone(), + }, + share_fs_mount: Arc::new(VirtiofsShareMount::new(id)), + mounted_info_set: Arc::new(Mutex::new(HashMap::new())), + }) + } + + fn virtiofsd_args(&self, sock_path: &str) -> Result> { + let source_path = get_host_ro_shared_path(&self.config.id); + ensure_dir_exist(&source_path)?; + let shared_dir = source_path + .to_str() + .ok_or_else(|| anyhow!("convert source path {:?} to str failed", source_path))?; + + let mut args: Vec = vec![ + String::from("--socket-path"), + String::from(sock_path), + String::from("--shared-dir"), + String::from(shared_dir), + String::from("--cache"), + self.config.virtio_fs_cache.clone(), + String::from("--sandbox"), + String::from("none"), + String::from("--seccomp"), + String::from("none"), + ]; + + if !self.config.virtio_fs_extra_args.is_empty() { + let mut extra_args: Vec = self.config.virtio_fs_extra_args.clone(); + args.append(&mut extra_args); + } + + Ok(args) + } + + async fn setup_virtiofsd(&self, h: &dyn Hypervisor) -> Result<()> { + let sock_path = generate_sock_path(&h.get_jailer_root().await?); + let args = self.virtiofsd_args(&sock_path).context("virtiofsd args")?; + + let mut cmd = Command::new(&self.config.virtio_fs_daemon); + let child_cmd = cmd.args(&args).stderr(Stdio::piped()); + let child = child_cmd.spawn().context("spawn virtiofsd")?; + + // update virtiofsd pid{ + { + let mut inner = self.inner.write().await; + inner.pid = child.id(); + } + + let (tx, mut rx): (Sender>, Receiver>) = channel(100); + tokio::spawn(run_virtiofsd(child, tx)); + + // TODO: support timeout + match rx.recv().await.unwrap() { + Ok(_) => { + info!(sl!(), "start virtiofsd successfully"); + Ok(()) + } + Err(e) => { + error!(sl!(), "failed to start virtiofsd {}", e); + self.shutdown_virtiofsd() + .await + .context("shutdown_virtiofsd")?; + Err(anyhow!("failed to start virtiofsd")) + } + } + } + + async fn shutdown_virtiofsd(&self) -> Result<()> { + let mut inner = self.inner.write().await; + + if let Some(pid) = inner.pid.take() { + info!(sl!(), "shutdown virtiofsd pid {}", pid); + let pid = ::nix::unistd::Pid::from_raw(pid as i32); + if let Err(err) = ::nix::sys::signal::kill(pid, nix::sys::signal::SIGKILL) { + if err != ::nix::Error::ESRCH { + return Err(anyhow!("failed to kill virtiofsd pid {} {}", pid, err)); + } + } + } + inner.pid = None; + + Ok(()) + } +} + +async fn run_virtiofsd(mut child: Child, tx: Sender>) -> Result<()> { + let stderr = child.stderr.as_mut().unwrap(); + let stderr_reader = BufReader::new(stderr); + let mut lines = stderr_reader.lines(); + + while let Some(buffer) = lines.next_line().await.context("read next line")? 
{ + let trim_buffer = buffer.trim_end(); + if !trim_buffer.is_empty() { + info!(sl!(), "source: virtiofsd {}", trim_buffer); + } + if buffer.contains("Waiting for vhost-user socket connection") { + tx.send(Ok(())).await.unwrap(); + } + } + + info!(sl!(), "wait virtiofsd {:?}", child.wait().await); + Ok(()) +} + +#[async_trait] +impl ShareFs for ShareVirtioFsStandalone { + fn get_share_fs_mount(&self) -> Arc { + self.share_fs_mount.clone() + } + + async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()> { + prepare_virtiofs(h, VIRTIO_FS, &self.config.id, &h.get_jailer_root().await?) + .await + .context("prepare virtiofs")?; + self.setup_virtiofsd(h).await.context("setup virtiofsd")?; + Ok(()) + } + + async fn setup_device_after_start_vm(&self, _h: &dyn Hypervisor) -> Result<()> { + Ok(()) + } + + async fn get_storages(&self) -> Result> { + let mut storages: Vec = Vec::new(); + + let shared_volume: Storage = Storage { + driver: String::from(KATA_VIRTIO_FS_DEV_TYPE), + driver_options: Vec::new(), + source: String::from(MOUNT_GUEST_TAG), + fs_type: String::from(FS_TYPE_VIRTIO_FS), + fs_group: None, + options: vec![String::from("nodev")], + mount_point: String::from(KATA_GUEST_SHARE_DIR), + }; + + storages.push(shared_volume); + Ok(storages) + } + + fn mounted_info_set(&self) -> Arc>> { + self.mounted_info_set.clone() + } +} diff --git a/src/runtime-rs/crates/resource/src/share_fs/utils.rs b/src/runtime-rs/crates/resource/src/share_fs/utils.rs new file mode 100644 index 000000000000..0b019e9c5608 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/share_fs/utils.rs @@ -0,0 +1,136 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + os::unix::fs::PermissionsExt, + path::{Path, PathBuf}, +}; + +use anyhow::Result; +use kata_sys_util::mount; +use nix::mount::MsFlags; + +use super::*; + +pub(crate) fn mkdir_with_permissions(path_target: PathBuf, mode: u32) -> Result<()> { + let new_path = &path_target; + std::fs::create_dir_all(new_path) + .context(format!("unable to create new path: {:?}", new_path))?; + + // mode format: 0o750, ... + std::fs::set_permissions(new_path, std::fs::Permissions::from_mode(mode))?; + + Ok(()) +} + +pub(crate) fn ensure_dir_exist(path: &Path) -> Result<()> { + if !path.exists() { + std::fs::create_dir_all(path).context(format!("failed to create directory {:?}", path))?; + } + Ok(()) +} + +/// Bind mount the original path to the runtime directory. +pub(crate) fn share_to_guest( + // absolute path for source + source: &str, + // relative path for target + target: &str, + sid: &str, + cid: &str, + readonly: bool, + is_volume: bool, + is_rafs: bool, +) -> Result { + let host_dest = do_get_host_path(target, sid, cid, is_volume, false); + mount::bind_mount_unchecked(source, &host_dest, readonly, MsFlags::MS_SLAVE) + .with_context(|| format!("failed to bind mount {} to {}", source, &host_dest))?; + + // bind mount remount event is not propagated to mount subtrees, so we have + // to remount the read only dir mount point directly. + if readonly { + let dst = do_get_host_path(target, sid, cid, is_volume, true); + mount::bind_remount(dst, readonly).context("bind remount readonly")?; + } + + Ok(do_get_guest_path(target, cid, is_volume, is_rafs)) +} +// Shared path handling: +// 1. create two directories for each sandbox: +// -. /run/kata-containers/shared/sandboxes/$sbx_id/rw/, a host/guest shared directory which is rw +// -. 
/run/kata-containers/shared/sandboxes/$sbx_id/ro/, a host/guest shared directory (virtiofs source dir) which is ro +// +// 2. /run/kata-containers/shared/sandboxes/$sbx_id/rw/ is bind mounted readonly to /run/kata-containers/shared/sandboxes/$sbx_id/ro/, so guest cannot modify it +// +// 3. host-guest shared files/directories are mounted one-level under /run/kata-containers/shared/sandboxes/$sbx_id/rw/passthrough and thus present to guest at one level under run/kata-containers/shared/containers/passthrough. +pub(crate) fn get_host_ro_shared_path(id: &str) -> PathBuf { + Path::new(KATA_HOST_SHARED_DIR).join(id).join("ro") +} + +pub fn get_host_rw_shared_path(sid: &str) -> PathBuf { + Path::new(KATA_HOST_SHARED_DIR).join(sid).join("rw") +} + +pub fn get_host_shared_path(sid: &str) -> PathBuf { + Path::new(KATA_HOST_SHARED_DIR).join(sid) +} + +fn do_get_guest_any_path( + target: &str, + cid: &str, + is_volume: bool, + is_rafs: bool, + is_virtiofs: bool, +) -> String { + let dir = if is_rafs { + RAFS_DIR + } else { + PASSTHROUGH_FS_DIR + }; + let guest_share_dir = if is_virtiofs { + Path::new("/").to_path_buf() + } else { + Path::new(KATA_GUEST_SHARE_DIR).to_path_buf() + }; + + let path = if is_volume && !is_virtiofs { + guest_share_dir.join(dir).join(target) + } else { + guest_share_dir.join(dir).join(cid).join(target) + }; + path.to_str().unwrap().to_string() +} + +pub fn do_get_guest_path(target: &str, cid: &str, is_volume: bool, is_rafs: bool) -> String { + do_get_guest_any_path(target, cid, is_volume, is_rafs, false) +} + +pub fn do_get_guest_share_path(target: &str, cid: &str, is_rafs: bool) -> String { + do_get_guest_any_path(target, cid, false, is_rafs, true) +} + +pub fn do_get_host_path( + target: &str, + sid: &str, + cid: &str, + is_volume: bool, + read_only: bool, +) -> String { + let dir = PASSTHROUGH_FS_DIR; + + let get_host_path = if read_only { + get_host_ro_shared_path + } else { + get_host_rw_shared_path + }; + + let path = if is_volume { + get_host_path(sid).join(dir).join(target) + } else { + get_host_path(sid).join(dir).join(cid).join(target) + }; + path.to_str().unwrap().to_string() +} diff --git a/src/runtime-rs/crates/resource/src/share_fs/virtio_fs_share_mount.rs b/src/runtime-rs/crates/resource/src/share_fs/virtio_fs_share_mount.rs new file mode 100644 index 000000000000..6f875d29ed1c --- /dev/null +++ b/src/runtime-rs/crates/resource/src/share_fs/virtio_fs_share_mount.rs @@ -0,0 +1,242 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use agent::Storage; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use kata_sys_util::mount::{bind_remount, umount_all, umount_timeout}; +use kata_types::k8s::is_watchable_mount; +use kata_types::mount; +use nix::sys::stat::stat; +use std::fs; +use std::path::Path; + +const WATCHABLE_PATH_NAME: &str = "watchable"; +const WATCHABLE_BIND_DEV_TYPE: &str = "watchable-bind"; +pub const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral"; + +use super::{ + get_host_rw_shared_path, + utils::{ + self, do_get_host_path, get_host_ro_shared_path, get_host_shared_path, + mkdir_with_permissions, + }, + ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig, + KATA_GUEST_SHARE_DIR, PASSTHROUGH_FS_DIR, +}; + +#[derive(Debug)] +pub struct VirtiofsShareMount { + id: String, +} + +impl VirtiofsShareMount { + pub fn new(id: &str) -> Self { + Self { id: id.to_string() } + } +} + +#[async_trait] +impl ShareFsMount for 
VirtiofsShareMount { + async fn share_rootfs(&self, config: &ShareFsRootfsConfig) -> Result { + // TODO: select virtiofs or support nydus + let guest_path = utils::share_to_guest( + &config.source, + &config.target, + &self.id, + &config.cid, + config.readonly, + false, + config.is_rafs, + ) + .context("share to guest")?; + Ok(ShareFsMountResult { + guest_path, + storages: vec![], + }) + } + + async fn share_volume(&self, config: &ShareFsVolumeConfig) -> Result { + let mut guest_path = utils::share_to_guest( + &config.source, + &config.target, + &self.id, + &config.cid, + config.readonly, + true, + config.is_rafs, + ) + .context("share to guest")?; + + // watchable mounts + if is_watchable_mount(&config.source) { + // Create path in shared directory for creating watchable mount: + let host_rw_path = utils::get_host_rw_shared_path(&self.id); + + // "/run/kata-containers/shared/sandboxes/$sid/rw/passthrough/watchable" + let watchable_host_path = Path::new(&host_rw_path) + .join(PASSTHROUGH_FS_DIR) + .join(WATCHABLE_PATH_NAME); + + mkdir_with_permissions(watchable_host_path.clone(), 0o750).context(format!( + "unable to create watchable path {:?}", + watchable_host_path + ))?; + + // path: /run/kata-containers/shared/containers/passthrough/watchable/config-map-name + let file_name = Path::new(&guest_path) + .file_name() + .context("get file name from guest path")?; + let watchable_guest_mount = Path::new(KATA_GUEST_SHARE_DIR) + .join(PASSTHROUGH_FS_DIR) + .join(WATCHABLE_PATH_NAME) + .join(file_name) + .into_os_string() + .into_string() + .map_err(|e| anyhow!("failed to get watchable guest mount path {:?}", e))?; + + let watchable_storage: Storage = Storage { + driver: String::from(WATCHABLE_BIND_DEV_TYPE), + driver_options: Vec::new(), + source: guest_path, + fs_type: String::from("bind"), + fs_group: None, + options: config.mount_options.clone(), + mount_point: watchable_guest_mount.clone(), + }; + + // Update the guest_path, in order to identify what will + // change in the OCI spec. + guest_path = watchable_guest_mount; + + let storages = vec![watchable_storage]; + + return Ok(ShareFsMountResult { + guest_path, + storages, + }); + } else if config.mount.r#type == mount::KATA_EPHEMERAL_VOLUME_TYPE { + // refer to the golang `handleEphemeralStorage` code at + // https://github.com/kata-containers/kata-containers/blob/9516286f6dd5cfd6b138810e5d7c9e01cf6fc043/src/runtime/virtcontainers/kata_agent.go#L1354 + + let source = &config.mount.source; + let file_stat = + stat(Path::new(source)).with_context(|| format!("mount source {}", source))?; + + // if volume's gid isn't root group(default group), this means there's + // an specific fsGroup is set on this local volume, then it should pass + // to guest. 
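            // For example, a volume directory owned by gid 1000 yields
            // dir_options = ["fsgid=1000"], which the agent applies when it creates the
            // tmpfs-backed ephemeral mount in the guest; a root-group (gid 0) source
            // adds no fsgid option at all.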
+ let dir_options = if file_stat.st_gid != 0 { + vec![format!("fsgid={}", file_stat.st_gid)] + } else { + vec![] + }; + + let file_name = Path::new(source) + .file_name() + .context("get file name from mount.source")?; + let source = Path::new(EPHEMERAL_PATH) + .join(file_name) + .into_os_string() + .into_string() + .map_err(|e| anyhow!("failed to get ephemeral path {:?}", e))?; + + // Create a storage struct so that kata agent is able to create + // tmpfs backed volume inside the VM + let ephemeral_storage = agent::Storage { + driver: String::from(mount::KATA_EPHEMERAL_VOLUME_TYPE), + driver_options: Vec::new(), + source: String::from("tmpfs"), + fs_type: String::from("tmpfs"), + fs_group: None, + options: dir_options, + mount_point: source.clone(), + }; + + guest_path = source; + let storages = vec![ephemeral_storage]; + + return Ok(ShareFsMountResult { + guest_path, + storages, + }); + } + + Ok(ShareFsMountResult { + guest_path, + storages: vec![], + }) + } + + async fn upgrade_to_rw(&self, file_name: &str) -> Result<()> { + // Remount readonly directory with readwrite permission + let host_dest = do_get_host_path(file_name, &self.id, "", true, true); + bind_remount(host_dest, false) + .context("remount readonly directory with readwrite permission")?; + // Remount readwrite directory with readwrite permission + let host_dest = do_get_host_path(file_name, &self.id, "", true, false); + bind_remount(host_dest, false) + .context("remount readwrite directory with readwrite permission")?; + Ok(()) + } + + async fn downgrade_to_ro(&self, file_name: &str) -> Result<()> { + // Remount readwrite directory with readonly permission + let host_dest = do_get_host_path(file_name, &self.id, "", true, false); + bind_remount(host_dest, true) + .context("remount readwrite directory with readonly permission")?; + // Remount readonly directory with readonly permission + let host_dest = do_get_host_path(file_name, &self.id, "", true, true); + bind_remount(host_dest, true) + .context("remount readonly directory with readonly permission")?; + Ok(()) + } + + async fn umount_volume(&self, file_name: &str) -> Result<()> { + let host_dest = do_get_host_path(file_name, &self.id, "", true, false); + umount_timeout(&host_dest, 0).context("umount volume")?; + // Umount event will be propagated to ro directory + + // Remove the directory of mointpoint + if let Ok(md) = fs::metadata(&host_dest) { + if md.is_file() { + fs::remove_file(&host_dest).context("remove the volume mount point as a file")?; + } + if md.is_dir() { + fs::remove_dir(&host_dest).context("remove the volume mount point as a dir")?; + } + } + Ok(()) + } + + async fn umount_rootfs(&self, config: &ShareFsRootfsConfig) -> Result<()> { + let host_dest = do_get_host_path(&config.target, &self.id, &config.cid, false, false); + umount_timeout(&host_dest, 0).context("umount rootfs")?; + + // Remove the directory of mointpoint + if let Ok(md) = fs::metadata(&host_dest) { + if md.is_dir() { + fs::remove_dir(&host_dest).context("remove the rootfs mount point as a dir")?; + } + } + + Ok(()) + } + + async fn cleanup(&self, sid: &str) -> Result<()> { + // Unmount ro path + let host_ro_dest = get_host_ro_shared_path(sid); + umount_all(host_ro_dest.clone(), true).context("failed to umount ro path")?; + fs::remove_dir_all(host_ro_dest).context("failed to remove ro path")?; + // As the rootfs and volume have been umounted before calling this function, so just remove the rw dir directly + let host_rw_dest = get_host_rw_shared_path(sid); + 
fs::remove_dir_all(host_rw_dest).context("failed to remove rw path")?; + // remove the host share directory + let host_path = get_host_shared_path(sid); + fs::remove_dir_all(host_path).context("failed to remove host shared path")?; + Ok(()) + } +} diff --git a/src/runtime-rs/crates/resource/src/volume/block_volume.rs b/src/runtime-rs/crates/resource/src/volume/block_volume.rs new file mode 100644 index 000000000000..d0e361b24326 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/volume/block_volume.rs @@ -0,0 +1,207 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use nix::sys::{stat, stat::SFlag}; +use tokio::sync::RwLock; + +use super::Volume; +use crate::volume::utils::{ + generate_shared_path, get_direct_volume_path, volume_mount_info, DEFAULT_VOLUME_FS_TYPE, + KATA_DIRECT_VOLUME_TYPE, KATA_MOUNT_BIND_TYPE, +}; +use hypervisor::{ + device::{ + device_manager::{do_handle_device, get_block_driver, DeviceManager}, + DeviceConfig, DeviceType, + }, + BlockConfig, +}; + +#[derive(Clone)] +pub(crate) struct BlockVolume { + storage: Option, + mount: oci::Mount, + device_id: String, +} + +/// BlockVolume for bind-mount block volume and direct block volume +impl BlockVolume { + pub(crate) async fn new( + d: &RwLock, + m: &oci::Mount, + read_only: bool, + cid: &str, + sid: &str, + ) -> Result { + let mnt_src: &str = &m.source; + // default block device fs type: ext4. + let mut blk_dev_fstype = DEFAULT_VOLUME_FS_TYPE.to_string(); + + let block_driver = get_block_driver(d).await; + + let block_device_config = match m.r#type.as_str() { + KATA_MOUNT_BIND_TYPE => { + let fstat = stat::stat(mnt_src).context(format!("stat {}", m.source))?; + + BlockConfig { + major: stat::major(fstat.st_rdev) as i64, + minor: stat::minor(fstat.st_rdev) as i64, + driver_option: block_driver, + ..Default::default() + } + } + KATA_DIRECT_VOLUME_TYPE => { + // get volume mountinfo from mountinfo.json + let v = volume_mount_info(mnt_src) + .context("deserde information from mountinfo.json")?; + // check volume type + if v.volume_type != KATA_DIRECT_VOLUME_TYPE { + return Err(anyhow!("volume type {:?} is invalid", v.volume_type)); + } + + let fstat = stat::stat(v.device.as_str()) + .with_context(|| format!("stat volume device file: {}", v.device.clone()))?; + if SFlag::from_bits_truncate(fstat.st_mode) != SFlag::S_IFREG + && SFlag::from_bits_truncate(fstat.st_mode) != SFlag::S_IFBLK + { + return Err(anyhow!( + "invalid volume device {:?} for volume type {:?}", + v.device, + v.volume_type + )); + } + + blk_dev_fstype = v.fs_type.clone(); + + BlockConfig { + path_on_host: v.device, + driver_option: block_driver, + ..Default::default() + } + } + _ => { + return Err(anyhow!( + "unsupport direct block volume r#type: {:?}", + m.r#type.as_str() + )) + } + }; + + // create and insert block device into Kata VM + let device_info = do_handle_device(d, &DeviceConfig::BlockCfg(block_device_config.clone())) + .await + .context("do handle device failed.")?; + + // generate host guest shared path + let guest_path = generate_shared_path(m.destination.clone(), read_only, cid, sid) + .await + .context("generate host-guest shared path failed")?; + + // storage + let mut storage = agent::Storage { + mount_point: guest_path.clone(), + ..Default::default() + }; + + storage.options = if read_only { + vec!["ro".to_string()] + } else { + Vec::new() + }; + + // As the true Block Device wrapped 
in DeviceType, we need to + // get it out from the wrapper, and the device_id will be for + // BlockVolume. + // safe here, device_info is correct and only unwrap it. + let mut device_id = String::new(); + if let DeviceType::Block(device) = device_info { + // blk, mmioblk + storage.driver = device.config.driver_option; + // /dev/vdX + storage.source = device.config.virt_path; + device_id = device.device_id; + } + + // In some case, dest is device /dev/xxx + if m.destination.clone().starts_with("/dev") { + storage.fs_type = "bind".to_string(); + storage.options.append(&mut m.options.clone()); + } else { + // usually, the dest is directory. + storage.fs_type = blk_dev_fstype; + } + + let mount = oci::Mount { + destination: m.destination.clone(), + r#type: storage.fs_type.clone(), + source: guest_path, + options: m.options.clone(), + }; + + Ok(Self { + storage: Some(storage), + mount, + device_id, + }) + } +} + +#[async_trait] +impl Volume for BlockVolume { + fn get_volume_mount(&self) -> Result> { + Ok(vec![self.mount.clone()]) + } + + fn get_storage(&self) -> Result> { + let s = if let Some(s) = self.storage.as_ref() { + vec![s.clone()] + } else { + vec![] + }; + + Ok(s) + } + + async fn cleanup(&self, device_manager: &RwLock) -> Result<()> { + device_manager + .write() + .await + .try_remove_device(&self.device_id) + .await + } + + fn get_device_id(&self) -> Result> { + Ok(Some(self.device_id.clone())) + } +} + +pub(crate) fn is_block_volume(m: &oci::Mount) -> Result { + let vol_types = [KATA_MOUNT_BIND_TYPE, KATA_DIRECT_VOLUME_TYPE]; + if !vol_types.contains(&m.r#type.as_str()) { + return Ok(false); + } + + let source = if m.r#type.as_str() == KATA_DIRECT_VOLUME_TYPE { + get_direct_volume_path(&m.source).context("get direct volume path failed")? + } else { + m.source.clone() + }; + + let fstat = + stat::stat(source.as_str()).context(format!("stat mount source {} failed.", source))?; + let s_flag = SFlag::from_bits_truncate(fstat.st_mode); + + match m.r#type.as_str() { + // case: mount bind and block device + KATA_MOUNT_BIND_TYPE if s_flag == SFlag::S_IFBLK => Ok(true), + // case: directvol and directory + KATA_DIRECT_VOLUME_TYPE if s_flag == SFlag::S_IFDIR => Ok(true), + // else: unsupported or todo for other volume type. 
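        // For instance, a "bind" mount whose source is a regular directory is rejected
        // here (Ok(false)) and is expected to be picked up later by the share-fs volume
        // handling instead.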
+ _ => Ok(false), + } +} diff --git a/src/runtime-rs/crates/resource/src/volume/default_volume.rs b/src/runtime-rs/crates/resource/src/volume/default_volume.rs new file mode 100644 index 000000000000..827d2b121f17 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/volume/default_volume.rs @@ -0,0 +1,48 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use hypervisor::device::device_manager::DeviceManager; +use tokio::sync::RwLock; + +use anyhow::Result; +use async_trait::async_trait; + +use super::Volume; + +#[derive(Debug)] +pub(crate) struct DefaultVolume { + mount: oci::Mount, +} + +/// DefaultVolume: passthrough the mount to guest +impl DefaultVolume { + pub fn new(mount: &oci::Mount) -> Result { + Ok(Self { + mount: mount.clone(), + }) + } +} + +#[async_trait] +impl Volume for DefaultVolume { + fn get_volume_mount(&self) -> anyhow::Result> { + Ok(vec![self.mount.clone()]) + } + + fn get_storage(&self) -> Result> { + Ok(vec![]) + } + + async fn cleanup(&self, _device_manager: &RwLock) -> Result<()> { + // TODO: Clean up DefaultVolume + warn!(sl!(), "Cleaning up DefaultVolume is still unimplemented."); + Ok(()) + } + + fn get_device_id(&self) -> Result> { + Ok(None) + } +} diff --git a/src/runtime-rs/crates/resource/src/volume/hugepage.rs b/src/runtime-rs/crates/resource/src/volume/hugepage.rs new file mode 100644 index 000000000000..ca8502e7e29a --- /dev/null +++ b/src/runtime-rs/crates/resource/src/volume/hugepage.rs @@ -0,0 +1,228 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + collections::HashMap, + fs::File, + io::{BufRead, BufReader}, +}; + +use crate::share_fs::EPHEMERAL_PATH; +use agent::Storage; +use anyhow::{anyhow, Context, Ok, Result}; +use async_trait::async_trait; +use byte_unit::Byte; +use hypervisor::{device::device_manager::DeviceManager, HUGETLBFS}; +use kata_sys_util::{fs::get_base_name, mount::PROC_MOUNTS_FILE}; +use kata_types::mount::KATA_EPHEMERAL_VOLUME_TYPE; +use tokio::sync::RwLock; + +use super::{Volume, BIND}; + +type PageSize = Byte; +type Limit = u64; + +const NODEV: &str = "nodev"; + +// container hugepage +pub(crate) struct Hugepage { + // storage info + storage: Option, + // mount info + mount: oci::Mount, +} + +// handle hugepage +impl Hugepage { + pub(crate) fn new( + mount: &oci::Mount, + hugepage_limits_map: HashMap, + fs_options: Vec, + ) -> Result { + // Create mount option string + let page_size = get_page_size(fs_options).context("failed to get page size")?; + let option = hugepage_limits_map + .get(&page_size) + .map(|limit| format!("pagesize={},size={}", page_size.get_bytes(), limit)) + .context("failed to get hugepage option")?; + let base_name = get_base_name(mount.source.clone())? 
+ .into_string() + .map_err(|e| anyhow!("failed to convert to string{:?}", e))?; + let mut mount = mount.clone(); + // Set the mount source path to a path that resides inside the VM + mount.source = format!("{}{}{}", EPHEMERAL_PATH, "/", base_name); + // Set the mount type to "bind" + mount.r#type = BIND.to_string(); + + // Create a storage struct so that kata agent is able to create + // hugetlbfs backed volume inside the VM + let storage = Storage { + driver: KATA_EPHEMERAL_VOLUME_TYPE.to_string(), + source: NODEV.to_string(), + fs_type: HUGETLBFS.to_string(), + mount_point: mount.source.clone(), + options: vec![option], + ..Default::default() + }; + Ok(Self { + storage: Some(storage), + mount, + }) + } +} + +#[async_trait] +impl Volume for Hugepage { + fn get_volume_mount(&self) -> Result> { + Ok(vec![self.mount.clone()]) + } + + fn get_storage(&self) -> Result> { + let s = if let Some(s) = self.storage.as_ref() { + vec![s.clone()] + } else { + vec![] + }; + Ok(s) + } + + async fn cleanup(&self, _device_manager: &RwLock) -> Result<()> { + Ok(()) + } + + fn get_device_id(&self) -> Result> { + Ok(None) + } +} + +pub(crate) fn get_huge_page_option(m: &oci::Mount) -> Result>> { + if m.source.is_empty() { + return Err(anyhow!("empty mount source")); + } + let file = File::open(PROC_MOUNTS_FILE).context("failed open file")?; + let reader = BufReader::new(file); + for line in reader.lines().flatten() { + let items: Vec<&str> = line.split(' ').collect(); + if m.source == items[1] && items[2] == HUGETLBFS { + let fs_options: Vec<&str> = items[3].split(',').collect(); + return Ok(Some( + fs_options + .iter() + .map(|&s| s.to_string()) + .collect::>(), + )); + } + } + Ok(None) +} + +// TODO add hugepage limit to sandbox memory once memory hotplug is enabled +// https://github.com/kata-containers/kata-containers/issues/5880 +pub(crate) fn get_huge_page_limits_map(spec: &oci::Spec) -> Result> { + let mut hugepage_limits_map: HashMap = HashMap::new(); + if let Some(l) = &spec.linux { + if let Some(r) = &l.resources { + let hugepage_limits = r.hugepage_limits.clone(); + for hugepage_limit in hugepage_limits { + // the pagesize send from oci spec is MB or GB, change it to Mi and Gi + let page_size = hugepage_limit.page_size.replace('B', "i"); + let page_size = Byte::from_str(page_size) + .context("failed to create Byte object from String")?; + hugepage_limits_map.insert(page_size, hugepage_limit.limit); + } + return Ok(hugepage_limits_map); + } + return Ok(hugepage_limits_map); + } + Ok(hugepage_limits_map) +} + +fn get_page_size(fs_options: Vec) -> Result { + for fs_option in fs_options { + if fs_option.starts_with("pagesize=") { + let page_size = fs_option + .strip_prefix("pagesize=") + // the parameters passed are in unit M or G, append i to be Mi and Gi + .map(|s| format!("{}i", s)) + .context("failed to strip prefix pagesize")?; + return Byte::from_str(page_size) + .map_err(|_| anyhow!("failed to convert string to byte")); + } + } + Err(anyhow!("failed to get page size")) +} + +#[cfg(test)] +mod tests { + + use std::{collections::HashMap, fs}; + + use crate::volume::hugepage::{get_page_size, HUGETLBFS, NODEV}; + + use super::{get_huge_page_limits_map, get_huge_page_option}; + use byte_unit::Byte; + use nix::mount::{mount, umount, MsFlags}; + use oci::{Linux, LinuxHugepageLimit, LinuxResources}; + use test_utils::skip_if_not_root; + + #[test] + fn test_get_huge_page_option() { + let format_sizes = ["1GB", "2MB"]; + let mut huge_page_limits: Vec = vec![]; + for format_size in format_sizes { + 
huge_page_limits.push(LinuxHugepageLimit { + page_size: format_size.to_string(), + limit: 100000, + }); + } + + let spec = oci::Spec { + linux: Some(Linux { + resources: Some(LinuxResources { + hugepage_limits: huge_page_limits, + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }; + + assert!(get_huge_page_limits_map(&spec).is_ok()); + + let mut expect_res = HashMap::new(); + expect_res.insert(Byte::from_str("1Gi").ok().unwrap(), 100000); + expect_res.insert(Byte::from_str("2Mi").ok().unwrap(), 100000); + assert_eq!(get_huge_page_limits_map(&spec).unwrap(), expect_res); + } + + #[test] + fn test_get_huge_page_size() { + skip_if_not_root!(); + let format_sizes = ["1Gi", "2Mi"]; + for format_size in format_sizes { + let dir = tempfile::tempdir().unwrap(); + let dst = dir.path().join(format!("hugepages-{}", format_size)); + fs::create_dir_all(&dst).unwrap(); + mount( + Some(NODEV), + &dst, + Some(HUGETLBFS), + MsFlags::MS_NODEV, + Some(format!("pagesize={}", format_size).as_str()), + ) + .unwrap(); + let mount = oci::Mount { + source: dst.to_str().unwrap().to_string(), + ..Default::default() + }; + let option = get_huge_page_option(&mount).unwrap().unwrap(); + let page_size = get_page_size(option).unwrap(); + assert_eq!(page_size, Byte::from_str(format_size).unwrap()); + umount(&dst).unwrap(); + fs::remove_dir(&dst).unwrap(); + } + } +} diff --git a/src/runtime-rs/crates/resource/src/volume/mod.rs b/src/runtime-rs/crates/resource/src/volume/mod.rs new file mode 100644 index 000000000000..230b7098bf42 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/volume/mod.rs @@ -0,0 +1,147 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod block_volume; +mod default_volume; +pub mod hugepage; +mod share_fs_volume; +mod shm_volume; +pub mod utils; + +pub mod vfio_volume; +use vfio_volume::is_vfio_volume; + +pub mod spdk_volume; +use spdk_volume::is_spdk_volume; + +use std::{sync::Arc, vec::Vec}; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use tokio::sync::RwLock; + +use self::hugepage::{get_huge_page_limits_map, get_huge_page_option}; +use crate::{share_fs::ShareFs, volume::block_volume::is_block_volume}; +use agent::Agent; +use hypervisor::device::device_manager::DeviceManager; + +const BIND: &str = "bind"; + +#[async_trait] +pub trait Volume: Send + Sync { + fn get_volume_mount(&self) -> Result>; + fn get_storage(&self) -> Result>; + fn get_device_id(&self) -> Result>; + async fn cleanup(&self, device_manager: &RwLock) -> Result<()>; +} + +#[derive(Default)] +pub struct VolumeResourceInner { + volumes: Vec>, +} + +#[derive(Default)] +pub struct VolumeResource { + inner: Arc>, +} + +impl VolumeResource { + pub fn new() -> Self { + Self::default() + } + + pub async fn handler_volumes( + &self, + share_fs: &Option>, + cid: &str, + spec: &oci::Spec, + d: &RwLock, + sid: &str, + agent: Arc, + ) -> Result>> { + let mut volumes: Vec> = vec![]; + let oci_mounts = &spec.mounts; + info!(sl!(), " oci mount is : {:?}", oci_mounts.clone()); + // handle mounts + for m in oci_mounts { + let read_only = m.options.iter().any(|opt| opt == "ro"); + let volume: Arc = if shm_volume::is_shim_volume(m) { + let shm_size = shm_volume::DEFAULT_SHM_SIZE; + Arc::new( + shm_volume::ShmVolume::new(m, shm_size) + .with_context(|| format!("new shm volume {:?}", m))?, + ) + } else if is_block_volume(m).context("block volume type")? 
{ + // handle block volume + Arc::new( + block_volume::BlockVolume::new(d, m, read_only, cid, sid) + .await + .with_context(|| format!("new share fs volume {:?}", m))?, + ) + } else if is_vfio_volume(m) { + Arc::new( + vfio_volume::VfioVolume::new(d, m, read_only, cid, sid) + .await + .with_context(|| format!("new vfio volume {:?}", m))?, + ) + } else if is_spdk_volume(m) { + Arc::new( + spdk_volume::SPDKVolume::new(d, m, read_only, cid, sid) + .await + .with_context(|| format!("create spdk volume {:?}", m))?, + ) + } else if let Some(options) = + get_huge_page_option(m).context("failed to check huge page")? + { + // get hugepage limits from oci + let hugepage_limits = + get_huge_page_limits_map(spec).context("get huge page option")?; + // handle container hugepage + Arc::new( + hugepage::Hugepage::new(m, hugepage_limits, options) + .with_context(|| format!("handle hugepages {:?}", m))?, + ) + } else if share_fs_volume::is_share_fs_volume(m) { + Arc::new( + share_fs_volume::ShareFsVolume::new(share_fs, m, cid, read_only, agent.clone()) + .await + .with_context(|| format!("new share fs volume {:?}", m))?, + ) + } else if is_skip_volume(m) { + info!(sl!(), "skip volume {:?}", m); + continue; + } else { + Arc::new( + default_volume::DefaultVolume::new(m) + .with_context(|| format!("new default volume {:?}", m))?, + ) + }; + + volumes.push(volume.clone()); + let mut inner = self.inner.write().await; + inner.volumes.push(volume); + } + + Ok(volumes) + } + + pub async fn dump(&self) { + let inner = self.inner.read().await; + for v in &inner.volumes { + info!( + sl!(), + "volume mount {:?}: count {}", + v.get_volume_mount(), + Arc::strong_count(v) + ); + } + } +} + +fn is_skip_volume(_m: &oci::Mount) -> bool { + // TODO: support volume check + false +} diff --git a/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs b/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs new file mode 100644 index 000000000000..098dc399f7a1 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs @@ -0,0 +1,373 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + fs::File, + io::Read, + os::unix::fs::MetadataExt, + path::{Path, PathBuf}, + str::FromStr, + sync::Arc, +}; + +use agent::Agent; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use hypervisor::device::device_manager::DeviceManager; +use tokio::sync::RwLock; + +use super::Volume; +use crate::share_fs::{MountedInfo, ShareFs, ShareFsVolumeConfig}; +use kata_types::mount; + +use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR; +use crate::share_fs::PASSTHROUGH_FS_DIR; + +const SYS_MOUNT_PREFIX: [&str; 2] = ["/proc", "/sys"]; + +// copy file to container's rootfs if filesystem sharing is not supported, otherwise +// bind mount it in the shared directory. +// Ignore /dev, directories and all other device files. We handle +// only regular files in /dev. It does not make sense to pass the host +// device nodes to the guest. +// skip the volumes whose source had already set to guest share dir. 
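// Illustrative example of the two paths below (paths assembled from the constants
// used in this module), for a host file /etc/resolv.conf mounted into a container
// of sandbox <sid>:
//   - with a share fs: share_volume() bind mounts it under
//     /run/kata-containers/shared/sandboxes/<sid>/rw/passthrough/<mount name> and
//     the guest reaches it through the virtio-fs share at
//     /run/kata-containers/shared/containers/passthrough/<mount name>;
//   - without a share fs: the file content is pushed to the agent via
//     CopyFileRequest and placed at
//     <DEFAULT_KATA_GUEST_SANDBOX_DIR>/<PASSTHROUGH_FS_DIR>/<mount name> inside the
//     guest, where <mount name> has the form "sandbox-<short uuid>-resolv.conf"
//     (see generate_mount_path()).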
+pub(crate) struct ShareFsVolume { + share_fs: Option>, + mounts: Vec, + storages: Vec, +} + +impl ShareFsVolume { + pub(crate) async fn new( + share_fs: &Option>, + m: &oci::Mount, + cid: &str, + readonly: bool, + agent: Arc, + ) -> Result { + // The file_name is in the format of "sandbox-{uuid}-{file_name}" + let file_name = Path::new(&m.source).file_name().unwrap().to_str().unwrap(); + let file_name = generate_mount_path("sandbox", file_name); + + let mut volume = Self { + share_fs: share_fs.as_ref().map(Arc::clone), + mounts: vec![], + storages: vec![], + }; + match share_fs { + None => { + let src = match std::fs::canonicalize(&m.source) { + Err(err) => { + return Err(anyhow!(format!( + "failed to canonicalize file {} {:?}", + &m.source, err + ))) + } + Ok(src) => src, + }; + + // If the mount source is a file, we can copy it to the sandbox + if src.is_file() { + // This is where we set the value for the guest path + let dest = [ + DEFAULT_KATA_GUEST_SANDBOX_DIR, + PASSTHROUGH_FS_DIR, + file_name.clone().as_str(), + ] + .join("/"); + + debug!( + sl!(), + "copy local file {:?} to guest {:?}", + &m.source, + dest.clone() + ); + + // Read file metadata + let file_metadata = std::fs::metadata(src.clone()) + .with_context(|| format!("Failed to read metadata from file: {:?}", src))?; + + // Open file + let mut file = File::open(&src) + .with_context(|| format!("Failed to open file: {:?}", src))?; + + // Open read file contents to buffer + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer) + .with_context(|| format!("Failed to read file: {:?}", src))?; + + // Create gRPC request + let r = agent::CopyFileRequest { + path: dest.clone(), + file_size: file_metadata.len() as i64, + uid: file_metadata.uid() as i32, + gid: file_metadata.gid() as i32, + file_mode: file_metadata.mode(), + data: buffer, + ..Default::default() + }; + + debug!(sl!(), "copy_file: {:?} to sandbox {:?}", &src, dest.clone()); + + // Issue gRPC request to agent + agent.copy_file(r).await.with_context(|| { + format!( + "copy file request failed: src: {:?}, dest: {:?}", + file_name, dest + ) + })?; + + // append oci::Mount structure to volume mounts + volume.mounts.push(oci::Mount { + destination: m.destination.clone(), + r#type: "bind".to_string(), + source: dest.clone(), + options: m.options.clone(), + }) + } else { + // If not, we can ignore it. Let's issue a warning so that the user knows. + warn!( + sl!(), + "Ignoring non-regular file as FS sharing not supported. 
mount: {:?}", m + ); + } + } + Some(share_fs) => { + let share_fs_mount = share_fs.get_share_fs_mount(); + let mounted_info_set = share_fs.mounted_info_set(); + let mut mounted_info_set = mounted_info_set.lock().await; + if let Some(mut mounted_info) = mounted_info_set.get(&m.source).cloned() { + // Mounted at least once + let guest_path = mounted_info + .guest_path + .clone() + .as_os_str() + .to_str() + .unwrap() + .to_owned(); + if !readonly && mounted_info.readonly() { + // The current mount should be upgraded to readwrite permission + info!( + sl!(), + "The mount will be upgraded, mount = {:?}, cid = {}", m, cid + ); + share_fs_mount + .upgrade_to_rw( + &mounted_info + .file_name() + .context("get name of mounted info")?, + ) + .await + .context("upgrade mount")?; + } + if readonly { + mounted_info.ro_ref_count += 1; + } else { + mounted_info.rw_ref_count += 1; + } + mounted_info_set.insert(m.source.clone(), mounted_info); + + volume.mounts.push(oci::Mount { + destination: m.destination.clone(), + r#type: "bind".to_string(), + source: guest_path, + options: m.options.clone(), + }) + } else { + // Not mounted ever + let mount_result = share_fs_mount + .share_volume(&ShareFsVolumeConfig { + // The scope of shared volume is sandbox + cid: String::from(""), + source: m.source.clone(), + target: file_name.clone(), + readonly, + mount_options: m.options.clone(), + mount: m.clone(), + is_rafs: false, + }) + .await + .context("mount shared volume")?; + let mounted_info = MountedInfo::new( + PathBuf::from_str(&mount_result.guest_path) + .context("convert guest path")?, + readonly, + ); + mounted_info_set.insert(m.source.clone(), mounted_info); + // set storages for the volume + volume.storages = mount_result.storages; + + // set mount for the volume + volume.mounts.push(oci::Mount { + destination: m.destination.clone(), + r#type: "bind".to_string(), + source: mount_result.guest_path, + options: m.options.clone(), + }); + } + } + } + Ok(volume) + } +} + +#[async_trait] +impl Volume for ShareFsVolume { + fn get_volume_mount(&self) -> anyhow::Result> { + Ok(self.mounts.clone()) + } + + fn get_storage(&self) -> Result> { + Ok(self.storages.clone()) + } + + async fn cleanup(&self, _device_manager: &RwLock) -> Result<()> { + let share_fs = match self.share_fs.as_ref() { + Some(fs) => fs, + None => return Ok(()), + }; + + let mounted_info_set = share_fs.mounted_info_set(); + let mut mounted_info_set = mounted_info_set.lock().await; + for m in self.mounts.iter() { + let (host_source, mut mounted_info) = match mounted_info_set + .iter() + .find(|entry| entry.1.guest_path.as_os_str().to_str().unwrap() == m.source) + .map(|entry| (entry.0.to_owned(), entry.1.clone())) + { + Some(entry) => entry, + None => { + warn!( + sl!(), + "The mounted info for guest path {} not found", m.source + ); + continue; + } + }; + + let old_readonly = mounted_info.readonly(); + + if m.options.iter().any(|opt| *opt == "ro") { + mounted_info.ro_ref_count -= 1; + } else { + mounted_info.rw_ref_count -= 1; + } + + debug!( + sl!(), + "Ref count for {} was updated to {} due to volume cleanup", + host_source, + mounted_info.ref_count() + ); + let share_fs_mount = share_fs.get_share_fs_mount(); + let file_name = mounted_info.file_name()?; + + if mounted_info.ref_count() > 0 { + // Downgrade to readonly if no container needs readwrite permission + if !old_readonly && mounted_info.readonly() { + info!(sl!(), "Downgrade {} to readonly due to no container that needs readwrite permission", host_source); + share_fs_mount + 
.downgrade_to_ro(&file_name) + .await + .context("Downgrade volume")?; + } + mounted_info_set.insert(host_source.clone(), mounted_info); + } else { + info!( + sl!(), + "The path will be umounted due to no references, host_source = {}", host_source + ); + mounted_info_set.remove(&host_source); + // Umount the volume + share_fs_mount + .umount_volume(&file_name) + .await + .context("Umount volume")? + } + } + + Ok(()) + } + + fn get_device_id(&self) -> Result> { + Ok(None) + } +} + +pub(crate) fn is_share_fs_volume(m: &oci::Mount) -> bool { + (m.r#type == "bind" || m.r#type == mount::KATA_EPHEMERAL_VOLUME_TYPE) + && !is_host_device(&m.destination) + && !is_system_mount(&m.source) +} + +fn is_host_device(dest: &str) -> bool { + if dest == "/dev" { + return true; + } + + if dest.starts_with("/dev/") { + let src = match std::fs::canonicalize(dest) { + Err(_) => return false, + Ok(src) => src, + }; + + if src.is_file() { + return false; + } + + return true; + } + + false +} + +// Skip mounting certain system paths("/sys/*", "/proc/*") +// from source on the host side into the container as it does not +// make sense to do so. +// Agent will support this kind of bind mount. +fn is_system_mount(src: &str) -> bool { + for p in SYS_MOUNT_PREFIX { + let sub_dir_p = format!("{}/", p); + if src == p || src.contains(sub_dir_p.as_str()) { + return true; + } + } + false +} + +// Note, don't generate random name, attaching rafs depends on the predictable name. +pub fn generate_mount_path(id: &str, file_name: &str) -> String { + let mut nid = String::from(id); + if nid.len() > 10 { + nid = nid.chars().take(10).collect(); + } + + let mut uid = uuid::Uuid::new_v4().to_string(); + let uid_vec: Vec<&str> = uid.splitn(2, '-').collect(); + uid = String::from(uid_vec[0]); + + format!("{}-{}-{}", nid, uid, file_name) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_is_system_mount() { + let sys_dir = "/sys"; + let proc_dir = "/proc"; + let sys_sub_dir = "/sys/fs/cgroup"; + let proc_sub_dir = "/proc/cgroups"; + let not_sys_dir = "/root"; + + assert!(is_system_mount(sys_dir)); + assert!(is_system_mount(proc_dir)); + assert!(is_system_mount(sys_sub_dir)); + assert!(is_system_mount(proc_sub_dir)); + assert!(!is_system_mount(not_sys_dir)); + } +} diff --git a/src/runtime-rs/crates/resource/src/volume/shm_volume.rs b/src/runtime-rs/crates/resource/src/volume/shm_volume.rs new file mode 100644 index 000000000000..ea769b2dc196 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/volume/shm_volume.rs @@ -0,0 +1,117 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::path::Path; + +use anyhow::Result; +use async_trait::async_trait; +use hypervisor::device::device_manager::DeviceManager; +use tokio::sync::RwLock; + +use super::Volume; +use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR; + +pub const SHM_DIR: &str = "shm"; +// DEFAULT_SHM_SIZE is the default shm size to be used in case host +// IPC is used. +pub const DEFAULT_SHM_SIZE: u64 = 65536 * 1024; + +// KATA_EPHEMERAL_DEV_TYPE creates a tmpfs backed volume for sharing files between containers. 
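// For a typical /dev/shm mount handled by ShmVolume below (shm_size > 0), the
// resulting Storage entry uses driver "ephemeral", source "shm", fs_type "tmpfs",
// options ["noexec", "nosuid", "nodev", "mode=1777", "size=67108864"]
// (DEFAULT_SHM_SIZE) and mount_point <DEFAULT_KATA_GUEST_SANDBOX_DIR>/shm, while the
// container mount itself becomes an rbind of that guest path.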
+pub const KATA_EPHEMERAL_DEV_TYPE: &str = "ephemeral"; + +#[derive(Debug)] +pub(crate) struct ShmVolume { + mount: oci::Mount, + storage: Option, +} + +impl ShmVolume { + pub(crate) fn new(m: &oci::Mount, shm_size: u64) -> Result { + let (storage, mount) = if shm_size > 0 { + // storage + let mount_path = Path::new(DEFAULT_KATA_GUEST_SANDBOX_DIR).join(SHM_DIR); + let mount_path = mount_path.to_str().unwrap(); + let option = format!("size={}", shm_size); + + let options = vec![ + String::from("noexec"), + String::from("nosuid"), + String::from("nodev"), + String::from("mode=1777"), + option, + ]; + + let storage = agent::Storage { + driver: String::from(KATA_EPHEMERAL_DEV_TYPE), + driver_options: Vec::new(), + source: String::from("shm"), + fs_type: String::from("tmpfs"), + fs_group: None, + options, + mount_point: mount_path.to_string(), + }; + + // mount + let mount = oci::Mount { + r#type: "bind".to_string(), + destination: m.destination.clone(), + source: mount_path.to_string(), + options: vec!["rbind".to_string()], + }; + + (Some(storage), mount) + } else { + let mount = oci::Mount { + r#type: "tmpfs".to_string(), + destination: m.destination.clone(), + source: "shm".to_string(), + options: [ + "noexec", + "nosuid", + "nodev", + "mode=1777", + &format!("size={}", DEFAULT_SHM_SIZE), + ] + .iter() + .map(|s| s.to_string()) + .collect(), + }; + (None, mount) + }; + + Ok(Self { storage, mount }) + } +} + +#[async_trait] +impl Volume for ShmVolume { + fn get_volume_mount(&self) -> anyhow::Result> { + Ok(vec![self.mount.clone()]) + } + + fn get_storage(&self) -> Result> { + let s = if let Some(s) = self.storage.as_ref() { + vec![s.clone()] + } else { + vec![] + }; + Ok(s) + } + + async fn cleanup(&self, _device_manager: &RwLock) -> Result<()> { + // TODO: Clean up ShmVolume + warn!(sl!(), "Cleaning up ShmVolume is still unimplemented."); + Ok(()) + } + + fn get_device_id(&self) -> Result> { + Ok(None) + } +} + +pub(crate) fn is_shim_volume(m: &oci::Mount) -> bool { + m.destination == "/dev/shm" && m.r#type != KATA_EPHEMERAL_DEV_TYPE +} diff --git a/src/runtime-rs/crates/resource/src/volume/spdk_volume.rs b/src/runtime-rs/crates/resource/src/volume/spdk_volume.rs new file mode 100644 index 000000000000..6789d6d3713b --- /dev/null +++ b/src/runtime-rs/crates/resource/src/volume/spdk_volume.rs @@ -0,0 +1,192 @@ +// Copyright (c) 2023 Alibaba Cloud +// Copyright (c) 2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use nix::sys::{stat, stat::SFlag}; +use tokio::sync::RwLock; + +use super::Volume; +use crate::volume::utils::{ + generate_shared_path, volume_mount_info, DEFAULT_VOLUME_FS_TYPE, KATA_SPDK_VOLUME_TYPE, + KATA_SPOOL_VOLUME_TYPE, +}; +use hypervisor::{ + device::{ + device_manager::{do_handle_device, get_block_driver, DeviceManager}, + DeviceConfig, DeviceType, + }, + VhostUserConfig, VhostUserType, +}; + +/// SPDKVolume: spdk block device volume +#[derive(Clone)] +pub(crate) struct SPDKVolume { + storage: Option, + mount: oci::Mount, + device_id: String, +} + +impl SPDKVolume { + pub(crate) async fn new( + d: &RwLock, + m: &oci::Mount, + read_only: bool, + cid: &str, + sid: &str, + ) -> Result { + let mnt_src: &str = &m.source; + + // deserde Information from mountinfo.json + let v = volume_mount_info(mnt_src).context("deserde information from mountinfo.json")?; + let device = match v.volume_type.as_str() { + KATA_SPDK_VOLUME_TYPE => { + if v.device.starts_with("spdk://") { + v.device.clone() + } 
else { + format!("spdk://{}", v.device.as_str()) + } + } + KATA_SPOOL_VOLUME_TYPE => { + if v.device.starts_with("spool://") { + v.device.clone() + } else { + format!("spool://{}", v.device.as_str()) + } + } + _ => return Err(anyhow!("mountinfo.json is invalid")), + }; + + // device format: X:///x/y/z.sock,so just unwrap it. + // if file is not S_IFSOCK, return error. + { + // device tokens: (Type, Socket) + let device_tokens = device.split_once("://").unwrap(); + + let fstat = stat::stat(device_tokens.1).context("stat socket failed")?; + let s_flag = SFlag::from_bits_truncate(fstat.st_mode); + if s_flag != SFlag::S_IFSOCK { + return Err(anyhow!("device {:?} is not valid", device)); + } + } + + let block_driver = get_block_driver(d).await; + + let vhu_blk_config = &mut VhostUserConfig { + socket_path: device, + device_type: VhostUserType::Blk("vhost-user-blk-pci".to_owned()), + driver_option: block_driver, + ..Default::default() + }; + + if let Some(num) = v.metadata.get("num_queues") { + vhu_blk_config.num_queues = num + .parse::() + .context("num queues parse usize failed.")?; + } + if let Some(size) = v.metadata.get("queue_size") { + vhu_blk_config.queue_size = size + .parse::() + .context("num queues parse u32 failed.")?; + } + + // create and insert block device into Kata VM + let device_info = + do_handle_device(d, &DeviceConfig::VhostUserBlkCfg(vhu_blk_config.clone())) + .await + .context("do handle device failed.")?; + + // generate host guest shared path + let guest_path = generate_shared_path(m.destination.clone(), read_only, cid, sid) + .await + .context("generate host-guest shared path failed")?; + + // storage + let mut storage = agent::Storage { + mount_point: guest_path.clone(), + ..Default::default() + }; + + storage.options = if read_only { + vec!["ro".to_string()] + } else { + Vec::new() + }; + + let mut device_id = String::new(); + if let DeviceType::VhostUserBlk(device) = device_info { + // blk, mmioblk + storage.driver = device.config.driver_option; + // /dev/vdX + storage.source = device.config.virt_path; + device_id = device.device_id; + } + + if m.r#type != "bind" { + storage.fs_type = v.fs_type.clone(); + } else { + storage.fs_type = DEFAULT_VOLUME_FS_TYPE.to_string(); + } + + if m.destination.clone().starts_with("/dev") { + storage.fs_type = "bind".to_string(); + storage.options.append(&mut m.options.clone()); + } + + storage.fs_group = None; + let mount = oci::Mount { + destination: m.destination.clone(), + r#type: storage.fs_type.clone(), + source: guest_path, + options: m.options.clone(), + }; + + Ok(Self { + storage: Some(storage), + mount, + device_id, + }) + } +} + +#[async_trait] +impl Volume for SPDKVolume { + fn get_volume_mount(&self) -> Result> { + Ok(vec![self.mount.clone()]) + } + + fn get_storage(&self) -> Result> { + let s = if let Some(s) = self.storage.as_ref() { + vec![s.clone()] + } else { + vec![] + }; + + Ok(s) + } + + async fn cleanup(&self, device_manager: &RwLock) -> Result<()> { + device_manager + .write() + .await + .try_remove_device(&self.device_id) + .await + } + + fn get_device_id(&self) -> Result> { + Ok(Some(self.device_id.clone())) + } +} + +pub(crate) fn is_spdk_volume(m: &oci::Mount) -> bool { + // spdkvol or spoolvol will share the same implementation + let vol_types = [KATA_SPDK_VOLUME_TYPE, KATA_SPOOL_VOLUME_TYPE]; + if vol_types.contains(&m.r#type.as_str()) { + return true; + } + + false +} diff --git a/src/runtime-rs/crates/resource/src/volume/utils.rs b/src/runtime-rs/crates/resource/src/volume/utils.rs new file mode 100644 
index 000000000000..2121b02c2c43 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/volume/utils.rs @@ -0,0 +1,76 @@ +// Copyright (c) 2022-2023 Alibaba Cloud +// Copyright (c) 2022-2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{fs, path::Path}; + +use anyhow::{anyhow, Context, Result}; + +use crate::{ + share_fs::{do_get_guest_path, do_get_host_path}, + volume::share_fs_volume::generate_mount_path, +}; +use kata_sys_util::eother; +use kata_types::mount::{ + get_volume_mount_info, join_path, DirectVolumeMountInfo, KATA_DIRECT_VOLUME_ROOT_PATH, +}; + +pub const DEFAULT_VOLUME_FS_TYPE: &str = "ext4"; +pub const KATA_MOUNT_BIND_TYPE: &str = "bind"; +pub const KATA_DIRECT_VOLUME_TYPE: &str = "directvol"; +pub const KATA_VFIO_VOLUME_TYPE: &str = "vfiovol"; +pub const KATA_SPDK_VOLUME_TYPE: &str = "spdkvol"; +pub const KATA_SPOOL_VOLUME_TYPE: &str = "spoolvol"; + +// volume mount info load infomation from mountinfo.json +pub fn volume_mount_info(volume_path: &str) -> Result { + get_volume_mount_info(volume_path) +} + +// get direct volume path whose volume_path encoded with base64 +pub fn get_direct_volume_path(volume_path: &str) -> Result { + let volume_full_path = + join_path(KATA_DIRECT_VOLUME_ROOT_PATH, volume_path).context("failed to join path.")?; + + Ok(volume_full_path.display().to_string()) +} + +pub fn get_file_name>(src: P) -> Result { + let file_name = src + .as_ref() + .file_name() + .map(|v| v.to_os_string()) + .ok_or_else(|| { + eother!( + "failed to get file name of path {}", + src.as_ref().to_string_lossy() + ) + })? + .into_string() + .map_err(|e| anyhow!("failed to convert to string {:?}", e))?; + + Ok(file_name) +} + +pub(crate) async fn generate_shared_path( + dest: String, + read_only: bool, + cid: &str, + sid: &str, +) -> Result { + let file_name = get_file_name(&dest).context("failed to get file name.")?; + let mount_name = generate_mount_path(cid, file_name.as_str()); + let guest_path = do_get_guest_path(&mount_name, cid, true, false); + let host_path = do_get_host_path(&mount_name, sid, cid, true, read_only); + + if dest.starts_with("/dev") { + fs::File::create(&host_path).context(format!("failed to create file {:?}", &host_path))?; + } else { + std::fs::create_dir_all(&host_path) + .map_err(|e| anyhow!("failed to create dir {}: {:?}", host_path, e))?; + } + + Ok(guest_path) +} diff --git a/src/runtime-rs/crates/resource/src/volume/vfio_volume.rs b/src/runtime-rs/crates/resource/src/volume/vfio_volume.rs new file mode 100644 index 000000000000..dab8c7dae2c9 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/volume/vfio_volume.rs @@ -0,0 +1,141 @@ +// Copyright (c) 2023 Alibaba Cloud +// Copyright (c) 2023 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use tokio::sync::RwLock; + +use super::Volume; +use crate::volume::utils::{ + generate_shared_path, volume_mount_info, DEFAULT_VOLUME_FS_TYPE, KATA_VFIO_VOLUME_TYPE, +}; +use hypervisor::{ + device::{ + device_manager::{do_handle_device, DeviceManager}, + DeviceConfig, DeviceType, + }, + get_vfio_device, VfioConfig, +}; + +pub(crate) struct VfioVolume { + storage: Option, + mount: oci::Mount, + device_id: String, +} + +// VfioVolume: vfio device based block volume +impl VfioVolume { + pub(crate) async fn new( + d: &RwLock, + m: &oci::Mount, + read_only: bool, + cid: &str, + sid: &str, + ) -> Result { + let mnt_src: &str = &m.source; + + // deserde Information from mountinfo.json + let v = 
volume_mount_info(mnt_src).context("deserde information from mountinfo.json")?; + if v.volume_type != KATA_VFIO_VOLUME_TYPE { + return Err(anyhow!("volume type is invalid")); + } + + // support both /dev/vfio/X and BDF or BDF + let vfio_device = get_vfio_device(v.device).context("get vfio device failed.")?; + let vfio_dev_config = &mut VfioConfig { + host_path: vfio_device.clone(), + dev_type: "b".to_string(), + hostdev_prefix: "vfio_vol".to_owned(), + ..Default::default() + }; + + // create and insert block device into Kata VM + let device_info = do_handle_device(d, &DeviceConfig::VfioCfg(vfio_dev_config.clone())) + .await + .context("do handle device failed.")?; + + // generate host guest shared path + let guest_path = generate_shared_path(m.destination.clone(), read_only, cid, sid) + .await + .context("generate host-guest shared path failed")?; + + let storage_options = if read_only { + vec!["ro".to_string()] + } else { + Vec::new() + }; + + let mut storage = agent::Storage { + options: storage_options, + mount_point: guest_path.clone(), + ..Default::default() + }; + + let mut device_id = String::new(); + if let DeviceType::Vfio(device) = device_info { + device_id = device.device_id; + storage.driver = device.driver_type; + // safe here, device_info is correct and only unwrap it. + storage.source = device.config.virt_path.unwrap().1; + } + + if m.r#type != "bind" { + storage.fs_type = v.fs_type.clone(); + } else { + storage.fs_type = DEFAULT_VOLUME_FS_TYPE.to_string(); + } + + let mount = oci::Mount { + destination: m.destination.clone(), + r#type: v.fs_type, + source: guest_path, + options: m.options.clone(), + }; + + Ok(Self { + storage: Some(storage), + mount, + device_id, + }) + } +} + +#[async_trait] +impl Volume for VfioVolume { + fn get_volume_mount(&self) -> Result> { + Ok(vec![self.mount.clone()]) + } + + fn get_storage(&self) -> Result> { + let s = if let Some(s) = self.storage.as_ref() { + vec![s.clone()] + } else { + vec![] + }; + + Ok(s) + } + + async fn cleanup(&self, device_manager: &RwLock) -> Result<()> { + device_manager + .write() + .await + .try_remove_device(&self.device_id) + .await + } + + fn get_device_id(&self) -> Result> { + Ok(Some(self.device_id.clone())) + } +} + +pub(crate) fn is_vfio_volume(m: &oci::Mount) -> bool { + if m.r#type == KATA_VFIO_VOLUME_TYPE { + return true; + } + + false +} diff --git a/src/runtime-rs/crates/runtimes/Cargo.toml b/src/runtime-rs/crates/runtimes/Cargo.toml new file mode 100644 index 000000000000..4d5b4dcc97d6 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/Cargo.toml @@ -0,0 +1,48 @@ +[package] +name = "runtimes" +version = "0.1.0" +authors = ["The Kata Containers community "] +edition = "2018" +license = "Apache-2.0" + +[dependencies] +anyhow = "^1.0" +lazy_static = "1.4.0" +netns-rs = "0.1.0" +slog = "2.5.2" +slog-scope = "4.4.0" +tokio = { version = "1.28.1", features = ["rt-multi-thread"] } +tracing = "0.1.36" +tracing-opentelemetry = "0.18.0" +opentelemetry = { version = "0.18.0", features = ["rt-tokio-current-thread", "trace", "rt-tokio"] } +opentelemetry-jaeger = { version = "0.17.0", features = ["rt-tokio", "hyper_collector_client", "collector_client"] } +tracing-subscriber = { version = "0.3", features = ["registry", "std"] } +hyper = { version = "0.14.20", features = ["stream", "server", "http1"] } +hyperlocal = "0.8" +serde_json = "1.0.88" +nix = "0.25.0" +url = "2.3.1" +procfs = "0.12.0" +prometheus = { version = "0.13.0", features = ["process"] } + +agent = { path = "../agent" } +common = { path = "./common" } 
+kata-types = { path = "../../../libs/kata-types" } +kata-sys-util = { path = "../../../libs/kata-sys-util" } +logging = { path = "../../../libs/logging"} +oci = { path = "../../../libs/oci" } +shim-interface = { path = "../../../libs/shim-interface" } +persist = { path = "../persist" } +hypervisor = { path = "../hypervisor" } +resource = { path = "../resource" } + +# runtime handler +linux_container = { path = "./linux_container", optional = true } +virt_container = { path = "./virt_container", optional = true } +wasm_container = { path = "./wasm_container", optional = true } + +[features] +default = ["virt"] +linux = ["linux_container"] +virt = ["virt_container"] +wasm = ["wasm_container"] diff --git a/src/runtime-rs/crates/runtimes/common/Cargo.toml b/src/runtime-rs/crates/runtimes/common/Cargo.toml new file mode 100644 index 000000000000..a60e1f5f1f8f --- /dev/null +++ b/src/runtime-rs/crates/runtimes/common/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "common" +version = "0.1.0" +authors = ["The Kata Containers community "] +edition = "2018" +license = "Apache-2.0" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "^1.0" +async-trait = "0.1.48" +containerd-shim-protos = { version = "0.3.0", features = ["async"]} +lazy_static = "1.4.0" +nix = "0.24.2" +protobuf = "3.2.0" +serde_json = "1.0.39" +slog = "2.5.2" +slog-scope = "4.4.0" +strum = { version = "0.24.0", features = ["derive"] } +thiserror = "^1.0" +tokio = { version = "1.28.1", features = ["rt-multi-thread", "process", "fs"] } +ttrpc = { version = "0.7.1" } +persist = {path = "../../persist"} +agent = { path = "../../agent" } +kata-sys-util = { path = "../../../../libs/kata-sys-util" } +kata-types = { path = "../../../../libs/kata-types" } +oci = { path = "../../../../libs/oci" } + diff --git a/src/runtime-rs/crates/runtimes/common/src/container_manager.rs b/src/runtime-rs/crates/runtimes/common/src/container_manager.rs new file mode 100644 index 000000000000..040b557ee641 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/common/src/container_manager.rs @@ -0,0 +1,40 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::Result; +use async_trait::async_trait; + +use crate::types::{ + ContainerConfig, ContainerID, ContainerProcess, ExecProcessRequest, KillRequest, + ProcessExitStatus, ProcessStateInfo, ResizePTYRequest, ShutdownRequest, StatsInfo, + UpdateRequest, PID, +}; + +#[async_trait] +pub trait ContainerManager: Send + Sync { + // container lifecycle + async fn create_container(&self, config: ContainerConfig, spec: oci::Spec) -> Result; + async fn pause_container(&self, container_id: &ContainerID) -> Result<()>; + async fn resume_container(&self, container_id: &ContainerID) -> Result<()>; + async fn stats_container(&self, container_id: &ContainerID) -> Result; + async fn update_container(&self, req: UpdateRequest) -> Result<()>; + async fn connect_container(&self, container_id: &ContainerID) -> Result; + + // process lifecycle + async fn close_process_io(&self, process_id: &ContainerProcess) -> Result<()>; + async fn delete_process(&self, process_id: &ContainerProcess) -> Result; + async fn exec_process(&self, req: ExecProcessRequest) -> Result<()>; + async fn kill_process(&self, req: &KillRequest) -> Result<()>; + async fn resize_process_pty(&self, req: &ResizePTYRequest) -> Result<()>; + async fn start_process(&self, process_id: &ContainerProcess) -> 
Result; + async fn state_process(&self, process_id: &ContainerProcess) -> Result; + async fn wait_process(&self, process_id: &ContainerProcess) -> Result; + + // utility + async fn pid(&self) -> Result; + async fn need_shutdown_sandbox(&self, req: &ShutdownRequest) -> bool; + async fn is_sandbox_container(&self, process_id: &ContainerProcess) -> bool; +} diff --git a/src/runtime-rs/crates/runtimes/common/src/error.rs b/src/runtime-rs/crates/runtimes/common/src/error.rs new file mode 100644 index 000000000000..2ec03c4c6cac --- /dev/null +++ b/src/runtime-rs/crates/runtimes/common/src/error.rs @@ -0,0 +1,17 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use crate::types::{ContainerProcess, Response}; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("failed to find container {0}")] + ContainerNotFound(String), + #[error("failed to find process {0}")] + ProcessNotFound(ContainerProcess), + #[error("unexpected response {0} to shim {1}")] + UnexpectedResponse(Response, String), +} diff --git a/src/runtime-rs/crates/runtimes/common/src/lib.rs b/src/runtime-rs/crates/runtimes/common/src/lib.rs new file mode 100644 index 000000000000..adb5ca0028a0 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/common/src/lib.rs @@ -0,0 +1,15 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod container_manager; +pub use container_manager::ContainerManager; +pub mod error; +pub mod message; +mod runtime_handler; +pub use runtime_handler::{RuntimeHandler, RuntimeInstance}; +mod sandbox; +pub use sandbox::{Sandbox, SandboxNetworkEnv}; +pub mod types; diff --git a/src/runtime-rs/crates/runtimes/common/src/message.rs b/src/runtime-rs/crates/runtimes/common/src/message.rs new file mode 100644 index 000000000000..622d5a1620c6 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/common/src/message.rs @@ -0,0 +1,69 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// +use std::sync::Arc; + +use anyhow::{Context, Result}; +use containerd_shim_protos::{events::task::TaskOOM, protobuf::Message as ProtobufMessage}; +use tokio::sync::mpsc::{channel, Receiver, Sender}; + +/// message receiver buffer size +const MESSAGE_RECEIVER_BUFFER_SIZE: usize = 1; + +#[derive(Debug)] +pub enum Action { + Start, + Stop, + Shutdown, + Event(Arc), +} + +#[derive(Debug)] +pub struct Message { + pub action: Action, + pub resp_sender: Option>>, +} + +impl Message { + pub fn new(action: Action) -> Self { + Message { + action, + resp_sender: None, + } + } + + pub fn new_with_receiver(action: Action) -> (Receiver>, Self) { + let (resp_sender, receiver) = channel(MESSAGE_RECEIVER_BUFFER_SIZE); + ( + receiver, + Message { + action, + resp_sender: Some(resp_sender), + }, + ) + } +} + +const TASK_OOM_EVENT_TOPIC: &str = "/tasks/oom"; + +pub trait Event: std::fmt::Debug + Send { + fn r#type(&self) -> String; + fn type_url(&self) -> String; + fn value(&self) -> Result>; +} + +impl Event for TaskOOM { + fn r#type(&self) -> String { + TASK_OOM_EVENT_TOPIC.to_string() + } + + fn type_url(&self) -> String { + "containerd.events.TaskOOM".to_string() + } + + fn value(&self) -> Result> { + self.write_to_bytes().context("get oom value") + } +} diff --git a/src/runtime-rs/crates/runtimes/common/src/runtime_handler.rs b/src/runtime-rs/crates/runtimes/common/src/runtime_handler.rs new file mode 100644 
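An illustrative sketch, not part of the patch: how a caller might use the Message/Action channel defined in message.rs above. A message carrying a response channel is handed to the service loop and the acknowledgement is awaited; msg_sender stands in for any Sender<Message> owned by the caller, and the five second timeout is an arbitrary value chosen for the sketch.

use anyhow::{Context, Result};
use common::message::{Action, Message};
use tokio::sync::mpsc::Sender;
use tokio::time::{timeout, Duration};

async fn request_shutdown(msg_sender: &Sender<Message>) -> Result<()> {
    // Build a message that carries a response channel for the acknowledgement.
    let (mut resp_receiver, msg) = Message::new_with_receiver(Action::Shutdown);
    msg_sender.send(msg).await.context("send shutdown message")?;

    // Wait for the service loop to acknowledge the request.
    timeout(Duration::from_secs(5), resp_receiver.recv())
        .await
        .context("timed out waiting for shutdown ack")?
        .unwrap_or(Ok(()))
}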
index 000000000000..80e4149c3b52 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/common/src/runtime_handler.rs @@ -0,0 +1,44 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::sync::Arc; + +use anyhow::Result; +use async_trait::async_trait; +use kata_types::config::TomlConfig; +use tokio::sync::mpsc::Sender; + +use crate::{message::Message, ContainerManager, Sandbox}; + +#[derive(Clone)] +pub struct RuntimeInstance { + pub sandbox: Arc, + pub container_manager: Arc, +} + +#[async_trait] +pub trait RuntimeHandler: Send + Sync { + fn init() -> Result<()> + where + Self: Sized; + + fn name() -> String + where + Self: Sized; + + fn new_handler() -> Arc + where + Self: Sized; + + async fn new_instance( + &self, + sid: &str, + msg_sender: Sender, + config: Arc, + ) -> Result; + + fn cleanup(&self, id: &str) -> Result<()>; +} diff --git a/src/runtime-rs/crates/runtimes/common/src/sandbox.rs b/src/runtime-rs/crates/runtimes/common/src/sandbox.rs new file mode 100644 index 000000000000..f27c837ab29c --- /dev/null +++ b/src/runtime-rs/crates/runtimes/common/src/sandbox.rs @@ -0,0 +1,48 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::Result; +use async_trait::async_trait; + +#[derive(Clone)] +pub struct SandboxNetworkEnv { + pub netns: Option, + pub network_created: bool, +} + +impl std::fmt::Debug for SandboxNetworkEnv { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SandboxNetworkEnv") + .field("netns", &self.netns) + .field("network_created", &self.network_created) + .finish() + } +} + +#[async_trait] +pub trait Sandbox: Send + Sync { + async fn start( + &self, + dns: Vec, + spec: &oci::Spec, + state: &oci::State, + network_env: SandboxNetworkEnv, + ) -> Result<()>; + async fn stop(&self) -> Result<()>; + async fn cleanup(&self) -> Result<()>; + async fn shutdown(&self) -> Result<()>; + + // utils + async fn set_iptables(&self, is_ipv6: bool, data: Vec) -> Result>; + async fn get_iptables(&self, is_ipv6: bool) -> Result>; + async fn direct_volume_stats(&self, volume_path: &str) -> Result; + async fn direct_volume_resize(&self, resize_req: agent::ResizeVolumeRequest) -> Result<()>; + async fn agent_sock(&self) -> Result; + + // metrics function + async fn agent_metrics(&self) -> Result; + async fn hypervisor_metrics(&self) -> Result; +} diff --git a/src/runtime-rs/crates/runtimes/common/src/types/mod.rs b/src/runtime-rs/crates/runtimes/common/src/types/mod.rs new file mode 100644 index 000000000000..0e6f80a4f653 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/common/src/types/mod.rs @@ -0,0 +1,236 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod trans_from_agent; +mod trans_from_shim; +mod trans_into_agent; +mod trans_into_shim; + +use std::fmt; + +use anyhow::{Context, Result}; +use kata_sys_util::validate; +use kata_types::mount::Mount; +use strum::Display; + +/// Request: request from shim +/// Request and Response messages need to be paired +#[derive(Debug, Clone, Display)] +pub enum Request { + CreateContainer(ContainerConfig), + CloseProcessIO(ContainerProcess), + DeleteProcess(ContainerProcess), + ExecProcess(ExecProcessRequest), + KillProcess(KillRequest), + WaitProcess(ContainerProcess), + StartProcess(ContainerProcess), + StateProcess(ContainerProcess), + 
ShutdownContainer(ShutdownRequest), + PauseContainer(ContainerID), + ResumeContainer(ContainerID), + ResizeProcessPTY(ResizePTYRequest), + StatsContainer(ContainerID), + UpdateContainer(UpdateRequest), + Pid, + ConnectContainer(ContainerID), +} + +/// Response: response to shim +/// Request and Response messages need to be paired +#[derive(Debug, Clone, Display)] +pub enum Response { + CreateContainer(PID), + CloseProcessIO, + DeleteProcess(ProcessStateInfo), + ExecProcess, + KillProcess, + WaitProcess(ProcessExitStatus), + StartProcess(PID), + StateProcess(ProcessStateInfo), + ShutdownContainer, + PauseContainer, + ResumeContainer, + ResizeProcessPTY, + StatsContainer(StatsInfo), + UpdateContainer, + Pid(PID), + ConnectContainer(PID), +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum ProcessType { + Container, + Exec, +} + +#[derive(Clone, Debug)] +pub struct ContainerID { + pub container_id: String, +} + +impl ToString for ContainerID { + fn to_string(&self) -> String { + self.container_id.clone() + } +} + +impl ContainerID { + pub fn new(container_id: &str) -> Result { + validate::verify_id(container_id).context("verify container id")?; + Ok(Self { + container_id: container_id.to_string(), + }) + } +} + +#[derive(Clone, Debug)] +pub struct ContainerProcess { + pub container_id: ContainerID, + pub exec_id: String, + pub process_type: ProcessType, +} + +impl fmt::Display for ContainerProcess { + fn fmt(&self, f: &mut std::fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", &self) + } +} + +impl ContainerProcess { + pub fn new(container_id: &str, exec_id: &str) -> Result { + let (exec_id, process_type) = if exec_id.is_empty() || container_id == exec_id { + ("".to_string(), ProcessType::Container) + } else { + validate::verify_id(exec_id).context("verify exec id")?; + (exec_id.to_string(), ProcessType::Exec) + }; + Ok(Self { + container_id: ContainerID::new(container_id)?, + exec_id, + process_type, + }) + } + + pub fn container_id(&self) -> &str { + &self.container_id.container_id + } + + pub fn exec_id(&self) -> &str { + &self.exec_id + } +} +#[derive(Debug, Clone)] +pub struct ContainerConfig { + pub container_id: String, + pub bundle: String, + pub rootfs_mounts: Vec, + pub terminal: bool, + pub options: Option>, + pub stdin: Option, + pub stdout: Option, + pub stderr: Option, +} + +#[derive(Debug, Clone)] +pub struct PID { + pub pid: u32, +} + +impl PID { + pub fn new(pid: u32) -> Self { + Self { pid } + } +} + +#[derive(Debug, Clone)] +pub struct KillRequest { + pub process: ContainerProcess, + pub signal: u32, + pub all: bool, +} + +#[derive(Debug, Clone)] +pub struct ShutdownRequest { + pub container_id: String, + pub is_now: bool, +} + +#[derive(Debug, Clone)] +pub struct ResizePTYRequest { + pub process: ContainerProcess, + pub width: u32, + pub height: u32, +} + +#[derive(Debug, Clone)] +pub struct ExecProcessRequest { + pub process: ContainerProcess, + pub terminal: bool, + pub stdin: Option, + pub stdout: Option, + pub stderr: Option, + pub spec_type_url: String, + pub spec_value: Vec, +} + +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum ProcessStatus { + Unknown = 0, + Created = 1, + Running = 2, + Stopped = 3, + Paused = 4, + Pausing = 5, +} + +#[derive(Debug, Clone)] +pub struct ProcessStateInfo { + pub container_id: String, + pub exec_id: String, + pub pid: PID, + pub bundle: String, + pub stdin: Option, + pub stdout: Option, + pub stderr: Option, + pub terminal: bool, + pub status: ProcessStatus, + pub exit_status: i32, + pub exited_at: Option, +} + 
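An illustrative sketch, not part of the patch: how the process identifiers in this module distinguish a container's init process from an exec'd process, and how an exit is recorded with the exit-status helper defined just below. The identifiers are arbitrary sample values.

use anyhow::Result;
use common::types::{ContainerProcess, ProcessExitStatus, ProcessType};

fn example() -> Result<()> {
    // An empty exec_id (or one equal to the container id) means the init process.
    let init = ContainerProcess::new("0123456789abcdef", "")?;
    assert_eq!(init.process_type, ProcessType::Container);

    // Any other exec_id names an exec'd process inside that container.
    let exec = ContainerProcess::new("0123456789abcdef", "fedcba9876543210")?;
    assert_eq!(exec.process_type, ProcessType::Exec);

    // Record the exit code together with the time it was observed.
    let mut status = ProcessExitStatus::new();
    status.update_exit_code(0);
    assert_eq!(status.exit_code, 0);

    Ok(())
}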
+#[derive(Debug, Clone, Default)] +pub struct ProcessExitStatus { + pub exit_code: i32, + pub exit_time: Option, +} + +impl ProcessExitStatus { + pub fn new() -> Self { + Self::default() + } + + pub fn update_exit_code(&mut self, exit_code: i32) { + self.exit_code = exit_code; + self.exit_time = Some(std::time::SystemTime::now()); + } +} + +#[derive(Debug, Clone)] +pub struct StatsInfoValue { + pub type_url: String, + pub value: Vec, +} + +#[derive(Debug, Clone)] +pub struct StatsInfo { + pub value: Option, +} + +#[derive(Debug, Clone)] +pub struct UpdateRequest { + pub container_id: String, + pub value: Vec, +} diff --git a/src/runtime-rs/crates/runtimes/common/src/types/trans_from_agent.rs b/src/runtime-rs/crates/runtimes/common/src/types/trans_from_agent.rs new file mode 100644 index 000000000000..f28f50582b5a --- /dev/null +++ b/src/runtime-rs/crates/runtimes/common/src/types/trans_from_agent.rs @@ -0,0 +1,212 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::convert::From; + +use containerd_shim_protos::cgroups::metrics; +use protobuf::Message; + +use super::{StatsInfo, StatsInfoValue}; + +// TODO: trans from agent proto? +impl From> for StatsInfo { + fn from(c_stats: Option) -> Self { + let mut metric = metrics::Metrics::new(); + let stats = match c_stats { + None => { + return StatsInfo { value: None }; + } + Some(stats) => stats, + }; + + if let Some(cg_stats) = stats.cgroup_stats { + if let Some(cpu) = cg_stats.cpu_stats { + // set protobuf cpu stat + let mut p_cpu = metrics::CPUStat::new(); + if let Some(usage) = cpu.cpu_usage { + let mut p_usage = metrics::CPUUsage::new(); + p_usage.set_total(usage.total_usage); + p_usage.set_per_cpu(usage.percpu_usage); + p_usage.set_kernel(usage.usage_in_kernelmode); + p_usage.set_user(usage.usage_in_usermode); + + // set protobuf cpu usage + p_cpu.set_usage(p_usage); + } + + if let Some(throttle) = cpu.throttling_data { + let mut p_throttle = metrics::Throttle::new(); + p_throttle.set_periods(throttle.periods); + p_throttle.set_throttled_time(throttle.throttled_time); + p_throttle.set_throttled_periods(throttle.throttled_periods); + + // set protobuf cpu usage + p_cpu.set_throttling(p_throttle); + } + + metric.set_cpu(p_cpu); + } + + if let Some(m_stats) = cg_stats.memory_stats { + let mut p_m = metrics::MemoryStat::new(); + p_m.set_cache(m_stats.cache); + // memory usage + if let Some(m_data) = m_stats.usage { + let mut p_m_entry = metrics::MemoryEntry::new(); + p_m_entry.set_usage(m_data.usage); + p_m_entry.set_limit(m_data.limit); + p_m_entry.set_failcnt(m_data.failcnt); + p_m_entry.set_max(m_data.max_usage); + + p_m.set_usage(p_m_entry); + } + // memory swap_usage + if let Some(m_data) = m_stats.swap_usage { + let mut p_m_entry = metrics::MemoryEntry::new(); + p_m_entry.set_usage(m_data.usage); + p_m_entry.set_limit(m_data.limit); + p_m_entry.set_failcnt(m_data.failcnt); + p_m_entry.set_max(m_data.max_usage); + + p_m.set_swap(p_m_entry); + } + // memory kernel_usage + if let Some(m_data) = m_stats.kernel_usage { + let mut p_m_entry = metrics::MemoryEntry::new(); + p_m_entry.set_usage(m_data.usage); + p_m_entry.set_limit(m_data.limit); + p_m_entry.set_failcnt(m_data.failcnt); + p_m_entry.set_max(m_data.max_usage); + + p_m.set_kernel(p_m_entry); + } + + for (k, v) in m_stats.stats { + match k.as_str() { + "dirty" => p_m.set_dirty(v), + "rss" => p_m.set_rss(v), + "rss_huge" => p_m.set_rss_huge(v), + "mapped_file" => p_m.set_mapped_file(v), + 
"writeback" => p_m.set_writeback(v), + "pg_pg_in" => p_m.set_pg_pg_in(v), + "pg_pg_out" => p_m.set_pg_pg_out(v), + "pg_fault" => p_m.set_pg_fault(v), + "pg_maj_fault" => p_m.set_pg_maj_fault(v), + "inactive_file" => p_m.set_inactive_file(v), + "inactive_anon" => p_m.set_inactive_anon(v), + "active_file" => p_m.set_active_file(v), + "unevictable" => p_m.set_unevictable(v), + "hierarchical_memory_limit" => p_m.set_hierarchical_memory_limit(v), + "hierarchical_swap_limit" => p_m.set_hierarchical_swap_limit(v), + "total_cache" => p_m.set_total_cache(v), + "total_rss" => p_m.set_total_rss(v), + "total_mapped_file" => p_m.set_total_mapped_file(v), + "total_dirty" => p_m.set_total_dirty(v), + + "total_pg_pg_in" => p_m.set_total_pg_pg_in(v), + "total_pg_pg_out" => p_m.set_total_pg_pg_out(v), + "total_pg_fault" => p_m.set_total_pg_fault(v), + "total_pg_maj_fault" => p_m.set_total_pg_maj_fault(v), + "total_inactive_file" => p_m.set_total_inactive_file(v), + "total_inactive_anon" => p_m.set_total_inactive_anon(v), + "total_active_file" => p_m.set_total_active_file(v), + "total_unevictable" => p_m.set_total_unevictable(v), + _ => (), + } + } + metric.set_memory(p_m); + } + + if let Some(pid_stats) = cg_stats.pids_stats { + let mut p_pid = metrics::PidsStat::new(); + p_pid.set_limit(pid_stats.limit); + p_pid.set_current(pid_stats.current); + metric.set_pids(p_pid); + } + + if let Some(blk_stats) = cg_stats.blkio_stats { + let mut p_blk_stats = metrics::BlkIOStat::new(); + p_blk_stats + .set_io_serviced_recursive(copy_blkio_entry(&blk_stats.io_serviced_recursive)); + p_blk_stats.set_io_service_bytes_recursive(copy_blkio_entry( + &blk_stats.io_service_bytes_recursive, + )); + p_blk_stats + .set_io_queued_recursive(copy_blkio_entry(&blk_stats.io_queued_recursive)); + p_blk_stats.set_io_service_time_recursive(copy_blkio_entry( + &blk_stats.io_service_time_recursive, + )); + p_blk_stats.set_io_wait_time_recursive(copy_blkio_entry( + &blk_stats.io_wait_time_recursive, + )); + p_blk_stats + .set_io_merged_recursive(copy_blkio_entry(&blk_stats.io_merged_recursive)); + p_blk_stats.set_io_time_recursive(copy_blkio_entry(&blk_stats.io_time_recursive)); + p_blk_stats.set_sectors_recursive(copy_blkio_entry(&blk_stats.sectors_recursive)); + + metric.set_blkio(p_blk_stats); + } + + if !cg_stats.hugetlb_stats.is_empty() { + let mut p_huge = Vec::new(); + for (k, v) in cg_stats.hugetlb_stats { + let mut h = metrics::HugetlbStat::new(); + h.set_pagesize(k); + h.set_max(v.max_usage); + h.set_usage(v.usage); + h.set_failcnt(v.failcnt); + p_huge.push(h); + } + metric.set_hugetlb(p_huge); + } + } + + let net_stats = stats.network_stats; + if !net_stats.is_empty() { + let mut p_net = Vec::new(); + for v in net_stats.iter() { + let mut h = metrics::NetworkStat::new(); + h.set_name(v.name.clone()); + + h.set_tx_bytes(v.tx_bytes); + h.set_tx_packets(v.tx_packets); + h.set_tx_errors(v.tx_errors); + h.set_tx_dropped(v.tx_dropped); + + h.set_rx_bytes(v.rx_bytes); + h.set_rx_packets(v.rx_packets); + h.set_rx_errors(v.rx_errors); + h.set_rx_dropped(v.rx_dropped); + + p_net.push(h); + } + metric.set_network(p_net); + } + + StatsInfo { + value: Some(StatsInfoValue { + type_url: "io.containerd.cgroups.v1.Metrics".to_string(), + value: metric.write_to_bytes().unwrap(), + }), + } + } +} + +fn copy_blkio_entry(entry: &[agent::BlkioStatsEntry]) -> Vec { + let mut p_entry = Vec::new(); + + for e in entry.iter() { + let mut blk = metrics::BlkIOEntry::new(); + blk.set_op(e.op.clone()); + blk.set_value(e.value); + blk.set_major(e.major); + 
blk.set_minor(e.minor); + + p_entry.push(blk); + } + + p_entry +} diff --git a/src/runtime-rs/crates/runtimes/common/src/types/trans_from_shim.rs b/src/runtime-rs/crates/runtimes/common/src/types/trans_from_shim.rs new file mode 100644 index 000000000000..29a4a676ced6 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/common/src/types/trans_from_shim.rs @@ -0,0 +1,197 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use super::{ + ContainerConfig, ContainerID, ContainerProcess, ExecProcessRequest, KillRequest, Request, + ResizePTYRequest, ShutdownRequest, UpdateRequest, +}; +use anyhow::{Context, Result}; +use containerd_shim_protos::api; +use kata_types::mount::Mount; +use std::{ + convert::{From, TryFrom}, + path::PathBuf, +}; + +fn trans_from_shim_mount(from: &api::Mount) -> Mount { + let options = from.options.to_vec(); + let mut read_only = false; + for o in &options { + if o == "ro" { + read_only = true; + break; + } + } + + Mount { + source: from.source.clone(), + destination: PathBuf::from(&from.target), + fs_type: from.type_.clone(), + options, + device_id: None, + host_shared_fs_path: None, + read_only, + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::CreateTaskRequest) -> Result { + let options = if from.has_options() { + Some(from.options().value.to_vec()) + } else { + None + }; + Ok(Request::CreateContainer(ContainerConfig { + container_id: from.id.clone(), + bundle: from.bundle.clone(), + rootfs_mounts: from.rootfs.iter().map(trans_from_shim_mount).collect(), + terminal: from.terminal, + options, + stdin: (!from.stdin.is_empty()).then(|| from.stdin.clone()), + stdout: (!from.stdout.is_empty()).then(|| from.stdout.clone()), + stderr: (!from.stderr.is_empty()).then(|| from.stderr.clone()), + })) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::CloseIORequest) -> Result { + Ok(Request::CloseProcessIO( + ContainerProcess::new(&from.id, &from.exec_id).context("new process id")?, + )) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::DeleteRequest) -> Result { + Ok(Request::DeleteProcess( + ContainerProcess::new(&from.id, &from.exec_id).context("new process id")?, + )) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::ExecProcessRequest) -> Result { + let spec = from.spec(); + Ok(Request::ExecProcess(ExecProcessRequest { + process: ContainerProcess::new(&from.id, &from.exec_id).context("new process id")?, + terminal: from.terminal, + stdin: (!from.stdin.is_empty()).then(|| from.stdin.clone()), + stdout: (!from.stdout.is_empty()).then(|| from.stdout.clone()), + stderr: (!from.stderr.is_empty()).then(|| from.stderr.clone()), + spec_type_url: spec.type_url.to_string(), + spec_value: spec.value.to_vec(), + })) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::KillRequest) -> Result { + Ok(Request::KillProcess(KillRequest { + process: ContainerProcess::new(&from.id, &from.exec_id).context("new process id")?, + signal: from.signal, + all: from.all, + })) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::WaitRequest) -> Result { + Ok(Request::WaitProcess( + ContainerProcess::new(&from.id, &from.exec_id).context("new process id")?, + )) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::StartRequest) -> Result { + 
Ok(Request::StartProcess( + ContainerProcess::new(&from.id, &from.exec_id).context("new process id")?, + )) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::StateRequest) -> Result { + Ok(Request::StateProcess( + ContainerProcess::new(&from.id, &from.exec_id).context("new process id")?, + )) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::ShutdownRequest) -> Result { + Ok(Request::ShutdownContainer(ShutdownRequest { + container_id: from.id.to_string(), + is_now: from.now, + })) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::ResizePtyRequest) -> Result { + Ok(Request::ResizeProcessPTY(ResizePTYRequest { + process: ContainerProcess::new(&from.id, &from.exec_id).context("new process id")?, + width: from.width, + height: from.height, + })) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::PauseRequest) -> Result { + Ok(Request::PauseContainer(ContainerID::new(&from.id)?)) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::ResumeRequest) -> Result { + Ok(Request::ResumeContainer(ContainerID::new(&from.id)?)) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::StatsRequest) -> Result { + Ok(Request::StatsContainer(ContainerID::new(&from.id)?)) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::UpdateTaskRequest) -> Result { + Ok(Request::UpdateContainer(UpdateRequest { + container_id: from.id.to_string(), + value: from.resources().value.to_vec(), + })) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(_from: api::PidsRequest) -> Result { + Ok(Request::Pid) + } +} + +impl TryFrom for Request { + type Error = anyhow::Error; + fn try_from(from: api::ConnectRequest) -> Result { + Ok(Request::ConnectContainer(ContainerID::new(&from.id)?)) + } +} diff --git a/src/runtime-rs/crates/runtimes/common/src/types/trans_into_agent.rs b/src/runtime-rs/crates/runtimes/common/src/types/trans_into_agent.rs new file mode 100644 index 000000000000..f032fd70bcf7 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/common/src/types/trans_into_agent.rs @@ -0,0 +1,28 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::convert::From; + +use agent; + +use super::{ContainerID, ContainerProcess}; + +impl From for agent::ContainerID { + fn from(from: ContainerID) -> Self { + Self { + container_id: from.container_id, + } + } +} + +impl From for agent::ContainerProcessID { + fn from(from: ContainerProcess) -> Self { + Self { + container_id: from.container_id.into(), + exec_id: from.exec_id, + } + } +} diff --git a/src/runtime-rs/crates/runtimes/common/src/types/trans_into_shim.rs b/src/runtime-rs/crates/runtimes/common/src/types/trans_into_shim.rs new file mode 100644 index 000000000000..5f758f567684 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/common/src/types/trans_into_shim.rs @@ -0,0 +1,241 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + any::type_name, + convert::{Into, TryFrom, TryInto}, + time, +}; + +use anyhow::{anyhow, Result}; +use containerd_shim_protos::api; + +use super::{ProcessExitStatus, ProcessStateInfo, ProcessStatus, Response}; +use crate::error::Error; + +fn system_time_into(time: 
time::SystemTime) -> ::protobuf::well_known_types::timestamp::Timestamp { + let mut proto_time = ::protobuf::well_known_types::timestamp::Timestamp::new(); + proto_time.seconds = time + .duration_since(time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs() + .try_into() + .unwrap_or_default(); + + proto_time +} + +fn option_system_time_into( + time: Option, +) -> protobuf::MessageField { + match time { + Some(v) => ::protobuf::MessageField::some(system_time_into(v)), + None => ::protobuf::MessageField::none(), + } +} + +impl From for api::WaitResponse { + fn from(from: ProcessExitStatus) -> Self { + Self { + exit_status: from.exit_code as u32, + exited_at: option_system_time_into(from.exit_time), + ..Default::default() + } + } +} + +impl From for api::Status { + fn from(from: ProcessStatus) -> Self { + match from { + ProcessStatus::Unknown => api::Status::UNKNOWN, + ProcessStatus::Created => api::Status::CREATED, + ProcessStatus::Running => api::Status::RUNNING, + ProcessStatus::Stopped => api::Status::STOPPED, + ProcessStatus::Paused => api::Status::PAUSED, + ProcessStatus::Pausing => api::Status::PAUSING, + } + } +} +impl From for api::StateResponse { + fn from(from: ProcessStateInfo) -> Self { + Self { + id: from.container_id.clone(), + bundle: from.bundle.clone(), + pid: from.pid.pid, + status: protobuf::EnumOrUnknown::new(from.status.into()), + stdin: from.stdin.unwrap_or_default(), + stdout: from.stdout.unwrap_or_default(), + stderr: from.stderr.unwrap_or_default(), + terminal: from.terminal, + exit_status: from.exit_status as u32, + exited_at: option_system_time_into(from.exited_at), + exec_id: from.exec_id, + ..Default::default() + } + } +} + +impl From for api::DeleteResponse { + fn from(from: ProcessStateInfo) -> Self { + Self { + pid: from.pid.pid, + exit_status: from.exit_status as u32, + exited_at: option_system_time_into(from.exited_at), + ..Default::default() + } + } +} + +impl TryFrom for api::CreateTaskResponse { + type Error = anyhow::Error; + fn try_from(from: Response) -> Result { + match from { + Response::CreateContainer(resp) => Ok(Self { + pid: resp.pid, + ..Default::default() + }), + _ => Err(anyhow!(Error::UnexpectedResponse( + from, + type_name::().to_string() + ))), + } + } +} + +impl TryFrom for api::DeleteResponse { + type Error = anyhow::Error; + fn try_from(from: Response) -> Result { + match from { + Response::DeleteProcess(resp) => Ok(resp.into()), + _ => Err(anyhow!(Error::UnexpectedResponse( + from, + type_name::().to_string() + ))), + } + } +} + +impl TryFrom for api::WaitResponse { + type Error = anyhow::Error; + fn try_from(from: Response) -> Result { + match from { + Response::WaitProcess(resp) => Ok(resp.into()), + _ => Err(anyhow!(Error::UnexpectedResponse( + from, + type_name::().to_string() + ))), + } + } +} + +impl TryFrom for api::StartResponse { + type Error = anyhow::Error; + fn try_from(from: Response) -> Result { + match from { + Response::StartProcess(resp) => Ok(api::StartResponse { + pid: resp.pid, + ..Default::default() + }), + _ => Err(anyhow!(Error::UnexpectedResponse( + from, + type_name::().to_string() + ))), + } + } +} + +impl TryFrom for api::StateResponse { + type Error = anyhow::Error; + fn try_from(from: Response) -> Result { + match from { + Response::StateProcess(resp) => Ok(resp.into()), + _ => Err(anyhow!(Error::UnexpectedResponse( + from, + type_name::().to_string() + ))), + } + } +} + +impl TryFrom for api::StatsResponse { + type Error = anyhow::Error; + fn try_from(from: Response) -> Result { + let mut any = 
::protobuf::well_known_types::any::Any::new(); + let mut response = api::StatsResponse::new(); + match from { + Response::StatsContainer(resp) => { + if let Some(value) = resp.value { + any.type_url = value.type_url; + any.value = value.value; + response.set_stats(any); + } + Ok(response) + } + _ => Err(anyhow!(Error::UnexpectedResponse( + from, + type_name::().to_string() + ))), + } + } +} + +impl TryFrom for api::PidsResponse { + type Error = anyhow::Error; + fn try_from(from: Response) -> Result { + match from { + Response::Pid(resp) => { + let mut processes: Vec = vec![]; + let mut p_info = api::ProcessInfo::new(); + let mut res = api::PidsResponse::new(); + p_info.set_pid(resp.pid); + processes.push(p_info); + res.set_processes(processes); + Ok(res) + } + _ => Err(anyhow!(Error::UnexpectedResponse( + from, + type_name::().to_string() + ))), + } + } +} + +impl TryFrom for api::ConnectResponse { + type Error = anyhow::Error; + fn try_from(from: Response) -> Result { + match from { + Response::ConnectContainer(resp) => { + let mut res = api::ConnectResponse::new(); + res.set_shim_pid(resp.pid); + Ok(res) + } + _ => Err(anyhow!(Error::UnexpectedResponse( + from, + type_name::().to_string() + ))), + } + } +} + +impl TryFrom for api::Empty { + type Error = anyhow::Error; + fn try_from(from: Response) -> Result { + match from { + Response::CloseProcessIO => Ok(api::Empty::new()), + Response::ExecProcess => Ok(api::Empty::new()), + Response::KillProcess => Ok(api::Empty::new()), + Response::ShutdownContainer => Ok(api::Empty::new()), + Response::PauseContainer => Ok(api::Empty::new()), + Response::ResumeContainer => Ok(api::Empty::new()), + Response::ResizeProcessPTY => Ok(api::Empty::new()), + Response::UpdateContainer => Ok(api::Empty::new()), + _ => Err(anyhow!(Error::UnexpectedResponse( + from, + type_name::().to_string() + ))), + } + } +} diff --git a/src/runtime-rs/crates/runtimes/linux_container/Cargo.toml b/src/runtime-rs/crates/runtimes/linux_container/Cargo.toml new file mode 100644 index 000000000000..de3c03ffd05e --- /dev/null +++ b/src/runtime-rs/crates/runtimes/linux_container/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "linux_container" +version = "0.1.0" +authors = ["The Kata Containers community "] +edition = "2018" + +[dependencies] +anyhow = "^1.0" +async-trait = "0.1.48" +tokio = { version = "1.28.1" } + +common = { path = "../common" } +kata-types = { path = "../../../../libs/kata-types" } diff --git a/src/runtime-rs/crates/runtimes/linux_container/src/lib.rs b/src/runtime-rs/crates/runtimes/linux_container/src/lib.rs new file mode 100644 index 000000000000..406ccb0b602f --- /dev/null +++ b/src/runtime-rs/crates/runtimes/linux_container/src/lib.rs @@ -0,0 +1,42 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// +use std::sync::Arc; + +use anyhow::Result; +use async_trait::async_trait; +use common::{message::Message, RuntimeHandler, RuntimeInstance}; +use kata_types::config::TomlConfig; +use tokio::sync::mpsc::Sender; + +pub struct LinuxContainer {} + +#[async_trait] +impl RuntimeHandler for LinuxContainer { + fn init() -> Result<()> { + Ok(()) + } + + fn name() -> String { + "linux_container".to_string() + } + + fn new_handler() -> Arc { + Arc::new(LinuxContainer {}) + } + + async fn new_instance( + &self, + _sid: &str, + _msg_sender: Sender, + _config: Arc, + ) -> Result { + todo!() + } + + fn cleanup(&self, _id: &str) -> Result<()> { + todo!() + } +} diff --git 
a/src/runtime-rs/crates/runtimes/src/lib.rs b/src/runtime-rs/crates/runtimes/src/lib.rs new file mode 100644 index 000000000000..9a98ed5e8c1b --- /dev/null +++ b/src/runtime-rs/crates/runtimes/src/lib.rs @@ -0,0 +1,20 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[macro_use(lazy_static)] +extern crate lazy_static; + +#[macro_use] +extern crate slog; + +logging::logger_with_subsystem!(sl, "runtimes"); + +pub mod manager; +pub use manager::RuntimeHandlerManager; +pub use shim_interface; +mod shim_metrics; +mod shim_mgmt; +pub mod tracer; diff --git a/src/runtime-rs/crates/runtimes/src/manager.rs b/src/runtime-rs/crates/runtimes/src/manager.rs new file mode 100644 index 000000000000..a5af2a3fdff9 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/src/manager.rs @@ -0,0 +1,514 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{path::PathBuf, str::from_utf8, sync::Arc}; + +use anyhow::{anyhow, Context, Result}; +use common::{ + message::Message, + types::{Request, Response}, + RuntimeHandler, RuntimeInstance, Sandbox, SandboxNetworkEnv, +}; +use hypervisor::Param; +use kata_sys_util::spec::load_oci_spec; +use kata_types::{ + annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig, +}; +#[cfg(feature = "linux")] +use linux_container::LinuxContainer; +use netns_rs::NetNs; +use persist::sandbox_persist::Persist; +use resource::{ + cpu_mem::initial_size::InitialSizeManager, + network::{dan_config_path, generate_netns_name}, +}; +use shim_interface::shim_mgmt::ERR_NO_SHIM_SERVER; +use tokio::fs; +use tokio::sync::{mpsc::Sender, Mutex, RwLock}; +use tracing::instrument; +#[cfg(feature = "virt")] +use virt_container::{ + sandbox::{SandboxRestoreArgs, VirtSandbox}, + sandbox_persist::SandboxState, + VirtContainer, +}; +#[cfg(feature = "wasm")] +use wasm_container::WasmContainer; + +use crate::{ + shim_mgmt::server::MgmtServer, + tracer::{KataTracer, ROOTSPAN}, +}; + +struct RuntimeHandlerManagerInner { + id: String, + msg_sender: Sender, + kata_tracer: Arc>, + runtime_instance: Option>, +} + +impl std::fmt::Debug for RuntimeHandlerManagerInner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RuntimeHandlerManagerInner") + .field("id", &self.id) + .field("msg_sender", &self.msg_sender) + .finish() + } +} + +impl RuntimeHandlerManagerInner { + fn new(id: &str, msg_sender: Sender) -> Result { + let tracer = KataTracer::new(); + Ok(Self { + id: id.to_string(), + msg_sender, + kata_tracer: Arc::new(Mutex::new(tracer)), + runtime_instance: None, + }) + } + + #[instrument] + async fn init_runtime_handler( + &mut self, + spec: &oci::Spec, + state: &oci::State, + network_env: SandboxNetworkEnv, + dns: Vec, + config: Arc, + ) -> Result<()> { + info!(sl!(), "new runtime handler {}", &config.runtime.name); + let runtime_handler = match config.runtime.name.as_str() { + #[cfg(feature = "linux")] + name if name == LinuxContainer::name() => LinuxContainer::new_handler(), + #[cfg(feature = "wasm")] + name if name == WasmContainer::name() => WasmContainer::new_handler(), + #[cfg(feature = "virt")] + name if name == VirtContainer::name() || name.is_empty() => { + VirtContainer::new_handler() + } + _ => return Err(anyhow!("Unsupported runtime: {}", &config.runtime.name)), + }; + let runtime_instance = runtime_handler + .new_instance(&self.id, self.msg_sender.clone(), 
config.clone()) + .await + .context("new runtime instance")?; + + // initilize the trace subscriber + if config.runtime.enable_tracing { + let mut tracer = self.kata_tracer.lock().await; + if let Err(e) = tracer.trace_setup( + &self.id, + &config.runtime.jaeger_endpoint, + &config.runtime.jaeger_user, + &config.runtime.jaeger_password, + ) { + warn!(sl!(), "failed to setup tracing, {:?}", e); + } + } + + // start sandbox + runtime_instance + .sandbox + .start(dns, spec, state, network_env) + .await + .context("start sandbox")?; + self.runtime_instance = Some(Arc::new(runtime_instance)); + Ok(()) + } + + #[instrument] + async fn try_init( + &mut self, + spec: &oci::Spec, + state: &oci::State, + options: &Option>, + ) -> Result<()> { + // return if runtime instance has init + if self.runtime_instance.is_some() { + return Ok(()); + } + + let mut dns: Vec = vec![]; + + #[cfg(feature = "linux")] + LinuxContainer::init().context("init linux container")?; + #[cfg(feature = "wasm")] + WasmContainer::init().context("init wasm container")?; + #[cfg(feature = "virt")] + VirtContainer::init().context("init virt container")?; + + for m in &spec.mounts { + if m.destination == DEFAULT_GUEST_DNS_FILE { + let contents = fs::read_to_string(&m.source).await?; + dns = contents.split('\n').map(|e| e.to_string()).collect(); + } + } + + let config = load_config(spec, options).context("load config")?; + + let dan_path = dan_config_path(&config, &self.id); + let mut network_created = false; + // set netns to None if we want no network for the VM + let netns = if config.runtime.disable_new_netns { + None + } else if dan_path.exists() { + info!(sl!(), "Do not create a netns due to DAN"); + None + } else { + let mut netns_path = None; + if let Some(linux) = &spec.linux { + for ns in &linux.namespaces { + if ns.r#type.as_str() != oci::NETWORKNAMESPACE { + continue; + } + // get netns path from oci spec + if !ns.path.is_empty() { + netns_path = Some(ns.path.clone()); + } + // if we get empty netns from oci spec, we need to create netns for the VM + else { + let ns_name = generate_netns_name(); + let netns = NetNs::new(ns_name)?; + let path = PathBuf::from(netns.path()).to_str().map(|s| s.to_string()); + info!(sl!(), "the netns path is {:?}", path); + netns_path = path; + network_created = true; + } + break; + } + } + netns_path + }; + + let network_env = SandboxNetworkEnv { + netns, + network_created, + }; + + self.init_runtime_handler(spec, state, network_env, dns, Arc::new(config)) + .await + .context("init runtime handler")?; + + // the sandbox creation can reach here only once and the sandbox is created + // so we can safely create the shim management socket right now + // the unwrap here is safe because the runtime handler is correctly created + let shim_mgmt_svr = MgmtServer::new( + &self.id, + self.runtime_instance.as_ref().unwrap().sandbox.clone(), + ) + .context(ERR_NO_SHIM_SERVER)?; + + tokio::task::spawn(Arc::new(shim_mgmt_svr).run()); + info!(sl!(), "shim management http server starts"); + + Ok(()) + } + + fn get_runtime_instance(&self) -> Option> { + self.runtime_instance.clone() + } + + fn get_kata_tracer(&self) -> Arc> { + self.kata_tracer.clone() + } +} + +pub struct RuntimeHandlerManager { + inner: Arc>, +} + +// todo: a more detailed impl for fmt::Debug +impl std::fmt::Debug for RuntimeHandlerManager { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RuntimeHandlerManager").finish() + } +} + +impl RuntimeHandlerManager { + pub fn new(id: &str, msg_sender: 
Sender) -> Result { + Ok(Self { + inner: Arc::new(RwLock::new(RuntimeHandlerManagerInner::new( + id, msg_sender, + )?)), + }) + } + + pub async fn cleanup(&self) -> Result<()> { + let inner = self.inner.read().await; + let sender = inner.msg_sender.clone(); + let sandbox_state = persist::from_disk::(&inner.id) + .context("failed to load the sandbox state")?; + + let config = if let Ok(spec) = load_oci_spec() { + load_config(&spec, &None).context("load config")? + } else { + TomlConfig::default() + }; + + let sandbox_args = SandboxRestoreArgs { + sid: inner.id.clone(), + toml_config: config, + sender, + }; + match sandbox_state.sandbox_type.clone() { + #[cfg(feature = "linux")] + name if name == LinuxContainer::name() => { + // TODO :support linux container (https://github.com/kata-containers/kata-containers/issues/4905) + return Ok(()); + } + #[cfg(feature = "wasm")] + name if name == WasmContainer::name() => { + // TODO :support wasm container (https://github.com/kata-containers/kata-containers/issues/4906) + return Ok(()); + } + #[cfg(feature = "virt")] + name if name == VirtContainer::name() => { + if sandbox_args.toml_config.runtime.keep_abnormal { + info!(sl!(), "skip cleanup for keep_abnormal"); + return Ok(()); + } + let sandbox = VirtSandbox::restore(sandbox_args, sandbox_state) + .await + .context("failed to restore the sandbox")?; + sandbox + .cleanup() + .await + .context("failed to cleanup the resource")?; + } + _ => { + return Ok(()); + } + } + + Ok(()) + } + + async fn get_runtime_instance(&self) -> Result> { + let inner = self.inner.read().await; + inner + .get_runtime_instance() + .ok_or_else(|| anyhow!("runtime not ready")) + } + + async fn get_kata_tracer(&self) -> Result>> { + let inner = self.inner.read().await; + Ok(inner.get_kata_tracer()) + } + + #[instrument] + async fn try_init_runtime_instance( + &self, + spec: &oci::Spec, + state: &oci::State, + options: &Option>, + ) -> Result<()> { + let mut inner = self.inner.write().await; + inner.try_init(spec, state, options).await + } + + #[instrument(parent = &*(ROOTSPAN))] + pub async fn handler_message(&self, req: Request) -> Result { + if let Request::CreateContainer(container_config) = req { + // get oci spec + let bundler_path = format!( + "{}/{}", + container_config.bundle, + oci::OCI_SPEC_CONFIG_FILE_NAME + ); + let spec = oci::Spec::load(&bundler_path).context("load spec")?; + let state = oci::State { + version: spec.version.clone(), + id: container_config.container_id.to_string(), + status: oci::ContainerState::Creating, + pid: 0, + bundle: container_config.bundle.clone(), + annotations: spec.annotations.clone(), + }; + + self.try_init_runtime_instance(&spec, &state, &container_config.options) + .await + .context("try init runtime instance")?; + let instance = self + .get_runtime_instance() + .await + .context("get runtime instance")?; + + let shim_pid = instance + .container_manager + .create_container(container_config, spec) + .await + .context("create container")?; + + Ok(Response::CreateContainer(shim_pid)) + } else { + self.handler_request(req).await.context("handler request") + } + } + + #[instrument(parent = &(*ROOTSPAN))] + pub async fn handler_request(&self, req: Request) -> Result { + let instance = self + .get_runtime_instance() + .await + .context("get runtime instance")?; + let sandbox = instance.sandbox.clone(); + let cm = instance.container_manager.clone(); + + match req { + Request::CreateContainer(req) => Err(anyhow!("Unreachable request {:?}", req)), + Request::CloseProcessIO(process_id) => { + 
cm.close_process_io(&process_id).await.context("close io")?; + Ok(Response::CloseProcessIO) + } + Request::DeleteProcess(process_id) => { + let resp = cm.delete_process(&process_id).await.context("do delete")?; + Ok(Response::DeleteProcess(resp)) + } + Request::ExecProcess(req) => { + cm.exec_process(req).await.context("exec")?; + Ok(Response::ExecProcess) + } + Request::KillProcess(req) => { + cm.kill_process(&req).await.context("kill process")?; + Ok(Response::KillProcess) + } + Request::ShutdownContainer(req) => { + if cm.need_shutdown_sandbox(&req).await { + sandbox.shutdown().await.context("do shutdown")?; + + // stop the tracer collector + let kata_tracer = self.get_kata_tracer().await.context("get kata tracer")?; + let tracer = kata_tracer.lock().await; + tracer.trace_end(); + } + Ok(Response::ShutdownContainer) + } + Request::WaitProcess(process_id) => { + let exit_status = cm.wait_process(&process_id).await.context("wait process")?; + if cm.is_sandbox_container(&process_id).await { + sandbox.stop().await.context("stop sandbox")?; + } + Ok(Response::WaitProcess(exit_status)) + } + Request::StartProcess(process_id) => { + let shim_pid = cm + .start_process(&process_id) + .await + .context("start process")?; + Ok(Response::StartProcess(shim_pid)) + } + + Request::StateProcess(process_id) => { + let state = cm + .state_process(&process_id) + .await + .context("state process")?; + Ok(Response::StateProcess(state)) + } + Request::PauseContainer(container_id) => { + cm.pause_container(&container_id) + .await + .context("pause container")?; + Ok(Response::PauseContainer) + } + Request::ResumeContainer(container_id) => { + cm.resume_container(&container_id) + .await + .context("resume container")?; + Ok(Response::ResumeContainer) + } + Request::ResizeProcessPTY(req) => { + cm.resize_process_pty(&req).await.context("resize pty")?; + Ok(Response::ResizeProcessPTY) + } + Request::StatsContainer(container_id) => { + let stats = cm + .stats_container(&container_id) + .await + .context("stats container")?; + Ok(Response::StatsContainer(stats)) + } + Request::UpdateContainer(req) => { + cm.update_container(req).await.context("update container")?; + Ok(Response::UpdateContainer) + } + Request::Pid => Ok(Response::Pid(cm.pid().await.context("pid")?)), + Request::ConnectContainer(container_id) => Ok(Response::ConnectContainer( + cm.connect_container(&container_id) + .await + .context("connect")?, + )), + } + } +} + +/// Config override ordering(high to low): +/// 1. podsandbox annotation +/// 2. environment variable +/// 3. shimv2 create task option +/// 4. If above three are not set, then get default path from DEFAULT_RUNTIME_CONFIGURATIONS +/// in kata-containers/src/libs/kata-types/src/config/default.rs, in array order. 
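An illustrative sketch, not part of the patch: the precedence described above, compressed into a chain of fallbacks. The full resolution, including the option-byte parsing and the default-path lookup performed by TomlConfig::load_from_file, is implemented by load_config() just below; annotation_path and option_path are placeholders for the values extracted from the spec annotations and the create-task options.

// Resolve a configuration path: annotation, then environment, then task option.
fn resolve_config_path(
    annotation_path: Option<String>,
    option_path: Option<String>,
) -> String {
    annotation_path
        .or_else(|| std::env::var("KATA_CONF_FILE").ok())
        .or(option_path)
        // An empty path leaves the choice to the default configuration search.
        .unwrap_or_default()
}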
+#[instrument] +fn load_config(spec: &oci::Spec, option: &Option>) -> Result { + const KATA_CONF_FILE: &str = "KATA_CONF_FILE"; + let annotation = Annotation::new(spec.annotations.clone()); + let config_path = if let Some(path) = annotation.get_sandbox_config_path() { + path + } else if let Ok(path) = std::env::var(KATA_CONF_FILE) { + path + } else if let Some(option) = option { + // get rid of the special characters in options to get the config path + if option.len() > 2 { + from_utf8(&option[2..])?.to_string() + } else { + String::from("") + } + } else { + String::from("") + }; + info!(sl!(), "get config path {:?}", &config_path); + let (mut toml_config, _) = + TomlConfig::load_from_file(&config_path).context("load toml config")?; + annotation.update_config_by_annotation(&mut toml_config)?; + update_agent_kernel_params(&mut toml_config)?; + + // validate configuration and return the error + toml_config.validate()?; + + // Sandbox sizing information *may* be provided in two scenarios: + // 1. The upper layer runtime (ie, containerd or crio) provide sandbox sizing information as an annotation + // in the 'sandbox container's' spec. This would typically be a scenario where as part of a create sandbox + // request the upper layer runtime receives this information as part of a pod, and makes it available to us + // for sizing purposes. + // 2. If this is not a sandbox infrastructure container, but instead a standalone single container (analogous to "docker run..."), + // then the container spec itself will contain appropriate sizing information for the entire sandbox (since it is + // a single container. + let initial_size_manager = + InitialSizeManager::new(spec).context("failed to construct static resource manager")?; + initial_size_manager + .setup_config(&mut toml_config) + .context("failed to setup static resource mgmt config")?; + + info!(sl!(), "get config content {:?}", &toml_config); + Ok(toml_config) +} + +// this update the agent-specfic kernel parameters into hypervisor's bootinfo +// the agent inside the VM will read from file cmdline to get the params and function +fn update_agent_kernel_params(config: &mut TomlConfig) -> Result<()> { + let mut params = vec![]; + if let Ok(kv) = config.get_agent_kernel_params() { + for (k, v) in kv.into_iter() { + if let Ok(s) = Param::new(k.as_str(), v.as_str()).to_string() { + params.push(s); + } + } + if let Some(h) = config.hypervisor.get_mut(&config.runtime.hypervisor_name) { + h.boot_info.add_kernel_params(params); + } + } + Ok(()) +} diff --git a/src/runtime-rs/crates/runtimes/src/shim_metrics.rs b/src/runtime-rs/crates/runtimes/src/shim_metrics.rs new file mode 100644 index 000000000000..62ebf0135676 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/src/shim_metrics.rs @@ -0,0 +1,235 @@ +// Copyright 2021-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +extern crate procfs; + +use anyhow::{anyhow, Result}; +use prometheus::{Encoder, Gauge, GaugeVec, Opts, Registry, TextEncoder}; +use slog::warn; +use std::sync::Mutex; + +const NAMESPACE_KATA_SHIM: &str = "kata_shim"; + +// Convenience macro to obtain the scope logger +macro_rules! sl { + () => { + slog_scope::logger().new(o!("subsystem" => "metrics")) + }; +} + +lazy_static! 
{ + static ref REGISTERED: Mutex = Mutex::new(false); + + // custom registry + static ref REGISTRY: Registry = Registry::new(); + + // shim metrics + static ref SHIM_THREADS: Gauge = Gauge::new(format!("{}_{}", NAMESPACE_KATA_SHIM, "threads"),"Kata containerd shim v2 process threads.").unwrap(); + + static ref SHIM_PROC_STATUS: GaugeVec = + GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_SHIM,"proc_status"), "Kata containerd shim v2 process status."), &["item"]).unwrap(); + + static ref SHIM_PROC_STAT: GaugeVec = GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_SHIM,"proc_stat"), "Kata containerd shim v2 process statistics."), &["item"]).unwrap(); + + static ref SHIM_NETDEV: GaugeVec = GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_SHIM,"netdev"), "Kata containerd shim v2 network devices statistics."), &["interface", "item"]).unwrap(); + + static ref SHIM_IO_STAT: GaugeVec = GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_SHIM,"io_stat"), "Kata containerd shim v2 process IO statistics."), &["item"]).unwrap(); + + static ref SHIM_OPEN_FDS: Gauge = Gauge::new(format!("{}_{}", NAMESPACE_KATA_SHIM, "fds"), "Kata containerd shim v2 open FDs.").unwrap(); +} + +pub fn get_shim_metrics() -> Result { + let mut registered = REGISTERED + .lock() + .map_err(|e| anyhow!("failed to check shim metrics register status {:?}", e))?; + + if !(*registered) { + register_shim_metrics()?; + *registered = true; + } + + update_shim_metrics()?; + + // gather all metrics and return as a String + let metric_families = REGISTRY.gather(); + + let mut buffer = Vec::new(); + let encoder = TextEncoder::new(); + encoder.encode(&metric_families, &mut buffer)?; + + Ok(String::from_utf8(buffer)?) +} + +fn register_shim_metrics() -> Result<()> { + REGISTRY.register(Box::new(SHIM_THREADS.clone()))?; + REGISTRY.register(Box::new(SHIM_PROC_STATUS.clone()))?; + REGISTRY.register(Box::new(SHIM_PROC_STAT.clone()))?; + REGISTRY.register(Box::new(SHIM_NETDEV.clone()))?; + REGISTRY.register(Box::new(SHIM_IO_STAT.clone()))?; + REGISTRY.register(Box::new(SHIM_OPEN_FDS.clone()))?; + + // TODO: + // REGISTRY.register(Box::new(RPC_DURATIONS_HISTOGRAM.clone()))?; + // REGISTRY.register(Box::new(SHIM_POD_OVERHEAD_CPU.clone()))?; + // REGISTRY.register(Box::new(SHIM_POD_OVERHEAD_MEMORY.clone()))?; + + Ok(()) +} + +fn update_shim_metrics() -> Result<()> { + let me = procfs::process::Process::myself(); + + let me = match me { + Ok(p) => p, + Err(e) => { + warn!(sl!(), "failed to create process instance: {:?}", e); + return Ok(()); + } + }; + + SHIM_THREADS.set(me.stat.num_threads as f64); + + match me.status() { + Err(err) => error!(sl!(), "failed to get process status: {:?}", err), + Ok(status) => set_gauge_vec_proc_status(&SHIM_PROC_STATUS, &status), + } + + match me.stat() { + Err(err) => { + error!(sl!(), "failed to get process stat: {:?}", err); + } + Ok(stat) => { + set_gauge_vec_proc_stat(&SHIM_PROC_STAT, &stat); + } + } + + match procfs::net::dev_status() { + Err(err) => { + error!(sl!(), "failed to get host net::dev_status: {:?}", err); + } + Ok(devs) => { + for (_, status) in devs { + set_gauge_vec_netdev(&SHIM_NETDEV, &status); + } + } + } + + match me.io() { + Err(err) => { + error!(sl!(), "failed to get process io stat: {:?}", err); + } + Ok(io) => { + set_gauge_vec_proc_io(&SHIM_IO_STAT, &io); + } + } + + match me.fd_count() { + Err(err) => { + error!(sl!(), "failed to get process open fds number: {:?}", err); + } + Ok(fds) => { + SHIM_OPEN_FDS.set(fds as f64); + } + } + + // TODO: + // 
RPC_DURATIONS_HISTOGRAM & SHIM_POD_OVERHEAD_CPU & SHIM_POD_OVERHEAD_MEMORY + + Ok(()) +} + +fn set_gauge_vec_proc_status(gv: &prometheus::GaugeVec, status: &procfs::process::Status) { + gv.with_label_values(&["vmpeak"]) + .set(status.vmpeak.unwrap_or(0) as f64); + gv.with_label_values(&["vmsize"]) + .set(status.vmsize.unwrap_or(0) as f64); + gv.with_label_values(&["vmlck"]) + .set(status.vmlck.unwrap_or(0) as f64); + gv.with_label_values(&["vmpin"]) + .set(status.vmpin.unwrap_or(0) as f64); + gv.with_label_values(&["vmhwm"]) + .set(status.vmhwm.unwrap_or(0) as f64); + gv.with_label_values(&["vmrss"]) + .set(status.vmrss.unwrap_or(0) as f64); + gv.with_label_values(&["rssanon"]) + .set(status.rssanon.unwrap_or(0) as f64); + gv.with_label_values(&["rssfile"]) + .set(status.rssfile.unwrap_or(0) as f64); + gv.with_label_values(&["rssshmem"]) + .set(status.rssshmem.unwrap_or(0) as f64); + gv.with_label_values(&["vmdata"]) + .set(status.vmdata.unwrap_or(0) as f64); + gv.with_label_values(&["vmstk"]) + .set(status.vmstk.unwrap_or(0) as f64); + gv.with_label_values(&["vmexe"]) + .set(status.vmexe.unwrap_or(0) as f64); + gv.with_label_values(&["vmlib"]) + .set(status.vmlib.unwrap_or(0) as f64); + gv.with_label_values(&["vmpte"]) + .set(status.vmpte.unwrap_or(0) as f64); + gv.with_label_values(&["vmswap"]) + .set(status.vmswap.unwrap_or(0) as f64); + gv.with_label_values(&["hugetlbpages"]) + .set(status.hugetlbpages.unwrap_or(0) as f64); + gv.with_label_values(&["voluntary_ctxt_switches"]) + .set(status.voluntary_ctxt_switches.unwrap_or(0) as f64); + gv.with_label_values(&["nonvoluntary_ctxt_switches"]) + .set(status.nonvoluntary_ctxt_switches.unwrap_or(0) as f64); +} + +fn set_gauge_vec_proc_stat(gv: &prometheus::GaugeVec, stat: &procfs::process::Stat) { + gv.with_label_values(&["utime"]).set(stat.utime as f64); + gv.with_label_values(&["stime"]).set(stat.stime as f64); + gv.with_label_values(&["cutime"]).set(stat.cutime as f64); + gv.with_label_values(&["cstime"]).set(stat.cstime as f64); +} + +fn set_gauge_vec_netdev(gv: &prometheus::GaugeVec, status: &procfs::net::DeviceStatus) { + gv.with_label_values(&[status.name.as_str(), "recv_bytes"]) + .set(status.recv_bytes as f64); + gv.with_label_values(&[status.name.as_str(), "recv_packets"]) + .set(status.recv_packets as f64); + gv.with_label_values(&[status.name.as_str(), "recv_errs"]) + .set(status.recv_errs as f64); + gv.with_label_values(&[status.name.as_str(), "recv_drop"]) + .set(status.recv_drop as f64); + gv.with_label_values(&[status.name.as_str(), "recv_fifo"]) + .set(status.recv_fifo as f64); + gv.with_label_values(&[status.name.as_str(), "recv_frame"]) + .set(status.recv_frame as f64); + gv.with_label_values(&[status.name.as_str(), "recv_compressed"]) + .set(status.recv_compressed as f64); + gv.with_label_values(&[status.name.as_str(), "recv_multicast"]) + .set(status.recv_multicast as f64); + gv.with_label_values(&[status.name.as_str(), "sent_bytes"]) + .set(status.sent_bytes as f64); + gv.with_label_values(&[status.name.as_str(), "sent_packets"]) + .set(status.sent_packets as f64); + gv.with_label_values(&[status.name.as_str(), "sent_errs"]) + .set(status.sent_errs as f64); + gv.with_label_values(&[status.name.as_str(), "sent_drop"]) + .set(status.sent_drop as f64); + gv.with_label_values(&[status.name.as_str(), "sent_fifo"]) + .set(status.sent_fifo as f64); + gv.with_label_values(&[status.name.as_str(), "sent_colls"]) + .set(status.sent_colls as f64); + gv.with_label_values(&[status.name.as_str(), "sent_carrier"]) + 
.set(status.sent_carrier as f64); + gv.with_label_values(&[status.name.as_str(), "sent_compressed"]) + .set(status.sent_compressed as f64); +} + +fn set_gauge_vec_proc_io(gv: &prometheus::GaugeVec, io_stat: &procfs::process::Io) { + gv.with_label_values(&["rchar"]).set(io_stat.rchar as f64); + gv.with_label_values(&["wchar"]).set(io_stat.wchar as f64); + gv.with_label_values(&["syscr"]).set(io_stat.syscr as f64); + gv.with_label_values(&["syscw"]).set(io_stat.syscw as f64); + gv.with_label_values(&["read_bytes"]) + .set(io_stat.read_bytes as f64); + gv.with_label_values(&["write_bytes"]) + .set(io_stat.write_bytes as f64); + gv.with_label_values(&["cancelled_write_bytes"]) + .set(io_stat.cancelled_write_bytes as f64); +} diff --git a/src/runtime-rs/crates/runtimes/src/shim_mgmt/handlers.rs b/src/runtime-rs/crates/runtimes/src/shim_mgmt/handlers.rs new file mode 100644 index 000000000000..613cca28780f --- /dev/null +++ b/src/runtime-rs/crates/runtimes/src/shim_mgmt/handlers.rs @@ -0,0 +1,166 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +// This defines the handlers corresponding to the url when a request is sent to destined url, +// the handler function should be invoked, and the corresponding data will be in the response + +use crate::shim_metrics::get_shim_metrics; +use agent::ResizeVolumeRequest; +use anyhow::{anyhow, Context, Result}; +use common::Sandbox; +use hyper::{Body, Method, Request, Response, StatusCode}; +use std::sync::Arc; +use url::Url; + +use shim_interface::shim_mgmt::{ + AGENT_URL, DIRECT_VOLUME_PATH_KEY, DIRECT_VOLUME_RESIZE_URL, DIRECT_VOLUME_STATS_URL, + IP6_TABLE_URL, IP_TABLE_URL, METRICS_URL, +}; + +// main router for response, this works as a multiplexer on +// http arrival which invokes the corresponding handler function +pub(crate) async fn handler_mux( + sandbox: Arc, + req: Request, +) -> Result> { + info!( + sl!(), + "mgmt-svr(mux): recv req, method: {}, uri: {}", + req.method(), + req.uri().path() + ); + match (req.method(), req.uri().path()) { + (&Method::GET, AGENT_URL) => agent_url_handler(sandbox, req).await, + (&Method::PUT, IP_TABLE_URL) | (&Method::GET, IP_TABLE_URL) => { + ip_table_handler(sandbox, req).await + } + (&Method::PUT, IP6_TABLE_URL) | (&Method::GET, IP6_TABLE_URL) => { + ipv6_table_handler(sandbox, req).await + } + (&Method::POST, DIRECT_VOLUME_STATS_URL) => direct_volume_stats_handler(sandbox, req).await, + (&Method::POST, DIRECT_VOLUME_RESIZE_URL) => { + direct_volume_resize_handler(sandbox, req).await + } + (&Method::GET, METRICS_URL) => metrics_url_handler(sandbox, req).await, + _ => Ok(not_found(req).await), + } +} + +// url not found +async fn not_found(_req: Request) -> Response { + Response::builder() + .status(StatusCode::NOT_FOUND) + .body(Body::from("URL NOT FOUND")) + .unwrap() +} + +// returns the url for agent +async fn agent_url_handler( + sandbox: Arc, + _req: Request, +) -> Result> { + let agent_sock = sandbox + .agent_sock() + .await + .unwrap_or_else(|_| String::from("")); + Ok(Response::new(Body::from(agent_sock))) +} + +/// the ipv4 handler of iptable operation +async fn ip_table_handler(sandbox: Arc, req: Request) -> Result> { + generic_ip_table_handler(sandbox, req, false).await +} + +/// the ipv6 handler of iptable operation +async fn ipv6_table_handler( + sandbox: Arc, + req: Request, +) -> Result> { + generic_ip_table_handler(sandbox, req, true).await +} + +/// the generic iptable handler, for both ipv4 and ipv6 +/// this 
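`handler_mux` above routes requests with a plain `match` over `(Method, path)` rather than a router crate. A self-contained sketch of that multiplexing shape, written against the hyper 0.14-style API used in this file; the paths here are made up and stand in for the shim's real management URLs:

```rust
use anyhow::Result;
use hyper::{Body, Method, Request, Response, StatusCode};

// Hypothetical endpoints, standing in for AGENT_URL, METRICS_URL, etc.
const HELLO_URL: &str = "/hello";
const ECHO_URL: &str = "/echo";

async fn route(req: Request<Body>) -> Result<Response<Body>> {
    match (req.method(), req.uri().path()) {
        (&Method::GET, HELLO_URL) => Ok(Response::new(Body::from("hello"))),
        (&Method::PUT, ECHO_URL) => {
            // Read the body and echo it back, mirroring the PUT branch of the
            // iptables handler above.
            let data = hyper::body::to_bytes(req.into_body()).await?;
            Ok(Response::new(Body::from(data)))
        }
        _ => Ok(Response::builder()
            .status(StatusCode::NOT_FOUND)
            .body(Body::from("URL NOT FOUND"))?),
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    let req = Request::builder()
        .method(Method::GET)
        .uri(HELLO_URL)
        .body(Body::empty())?;
    let resp = route(req).await?;
    println!("status: {}", resp.status());
    Ok(())
}
```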
requires iptables-series binaries to be inside guest rootfs +async fn generic_ip_table_handler( + sandbox: Arc, + req: Request, + is_ipv6: bool, +) -> Result> { + info!(sl!(), "handler: iptable ipv6?: {}", is_ipv6); + match *req.method() { + Method::GET => match sandbox.get_iptables(is_ipv6).await { + Ok(data) => { + let body = Body::from(data); + Response::builder().body(body).map_err(|e| anyhow!(e)) + } + _ => Err(anyhow!("Failed to get iptable")), + }, + + Method::PUT => { + let data = hyper::body::to_bytes(req.into_body()).await?; + match sandbox.set_iptables(is_ipv6, data.to_vec()).await { + Ok(resp_data) => Response::builder() + .body(Body::from(resp_data)) + .map_err(|e| anyhow!(e)), + _ => Err(anyhow!("Failed to set iptable")), + } + } + + _ => Err(anyhow!("IP Tables only takes PUT and GET")), + } +} + +async fn direct_volume_stats_handler( + sandbox: Arc, + req: Request, +) -> Result> { + let params = Url::parse(&req.uri().to_string()) + .map_err(|e| anyhow!(e))? + .query_pairs() + .into_owned() + .collect::>(); + let volume_path = params + .get(DIRECT_VOLUME_PATH_KEY) + .context("shim-mgmt: volume path key not found in request params")?; + let result = sandbox.direct_volume_stats(volume_path).await; + match result { + Ok(stats) => Ok(Response::new(Body::from(stats))), + _ => Err(anyhow!("handler: Failed to get volume stats")), + } +} + +async fn direct_volume_resize_handler( + sandbox: Arc, + req: Request, +) -> Result> { + let body = hyper::body::to_bytes(req.into_body()).await?; + + // unserialize json body into resizeRequest struct + let resize_req: ResizeVolumeRequest = + serde_json::from_slice(&body).context("shim-mgmt: deserialize resizeRequest failed")?; + let result = sandbox.direct_volume_resize(resize_req).await; + + match result { + Ok(_) => Ok(Response::new(Body::from(""))), + _ => Err(anyhow!("handler: Failed to resize volume")), + } +} + +// returns the url for metrics +async fn metrics_url_handler( + sandbox: Arc, + _req: Request, +) -> Result> { + // get metrics from agent, hypervisor, and shim + let agent_metrics = sandbox.agent_metrics().await.unwrap_or_default(); + let hypervisor_metrics = sandbox.hypervisor_metrics().await.unwrap_or_default(); + let shim_metrics = get_shim_metrics().unwrap_or_default(); + + Ok(Response::new(Body::from(format!( + "{}{}{}", + agent_metrics, hypervisor_metrics, shim_metrics + )))) +} diff --git a/src/runtime-rs/crates/runtimes/src/shim_mgmt/mod.rs b/src/runtime-rs/crates/runtimes/src/shim_mgmt/mod.rs new file mode 100644 index 000000000000..5e9feb0bd342 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/src/shim_mgmt/mod.rs @@ -0,0 +1,14 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! The server side of shim management implementation, receive HTTP +//! requests and multiplex them to corresponding functions inside shim +//! +//! To call services in a RESTful convention, use the client +//! 
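The volume-stats handler above extracts its parameter by parsing the request URI with the `url` crate and collecting the query pairs into a map. A minimal sketch of that extraction; the `volume-path` key used here is illustrative, the real key comes from `DIRECT_VOLUME_PATH_KEY` in shim-interface:

```rust
use std::collections::HashMap;

use anyhow::{Context, Result};
use url::Url;

fn volume_path_from_uri(uri: &str, key: &str) -> Result<String> {
    let params: HashMap<String, String> = Url::parse(uri)
        .context("parse request uri")?
        .query_pairs()
        .into_owned()
        .collect();
    params
        .get(key)
        .cloned()
        .context("volume path key not found in request params")
}

fn main() -> Result<()> {
    // Hypothetical request URI as it might arrive over the management socket.
    let uri = "http://shim/direct-volume/stats?volume-path=/dev/sdb";
    assert_eq!(volume_path_from_uri(uri, "volume-path")?, "/dev/sdb");
    Ok(())
}
```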
from libs/shim-interface library + +mod handlers; +pub mod server; diff --git a/src/runtime-rs/crates/runtimes/src/shim_mgmt/server.rs b/src/runtime-rs/crates/runtimes/src/shim_mgmt/server.rs new file mode 100644 index 000000000000..08ad681ffecd --- /dev/null +++ b/src/runtime-rs/crates/runtimes/src/shim_mgmt/server.rs @@ -0,0 +1,81 @@ +// Copyright (c) 2022 Alibaba Cloud +// Copyright (c) 2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +// Shim management service, this service starts a management http server on a socket +// and wire certain URL with a corresponding handler. When a command-line interface +// or further shim functions want the information corresponding to this, it can just +// send a GET request to the url, and the info will be in the response + +#![allow(dead_code)] // some url's handler are *to be* developed + +use std::{fs, path::Path, sync::Arc}; + +use anyhow::{Context, Result}; +use common::Sandbox; +use hyper::{server::conn::Http, service::service_fn}; +use shim_interface::{mgmt_socket_addr, shim_mgmt::ERR_NO_SHIM_SERVER}; +use tokio::net::UnixListener; + +use super::handlers::handler_mux; + +/// The shim management server instance +pub struct MgmtServer { + /// socket address(with prefix like hvsock://) + pub s_addr: String, + + /// The sandbox instance + pub sandbox: Arc, +} + +impl MgmtServer { + /// construct a new management server + pub fn new(sid: &str, sandbox: Arc) -> Result { + Ok(Self { + s_addr: mgmt_socket_addr(sid).context(ERR_NO_SHIM_SERVER)?, + sandbox, + }) + } + + // TODO(when metrics is supported): write metric addresses to fs + // TODO(when metrics is supported): register shim metrics + // TODO(when metrics is supported): register sandbox metrics + // running management http server in an infinite loop, able to serve concurrent requests + pub async fn run(self: Arc) { + let listener = listener_from_path(self.s_addr.clone()).await.unwrap(); + // start an infinite loop, which serves the incomming uds stream + loop { + let (stream, _) = listener.accept().await.unwrap(); + let me = self.clone(); + // spawn a light weight thread to multiplex to the handler + tokio::task::spawn(async move { + if let Err(err) = Http::new() + .serve_connection( + stream, + service_fn(|request| handler_mux(me.sandbox.clone(), request)), + ) + .await + { + warn!(sl!(), "Failed to serve connection: {:?}", err); + } + }); + } + } +} + +// from path, return a unix listener corresponding to that path, +// if the path(socket file) is not created, we create that here +async fn listener_from_path(path: String) -> Result { + // create the socket if not present + let trim_path = path.strip_prefix("unix:").context("trim path")?; + let file_path = Path::new("/").join(trim_path); + let file_path = file_path.as_path(); + if let Some(parent_dir) = file_path.parent() { + fs::create_dir_all(parent_dir).context("create parent dir")?; + } + // bind the socket and return the listener + info!(sl!(), "mgmt-svr: binding to path {}", path); + UnixListener::bind(file_path).context("bind address") +} diff --git a/src/runtime-rs/crates/runtimes/src/tracer.rs b/src/runtime-rs/crates/runtimes/src/tracer.rs new file mode 100644 index 000000000000..e34c556063f0 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/src/tracer.rs @@ -0,0 +1,169 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::cmp::min; +use std::sync::Arc; + +use anyhow::Result; +use lazy_static::lazy_static; +use 
opentelemetry::global; +use opentelemetry::runtime::Tokio; +use tracing::{span, subscriber::NoSubscriber, Span, Subscriber}; +use tracing_subscriber::prelude::*; +use tracing_subscriber::Registry; + +const DEFAULT_JAEGER_URL: &str = "http://localhost:14268/api/traces"; + +lazy_static! { + /// The ROOTSPAN is a phantom span that is running by calling [`trace_enter_root()`] at the background + /// once the configuration is read and config.runtime.enable_tracing is enabled + /// The ROOTSPAN exits by calling [`trace_exit_root()`] on shutdown request sent from containerd + /// + /// NOTE: + /// This allows other threads which are not directly running under some spans to be tracked easily + /// within the entire sandbox's lifetime. + /// To do this, you just need to add attribute #[instrment(parent=&(*ROOTSPAN))] + pub static ref ROOTSPAN: Span = span!(tracing::Level::TRACE, "root-span"); +} + +/// The tracer wrapper for kata-containers +/// The fields and member methods should ALWAYS be PRIVATE and be exposed in a safe +/// way to other modules +unsafe impl Send for KataTracer {} +unsafe impl Sync for KataTracer {} +pub struct KataTracer { + subscriber: Arc, + enabled: bool, +} + +impl Default for KataTracer { + fn default() -> Self { + Self::new() + } +} + +impl KataTracer { + /// Constructor of KataTracer, this is a dummy implementation for static initialization + pub fn new() -> Self { + Self { + subscriber: Arc::new(NoSubscriber::default()), + enabled: false, + } + } + + /// Set the tracing enabled flag + fn enable(&mut self) { + self.enabled = true; + } + + /// Return whether the tracing is enabled, enabled by [`trace_setup`] + fn enabled(&self) -> bool { + self.enabled + } + + /// Call when the tracing is enabled (set in toml configuration file) + /// This setup the subscriber, which maintains the span's information, to global and + /// inside KATA_TRACER. 
+ /// + /// Note that the span will be noop(not collected) if a invalid subscriber is set + pub fn trace_setup( + &mut self, + sid: &str, + jaeger_endpoint: &str, + jaeger_username: &str, + jaeger_password: &str, + ) -> Result<()> { + // If varify jaeger config returns an error, it means that the tracing should not be enabled + let endpoint = verify_jaeger_config(jaeger_endpoint, jaeger_username, jaeger_password)?; + + // derive a subscriber to collect span info + let tracer = opentelemetry_jaeger::new_collector_pipeline() + .with_service_name(format!("kata-sb-{}", &sid[0..min(8, sid.len())])) + .with_endpoint(endpoint) + .with_username(jaeger_username) + .with_password(jaeger_password) + .with_hyper() + .install_batch(Tokio)?; + + let layer = tracing_opentelemetry::layer().with_tracer(tracer); + + let sub = Registry::default().with(layer); + + // we use Arc to let global subscriber and katatracer to SHARE the SAME subscriber + // this is for record the global subscriber into a global variable KATA_TRACER for more usages + let subscriber = Arc::new(sub); + tracing::subscriber::set_global_default(subscriber.clone())?; + self.subscriber = subscriber; + + // enter the rootspan + self.trace_enter_root(); + + // modity the enable state, note that we have successfully enable tracing + self.enable(); + + info!(sl!(), "Tracing enabled successfully"); + Ok(()) + } + + /// Shutdown the tracer and emit the span info to jaeger agent + /// The tracing information is only partially update to jaeger agent before this function is called + pub fn trace_end(&self) { + if self.enabled() { + // exit the rootspan + self.trace_exit_root(); + + global::shutdown_tracer_provider(); + } + } + + /// Enter the global ROOTSPAN + /// This function is a hack on tracing library's guard approach, letting the span + /// to enter without using a RAII guard to exit. This function should only be called + /// once, and also in paired with [`trace_exit_root()`]. + fn trace_enter_root(&self) { + self.enter_span(&ROOTSPAN); + } + + /// Exit the global ROOTSPAN + /// This should be called in paired with [`trace_enter_root()`]. 
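The ROOTSPAN described above is meant to be named as an explicit parent by code running on threads that have no active span of their own. A hedged sketch of that attribute usage (note the attribute is spelled `#[instrument(...)]`); it assumes a `tracing` version that accepts an explicit `parent` argument, as the crate versions in this tree do, and the function below is illustrative only:

```rust
use lazy_static::lazy_static;
use tracing::{instrument, span, Span};

lazy_static! {
    // Phantom root span, mirroring ROOTSPAN above.
    pub static ref ROOTSPAN: Span = span!(tracing::Level::TRACE, "root-span");
}

// Any function can attach its span to the sandbox-wide root span by naming it
// as the explicit parent.
#[instrument(parent = &(*ROOTSPAN))]
fn do_background_work(step: u32) {
    tracing::info!(step, "working under the root span");
}

fn main() {
    // With no subscriber installed the spans are no-ops, which matches the
    // "noop if an invalid subscriber is set" note above.
    do_background_work(1);
}
```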
+ fn trace_exit_root(&self) { + self.exit_span(&ROOTSPAN); + } + + /// let the subscriber enter the span, this has to be called in pair with exit(span) + /// This function allows **cross function span** to run without span guard + fn enter_span(&self, span: &Span) { + let id: Option = span.into(); + self.subscriber.enter(&id.unwrap()); + } + + /// let the subscriber exit the span, this has to be called in pair to enter(span) + fn exit_span(&self, span: &Span) { + let id: Option = span.into(); + self.subscriber.exit(&id.unwrap()); + } +} + +/// Verifying the configuration of jaeger and setup the default value +fn verify_jaeger_config(endpoint: &str, username: &str, passwd: &str) -> Result { + if username.is_empty() && !passwd.is_empty() { + warn!( + sl!(), + "Jaeger password with empty username is not allowed, tracing is NOT enabled" + ); + return Err(anyhow::anyhow!("Empty username with non-empty password")); + } + + // set the default endpoint address, this expects a jaeger-collector running on localhost:14268 + let endpt = if endpoint.is_empty() { + DEFAULT_JAEGER_URL + } else { + endpoint + } + .to_owned(); + + Ok(endpt) +} diff --git a/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml b/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml new file mode 100644 index 000000000000..0a63195eb3ca --- /dev/null +++ b/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml @@ -0,0 +1,44 @@ +[package] +name = "virt_container" +version = "0.1.0" +authors = ["The Kata Containers community "] +edition = "2018" +license = "Apache-2.0" + +[dependencies] +anyhow = "^1.0" +async-trait = "0.1.48" +awaitgroup = "0.6.0" +containerd-shim-protos = { version = "0.3.0", features = ["async"]} +futures = "0.3.19" +lazy_static = "1.4.0" +libc = ">=0.2.39" +nix = "0.24.2" +protobuf = "3.2.0" +serde = { version = "1.0.100", features = ["derive"] } +serde_derive = "1.0.27" +serde_json = "1.0.82" +slog = "2.5.2" +slog-scope = "4.4.0" +tokio = { version = "1.28.1" } +toml = "0.4.2" +url = "2.1.1" +async-std = "1.12.0" +tracing = "0.1.36" + +agent = { path = "../../agent" } +common = { path = "../common" } +hypervisor = { path = "../../hypervisor" } +kata-sys-util = { path = "../../../../libs/kata-sys-util" } +kata-types = { path = "../../../../libs/kata-types" } +logging = { path = "../../../../libs/logging"} +oci = { path = "../../../../libs/oci" } +persist = { path = "../../persist"} +resource = { path = "../../resource" } + +[features] +default = [] + +# Feature is not yet complete, so not enabled by default. +# See https://github.com/kata-containers/kata-containers/issues/6264. 
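The note above introduces an opt-in `cloud-hypervisor` Cargo feature, declared on the next line, so the Cloud Hypervisor code paths compile only when the feature is enabled. A generic sketch of how such a gate is typically consumed; the module and function names here are illustrative, not the crate's actual layout:

```rust
// Compiled only when building with `--features cloud-hypervisor`.
#[cfg(feature = "cloud-hypervisor")]
mod cloud_hypervisor_support {
    pub fn hypervisor_name() -> &'static str {
        "cloud-hypervisor"
    }
}

fn main() {
    #[cfg(feature = "cloud-hypervisor")]
    println!("built with {}", cloud_hypervisor_support::hypervisor_name());

    #[cfg(not(feature = "cloud-hypervisor"))]
    println!("cloud-hypervisor support not compiled in");
}
```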
+cloud-hypervisor = [] diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container.rs b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container.rs new file mode 100644 index 000000000000..efab56a682ac --- /dev/null +++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container.rs @@ -0,0 +1,610 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::collections::HashMap; +use std::sync::Arc; + +use agent::Agent; +use anyhow::{anyhow, Context, Result}; +use common::{ + error::Error, + types::{ + ContainerConfig, ContainerID, ContainerProcess, ProcessStateInfo, ProcessStatus, + ProcessType, + }, +}; +use kata_sys_util::k8s::update_ephemeral_storage_type; + +use oci::{LinuxResources, Process as OCIProcess}; +use resource::{ResourceManager, ResourceUpdateOp}; +use tokio::sync::RwLock; + +use super::{ + process::{Process, ProcessWatcher}, + ContainerInner, +}; +use crate::container_manager::logger_with_process; + +pub struct Exec { + pub(crate) process: Process, + pub(crate) oci_process: OCIProcess, +} + +pub struct Container { + pid: u32, + pub container_id: ContainerID, + config: ContainerConfig, + spec: oci::Spec, + inner: Arc>, + agent: Arc, + resource_manager: Arc, + logger: slog::Logger, +} + +impl Container { + pub fn new( + pid: u32, + config: ContainerConfig, + spec: oci::Spec, + agent: Arc, + resource_manager: Arc, + ) -> Result { + let container_id = ContainerID::new(&config.container_id).context("new container id")?; + let logger = sl!().new(o!("container_id" => config.container_id.clone())); + let process = ContainerProcess::new(&config.container_id, "")?; + let init_process = Process::new( + &process, + pid, + &config.bundle, + config.stdin.clone(), + config.stdout.clone(), + config.stderr.clone(), + config.terminal, + ); + let linux_resources = spec + .linux + .as_ref() + .and_then(|linux| linux.resources.clone()); + + Ok(Self { + pid, + container_id, + config, + spec, + inner: Arc::new(RwLock::new(ContainerInner::new( + agent.clone(), + init_process, + logger.clone(), + linux_resources, + ))), + agent, + resource_manager, + logger, + }) + } + + pub async fn create(&self, mut spec: oci::Spec) -> Result<()> { + // process oci spec + let mut inner = self.inner.write().await; + let toml_config = self.resource_manager.config().await; + let config = &self.config; + let sandbox_pidns = is_pid_namespace_enabled(&spec); + amend_spec(&mut spec, toml_config.runtime.disable_guest_seccomp).context("amend spec")?; + + // get mutable root from oci spec + let root = match spec.root.as_mut() { + Some(root) => root, + None => return Err(anyhow!("spec miss root field")), + }; + + // handler rootfs + let rootfs = self + .resource_manager + .handler_rootfs( + &config.container_id, + root, + &config.bundle, + &config.rootfs_mounts, + ) + .await + .context("handler rootfs")?; + + // update rootfs + root.path = rootfs + .get_guest_rootfs_path() + .await + .context("get guest rootfs path")?; + + let mut storages = vec![]; + if let Some(storage) = rootfs.get_storage().await { + storages.push(storage); + } + inner.rootfs.push(rootfs); + + // handler volumes + let volumes = self + .resource_manager + .handler_volumes(&config.container_id, &spec) + .await + .context("handler volumes")?; + let mut oci_mounts = vec![]; + for v in volumes { + let mut volume_mounts = v.get_volume_mount().context("get volume mount")?; + if !volume_mounts.is_empty() { + 
oci_mounts.append(&mut volume_mounts); + } + + let mut s = v.get_storage().context("get storage")?; + if !s.is_empty() { + storages.append(&mut s); + } + inner.volumes.push(v); + } + spec.mounts = oci_mounts; + + let linux = spec + .linux + .as_ref() + .context("OCI spec missing linux field")?; + + let devices_agent = self + .resource_manager + .handler_devices(&config.container_id, linux) + .await?; + + // update vcpus, mems and host cgroups + let resources = self + .resource_manager + .update_linux_resource( + &config.container_id, + inner.linux_resources.as_ref(), + ResourceUpdateOp::Add, + ) + .await?; + if let Some(linux) = &mut spec.linux { + linux.resources = resources; + } + + // create container + let r = agent::CreateContainerRequest { + process_id: agent::ContainerProcessID::new(&config.container_id, ""), + storages, + oci: Some(spec), + sandbox_pidns, + devices: devices_agent, + ..Default::default() + }; + + self.agent + .create_container(r) + .await + .context("agent create container")?; + self.resource_manager.dump().await; + Ok(()) + } + + pub async fn start( + &self, + containers: Arc>>, + process: &ContainerProcess, + ) -> Result<()> { + let mut inner = self.inner.write().await; + match process.process_type { + ProcessType::Container => { + if let Err(err) = inner.start_container(&process.container_id).await { + let device_manager = self.resource_manager.get_device_manager().await; + let _ = inner.stop_process(process, true, &device_manager).await; + return Err(err); + } + + let container_io = inner.new_container_io(process).await?; + inner + .init_process + .start_io_and_wait(containers, self.agent.clone(), container_io) + .await?; + } + ProcessType::Exec => { + if let Err(e) = inner.start_exec_process(process).await { + let device_manager = self.resource_manager.get_device_manager().await; + let _ = inner.stop_process(process, true, &device_manager).await; + return Err(e).context("enter process"); + } + + let container_io = inner.new_container_io(process).await.context("io stream")?; + + { + let exec = inner + .exec_processes + .get(&process.exec_id) + .ok_or_else(|| Error::ProcessNotFound(process.clone()))?; + if exec.process.height != 0 && exec.process.width != 0 { + inner + .win_resize_process(process, exec.process.height, exec.process.width) + .await + .context("win resize")?; + } + } + + // start io and wait + { + let exec = inner + .exec_processes + .get_mut(&process.exec_id) + .ok_or_else(|| Error::ProcessNotFound(process.clone()))?; + + exec.process + .start_io_and_wait(containers, self.agent.clone(), container_io) + .await + .context("start io and wait")?; + } + } + } + + Ok(()) + } + + pub async fn delete_exec_process(&self, container_process: &ContainerProcess) -> Result<()> { + let mut inner = self.inner.write().await; + inner + .delete_exec_process(&container_process.exec_id) + .await + .context("delete process") + } + + pub async fn state_process( + &self, + container_process: &ContainerProcess, + ) -> Result { + let inner = self.inner.read().await; + match container_process.process_type { + ProcessType::Container => inner.init_process.state().await, + ProcessType::Exec => { + let exec = inner + .exec_processes + .get(&container_process.exec_id) + .ok_or_else(|| Error::ProcessNotFound(container_process.clone()))?; + exec.process.state().await + } + } + } + + pub async fn wait_process( + &self, + container_process: &ContainerProcess, + ) -> Result { + let logger = logger_with_process(container_process); + info!(logger, "start wait process"); + + let inner = 
self.inner.read().await; + inner + .fetch_exit_watcher(container_process) + .context("fetch exit watcher") + } + + pub async fn kill_process( + &self, + container_process: &ContainerProcess, + signal: u32, + all: bool, + ) -> Result<()> { + let mut inner = self.inner.write().await; + inner.signal_process(container_process, signal, all).await + } + + pub async fn exec_process( + &self, + container_process: &ContainerProcess, + stdin: Option, + stdout: Option, + stderr: Option, + terminal: bool, + oci_process: OCIProcess, + ) -> Result<()> { + let process = Process::new( + container_process, + self.pid, + &self.config.bundle, + stdin, + stdout, + stderr, + terminal, + ); + let exec = Exec { + process, + oci_process, + }; + let mut inner = self.inner.write().await; + inner.add_exec_process(&container_process.exec_id, exec); + Ok(()) + } + + pub async fn close_io(&self, container_process: &ContainerProcess) -> Result<()> { + let mut inner = self.inner.write().await; + inner.close_io(container_process).await + } + + pub async fn stop_process(&self, container_process: &ContainerProcess) -> Result<()> { + let mut inner = self.inner.write().await; + let device_manager = self.resource_manager.get_device_manager().await; + inner + .stop_process(container_process, true, &device_manager) + .await + .context("stop process")?; + + // update vcpus, mems and host cgroups + if container_process.process_type == ProcessType::Container { + self.resource_manager + .update_linux_resource( + &self.config.container_id, + inner.linux_resources.as_ref(), + ResourceUpdateOp::Del, + ) + .await?; + } + + Ok(()) + } + + pub async fn pause(&self) -> Result<()> { + let inner = self.inner.read().await; + if inner.init_process.get_status().await == ProcessStatus::Paused { + warn!(self.logger, "container is paused no need to pause"); + return Ok(()); + } + self.agent + .pause_container(self.container_id.clone().into()) + .await + .context("agent pause container")?; + Ok(()) + } + + pub async fn resume(&self) -> Result<()> { + let inner = self.inner.read().await; + if inner.init_process.get_status().await == ProcessStatus::Running { + warn!(self.logger, "container is running no need to resume"); + return Ok(()); + } + self.agent + .resume_container(self.container_id.clone().into()) + .await + .context("agent pause container")?; + Ok(()) + } + + pub async fn resize_pty( + &self, + process: &ContainerProcess, + width: u32, + height: u32, + ) -> Result<()> { + let logger = logger_with_process(process); + let mut inner = self.inner.write().await; + if inner.init_process.get_status().await != ProcessStatus::Running { + warn!(logger, "container is not running"); + return Ok(()); + } + + if process.exec_id.is_empty() { + inner.init_process.height = height; + inner.init_process.width = width; + } else if let Some(exec) = inner.exec_processes.get_mut(&process.exec_id) { + exec.process.height = height; + exec.process.width = width; + + // for some case, resize_pty request should be handled while the process has not been started in agent + // just return here, and truly resize_pty will happen in start_process + if exec.process.get_status().await != ProcessStatus::Running { + return Ok(()); + } + } else { + return Err(anyhow!( + "could not find process {} in container {}", + process.exec_id(), + process.container_id() + )); + } + + inner.win_resize_process(process, height, width).await + } + + pub async fn stats(&self) -> Result> { + let stats_resp = self + .agent + .stats_container(self.container_id.clone().into()) + .await + 
.context("agent stats container")?; + Ok(Some(stats_resp)) + } + + pub async fn update(&self, resources: &LinuxResources) -> Result<()> { + let mut inner = self.inner.write().await; + inner.linux_resources = Some(resources.clone()); + // update vcpus, mems and host cgroups + let agent_resources = self + .resource_manager + .update_linux_resource( + &self.config.container_id, + Some(resources), + ResourceUpdateOp::Update, + ) + .await?; + + let req = agent::UpdateContainerRequest { + container_id: self.container_id.container_id.clone(), + resources: agent_resources, + mounts: Vec::new(), + }; + self.agent + .update_container(req) + .await + .context("agent update container")?; + Ok(()) + } + + pub async fn config(&self) -> ContainerConfig { + self.config.clone() + } + + pub async fn spec(&self) -> oci::Spec { + self.spec.clone() + } +} + +fn amend_spec(spec: &mut oci::Spec, disable_guest_seccomp: bool) -> Result<()> { + // Only the StartContainer hook needs to be reserved for execution in the guest + let start_container_hooks = match spec.hooks.as_ref() { + Some(hooks) => hooks.start_container.clone(), + None => Vec::new(), + }; + + spec.hooks = if start_container_hooks.is_empty() { + None + } else { + Some(oci::Hooks { + start_container: start_container_hooks, + ..Default::default() + }) + }; + + // special process K8s ephemeral volumes. + update_ephemeral_storage_type(spec); + + if let Some(linux) = spec.linux.as_mut() { + if disable_guest_seccomp { + linux.seccomp = None; + } + + if let Some(resource) = linux.resources.as_mut() { + resource.devices = Vec::new(); + resource.pids = None; + resource.block_io = None; + resource.network = None; + resource.rdma = HashMap::new(); + } + + // Host pidns path does not make sense in kata. Let's just align it with + // sandbox namespace whenever it is set. + let mut ns: Vec = Vec::new(); + for n in linux.namespaces.iter() { + match n.r#type.as_str() { + oci::PIDNAMESPACE | oci::NETWORKNAMESPACE => continue, + _ => ns.push(oci::LinuxNamespace { + r#type: n.r#type.clone(), + path: "".to_string(), + }), + } + } + + linux.namespaces = ns; + } + + Ok(()) +} + +// is_pid_namespace_enabled checks if Pid namespace for a container needs to be shared with its sandbox +// pid namespace. 
+fn is_pid_namespace_enabled(spec: &oci::Spec) -> bool { + if let Some(linux) = spec.linux.as_ref() { + for n in linux.namespaces.iter() { + if n.r#type.as_str() == oci::PIDNAMESPACE { + return !n.path.is_empty(); + } + } + } + + false +} + +#[cfg(test)] +mod tests { + use super::amend_spec; + use super::is_pid_namespace_enabled; + #[test] + fn test_amend_spec_disable_guest_seccomp() { + let mut spec = oci::Spec { + linux: Some(oci::Linux { + seccomp: Some(oci::LinuxSeccomp::default()), + ..Default::default() + }), + ..Default::default() + }; + + assert!(spec.linux.as_ref().unwrap().seccomp.is_some()); + + // disable_guest_seccomp = false + amend_spec(&mut spec, false).unwrap(); + assert!(spec.linux.as_ref().unwrap().seccomp.is_some()); + + // disable_guest_seccomp = true + amend_spec(&mut spec, true).unwrap(); + assert!(spec.linux.as_ref().unwrap().seccomp.is_none()); + } + + #[test] + fn test_is_pid_namespace_enabled() { + struct TestData<'a> { + desc: &'a str, + namespaces: Vec, + result: bool, + } + + let tests = &[ + TestData { + desc: "no pid namespace", + namespaces: vec![oci::LinuxNamespace { + r#type: "network".to_string(), + path: "".to_string(), + }], + result: false, + }, + TestData { + desc: "empty pid namespace path", + namespaces: vec![ + oci::LinuxNamespace { + r#type: "pid".to_string(), + path: "".to_string(), + }, + oci::LinuxNamespace { + r#type: "network".to_string(), + path: "".to_string(), + }, + ], + result: false, + }, + TestData { + desc: "pid namespace is set", + namespaces: vec![ + oci::LinuxNamespace { + r#type: "pid".to_string(), + path: "/some/path".to_string(), + }, + oci::LinuxNamespace { + r#type: "network".to_string(), + path: "".to_string(), + }, + ], + result: true, + }, + ]; + + let mut spec = oci::Spec::default(); + + for (i, d) in tests.iter().enumerate() { + spec.linux = Some(oci::Linux { + namespaces: d.namespaces.clone(), + ..Default::default() + }); + + assert_eq!( + d.result, + is_pid_namespace_enabled(&spec), + "test[{}]: {:?}", + i, + d.desc + ); + } + } +} diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container_inner.rs b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container_inner.rs new file mode 100644 index 000000000000..8c5e766d82d9 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container_inner.rs @@ -0,0 +1,336 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{collections::HashMap, sync::Arc}; + +use agent::Agent; +use anyhow::{anyhow, Context, Result}; +use common::{ + error::Error, + types::{ContainerID, ContainerProcess, ProcessExitStatus, ProcessStatus, ProcessType}, +}; +use hypervisor::device::device_manager::DeviceManager; +use nix::sys::signal::Signal; +use oci::LinuxResources; +use resource::{rootfs::Rootfs, volume::Volume}; +use tokio::sync::RwLock; + +use crate::container_manager::logger_with_process; + +use super::{ + io::ContainerIo, + process::{Process, ProcessWatcher}, + Exec, +}; + +pub struct ContainerInner { + agent: Arc, + logger: slog::Logger, + pub(crate) init_process: Process, + pub(crate) exec_processes: HashMap, + pub(crate) rootfs: Vec>, + pub(crate) volumes: Vec>, + pub(crate) linux_resources: Option, +} + +impl ContainerInner { + pub(crate) fn new( + agent: Arc, + init_process: Process, + logger: slog::Logger, + linux_resources: Option, + ) -> Self { + Self { + agent, + logger, + init_process, + exec_processes: 
HashMap::new(), + rootfs: vec![], + volumes: vec![], + linux_resources, + } + } + + fn container_id(&self) -> &str { + self.init_process.process.container_id() + } + + pub(crate) async fn check_state(&self, states: Vec) -> Result<()> { + let state = self.init_process.get_status().await; + if states.contains(&state) { + return Ok(()); + } + + Err(anyhow!( + "failed to check state {:?} for {:?}", + state, + states + )) + } + + pub(crate) async fn set_state(&mut self, state: ProcessStatus) { + let mut status = self.init_process.status.write().await; + *status = state; + } + + pub(crate) async fn start_exec_process(&mut self, process: &ContainerProcess) -> Result<()> { + let exec = self + .exec_processes + .get_mut(&process.exec_id) + .ok_or_else(|| Error::ProcessNotFound(process.clone()))?; + + self.agent + .exec_process(agent::ExecProcessRequest { + process_id: process.clone().into(), + string_user: None, + process: Some(exec.oci_process.clone()), + }) + .await + .context("exec process")?; + exec.process.set_status(ProcessStatus::Running).await; + Ok(()) + } + + pub(crate) async fn win_resize_process( + &self, + process: &ContainerProcess, + height: u32, + width: u32, + ) -> Result<()> { + self.check_state(vec![ProcessStatus::Created, ProcessStatus::Running]) + .await + .context("check state")?; + + self.agent + .tty_win_resize(agent::TtyWinResizeRequest { + process_id: process.clone().into(), + row: height, + column: width, + }) + .await?; + Ok(()) + } + + pub fn fetch_exit_watcher(&self, process: &ContainerProcess) -> Result { + match process.process_type { + ProcessType::Container => self.init_process.fetch_exit_watcher(), + ProcessType::Exec => { + let exec = self + .exec_processes + .get(&process.exec_id) + .ok_or_else(|| Error::ProcessNotFound(process.clone()))?; + exec.process.fetch_exit_watcher() + } + } + } + + pub(crate) async fn start_container(&mut self, cid: &ContainerID) -> Result<()> { + self.check_state(vec![ProcessStatus::Created, ProcessStatus::Stopped]) + .await + .context("check state")?; + + self.agent + .start_container(agent::ContainerID { + container_id: cid.container_id.clone(), + }) + .await + .context("start container")?; + + self.set_state(ProcessStatus::Running).await; + + Ok(()) + } + + async fn get_exit_status(&self) -> Arc> { + self.init_process.exit_status.clone() + } + + pub(crate) fn add_exec_process(&mut self, id: &str, exec: Exec) -> Option { + self.exec_processes.insert(id.to_string(), exec) + } + + pub(crate) async fn delete_exec_process(&mut self, eid: &str) -> Result<()> { + match self.exec_processes.remove(eid) { + Some(_) => { + debug!(self.logger, " delete process eid {}", eid); + Ok(()) + } + None => Err(anyhow!( + "failed to find cid {} eid {}", + self.container_id(), + eid + )), + } + } + + async fn cleanup_container( + &mut self, + cid: &str, + force: bool, + device_manager: &RwLock, + ) -> Result<()> { + // wait until the container process + // terminated and the status write lock released. 
+ info!(self.logger, "wait on container terminated"); + let exit_status = self.get_exit_status().await; + let _locked_exit_status = exit_status.read().await; + info!(self.logger, "container terminated"); + let remove_request = agent::RemoveContainerRequest { + container_id: cid.to_string(), + ..Default::default() + }; + self.agent + .remove_container(remove_request) + .await + .or_else(|e| { + if force { + warn!( + self.logger, + "stop container: agent remove container failed: {}", e + ); + Ok(agent::Empty::new()) + } else { + Err(e) + } + })?; + + // close the exit channel to wakeup wait service + // send to notify watchers who are waiting for the process exit + self.init_process.stop().await; + + self.clean_volumes(device_manager) + .await + .context("clean volumes")?; + self.clean_rootfs(device_manager) + .await + .context("clean rootfs")?; + + Ok(()) + } + + pub(crate) async fn stop_process( + &mut self, + process: &ContainerProcess, + force: bool, + device_manager: &RwLock, + ) -> Result<()> { + let logger = logger_with_process(process); + info!(logger, "begin to stop process"); + + // do not stop again when state stopped, may cause multi cleanup resource + let state = self.init_process.get_status().await; + if state == ProcessStatus::Stopped { + return Ok(()); + } + + self.check_state(vec![ProcessStatus::Running]) + .await + .context("check state")?; + + // if use force mode to stop container, stop always successful + // send kill signal to container + // ignore the error of sending signal, since the process would + // have been killed and exited yet. + self.signal_process(process, Signal::SIGKILL as u32, false) + .await + .map_err(|e| { + warn!(logger, "failed to signal kill. {:?}", e); + }) + .ok(); + + match process.process_type { + ProcessType::Container => self + .cleanup_container(&process.container_id.container_id, force, device_manager) + .await + .context("stop container")?, + ProcessType::Exec => { + let exec = self + .exec_processes + .get_mut(&process.exec_id) + .ok_or_else(|| anyhow!("failed to find exec"))?; + exec.process.stop().await; + } + } + + Ok(()) + } + + pub(crate) async fn signal_process( + &mut self, + process: &ContainerProcess, + signal: u32, + all: bool, + ) -> Result<()> { + let mut process_id: agent::ContainerProcessID = process.clone().into(); + if all { + // force signal init process + process_id.exec_id.clear(); + }; + + self.agent + .signal_process(agent::SignalProcessRequest { process_id, signal }) + .await?; + + Ok(()) + } + + pub async fn new_container_io(&self, process: &ContainerProcess) -> Result { + Ok(ContainerIo::new(self.agent.clone(), process.clone())) + } + + pub async fn close_io(&mut self, process: &ContainerProcess) -> Result<()> { + match process.process_type { + ProcessType::Container => self.init_process.close_io(self.agent.clone()).await, + ProcessType::Exec => { + let exec = self + .exec_processes + .get_mut(&process.exec_id) + .ok_or_else(|| Error::ProcessNotFound(process.clone()))?; + exec.process.close_io(self.agent.clone()).await; + } + }; + + Ok(()) + } + + async fn clean_volumes(&mut self, device_manager: &RwLock) -> Result<()> { + let mut unhandled = Vec::new(); + for v in self.volumes.iter() { + if let Err(err) = v.cleanup(device_manager).await { + unhandled.push(Arc::clone(v)); + warn!( + sl!(), + "Failed to clean the volume = {:?}, error = {:?}", + v.get_volume_mount(), + err + ); + } + } + if !unhandled.is_empty() { + self.volumes = unhandled; + } + Ok(()) + } + + async fn clean_rootfs(&mut self, device_manager: 
&RwLock) -> Result<()> { + let mut unhandled = Vec::new(); + for rootfs in self.rootfs.iter() { + if let Err(err) = rootfs.cleanup(device_manager).await { + unhandled.push(Arc::clone(rootfs)); + warn!( + sl!(), + "Failed to umount rootfs, cid = {:?}, error = {:?}", + self.container_id(), + err + ); + } + } + if !unhandled.is_empty() { + self.rootfs = unhandled; + } + Ok(()) + } +} diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/io/container_io.rs b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/io/container_io.rs new file mode 100644 index 000000000000..c211e8bca43a --- /dev/null +++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/io/container_io.rs @@ -0,0 +1,171 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + future::Future, + io, + pin::Pin, + sync::Arc, + task::{Context, Poll}, +}; + +use agent::Agent; +use anyhow::Result; +use common::types::ContainerProcess; +use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; + +struct ContainerIoInfo { + pub agent: Arc, + pub process: ContainerProcess, +} + +pub struct ContainerIo { + pub stdin: Box, + pub stdout: Box, + pub stderr: Box, +} + +impl ContainerIo { + pub fn new(agent: Arc, process: ContainerProcess) -> Self { + let info = Arc::new(ContainerIoInfo { agent, process }); + + Self { + stdin: Box::new(ContainerIoWrite::new(info.clone())), + stdout: Box::new(ContainerIoRead::new(info.clone(), true)), + stderr: Box::new(ContainerIoRead::new(info, false)), + } + } +} + +struct ContainerIoWrite<'inner> { + pub info: Arc, + write_future: + Option> + Send + 'inner>>>, +} + +impl<'inner> ContainerIoWrite<'inner> { + pub fn new(info: Arc) -> Self { + Self { + info, + write_future: Default::default(), + } + } + + fn poll_write_inner( + &'inner mut self, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + let mut write_future = self.write_future.take(); + if write_future.is_none() { + let req = agent::WriteStreamRequest { + process_id: self.info.process.clone().into(), + data: buf.to_vec(), + }; + write_future = Some(Box::pin(self.info.agent.write_stdin(req))); + } + + let mut write_future = write_future.unwrap(); + match write_future.as_mut().poll(cx) { + Poll::Ready(v) => match v { + Ok(resp) => Poll::Ready(Ok(resp.length as usize)), + Err(err) => Poll::Ready(Err(std::io::Error::new(std::io::ErrorKind::Other, err))), + }, + Poll::Pending => { + self.write_future = Some(write_future); + Poll::Pending + } + } + } +} + +impl<'inner> AsyncWrite for ContainerIoWrite<'inner> { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + let me = unsafe { + std::mem::transmute::<&mut ContainerIoWrite<'_>, &mut ContainerIoWrite<'inner>>( + &mut *self, + ) + }; + me.poll_write_inner(cx, buf) + } + + fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } +} + +type ResultBuffer = Result; +struct ContainerIoRead<'inner> { + pub info: Arc, + is_stdout: bool, + read_future: Option + Send + 'inner>>>, +} + +impl<'inner> ContainerIoRead<'inner> { + pub fn new(info: Arc, is_stdout: bool) -> Self { + Self { + info, + is_stdout, + read_future: Default::default(), + } + } + fn poll_read_inner( + &'inner mut self, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + let mut read_future = 
self.read_future.take(); + if read_future.is_none() { + let req = agent::ReadStreamRequest { + process_id: self.info.process.clone().into(), + len: buf.remaining() as u32, + }; + read_future = if self.is_stdout { + Some(Box::pin(self.info.agent.read_stdout(req))) + } else { + Some(Box::pin(self.info.agent.read_stderr(req))) + }; + } + + let mut read_future = read_future.unwrap(); + match read_future.as_mut().poll(cx) { + Poll::Ready(v) => match v { + Ok(resp) => { + buf.put_slice(&resp.data); + Poll::Ready(Ok(())) + } + Err(err) => Poll::Ready(Err(std::io::Error::new(std::io::ErrorKind::Other, err))), + }, + Poll::Pending => { + self.read_future = Some(read_future); + Poll::Pending + } + } + } +} + +impl<'inner> AsyncRead for ContainerIoRead<'inner> { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + let me = unsafe { + std::mem::transmute::<&mut ContainerIoRead<'_>, &mut ContainerIoRead<'inner>>( + &mut *self, + ) + }; + me.poll_read_inner(cx, buf) + } +} diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/io/mod.rs b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/io/mod.rs new file mode 100644 index 000000000000..3c6ca719bcba --- /dev/null +++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/io/mod.rs @@ -0,0 +1,10 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +mod container_io; +pub use container_io::ContainerIo; +mod shim_io; +pub use shim_io::ShimIo; diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/io/shim_io.rs b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/io/shim_io.rs new file mode 100644 index 000000000000..db3ce998910e --- /dev/null +++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/io/shim_io.rs @@ -0,0 +1,171 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + io, + os::unix::{ + io::{FromRawFd, RawFd}, + net::UnixStream as StdUnixStream, + prelude::AsRawFd, + }, + pin::Pin, + task::Context as TaskContext, + task::Poll, +}; + +use anyhow::{anyhow, Context, Result}; +use nix::{ + fcntl::{self, OFlag}, + sys::stat::Mode, +}; +use tokio::{ + fs::OpenOptions, + io::{AsyncRead, AsyncWrite}, + net::UnixStream as AsyncUnixStream, +}; +use url::Url; + +fn open_fifo(path: &str) -> Result { + let fd = fcntl::open(path, OFlag::O_RDWR, Mode::from_bits(0).unwrap())?; + + let std_stream = unsafe { StdUnixStream::from_raw_fd(fd) }; + std_stream + .set_nonblocking(true) + .context("set nonblocking")?; + + AsyncUnixStream::from_std(std_stream).map_err(|e| anyhow!(e)) +} + +pub struct ShimIo { + pub stdin: Option>, + pub stdout: Option>, + pub stderr: Option>, +} + +impl ShimIo { + pub async fn new( + stdin: &Option, + stdout: &Option, + stderr: &Option, + ) -> Result { + info!( + sl!(), + "new shim io stdin {:?} stdout {:?} stderr {:?}", stdin, stdout, stderr + ); + + let set_flag_with_blocking = |fd: RawFd| { + let flag = unsafe { libc::fcntl(fd, libc::F_GETFL) }; + let ret = unsafe { libc::fcntl(fd, libc::F_SETFL, flag & !libc::O_NONBLOCK) }; + if ret < 0 { + error!(sl!(), "failed to set fcntl for fd {} error {}", fd, ret); + } + }; + + let stdin_fd: Option> = if let Some(stdin) = stdin { + info!(sl!(), "open stdin {:?}", &stdin); + + // Since the stdin peer point (which is hold by containerd) could not be 
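`ContainerIoWrite` and `ContainerIoRead` in container_io.rs above bridge the agent's async request API into `AsyncWrite`/`AsyncRead` by caching the in-flight boxed future between poll calls; the unsafe lifetime transmute exists only because that future borrows from `self`. A simplified, safe sketch of the same caching pattern, with the agent call replaced by a hypothetical `'static` async backend:

```rust
use std::future::Future;
use std::io;
use std::pin::Pin;
use std::task::{Context, Poll};

use tokio::io::{AsyncWrite, AsyncWriteExt};

// Hypothetical backend call; the real code issues agent.write_stdin(..) here,
// which is why the future has to survive across poll_write calls.
async fn backend_write(data: Vec<u8>) -> io::Result<usize> {
    Ok(data.len())
}

struct CachedWriter {
    write_future: Option<Pin<Box<dyn Future<Output = io::Result<usize>> + Send>>>,
}

impl AsyncWrite for CachedWriter {
    fn poll_write(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &[u8],
    ) -> Poll<io::Result<usize>> {
        // Reuse the future left behind by a previous Poll::Pending, or start a new one.
        let mut fut = self
            .write_future
            .take()
            .unwrap_or_else(|| Box::pin(backend_write(buf.to_vec())));
        match fut.as_mut().poll(cx) {
            Poll::Ready(res) => Poll::Ready(res),
            Poll::Pending => {
                // Not done yet: stash the future so the next poll_write resumes it.
                self.write_future = Some(fut);
                Poll::Pending
            }
        }
    }

    fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
        Poll::Ready(Ok(()))
    }

    fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
        Poll::Ready(Ok(()))
    }
}

#[tokio::main]
async fn main() -> io::Result<()> {
    let mut w = CachedWriter { write_future: None };
    let n = w.write(b"hello").await?;
    println!("wrote {} bytes", n);
    Ok(())
}
```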
openned + // immediately, which would block here's open with block mode, and we wouldn't want to + // block here, thus here opened with nonblock and then reset it to block mode for + // tokio async io. + match OpenOptions::new() + .read(true) + .write(false) + .custom_flags(libc::O_NONBLOCK) + .open(&stdin) + .await + { + Ok(file) => { + // Set it to blocking to avoid infinitely handling EAGAIN when the reader is empty + set_flag_with_blocking(file.as_raw_fd()); + Some(Box::new(file)) + } + Err(err) => { + error!(sl!(), "failed to open {} error {:?}", &stdin, err); + None + } + } + } else { + None + }; + + let get_url = |url: &Option| -> Option { + info!(sl!(), "get url for {:?}", url); + + match url { + None => None, + Some(out) => match Url::parse(out.as_str()) { + Err(url::ParseError::RelativeUrlWithoutBase) => { + let out = "fifo://".to_owned() + out.as_str(); + let u = Url::parse(out.as_str()).unwrap(); + Some(u) + } + Err(err) => { + warn!(sl!(), "unable to parse stdout uri: {}", err); + None + } + Ok(u) => Some(u), + }, + } + }; + + let stdout_url = get_url(stdout); + let get_fd = |url: &Option| -> Option> { + info!(sl!(), "get fd for {:?}", &url); + if let Some(url) = url { + if url.scheme() == "fifo" { + let path = url.path(); + match open_fifo(path) { + Ok(s) => { + return Some(Box::new(ShimIoWrite::Stream(s))); + } + Err(err) => { + error!(sl!(), "failed to open file {} error {:?}", url.path(), err); + } + } + } + } + None + }; + + let stderr_url = get_url(stderr); + Ok(Self { + stdin: stdin_fd, + stdout: get_fd(&stdout_url), + stderr: get_fd(&stderr_url), + }) + } +} + +#[derive(Debug)] +enum ShimIoWrite { + Stream(AsyncUnixStream), + // TODO: support other type +} + +impl AsyncWrite for ShimIoWrite { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut TaskContext<'_>, + buf: &[u8], + ) -> Poll> { + match *self { + ShimIoWrite::Stream(ref mut s) => Pin::new(s).poll_write(cx, buf), + } + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut TaskContext<'_>) -> Poll> { + match *self { + ShimIoWrite::Stream(ref mut s) => Pin::new(s).poll_flush(cx), + } + } + + fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut TaskContext<'_>) -> Poll> { + match *self { + ShimIoWrite::Stream(ref mut s) => Pin::new(s).poll_shutdown(cx), + } + } +} diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/manager.rs b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/manager.rs new file mode 100644 index 000000000000..f6a6553e9749 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/manager.rs @@ -0,0 +1,367 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; + +use std::{collections::HashMap, sync::Arc}; + +use agent::Agent; +use common::{ + error::Error, + types::{ + ContainerConfig, ContainerID, ContainerProcess, ExecProcessRequest, KillRequest, + ProcessExitStatus, ProcessStateInfo, ProcessType, ResizePTYRequest, ShutdownRequest, + StatsInfo, UpdateRequest, PID, + }, + ContainerManager, +}; +use hypervisor::Hypervisor; +use oci::Process as OCIProcess; +use resource::network::NetnsGuard; +use resource::ResourceManager; +use tokio::sync::RwLock; +use tracing::instrument; + +use kata_sys_util::hooks::HookStates; + +use super::{logger_with_process, Container}; + +pub struct VirtContainerManager { + sid: String, + pid: u32, + containers: Arc>>, + resource_manager: Arc, + 
agent: Arc, + hypervisor: Arc, +} + +impl std::fmt::Debug for VirtContainerManager { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("VirtContainerManager") + .field("sid", &self.sid) + .field("pid", &self.pid) + .finish() + } +} + +impl VirtContainerManager { + pub fn new( + sid: &str, + pid: u32, + agent: Arc, + hypervisor: Arc, + resource_manager: Arc, + ) -> Self { + Self { + sid: sid.to_string(), + pid, + containers: Default::default(), + resource_manager, + agent, + hypervisor, + } + } +} + +#[async_trait] +impl ContainerManager for VirtContainerManager { + #[instrument] + async fn create_container(&self, config: ContainerConfig, spec: oci::Spec) -> Result { + let container = Container::new( + self.pid, + config.clone(), + spec.clone(), + self.agent.clone(), + self.resource_manager.clone(), + ) + .context("new container")?; + + // CreateContainer Hooks: + // * should be run in vmm namespace (hook path in runtime namespace) + // * should be run after the vm is started, before container is created, and after CreateRuntime Hooks + // * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#createcontainer-hooks + let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?; + let vmm_ns_path = self.hypervisor.get_ns_path().await?; + let vmm_netns_path = format!("{}/{}", vmm_ns_path, "net"); + let state = oci::State { + version: spec.version.clone(), + id: config.container_id.clone(), + status: oci::ContainerState::Creating, + pid: vmm_master_tid as i32, + bundle: config.bundle.clone(), + annotations: spec.annotations.clone(), + }; + + // new scope, CreateContainer hooks in which will execute in a new network namespace + { + let _netns_guard = NetnsGuard::new(&vmm_netns_path).context("vmm netns guard")?; + if let Some(hooks) = spec.hooks.as_ref() { + let mut create_container_hook_states = HookStates::new(); + create_container_hook_states.execute_hooks(&hooks.create_container, Some(state))?; + } + } + + let mut containers = self.containers.write().await; + container.create(spec).await.context("create")?; + containers.insert(container.container_id.to_string(), container); + Ok(PID { pid: self.pid }) + } + + #[instrument] + async fn close_process_io(&self, process: &ContainerProcess) -> Result<()> { + let containers = self.containers.read().await; + let container_id = &process.container_id.to_string(); + let c = containers + .get(container_id) + .ok_or_else(|| Error::ContainerNotFound(container_id.clone()))?; + + c.close_io(process).await.context("close io")?; + Ok(()) + } + + #[instrument] + async fn delete_process(&self, process: &ContainerProcess) -> Result { + let container_id = &process.container_id.container_id; + match process.process_type { + ProcessType::Container => { + let mut containers = self.containers.write().await; + let c = containers + .remove(container_id) + .ok_or_else(|| Error::ContainerNotFound(container_id.to_string()))?; + + // Poststop Hooks: + // * should be run in runtime namespace + // * should be run after the container is deleted but before delete operation returns + // * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#poststop + let c_spec = c.spec().await; + let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?; + let state = oci::State { + version: c_spec.version.clone(), + id: c.container_id.to_string(), + status: oci::ContainerState::Stopped, + pid: vmm_master_tid as i32, + 
bundle: c.config().await.bundle, + annotations: c_spec.annotations.clone(), + }; + if let Some(hooks) = c_spec.hooks.as_ref() { + let mut poststop_hook_states = HookStates::new(); + poststop_hook_states.execute_hooks(&hooks.poststop, Some(state))?; + } + + c.state_process(process).await.context("state process") + } + ProcessType::Exec => { + let containers = self.containers.read().await; + let c = containers + .get(container_id) + .ok_or_else(|| Error::ContainerNotFound(container_id.to_string()))?; + let state = c.state_process(process).await.context("state process"); + c.delete_exec_process(process) + .await + .context("delete process")?; + return state; + } + } + } + + #[instrument] + async fn exec_process(&self, req: ExecProcessRequest) -> Result<()> { + if req.spec_type_url.is_empty() { + return Err(anyhow!("invalid type url")); + } + let oci_process: OCIProcess = + serde_json::from_slice(&req.spec_value).context("serde from slice")?; + + let containers = self.containers.read().await; + let container_id = &req.process.container_id.container_id; + let c = containers + .get(container_id) + .ok_or_else(|| Error::ContainerNotFound(container_id.clone()))?; + c.exec_process( + &req.process, + req.stdin, + req.stdout, + req.stderr, + req.terminal, + oci_process, + ) + .await + .context("exec")?; + Ok(()) + } + + #[instrument] + async fn kill_process(&self, req: &KillRequest) -> Result<()> { + let containers = self.containers.read().await; + let container_id = &req.process.container_id.container_id; + let c = containers + .get(container_id) + .ok_or_else(|| Error::ContainerNotFound(container_id.clone()))?; + c.kill_process(&req.process, req.signal, req.all) + .await + .map_err(|err| { + warn!( + sl!(), + "failed to signal process {:?} {:?}", &req.process, err + ); + err + }) + .ok(); + Ok(()) + } + + #[instrument] + async fn wait_process(&self, process: &ContainerProcess) -> Result { + let logger = logger_with_process(process); + + let containers = self.containers.read().await; + let container_id = &process.container_id.container_id; + let c = containers + .get(container_id) + .ok_or_else(|| Error::ContainerNotFound(container_id.clone()))?; + let (watcher, status) = c.wait_process(process).await.context("wait")?; + drop(containers); + + match watcher { + Some(mut watcher) => { + info!(logger, "begin wait exit"); + while watcher.changed().await.is_ok() {} + info!(logger, "end wait exited"); + } + None => { + warn!(logger, "failed to find watcher for wait process"); + } + } + + let status = status.read().await; + + info!(logger, "wait process exit status {:?}", status); + + Ok(status.clone()) + } + + #[instrument] + async fn start_process(&self, process: &ContainerProcess) -> Result { + let containers = self.containers.read().await; + let container_id = &process.container_id.container_id; + let c = containers + .get(container_id) + .ok_or_else(|| Error::ContainerNotFound(container_id.clone()))?; + c.start(self.containers.clone(), process) + .await + .context("start")?; + + // Poststart Hooks: + // * should be run in runtime namespace + // * should be run after user-specific command is executed but before start operation returns + // * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#poststart + let c_spec = c.spec().await; + let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?; + let state = oci::State { + version: c_spec.version.clone(), + id: c.container_id.to_string(), + status: oci::ContainerState::Running, + 
pid: vmm_master_tid as i32, + bundle: c.config().await.bundle, + annotations: c_spec.annotations.clone(), + }; + if let Some(hooks) = c_spec.hooks.as_ref() { + let mut poststart_hook_states = HookStates::new(); + poststart_hook_states.execute_hooks(&hooks.poststart, Some(state))?; + } + + Ok(PID { pid: self.pid }) + } + + #[instrument] + async fn state_process(&self, process: &ContainerProcess) -> Result { + let containers = self.containers.read().await; + let container_id = &process.container_id.container_id; + let c = containers + .get(container_id) + .ok_or_else(|| Error::ContainerNotFound(container_id.clone()))?; + let state = c.state_process(process).await.context("state process")?; + Ok(state) + } + + #[instrument] + async fn pause_container(&self, id: &ContainerID) -> Result<()> { + let containers = self.containers.read().await; + let c = containers + .get(&id.container_id) + .ok_or_else(|| Error::ContainerNotFound(id.container_id.clone()))?; + c.pause().await.context("pause")?; + Ok(()) + } + + #[instrument] + async fn resume_container(&self, id: &ContainerID) -> Result<()> { + let containers = self.containers.read().await; + let c = containers + .get(&id.container_id) + .ok_or_else(|| Error::ContainerNotFound(id.container_id.clone()))?; + c.resume().await.context("resume")?; + Ok(()) + } + + #[instrument] + async fn resize_process_pty(&self, req: &ResizePTYRequest) -> Result<()> { + let containers = self.containers.read().await; + let c = containers + .get(&req.process.container_id.container_id) + .ok_or_else(|| { + Error::ContainerNotFound(req.process.container_id.container_id.clone()) + })?; + c.resize_pty(&req.process, req.width, req.height) + .await + .context("resize pty")?; + Ok(()) + } + + #[instrument] + async fn stats_container(&self, id: &ContainerID) -> Result { + let containers = self.containers.read().await; + let c = containers + .get(&id.container_id) + .ok_or_else(|| Error::ContainerNotFound(id.container_id.clone()))?; + let stats = c.stats().await.context("stats")?; + Ok(StatsInfo::from(stats)) + } + + #[instrument] + async fn update_container(&self, req: UpdateRequest) -> Result<()> { + let resource = serde_json::from_slice::(&req.value) + .context("deserialize LinuxResource")?; + let containers = self.containers.read().await; + let container_id = &req.container_id; + let c = containers + .get(container_id) + .ok_or_else(|| Error::ContainerNotFound(container_id.to_string()))?; + c.update(&resource).await.context("update_container") + } + + #[instrument] + async fn pid(&self) -> Result { + Ok(PID { pid: self.pid }) + } + + #[instrument] + async fn connect_container(&self, _id: &ContainerID) -> Result { + Ok(PID { pid: self.pid }) + } + + #[instrument] + async fn need_shutdown_sandbox(&self, req: &ShutdownRequest) -> bool { + req.is_now || self.containers.read().await.is_empty() || self.sid == req.container_id + } + + #[instrument] + async fn is_sandbox_container(&self, process: &ContainerProcess) -> bool { + process.process_type == ProcessType::Container + && process.container_id.container_id == self.sid + } +} diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/mod.rs b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/mod.rs new file mode 100644 index 000000000000..3c615517fd5c --- /dev/null +++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/mod.rs @@ -0,0 +1,20 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + 
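The `wait_process` handler above blocks on a `tokio::sync::watch` receiver, and the sender side is dropped by `run_io_wait` in `process.rs` further below once the process has been reaped. A stand-alone sketch of that exit-notification pattern follows; it is illustrative only and not part of the patch, and assumes nothing beyond the `tokio` watch API already used in these files:

```
// Exit notification via a watch channel: waiters loop on changed(), which
// resolves with Ok whenever a new value arrives and with Err once the
// sender is dropped, so dropping the sender on exit releases every waiter.
use tokio::sync::watch;

#[tokio::main]
async fn main() {
    let (exit_tx, exit_rx) = watch::channel(false);

    let waiter = tokio::spawn(async move {
        let mut rx = exit_rx;
        while rx.changed().await.is_ok() {}
        println!("process exited, waiter released");
    });

    // Stand-in for run_io_wait(): dropping the sender signals exit.
    drop(exit_tx);

    waiter.await.unwrap();
}
```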
+mod container; +use container::{Container, Exec}; +mod container_inner; +mod io; +use container_inner::ContainerInner; +mod manager; +pub use manager::VirtContainerManager; +mod process; + +use common::types::ContainerProcess; + +fn logger_with_process(container_process: &ContainerProcess) -> slog::Logger { + sl!().new(o!("container_id" => container_process.container_id.container_id.clone(), "exec_id" => container_process.exec_id.clone())) +} diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/process.rs b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/process.rs new file mode 100644 index 000000000000..cd73134dd2f8 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/process.rs @@ -0,0 +1,270 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::collections::HashMap; +use std::sync::Arc; + +use agent::Agent; +use anyhow::{Context, Result}; +use awaitgroup::{WaitGroup, Worker as WaitGroupWorker}; +use common::types::{ContainerProcess, ProcessExitStatus, ProcessStateInfo, ProcessStatus, PID}; +use tokio::io::{AsyncRead, AsyncWrite}; +use tokio::sync::{watch, RwLock}; + +use super::container::Container; +use super::io::{ContainerIo, ShimIo}; +use super::logger_with_process; + +pub type ProcessWatcher = ( + Option>, + Arc>, +); + +#[derive(Debug)] +pub struct Process { + pub process: ContainerProcess, + pub pid: u32, + logger: slog::Logger, + pub bundle: String, + + pub stdin: Option, + pub stdout: Option, + pub stderr: Option, + pub terminal: bool, + + pub height: u32, + pub width: u32, + pub status: Arc>, + + pub exit_status: Arc>, + pub exit_watcher_rx: Option>, + pub exit_watcher_tx: Option>, + // used to sync between stdin io copy thread(tokio) and the close it call. + // close io call should wait until the stdin io copy finished to + // prevent stdin data lost. 
+ pub wg_stdin: WaitGroup, +} + +impl Process { + pub fn new( + process: &ContainerProcess, + pid: u32, + bundle: &str, + stdin: Option, + stdout: Option, + stderr: Option, + terminal: bool, + ) -> Process { + let (sender, receiver) = watch::channel(false); + + Process { + process: process.clone(), + pid, + logger: logger_with_process(process), + bundle: bundle.to_string(), + stdin, + stdout, + stderr, + terminal, + height: 0, + width: 0, + status: Arc::new(RwLock::new(ProcessStatus::Created)), + exit_status: Arc::new(RwLock::new(ProcessExitStatus::new())), + exit_watcher_rx: Some(receiver), + exit_watcher_tx: Some(sender), + wg_stdin: WaitGroup::new(), + } + } + + pub async fn start_io_and_wait( + &mut self, + containers: Arc>>, + agent: Arc, + container_io: ContainerIo, + ) -> Result<()> { + info!(self.logger, "start io and wait"); + + // new shim io + let shim_io = ShimIo::new(&self.stdin, &self.stdout, &self.stderr) + .await + .context("new shim io")?; + + // start io copy for stdin + let wgw_stdin = self.wg_stdin.worker(); + if let Some(stdin) = shim_io.stdin { + self.run_io_copy("stdin", wgw_stdin, stdin, container_io.stdin) + .await?; + } + + // prepare for wait group for stdout, stderr + let wg = WaitGroup::new(); + let wgw = wg.worker(); + + // start io copy for stdout + if let Some(stdout) = shim_io.stdout { + self.run_io_copy("stdout", wgw.clone(), container_io.stdout, stdout) + .await?; + } + + // start io copy for stderr + if !self.terminal { + if let Some(stderr) = shim_io.stderr { + self.run_io_copy("stderr", wgw, container_io.stderr, stderr) + .await?; + } + } + + self.run_io_wait(containers, agent, wg) + .await + .context("run io thread")?; + Ok(()) + } + + async fn run_io_copy<'a>( + &'a self, + io_name: &'a str, + wgw: WaitGroupWorker, + mut reader: Box, + mut writer: Box, + ) -> Result<()> { + info!(self.logger, "run io copy for {}", io_name); + let io_name = io_name.to_string(); + let logger = self.logger.new(o!("io_name" => io_name)); + tokio::spawn(async move { + match tokio::io::copy(&mut reader, &mut writer).await { + Err(e) => { + warn!(logger, "run_io_copy: failed to copy stream: {}", e); + } + Ok(length) => { + info!(logger, "run_io_copy: stop to copy stream length {}", length) + } + }; + + wgw.done(); + }); + + Ok(()) + } + + /// A container is considered exited once its IO ended. + /// This function waits for IO to end. And then, do some cleanup + /// things. 
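The comments above explain why stdin copying must be synchronized with `close_io`: the copy task has to drain stdin before the stream is closed, otherwise data could be lost. The `awaitgroup` worker/done/wait choreography used for this is summarized in the following stand-alone sketch; it is illustrative only (the loop over stream names is hypothetical) and assumes just the `awaitgroup` and `tokio` APIs already used in this file:

```
// Each IO copy task owns a Worker and calls done() when its stream ends;
// wait() only returns after every registered worker has reported done,
// which is what close_io() and run_io_wait() rely on above.
use awaitgroup::WaitGroup;

#[tokio::main]
async fn main() {
    let mut wg = WaitGroup::new();

    for name in ["stdin", "stdout", "stderr"] {
        let worker = wg.worker();
        tokio::spawn(async move {
            // Stand-in for tokio::io::copy(&mut reader, &mut writer).await.
            println!("finished copying {}", name);
            worker.done();
        });
    }

    // Returns once all three copy tasks have called done().
    wg.wait().await;
}
```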
+ async fn run_io_wait( + &mut self, + containers: Arc>>, + agent: Arc, + mut wg: WaitGroup, + ) -> Result<()> { + let logger = self.logger.clone(); + info!(logger, "start run io wait"); + let process = self.process.clone(); + let exit_status = self.exit_status.clone(); + let exit_notifier = self.exit_watcher_tx.take(); + let status = self.status.clone(); + + tokio::spawn(async move { + // wait on all of the container's io stream terminated + info!(logger, "begin wait group io"); + wg.wait().await; + info!(logger, "end wait group for io"); + + let req = agent::WaitProcessRequest { + process_id: process.clone().into(), + }; + + info!(logger, "begin wait process"); + let resp = match agent.wait_process(req).await { + Ok(ret) => ret, + Err(e) => { + error!(logger, "failed to wait process {:?}", e); + return; + } + }; + + info!(logger, "end wait process exit code {}", resp.status); + + let containers = containers.read().await; + let container_id = &process.container_id.container_id; + let c = match containers.get(container_id) { + Some(c) => c, + None => { + error!( + logger, + "Failed to stop process, since container {} not found", container_id + ); + return; + } + }; + + if let Err(err) = c.stop_process(&process).await { + error!( + logger, + "Failed to stop process, process = {:?}, err = {:?}", process, err + ); + } + + let mut exit_status = exit_status.write().await; + exit_status.update_exit_code(resp.status); + drop(exit_status); + + let mut status = status.write().await; + *status = ProcessStatus::Stopped; + drop(status); + + drop(exit_notifier); + info!(logger, "end io wait thread"); + }); + Ok(()) + } + + pub fn fetch_exit_watcher(&self) -> Result { + Ok((self.exit_watcher_rx.clone(), self.exit_status.clone())) + } + + pub async fn state(&self) -> Result { + let exit_status = self.exit_status.read().await; + Ok(ProcessStateInfo { + container_id: self.process.container_id.container_id.clone(), + exec_id: self.process.exec_id.clone(), + pid: PID { pid: self.pid }, + bundle: self.bundle.clone(), + stdin: self.stdin.clone(), + stdout: self.stdout.clone(), + stderr: self.stderr.clone(), + terminal: self.terminal, + status: self.get_status().await, + exit_status: exit_status.exit_code, + exited_at: exit_status.exit_time, + }) + } + + pub async fn stop(&mut self) { + let mut status = self.status.write().await; + *status = ProcessStatus::Stopped; + } + + pub async fn close_io(&mut self, agent: Arc) { + self.wg_stdin.wait().await; + + let req = agent::CloseStdinRequest { + process_id: self.process.clone().into(), + }; + + if let Err(e) = agent.close_stdin(req).await { + warn!(self.logger, "failed clsoe process io: {:?}", e); + } + } + + pub async fn get_status(&self) -> ProcessStatus { + let status = self.status.read().await; + *status + } + + pub async fn set_status(&self, new_status: ProcessStatus) { + let mut status = self.status.write().await; + *status = new_status; + } +} diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/health_check.rs b/src/runtime-rs/crates/runtimes/virt_container/src/health_check.rs new file mode 100644 index 000000000000..874ccb7f1093 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/virt_container/src/health_check.rs @@ -0,0 +1,123 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::sync::Arc; + +use agent::Agent; +use anyhow::Context; +use tokio::sync::{mpsc, Mutex}; + +/// monitor check interval 30s +const HEALTH_CHECK_TIMER_INTERVAL: u64 = 30; + +/// version 
check threshold 5min +const VERSION_CHECK_THRESHOLD: u64 = 5 * 60 / HEALTH_CHECK_TIMER_INTERVAL; + +/// health check stop channel buffer size +const HEALTH_CHECK_STOP_CHANNEL_BUFFER_SIZE: usize = 1; + +pub struct HealthCheck { + pub keep_alive: bool, + keep_abnormal: bool, + stop_tx: mpsc::Sender<()>, + stop_rx: Arc>>, +} + +impl HealthCheck { + pub fn new(keep_alive: bool, keep_abnormal: bool) -> HealthCheck { + let (tx, rx) = mpsc::channel(HEALTH_CHECK_STOP_CHANNEL_BUFFER_SIZE); + HealthCheck { + keep_alive, + keep_abnormal, + stop_tx: tx, + stop_rx: Arc::new(Mutex::new(rx)), + } + } + + pub fn start(&self, id: &str, agent: Arc) { + if !self.keep_alive { + return; + } + let id = id.to_string(); + + info!(sl!(), "start runtime keep alive"); + + let stop_rx = self.stop_rx.clone(); + let keep_abnormal = self.keep_abnormal; + tokio::spawn(async move { + let mut version_check_threshold_count = 0; + + loop { + tokio::time::sleep(std::time::Duration::from_secs(HEALTH_CHECK_TIMER_INTERVAL)) + .await; + let mut stop_rx = stop_rx.lock().await; + match stop_rx.try_recv() { + Ok(_) => { + info!(sl!(), "revive stop {} monitor signal", id); + break; + } + + Err(mpsc::error::TryRecvError::Empty) => { + // check agent + match agent + .check(agent::CheckRequest::new("")) + .await + .context("check health") + { + Ok(_) => { + debug!(sl!(), "check {} agent health successfully", id); + version_check_threshold_count += 1; + if version_check_threshold_count >= VERSION_CHECK_THRESHOLD { + // need to check version + version_check_threshold_count = 0; + if let Ok(v) = agent + .version(agent::CheckRequest::new("")) + .await + .context("check version") + { + info!(sl!(), "agent {}", v.agent_version) + } + } + continue; + } + Err(e) => { + error!(sl!(), "failed to do {} agent health check: {}", id, e); + if let Err(mpsc::error::TryRecvError::Empty) = stop_rx.try_recv() { + error!(sl!(), "failed to receive stop monitor signal"); + if !keep_abnormal { + ::std::process::exit(1); + } + } else { + info!(sl!(), "wait to exit {}", id); + break; + } + } + } + } + + Err(mpsc::error::TryRecvError::Disconnected) => { + warn!(sl!(), "{} monitor channel has broken", id); + break; + } + } + } + }); + } + + pub async fn stop(&self) { + if !self.keep_alive { + return; + } + info!(sl!(), "stop runtime keep alive"); + self.stop_tx + .send(()) + .await + .map_err(|e| { + warn!(sl!(), "failed send monitor channel. 
{:?}", e); + }) + .ok(); + } +} diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs b/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs new file mode 100644 index 000000000000..4999ee4f9e06 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs @@ -0,0 +1,216 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[macro_use] +extern crate slog; + +logging::logger_with_subsystem!(sl, "virt-container"); + +mod container_manager; +pub mod health_check; +pub mod sandbox; +pub mod sandbox_persist; + +use std::sync::Arc; + +use agent::{kata::KataAgent, AGENT_KATA}; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use common::{message::Message, RuntimeHandler, RuntimeInstance}; +use hypervisor::{dragonball::Dragonball, Hypervisor, HYPERVISOR_DRAGONBALL}; +use hypervisor::{qemu::Qemu, HYPERVISOR_QEMU}; +use kata_types::config::{ + hypervisor::register_hypervisor_plugin, DragonballConfig, QemuConfig, TomlConfig, +}; + +#[cfg(feature = "cloud-hypervisor")] +use hypervisor::ch::CloudHypervisor; +#[cfg(feature = "cloud-hypervisor")] +use kata_types::config::{hypervisor::HYPERVISOR_NAME_CH, CloudHypervisorConfig}; + +use resource::ResourceManager; +use sandbox::VIRTCONTAINER; +use tokio::sync::mpsc::Sender; +use tracing::instrument; + +unsafe impl Send for VirtContainer {} +unsafe impl Sync for VirtContainer {} +#[derive(Debug)] +pub struct VirtContainer {} + +#[async_trait] +impl RuntimeHandler for VirtContainer { + fn init() -> Result<()> { + // register + let dragonball_config = Arc::new(DragonballConfig::new()); + register_hypervisor_plugin("dragonball", dragonball_config); + + let qemu_config = Arc::new(QemuConfig::new()); + register_hypervisor_plugin("qemu", qemu_config); + + #[cfg(feature = "cloud-hypervisor")] + { + let ch_config = Arc::new(CloudHypervisorConfig::new()); + register_hypervisor_plugin(HYPERVISOR_NAME_CH, ch_config); + } + + Ok(()) + } + + fn name() -> String { + VIRTCONTAINER.to_string() + } + + fn new_handler() -> Arc { + Arc::new(VirtContainer {}) + } + + #[instrument] + async fn new_instance( + &self, + sid: &str, + msg_sender: Sender, + config: Arc, + ) -> Result { + let hypervisor = new_hypervisor(&config).await.context("new hypervisor")?; + + // get uds from hypervisor and get config from toml_config + let agent = new_agent(&config).context("new agent")?; + let resource_manager = + Arc::new(ResourceManager::new(sid, agent.clone(), hypervisor.clone(), config).await?); + let pid = std::process::id(); + + let sandbox = sandbox::VirtSandbox::new( + sid, + msg_sender, + agent.clone(), + hypervisor.clone(), + resource_manager.clone(), + ) + .await + .context("new virt sandbox")?; + let container_manager = container_manager::VirtContainerManager::new( + sid, + pid, + agent, + hypervisor, + resource_manager, + ); + Ok(RuntimeInstance { + sandbox: Arc::new(sandbox), + container_manager: Arc::new(container_manager), + }) + } + + fn cleanup(&self, _id: &str) -> Result<()> { + // TODO + Ok(()) + } +} + +async fn new_hypervisor(toml_config: &TomlConfig) -> Result> { + let hypervisor_name = &toml_config.runtime.hypervisor_name; + let hypervisor_config = toml_config + .hypervisor + .get(hypervisor_name) + .ok_or_else(|| anyhow!("failed to get hypervisor for {}", &hypervisor_name)) + .context("get hypervisor")?; + + // TODO: support other hypervisor + // issue: https://github.com/kata-containers/kata-containers/issues/4634 + match 
hypervisor_name.as_str() { + HYPERVISOR_DRAGONBALL => { + let mut hypervisor = Dragonball::new(); + hypervisor + .set_hypervisor_config(hypervisor_config.clone()) + .await; + Ok(Arc::new(hypervisor)) + } + HYPERVISOR_QEMU => { + let mut hypervisor = Qemu::new(); + hypervisor + .set_hypervisor_config(hypervisor_config.clone()) + .await; + Ok(Arc::new(hypervisor)) + } + + #[cfg(feature = "cloud-hypervisor")] + HYPERVISOR_NAME_CH => { + let mut hypervisor = CloudHypervisor::new(); + + hypervisor + .set_hypervisor_config(hypervisor_config.clone()) + .await; + + Ok(Arc::new(hypervisor)) + } + _ => Err(anyhow!("Unsupported hypervisor {}", &hypervisor_name)), + } +} + +fn new_agent(toml_config: &TomlConfig) -> Result> { + let agent_name = &toml_config.runtime.agent_name; + let agent_config = toml_config + .agent + .get(agent_name) + .ok_or_else(|| anyhow!("failed to get agent for {}", &agent_name)) + .context("get agent")?; + match agent_name.as_str() { + AGENT_KATA => { + let agent = KataAgent::new(agent_config.clone()); + Ok(Arc::new(agent)) + } + _ => Err(anyhow!("Unsupported agent {}", &agent_name)), + } +} + +#[cfg(test)] +mod test { + + use super::*; + + fn default_toml_config_agent() -> Result { + let config_content = r#" +[agent.kata] +container_pipe_size=1 + +[runtime] +agent_name="kata" + "#; + TomlConfig::load(config_content).map_err(|e| anyhow!("can not load config toml: {}", e)) + } + + #[test] + fn test_new_agent() { + let toml_config = default_toml_config_agent().unwrap(); + + let res = new_agent(&toml_config); + assert!(res.is_ok()); + } + + #[tokio::test] + async fn test_new_hypervisor() { + VirtContainer::init().unwrap(); + + let toml_config = { + let config_content = r#" +[hypervisor.qemu] +path = "/bin/echo" +kernel = "/bin/echo" +image = "/bin/echo" +firmware = "" + +[runtime] +hypervisor_name="qemu" +"#; + TomlConfig::load(config_content).map_err(|e| anyhow!("can not load config toml: {}", e)) + } + .unwrap(); + + let res = new_hypervisor(&toml_config).await; + assert!(res.is_ok()); + } +} diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs new file mode 100644 index 000000000000..a86aa07d7cc1 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs @@ -0,0 +1,543 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::sync::Arc; + +use agent::kata::KataAgent; +use agent::types::KernelModule; +use agent::{self, Agent, GetIPTablesRequest, SetIPTablesRequest, VolumeStatsRequest}; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use common::message::{Action, Message}; +use common::{Sandbox, SandboxNetworkEnv}; +use containerd_shim_protos::events::task::TaskOOM; +use hypervisor::{dragonball::Dragonball, BlockConfig, Hypervisor, HYPERVISOR_DRAGONBALL}; +use hypervisor::{utils::get_hvsock_path, HybridVsockConfig, DEFAULT_GUEST_VSOCK_CID}; +use kata_sys_util::hooks::HookStates; +use kata_types::config::TomlConfig; +use persist::{self, sandbox_persist::Persist}; +use resource::manager::ManagerArgs; +use resource::network::{dan_config_path, DanNetworkConfig, NetworkConfig, NetworkWithNetNsConfig}; +use resource::{ResourceConfig, ResourceManager}; +use tokio::sync::{mpsc::Sender, Mutex, RwLock}; +use tracing::instrument; + +use crate::health_check::HealthCheck; + +pub(crate) const VIRTCONTAINER: &str = "virt_container"; +pub struct SandboxRestoreArgs { + pub sid: 
String, + pub toml_config: TomlConfig, + pub sender: Sender, +} + +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum SandboxState { + Init, + Running, + Stopped, +} + +struct SandboxInner { + state: SandboxState, +} + +impl SandboxInner { + pub fn new() -> Self { + Self { + state: SandboxState::Init, + } + } +} + +#[derive(Clone)] +pub struct VirtSandbox { + sid: String, + msg_sender: Arc>>, + inner: Arc>, + resource_manager: Arc, + agent: Arc, + hypervisor: Arc, + monitor: Arc, +} + +impl std::fmt::Debug for VirtSandbox { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("VirtSandbox") + .field("sid", &self.sid) + .field("msg_sender", &self.msg_sender) + .finish() + } +} + +impl VirtSandbox { + pub async fn new( + sid: &str, + msg_sender: Sender, + agent: Arc, + hypervisor: Arc, + resource_manager: Arc, + ) -> Result { + let config = resource_manager.config().await; + let keep_abnormal = config.runtime.keep_abnormal; + Ok(Self { + sid: sid.to_string(), + msg_sender: Arc::new(Mutex::new(msg_sender)), + inner: Arc::new(RwLock::new(SandboxInner::new())), + agent, + hypervisor, + resource_manager, + monitor: Arc::new(HealthCheck::new(true, keep_abnormal)), + }) + } + + #[instrument] + async fn prepare_for_start_sandbox( + &self, + id: &str, + network_env: SandboxNetworkEnv, + ) -> Result> { + let mut resource_configs = vec![]; + + // Prepare VM hybrid vsock device config and add the hybrid vsock device first. + info!(sl!(), "prepare hybrid vsock resource for sandbox."); + let vm_hvsock = ResourceConfig::HybridVsock(HybridVsockConfig { + guest_cid: DEFAULT_GUEST_VSOCK_CID, + uds_path: get_hvsock_path(id), + }); + resource_configs.push(vm_hvsock); + + // prepare network config + if !network_env.network_created { + if let Some(network_resource) = self.prepare_network_resource(&network_env).await { + resource_configs.push(network_resource); + } + } + + // prepare sharefs device config + let virtio_fs_config = + ResourceConfig::ShareFs(self.hypervisor.hypervisor_config().await.shared_fs); + resource_configs.push(virtio_fs_config); + + // prepare VM rootfs device config + let vm_rootfs = ResourceConfig::VmRootfs( + self.prepare_rootfs_config() + .await + .context("failed to prepare rootfs device config")?, + ); + resource_configs.push(vm_rootfs); + + Ok(resource_configs) + } + + async fn prepare_network_resource( + &self, + network_env: &SandboxNetworkEnv, + ) -> Option { + let config = self.resource_manager.config().await; + let dan_path = dan_config_path(&config, &self.sid); + + // Network priority: DAN > NetNS + if dan_path.exists() { + Some(ResourceConfig::Network(NetworkConfig::Dan( + DanNetworkConfig { + dan_conf_path: dan_path, + }, + ))) + } else if let Some(netns_path) = network_env.netns.as_ref() { + Some(ResourceConfig::Network(NetworkConfig::NetNs( + NetworkWithNetNsConfig { + network_model: config.runtime.internetworking_model.clone(), + netns_path: netns_path.to_owned(), + queues: self + .hypervisor + .hypervisor_config() + .await + .network_info + .network_queues as usize, + network_created: network_env.network_created, + }, + ))) + } else { + None + } + } + + async fn execute_oci_hook_functions( + &self, + prestart_hooks: &[oci::Hook], + create_runtime_hooks: &[oci::Hook], + state: &oci::State, + ) -> Result<()> { + let mut st = state.clone(); + // for dragonball, we use vmm_master_tid + let vmm_pid = self + .hypervisor + .get_vmm_master_tid() + .await + .context("get vmm master tid")?; + st.pid = vmm_pid as i32; + + // Prestart Hooks 
[DEPRECATED in newest oci spec]: + // * should be run in runtime namespace + // * should be run after vm is started, but before container is created + // if Prestart Hook and CreateRuntime Hook are both supported + // * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#prestart + let mut prestart_hook_states = HookStates::new(); + prestart_hook_states.execute_hooks(prestart_hooks, Some(st.clone()))?; + + // CreateRuntime Hooks: + // * should be run in runtime namespace + // * should be run when creating the runtime + // * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#createruntime-hooks + let mut create_runtime_hook_states = HookStates::new(); + create_runtime_hook_states.execute_hooks(create_runtime_hooks, Some(st.clone()))?; + + Ok(()) + } + + async fn prepare_rootfs_config(&self) -> Result { + let boot_info = self.hypervisor.hypervisor_config().await.boot_info; + + let image = { + let initrd_path = boot_info.initrd.clone(); + let image_path = boot_info.image; + if !initrd_path.is_empty() { + Ok(initrd_path) + } else if !image_path.is_empty() { + Ok(image_path) + } else { + Err(anyhow!("failed to get image")) + } + } + .context("get image")?; + + Ok(BlockConfig { + path_on_host: image, + is_readonly: true, + driver_option: boot_info.vm_rootfs_driver, + ..Default::default() + }) + } + + fn has_prestart_hooks( + &self, + prestart_hooks: Vec, + create_runtime_hooks: Vec, + ) -> bool { + !prestart_hooks.is_empty() || !create_runtime_hooks.is_empty() + } +} + +#[async_trait] +impl Sandbox for VirtSandbox { + #[instrument(name = "sb: start")] + async fn start( + &self, + dns: Vec, + spec: &oci::Spec, + state: &oci::State, + network_env: SandboxNetworkEnv, + ) -> Result<()> { + let id = &self.sid; + + // if sandbox running, return + // if sandbox not running try to start sandbox + let mut inner = self.inner.write().await; + if inner.state == SandboxState::Running { + warn!(sl!(), "sandbox is running, no need to start"); + return Ok(()); + } + + self.hypervisor + .prepare_vm(id, network_env.netns.clone()) + .await + .context("prepare vm")?; + + // generate device and setup before start vm + // should after hypervisor.prepare_vm + let resources = self + .prepare_for_start_sandbox(id, network_env.clone()) + .await?; + + self.resource_manager + .prepare_before_start_vm(resources) + .await + .context("set up device before start vm")?; + + // start vm + self.hypervisor.start_vm(10_000).await.context("start vm")?; + info!(sl!(), "start vm"); + + // execute pre-start hook functions, including Prestart Hooks and CreateRuntime Hooks + let (prestart_hooks, create_runtime_hooks) = match spec.hooks.as_ref() { + Some(hooks) => (hooks.prestart.clone(), hooks.create_runtime.clone()), + None => (Vec::new(), Vec::new()), + }; + self.execute_oci_hook_functions(&prestart_hooks, &create_runtime_hooks, state) + .await?; + + // 1. if there are pre-start hook functions, network config might have been changed. + // We need to rescan the netns to handle the change. + // 2. Do not scan the netns if we want no network for the VM. + // TODO In case of vm factory, scan the netns to hotplug interfaces after the VM is started. 
+ let config = self.resource_manager.config().await; + if self.has_prestart_hooks(prestart_hooks, create_runtime_hooks) + && !config.runtime.disable_new_netns + && !dan_config_path(&config, &self.sid).exists() + { + if let Some(netns_path) = network_env.netns { + let network_resource = NetworkConfig::NetNs(NetworkWithNetNsConfig { + network_model: config.runtime.internetworking_model.clone(), + netns_path: netns_path.to_owned(), + queues: self + .hypervisor + .hypervisor_config() + .await + .network_info + .network_queues as usize, + network_created: network_env.network_created, + }); + self.resource_manager + .handle_network(network_resource) + .await + .context("set up device after start vm")?; + } + } + + // connect agent + // set agent socket + let address = self + .hypervisor + .get_agent_socket() + .await + .context("get agent socket")?; + self.agent.start(&address).await.context("connect")?; + + self.resource_manager + .setup_after_start_vm() + .await + .context("setup device after start vm")?; + + // create sandbox in vm + let agent_config = self.agent.agent_config().await; + let kernel_modules = KernelModule::set_kernel_modules(agent_config.kernel_modules)?; + let req = agent::CreateSandboxRequest { + hostname: spec.hostname.clone(), + dns, + storages: self + .resource_manager + .get_storage_for_sandbox() + .await + .context("get storages for sandbox")?, + sandbox_pidns: false, + sandbox_id: id.to_string(), + guest_hook_path: self + .hypervisor + .hypervisor_config() + .await + .security_info + .guest_hook_path, + kernel_modules, + }; + + self.agent + .create_sandbox(req) + .await + .context("create sandbox")?; + + inner.state = SandboxState::Running; + let agent = self.agent.clone(); + let sender = self.msg_sender.clone(); + info!(sl!(), "oom watcher start"); + tokio::spawn(async move { + loop { + match agent + .get_oom_event(agent::Empty::new()) + .await + .context("get oom event") + { + Ok(resp) => { + let cid = &resp.container_id; + warn!(sl!(), "send oom event for container {}", &cid); + let event = TaskOOM { + container_id: cid.to_string(), + ..Default::default() + }; + let msg = Message::new(Action::Event(Arc::new(event))); + let lock_sender = sender.lock().await; + if let Err(err) = lock_sender.send(msg).await.context("send event") { + error!( + sl!(), + "failed to send oom event for {} error {:?}", cid, err + ); + } + } + Err(err) => { + warn!(sl!(), "failed to get oom event error {:?}", err); + break; + } + } + } + }); + self.monitor.start(id, self.agent.clone()); + self.save().await.context("save state")?; + Ok(()) + } + + async fn stop(&self) -> Result<()> { + info!(sl!(), "begin stop sandbox"); + self.hypervisor.stop_vm().await.context("stop vm")?; + Ok(()) + } + + async fn shutdown(&self) -> Result<()> { + info!(sl!(), "shutdown"); + + self.stop().await.context("stop")?; + + self.cleanup().await.context("do the clean up")?; + + info!(sl!(), "stop monitor"); + self.monitor.stop().await; + + info!(sl!(), "stop agent"); + self.agent.stop().await; + + // stop server + info!(sl!(), "send shutdown message"); + let msg = Message::new(Action::Shutdown); + let sender = self.msg_sender.clone(); + let sender = sender.lock().await; + sender.send(msg).await.context("send shutdown msg")?; + Ok(()) + } + + async fn cleanup(&self) -> Result<()> { + info!(sl!(), "delete hypervisor"); + self.hypervisor + .cleanup() + .await + .context("delete hypervisor")?; + + info!(sl!(), "resource clean up"); + self.resource_manager + .cleanup() + .await + .context("resource clean up")?; + + // 
TODO: cleanup other sandbox resource + Ok(()) + } + + async fn agent_sock(&self) -> Result { + self.agent.agent_sock().await + } + + async fn direct_volume_stats(&self, volume_guest_path: &str) -> Result { + let req: agent::VolumeStatsRequest = VolumeStatsRequest { + volume_guest_path: volume_guest_path.to_string(), + }; + let result = self + .agent + .get_volume_stats(req) + .await + .context("sandbox: failed to process direct volume stats query")?; + Ok(result.data) + } + + async fn direct_volume_resize(&self, resize_req: agent::ResizeVolumeRequest) -> Result<()> { + self.agent + .resize_volume(resize_req) + .await + .context("sandbox: failed to resize direct-volume")?; + Ok(()) + } + + async fn set_iptables(&self, is_ipv6: bool, data: Vec) -> Result> { + info!(sl!(), "sb: set_iptables invoked"); + let req = SetIPTablesRequest { is_ipv6, data }; + let resp = self + .agent + .set_ip_tables(req) + .await + .context("sandbox: failed to set iptables")?; + Ok(resp.data) + } + + async fn get_iptables(&self, is_ipv6: bool) -> Result> { + info!(sl!(), "sb: get_iptables invoked"); + let req = GetIPTablesRequest { is_ipv6 }; + let resp = self + .agent + .get_ip_tables(req) + .await + .context("sandbox: failed to get iptables")?; + Ok(resp.data) + } + + async fn agent_metrics(&self) -> Result { + self.agent + .get_metrics(agent::Empty::new()) + .await + .map_err(|err| anyhow!("failed to get agent metrics {:?}", err)) + .map(|resp| resp.metrics) + } + + async fn hypervisor_metrics(&self) -> Result { + self.hypervisor.get_hypervisor_metrics().await + } +} + +#[async_trait] +impl Persist for VirtSandbox { + type State = crate::sandbox_persist::SandboxState; + type ConstructorArgs = SandboxRestoreArgs; + + /// Save a state of Sandbox + async fn save(&self) -> Result { + let sandbox_state = crate::sandbox_persist::SandboxState { + sandbox_type: VIRTCONTAINER.to_string(), + resource: Some(self.resource_manager.save().await?), + hypervisor: Some(self.hypervisor.save_state().await?), + }; + persist::to_disk(&sandbox_state, &self.sid)?; + Ok(sandbox_state) + } + /// Restore Sandbox + async fn restore( + sandbox_args: Self::ConstructorArgs, + sandbox_state: Self::State, + ) -> Result { + let config = sandbox_args.toml_config; + let r = sandbox_state.resource.unwrap_or_default(); + let h = sandbox_state.hypervisor.unwrap_or_default(); + let hypervisor = match h.hypervisor_type.as_str() { + // TODO support other hypervisors + HYPERVISOR_DRAGONBALL => Ok(Arc::new(Dragonball::restore((), h).await?)), + _ => Err(anyhow!("Unsupported hypervisor {}", &h.hypervisor_type)), + }?; + let agent = Arc::new(KataAgent::new(kata_types::config::Agent::default())); + let sid = sandbox_args.sid; + let keep_abnormal = config.runtime.keep_abnormal; + let args = ManagerArgs { + sid: sid.clone(), + agent: agent.clone(), + hypervisor: hypervisor.clone(), + config, + }; + let resource_manager = Arc::new(ResourceManager::restore(args, r).await?); + Ok(Self { + sid: sid.to_string(), + msg_sender: Arc::new(Mutex::new(sandbox_args.sender)), + inner: Arc::new(RwLock::new(SandboxInner::new())), + agent, + hypervisor, + resource_manager, + monitor: Arc::new(HealthCheck::new(true, keep_abnormal)), + }) + } +} diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox_persist.rs b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox_persist.rs new file mode 100644 index 000000000000..5497b0eee2b8 --- /dev/null +++ b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox_persist.rs @@ -0,0 +1,16 @@ +// Copyright (c) 
2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use hypervisor::hypervisor_persist::HypervisorState; +use resource::resource_persist::ResourceState; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize)] +pub struct SandboxState { + pub sandbox_type: String, + pub resource: Option, + pub hypervisor: Option, +} diff --git a/src/runtime-rs/crates/runtimes/wasm_container/Cargo.toml b/src/runtime-rs/crates/runtimes/wasm_container/Cargo.toml new file mode 100644 index 000000000000..4f098295a76e --- /dev/null +++ b/src/runtime-rs/crates/runtimes/wasm_container/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "wasm_container" +version = "0.1.0" +authors = ["The Kata Containers community "] +edition = "2018" + +[dependencies] +anyhow = "^1.0" +async-trait = "0.1.48" +tokio = { version = "1.28.1" } + +common = { path = "../common" } +kata-types = { path = "../../../../libs/kata-types" } diff --git a/src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs b/src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs new file mode 100644 index 000000000000..77282ac9cfad --- /dev/null +++ b/src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs @@ -0,0 +1,41 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// +use std::sync::Arc; + +use anyhow::Result; +use async_trait::async_trait; +use common::{message::Message, RuntimeHandler, RuntimeInstance}; +use kata_types::config::TomlConfig; +use tokio::sync::mpsc::Sender; +pub struct WasmContainer {} + +#[async_trait] +impl RuntimeHandler for WasmContainer { + fn init() -> Result<()> { + Ok(()) + } + + fn name() -> String { + "wasm_container".to_string() + } + + fn new_handler() -> Arc { + Arc::new(WasmContainer {}) + } + + async fn new_instance( + &self, + _sid: &str, + _msg_sender: Sender, + _config: Arc, + ) -> Result { + todo!() + } + + fn cleanup(&self, _id: &str) -> Result<()> { + todo!() + } +} diff --git a/src/runtime-rs/crates/service/Cargo.toml b/src/runtime-rs/crates/service/Cargo.toml new file mode 100644 index 000000000000..693cce330c9a --- /dev/null +++ b/src/runtime-rs/crates/service/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "service" +version = "0.1.0" +authors = ["The Kata Containers community "] +edition = "2018" +license = "Apache-2.0" + +[dependencies] +anyhow = "^1.0" +async-trait = "0.1.48" +slog = "2.5.2" +slog-scope = "4.4.0" +tokio = { version = "1.28.1", features = ["rt-multi-thread"] } +tracing = "0.1.36" +ttrpc = { version = "0.7.1" } + +common = { path = "../runtimes/common" } +containerd-shim-protos = { version = "0.3.0", features = ["async"]} +logging = { path = "../../../libs/logging"} +kata-types = { path = "../../../libs/kata-types" } +runtimes = { path = "../runtimes" } +persist = { path = "../persist" } diff --git a/src/runtime-rs/crates/service/src/lib.rs b/src/runtime-rs/crates/service/src/lib.rs new file mode 100644 index 000000000000..1f28a8009c4b --- /dev/null +++ b/src/runtime-rs/crates/service/src/lib.rs @@ -0,0 +1,14 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[macro_use] +extern crate slog; + +logging::logger_with_subsystem!(sl, "service"); + +mod manager; +pub use manager::ServiceManager; +mod task_service; diff --git a/src/runtime-rs/crates/service/src/manager.rs b/src/runtime-rs/crates/service/src/manager.rs new file mode 100644 index 
000000000000..2b16fe3f4714 --- /dev/null +++ b/src/runtime-rs/crates/service/src/manager.rs @@ -0,0 +1,194 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + fs, + os::unix::io::{FromRawFd, RawFd}, + process::Stdio, + sync::Arc, +}; + +use anyhow::{Context, Result}; +use common::message::{Action, Event, Message}; +use containerd_shim_protos::{ + protobuf::{well_known_types::any::Any, Message as ProtobufMessage}, + shim_async, +}; +use kata_types::config::KATA_PATH; +use runtimes::RuntimeHandlerManager; +use tokio::{ + io::AsyncWriteExt, + process::Command, + sync::mpsc::{channel, Receiver}, +}; +use ttrpc::asynchronous::Server; + +use crate::task_service::TaskService; + +/// message buffer size +const MESSAGE_BUFFER_SIZE: usize = 8; + +pub struct ServiceManager { + receiver: Option>, + handler: Arc, + task_server: Option, + binary: String, + address: String, + namespace: String, +} + +impl std::fmt::Debug for ServiceManager { + // todo: some how to implement debug for handler + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ServiceManager") + .field("receiver", &self.receiver) + .field("task_server.is_some()", &self.task_server.is_some()) + .field("binary", &self.binary) + .field("address", &self.address) + .field("namespace", &self.namespace) + .finish() + } +} + +impl ServiceManager { + // TODO: who manages lifecycle for `task_server_fd`? + pub async fn new( + id: &str, + containerd_binary: &str, + address: &str, + namespace: &str, + task_server_fd: RawFd, + ) -> Result { + let (sender, receiver) = channel::(MESSAGE_BUFFER_SIZE); + let rt_mgr = RuntimeHandlerManager::new(id, sender).context("new runtime handler")?; + let handler = Arc::new(rt_mgr); + let mut task_server = unsafe { Server::from_raw_fd(task_server_fd) }; + task_server = task_server.set_domain_unix(); + Ok(Self { + receiver: Some(receiver), + handler, + task_server: Some(task_server), + binary: containerd_binary.to_string(), + address: address.to_string(), + namespace: namespace.to_string(), + }) + } + + pub async fn run(mut self) -> Result<()> { + info!(sl!(), "begin to run service"); + self.registry_service().context("registry service")?; + self.start_service().await.context("start service")?; + + info!(sl!(), "wait server message"); + let mut rx = self.receiver.take(); + if let Some(rx) = rx.as_mut() { + while let Some(r) = rx.recv().await { + info!(sl!(), "receive action {:?}", &r.action); + let result = match r.action { + Action::Start => self.start_service().await.context("start listen"), + Action::Stop => self.stop_service().await.context("stop listen"), + Action::Shutdown => { + self.stop_service().await.context("stop listen")?; + break; + } + Action::Event(event) => { + info!(sl!(), "get event {:?}", &event); + self.send_event(event).await.context("send event") + } + }; + + if let Some(ref sender) = r.resp_sender { + if let Err(err) = result.as_ref() { + error!(sl!(), "failed to process action {:?}", err); + } + sender.send(result).await.context("send response")?; + } + } + } + + info!(sl!(), "end to run service"); + + Ok(()) + } + + pub async fn cleanup(sid: &str) -> Result<()> { + let (sender, _receiver) = channel::(MESSAGE_BUFFER_SIZE); + let handler = RuntimeHandlerManager::new(sid, sender).context("new runtime handler")?; + if let Err(e) = handler.cleanup().await { + warn!(sl!(), "failed to clean up runtime state, {}", e); + } + + let temp_dir = [KATA_PATH, sid].join("/"); + 
if fs::metadata(temp_dir.as_str()).is_ok() { + // try to remove dir and skip the result + if let Err(e) = fs::remove_dir_all(temp_dir) { + warn!(sl!(), "failed to clean up sandbox tmp dir, {}", e); + } + } + + Ok(()) + } + + fn registry_service(&mut self) -> Result<()> { + if let Some(t) = self.task_server.take() { + let task_service = Arc::new(Box::new(TaskService::new(self.handler.clone())) + as Box); + let t = t.register_service(shim_async::create_task(task_service)); + self.task_server = Some(t); + } + Ok(()) + } + + async fn start_service(&mut self) -> Result<()> { + if let Some(t) = self.task_server.as_mut() { + t.start().await.context("task server start")?; + } + Ok(()) + } + + async fn stop_service(&mut self) -> Result<()> { + if let Some(t) = self.task_server.as_mut() { + t.stop_listen().await; + } + Ok(()) + } + + async fn send_event(&self, event: Arc) -> Result<()> { + let any = Any { + type_url: event.type_url(), + value: event.value().context("get event value")?, + ..Default::default() + }; + let data = any.write_to_bytes().context("write to any")?; + let mut child = Command::new(&self.binary) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .args([ + "--address", + &self.address, + "publish", + "--topic", + &event.r#type(), + "--namespace", + &self.namespace, + ]) + .spawn() + .context("spawn containerd cmd to publish event")?; + + let stdin = child.stdin.as_mut().context("failed to open stdin")?; + stdin + .write_all(&data) + .await + .context("failed to write to stdin")?; + let output = child + .wait_with_output() + .await + .context("failed to read stdout")?; + info!(sl!(), "get output: {:?}", output); + Ok(()) + } +} diff --git a/src/runtime-rs/crates/service/src/task_service.rs b/src/runtime-rs/crates/service/src/task_service.rs new file mode 100644 index 000000000000..9db1bcbe4da9 --- /dev/null +++ b/src/runtime-rs/crates/service/src/task_service.rs @@ -0,0 +1,82 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + convert::{TryFrom, TryInto}, + sync::Arc, +}; + +use async_trait::async_trait; +use common::types::{Request, Response}; +use containerd_shim_protos::{api, shim_async}; +use ttrpc::{self, r#async::TtrpcContext}; + +use runtimes::RuntimeHandlerManager; + +pub(crate) struct TaskService { + handler: Arc, +} + +impl TaskService { + pub(crate) fn new(handler: Arc) -> Self { + Self { handler } + } + + async fn handler_message( + &self, + ctx: &TtrpcContext, + req: TtrpcReq, + ) -> ttrpc::Result + where + Request: TryFrom, + >::Error: std::fmt::Debug, + TtrpcResp: TryFrom, + >::Error: std::fmt::Debug, + { + let r = req.try_into().map_err(|err| { + ttrpc::Error::Others(format!("failed to translate from shim {:?}", err)) + })?; + let logger = sl!().new(o!("stream id" => ctx.mh.stream_id)); + debug!(logger, "====> task service {:?}", &r); + let resp = + self.handler.handler_message(r).await.map_err(|err| { + ttrpc::Error::Others(format!("failed to handler message {:?}", err)) + })?; + debug!(logger, "<==== task service {:?}", &resp); + resp.try_into() + .map_err(|err| ttrpc::Error::Others(format!("failed to translate to shim {:?}", err))) + } +} + +macro_rules! 
impl_service { + ($($name: tt | $req: ty | $resp: ty),*) => { + #[async_trait] + impl shim_async::Task for TaskService { + $(async fn $name(&self, ctx: &TtrpcContext, req: $req) -> ttrpc::Result<$resp> { + self.handler_message(ctx, req).await + })* + } + }; +} + +impl_service!( + state | api::StateRequest | api::StateResponse, + create | api::CreateTaskRequest | api::CreateTaskResponse, + start | api::StartRequest | api::StartResponse, + delete | api::DeleteRequest | api::DeleteResponse, + pids | api::PidsRequest | api::PidsResponse, + pause | api::PauseRequest | api::Empty, + resume | api::ResumeRequest | api::Empty, + kill | api::KillRequest | api::Empty, + exec | api::ExecProcessRequest | api::Empty, + resize_pty | api::ResizePtyRequest | api::Empty, + update | api::UpdateTaskRequest | api::Empty, + wait | api::WaitRequest | api::WaitResponse, + stats | api::StatsRequest | api::StatsResponse, + connect | api::ConnectRequest | api::ConnectResponse, + shutdown | api::ShutdownRequest | api::Empty, + close_io | api::CloseIORequest | api::Empty +); diff --git a/src/runtime-rs/crates/shim-ctl/Cargo.toml b/src/runtime-rs/crates/shim-ctl/Cargo.toml new file mode 100644 index 000000000000..b1e844b0c0f4 --- /dev/null +++ b/src/runtime-rs/crates/shim-ctl/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "shim-ctl" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "^1.0" +common = { path = "../runtimes/common" } +logging = { path = "../../../libs/logging"} +runtimes = { path = "../runtimes" } +tokio = { version = "1.28.1", features = [ "rt", "rt-multi-thread" ] } + diff --git a/src/runtime-rs/crates/shim-ctl/README.md b/src/runtime-rs/crates/shim-ctl/README.md new file mode 100644 index 000000000000..6e7abdb7e466 --- /dev/null +++ b/src/runtime-rs/crates/shim-ctl/README.md @@ -0,0 +1,51 @@ +### Purpose +`shim-ctl` is a binary to exercise the shim proper without containerd +dependencies. + +The actual Kata shim is hard to execute outside of deployment environments due +to its dependency on containerd's shim v2 protocol. Among others, the +dependency requires having a socket with a remote end that's capable of driving +the shim using the shim v2 `ttrpc` protocol, and a binary for shim to publish +events to. + +Since at least some of the shim v2 protocol dependencies are fairly hard to +mock up, this presents a significant obstacle to development. + +`shim-ctl` takes advantage of the fact that due to the shim implementation +architecture, only the outermost couple of shim layers are +containerd-dependent and all of the inner layers that do the actual heavy +lifting don't depend on containerd. This allows `shim-ctl` to replace the +containerd-dependent layers with something that's easier to use on a +developer's machine. + +### Usage + +After building the binary as usual with `cargo build` run `shim-ctl` as follows. + +Even though `shim-ctl` does away with containerd dependencies it still has +some requirements of its execution environment. In particular, it needs a +Kata `configuration.toml` file, some Kata distribution files to point a bunch +of `configuration.toml` keys to (like hypervisor keys `path`, `kernel` or +`initrd`) and a container bundle. These are however much easier to fulfill +than the original containerd dependencies, and doing so is a one-off task - +once done they can be reused for an unlimited number of modify-build-run +development cycles. 
+ +`shim-ctl` also needs to be launched from an exported container bundle +directory. One can be created by running + +``` +mkdir rootfs +podman export $(podman create busybox) | tar -C ./rootfs -xvf - +runc spec -b . +``` + +in a suitable directory. + +The program can then be launched like this: + +``` +cd /the/bundle/directory +KATA_CONF_FILE=/path/to/configuration-qemu.toml /path/to/shim-ctl +``` + diff --git a/src/runtime-rs/crates/shim-ctl/src/main.rs b/src/runtime-rs/crates/shim-ctl/src/main.rs new file mode 100644 index 000000000000..76506fec2d80 --- /dev/null +++ b/src/runtime-rs/crates/shim-ctl/src/main.rs @@ -0,0 +1,45 @@ +// Copyright (c) 2022 Red Hat +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{Context, Result}; +use common::{ + message::Message, + types::{ContainerConfig, Request}, +}; +use runtimes::RuntimeHandlerManager; +use tokio::sync::mpsc::channel; + +const MESSAGE_BUFFER_SIZE: usize = 8; +const WORKER_THREADS: usize = 2; + +async fn real_main() { + let (sender, _receiver) = channel::(MESSAGE_BUFFER_SIZE); + let manager = RuntimeHandlerManager::new("xxx", sender).unwrap(); + + let req = Request::CreateContainer(ContainerConfig { + container_id: "xxx".to_owned(), + bundle: ".".to_owned(), + rootfs_mounts: Vec::new(), + terminal: false, + options: None, + stdin: None, + stdout: None, + stderr: None, + }); + + manager.handler_message(req).await.ok(); +} + +fn main() -> Result<(), Box> { + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(WORKER_THREADS) + .enable_all() + .build() + .context("prepare tokio runtime")?; + + runtime.block_on(real_main()); + + Ok(()) +} diff --git a/src/runtime-rs/crates/shim/Cargo.toml b/src/runtime-rs/crates/shim/Cargo.toml new file mode 100644 index 000000000000..e38e66262905 --- /dev/null +++ b/src/runtime-rs/crates/shim/Cargo.toml @@ -0,0 +1,46 @@ +[package] +name = "shim" +version = "0.1.0" +authors = ["The Kata Containers community "] +description = "Containerd shim runtime for Kata Containers" +keywords = ["kata-containers", "shim"] +repository = "https://github.com/kata-containers/kata-containers.git" +license = "Apache-2.0" +edition = "2018" + +[[bin]] +name = "containerd-shim-kata-v2" +path = "src/bin/main.rs" + +[dependencies] +anyhow = "^1.0" +backtrace = {version = ">=0.3.35", features = ["libunwind", "libbacktrace", "std"], default-features = false} +containerd-shim-protos = { version = "0.3.0", features = ["async"]} +go-flag = "0.1.0" +libc = "0.2.108" +log = "0.4.14" +nix = "0.24.2" +protobuf = "3.2.0" +sha2 = "=0.9.3" +slog = {version = "2.5.2", features = ["std", "release_max_level_trace", "max_level_trace"]} +slog-async = "2.5.2" +slog-scope = "4.4.0" +slog-stdlog = "4.1.0" +thiserror = "1.0.30" +tokio = { version = "1.28.1", features = [ "rt", "rt-multi-thread" ] } +unix_socket2 = "0.5.4" +tracing = "0.1.36" +tracing-opentelemetry = "0.18.0" + +kata-types = { path = "../../../libs/kata-types"} +kata-sys-util = { path = "../../../libs/kata-sys-util"} +logging = { path = "../../../libs/logging"} +oci = { path = "../../../libs/oci" } +service = { path = "../service" } +runtimes = { path = "../runtimes" } + +[dev-dependencies] +tempfile = "3.2.0" +rand = "0.8.4" +serial_test = "0.5.1" +tests_utils = { path = "../../tests/utils"} diff --git a/src/runtime-rs/crates/shim/src/args.rs b/src/runtime-rs/crates/shim/src/args.rs new file mode 100644 index 000000000000..1ab5b8afabc5 --- /dev/null +++ b/src/runtime-rs/crates/shim/src/args.rs @@ -0,0 +1,320 @@ +// Copyright (c) 2019-2022 Alibaba 
Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{os::unix::fs::FileTypeExt, path::PathBuf}; + +use anyhow::{anyhow, Context, Result}; +use kata_sys_util::validate; + +use crate::Error; + +/// Received command-line arguments or environment arguments +/// from a shimv2 container manager such as containerd. +/// +/// For detailed information, please refer to the +/// [shim spec](https://github.com/containerd/containerd/blob/v1.6.8/runtime/v2/README.md). +#[derive(Debug, Default, Clone)] +pub struct Args { + /// the id of the container + pub id: String, + /// the namespace for the container + pub namespace: String, + /// the address of the containerd's main socket + pub address: String, + /// the binary path to publish events back to containerd + pub publish_binary: String, + /// the path to the bundle to delete + pub bundle: String, + /// Whether or not to enable debug + pub debug: bool, +} + +impl Args { + /// Check whether the shim argument object is valid. + /// + /// The id, namespace, address and publish_binary are mandatory for START, RUN and DELETE. + /// And bundle is mandatory for DELETE. + pub fn validate(&mut self, should_check_bundle: bool) -> Result<()> { + if self.id.is_empty() + || self.namespace.is_empty() + || self.address.is_empty() + || self.publish_binary.is_empty() + { + return Err(anyhow!(Error::ArgumentIsEmpty(format!( + "id: {} namespace: {} address: {} publish_binary: {}", + &self.id, &self.namespace, &self.address, &self.publish_binary + )))); + } + + validate::verify_id(&self.id).context("verify container id")?; + validate::verify_id(&self.namespace).context("verify namespace")?; + + // Ensure `address` is a valid path. + let path = PathBuf::from(self.address.clone()) + .canonicalize() + .context(Error::InvalidPath(self.address.clone()))?; + let md = path + .metadata() + .context(Error::FileGetMetadata(format!("{:?}", path)))?; + if !md.file_type().is_socket() { + return Err(Error::InvalidArgument).context("address is not socket"); + } + self.address = path + .to_str() + .map(|v| v.to_owned()) + .ok_or(Error::InvalidArgument)?; + + // Ensure `bundle` is a valid path.
+ if should_check_bundle { + if self.bundle.is_empty() { + return Err(anyhow!(Error::ArgumentIsEmpty("bundle".to_string()))); + } + + let path = PathBuf::from(self.bundle.clone()) + .canonicalize() + .map_err(|_| Error::InvalidArgument)?; + let md = path + .metadata() + .map_err(|_| Error::InvalidArgument) + .context("get address metadata")?; + if !md.is_dir() { + return Err(Error::InvalidArgument).context("medata is dir"); + } + self.bundle = path + .to_str() + .map(|v| v.to_owned()) + .ok_or(Error::InvalidArgument) + .context("path to string")?; + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::os::unix::net::UnixListener; + + use anyhow::anyhow; + use kata_sys_util::validate; + + #[test] + fn test_args_is_valid() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().to_path_buf(); + let path = path.to_str().unwrap(); + let bind_address = &format!("{}/socket1", path); + UnixListener::bind(bind_address).unwrap(); + + #[derive(Debug)] + struct TestData { + arg: Args, + should_check_bundle: bool, + result: Result<()>, + } + + let default_id = "default_id".to_string(); + let default_namespace = "default_namespace".to_string(); + let default_address = bind_address.to_string(); + let default_publish_binary = "containerd".to_string(); + let default_bundle = path.to_string(); + + let mut arg = Args { + id: default_id.clone(), + namespace: default_namespace.clone(), + address: default_address.clone(), + publish_binary: default_publish_binary.clone(), + bundle: default_bundle.clone(), + ..Default::default() + }; + + let tests = &[ + TestData { + arg: arg.clone(), + should_check_bundle: false, + result: Ok(()), + }, + TestData { + arg: { + arg.namespace = "".to_string(); + arg.clone() + }, + should_check_bundle: false, + result: Err(anyhow!(Error::ArgumentIsEmpty(format!( + "id: {} namespace: {} address: {} publish_binary: {}", + &arg.id, &arg.namespace, &arg.address, &arg.publish_binary + )))), + }, + TestData { + arg: { + arg.namespace = default_namespace.clone(); + arg.clone() + }, + should_check_bundle: false, + result: Ok(()), + }, + TestData { + arg: { + arg.id = "".to_string(); + arg.clone() + }, + should_check_bundle: false, + result: Err(anyhow!(Error::ArgumentIsEmpty(format!( + "id: {} namespace: {} address: {} publish_binary: {}", + &arg.id, &arg.namespace, &arg.address, &arg.publish_binary + )))), + }, + TestData { + arg: { + arg.id = default_id; + arg.clone() + }, + should_check_bundle: false, + result: Ok(()), + }, + TestData { + arg: { + arg.address = "".to_string(); + arg.clone() + }, + should_check_bundle: false, + result: Err(anyhow!(Error::ArgumentIsEmpty(format!( + "id: {} namespace: {} address: {} publish_binary: {}", + &arg.id, &arg.namespace, &arg.address, &arg.publish_binary + )))), + }, + TestData { + arg: { + arg.address = default_address.clone(); + arg.clone() + }, + should_check_bundle: false, + result: Ok(()), + }, + TestData { + arg: { + arg.publish_binary = "".to_string(); + arg.clone() + }, + should_check_bundle: false, + result: Err(anyhow!(Error::ArgumentIsEmpty(format!( + "id: {} namespace: {} address: {} publish_binary: {}", + &arg.id, &arg.namespace, &arg.address, &arg.publish_binary + )))), + }, + TestData { + arg: { + arg.publish_binary = default_publish_binary; + arg.clone() + }, + should_check_bundle: false, + result: Ok(()), + }, + TestData { + arg: { + arg.bundle = "".to_string(); + arg.clone() + }, + should_check_bundle: false, + result: Ok(()), + }, + TestData { + arg: arg.clone(), + should_check_bundle: true, + 
result: Err(anyhow!(Error::ArgumentIsEmpty("bundle".to_string()))), + }, + TestData { + arg: { + arg.bundle = default_bundle; + arg.clone() + }, + should_check_bundle: true, + result: Ok(()), + }, + TestData { + arg: { + arg.namespace = "id1/id2".to_string(); + arg.clone() + }, + should_check_bundle: true, + result: Err( + anyhow!(validate::Error::InvalidContainerID("id/id2".to_string())) + .context("verify namespace"), + ), + }, + TestData { + arg: { + arg.namespace = default_namespace.clone() + "id1 id2"; + arg.clone() + }, + should_check_bundle: true, + result: Err(anyhow!(validate::Error::InvalidContainerID( + default_namespace.clone() + "id1 id2", + )) + .context("verify namespace")), + }, + TestData { + arg: { + arg.namespace = default_namespace.clone() + "id2\tid2"; + arg.clone() + }, + should_check_bundle: true, + result: Err(anyhow!(validate::Error::InvalidContainerID( + default_namespace.clone() + "id1\tid2", + )) + .context("verify namespace")), + }, + TestData { + arg: { + arg.namespace = default_namespace; + arg.clone() + }, + should_check_bundle: true, + result: Ok(()), + }, + TestData { + arg: { + arg.address = default_address.clone() + "/.."; + arg.clone() + }, + should_check_bundle: true, + result: Err(anyhow!(Error::InvalidPath(arg.address.clone()))), + }, + TestData { + arg: { + arg.address = default_address.clone() + "/.."; + arg.clone() + }, + should_check_bundle: true, + result: Err(anyhow!(Error::InvalidPath(arg.address.clone()))), + }, + TestData { + arg: { + arg.address = default_address; + arg + }, + should_check_bundle: true, + result: Ok(()), + }, + ]; + + for (i, t) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, t); + let should_check_bundle = t.should_check_bundle; + let result = t.arg.clone().validate(should_check_bundle); + let msg = format!("{}, result: {:?}", msg, result); + + if t.result.is_ok() { + assert!(result.is_ok(), "{}", msg); + } else { + let expected_error = format!("{}", t.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } + } + } +} diff --git a/src/runtime-rs/crates/shim/src/bin/main.rs b/src/runtime-rs/crates/shim/src/bin/main.rs new file mode 100644 index 000000000000..587f5a18b0dc --- /dev/null +++ b/src/runtime-rs/crates/shim/src/bin/main.rs @@ -0,0 +1,195 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + ffi::{OsStr, OsString}, + path::PathBuf, +}; + +use anyhow::{anyhow, Context, Result}; +use nix::{ + mount::{mount, MsFlags}, + sched::{self, CloneFlags}, +}; +use shim::{config, Args, Error, ShimExecutor}; + +// default tokio runtime worker threads +const DEFAULT_TOKIO_RUNTIME_WORKER_THREADS: usize = 2; +// env to config tokio runtime worker threads +const ENV_TOKIO_RUNTIME_WORKER_THREADS: &str = "TOKIO_RUNTIME_WORKER_THREADS"; + +#[derive(Debug)] +enum Action { + Run(Args), + Start(Args), + Delete(Args), + Help, + Version, +} + +fn parse_args(args: &[OsString]) -> Result { + let mut help = false; + let mut version = false; + let mut shim_args = Args::default(); + + // Crate `go_flag` is used to keep compatible with go/flag package. 
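+ // containerd invokes the shim with Go-style single-dash flags (see the help text below and `new_command()` in shim_start.rs), e.g. `-namespace <ns> -id <id> -address <socket> -publish-binary containerd start`.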
+ let rest_args = go_flag::parse_args_with_warnings::(&args[1..], None, |flags| { + flags.add_flag("address", &mut shim_args.address); + flags.add_flag("bundle", &mut shim_args.bundle); + flags.add_flag("debug", &mut shim_args.debug); + flags.add_flag("id", &mut shim_args.id); + flags.add_flag("namespace", &mut shim_args.namespace); + flags.add_flag("publish-binary", &mut shim_args.publish_binary); + flags.add_flag("help", &mut help); + flags.add_flag("version", &mut version); + }) + .context(Error::ParseArgument(format!("{:?}", args)))?; + + if help { + Ok(Action::Help) + } else if version { + Ok(Action::Version) + } else if rest_args.is_empty() { + Ok(Action::Run(shim_args)) + } else if rest_args[0] == "start" { + Ok(Action::Start(shim_args)) + } else if rest_args[0] == "delete" { + Ok(Action::Delete(shim_args)) + } else { + Err(anyhow!(Error::InvalidArgument)) + } +} + +fn show_help(cmd: &OsStr) { + let path = PathBuf::from(cmd); + let name = match path.file_name() { + Some(v) => v.to_str(), + None => None, + }; + + let name = name.unwrap_or(config::RUNTIME_NAME); + + println!( + r#"Usage of {}: + -address string + grpc address back to main containerd + -bundle string + path to the bundle if not workdir + -debug + enable debug output in logs + -id string + id of the task + -namespace string + namespace that owns the shim + -publish-binary string + path to publish binary (used for publishing events) (default "containerd") + --version + show the runtime version detail and exit +"#, + name + ); +} + +fn show_version(err: Option) { + let data = format!( + r#"{} containerd shim (Rust): id: {}, version: {}, commit: {}"#, + config::PROJECT_NAME, + config::CONTAINERD_RUNTIME_NAME, + config::RUNTIME_VERSION, + config::RUNTIME_GIT_COMMIT, + ); + + if let Some(err) = err { + eprintln!( + "{}\r\nERROR: {} failed: {:?}", + data, + config::RUNTIME_NAME, + err + ); + } else { + println!("{}", data) + } +} + +fn get_tokio_runtime() -> Result { + let worker_threads = std::env::var(ENV_TOKIO_RUNTIME_WORKER_THREADS) + .unwrap_or_default() + .parse() + .unwrap_or(DEFAULT_TOKIO_RUNTIME_WORKER_THREADS); + + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(worker_threads) + .enable_all() + .build() + .context("prepare tokio runtime")?; + Ok(rt) +} + +fn real_main() -> Result<()> { + let args = std::env::args_os().collect::>(); + if args.is_empty() { + return Err(anyhow!(Error::ArgumentIsEmpty( + "command-line arguments".to_string() + ))); + } + + let action = parse_args(&args).context("parse args")?; + match action { + Action::Start(args) => ShimExecutor::new(args).start().context("shim start")?, + Action::Delete(args) => { + let mut shim = ShimExecutor::new(args); + let rt = get_tokio_runtime().context("get tokio runtime")?; + rt.block_on(shim.delete())?; + } + Action::Run(args) => { + // set mnt namespace + // need setup before other async call + setup_mnt().context("setup mnt")?; + + let mut shim = ShimExecutor::new(args); + let rt = get_tokio_runtime().context("get tokio runtime")?; + rt.block_on(shim.run())?; + } + Action::Help => show_help(&args[0]), + Action::Version => show_version(None), + } + Ok(()) +} +fn main() { + if let Err(err) = real_main() { + show_version(Some(err)); + } +} + +fn setup_mnt() -> Result<()> { + // Unshare the mount namespace, so that the calling process has a private copy of its namespace + // which is not shared with any other process. 
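+ // Note: setup_mnt() runs in real_main() before the tokio runtime is built ("need setup before other async call"), so the worker threads spawned later all inherit the new mount namespace.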
+ sched::unshare(CloneFlags::CLONE_NEWNS).context("unshare clone newns")?; + + // Mount and unmount events propagate into this mount from the (master) shared peer group of + // which it was formerly a member. Mount and unmount events under this mount do not propagate + // to any peer. + mount( + Some("none"), + "/", + Some(""), + MsFlags::MS_REC | MsFlags::MS_SLAVE, + Some(""), + ) + .context("mount with slave")?; + + // Mount and unmount events immediately under this mount will propagate to the other mounts + // that are members of this mount's peer group. + mount( + Some("none"), + "/", + Some(""), + MsFlags::MS_REC | MsFlags::MS_SHARED, + Some(""), + ) + .context("mount with shared")?; + Ok(()) +} diff --git a/src/runtime-rs/crates/shim/src/config.rs.in b/src/runtime-rs/crates/shim/src/config.rs.in new file mode 100644 index 000000000000..f4e01c9e2ff0 --- /dev/null +++ b/src/runtime-rs/crates/shim/src/config.rs.in @@ -0,0 +1,16 @@ +// Copyright (c) 2020 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +// +// WARNING: This file is auto-generated - DO NOT EDIT! +// + +pub const PROJECT_NAME: &str = "@PROJECT_NAME@"; +pub const RUNTIME_VERSION: &str = "@RUNTIME_VERSION@"; +pub const RUNTIME_GIT_COMMIT: &str = "@COMMIT@"; +pub const RUNTIME_NAME: &str = "@RUNTIME_NAME@"; +pub const CONTAINERD_RUNTIME_NAME: &str = "@CONTAINERD_RUNTIME_NAME@"; +pub const RUNTIME_DIR: &str = "@BINDIR@"; +pub const RUNTIME_PATH: &str = "@BINDIR@/@RUNTIME_NAME@"; diff --git a/src/runtime-rs/crates/shim/src/core_sched.rs b/src/runtime-rs/crates/shim/src/core_sched.rs new file mode 100644 index 000000000000..7801d6504351 --- /dev/null +++ b/src/runtime-rs/crates/shim/src/core_sched.rs @@ -0,0 +1,102 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +// +// Core Scheduling landed in linux 5.14, this enables that - +// ONLY the processes have the same cookie value can share an SMT core for security +// reasons, since SMT siblings share their cpu caches and many other things. This can +// prevent some malicious processes steal others' private information. +// +// This is enabled by containerd, see https://github.com/containerd/containerd/blob/main/docs/man/containerd-config.toml.5.md#format +// +// This is done by using system call prctl(), for core scheduling purpose, it is defined as +// int prctl(PR_SCHED_CORE, int cs_command, pid_t pid, enum pid_type type, +// unsigned long *cookie); +// +// You may go to https://lwn.net/Articles/861251/, https://lore.kernel.org/lkml/20210422123309.039845339@infradead.org/ +// and kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html for more info. 
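+// In this shim a core scheduling domain is only created when containerd sets the SCHED_CORE environment variable (see try_core_sched() in shim_run.rs).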
+// + +use anyhow::Result; +use nix::{self, errno::Errno}; + +#[allow(dead_code)] +pub const PID_GROUP: usize = 0; +#[allow(dead_code)] +pub const THREAD_GROUP: usize = 1; +pub const PROCESS_GROUP: usize = 2; + +#[allow(dead_code)] +pub const PR_SCHED_CORE: i32 = 62; +pub const PR_SCHED_CORE_CREATE: usize = 1; +pub const PR_SCHED_CORE_SHARE_FROM: usize = 3; + +// create a new core sched domain, this will NOT succeed if kernel version < 5.14 +pub fn core_sched_create(pidtype: usize) -> Result<(), Errno> { + let errno = unsafe { nix::libc::prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0, pidtype, 0) }; + if errno != 0 { + Err(nix::errno::Errno::from_i32(-errno)) + } else { + Ok(()) + } +} + +// shares the domain with *pid* +#[allow(dead_code)] +pub fn core_sched_share_from(pid: usize, pidtype: usize) -> Result<(), Errno> { + let errno = + unsafe { nix::libc::prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, pid, pidtype, 0) }; + if errno != 0 { + Err(nix::errno::Errno::from_i32(-errno)) + } else { + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use nix::errno::Errno::{EINVAL, ENODEV, ENOMEM, EPERM, ESRCH}; + + const RELEASE_MAJOR_VERSION: u8 = 5; + const RELEASE_MINOR_VERSION: u8 = 14; + + // since this feature only lands in linux 5.14, we run the test when version is higher + fn core_sched_landed() -> bool { + let vinfo = std::fs::read_to_string("/proc/sys/kernel/osrelease"); + if let Ok(info) = vinfo { + let vnum: Vec<&str> = info.as_str().split('.').collect(); + if vnum.len() >= 2 { + let major: u8 = vnum[0].parse().unwrap(); + let minor: u8 = vnum[1].parse().unwrap(); + return major >= RELEASE_MAJOR_VERSION && minor >= RELEASE_MINOR_VERSION; + } + } + false + } + + #[test] + fn test_core_sched() { + std::env::set_var("SCHED_CORE", "1"); + assert_eq!(std::env::var("SCHED_CORE").unwrap(), "1"); + if core_sched_landed() { + // it is possible that the machine running this test does not support SMT, + // therefore it does not make sense to assert a successful prctl call + // but we can still make sure that the return value is a possible value + let e = core_sched_create(PROCESS_GROUP); + if let Err(errno) = e { + if errno != EINVAL + && errno != ENODEV + && errno != ENOMEM + && errno != EPERM + && errno != ESRCH + { + panic!("impossible return value {:?}", errno); + } + } + } + } +} diff --git a/src/runtime-rs/crates/shim/src/error.rs b/src/runtime-rs/crates/shim/src/error.rs new file mode 100644 index 000000000000..3867963fbc26 --- /dev/null +++ b/src/runtime-rs/crates/shim/src/error.rs @@ -0,0 +1,52 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::path::PathBuf; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("failed to parse argument {0}")] + ParseArgument(String), + #[error("failed to get bundle path")] + GetBundlePath, + #[error("invalid argument")] + InvalidArgument, + #[error("argument is empty {0}")] + ArgumentIsEmpty(String), + #[error("invalid path {0}")] + InvalidPath(String), + + // File + #[error("failed to open file {0}")] + FileOpen(String), + #[error("failed to get file metadata {0}")] + FileGetMetadata(String), + #[error("failed to read file {0}")] + FileRead(String), + #[error("failed to write file {0}")] + FileWrite(String), + + #[error("empty sandbox id")] + EmptySandboxId, + #[error("failed to get self exec: {0}")] + SelfExec(#[source] std::io::Error), + #[error("failed to bind socket at {1} with error: {0}")] + BindSocket(#[source] std::io::Error, 
PathBuf), + #[error("failed to spawn child: {0}")] + SpawnChild(#[source] std::io::Error), + #[error("failed to clean container {0}")] + CleanUpContainer(String), + #[error("failed to get env variable: {0}")] + EnvVar(#[source] std::env::VarError), + #[error("failed to parse server fd environment variable {0}")] + ServerFd(String), + #[error("failed to wait ttrpc server when {0}")] + WaitServer(String), + #[error("failed to get system time: {0}")] + SystemTime(#[source] std::time::SystemTimeError), + #[error("failed to parse pid")] + ParsePid, +} diff --git a/src/runtime-rs/crates/shim/src/lib.rs b/src/runtime-rs/crates/shim/src/lib.rs new file mode 100644 index 000000000000..59bf22adadad --- /dev/null +++ b/src/runtime-rs/crates/shim/src/lib.rs @@ -0,0 +1,25 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +#[macro_use] +extern crate slog; + +logging::logger_with_subsystem!(sl, "shim"); + +mod args; +pub use args::Args; +mod error; +pub use error::Error; +mod logger; +mod panic_hook; +mod shim; +pub use crate::shim::ShimExecutor; +mod core_sched; +#[rustfmt::skip] +pub mod config; +mod shim_delete; +mod shim_run; +mod shim_start; diff --git a/src/runtime-rs/crates/shim/src/logger.rs b/src/runtime-rs/crates/shim/src/logger.rs new file mode 100644 index 000000000000..50ba891fb357 --- /dev/null +++ b/src/runtime-rs/crates/shim/src/logger.rs @@ -0,0 +1,41 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::os::unix::fs::OpenOptionsExt; + +use anyhow::{Context, Result}; + +use crate::Error; + +pub(crate) fn set_logger(path: &str, sid: &str, is_debug: bool) -> Result { + let fifo = std::fs::OpenOptions::new() + .custom_flags(libc::O_NONBLOCK) + .create(true) + .write(true) + .append(true) + .open(path) + .context(Error::FileOpen(path.to_string()))?; + + let level = if is_debug { + slog::Level::Debug + } else { + slog::Level::Info + }; + + let (logger, async_guard) = logging::create_logger("kata-runtime", sid, level, fifo); + + // not reset global logger when drop + slog_scope::set_global_logger(logger).cancel_reset(); + + let level = if is_debug { + log::Level::Debug + } else { + log::Level::Info + }; + slog_stdlog::init_with_level(level).context(format!("init with level {}", level))?; + + Ok(async_guard) +} diff --git a/src/runtime-rs/crates/shim/src/panic_hook.rs b/src/runtime-rs/crates/shim/src/panic_hook.rs new file mode 100644 index 000000000000..88dbf305a66b --- /dev/null +++ b/src/runtime-rs/crates/shim/src/panic_hook.rs @@ -0,0 +1,57 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{boxed::Box, fs::OpenOptions, io::Write, ops::Deref}; + +use backtrace::Backtrace; + +const KMESG_DEVICE: &str = "/dev/kmsg"; + +// TODO: the Kata 1.x runtime had a SIGUSR1 handler that would log a formatted backtrace on +// receiving that signal. It could be useful to re-add that feature. 
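+// The hook installed below logs the panic location, cause and a backtrace via slog, mirrors the same information line by line to /dev/kmsg, and then aborts the process.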
+pub(crate) fn set_panic_hook() { + std::panic::set_hook(Box::new(move |panic_info| { + let (filename, line) = panic_info + .location() + .map(|loc| (loc.file(), loc.line())) + .unwrap_or(("", 0)); + + let cause = panic_info + .payload() + .downcast_ref::() + .map(std::string::String::deref); + + let cause = cause.unwrap_or_else(|| { + panic_info + .payload() + .downcast_ref::<&str>() + .copied() + .unwrap_or("") + }); + let bt = Backtrace::new(); + let bt_data = format!("{:?}", bt); + error!( + sl!(), + "A panic occurred at {}:{}: {}\r\n{:?}", filename, line, cause, bt_data + ); + + // print panic log to dmesg + // The panic log size is too large to /dev/kmsg, so write by line. + if let Ok(mut file) = OpenOptions::new().write(true).open(KMESG_DEVICE) { + file.write_all( + format!("A panic occurred at {}:{}: {}", filename, line, cause).as_bytes(), + ) + .ok(); + let lines: Vec<&str> = bt_data.split('\n').collect(); + for line in lines { + file.write_all(line.as_bytes()).ok(); + } + + file.flush().ok(); + } + std::process::abort(); + })); +} diff --git a/src/runtime-rs/crates/shim/src/shim.rs b/src/runtime-rs/crates/shim/src/shim.rs new file mode 100644 index 000000000000..a197f6f3198f --- /dev/null +++ b/src/runtime-rs/crates/shim/src/shim.rs @@ -0,0 +1,119 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + os::unix::ffi::OsStrExt, + path::{Path, PathBuf}, +}; + +use anyhow::{anyhow, Context, Result}; +use sha2::Digest; + +use crate::{Args, Error}; + +const SOCKET_ROOT: &str = "/run/containerd"; +const SHIM_PID_FILE: &str = "shim.pid"; + +pub(crate) const ENV_KATA_RUNTIME_BIND_FD: &str = "KATA_RUNTIME_BIND_FD"; + +/// Command executor for shim. +#[derive(Debug)] +pub struct ShimExecutor { + pub(crate) args: Args, +} + +impl ShimExecutor { + /// Create a new instance of [`Shim`]. + pub fn new(args: Args) -> Self { + ShimExecutor { args } + } + + pub(crate) fn load_oci_spec(&self, path: &Path) -> Result { + let spec_file = path.join(oci::OCI_SPEC_CONFIG_FILE_NAME); + oci::Spec::load(spec_file.to_str().unwrap_or_default()).context("load spec") + } + + pub(crate) fn write_address(&self, path: &Path, address: &Path) -> Result<()> { + let file_path = &path.join("address"); + std::fs::write(file_path, address.as_os_str().as_bytes()) + .context(Error::FileWrite(format!("{:?}", &file_path))) + } + + pub(crate) fn write_pid_file(&self, path: &Path, pid: u32) -> Result<()> { + let file_path = &path.join(SHIM_PID_FILE); + std::fs::write(file_path, format!("{}", pid)) + .context(Error::FileWrite(format!("{:?}", &file_path))) + } + + // There may be a multi-container for a Pod, each container has a bundle path, we need to write + // the PID to the file for each container in their own bundle path, so we can directly get the + // `bundle_path()` and write the PID. + // While the real runtime process's PID is stored in the file in the sandbox container's bundle + // path, so needs to read from the sandbox container's bundle path. 
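+ // In practice write_pid_file() is called with each container's own bundle path, while read_pid_file() is called with the sandbox container's bundle path (see get_shim_info_from_sandbox() in shim_start.rs).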
+ pub(crate) fn read_pid_file(&self, path: &Path) -> Result { + let file_path = path.join(SHIM_PID_FILE); + let data = std::fs::read_to_string(&file_path) + .context(Error::FileOpen(format!("{:?}", file_path)))?; + + data.parse::().context(Error::ParsePid) + } + + pub(crate) fn socket_address(&self, id: &str) -> Result { + if id.is_empty() { + return Err(anyhow!(Error::EmptySandboxId)); + } + + let data = [&self.args.address, &self.args.namespace, id].join("/"); + let mut hasher = sha2::Sha256::new(); + hasher.update(data); + // https://github.com/containerd/containerd/blob/v1.6.8/runtime/v2/shim/util_unix.go#L68 to + // generate a shim socket path. + Ok(PathBuf::from(format!( + "unix://{}/s/{:X}", + SOCKET_ROOT, + hasher.finalize() + ))) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serial_test::serial; + + use kata_sys_util::spec::get_bundle_path; + + #[test] + #[serial] + fn test_shim_executor() { + let dir = tempfile::tempdir().unwrap(); + let bundle_path = dir.path(); + std::env::set_current_dir(bundle_path).unwrap(); + + let args = Args { + id: "default_id".into(), + namespace: "default_namespace".into(), + address: "default_address".into(), + publish_binary: "containerd".into(), + bundle: bundle_path.to_str().unwrap().into(), + ..Default::default() + }; + + let executor = ShimExecutor::new(args); + + executor + .write_address(bundle_path, Path::new("12345")) + .unwrap(); + let dir = get_bundle_path().unwrap(); + let file_path = &dir.join("address"); + let buf = std::fs::read_to_string(file_path).unwrap(); + assert_eq!(&buf, "12345"); + + executor.write_pid_file(&dir, 1267).unwrap(); + let read_pid = executor.read_pid_file(&dir).unwrap(); + assert_eq!(read_pid, 1267); + } +} diff --git a/src/runtime-rs/crates/shim/src/shim_delete.rs b/src/runtime-rs/crates/shim/src/shim_delete.rs new file mode 100644 index 000000000000..412fc8be6cb6 --- /dev/null +++ b/src/runtime-rs/crates/shim/src/shim_delete.rs @@ -0,0 +1,71 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{Context, Result}; +use containerd_shim_protos::api; +use kata_sys_util::spec::{get_bundle_path, get_container_type, load_oci_spec}; +use kata_types::container::ContainerType; +use nix::{sys::signal::kill, sys::signal::SIGKILL, unistd::Pid}; +use protobuf::Message; +use std::{fs, path::Path}; + +use crate::{shim::ShimExecutor, Error}; + +impl ShimExecutor { + pub async fn delete(&mut self) -> Result<()> { + self.args.validate(true).context("validate")?; + let rsp = self.do_cleanup().await.context("shim do cleanup")?; + rsp.write_to_writer(&mut std::io::stdout()) + .context(Error::FileWrite(format!("write {:?} to stdout", rsp)))?; + Ok(()) + } + + async fn do_cleanup(&self) -> Result { + let mut rsp = api::DeleteResponse::new(); + rsp.set_exit_status(128 + libc::SIGKILL as u32); + let mut exited_time = protobuf::well_known_types::timestamp::Timestamp::new(); + let seconds = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map_err(Error::SystemTime)? 
+ .as_secs() as i64; + exited_time.seconds = seconds; + rsp.set_exited_at(exited_time); + + let address = self + .socket_address(&self.args.id) + .context("socket address")?; + let trim_path = address.strip_prefix("unix://").context("trim path")?; + let file_path = Path::new("/").join(trim_path); + let file_path = file_path.as_path(); + if std::fs::metadata(file_path).is_ok() { + info!(sl!(), "remote socket path: {:?}", &file_path); + fs::remove_file(file_path).ok(); + } + + if let Err(e) = service::ServiceManager::cleanup(&self.args.id).await { + error!( + sl!(), + "failed to cleanup in service manager: {:?}. force shutdown shim process", e + ); + + let bundle_path = get_bundle_path().context("get bundle path")?; + if let Ok(spec) = load_oci_spec() { + if let Ok(ContainerType::PodSandbox) = get_container_type(&spec) { + // only force shutdown for sandbox container + if let Ok(shim_pid) = self.read_pid_file(&bundle_path) { + info!(sl!(), "force to shutdown shim process {}", shim_pid); + let pid = Pid::from_raw(shim_pid as i32); + if let Err(_e) = kill(pid, SIGKILL) { + // ignore kill errors + } + } + } + } + } + + Ok(rsp) + } +} diff --git a/src/runtime-rs/crates/shim/src/shim_run.rs b/src/runtime-rs/crates/shim/src/shim_run.rs new file mode 100644 index 000000000000..64e81ca40783 --- /dev/null +++ b/src/runtime-rs/crates/shim/src/shim_run.rs @@ -0,0 +1,81 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::os::unix::io::RawFd; + +use anyhow::{Context, Result}; +use kata_sys_util::spec::get_bundle_path; + +use crate::{ + core_sched, logger, + shim::{ShimExecutor, ENV_KATA_RUNTIME_BIND_FD}, + Error, +}; + +impl ShimExecutor { + pub async fn run(&mut self) -> Result<()> { + crate::panic_hook::set_panic_hook(); + let sid = self.args.id.clone(); + let bundle_path = get_bundle_path().context("get bundle")?; + let path = bundle_path.join("log"); + let _logger_guard = + logger::set_logger(path.to_str().unwrap(), &sid, self.args.debug).context("set logger"); + if try_core_sched().is_err() { + warn!( + sl!(), + "Failed to enable core sched since prctl() returns non-zero value." + ); + } + + self.do_run() + .await + .map_err(|err| { + error!(sl!(), "failed run shim {:?}", err); + err + }) + .context("run shim")?; + + Ok(()) + } + + async fn do_run(&mut self) -> Result<()> { + info!(sl!(), "start to run"); + self.args.validate(false).context("validate")?; + + let server_fd = get_server_fd().context("get server fd")?; + let service_manager = service::ServiceManager::new( + &self.args.id, + &self.args.publish_binary, + &self.args.address, + &self.args.namespace, + server_fd, + ) + .await + .context("new shim server")?; + service_manager.run().await.context("run")?; + + Ok(()) + } +} + +fn get_server_fd() -> Result { + let env_fd = std::env::var(ENV_KATA_RUNTIME_BIND_FD).map_err(Error::EnvVar)?; + let fd = env_fd + .parse::() + .map_err(|_| Error::ServerFd(env_fd))?; + Ok(fd) +} + +// TODO: currently we log a warning on fail (i.e. kernel version < 5.14), maybe just exit +// TODO: more test on higher version of kernel +fn try_core_sched() -> Result<()> { + if let Ok(v) = std::env::var("SCHED_CORE") { + if !v.is_empty() { + core_sched::core_sched_create(core_sched::PROCESS_GROUP)? 
+ } + } + Ok(()) +} diff --git a/src/runtime-rs/crates/shim/src/shim_start.rs b/src/runtime-rs/crates/shim/src/shim_start.rs new file mode 100644 index 000000000000..06a7f3ae5f87 --- /dev/null +++ b/src/runtime-rs/crates/shim/src/shim_start.rs @@ -0,0 +1,231 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{ + fs, + io::Write, + os::unix::{io::IntoRawFd, prelude::OsStrExt}, + path::{Path, PathBuf}, +}; + +use anyhow::{anyhow, Context, Result}; +use kata_sys_util::spec::get_bundle_path; +use kata_types::{container::ContainerType, k8s}; +use unix_socket::UnixListener; + +use crate::{ + shim::{ShimExecutor, ENV_KATA_RUNTIME_BIND_FD}, + Error, +}; + +impl ShimExecutor { + pub fn start(&mut self) -> Result<()> { + self.args.validate(false).context("validate")?; + + let address = self.do_start().context("do start")?; + std::io::stdout() + .write_all(address.as_os_str().as_bytes()) + .context("failed to write stdout")?; + Ok(()) + } + + fn do_start(&mut self) -> Result { + let bundle_path = get_bundle_path().context("get bundle path")?; + let spec = self.load_oci_spec(&bundle_path)?; + let (container_type, id) = k8s::container_type_with_id(&spec); + + match container_type { + ContainerType::PodSandbox | ContainerType::SingleContainer => { + let address = self.socket_address(&self.args.id)?; + let socket = new_listener(&address)?; + let child_pid = self.create_shim_process(socket)?; + self.write_pid_file(&bundle_path, child_pid)?; + self.write_address(&bundle_path, &address)?; + Ok(address) + } + ContainerType::PodContainer => { + let sid = id + .ok_or(Error::InvalidArgument) + .context("get sid for container")?; + let (address, pid) = self.get_shim_info_from_sandbox(&sid)?; + self.write_pid_file(&bundle_path, pid)?; + self.write_address(&bundle_path, &address)?; + Ok(address) + } + } + } + + fn new_command(&self) -> Result { + if self.args.id.is_empty() + || self.args.namespace.is_empty() + || self.args.address.is_empty() + || self.args.publish_binary.is_empty() + { + return Err(anyhow!("invalid param")); + } + + let bundle_path = get_bundle_path().context("get bundle path")?; + let self_exec = std::env::current_exe().map_err(Error::SelfExec)?; + let mut command = std::process::Command::new(self_exec); + + command + .current_dir(bundle_path) + .stdin(std::process::Stdio::null()) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .arg("-id") + .arg(&self.args.id) + .arg("-namespace") + .arg(&self.args.namespace) + .arg("-address") + .arg(&self.args.address) + .arg("-publish-binary") + .arg(&self.args.publish_binary) + .env("RUST_BACKTRACE", "1"); + + if self.args.debug { + command.arg("-debug"); + } + + Ok(command) + } + + fn create_shim_process(&self, socket: T) -> Result { + let mut cmd = self.new_command().context("new command")?; + cmd.env( + ENV_KATA_RUNTIME_BIND_FD, + format!("{}", socket.into_raw_fd()), + ); + let child = cmd + .spawn() + .map_err(Error::SpawnChild) + .context("spawn child")?; + + Ok(child.id()) + } + + fn get_shim_info_from_sandbox(&self, sandbox_id: &str) -> Result<(PathBuf, u32)> { + // All containers of a pod share the same pod socket address. 
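+ // The address is recomputed from the sandbox id, and the shim PID is read from the sandbox container's bundle directory, which is expected to be a sibling of this container's bundle directory.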
+ let address = self.socket_address(sandbox_id).context("socket address")?; + let bundle_path = get_bundle_path().context("get bundle path")?; + let parent_bundle_path = Path::new(&bundle_path) + .parent() + .unwrap_or_else(|| Path::new("")); + let sandbox_bundle_path = parent_bundle_path + .join(sandbox_id) + .canonicalize() + .context(Error::GetBundlePath)?; + let pid = self.read_pid_file(&sandbox_bundle_path)?; + + Ok((address, pid)) + } +} + +fn new_listener(address: &Path) -> Result { + let trim_path = address.strip_prefix("unix:").context("trim path")?; + let file_path = Path::new("/").join(trim_path); + let file_path = file_path.as_path(); + if let Some(parent_dir) = file_path.parent() { + fs::create_dir_all(parent_dir).context("create parent dir")?; + } + + UnixListener::bind(file_path).context("bind address") +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + use serial_test::serial; + use tests_utils::gen_id; + + use super::*; + use crate::Args; + + #[test] + #[serial] + fn test_new_command() { + let dir = tempfile::tempdir().unwrap(); + let bundle_path = dir.path(); + std::env::set_current_dir(bundle_path).unwrap(); + + let args = Args { + id: "default_id".into(), + namespace: "default_namespace".into(), + address: "default_address".into(), + publish_binary: "containerd".into(), + bundle: bundle_path.to_str().unwrap().into(), + ..Default::default() + }; + let mut executor = ShimExecutor::new(args); + + let cmd = executor.new_command().unwrap(); + assert_eq!(cmd.get_args().len(), 8); + assert_eq!(cmd.get_envs().len(), 1); + assert_eq!(cmd.get_current_dir().unwrap(), get_bundle_path().unwrap()); + + executor.args.debug = true; + let cmd = executor.new_command().unwrap(); + assert_eq!(cmd.get_args().len(), 9); + assert_eq!(cmd.get_envs().len(), 1); + assert_eq!(cmd.get_current_dir().unwrap(), get_bundle_path().unwrap()); + } + + #[test] + #[serial] + fn test_get_info_from_sandbox() { + let dir = tempfile::tempdir().unwrap(); + let sandbox_id = gen_id(16); + let bundle_path = &dir.path().join(&sandbox_id); + std::fs::create_dir(bundle_path).unwrap(); + std::env::set_current_dir(bundle_path).unwrap(); + + let args = Args { + id: sandbox_id.to_owned(), + namespace: "default_namespace".into(), + address: "default_address".into(), + publish_binary: "containerd".into(), + bundle: bundle_path.to_str().unwrap().into(), + ..Default::default() + }; + let executor = ShimExecutor::new(args); + + let addr = executor.socket_address(&executor.args.id).unwrap(); + executor.write_address(bundle_path, &addr).unwrap(); + executor.write_pid_file(bundle_path, 1267).unwrap(); + + let container_id = gen_id(16); + let bundle_path2 = &dir.path().join(&container_id); + std::fs::create_dir(bundle_path2).unwrap(); + std::env::set_current_dir(bundle_path2).unwrap(); + + let args = Args { + id: container_id, + namespace: "default_namespace".into(), + address: "default_address".into(), + publish_binary: "containerd".into(), + bundle: bundle_path2.to_str().unwrap().into(), + ..Default::default() + }; + let executor2 = ShimExecutor::new(args); + + let (address, pid) = executor2.get_shim_info_from_sandbox(&sandbox_id).unwrap(); + + assert_eq!(pid, 1267); + assert_eq!(&address, &addr); + } + + #[test] + #[serial] + fn test_new_listener() { + let path = "/tmp/aaabbbccc"; + let uds_path = format!("unix://{}", path); + std::fs::remove_file(path).ok(); + + let _ = new_listener(Path::new(&uds_path)).unwrap(); + std::fs::remove_file(path).ok(); + } +} diff --git a/src/runtime-rs/docs/images/crate-overview.drawio 
b/src/runtime-rs/docs/images/crate-overview.drawio new file mode 100644 index 000000000000..57dd4ab20d49 --- /dev/null +++ b/src/runtime-rs/docs/images/crate-overview.drawio @@ -0,0 +1,415 @@ +[crate-overview.drawio: draw.io XML source of the crate overview diagram below; the markup is not reproduced here] \ No newline at end of file diff --git a/src/runtime-rs/docs/images/crate-overview.svg b/src/runtime-rs/docs/images/crate-overview.svg new file mode 100644 index 000000000000..da0f258792ed --- /dev/null +++ b/src/runtime-rs/docs/images/crate-overview.svg @@ -0,0 +1,5 @@ +[crate-overview.svg: crate overview diagram. crate::shim drives service_manager.run() in crate::service, where struct ServiceManager { handler: Arc<RuntimeHandlerManager>, task_server: Option<Server> } and struct TaskService forward RPC calls (create_task, start, delete, kill, exec, ...) through handler_message() to struct RuntimeHandlerManager in crate::runtimes. struct RuntimeHandlerManagerInner { id: String, runtime_instance: RuntimeInstance } uses trait RuntimeHandler { fn init(); fn name(); fn new_handler(); async fn new_instance(); fn cleanup() }, implemented by VirtContainer, LinuxContainer and WasmContainer. new_instance() yields struct RuntimeInstance { sandbox: Arc<dyn Sandbox>, container_manager: Arc<dyn ContainerManager> }; trait Sandbox provides start/stop/cleanup/shutdown and trait ContainerManager provides create/pause/resume/stats/update/connect_container. For VirtContainer these are struct VirtSandbox { sid, resource_manager, agent, hypervisor } and struct VirtContainerManager { sid, pid, containers: HashMap<String, Container>, resource_manager, agent }; struct Container and struct ContainerInner track the init process, exec processes, rootfs (trait Rootfs) and volumes (trait Volume). struct ResourceManager / ResourceManagerInner hold { sid, toml_config, agent, hypervisor, network, share_fs, rootfs_resource, volume_resource, cgroups_resource }. trait Hypervisor (prepare_vm, start_vm, stop_vm, pause_vm, save_vm, resume_vm, ...) in crate::hypervisor is implemented by Dragonball and QEMU (WIP); trait Agent (create_sandbox, destroy_sandbox, create_container, pause_container, ...) in crate::agent is implemented by KataAgent. Legend: function call, reference, trait impl.]
\ No newline at end of file diff --git a/src/runtime-rs/docs/images/hypervisor-config.svg b/src/runtime-rs/docs/images/hypervisor-config.svg new file mode 100644 index 000000000000..a396cd04b37d --- /dev/null +++ b/src/runtime-rs/docs/images/hypervisor-config.svg @@ -0,0 +1,150 @@ +[hypervisor-config.svg: hypervisor configuration diagram. The hypervisor_mod HYPERVISOR_PLUGINS registry in the config crate holds ConfigPlugin implementations (dragonball, qemu, cloudHypervisor), each exposing new(), register(), name(), adjust_config(), validate(), get_min_memory() and get_max_cpus(); ConfigOps provides adjust_config()/validate(). TomlConfig { agent: HashMap<String, Agent>, hypervisor: HashMap<String, Hypervisor>, runtime: Runtime } offers load_from_file(), load_raw_from_file(), load(), validate() and get_default_config_file(). The Hypervisor config carries blockdev_info, boot_info, cpu_info, debug_info, device_info, machine_info, memory_info, network_info, security_info and shared_fs sections plus validate_hypervisor_path(), validate_hypervisor_ctlpath() and validate_jailer_path().] \ No newline at end of file diff --git a/src/runtime-rs/docs/images/vm-start.svg b/src/runtime-rs/docs/images/vm-start.svg new file mode 100644 index 000000000000..ad85bf32f340 --- /dev/null +++ b/src/runtime-rs/docs/images/vm-start.svg @@ -0,0 +1,60 @@ +[vm-start.svg: VM start sequence diagram between sandbox start, hypervisor, agent, resource_manager, resource_manager_inner, share_fs and network. Steps: 1 prepare_vm() fills the hypervisor struct and sets the VMM state; 2-4 prepare_before_start_vm() / setup_device_before_start_vm(); 5-6 prepare_virtiofs() and add_device() queue the share_fs device; 7-9 network new() scans the network namespace to create network pairs, setup() applies the network model and add_device() queues the tap/macvtap devices; 10 start_vm() boots the hypervisor; 11-12 get_agent_socket() and agent start(); 13-16 setup_after_start_vm() / setup_device_after_start_vm() set up virtio-fs and share the source path into the guest; 17-19 handle_interfaces(), handle_neighbours() and handle_routes() update the guest neighbours, interfaces and routes; 20 create_sandbox().] \ No newline at end of file diff --git a/src/runtime-rs/tests/texture/configuration-qemu.toml b/src/runtime-rs/tests/texture/configuration-qemu.toml new file mode 100644 index 000000000000..b8d876f89e15 --- /dev/null +++ b/src/runtime-rs/tests/texture/configuration-qemu.toml @@ -0,0 +1,90 @@ +[hypervisor.qemu] +path = "/usr/bin/lsns" +valid_hypervisor_paths = ["/usr/bin/qemu*", "/opt/qemu?","/usr/bin/ls*","./hypervisor_path"] +valid_jailer_paths = ["/usr/lib/rust","./test_jailer_path"] +ctlpath = "/usr/bin/" +valid_ctlpaths = ["/usr/lib/jvm","usr/bin/qemu-io","./jvm"] +disable_nesting_checks = true +enable_iothreads = true +jailer_path = "/usr/local" +kernel = "/usr/bin/../bin/zcmp" +image = "/usr/bin/./tabs" +kernel_params = "ro" +firmware = "/etc/hostname" + +cpu_features="pmu=off,vmx=off" +default_vcpus = 2 +default_maxvcpus = 64 + +machine_type = "q35" +confidential_guest = true +rootless = true +enable_annotations = ["shared_fs","path",
"ctlpath","jailer_path","enable_iothreads","default_memory","memory_slots","enable_mem_prealloc","enable_hugepages","file_mem_backend","enable_virtio_mem","enable_swap","enable_guest_swap","default_vcpus","virtio_fs_extra_args","block_device_driver","vhost_user_store_path","kernel","guest_hook_path","block_device_cache_noflush","virtio_fs_daemon"] +machine_accelerators="noapic" +default_bridges = 2 +default_memory = 128 +memory_slots = 128 +memory_offset = 0x100000 +enable_virtio_mem = true +disable_block_device_use = false +shared_fs = "virtio-fs" +virtio_fs_daemon = "/usr/bin/uptime" +valid_virtio_fs_daemon_paths = ["/usr/local/bin/virtiofsd*","./virtio_fs"] +virtio_fs_cache_size = 512 +virtio_fs_extra_args = ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +virtio_fs_cache = "always" +block_device_driver = "virtio-blk-pci" +block_device_cache_set = true +block_device_cache_direct = true +block_device_cache_noflush = true +enable_mem_prealloc = true +enable_hugepages = true +enable_vhost_user_store = true +vhost_user_store_path = "/tmp" +valid_vhost_user_store_paths = ["/var/kata/vhost-user-store*", "/tmp/kata?","/var/tmp","./store_path"] +enable_iommu = true +enable_iommu_platform = true +file_mem_backend = "/dev/shm" +valid_file_mem_backends = ["/dev/shm","/dev/snd","./test_file_backend_mem_root"] +enable_swap = true +pflashes = ["/proc/mounts"] +enable_debug = true +msize_9p = 16384 +disable_image_nvdimm = true +hotplug_vfio_on_root_bus = true +pcie_root_port = 2 +disable_vhost_net = true +entropy_source= "/dev/urandom" +valid_entropy_sources = ["/dev/urandom", "/dev/random"] +guest_hook_path = "/usr/share" +rx_rate_limiter_max_rate = 10000 +tx_rate_limiter_max_rate = 10000 +guest_memory_dump_path="/var/crash/kata" +guest_memory_dump_paging = true +enable_guest_swap = true + +[agent.agent0] +enable_tracing = true +debug_console_enabled = true +debug = true +dial_timeout = 1 +kernel_modules = ["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1","i915_enabled_ppgtt=0"] +container_pipe_size = 2 +[runtime] +enable_debug = true +internetworking_model="macvtap" +disable_guest_seccomp=false +enable_tracing = true +jaeger_endpoint = "localhost:1234" +jaeger_user = "user" +jaeger_password = "pw" +disable_new_netns = true +sandbox_cgroup_only=true +sandbox_bind_mounts=["/proc/self"] +vfio_mode="vfio" +experimental=["a", "b"] +enable_pprof = true +hypervisor_name = "qemu" +agent_name = "agent0" + + diff --git a/src/runtime-rs/tests/texture/image-bundle/config.json b/src/runtime-rs/tests/texture/image-bundle/config.json new file mode 100644 index 000000000000..0b6665a2eb75 --- /dev/null +++ b/src/runtime-rs/tests/texture/image-bundle/config.json @@ -0,0 +1,395 @@ +{ + "ociVersion": "0.5.0-dev", + "process": { + "terminal": true, + "user": { + "uid": 1, + "gid": 1, + "additionalGids": [ + 5, + 6 + ] + }, + "args": [ + "sh" + ], + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm" + ], + "cwd": "/", + "capabilities": { + "bounding": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "permitted": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "inheritable": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "effective": [ + "CAP_AUDIT_WRITE", + "CAP_KILL" + ], + "ambient": [ + "CAP_NET_BIND_SERVICE" + ] + }, + "rlimits": [ + { + "type": "RLIMIT_CORE", + "hard": 1024, + "soft": 1024 + }, + { + "type": "RLIMIT_NOFILE", + "hard": 1024, + "soft": 1024 + } + ], + "apparmorProfile": 
"acme_secure_profile", + "selinuxLabel": "system_u:system_r:svirt_lxc_net_t:s0:c124,c675", + "noNewPrivileges": true + }, + "root": { + "path": "rootfs", + "readonly": true + }, + "hostname": "slartibartfast", + "mounts": [ + { + "destination": "/proc", + "type": "proc", + "source": "proc" + }, + { + "destination": "/dev", + "type": "tmpfs", + "source": "tmpfs", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ] + }, + { + "destination": "/dev/pts", + "type": "devpts", + "source": "devpts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid=5" + ] + }, + { + "destination": "/dev/shm", + "type": "tmpfs", + "source": "shm", + "options": [ + "nosuid", + "noexec", + "nodev", + "mode=1777", + "size=65536k" + ] + }, + { + "destination": "/dev/mqueue", + "type": "mqueue", + "source": "mqueue", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/sys", + "type": "sysfs", + "source": "sysfs", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/sys/fs/cgroup", + "type": "cgroup", + "source": "cgroup", + "options": [ + "nosuid", + "noexec", + "nodev", + "relatime", + "ro" + ] + } + ], + "hooks": { + "prestart": [ + { + "path": "/usr/bin/fix-mounts", + "args": [ + "fix-mounts", + "arg1", + "arg2" + ], + "env": [ + "key1=value1" + ] + }, + { + "path": "/usr/bin/setup-network" + } + ], + "createRuntime": [ + { + "path": "/usr/bin/fix-mounts", + "args": ["fix-mounts", "arg1", "arg2"], + "env": [ "key1=value1"] + }, + { + "path": "/usr/bin/setup-network" + } + ], + "createContainer": [ + { + "path": "/usr/bin/mount-hook", + "args": ["-mount", "arg1", "arg2"], + "env": [ "key1=value1"] + } + ], + "startContainer": [ + { + "path": "/usr/bin/refresh-ldcache" + } + ], + "poststart": [ + { + "path": "/usr/bin/notify-start", + "timeout": 5 + } + ], + "poststop": [ + { + "path": "/usr/sbin/cleanup.sh", + "args": [ + "cleanup.sh", + "-f" + ] + } + ] + }, + "linux": { + "devices": [ + { + "path": "/dev/fuse", + "type": "c", + "major": 10, + "minor": 229, + "fileMode": 438, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/sda", + "type": "b", + "major": 8, + "minor": 0, + "fileMode": 432, + "uid": 0, + "gid": 0 + } + ], + "uidMappings": [ + { + "containerID": 0, + "hostID": 1000, + "size": 32000 + } + ], + "gidMappings": [ + { + "containerID": 0, + "hostID": 1000, + "size": 32000 + } + ], + "sysctl": { + "net.ipv4.ip_forward": "1", + "net.core.somaxconn": "256" + }, + "cgroupsPath": "/myRuntime/myContainer", + "resources": { + "network": { + "classID": 1048577, + "priorities": [ + { + "name": "eth0", + "priority": 500 + }, + { + "name": "eth1", + "priority": 1000 + } + ] + }, + "pids": { + "limit": 32771 + }, + "hugepageLimits": [ + { + "pageSize": "2MB", + "limit": 9223372036854772000 + }, + { + "pageSize": "64KB", + "limit": 1000000 + } + ], + "oomScoreAdj": 100, + "memory": { + "limit": 536870912, + "reservation": 536870912, + "swap": 536870912, + "kernel": -1, + "kernelTCP": -1, + "swappiness": 0, + "disableOOMKiller": false, + "useHierarchy": false + }, + "cpu": { + "shares": 1024, + "quota": 1000000, + "period": 500000, + "realtimeRuntime": 950000, + "realtimePeriod": 1000000, + "cpus": "2-3", + "mems": "0-7" + }, + "devices": [ + { + "allow": false, + "access": "rwm" + }, + { + "allow": true, + "type": "c", + "major": 10, + "minor": 229, + "access": "rw" + }, + { + "allow": true, + "type": "b", + "major": 8, + "minor": 0, + "access": "r" + } + ], + "blockIO": { + "weight": 10, + 
"leafWeight": 10, + "weightDevice": [ + { + "major": 8, + "minor": 0, + "weight": 500, + "leafWeight": 300 + }, + { + "major": 8, + "minor": 16, + "weight": 500 + } + ], + "throttleReadBpsDevice": [ + { + "major": 8, + "minor": 0, + "rate": 600 + } + ], + "throttleWriteIOPSDevice": [ + { + "major": 8, + "minor": 16, + "rate": 300 + } + ] + } + }, + "rootfsPropagation": "slave", + "seccomp": { + "defaultAction": "SCMP_ACT_ALLOW", + "architectures": [ + "SCMP_ARCH_X86", + "SCMP_ARCH_X32" + ], + "syscalls": [ + { + "names": [ + "getcwd", + "chmod" + ], + "action": "SCMP_ACT_ERRNO" + } + ] + }, + "namespaces": [ + { + "type": "pid" + }, + { + "type": "network" + }, + { + "type": "ipc" + }, + { + "type": "uts" + }, + { + "type": "mount" + }, + { + "type": "user" + }, + { + "type": "cgroup" + } + ], + "maskedPaths": [ + "/proc/kcore", + "/proc/latency_stats", + "/proc/timer_stats", + "/proc/sched_debug" + ], + "readonlyPaths": [ + "/proc/asound", + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ], + "mountLabel": "system_u:object_r:svirt_sandbox_file_t:s0:c715,c811" + }, + "annotations": { + "com.example.key1": "value1", + "com.example.key2": "value2" + } +} \ No newline at end of file diff --git a/src/runtime-rs/tests/utils/Cargo.toml b/src/runtime-rs/tests/utils/Cargo.toml new file mode 100644 index 000000000000..d35892da5d68 --- /dev/null +++ b/src/runtime-rs/tests/utils/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "tests_utils" +version = "0.1.0" +edition = "2018" +description = "This crate is used to share code among tests" +license = "Apache-2.0" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "^1.0" +rand = "0.8.4" +kata-types = { path = "../../../libs/kata-types" } diff --git a/src/runtime-rs/tests/utils/src/lib.rs b/src/runtime-rs/tests/utils/src/lib.rs new file mode 100644 index 000000000000..934968234146 --- /dev/null +++ b/src/runtime-rs/tests/utils/src/lib.rs @@ -0,0 +1,54 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +// This crate is used to share code among tests + +use anyhow::{anyhow, Result}; +use kata_types::config::{QemuConfig, TomlConfig}; +use std::{fs, path::PathBuf}; + +use rand::{ + distributions::Alphanumeric, + {thread_rng, Rng}, +}; + +fn get_kata_config_file(hypervisor_name: String) -> PathBuf { + let target = format!( + "{}/../texture/configuration-{}.toml", + env!("CARGO_MANIFEST_DIR"), + hypervisor_name + ); + std::fs::canonicalize(target).unwrap() +} + +pub fn get_image_bundle_path() -> PathBuf { + let target = format!("{}/../texture/image-bundle", env!("CARGO_MANIFEST_DIR")); + std::fs::canonicalize(target).unwrap() +} + +pub fn gen_id(len: usize) -> String { + thread_rng() + .sample_iter(&Alphanumeric) + .take(len) + .map(char::from) + .collect() +} + +pub fn load_test_config(hypervisor_name: String) -> Result { + match hypervisor_name.as_str() { + "qemu" => { + let qemu = QemuConfig::new(); + qemu.register(); + } + // TODO add other hypervisor test config + _ => { + return Err(anyhow!("invalid hypervisor {}", hypervisor_name)); + } + } + + let content = fs::read_to_string(get_kata_config_file(hypervisor_name))?; + Ok(TomlConfig::load(&content)?) 
+} diff --git a/src/runtime/.gitignore b/src/runtime/.gitignore index 0cc78193acba..5d402b719eb5 100644 --- a/src/runtime/.gitignore +++ b/src/runtime/.gitignore @@ -18,3 +18,4 @@ config-generated.go /virtcontainers/hook/mock/hook /virtcontainers/profile.cov /virtcontainers/utils/supportfiles +/virtcontainers/cpu_affinity_idx diff --git a/src/runtime/Makefile b/src/runtime/Makefile index fa07b87deba3..35a9ae5e19dd 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -56,6 +56,9 @@ BINLIBEXECLIST := BIN_PREFIX = $(PROJECT_TYPE) PROJECT_DIR = $(PROJECT_TAG) IMAGENAME = $(PROJECT_TAG).img +IMAGETDXNAME = $(PROJECT_TAG)-tdx.img +INITRDNAME = $(PROJECT_TAG)-initrd.img +INITRDSEVNAME = $(PROJECT_TAG)-initrd-sev.img TARGET = $(BIN_PREFIX)-runtime RUNTIME_OUTPUT = $(CURDIR)/$(TARGET) @@ -94,6 +97,10 @@ GENERATED_VARS = \ VERSION \ CONFIG_ACRN_IN \ CONFIG_QEMU_IN \ + CONFIG_QEMU_NVIDIA_GPU_IN \ + CONFIG_QEMU_SEV_IN \ + CONFIG_QEMU_TDX_IN \ + CONFIG_QEMU_SNP_IN \ CONFIG_CLH_IN \ CONFIG_FC_IN \ $(USER_VARS) @@ -110,9 +117,28 @@ PKGLIBEXECDIR := $(LIBEXECDIR)/$(PROJECT_DIR) KERNELDIR := $(PKGDATADIR) IMAGEPATH := $(PKGDATADIR)/$(IMAGENAME) +IMAGETDXPATH := $(PKGDATADIR)/$(IMAGETDXNAME) +INITRDPATH := $(PKGDATADIR)/$(INITRDNAME) +INITRDSEVPATH := $(PKGDATADIR)/$(INITRDSEVNAME) + +ROOTFSTYPE_EXT4 := \"ext4\" +ROOTFSTYPE_XFS := \"xfs\" +ROOTFSTYPE_EROFS := \"erofs\" +DEFROOTFSTYPE := $(ROOTFSTYPE_EXT4) + FIRMWAREPATH := FIRMWAREVOLUMEPATH := +FIRMWARETDVFPATH := $(PREFIXDEPS)/share/tdvf/OVMF.fd +FIRMWARETDVFVOLUMEPATH := + +FIRMWARESEVPATH := $(PREFIXDEPS)/share/ovmf/OVMF.fd +FIRMWARESNPPATH := $(PREFIXDEPS)/share/ovmf/OVMF.fd + +ROOTMEASURECONFIG ?= "" +KERNELPARAMS += $(ROOTMEASURECONFIG) +KERNELTDXPARAMS += $(ROOTMEASURECONFIG) + # Name of default configuration file the runtime will use. 
CONFIG_FILE = configuration.toml @@ -130,6 +156,12 @@ HYPERVISORS := $(HYPERVISOR_ACRN) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVIS QEMUPATH := $(QEMUBINDIR)/$(QEMUCMD) QEMUVALIDHYPERVISORPATHS := [\"$(QEMUPATH)\"] +QEMUTDXPATH := $(QEMUBINDIR)/$(QEMUTDXCMD) +QEMUTDXVALIDHYPERVISORPATHS := [\"$(QEMUTDXPATH)\"] + +QEMUSNPPATH := $(QEMUBINDIR)/$(QEMUSNPCMD) +QEMUSNPVALIDHYPERVISORPATHS := [\"$(QEMUSNPPATH)\"] + QEMUVIRTIOFSPATH := $(QEMUBINDIR)/$(QEMUVIRTIOFSCMD) CLHPATH := $(CLHBINDIR)/$(CLHCMD) @@ -162,13 +194,18 @@ DEFMEMSLOTS := 10 DEFMAXMEMSZ := 0 #Default number of bridges DEFBRIDGES := 1 -DEFENABLEANNOTATIONS := [\"enable_iommu\"] +DEFENABLEANNOTATIONS := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\"] DEFDISABLEGUESTSECCOMP := true DEFDISABLEGUESTEMPTYDIR := false #Default experimental features enabled DEFAULTEXPFEATURES := [] DEFDISABLESELINUX := false + +# Default guest SELinux configuration +DEFDISABLEGUESTSELINUX := true +DEFGUESTSELINUXLABEL := system_u:system_r:container_t + #Default SeccomSandbox param #The same default policy is used by libvirt #More explanation on https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg03348.html @@ -182,21 +219,22 @@ DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"] DEFDISABLEBLOCK := false DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs +DEFSHAREDFS_QEMU_TDX_VIRTIOFS := virtio-9p +DEFSHAREDFS_QEMU_SEV_VIRTIOFS := virtio-9p +DEFSHAREDFS_QEMU_SNP_VIRTIOFS := virtio-9p DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/virtiofsd -ifeq ($(ARCH),ppc64le) -DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/kata-qemu/virtiofsd -endif DEFVALIDVIRTIOFSDAEMONPATHS := [\"$(DEFVIRTIOFSDAEMON)\"] # Default DAX mapping cache size in MiB #if value is 0, DAX is not enabled DEFVIRTIOFSCACHESIZE ?= 0 DEFVIRTIOFSCACHE ?= auto +DEFVIRTIOFSQUEUESIZE ?= 1024 # Format example: # [\"-o\", \"arg1=xxx,arg2\", \"-o\", \"hello world\", \"--arg3=yyy\"] # # see `virtiofsd -h` for possible options. # Make sure you quote args. 
-DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\", \"-o\", \"announce_submounts\"] +DEFVIRTIOFSEXTRAARGS ?= [\"--thread-pool-size=1\", \"--announce-submounts\"] DEFENABLEIOTHREADS := false DEFENABLEVHOSTUSERSTORE := false DEFVHOSTUSERSTOREPATH := $(PKGRUNDIR)/vhost-user @@ -210,9 +248,13 @@ DEFVFIOMODE := guest-kernel DEFSANDBOXCGROUPONLY ?= false DEFSTATICRESOURCEMGMT ?= false +DEFSTATICRESOURCEMGMT_TEE = true DEFBINDMOUNTS := [] +# Image Service Offload +DEFSERVICEOFFLOAD ?= false + SED = sed CLI_DIR = cmd @@ -251,12 +293,69 @@ ifneq (,$(QEMUCMD)) CONFIGS += $(CONFIG_QEMU) + CONFIG_FILE_QEMU_TDX = configuration-qemu-tdx.toml + CONFIG_QEMU_TDX = config/$(CONFIG_FILE_QEMU_TDX) + CONFIG_QEMU_TDX_IN = $(CONFIG_QEMU_TDX).in + + CONFIG_PATH_QEMU_TDX = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU_TDX)) + CONFIG_PATHS += $(CONFIG_PATH_QEMU_TDX) + + SYSCONFIG_QEMU_TDX = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_QEMU_TDX)) + SYSCONFIG_PATHS_TDX += $(SYSCONFIG_QEMU_TDX) + + CONFIGS += $(CONFIG_QEMU_TDX) + + CONFIG_FILE_QEMU_SEV = configuration-qemu-sev.toml + CONFIG_QEMU_SEV = config/$(CONFIG_FILE_QEMU_SEV) + CONFIG_QEMU_SEV_IN = $(CONFIG_QEMU_SEV).in + + CONFIG_PATH_QEMU_SEV = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU_SEV)) + CONFIG_PATHS += $(CONFIG_PATH_QEMU_SEV) + + SYSCONFIG_QEMU_SEV = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_QEMU_SEV)) + SYSCONFIG_PATHS_SEV += $(SYSCONFIG_QEMU_SEV) + + CONFIGS += $(CONFIG_QEMU_SEV) + + CONFIG_FILE_QEMU_SNP = configuration-qemu-snp.toml + CONFIG_QEMU_SNP = config/$(CONFIG_FILE_QEMU_SNP) + CONFIG_QEMU_SNP_IN = $(CONFIG_QEMU_SNP).in + + CONFIG_PATH_QEMU_SNP = $(abspath $(CONFDIR)/$(CONFIG_FILE_QEMU_SNP)) + CONFIG_PATHS += $(CONFIG_PATH_QEMU_SNP) + + SYSCONFIG_QEMU_SNP = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_QEMU_SNP)) + SYSCONFIG_PATHS_SNP += $(SYSCONFIG_QEMU_SNP) + + CONFIGS += $(CONFIG_QEMU_SNP) + + CONFIG_FILE_QEMU_NVIDIA_GPU = configuration-qemu-nvidia-gpu.toml + CONFIG_QEMU_NVIDIA_GPU = config/$(CONFIG_FILE_QEMU_NVIDIA_GPU) + CONFIG_QEMU_NVIDIA_GPU_IN = $(CONFIG_QEMU_NVIDIA_GPU).in + + CONFIGS += $(CONFIG_QEMU_NVIDIA_GPU) + # qemu-specific options (all should be suffixed by "_QEMU") DEFBLOCKSTORAGEDRIVER_QEMU := virtio-scsi + DEFBLOCKDEVICEAIO_QEMU := io_uring DEFNETWORKMODEL_QEMU := tcfilter + KERNELTYPE = uncompressed KERNELNAME = $(call MAKE_KERNEL_NAME,$(KERNELTYPE)) KERNELPATH = $(KERNELDIR)/$(KERNELNAME) + + KERNELSEVTYPE = compressed + KERNELSEVNAME = $(call MAKE_KERNEL_SEV_NAME,$(KERNELSEVTYPE)) + KERNELSEVPATH = $(KERNELDIR)/$(KERNELSEVNAME) + + KERNELTDXTYPE = compressed + KERNELTDXNAME = $(call MAKE_KERNEL_TDX_NAME,$(KERNELTDXTYPE)) + KERNELTDXPATH = $(KERNELDIR)/$(KERNELTDXNAME) + + KERNELSNPTYPE = compressed + KERNELSNPNAME = $(call MAKE_KERNEL_SNP_NAME,$(KERNELSNPTYPE)) + KERNELSNPPATH = $(KERNELDIR)/$(KERNELSNPNAME) + endif ifneq (,$(CLHCMD)) @@ -325,7 +424,8 @@ ifneq (,$(ACRNCMD)) DEFMAXVCPUS_ACRN := 1 DEFBLOCKSTORAGEDRIVER_ACRN := virtio-blk DEFNETWORKMODEL_ACRN := macvtap - KERNEL_NAME_ACRN = $(call MAKE_KERNEL_NAME,$(KERNELTYPE)) + KERNELTYPE_ACRN = compressed + KERNEL_NAME_ACRN = $(call MAKE_KERNEL_NAME,$(KERNELTYPE_ACRN)) KERNELPATH_ACRN = $(KERNELDIR)/$(KERNEL_NAME_ACRN) endif @@ -399,7 +499,14 @@ USER_VARS += FCJAILERPATH USER_VARS += FCVALIDJAILERPATHS USER_VARS += SYSCONFIG USER_VARS += IMAGENAME +USER_VARS += IMAGETDXNAME USER_VARS += IMAGEPATH +USER_VARS += IMAGETDXPATH +USER_VARS += INITRDNAME +USER_VARS += INITRDSEVNAME +USER_VARS += INITRDPATH +USER_VARS += INITRDSEVPATH +USER_VARS += DEFROOTFSTYPE USER_VARS += MACHINETYPE USER_VARS += 
KERNELDIR USER_VARS += KERNELTYPE @@ -408,15 +515,24 @@ USER_VARS += KERNELTYPE_ACRN USER_VARS += KERNELTYPE_CLH USER_VARS += KERNELPATH_ACRN USER_VARS += KERNELPATH +USER_VARS += KERNELSEVPATH +USER_VARS += KERNELTDXPATH +USER_VARS += KERNELSNPPATH USER_VARS += KERNELPATH_CLH USER_VARS += KERNELPATH_FC USER_VARS += KERNELVIRTIOFSPATH USER_VARS += FIRMWAREPATH +USER_VARS += FIRMWARESEVPATH +USER_VARS += FIRMWARETDVFPATH USER_VARS += FIRMWAREVOLUMEPATH +USER_VARS += FIRMWARETDVFVOLUMEPATH +USER_VARS += FIRMWARESNPPATH USER_VARS += MACHINEACCELERATORS USER_VARS += CPUFEATURES +USER_VARS += TDXCPUFEATURES USER_VARS += DEFMACHINETYPE_CLH USER_VARS += KERNELPARAMS +USER_VARS += KERNELTDXPARAMS USER_VARS += LIBEXECDIR USER_VARS += LOCALSTATEDIR USER_VARS += PKGDATADIR @@ -432,8 +548,14 @@ USER_VARS += PROJECT_TYPE USER_VARS += PROJECT_URL USER_VARS += QEMUBINDIR USER_VARS += QEMUCMD +USER_VARS += QEMUTDXCMD +USER_VARS += QEMUSNPCMD USER_VARS += QEMUPATH +USER_VARS += QEMUTDXPATH +USER_VARS += QEMUSNPPATH USER_VARS += QEMUVALIDHYPERVISORPATHS +USER_VARS += QEMUTDXVALIDHYPERVISORPATHS +USER_VARS += QEMUSNPVALIDHYPERVISORPATHS USER_VARS += QEMUVIRTIOFSCMD USER_VARS += QEMUVIRTIOFSPATH USER_VARS += RUNTIME_NAME @@ -453,17 +575,24 @@ USER_VARS += DEFNETWORKMODEL_QEMU USER_VARS += DEFDISABLEGUESTEMPTYDIR USER_VARS += DEFDISABLEGUESTSECCOMP USER_VARS += DEFDISABLESELINUX +USER_VARS += DEFDISABLEGUESTSELINUX +USER_VARS += DEFGUESTSELINUXLABEL USER_VARS += DEFAULTEXPFEATURES USER_VARS += DEFDISABLEBLOCK USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN USER_VARS += DEFBLOCKSTORAGEDRIVER_FC USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU +USER_VARS += DEFBLOCKDEVICEAIO_QEMU USER_VARS += DEFSHAREDFS_CLH_VIRTIOFS USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS +USER_VARS += DEFSHAREDFS_QEMU_TDX_VIRTIOFS +USER_VARS += DEFSHAREDFS_QEMU_SEV_VIRTIOFS +USER_VARS += DEFSHAREDFS_QEMU_SNP_VIRTIOFS USER_VARS += DEFVIRTIOFSDAEMON USER_VARS += DEFVALIDVIRTIOFSDAEMONPATHS USER_VARS += DEFVIRTIOFSCACHESIZE USER_VARS += DEFVIRTIOFSCACHE +USER_VARS += DEFVIRTIOFSQUEUESIZE USER_VARS += DEFVIRTIOFSEXTRAARGS USER_VARS += DEFENABLEANNOTATIONS USER_VARS += DEFENABLEIOTHREADS @@ -478,8 +607,11 @@ USER_VARS += DEFENTROPYSOURCE USER_VARS += DEFVALIDENTROPYSOURCES USER_VARS += DEFSANDBOXCGROUPONLY USER_VARS += DEFSTATICRESOURCEMGMT +USER_VARS += DEFSTATICRESOURCEMGMT_CLH USER_VARS += DEFSTATICRESOURCEMGMT_FC +USER_VARS += DEFSTATICRESOURCEMGMT_TEE USER_VARS += DEFBINDMOUNTS +USER_VARS += DEFSERVICEOFFLOAD USER_VARS += DEFVFIOMODE USER_VARS += BUILDFLAGS @@ -519,7 +651,7 @@ endef all: runtime containerd-shim-v2 monitor -# Targets that depend on .git-commit can use $(shell cat .git-commit) to get a +# Targets that depend on .git-commit can use $(shell git rev-parse HEAD) to get a # git revision string. They will only be rebuilt if the revision string # actually changes. 
.PHONY: .git-commit.tmp @@ -564,6 +696,19 @@ define MAKE_KERNEL_VIRTIOFS_NAME $(if $(findstring uncompressed,$1),vmlinux-virtiofs.container,vmlinuz-virtiofs.container) endef +define MAKE_KERNEL_SEV_NAME +$(if $(findstring uncompressed,$1),vmlinux-sev.container,vmlinuz-sev.container) +endef + +define MAKE_KERNEL_TDX_NAME +$(if $(findstring uncompressed,$1),vmlinux-tdx.container,vmlinuz-tdx.container) +endef + +# SNP configuration uses the SEV kernel +define MAKE_KERNEL_SNP_NAME +$(if $(findstring uncompressed,$1),vmlinux-sev.container,vmlinuz-sev.container) +endef + GENERATED_FILES += pkg/katautils/config-settings.go $(RUNTIME_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) | show-summary @@ -574,7 +719,7 @@ $(SHIMV2_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) $(MONITOR_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) .git-commit $(QUIET_BUILD)(cd $(MONITOR_DIR)/ && go build \ - --ldflags "-X main.GitCommit=$(shell cat .git-commit)" $(BUILDFLAGS) -o $@ .) + --ldflags "-X main.GitCommit=$(shell git rev-parse HEAD)" $(BUILDFLAGS) -o $@ .) .PHONY: \ check \ @@ -595,7 +740,7 @@ GENERATED_FILES += $(CONFIGS) $(GENERATED_FILES): %: %.in $(MAKEFILE_LIST) VERSION .git-commit $(QUIET_GENERATE)$(SED) \ - -e "s|@COMMIT@|$(shell cat .git-commit)|g" \ + -e "s|@COMMIT@|$(shell git rev-parse HEAD)|g" \ $(foreach v,$(GENERATED_VARS),-e "s|@$v@|$($v)|g") \ $< > $@ @@ -644,7 +789,7 @@ install-bin: $(BINLIST) install-runtime: runtime install-scripts install-completions install-configs install-bin -install-containerd-shim-v2: $(SHIMV2) +install-containerd-shim-v2: $(SHIMV2_OUTPUT) $(QUIET_INST)$(call INSTALL_EXEC,$<,$(BINDIR)) install-monitor: $(MONITOR) @@ -671,6 +816,8 @@ handle_vendor: vendor: handle_vendor ./hack/tree_status.sh +static-checks-build: $(GENERATED_FILES) + clean: $(QUIET_CLEAN)rm -f \ $(CONFIGS) \ @@ -720,7 +867,7 @@ show-variables: @printf "\n" show-header: .git-commit - @printf "%s - version %s (commit %s)\n\n" $(TARGET) $(VERSION) $(shell cat .git-commit) + @printf "%s - version %s (commit %s)\n\n" $(TARGET) $(VERSION) $(shell git rev-parse HEAD) show-arches: show-header @printf "Supported architectures (possible values for ARCH variable):\n\n" @@ -758,15 +905,15 @@ endif @printf "\tbinary installation path (BINDIR) : %s\n" $(abspath $(BINDIR)) @printf "\tbinaries to install :\n" @printf \ - "$(foreach b,$(sort $(BINLIST)),$(shell printf "\\t - $(shell readlink -m $(DESTDIR)/$(BINDIR)/$(b))\\\n"))" + "$(foreach b,$(sort $(BINLIST)),$(shell printf "\\t - $(abspath $(DESTDIR)/$(BINDIR)/$(b))\\\n"))" @printf \ - "$(foreach b,$(sort $(SHIMV2)),$(shell printf "\\t - $(shell readlink -m $(DESTDIR)/$(BINDIR)/$(b))\\\n"))" + "$(foreach b,$(sort $(SHIMV2)),$(shell printf "\\t - $(abspath $(DESTDIR)/$(BINDIR)/$(b))\\\n"))" @printf \ - "$(foreach b,$(sort $(MONITOR)),$(shell printf "\\t - $(shell readlink -m $(DESTDIR)/$(BINDIR)/$(b))\\\n"))" + "$(foreach b,$(sort $(MONITOR)),$(shell printf "\\t - $(abspath $(DESTDIR)/$(BINDIR)/$(b))\\\n"))" @printf \ - "$(foreach b,$(sort $(BINLIBEXECLIST)),$(shell printf "\\t - $(shell readlink -m $(DESTDIR)/$(PKGLIBEXECDIR)/$(b))\\\n"))" + "$(foreach b,$(sort $(BINLIBEXECLIST)),$(shell printf "\\t - $(abspath $(DESTDIR)/$(PKGLIBEXECDIR)/$(b))\\\n"))" @printf \ - "$(foreach s,$(sort $(SCRIPTS)),$(shell printf "\\t - $(shell readlink -m $(DESTDIR)/$(BINDIR)/$(s))\\\n"))" + "$(foreach s,$(sort $(SCRIPTS)),$(shell printf "\\t - $(abspath $(DESTDIR)/$(BINDIR)/$(s))\\\n"))" @printf "\tconfigs to install (CONFIGS) :\n" @printf \ "$(foreach 
c,$(sort $(CONFIGS)),$(shell printf "\\t - $(c)\\\n"))" diff --git a/src/runtime/README.md b/src/runtime/README.md index c8aeec0ce791..d2290e20037a 100644 --- a/src/runtime/README.md +++ b/src/runtime/README.md @@ -32,8 +32,6 @@ to work seamlessly with both Docker and Kubernetes respectively. ## Download and install -[![Get it from the Snap Store](https://snapcraft.io/static/images/badges/en/snap-store-black.svg)](https://snapcraft.io/kata-containers) - See the [installation guides](../../docs/install/README.md) available for various operating systems. diff --git a/src/runtime/arch/amd64-options.mk b/src/runtime/arch/amd64-options.mk index 6c1c9e96710f..e6068158cf4e 100644 --- a/src/runtime/arch/amd64-options.mk +++ b/src/runtime/arch/amd64-options.mk @@ -11,6 +11,9 @@ MACHINEACCELERATORS := CPUFEATURES := pmu=off QEMUCMD := qemu-system-x86_64 +QEMUTDXCMD := qemu-system-x86_64-tdx-experimental +QEMUSNPCMD := qemu-system-x86_64-snp-experimental +TDXCPUFEATURES := -vmx-rdseed-exit,pmu=off # Firecracker binary name FCCMD := firecracker @@ -23,3 +26,5 @@ ACRNCTLCMD := acrnctl # cloud-hypervisor binary name CLHCMD := cloud-hypervisor + +DEFSTATICRESOURCEMGMT_CLH := false diff --git a/src/runtime/arch/arm64-options.mk b/src/runtime/arch/arm64-options.mk index ad5ef5d43f12..7f74ae311168 100644 --- a/src/runtime/arch/arm64-options.mk +++ b/src/runtime/arch/arm64-options.mk @@ -19,3 +19,5 @@ FCJAILERCMD := jailer # cloud-hypervisor binary name CLHCMD := cloud-hypervisor + +DEFSTATICRESOURCEMGMT_CLH := true diff --git a/src/runtime/cmd/containerd-shim-kata-v2/main.go b/src/runtime/cmd/containerd-shim-kata-v2/main.go index 412dec6ec3fc..b9a6890bfa5f 100644 --- a/src/runtime/cmd/containerd-shim-kata-v2/main.go +++ b/src/runtime/cmd/containerd-shim-kata-v2/main.go @@ -24,7 +24,7 @@ func shimConfig(config *shimapi.Config) { func main() { if len(os.Args) == 2 && os.Args[1] == "--version" { - fmt.Printf("%s containerd shim: id: %q, version: %s, commit: %v\n", katautils.PROJECT, types.DefaultKataRuntimeName, katautils.VERSION, katautils.COMMIT) + fmt.Printf("%s containerd shim (Golang): id: %q, version: %s, commit: %v\n", katautils.PROJECT, types.DefaultKataRuntimeName, katautils.VERSION, katautils.COMMIT) os.Exit(0) } diff --git a/src/runtime/cmd/kata-monitor/README.md b/src/runtime/cmd/kata-monitor/README.md index 5ebbc8cdac41..f6fcec1d3dc9 100644 --- a/src/runtime/cmd/kata-monitor/README.md +++ b/src/runtime/cmd/kata-monitor/README.md @@ -52,6 +52,8 @@ The **log-level** allows the chose how verbose the logs should be. The default i **NOTE: The debug endpoints are available only if the [Kata Containers configuration file](https://github.com/kata-containers/kata-containers/blob/9d5b03a1b70bbd175237ec4b9f821d6ccee0a1f6/src/runtime/config/configuration-qemu.toml.in#L590-L592) includes** `enable_pprof = true` **in the** `[runtime]` **section**. +The `/metrics` endpoint accepts a query parameter, `filter_family`, which filters Kata sandbox metrics by name prefix. If `filter_family` is set to `A` (and `B`, separated by `,`), only metrics whose names start with `A` (or `B`) are returned. + The `/sandboxes` endpoint lists the _sandbox ID_ of all the detected Kata runtimes. If accessed via a web browser, it provides html links to the endpoints available for each sandbox. In order to retrieve data for a specific Kata workload, the _sandbox ID_ should be passed in the query string using the _sandbox_ key. The `/agent-url`, and all the `/debug/`* endpoints require `sandbox_id` to be specified in the query string.
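To make the new `filter_family` behaviour concrete, the minimal Go client below (a sketch, not part of this patch) requests a filtered metrics set from kata-monitor; it assumes the default listen address of 127.0.0.1:8090 and uses `kata_shim`/`kata_hypervisor` purely as example prefixes.

// metrics_filter_example.go - illustrative only; not part of this patch.
// Assumes kata-monitor is listening on its default address (127.0.0.1:8090).
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	// Ask kata-monitor to return only metric families whose names start
	// with one of the comma-separated prefixes (example prefixes only).
	q := url.Values{}
	q.Set("filter_family", "kata_shim,kata_hypervisor")

	resp, err := http.Get("http://127.0.0.1:8090/metrics?" + q.Encode())
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Print(string(body))
}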
diff --git a/src/runtime/cmd/kata-runtime/kata-check.go b/src/runtime/cmd/kata-runtime/kata-check.go index 6637c389c555..73fe61075a0c 100644 --- a/src/runtime/cmd/kata-runtime/kata-check.go +++ b/src/runtime/cmd/kata-runtime/kata-check.go @@ -202,7 +202,8 @@ func checkCPUAttribs(cpuinfo string, attribs map[string]string) uint32 { // onVMM - `true` if the host is running under a VMM environment // fields - A set of fields showing the expected and actual module parameter values. // msg - The message that would be logged showing the incorrect kernel module -// parameter. +// +// parameter. // // The function must return `true` if the kernel module parameter error should // be ignored, or `false` if it is a real error. @@ -274,7 +275,7 @@ func checkKernelModules(modules map[string]kernelModule, handler kernelParamHand // genericHostIsVMContainerCapable checks to see if the host is theoretically capable // of creating a VM container. -//nolint: unused,deadcode +// nolint: unused,deadcode func genericHostIsVMContainerCapable(details vmContainerCapableDetails) error { cpuinfo, err := getCPUInfo(details.cpuInfoFile) if err != nil { diff --git a/src/runtime/cmd/kata-runtime/kata-check_amd64.go b/src/runtime/cmd/kata-runtime/kata-check_amd64.go index 46b3a2916561..fcdb047fbef1 100644 --- a/src/runtime/cmd/kata-runtime/kata-check_amd64.go +++ b/src/runtime/cmd/kata-runtime/kata-check_amd64.go @@ -30,7 +30,6 @@ const ( cpuFlagLM = "lm" cpuFlagSVM = "svm" cpuFlagSSE4_1 = "sse4_1" - kernelModvhm = "vhm_dev" kernelModvhost = "vhost" kernelModvhostnet = "vhost_net" kernelModvhostvsock = "vhost_vsock" @@ -46,26 +45,39 @@ const ( cpuTypeUnknown = -1 ) -const acrnDevice = "/dev/acrn_vhm" +const acrnDevice = "/dev/acrn_hsm" // ioctl_ACRN_CREATE_VM is the IOCTL to create VM in ACRN. // Current Linux mainstream kernel doesn't have support for ACRN. // Due to this several macros are not defined in Linux headers. // Until the support is available, directly use the value instead // of macros. 
-//https://github.com/kata-containers/runtime/issues/1784 -const ioctl_ACRN_CREATE_VM = 0x43000010 //nolint -const ioctl_ACRN_DESTROY_VM = 0x43000011 //nolint - -type acrn_create_vm struct { //nolint - vmid uint16 //nolint - reserved0 uint16 //nolint - vcpu_num uint16 //nolint - reserved1 uint16 //nolint - uuid [16]uint8 - vm_flag uint64 //nolint - req_buf uint64 //nolint - reserved2 [16]uint8 //nolint +// https://github.com/kata-containers/runtime/issues/1784 +const ioctl_ACRN_CREATE_VM = 0xC030A210 //nolint +const ioctl_ACRN_PAUSE_VM = 0xA213 //nolint +const ioctl_ACRN_DESTROY_VM = 0xA211 //nolint + +type acrn_vm_creation struct { //nolint + vmid uint16 //nolint + reserved0 uint16 //nolint + vcpu_num uint16 //nolint + reserved1 uint16 //nolint + name [16]uint8 + vm_flag uint64 //nolint + ioreq_buf uint64 //nolint + cpu_affinity uint64 //nolint +} + +var io_request_page [4096]byte + +type acrn_io_request struct { // nolint + io_type uint32 // nolint + completion_polling uint32 // nolint + reserved0 [14]uint32 // nolint + data [8]uint64 // nolint + reserved1 uint32 // nolint + kernel_handled uint32 // nolint + processed uint32 // nolint } // cpuType save the CPU type @@ -103,11 +115,13 @@ func setCPUtype(hypervisorType vc.HypervisorType) error { } switch hypervisorType { - case "firecracker": + case vc.FirecrackerHypervisor: + fallthrough + case vc.ClhHypervisor: fallthrough - case "clh": + case vc.DragonballHypervisor: fallthrough - case "qemu": + case vc.QemuHypervisor: archRequiredCPUFlags = map[string]string{ cpuFlagVMX: "Virtualization support", cpuFlagLM: "64Bit CPU", @@ -139,7 +153,7 @@ func setCPUtype(hypervisorType vc.HypervisorType) error { required: false, }, } - case "acrn": + case vc.AcrnHypervisor: archRequiredCPUFlags = map[string]string{ cpuFlagLM: "64Bit CPU", cpuFlagSSE4_1: "SSE4.1", @@ -148,10 +162,6 @@ func setCPUtype(hypervisorType vc.HypervisorType) error { archGenuineIntel: "Intel Architecture CPU", } archRequiredKernelModules = map[string]kernelModule{ - kernelModvhm: { - desc: "Intel ACRN", - required: false, - }, kernelModvhost: { desc: msgKernelVirtio, required: false, @@ -160,8 +170,12 @@ func setCPUtype(hypervisorType vc.HypervisorType) error { desc: msgKernelVirtioNet, required: false, }, + kernelModvhostvsock: { + desc: msgKernelVirtioVhostVsock, + required: false, + }, } - case "mock": + case vc.MockHypervisor: archRequiredCPUFlags = map[string]string{ cpuFlagVMX: "Virtualization support", cpuFlagLM: "64Bit CPU", @@ -245,19 +259,10 @@ func acrnIsUsable() error { defer syscall.Close(f) kataLog.WithField("device", acrnDevice).Info("device available") - acrnInst := vc.Acrn{} - uuidStr, err := acrnInst.GetNextAvailableUUID() - if err != nil { - return err - } - - uuid, err := acrnInst.GetACRNUUIDBytes(uuidStr) - if err != nil { - return fmt.Errorf("Converting UUID str to bytes failed, Err:%s", err) - } - - var createVM acrn_create_vm - createVM.uuid = uuid + var createVM acrn_vm_creation + copy(createVM.name[:], "KataACRNVM") + ioreq_buf := (*acrn_io_request)(unsafe.Pointer(&io_request_page)) + createVM.ioreq_buf = uint64(uintptr(unsafe.Pointer(ioreq_buf))) ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(f), @@ -267,10 +272,23 @@ func acrnIsUsable() error { if errno == syscall.EBUSY { kataLog.WithField("reason", "another hypervisor running").Error("cannot create VM") } + kataLog.WithFields(logrus.Fields{ + "ret": ret, + "errno": errno, + "VM_name": createVM.name, + }).Info("Create VM Error") + return errno + } + + ret, _, errno = 
syscall.Syscall(syscall.SYS_IOCTL, + uintptr(f), + uintptr(ioctl_ACRN_PAUSE_VM), + 0) + if ret != 0 || errno != 0 { kataLog.WithFields(logrus.Fields{ "ret": ret, "errno": errno, - }).Info("Create VM Error") + }).Info("PAUSE VM Error") return errno } @@ -292,17 +310,16 @@ func acrnIsUsable() error { } func archHostCanCreateVMContainer(hypervisorType vc.HypervisorType) error { - switch hypervisorType { - case "qemu": + case vc.QemuHypervisor: fallthrough - case "clh": + case vc.ClhHypervisor: fallthrough - case "firecracker": + case vc.FirecrackerHypervisor: return kvmIsUsable() - case "acrn": + case vc.AcrnHypervisor: return acrnIsUsable() - case "mock": + case vc.MockHypervisor: return nil default: return fmt.Errorf("archHostCanCreateVMContainer: Unknown hypervisor type %s", hypervisorType) diff --git a/src/runtime/cmd/kata-runtime/kata-check_amd64_test.go b/src/runtime/cmd/kata-runtime/kata-check_amd64_test.go index 47a6c21200bf..f2b107778616 100644 --- a/src/runtime/cmd/kata-runtime/kata-check_amd64_test.go +++ b/src/runtime/cmd/kata-runtime/kata-check_amd64_test.go @@ -51,6 +51,7 @@ func TestCCCheckCLIFunction(t *testing.T) { var cpuData []testCPUData var moduleData []testModuleData + cpuType = getCPUtype() if cpuType == cpuTypeIntel { cpuData = []testCPUData{ {archGenuineIntel, "lm vmx sse4_1", false}, @@ -466,7 +467,12 @@ func TestSetCPUtype(t *testing.T) { assert.NotEmpty(archRequiredCPUAttribs) assert.NotEmpty(archRequiredKernelModules) - assert.Equal(archRequiredCPUFlags["vmx"], "Virtualization support") + cpuType = getCPUtype() + if cpuType == cpuTypeIntel { + assert.Equal(archRequiredCPUFlags["vmx"], "Virtualization support") + } else if cpuType == cpuTypeAMD { + assert.Equal(archRequiredCPUFlags["svm"], "Virtualization support") + } _, ok := archRequiredKernelModules["kvm"] assert.True(ok) diff --git a/src/runtime/cmd/kata-runtime/kata-check_generic_test.go b/src/runtime/cmd/kata-runtime/kata-check_generic_test.go index 6584b66e04cf..18e722e43197 100644 --- a/src/runtime/cmd/kata-runtime/kata-check_generic_test.go +++ b/src/runtime/cmd/kata-runtime/kata-check_generic_test.go @@ -4,7 +4,6 @@ // //go:build arm64 || ppc64le -// +build arm64 ppc64le package main diff --git a/src/runtime/cmd/kata-runtime/kata-env.go b/src/runtime/cmd/kata-runtime/kata-env.go index b1421fa0068c..9b4fc8064035 100644 --- a/src/runtime/cmd/kata-runtime/kata-env.go +++ b/src/runtime/cmd/kata-runtime/kata-env.go @@ -17,6 +17,7 @@ import ( "github.com/prometheus/procfs" "github.com/urfave/cli" + "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" "github.com/kata-containers/kata-containers/src/runtime/pkg/utils" @@ -76,6 +77,7 @@ type RuntimeConfigInfo struct { type RuntimeInfo struct { Config RuntimeConfigInfo Path string + GuestSeLinuxLabel string Experimental []exp.Feature Version RuntimeVersionInfo Debug bool @@ -101,19 +103,19 @@ type RuntimeVersionInfo struct { // HypervisorInfo stores hypervisor details type HypervisorInfo struct { - MachineType string - Version string - Path string - BlockDeviceDriver string - EntropySource string - SharedFS string - VirtioFSDaemon string - SocketPath string - Msize9p uint32 - MemorySlots uint32 - PCIeRootPort uint32 - HotplugVFIOOnRootBus bool - Debug bool + MachineType string + Version string + Path string + BlockDeviceDriver string + EntropySource string + SharedFS string + VirtioFSDaemon string + 
SocketPath string + Msize9p uint32 + MemorySlots uint32 + HotPlugVFIO config.PCIePort + ColdPlugVFIO config.PCIePort + Debug bool } // AgentInfo stores agent details @@ -186,6 +188,7 @@ func getRuntimeInfo(configFile string, config oci.RuntimeConfig) RuntimeInfo { SandboxCgroupOnly: config.SandboxCgroupOnly, Experimental: config.Experimental, DisableGuestSeccomp: config.DisableGuestSeccomp, + GuestSeLinuxLabel: config.GuestSeLinuxLabel, } } @@ -313,10 +316,9 @@ func getHypervisorInfo(config oci.RuntimeConfig) (HypervisorInfo, error) { EntropySource: config.HypervisorConfig.EntropySource, SharedFS: config.HypervisorConfig.SharedFS, VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon, - - HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus, - PCIeRootPort: config.HypervisorConfig.PCIeRootPort, - SocketPath: socketPath, + HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO, + ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO, + SocketPath: socketPath, }, nil } diff --git a/src/runtime/cmd/kata-runtime/kata-env_amd64_test.go b/src/runtime/cmd/kata-runtime/kata-env_amd64_test.go index 7c1fe849db2d..97245a22bdb5 100644 --- a/src/runtime/cmd/kata-runtime/kata-env_amd64_test.go +++ b/src/runtime/cmd/kata-runtime/kata-env_amd64_test.go @@ -55,7 +55,12 @@ func TestEnvGetEnvInfoSetsCPUType(t *testing.T) { assert.NotEmpty(archRequiredCPUAttribs) assert.NotEmpty(archRequiredKernelModules) - assert.Equal(archRequiredCPUFlags["vmx"], "Virtualization support") + cpuType = getCPUtype() + if cpuType == cpuTypeIntel { + assert.Equal(archRequiredCPUFlags["vmx"], "Virtualization support") + } else if cpuType == cpuTypeAMD { + assert.Equal(archRequiredCPUFlags["svm"], "Virtualization support") + } _, ok := archRequiredKernelModules["kvm"] assert.True(ok) diff --git a/src/runtime/cmd/kata-runtime/kata-env_generic_test.go b/src/runtime/cmd/kata-runtime/kata-env_generic_test.go index 8e22ba74aecd..ca862338babc 100644 --- a/src/runtime/cmd/kata-runtime/kata-env_generic_test.go +++ b/src/runtime/cmd/kata-runtime/kata-env_generic_test.go @@ -4,7 +4,6 @@ // //go:build arm64 || ppc64le -// +build arm64 ppc64le package main diff --git a/src/runtime/cmd/kata-runtime/kata-env_test.go b/src/runtime/cmd/kata-runtime/kata-env_test.go index 96847dc131c5..5bf2c5a88017 100644 --- a/src/runtime/cmd/kata-runtime/kata-env_test.go +++ b/src/runtime/cmd/kata-runtime/kata-env_test.go @@ -24,6 +24,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/urfave/cli" + "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" @@ -73,18 +74,21 @@ func createConfig(configPath string, fileData string) error { return nil } -func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeConfig, err error) { +func makeRuntimeConfig(prefixDir string) (configFile string, ociConfig oci.RuntimeConfig, err error) { + var hotPlugVFIO config.PCIePort + var coldPlugVFIO config.PCIePort const logPath = "/log/path" hypervisorPath := filepath.Join(prefixDir, "hypervisor") kernelPath := filepath.Join(prefixDir, "kernel") imagePath := filepath.Join(prefixDir, "image") + rootfsType := "ext4" kernelParams := "foo=bar xyz" machineType := "machineType" disableBlock := true blockStorageDriver := "virtio-scsi" enableIOThreads := true - hotplugVFIOOnRootBus := true - pcieRootPort := 
uint32(2) + hotPlugVFIO = config.BridgePort + coldPlugVFIO = config.NoPort disableNewNetNs := false sharedFS := "virtio-9p" virtioFSdaemon := filepath.Join(prefixDir, "virtiofsd") @@ -119,6 +123,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: kernelParams, MachineType: machineType, LogPath: logPath, @@ -126,8 +131,8 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC DisableBlock: disableBlock, BlockDeviceDriver: blockStorageDriver, EnableIOThreads: enableIOThreads, - HotplugVFIOOnRootBus: hotplugVFIOOnRootBus, - PCIeRootPort: pcieRootPort, + HotPlugVFIO: hotPlugVFIO, + ColdPlugVFIO: coldPlugVFIO, DisableNewNetNs: disableNewNetNs, DefaultVCPUCount: hypConfig.NumVCPUs, DefaultMaxVCPUCount: hypConfig.DefaultMaxVCPUs, @@ -150,12 +155,12 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC return "", oci.RuntimeConfig{}, err } - _, config, err = katautils.LoadConfiguration(configFile, true) + _, ociConfig, err = katautils.LoadConfiguration(configFile, true) if err != nil { return "", oci.RuntimeConfig{}, err } - return configFile, config, nil + return configFile, ociConfig, nil } func getExpectedAgentDetails(config oci.RuntimeConfig) (AgentInfo, error) { @@ -189,12 +194,13 @@ func genericGetExpectedHostDetails(tmpdir string, expectedVendor string, expecte expectedSupportVSocks, _ := vcUtils.SupportsVsocks() expectedHostDetails := HostInfo{ - Kernel: expectedKernelVersion, - Architecture: expectedArch, - Distro: expectedDistro, - CPU: expectedCPU, - VMContainerCapable: expectedVMContainerCapable, - SupportVSocks: expectedSupportVSocks, + AvailableGuestProtections: vc.AvailableGuestProtections(), + Kernel: expectedKernelVersion, + Architecture: expectedArch, + Distro: expectedDistro, + CPU: expectedCPU, + VMContainerCapable: expectedVMContainerCapable, + SupportVSocks: expectedSupportVSocks, } testProcCPUInfo := filepath.Join(tmpdir, "cpuinfo") @@ -268,9 +274,8 @@ func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo { EntropySource: config.HypervisorConfig.EntropySource, SharedFS: config.HypervisorConfig.SharedFS, VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon, - - HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus, - PCIeRootPort: config.HypervisorConfig.PCIeRootPort, + HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO, + ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO, } if os.Geteuid() == 0 { diff --git a/src/runtime/cmd/kata-runtime/kata-iptables.go b/src/runtime/cmd/kata-runtime/kata-iptables.go index 8e2bd93c6090..a9953bfad117 100644 --- a/src/runtime/cmd/kata-runtime/kata-iptables.go +++ b/src/runtime/cmd/kata-runtime/kata-iptables.go @@ -7,7 +7,7 @@ package main import ( "fmt" - "io/ioutil" + "os" containerdshim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils" @@ -103,7 +103,7 @@ var setIPTablesCommand = cli.Command{ } // Read file into buffer, and make request to the appropriate shim - buf, err := ioutil.ReadFile(iptablesFile) + buf, err := os.ReadFile(iptablesFile) if err != nil { return err } diff --git a/src/runtime/config/configuration-acrn.toml.in b/src/runtime/config/configuration-acrn.toml.in index 5f1368ce824f..ef0207589716 100644 --- a/src/runtime/config/configuration-acrn.toml.in +++ 
b/src/runtime/config/configuration-acrn.toml.in @@ -17,6 +17,12 @@ ctlpath = "@ACRNCTLPATH@" kernel = "@KERNELPATH_ACRN@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" @@ -148,8 +154,8 @@ disable_selinux=@DEFDISABLESELINUX@ #debug_console_enabled = true # Agent connection dialing timeout value in seconds -# (default: 30) -#dial_timeout = 30 +# (default: 45) +dial_timeout = 45 [runtime] # If enabled, the runtime will log additional debug messages to the diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index f09c095f0e15..f2eca9b3d824 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -16,6 +16,12 @@ path = "@CLHPATH@" kernel = "@KERNELPATH_CLH@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # Enable confidential guest support. # Toggling that setting may trigger different hardware features, ranging # from memory encryption to both memory and CPU-state encryption and integrity. @@ -28,10 +34,6 @@ image = "@IMAGEPATH@" # - CPU Hotplug # - Memory Hotplug # - NVDIMM devices -# - SharedFS, such as virtio-fs and virtio-fs-nydus -# -# Requirements: -# * virtio-block used as rootfs, thus the usage of devmapper snapshotter. # # Supported TEEs: # * Intel TDX @@ -39,9 +41,21 @@ image = "@IMAGEPATH@" # Default false # confidential_guest = true +# Enable running clh VMM as a non-root user. +# By default clh VMM run as root. When this is set to true, clh VMM process runs as +# a non-root random user. See documentation for the limitations of this mode. +# rootless = true + # disable applying SELinux on the VMM process (default false) disable_selinux=@DEFDISABLESELINUX@ +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + # Path to the firmware. # If you want Cloud Hypervisor to use a specific firmware, set its path below. # This is option is only used when confidential_guest is enabled. @@ -117,6 +131,7 @@ default_maxmemory = @DEFMAXMEMSZ@ # Shared file system type: # - virtio-fs (default) # - virtio-fs-nydus +# - none shared_fs = "@DEFSHAREDFS_CLH_VIRTIOFS@" # Path to vhost-user-fs daemon. @@ -130,18 +145,21 @@ valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@ # Default size of DAX cache in MiB virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ +# Default size of virtqueues +virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@ + # Extra args for virtiofsd daemon # # Format example: -# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +# ["--arg1=xxx", "--arg2=yyy"] # Examples: -# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"] +# Set virtiofsd log level to debug : ["--log-level=debug"] # see `virtiofsd -h` for possible options. virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ # Cache mode: # -# - none +# - never # Metadata, data, and pathname lookup are not cached in guest. 
They are # always fetched from host and any changes are immediately pushed to host. # @@ -158,6 +176,15 @@ virtio_fs_cache = "@DEFVIRTIOFSCACHE@" # rootfs is backed by a block device. This is virtio-blk. block_device_driver = "virtio-blk" +# Specifies cache-related options will be set to block devices or not. +# Default false +#block_device_cache_set = true + +# Specifies cache-related options for block devices. +# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. +# Default false +#block_device_cache_direct = true + # Enable huge pages for VM RAM, default false # Enabling this will result in the VM memory # being allocated using huge pages. @@ -166,12 +193,22 @@ block_device_driver = "virtio-blk" # Disable the 'seccomp' feature from Cloud Hypervisor, default false # disable_seccomp = true +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: iommu=pt +#enable_iommu = true + # This option changes the default hypervisor and kernel parameters # to enable debug output where available. # # Default false #enable_debug = true +# Enable hot-plugging of VFIO devices to a root-port. +# The default setting is "no-port" +#hot_plug_vfio = "root-port" + # Path to OCI hook binaries in the *guest rootfs*. # This does not affect host-side hooks which must instead be added to # the OCI spec passed to the runtime. @@ -288,8 +325,8 @@ block_device_driver = "virtio-blk" #debug_console_enabled = true # Agent connection dialing timeout value in seconds -# (default: 30) -#dial_timeout = 30 +# (default: 45) +dial_timeout = 45 [runtime] # If enabled, the runtime will log additional debug messages to the @@ -322,6 +359,14 @@ internetworking_model="@DEFNETWORKMODEL_CLH@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) @@ -360,7 +405,7 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O # does not yet support sandbox sizing annotations. # - When running single containers using a tool like ctr, container sizing information will be available. -static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT@ +static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_CLH@ # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path. # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. 
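The `hot_plug_vfio` comment above and the new HotPlugVFIO/ColdPlugVFIO fields in kata-env.go both work in terms of a small set of PCIe port names. The self-contained sketch below only illustrates the idea behind `config.PCIePort` rather than reproducing it; the constant names, the set of accepted values and the error handling are assumptions.

// pcie_port_example.go - illustrative sketch only; not the runtime's config package.
package main

import "fmt"

// PCIePort names where a VFIO device gets (hot or cold) plugged.
type PCIePort string

const (
	NoPort     PCIePort = "no-port"     // no dedicated port (default for hot plug)
	BridgePort PCIePort = "bridge-port" // plug behind a PCI bridge
	RootPort   PCIePort = "root-port"   // plug on a PCIe root port (large-BAR devices such as GPUs)
)

// toPCIePort maps a configuration string such as hot_plug_vfio = "root-port"
// onto the corresponding port value, rejecting anything it does not know.
func toPCIePort(s string) (PCIePort, error) {
	switch p := PCIePort(s); p {
	case NoPort, BridgePort, RootPort:
		return p, nil
	default:
		return "", fmt.Errorf("unknown PCIe port %q", s)
	}
}

func main() {
	for _, s := range []string{"no-port", "root-port", "bad-port"} {
		p, err := toPCIePort(s)
		fmt.Println(p, err)
	}
}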
diff --git a/src/runtime/config/configuration-fc.toml.in b/src/runtime/config/configuration-fc.toml.in index b7f349c0ddc2..28040aacc12b 100644 --- a/src/runtime/config/configuration-fc.toml.in +++ b/src/runtime/config/configuration-fc.toml.in @@ -16,6 +16,12 @@ path = "@FCPATH@" kernel = "@KERNELPATH_FC@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" @@ -120,11 +126,6 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@" # Default false #block_device_cache_set = true -# Specifies cache-related options for block devices. -# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. -# Default false -#block_device_cache_direct = true - # Specifies cache-related options for block devices. # Denotes whether flush requests for the device are ignored. # Default false @@ -278,8 +279,8 @@ kernel_modules=[] #debug_console_enabled = true # Agent connection dialing timeout value in seconds -# (default: 30) -#dial_timeout = 30 +# (default: 45) +dial_timeout = 45 [runtime] # If enabled, the runtime will log additional debug messages to the diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in new file mode 100644 index 000000000000..e63d3b4813af --- /dev/null +++ b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in @@ -0,0 +1,708 @@ +# Copyright (c) 2017-2019 Intel Corporation +# Copyright (c) 2021 Adobe Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# XXX: WARNING: this file is auto-generated. +# XXX: +# XXX: Source file: "@CONFIG_QEMU_IN@" +# XXX: Project: +# XXX: Name: @PROJECT_NAME@ +# XXX: Type: @PROJECT_TYPE@ + +[hypervisor.qemu] +path = "@QEMUPATH@" +kernel = "@KERNELPATH@" +image = "@IMAGEPATH@" +# initrd = "@INITRDPATH@" +machine_type = "@MACHINETYPE@" + +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + +# Enable confidential guest support. +# Toggling that setting may trigger different hardware features, ranging +# from memory encryption to both memory and CPU-state encryption and integrity. +# The Kata Containers runtime dynamically detects the available feature set and +# aims at enabling the largest possible one, returning an error if none is +# available, or none is supported by the hypervisor. +# +# Known limitations: +# * Does not work by design: +# - CPU Hotplug +# - Memory Hotplug +# - NVDIMM devices +# +# Default false +# confidential_guest = true + +# Choose AMD SEV-SNP confidential guests +# In case of using confidential guests on AMD hardware that supports both SEV +# and SEV-SNP, the following enables SEV-SNP guests. SEV guests are default. +# Default false +# sev_snp_guest = true + +# Enable running QEMU VMM as a non-root user. +# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as +# a non-root random user. See documentation for the limitations of this mode. +# rootless = true + +# List of valid annotation names for the hypervisor +# Each member of the list is a regular expression, which is the base name +# of the annotation, e.g. 
"path" for io.katacontainers.config.hypervisor.path" +enable_annotations = @DEFENABLEANNOTATIONS@ + +# List of valid annotations values for the hypervisor +# Each member of the list is a path pattern as described by glob(3). +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@ +valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@ + +# Optional space-separated list of options to pass to the guest kernel. +# For example, use `kernel_params = "vsyscall=emulate"` if you are having +# trouble running pre-2.15 glibc. +# +# WARNING: - any parameter specified here will take priority over the default +# parameter value of the same name used to start the virtual machine. +# Do not set values here unless you understand the impact of doing so as you +# may stop the virtual machine from booting. +# To see the list of default parameters, enable hypervisor debug, create a +# container and look for 'default-kernel-parameters' log entries. +kernel_params = "@KERNELPARAMS@" + +# Path to the firmware. +# If you want that qemu uses the default firmware leave this option empty +firmware = "@FIRMWAREPATH@" + +# Path to the firmware volume. +# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables +# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables +# can be customized per each user while UEFI code is kept same. +firmware_volume = "@FIRMWAREVOLUMEPATH@" + +# Machine accelerators +# comma-separated list of machine accelerators to pass to the hypervisor. +# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"` +machine_accelerators="@MACHINEACCELERATORS@" + +# Qemu seccomp sandbox feature +# comma-separated list of seccomp sandbox features to control the syscall access. +# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"` +# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox +# Another note: enabling this feature may reduce performance, you may enable +# /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html +#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@" + +# CPU features +# comma-separated list of cpu features to pass to the cpu +# For example, `cpu_features = "pmu=off,vmx=off" +cpu_features="@CPUFEATURES@" + +# Default number of vCPUs per SB/VM: +# unspecified or 0 --> will be set to @DEFVCPUS@ +# < 0 --> will be set to the actual number of physical cores +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores +default_vcpus = 1 + +# Default maximum number of vCPUs per SB/VM: +# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used when +# the actual number of physical cores is greater than it. +# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU +# the hotplug functionality. 
For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs +# can be added to a SB/VM, but the memory footprint will be big. Another example, with +# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of +# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, +# unless you know what you are doing. +# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8. +default_maxvcpus = @DEFMAXVCPUS@ + +# Bridges can be used to hot plug devices. +# Limitations: +# * Currently only pci bridges are supported +# * Up to 30 devices per bridge can be hot plugged. +# * Up to 5 PCI bridges can be cold plugged per VM. +# This limitation could be a bug in qemu or in the kernel +# Default number of bridges per SB/VM: +# unspecified or 0 --> will be set to @DEFBRIDGES@ +# > 1 <= 5 --> will be set to the specified number +# > 5 --> will be set to 5 +default_bridges = @DEFBRIDGES@ + +# Default memory size in MiB for SB/VM. +# If unspecified then it will be set to @DEFMEMSZ@ MiB. +default_memory = @DEFMEMSZ@ +# +# Default memory slots per SB/VM. +# If unspecified then it will be set to @DEFMEMSLOTS@. +# This determines how many times memory can be hot-added to the sandbox/VM. +#memory_slots = @DEFMEMSLOTS@ + +# Default maximum memory in MiB per SB / VM +# unspecified or == 0 --> will be set to the actual amount of physical RAM +# > 0 <= amount of physical RAM --> will be set to the specified number +# > amount of physical RAM --> will be set to the actual amount of physical RAM +default_maxmemory = @DEFMAXMEMSZ@ + +# This size in MiB will be added to the hypervisor's maximum memory. +# It is the memory address space for the NVDIMM device. +# If the block storage driver (block_device_driver) is set to "nvdimm", +# memory_offset should be set to the size of the block device. +# Default 0 +#memory_offset = 0 + +# Specifies whether virtio-mem will be enabled or not. +# Please note that this option should be used with the command +# "echo 1 > /proc/sys/vm/overcommit_memory". +# Default false +#enable_virtio_mem = true + +# Disable block device from being used for a container's rootfs. +# In case of a storage driver like devicemapper where a container's +# root file system is backed by a block device, the block device is passed +# directly to the hypervisor for performance reasons. +# This flag prevents the block device from being passed to the hypervisor, +# virtio-fs is used instead to pass the rootfs. +disable_block_device_use = @DEFDISABLEBLOCK@ + +# Shared file system type: +# - virtio-fs (default) +# - virtio-9p +# - virtio-fs-nydus +# - none +shared_fs = "@DEFSHAREDFS_QEMU_VIRTIOFS@" + +# Path to vhost-user-fs daemon. +virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@" + +# List of valid annotations values for the virtiofs daemon +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@ +valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@ + +# Default size of DAX cache in MiB +virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ + +# Default size of virtqueues +virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@ + +# Extra args for virtiofsd daemon +# +# Format example: +# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +# Examples: +# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"] +# +# see `virtiofsd -h` for possible options.
+virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ + +# Cache mode: +# +# - never +# Metadata, data, and pathname lookup are not cached in guest. They are +# always fetched from host and any changes are immediately pushed to host. +# +# - auto +# Metadata and pathname lookup cache expires after a configured amount of +# time (default is 1 second). Data is cached while the file is open (close +# to open consistency). +# +# - always +# Metadata, data, and pathname lookup are cached in guest and never expire. +virtio_fs_cache = "@DEFVIRTIOFSCACHE@" + +# Block storage driver to be used for the hypervisor in case the container +# rootfs is backed by a block device. This is virtio-scsi, virtio-blk +# or nvdimm. +block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@" + +# aio is the I/O mechanism used by qemu +# Options: +# +# - threads +# Pthread based disk I/O. +# +# - native +# Native Linux I/O. +# +# - io_uring +# Linux io_uring API. This provides the fastest I/O operations on Linux, requires kernel>5.1 and +# qemu >=5.0. +block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" + +# Specifies cache-related options will be set to block devices or not. +# Default false +#block_device_cache_set = true + +# Specifies cache-related options for block devices. +# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. +# Default false +#block_device_cache_direct = true + +# Specifies cache-related options for block devices. +# Denotes whether flush requests for the device are ignored. +# Default false +#block_device_cache_noflush = true + +# Enable iothreads (data-plane) to be used. This causes IO to be +# handled in a separate IO thread. This is currently only implemented +# for SCSI. +# +enable_iothreads = @DEFENABLEIOTHREADS@ + +# Enable pre allocation of VM RAM, default false +# Enabling this will result in lower container density +# as all of the memory will be allocated and locked +# This is useful when you want to reserve all the memory +# upfront or in the cases where you want memory latencies +# to be very predictable +# Default false +#enable_mem_prealloc = true + +# Enable huge pages for VM RAM, default false +# Enabling this will result in the VM memory +# being allocated using huge pages. +# This is useful when you want to use vhost-user network +# stacks within the container. This will automatically +# result in memory pre allocation +#enable_hugepages = true + +# Enable vhost-user storage device, default false +# Enabling this will result in some Linux reserved block type +# major range 240-254 being chosen to represent vhost-user devices. +enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@ + +# The base directory specifically used for vhost-user devices. +# Its sub-path "block" is used for block devices; "block/sockets" is +# where we expect vhost-user sockets to live; "block/devices" is where +# simulated block device nodes for vhost-user devices to live. +vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" + +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: intel_iommu=on,iommu=pt +#enable_iommu = true + +# Enable IOMMU_PLATFORM, default false +# Enabling this will result in the VM device having iommu_platform=on set +#enable_iommu_platform = true + +# List of valid annotations values for the vhost user store path +# The default if not set is empty (all annotations rejected.) 
+# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@ +valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@ + +# The timeout for reconnecting on non-server spdk sockets when the remote end goes away. +# qemu will delay this many seconds and then attempt to reconnect. +# Zero disables reconnecting, and the default is zero. +vhost_user_reconnect_timeout_sec = 0 + +# Enable file based guest memory support. The default is an empty string which +# will disable this feature. In the case of virtio-fs, this is enabled +# automatically and '/dev/shm' is used as the backing folder. +# This option will be ignored if VM templating is enabled. +#file_mem_backend = "@DEFFILEMEMBACKEND@" + +# List of valid annotations values for the file_mem_backend annotation +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@ +valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@ + +# -pflash can add image file to VM. The arguments of it should be in format +# of ["/path/to/flash0.img", "/path/to/flash1.img"] +pflashes = [] + +# This option changes the default hypervisor and kernel parameters +# to enable debug output where available. +# +# Default false +#enable_debug = true + +# This option allows to add an extra HMP or QMP socket when `enable_debug = true` +# +# WARNING: Anyone with access to the extra socket can take full control of +# Qemu. This is for debugging purpose only and must *NEVER* be used in +# production. +# +# Valid values are : +# - "hmp" +# - "qmp" +# - "qmp-pretty" (same as "qmp" with pretty json formatting) +# +# If set to the empty string "", no extra monitor socket is added. This is +# the default. +#extra_monitor_socket = hmp + +# Disable the customizations done in the runtime when it detects +# that it is running on top a VMM. This will result in the runtime +# behaving as it would when running on bare metal. +# +#disable_nesting_checks = true + +# This is the msize used for 9p shares. It is the number of bytes +# used for 9p packet payload. +#msize_9p = @DEFMSIZE9P@ + +# If false and nvdimm is supported, use nvdimm device to plug guest image. +# Otherwise virtio-block device is used. +# +# nvdimm is not supported when `confidential_guest = true`. +# +# Default is false +#disable_image_nvdimm = true + +# VFIO devices are hotplugged on a bridge by default. +# Enable hotplugging on root bus. This may be required for devices with +# a large PCI bar, as this is a current limitation with hotplugging on +# a bridge. +# Default false +hotplug_vfio_on_root_bus = true + +# Before hot plugging a PCIe device, you need to add a pcie_root_port device. +# Use this parameter when using some large PCI bar devices, such as Nvidia GPU +# The value means the number of pcie_root_port +# This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35" +# Default 0 +pcie_root_port = 1 + +# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off +# security (vhost-net runs ring0) for network I/O performance. +#disable_vhost_net = true + +# +# Default entropy source. +# The path to a host source of entropy (including a real hardware RNG) +# /dev/urandom and /dev/random are two main options. +# Be aware that /dev/random is a blocking source of entropy. If the host +# runs out of entropy, the VMs boot time will increase leading to get startup +# timeouts. +# The source of entropy /dev/urandom is non-blocking and provides a +# generally acceptable source of entropy. 
It should work well for pretty much +# all practical purposes. +#entropy_source= "@DEFENTROPYSOURCE@" + +# List of valid annotations values for entropy_source +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDENTROPYSOURCES@ +valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ + +# Path to OCI hook binaries in the *guest rootfs*. +# This does not affect host-side hooks which must instead be added to +# the OCI spec passed to the runtime. +# +# You can create a rootfs with hooks by customizing the osbuilder scripts: +# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder +# +# Hooks must be stored in a subdirectory of guest_hook_path according to their +# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}". +# The agent will scan these directories for executable files and add them, in +# lexicographical order, to the lifecycle of the guest container. +# Hooks are executed in the runtime namespace of the guest. See the official documentation: +# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks +# Warnings will be logged if any error is encountered while scanning for hooks, +# but it will not abort container execution. +guest_hook_path = "/etc/oci/hooks.d" +# +# Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM). +# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic. +# Default 0-sized value means unlimited rate. +#rx_rate_limiter_max_rate = 0 +# Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM). +# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block) +# to discipline traffic. +# Default 0-sized value means unlimited rate. +#tx_rate_limiter_max_rate = 0 + +# Set where to save the guest memory dump file. +# If set, when GUEST_PANICKED event occurred, +# guest memeory will be dumped to host filesystem under guest_memory_dump_path, +# This directory will be created automatically if it does not exist. +# +# The dumped file(also called vmcore) can be processed with crash or gdb. +# +# WARNING: +# Dump guest’s memory can take very long depending on the amount of guest memory +# and use much disk space. +#guest_memory_dump_path="/var/crash/kata" + +# If enable paging. +# Basically, if you want to use "gdb" rather than "crash", +# or need the guest-virtual addresses in the ELF vmcore, +# then you should enable paging. +# +# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details +#guest_memory_dump_paging=false + +# Enable swap in the guest. Default false. +# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device +# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness") +# is bigger than 0. +# The size of the swap device should be +# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes. +# If swap_in_bytes is not set, the size should be memory_limit_in_bytes. +# If swap_in_bytes and memory_limit_in_bytes is not set, the size should +# be default_memory. +#enable_guest_swap = true + +# use legacy serial for guest console if available and implemented for architecture. 
Default false +#use_legacy_serial = true + +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + + +[factory] +# VM templating support. Once enabled, new VMs are created from template +# using vm cloning. They will share the same initial kernel, initramfs and +# agent memory by mapping it readonly. It helps speeding up new container +# creation and saves a lot of memory if there are many kata containers running +# on the same host. +# +# When disabled, new VMs are created from scratch. +# +# Note: Requires "initrd=" to be set ("image=" is not supported). +# +# Default false +#enable_template = true + +# Specifies the path of template. +# +# Default "/run/vc/vm/template" +#template_path = "/run/vc/vm/template" + +# The number of caches of VMCache: +# unspecified or == 0 --> VMCache is disabled +# > 0 --> will be set to the specified number +# +# VMCache is a function that creates VMs as caches before using it. +# It helps speed up new container creation. +# The function consists of a server and some clients communicating +# through Unix socket. The protocol is gRPC in protocols/cache/cache.proto. +# The VMCache server will create some VMs and cache them by factory cache. +# It will convert the VM to gRPC format and transport it when gets +# requestion from clients. +# Factory grpccache is the VMCache client. It will request gRPC format +# VM and convert it back to a VM. If VMCache function is enabled, +# kata-runtime will request VM from factory grpccache when it creates +# a new sandbox. +# +# Default 0 +#vm_cache_number = 0 + +# Specify the address of the Unix socket that is used by VMCache. +# +# Default /var/run/kata-containers/cache.sock +#vm_cache_endpoint = "/var/run/kata-containers/cache.sock" + +[agent.@PROJECT_TYPE@] +# If enabled, make the agent display debug-level messages. +# (default: disabled) +#enable_debug = true + +# Enable agent tracing. +# +# If enabled, the agent will generate OpenTelemetry trace spans. +# +# Notes: +# +# - If the runtime also has tracing enabled, the agent spans will be +# associated with the appropriate runtime parent span. +# - If enabled, the runtime will wait for the container to shutdown, +# increasing the container shutdown time slightly. +# +# (default: disabled) +#enable_tracing = true + +# Comma separated list of kernel modules and their parameters. +# These modules will be loaded in the guest kernel using modprobe(8). +# The following example can be used to load two kernel modules with parameters +# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"] +# The first word is considered as the module name and the rest as its parameters. +# Container will not be started when: +# * A kernel module is specified and the modprobe command is not installed in the guest +# or it fails loading the module. +# * The module is not available in the guest or it doesn't met the guest kernel +# requirements, like architecture and version. +# +kernel_modules=[] + +# Enable debug console. 
+ +# If enabled, user can connect guest OS running inside hypervisor +# through "kata-runtime exec " command + +#debug_console_enabled = true + +# Agent connection dialing timeout value in seconds +# (default: 30) +#dial_timeout = 30 + +[runtime] +# If enabled, the runtime will log additional debug messages to the +# system log +# (default: disabled) +#enable_debug = true +# +# Internetworking model +# Determines how the VM should be connected to the +# the container network interface +# Options: +# +# - macvtap +# Used when the Container network interface can be bridged using +# macvtap. +# +# - none +# Used when customize network. Only creates a tap device. No veth pair. +# +# - tcfilter +# Uses tc filter rules to redirect traffic from the network interface +# provided by plugin to a tap interface connected to the VM. +# +internetworking_model="@DEFNETWORKMODEL_QEMU@" + +# disable guest seccomp +# Determines whether container seccomp profiles are passed to the virtual +# machine and applied by the kata agent. If set to true, seccomp is not applied +# within the guest +# (default: true) +disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ + +# vCPUs pinning settings +# if enabled, each vCPU thread will be scheduled to a fixed CPU +# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet) +# enable_vcpus_pinning = false + +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + +# If enabled, the runtime will create opentracing.io traces and spans. +# (See https://www.jaegertracing.io/docs/getting-started). +# (default: disabled) +#enable_tracing = true + +# Set the full url to the Jaeger HTTP Thrift collector. +# The default if not set will be "http://localhost:14268/api/traces" +#jaeger_endpoint = "" + +# Sets the username to be used if basic auth is required for Jaeger. +#jaeger_user = "" + +# Sets the password to be used if basic auth is required for Jaeger. +#jaeger_password = "" + +# If enabled, the runtime will not create a network namespace for shim and hypervisor processes. +# This option may have some potential impacts to your host. It should only be used when you know what you're doing. +# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only +# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge +# (like OVS) directly. +# (default: false) +#disable_new_netns = true + +# if enabled, the runtime will add all the kata processes inside one dedicated cgroup. +# The container cgroups in the host are not created, just one single cgroup per sandbox. +# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. +# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. +# The sandbox cgroup is constrained if there is no container type annotation. 
+# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType +sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ + +# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In +# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful +# when a hardware architecture or hypervisor solutions is utilized which does not support CPU and/or memory hotplug. +# Compatibility for determining appropriate sandbox (VM) size: +# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O +# does not yet support sandbox sizing annotations. +# - When running single containers using a tool like ctr, container sizing information will be available. +static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT@ + +# If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path. +# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. +# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` +# These will not be exposed to the container workloads, and are only provided for potential guest services. +sandbox_bind_mounts=@DEFBINDMOUNTS@ + +# VFIO Mode +# Determines how VFIO devices should be be presented to the container. +# Options: +# +# - vfio +# Matches behaviour of OCI runtimes (e.g. runc) as much as +# possible. VFIO devices will appear in the container as VFIO +# character devices under /dev/vfio. The exact names may differ +# from the host (they need to match the VM's IOMMU group numbers +# rather than the host's) +# +# - guest-kernel +# This is a Kata-specific behaviour that's useful in certain cases. +# The VFIO device is managed by whatever driver in the VM kernel +# claims it. This means it will appear as one or more device nodes +# or network interfaces depending on the nature of the device. +# Using this mode requires specially built workloads that know how +# to locate the relevant device interfaces within the VM. +# +vfio_mode="@DEFVFIOMODE@" + +# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will +# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest. +disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@ + +# Enabled experimental feature list, format: ["a", "b"]. +# Experimental features are features not stable enough for production, +# they may break compatibility, and are prepared for a big version bump. +# Supported experimental features: +# (default: []) +experimental=@DEFAULTEXPFEATURES@ + +# If enabled, user can run pprof tools with shim v2 process through kata-monitor. +# (default: false) +# enable_pprof = true + +# WARNING: All the options in the following section have not been implemented yet. +# This section was added as a placeholder. DO NOT USE IT! +[image] +# Container image service. +# +# Offload the CRI image management service to the Kata agent. +# (default: false) +#service_offload = true + +# Container image decryption keys provisioning. +# Applies only if service_offload is true. +# Keys can be provisioned locally (e.g. 
through a special command or +# a local file) or remotely (usually after the guest is remotely attested). +# The provision setting is a complete URL that lets the Kata agent decide +# which method to use in order to fetch the keys. +# +# Keys can be stored in a local file, in a measured and attested initrd: +#provision=data:///local/key/file +# +# Keys could be fetched through a special command or binary from the +# initrd (guest) image, e.g. a firmware call: +#provision=file:///path/to/bin/fetcher/in/guest +# +# Keys can be remotely provisioned. The Kata agent fetches them from e.g. +# a HTTPS URL: +#provision=https://my-key-broker.foo/tenant/ diff --git a/src/runtime/config/configuration-qemu-sev.toml.in b/src/runtime/config/configuration-qemu-sev.toml.in new file mode 100644 index 000000000000..56da05d20353 --- /dev/null +++ b/src/runtime/config/configuration-qemu-sev.toml.in @@ -0,0 +1,651 @@ +# Copyright 2022 Advanced Micro Devices, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# XXX: WARNING: this file is auto-generated. +# XXX: +# XXX: Source file: "@CONFIG_QEMU_SEV_IN@" +# XXX: Project: +# XXX: Name: @PROJECT_NAME@ +# XXX: Type: @PROJECT_TYPE@ + +[hypervisor.qemu] +path = "@QEMUPATH@" +kernel = "@KERNELSEVPATH@" +initrd = "@INITRDSEVPATH@" +machine_type = "@MACHINETYPE@" + +# Enable confidential guest support. +# Toggling that setting may trigger different hardware features, ranging +# from memory encryption to both memory and CPU-state encryption and integrity. +# The Kata Containers runtime dynamically detects the available feature set and +# aims at enabling the largest possible one, returning an error if none is +# available, or none is supported by the hypervisor. +# +# Known limitations: +# * Does not work by design: +# - CPU Hotplug +# - Memory Hotplug +# - NVDIMM devices +# +# Default false +confidential_guest = true + +# Enable running QEMU VMM as a non-root user. +# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as +# a non-root random user. See documentation for the limitations of this mode. +# rootless = true + +# List of valid annotation names for the hypervisor +# Each member of the list is a regular expression, which is the base name +# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" +enable_annotations = @DEFENABLEANNOTATIONS@ + +# List of valid annotations values for the hypervisor +# Each member of the list is a path pattern as described by glob(3). +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@ +valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@ + +# Optional space-separated list of options to pass to the guest kernel. +# For example, use `kernel_params = "vsyscall=emulate"` if you are having +# trouble running pre-2.15 glibc. +# +# WARNING: - any parameter specified here will take priority over the default +# parameter value of the same name used to start the virtual machine. +# Do not set values here unless you understand the impact of doing so as you +# may stop the virtual machine from booting. +# To see the list of default parameters, enable hypervisor debug, create a +# container and look for 'default-kernel-parameters' log entries. +kernel_params = "@KERNELPARAMS@" + +# Path to the firmware. +# If you want that qemu uses the default firmware leave this option empty +firmware = "@FIRMWARESEVPATH@" + +# Path to the firmware volume. 
+# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables
+# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables
+# can be customized per user while the UEFI code is kept the same.
+firmware_volume = "@FIRMWAREVOLUMEPATH@"
+
+# Machine accelerators
+# comma-separated list of machine accelerators to pass to the hypervisor.
+# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
+machine_accelerators="@MACHINEACCELERATORS@"
+
+# Qemu seccomp sandbox feature
+# comma-separated list of seccomp sandbox features to control the syscall access.
+# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"`
+# Note: "elevateprivileges=deny" doesn't work with the daemonize option, so it's removed from the seccomp sandbox
+# Another note: enabling this feature may reduce performance; you may enable
+# /proc/sys/net/core/bpf_jit_enable to reduce the impact. See https://man7.org/linux/man-pages/man8/bpfc.8.html
+#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+
+# CPU features
+# comma-separated list of cpu features to pass to the cpu
+# For example, `cpu_features = "pmu=off,vmx=off"`
+cpu_features="@CPUFEATURES@"
+
+# Default number of vCPUs per SB/VM:
+# unspecified or 0 --> will be set to @DEFVCPUS@
+# < 0 --> will be set to the actual number of physical cores
+# > 0 <= number of physical cores --> will be set to the specified number
+# > number of physical cores --> will be set to the actual number of physical cores
+default_vcpus = 1
+
+# Default maximum number of vCPUs per SB/VM:
+# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number
+# of vCPUs supported by KVM if that number is exceeded
+# > 0 <= number of physical cores --> will be set to the specified number
+# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number
+# of vCPUs supported by KVM if that number is exceeded
+# WARNING: Depending on the architecture, the maximum number of vCPUs supported by KVM is used when
+# the actual number of physical cores is greater than it.
+# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU
+# hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs
+# can be added to a SB/VM, but the memory footprint will be big. Another example, with
+# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of
+# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable,
+# unless you know what you are doing.
+# NOTICE: on arm platforms with a gicv2 interrupt controller, set it to 8.
+default_maxvcpus = @DEFMAXVCPUS@
+
+# Bridges can be used to hot plug devices.
+# Limitations:
+# * Currently only pci bridges are supported
+# * Up to 30 devices per bridge can be hot plugged.
+# * Up to 5 PCI bridges can be cold plugged per VM.
+# This limitation could be a bug in qemu or in the kernel
+# Default number of bridges per SB/VM:
+# unspecified or 0 --> will be set to @DEFBRIDGES@
+# > 1 <= 5 --> will be set to the specified number
+# > 5 --> will be set to 5
+default_bridges = @DEFBRIDGES@
+
+# Default memory size in MiB for SB/VM.
+# If unspecified then it will be set to @DEFMEMSZ@ MiB.
+default_memory = @DEFMEMSZ@
+#
+# Default memory slots per SB/VM.
+# If unspecified then it will be set to @DEFMEMSLOTS@.
+# This will determine how many times memory can be hot-added to the sandbox/VM.
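+# Illustrative sizing sketch (assumed values, not distribution defaults):
+# where memory hotplug is supported, a sandbox that boots with 2048 MiB,
+# can grow in up to 10 hot-add steps, and is capped at 8192 MiB could
+# combine:
+#   default_memory = 2048
+#   memory_slots = 10
+#   default_maxmemory = 8192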
+#memory_slots = @DEFMEMSLOTS@ + +# Default maximum memory in MiB per SB / VM +# unspecified or == 0 --> will be set to the actual amount of physical RAM +# > 0 <= amount of physical RAM --> will be set to the specified number +# > amount of physical RAM --> will be set to the actual amount of physical RAM +default_maxmemory = @DEFMAXMEMSZ@ + +# The size in MiB will be plused to max memory of hypervisor. +# It is the memory address space for the NVDIMM devie. +# If set block storage driver (block_device_driver) to "nvdimm", +# should set memory_offset to the size of block device. +# Default 0 +#memory_offset = 0 + +# Specifies virtio-mem will be enabled or not. +# Please note that this option should be used with the command +# "echo 1 > /proc/sys/vm/overcommit_memory". +# Default false +#enable_virtio_mem = true + +# Disable block device from being used for a container's rootfs. +# In case of a storage driver like devicemapper where a container's +# root file system is backed by a block device, the block device is passed +# directly to the hypervisor for performance reasons. +# This flag prevents the block device from being passed to the hypervisor, +# virtio-fs is used instead to pass the rootfs. +disable_block_device_use = @DEFDISABLEBLOCK@ + +# Shared file system type: +# - virtio-fs (default) +# - virtio-9p +# - virtio-fs-nydus +# - none +shared_fs = "@DEFSHAREDFS_QEMU_SEV_VIRTIOFS@" + +# Path to vhost-user-fs daemon. +virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@" + +# List of valid annotations values for the virtiofs daemon +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@ +valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@ + +# Default size of DAX cache in MiB +virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ + +# Extra args for virtiofsd daemon +# +# Format example: +# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +# Examples: +# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"] +# +# see `virtiofsd -h` for possible options. +virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ + +# Cache mode: +# +# - none +# Metadata, data, and pathname lookup are not cached in guest. They are +# always fetched from host and any changes are immediately pushed to host. +# +# - auto +# Metadata and pathname lookup cache expires after a configured amount of +# time (default is 1 second). Data is cached while the file is open (close +# to open consistency). +# +# - always +# Metadata, data, and pathname lookup are cached in guest and never expire. +virtio_fs_cache = "@DEFVIRTIOFSCACHE@" + +# Block storage driver to be used for the hypervisor in case the container +# rootfs is backed by a block device. This is virtio-scsi, virtio-blk +# or nvdimm. +block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@" + +# Specifies cache-related options will be set to block devices or not. +# Default false +#block_device_cache_set = true + +# Specifies cache-related options for block devices. +# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. +# Default false +#block_device_cache_direct = true + +# Specifies cache-related options for block devices. +# Denotes whether flush requests for the device are ignored. +# Default false +#block_device_cache_noflush = true + +# Enable iothreads (data-plane) to be used. This causes IO to be +# handled in a separate IO thread. This is currently only implemented +# for SCSI. 
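+# Illustrative sketch (not a recommendation): since iothreads are currently
+# only wired up for SCSI, a configuration that wants a dedicated I/O thread
+# would typically combine:
+#   block_device_driver = "virtio-scsi"
+#   enable_iothreads = true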
+# +enable_iothreads = @DEFENABLEIOTHREADS@ + +# Enable pre allocation of VM RAM, default false +# Enabling this will result in lower container density +# as all of the memory will be allocated and locked +# This is useful when you want to reserve all the memory +# upfront or in the cases where you want memory latencies +# to be very predictable +# Default false +#enable_mem_prealloc = true + +# Enable huge pages for VM RAM, default false +# Enabling this will result in the VM memory +# being allocated using huge pages. +# This is useful when you want to use vhost-user network +# stacks within the container. This will automatically +# result in memory pre allocation +#enable_hugepages = true + +# Enable vhost-user storage device, default false +# Enabling this will result in some Linux reserved block type +# major range 240-254 being chosen to represent vhost-user devices. +enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@ + +# The base directory specifically used for vhost-user devices. +# Its sub-path "block" is used for block devices; "block/sockets" is +# where we expect vhost-user sockets to live; "block/devices" is where +# simulated block device nodes for vhost-user devices to live. +vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" + +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: intel_iommu=on,iommu=pt +#enable_iommu = true + +# Enable IOMMU_PLATFORM, default false +# Enabling this will result in the VM device having iommu_platform=on set +#enable_iommu_platform = true + +# List of valid annotations values for the vhost user store path +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@ +valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@ + +# Enable file based guest memory support. The default is an empty string which +# will disable this feature. In the case of virtio-fs, this is enabled +# automatically and '/dev/shm' is used as the backing folder. +# This option will be ignored if VM templating is enabled. +#file_mem_backend = "@DEFFILEMEMBACKEND@" + +# List of valid annotations values for the file_mem_backend annotation +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@ +valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@ + +# -pflash can add image file to VM. The arguments of it should be in format +# of ["/path/to/flash0.img", "/path/to/flash1.img"] +pflashes = [] + +# This option changes the default hypervisor and kernel parameters +# to enable debug output where available. +# +# Default false +#enable_debug = true + +# Disable the customizations done in the runtime when it detects +# that it is running on top a VMM. This will result in the runtime +# behaving as it would when running on bare metal. +# +#disable_nesting_checks = true + +# This is the msize used for 9p shares. It is the number of bytes +# used for 9p packet payload. +#msize_9p = @DEFMSIZE9P@ + +# If false and nvdimm is supported, use nvdimm device to plug guest image. +# Otherwise virtio-block device is used. +# +# nvdimm is not supported when `confidential_guest = true`. +# +# Default is false +#disable_image_nvdimm = true + +# VFIO devices are hotplugged on a bridge by default. +# Enable hotplugging on root bus. 
This may be required for devices with +# a large PCI bar, as this is a current limitation with hotplugging on +# a bridge. +# Default false +#hotplug_vfio_on_root_bus = true + +# Before hot plugging a PCIe device, you need to add a pcie_root_port device. +# Use this parameter when using some large PCI bar devices, such as Nvidia GPU +# The value means the number of pcie_root_port +# This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35" +# Default 0 +#pcie_root_port = 2 + +# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off +# security (vhost-net runs ring0) for network I/O performance. +#disable_vhost_net = true + +# +# Default entropy source. +# The path to a host source of entropy (including a real hardware RNG) +# /dev/urandom and /dev/random are two main options. +# Be aware that /dev/random is a blocking source of entropy. If the host +# runs out of entropy, the VMs boot time will increase leading to get startup +# timeouts. +# The source of entropy /dev/urandom is non-blocking and provides a +# generally acceptable source of entropy. It should work well for pretty much +# all practical purposes. +#entropy_source= "@DEFENTROPYSOURCE@" + +# List of valid annotations values for entropy_source +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDENTROPYSOURCES@ +valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ + +# Path to OCI hook binaries in the *guest rootfs*. +# This does not affect host-side hooks which must instead be added to +# the OCI spec passed to the runtime. +# +# You can create a rootfs with hooks by customizing the osbuilder scripts: +# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder +# +# Hooks must be stored in a subdirectory of guest_hook_path according to their +# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}". +# The agent will scan these directories for executable files and add them, in +# lexicographical order, to the lifecycle of the guest container. +# Hooks are executed in the runtime namespace of the guest. See the official documentation: +# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks +# Warnings will be logged if any error is encountered while scanning for hooks, +# but it will not abort container execution. +#guest_hook_path = "/usr/share/oci/hooks" +# +# Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM). +# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic. +# Default 0-sized value means unlimited rate. +#rx_rate_limiter_max_rate = 0 +# Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM). +# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block) +# to discipline traffic. +# Default 0-sized value means unlimited rate. +#tx_rate_limiter_max_rate = 0 + +# Set where to save the guest memory dump file. +# If set, when GUEST_PANICKED event occurred, +# guest memeory will be dumped to host filesystem under guest_memory_dump_path, +# This directory will be created automatically if it does not exist. +# +# The dumped file(also called vmcore) can be processed with crash or gdb. +# +# WARNING: +# Dump guest’s memory can take very long depending on the amount of guest memory +# and use much disk space. +#guest_memory_dump_path="/var/crash/kata" + +# If enable paging. 
+# Basically, if you want to use "gdb" rather than "crash", +# or need the guest-virtual addresses in the ELF vmcore, +# then you should enable paging. +# +# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details +#guest_memory_dump_paging=false + +# Enable swap in the guest. Default false. +# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device +# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness") +# is bigger than 0. +# The size of the swap device should be +# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes. +# If swap_in_bytes is not set, the size should be memory_limit_in_bytes. +# If swap_in_bytes and memory_limit_in_bytes is not set, the size should +# be default_memory. +#enable_guest_swap = true + +# use legacy serial for guest console if available and implemented for architecture. Default false +#use_legacy_serial = true + +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + +[factory] +# VM templating support. Once enabled, new VMs are created from template +# using vm cloning. They will share the same initial kernel, initramfs and +# agent memory by mapping it readonly. It helps speeding up new container +# creation and saves a lot of memory if there are many kata containers running +# on the same host. +# +# When disabled, new VMs are created from scratch. +# +# Note: Requires "initrd=" to be set ("image=" is not supported). +# +# Default false +#enable_template = true + +# Specifies the path of template. +# +# Default "/run/vc/vm/template" +#template_path = "/run/vc/vm/template" + +# The number of caches of VMCache: +# unspecified or == 0 --> VMCache is disabled +# > 0 --> will be set to the specified number +# +# VMCache is a function that creates VMs as caches before using it. +# It helps speed up new container creation. +# The function consists of a server and some clients communicating +# through Unix socket. The protocol is gRPC in protocols/cache/cache.proto. +# The VMCache server will create some VMs and cache them by factory cache. +# It will convert the VM to gRPC format and transport it when gets +# requestion from clients. +# Factory grpccache is the VMCache client. It will request gRPC format +# VM and convert it back to a VM. If VMCache function is enabled, +# kata-runtime will request VM from factory grpccache when it creates +# a new sandbox. +# +# Default 0 +#vm_cache_number = 0 + +# Specify the address of the Unix socket that is used by VMCache. +# +# Default /var/run/kata-containers/cache.sock +#vm_cache_endpoint = "/var/run/kata-containers/cache.sock" + +[agent.@PROJECT_TYPE@] +# If enabled, make the agent display debug-level messages. +# (default: disabled) +#enable_debug = true + +# Enable agent tracing. +# +# If enabled, the agent will generate OpenTelemetry trace spans. +# +# Notes: +# +# - If the runtime also has tracing enabled, the agent spans will be +# associated with the appropriate runtime parent span. 
+# - If enabled, the runtime will wait for the container to shutdown, +# increasing the container shutdown time slightly. +# +# (default: disabled) +#enable_tracing = true + +# Comma separated list of kernel modules and their parameters. +# These modules will be loaded in the guest kernel using modprobe(8). +# The following example can be used to load two kernel modules with parameters +# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"] +# The first word is considered as the module name and the rest as its parameters. +# Container will not be started when: +# * A kernel module is specified and the modprobe command is not installed in the guest +# or it fails loading the module. +# * The module is not available in the guest or it doesn't met the guest kernel +# requirements, like architecture and version. +# +kernel_modules=[] + +# Enable debug console. + +# If enabled, user can connect guest OS running inside hypervisor +# through "kata-runtime exec " command + +#debug_console_enabled = true + +# Agent connection dialing timeout value in seconds +# (default: 90) +dial_timeout = 90 + +[runtime] +# If enabled, the runtime will log additional debug messages to the +# system log +# (default: disabled) +#enable_debug = true +# +# Internetworking model +# Determines how the VM should be connected to the +# the container network interface +# Options: +# +# - macvtap +# Used when the Container network interface can be bridged using +# macvtap. +# +# - none +# Used when customize network. Only creates a tap device. No veth pair. +# +# - tcfilter +# Uses tc filter rules to redirect traffic from the network interface +# provided by plugin to a tap interface connected to the VM. +# +internetworking_model="@DEFNETWORKMODEL_QEMU@" + +# disable guest seccomp +# Determines whether container seccomp profiles are passed to the virtual +# machine and applied by the kata agent. If set to true, seccomp is not applied +# within the guest +# (default: true) +disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ + +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + +# If enabled, the runtime will create opentracing.io traces and spans. +# (See https://www.jaegertracing.io/docs/getting-started). +# (default: disabled) +#enable_tracing = true + +# Set the full url to the Jaeger HTTP Thrift collector. +# The default if not set will be "http://localhost:14268/api/traces" +#jaeger_endpoint = "" + +# Sets the username to be used if basic auth is required for Jaeger. +#jaeger_user = "" + +# Sets the password to be used if basic auth is required for Jaeger. +#jaeger_password = "" + +# If enabled, the runtime will not create a network namespace for shim and hypervisor processes. +# This option may have some potential impacts to your host. It should only be used when you know what you're doing. +# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only +# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge +# (like OVS) directly. 
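+# Illustrative sketch (assumed combination, adjust to your CNI setup): to
+# keep the tap device in the host network namespace and attach it to a host
+# bridge such as OVS, this option is paired with the "none" model:
+#   internetworking_model = "none"
+#   disable_new_netns = true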
+# (default: false)
+#disable_new_netns = true
+
+# If enabled, the runtime will add all the kata processes inside one dedicated cgroup.
+# The container cgroups in the host are not created, just one single cgroup per sandbox.
+# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
+# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
+# The sandbox cgroup is constrained if there is no container type annotation.
+# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
+sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+
+# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
+# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
+# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug.
+# Compatibility for determining appropriate sandbox (VM) size:
+# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
+# does not yet support sandbox sizing annotations.
+# - When running single containers using a tool like ctr, container sizing information will be available.
+static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_TEE@
+
+# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
+# This is only valid if filesystem sharing is utilized. The provided path(s) will be bind-mounted into the shared fs directory.
+# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
+# These will not be exposed to the container workloads, and are only provided for potential guest services.
+sandbox_bind_mounts=@DEFBINDMOUNTS@
+
+# VFIO Mode
+# Determines how VFIO devices should be presented to the container.
+# Options:
+#
+# - vfio
+# Matches behaviour of OCI runtimes (e.g. runc) as much as
+# possible. VFIO devices will appear in the container as VFIO
+# character devices under /dev/vfio. The exact names may differ
+# from the host (they need to match the VM's IOMMU group numbers
+# rather than the host's)
+#
+# - guest-kernel
+# This is a Kata-specific behaviour that's useful in certain cases.
+# The VFIO device is managed by whatever driver in the VM kernel
+# claims it. This means it will appear as one or more device nodes
+# or network interfaces depending on the nature of the device.
+# Using this mode requires specially built workloads that know how
+# to locate the relevant device interfaces within the VM.
+#
+vfio_mode="@DEFVFIOMODE@"
+
+# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
+# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
+disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+
+# List of enabled experimental features, format: ["a", "b"].
+# Experimental features are not stable enough for production;
+# they may break compatibility, and are prepared for a big version bump.
+# Supported experimental features:
+# (default: [])
+experimental=@DEFAULTEXPFEATURES@
+
+# If enabled, users can run pprof tools against the shim v2 process through kata-monitor.
+# (default: false) +# enable_pprof = true + +# WARNING: All the options in the following section have not been implemented yet. +# This section was added as a placeholder. DO NOT USE IT! +[image] +# Container image service. +# +# Offload the CRI image management service to the Kata agent. +# (default: false) +service_offload = @DEFSERVICEOFFLOAD@ + +# Container image decryption keys provisioning. +# Applies only if service_offload is true. +# Keys can be provisioned locally (e.g. through a special command or +# a local file) or remotely (usually after the guest is remotely attested). +# The provision setting is a complete URL that lets the Kata agent decide +# which method to use in order to fetch the keys. +# +# Keys can be stored in a local file, in a measured and attested initrd: +#provision=data:///local/key/file +# +# Keys could be fetched through a special command or binary from the +# initrd (guest) image, e.g. a firmware call: +#provision=file:///path/to/bin/fetcher/in/guest +# +# Keys can be remotely provisioned. The Kata agent fetches them from e.g. +# a HTTPS URL: +#provision=https://my-key-broker.foo/tenant/ diff --git a/src/runtime/config/configuration-qemu-snp.toml.in b/src/runtime/config/configuration-qemu-snp.toml.in new file mode 100644 index 000000000000..f85985b19767 --- /dev/null +++ b/src/runtime/config/configuration-qemu-snp.toml.in @@ -0,0 +1,691 @@ +# Copyright (c) 2017-2019 Intel Corporation +# Copyright (c) 2021 Adobe Inc. +# Copyright (c) 2023 IBM Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# XXX: WARNING: this file is auto-generated. +# XXX: +# XXX: Source file: "@CONFIG_QEMU_SNP_IN@" +# XXX: Project: +# XXX: Name: @PROJECT_NAME@ +# XXX: Type: @PROJECT_TYPE@ + +[hypervisor.qemu] +path = "@QEMUSNPPATH@" +kernel = "@KERNELSNPPATH@" +#image = "@IMAGEPATH@" +initrd = "@INITRDSEVPATH@" +machine_type = "@MACHINETYPE@" + +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + +# Enable confidential guest support. +# Toggling that setting may trigger different hardware features, ranging +# from memory encryption to both memory and CPU-state encryption and integrity. +# The Kata Containers runtime dynamically detects the available feature set and +# aims at enabling the largest possible one, returning an error if none is +# available, or none is supported by the hypervisor. +# +# Known limitations: +# * Does not work by design: +# - CPU Hotplug +# - Memory Hotplug +# - NVDIMM devices +# +# Default false +confidential_guest = true + +# enable SEV SNP VMs +sev_snp_guest = true + +# Enable running QEMU VMM as a non-root user. +# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as +# a non-root random user. See documentation for the limitations of this mode. +# rootless = true + +# List of valid annotation names for the hypervisor +# Each member of the list is a regular expression, which is the base name +# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" +enable_annotations = @DEFENABLEANNOTATIONS@ + +# List of valid annotations values for the hypervisor +# Each member of the list is a path pattern as described by glob(3). +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@ +valid_hypervisor_paths = @QEMUSNPVALIDHYPERVISORPATHS@ + +# Optional space-separated list of options to pass to the guest kernel. 
+# For example, use `kernel_params = "vsyscall=emulate"` if you are having +# trouble running pre-2.15 glibc. +# +# WARNING: - any parameter specified here will take priority over the default +# parameter value of the same name used to start the virtual machine. +# Do not set values here unless you understand the impact of doing so as you +# may stop the virtual machine from booting. +# To see the list of default parameters, enable hypervisor debug, create a +# container and look for 'default-kernel-parameters' log entries. +kernel_params = "@KERNELPARAMS@" + +# Path to the firmware. +# If you want that qemu uses the default firmware leave this option empty +firmware = "@FIRMWARESNPPATH@" + +# Path to the firmware volume. +# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables +# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables +# can be customized per each user while UEFI code is kept same. +firmware_volume = "@FIRMWARETDVFVOLUMEPATH@" + +# Machine accelerators +# comma-separated list of machine accelerators to pass to the hypervisor. +# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"` +machine_accelerators="@MACHINEACCELERATORS@" + +# Qemu seccomp sandbox feature +# comma-separated list of seccomp sandbox features to control the syscall access. +# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"` +# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox +# Another note: enabling this feature may reduce performance, you may enable +# /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html +#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@" + +# CPU features +# comma-separated list of cpu features to pass to the cpu +# For example, `cpu_features = "pmu=off,vmx=off" +cpu_features="@CPUFEATURES@" + +# Default number of vCPUs per SB/VM: +# unspecified or 0 --> will be set to @DEFVCPUS@ +# < 0 --> will be set to the actual number of physical cores +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores +default_vcpus = 1 + +# Default maximum number of vCPUs per SB/VM: +# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used when +# the actual number of physical cores is greater than it. +# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU +# the hotplug functionality. For example, `default_maxvcpus = 240` specifies that until 240 vCPUs +# can be added to a SB/VM, but the memory footprint will be big. Another example, with +# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of +# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, +# unless you know what are you doing. +# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8. +default_maxvcpus = @DEFMAXVCPUS@ + +# Bridges can be used to hot plug devices. 
+# Limitations: +# * Currently only pci bridges are supported +# * Until 30 devices per bridge can be hot plugged. +# * Until 5 PCI bridges can be cold plugged per VM. +# This limitation could be a bug in qemu or in the kernel +# Default number of bridges per SB/VM: +# unspecified or 0 --> will be set to @DEFBRIDGES@ +# > 1 <= 5 --> will be set to the specified number +# > 5 --> will be set to 5 +default_bridges = @DEFBRIDGES@ + +# Default memory size in MiB for SB/VM. +# If unspecified then it will be set @DEFMEMSZ@ MiB. +default_memory = @DEFMEMSZ@ +# +# Default memory slots per SB/VM. +# If unspecified then it will be set @DEFMEMSLOTS@. +# This is will determine the times that memory will be hotadded to sandbox/VM. +#memory_slots = @DEFMEMSLOTS@ + +# Default maximum memory in MiB per SB / VM +# unspecified or == 0 --> will be set to the actual amount of physical RAM +# > 0 <= amount of physical RAM --> will be set to the specified number +# > amount of physical RAM --> will be set to the actual amount of physical RAM +default_maxmemory = @DEFMAXMEMSZ@ + +# The size in MiB will be plused to max memory of hypervisor. +# It is the memory address space for the NVDIMM devie. +# If set block storage driver (block_device_driver) to "nvdimm", +# should set memory_offset to the size of block device. +# Default 0 +#memory_offset = 0 + +# Specifies virtio-mem will be enabled or not. +# Please note that this option should be used with the command +# "echo 1 > /proc/sys/vm/overcommit_memory". +# Default false +#enable_virtio_mem = true + +# Disable block device from being used for a container's rootfs. +# In case of a storage driver like devicemapper where a container's +# root file system is backed by a block device, the block device is passed +# directly to the hypervisor for performance reasons. +# This flag prevents the block device from being passed to the hypervisor, +# virtio-fs is used instead to pass the rootfs. +disable_block_device_use = @DEFDISABLEBLOCK@ + +# Shared file system type: +# - virtio-fs (default) +# - virtio-9p +# - virtio-fs-nydus +# - none +shared_fs = "@DEFSHAREDFS_QEMU_SNP_VIRTIOFS@" + +# Path to vhost-user-fs daemon. +virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@" + +# List of valid annotations values for the virtiofs daemon +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@ +valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@ + +# Default size of DAX cache in MiB +virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ + +# Default size of virtqueues +virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@ + +# Extra args for virtiofsd daemon +# +# Format example: +# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +# Examples: +# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"] +# +# see `virtiofsd -h` for possible options. +virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ + +# Cache mode: +# +# - never +# Metadata, data, and pathname lookup are not cached in guest. They are +# always fetched from host and any changes are immediately pushed to host. +# +# - auto +# Metadata and pathname lookup cache expires after a configured amount of +# time (default is 1 second). Data is cached while the file is open (close +# to open consistency). +# +# - always +# Metadata, data, and pathname lookup are cached in guest and never expire. +virtio_fs_cache = "@DEFVIRTIOFSCACHE@" + +# Block storage driver to be used for the hypervisor in case the container +# rootfs is backed by a block device. 
This is virtio-scsi, virtio-blk +# or nvdimm. +block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@" + +# aio is the I/O mechanism used by qemu +# Options: +# +# - threads +# Pthread based disk I/O. +# +# - native +# Native Linux I/O. +# +# - io_uring +# Linux io_uring API. This provides the fastest I/O operations on Linux, requires kernel>5.1 and +# qemu >=5.0. +block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" + +# Specifies cache-related options will be set to block devices or not. +# Default false +#block_device_cache_set = true + +# Specifies cache-related options for block devices. +# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. +# Default false +#block_device_cache_direct = true + +# Specifies cache-related options for block devices. +# Denotes whether flush requests for the device are ignored. +# Default false +#block_device_cache_noflush = true + +# Enable iothreads (data-plane) to be used. This causes IO to be +# handled in a separate IO thread. This is currently only implemented +# for SCSI. +# +enable_iothreads = @DEFENABLEIOTHREADS@ + +# Enable pre allocation of VM RAM, default false +# Enabling this will result in lower container density +# as all of the memory will be allocated and locked +# This is useful when you want to reserve all the memory +# upfront or in the cases where you want memory latencies +# to be very predictable +# Default false +#enable_mem_prealloc = true + +# Enable huge pages for VM RAM, default false +# Enabling this will result in the VM memory +# being allocated using huge pages. +# This is useful when you want to use vhost-user network +# stacks within the container. This will automatically +# result in memory pre allocation +#enable_hugepages = true + +# Enable vhost-user storage device, default false +# Enabling this will result in some Linux reserved block type +# major range 240-254 being chosen to represent vhost-user devices. +enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@ + +# The base directory specifically used for vhost-user devices. +# Its sub-path "block" is used for block devices; "block/sockets" is +# where we expect vhost-user sockets to live; "block/devices" is where +# simulated block device nodes for vhost-user devices to live. +vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" + +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: intel_iommu=on,iommu=pt +#enable_iommu = true + +# Enable IOMMU_PLATFORM, default false +# Enabling this will result in the VM device having iommu_platform=on set +#enable_iommu_platform = true + +# List of valid annotations values for the vhost user store path +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@ +valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@ + +# The timeout for reconnecting on non-server spdk sockets when the remote end goes away. +# qemu will delay this many seconds and then attempt to reconnect. +# Zero disables reconnecting, and the default is zero. +vhost_user_reconnect_timeout_sec = 0 + +# Enable file based guest memory support. The default is an empty string which +# will disable this feature. In the case of virtio-fs, this is enabled +# automatically and '/dev/shm' is used as the backing folder. +# This option will be ignored if VM templating is enabled. 
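+# Illustrative sketch (the path below is an assumption, not a recommendation):
+# backing guest memory with files from a hugetlbfs mount could look like:
+#   file_mem_backend = "/dev/hugepages"
+# together with a matching entry in valid_file_mem_backends below.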
+file_mem_backend = "" + +# List of valid annotations values for the file_mem_backend annotation +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@ +valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@ + +# -pflash can add image file to VM. The arguments of it should be in format +# of ["/path/to/flash0.img", "/path/to/flash1.img"] +pflashes = [] + +# This option changes the default hypervisor and kernel parameters +# to enable debug output where available. And Debug also enable the hmp socket. +# +# Default false +#enable_debug = true + +# Disable the customizations done in the runtime when it detects +# that it is running on top a VMM. This will result in the runtime +# behaving as it would when running on bare metal. +# +#disable_nesting_checks = true + +# This is the msize used for 9p shares. It is the number of bytes +# used for 9p packet payload. +#msize_9p = @DEFMSIZE9P@ + +# If false and nvdimm is supported, use nvdimm device to plug guest image. +# Otherwise virtio-block device is used. +# +# nvdimm is not supported when `confidential_guest = true`. +# +# Default is false +disable_image_nvdimm = true + +# VFIO devices are hotplugged on a bridge by default. +# Enable hotplugging on root bus. This may be required for devices with +# a large PCI bar, as this is a current limitation with hotplugging on +# a bridge. +# Default false +#hotplug_vfio_on_root_bus = true + +# Before hot plugging a PCIe device, you need to add a pcie_root_port device. +# Use this parameter when using some large PCI bar devices, such as Nvidia GPU +# The value means the number of pcie_root_port +# This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35" +# Default 0 +#pcie_root_port = 2 + +# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off +# security (vhost-net runs ring0) for network I/O performance. +#disable_vhost_net = true + +# +# Default entropy source. +# The path to a host source of entropy (including a real hardware RNG) +# /dev/urandom and /dev/random are two main options. +# Be aware that /dev/random is a blocking source of entropy. If the host +# runs out of entropy, the VMs boot time will increase leading to get startup +# timeouts. +# The source of entropy /dev/urandom is non-blocking and provides a +# generally acceptable source of entropy. It should work well for pretty much +# all practical purposes. +#entropy_source= "@DEFENTROPYSOURCE@" + +# List of valid annotations values for entropy_source +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDENTROPYSOURCES@ +valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ + +# Path to OCI hook binaries in the *guest rootfs*. +# This does not affect host-side hooks which must instead be added to +# the OCI spec passed to the runtime. +# +# You can create a rootfs with hooks by customizing the osbuilder scripts: +# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder +# +# Hooks must be stored in a subdirectory of guest_hook_path according to their +# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}". +# The agent will scan these directories for executable files and add them, in +# lexicographical order, to the lifecycle of the guest container. +# Hooks are executed in the runtime namespace of the guest. 
See the official documentation: +# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks +# Warnings will be logged if any error is encountered while scanning for hooks, +# but it will not abort container execution. +#guest_hook_path = "/usr/share/oci/hooks" +# +# Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM). +# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic. +# Default 0-sized value means unlimited rate. +#rx_rate_limiter_max_rate = 0 +# Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM). +# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block) +# to discipline traffic. +# Default 0-sized value means unlimited rate. +#tx_rate_limiter_max_rate = 0 + +# Set where to save the guest memory dump file. +# If set, when GUEST_PANICKED event occurred, +# guest memeory will be dumped to host filesystem under guest_memory_dump_path, +# This directory will be created automatically if it does not exist. +# +# The dumped file(also called vmcore) can be processed with crash or gdb. +# +# WARNING: +# Dump guest’s memory can take very long depending on the amount of guest memory +# and use much disk space. +#guest_memory_dump_path="/var/crash/kata" + +# If enable paging. +# Basically, if you want to use "gdb" rather than "crash", +# or need the guest-virtual addresses in the ELF vmcore, +# then you should enable paging. +# +# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details +#guest_memory_dump_paging=false + +# Enable swap in the guest. Default false. +# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device +# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness") +# is bigger than 0. +# The size of the swap device should be +# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes. +# If swap_in_bytes is not set, the size should be memory_limit_in_bytes. +# If swap_in_bytes and memory_limit_in_bytes is not set, the size should +# be default_memory. +#enable_guest_swap = true + +# use legacy serial for guest console if available and implemented for architecture. Default false +#use_legacy_serial = true + +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + + +[factory] +# VM templating support. Once enabled, new VMs are created from template +# using vm cloning. They will share the same initial kernel, initramfs and +# agent memory by mapping it readonly. It helps speeding up new container +# creation and saves a lot of memory if there are many kata containers running +# on the same host. +# +# When disabled, new VMs are created from scratch. +# +# Note: Requires "initrd=" to be set ("image=" is not supported). +# +# Default false +#enable_template = true + +# Specifies the path of template. 
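+# Illustrative sketch (assumes an initrd-based configuration, as noted above):
+# enabling templating with the default template location would combine:
+#   enable_template = true
+#   template_path = "/run/vc/vm/template"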
+# +# Default "/run/vc/vm/template" +#template_path = "/run/vc/vm/template" + +# The number of caches of VMCache: +# unspecified or == 0 --> VMCache is disabled +# > 0 --> will be set to the specified number +# +# VMCache is a function that creates VMs as caches before using it. +# It helps speed up new container creation. +# The function consists of a server and some clients communicating +# through Unix socket. The protocol is gRPC in protocols/cache/cache.proto. +# The VMCache server will create some VMs and cache them by factory cache. +# It will convert the VM to gRPC format and transport it when gets +# requestion from clients. +# Factory grpccache is the VMCache client. It will request gRPC format +# VM and convert it back to a VM. If VMCache function is enabled, +# kata-runtime will request VM from factory grpccache when it creates +# a new sandbox. +# +# Default 0 +#vm_cache_number = 0 + +# Specify the address of the Unix socket that is used by VMCache. +# +# Default /var/run/kata-containers/cache.sock +#vm_cache_endpoint = "/var/run/kata-containers/cache.sock" + +[agent.@PROJECT_TYPE@] +# If enabled, make the agent display debug-level messages. +# (default: disabled) +#enable_debug = true + +# Enable agent tracing. +# +# If enabled, the agent will generate OpenTelemetry trace spans. +# +# Notes: +# +# - If the runtime also has tracing enabled, the agent spans will be +# associated with the appropriate runtime parent span. +# - If enabled, the runtime will wait for the container to shutdown, +# increasing the container shutdown time slightly. +# +# (default: disabled) +#enable_tracing = true + +# Comma separated list of kernel modules and their parameters. +# These modules will be loaded in the guest kernel using modprobe(8). +# The following example can be used to load two kernel modules with parameters +# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"] +# The first word is considered as the module name and the rest as its parameters. +# Container will not be started when: +# * A kernel module is specified and the modprobe command is not installed in the guest +# or it fails loading the module. +# * The module is not available in the guest or it doesn't met the guest kernel +# requirements, like architecture and version. +# +kernel_modules=[] + +# Enable debug console. + +# If enabled, user can connect guest OS running inside hypervisor +# through "kata-runtime exec " command + +#debug_console_enabled = true + +# Agent connection dialing timeout value in seconds +# (default: 90) +dial_timeout = 90 + +[runtime] +# If enabled, the runtime will log additional debug messages to the +# system log +# (default: disabled) +#enable_debug = true +# +# Internetworking model +# Determines how the VM should be connected to the +# the container network interface +# Options: +# +# - macvtap +# Used when the Container network interface can be bridged using +# macvtap. +# +# - none +# Used when customize network. Only creates a tap device. No veth pair. +# +# - tcfilter +# Uses tc filter rules to redirect traffic from the network interface +# provided by plugin to a tap interface connected to the VM. +# +internetworking_model="@DEFNETWORKMODEL_QEMU@" + +# disable guest seccomp +# Determines whether container seccomp profiles are passed to the virtual +# machine and applied by the kata agent. 
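As an aside on the VMCache mechanism described earlier in this file, enabling it only needs the two [factory] keys below. The cache size is an assumed example; the socket path is the documented default.

[factory]
vm_cache_number = 2
vm_cache_endpoint = "/var/run/kata-containers/cache.sock"

With this in place the runtime requests pre-created VMs from the cache server instead of booting each sandbox from scratch.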
If set to true, seccomp is not applied
+# within the guest
+# (default: true)
+disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
+
+# vCPUs pinning settings
+# if enabled, each vCPU thread will be scheduled to a fixed CPU
+# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
+# enable_vcpus_pinning = false
+
+# Apply a custom SELinux security policy to the container process inside the VM.
+# This is used when you want to apply a type other than the default `container_t`,
+# so general users should not uncomment and apply it.
+# (format: "user:role:type")
+# Note: You cannot specify MCS policy with the label because the sensitivity levels and
+# categories are determined automatically by high-level container runtimes such as containerd.
+#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
+
+# If enabled, the runtime will create opentracing.io traces and spans.
+# (See https://www.jaegertracing.io/docs/getting-started).
+# (default: disabled)
+#enable_tracing = true
+
+# Set the full URL to the Jaeger HTTP Thrift collector.
+# The default if not set will be "http://localhost:14268/api/traces"
+#jaeger_endpoint = ""
+
+# Sets the username to be used if basic auth is required for Jaeger.
+#jaeger_user = ""
+
+# Sets the password to be used if basic auth is required for Jaeger.
+#jaeger_password = ""
+
+# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
+# This option may have some potential impact on your host. It should only be used when you know what you're doing.
+# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only
+# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
+# (like OVS) directly.
+# (default: false)
+#disable_new_netns = true
+
+# If enabled, the runtime will add all the kata processes inside one dedicated cgroup.
+# The container cgroups in the host are not created, just one single cgroup per sandbox.
+# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
+# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
+# The sandbox cgroup is constrained if there is no container type annotation.
+# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
+sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+
+# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
+# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
+# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug.
+# Compatibility for determining appropriate sandbox (VM) size:
+# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
+# does not yet support sandbox sizing annotations.
+# - When running single containers using a tool like ctr, container sizing information will be available.
+static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_TEE@
+
+# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
+# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
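For illustration, a deployment that wants to expose two read-only host directories to guest-side services could set the following; the host paths are assumptions, not defaults.

sandbox_bind_mounts = ["/opt/kata/guest-services", "/etc/example-ca-certs"]

Per the surrounding comments, these paths end up under the sandbox's shared directory in the guest and are not visible to the container workloads.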
+# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` +# These will not be exposed to the container workloads, and are only provided for potential guest services. +sandbox_bind_mounts=@DEFBINDMOUNTS@ + +# VFIO Mode +# Determines how VFIO devices should be be presented to the container. +# Options: +# +# - vfio +# Matches behaviour of OCI runtimes (e.g. runc) as much as +# possible. VFIO devices will appear in the container as VFIO +# character devices under /dev/vfio. The exact names may differ +# from the host (they need to match the VM's IOMMU group numbers +# rather than the host's) +# +# - guest-kernel +# This is a Kata-specific behaviour that's useful in certain cases. +# The VFIO device is managed by whatever driver in the VM kernel +# claims it. This means it will appear as one or more device nodes +# or network interfaces depending on the nature of the device. +# Using this mode requires specially built workloads that know how +# to locate the relevant device interfaces within the VM. +# +vfio_mode="@DEFVFIOMODE@" + +# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will +# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest. +disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@ + +# Enabled experimental feature list, format: ["a", "b"]. +# Experimental features are features not stable enough for production, +# they may break compatibility, and are prepared for a big version bump. +# Supported experimental features: +# (default: []) +experimental=@DEFAULTEXPFEATURES@ + +# If enabled, user can run pprof tools with shim v2 process through kata-monitor. +# (default: false) +# enable_pprof = true + +# WARNING: All the options in the following section have not been implemented yet. +# This section was added as a placeholder. DO NOT USE IT! +[image] +# Container image service. +# +# Offload the CRI image management service to the Kata agent. +# (default: false) +#service_offload = true + +# Container image decryption keys provisioning. +# Applies only if service_offload is true. +# Keys can be provisioned locally (e.g. through a special command or +# a local file) or remotely (usually after the guest is remotely attested). +# The provision setting is a complete URL that lets the Kata agent decide +# which method to use in order to fetch the keys. +# +# Keys can be stored in a local file, in a measured and attested initrd: +#provision=data:///local/key/file +# +# Keys could be fetched through a special command or binary from the +# initrd (guest) image, e.g. a firmware call: +#provision=file:///path/to/bin/fetcher/in/guest +# +# Keys can be remotely provisioned. The Kata agent fetches them from e.g. +# a HTTPS URL: +#provision=https://my-key-broker.foo/tenant/ diff --git a/src/runtime/config/configuration-qemu-tdx.toml.in b/src/runtime/config/configuration-qemu-tdx.toml.in new file mode 100644 index 000000000000..ca5d0ded3a18 --- /dev/null +++ b/src/runtime/config/configuration-qemu-tdx.toml.in @@ -0,0 +1,687 @@ +# Copyright (c) 2017-2019 Intel Corporation +# Copyright (c) 2021 Adobe Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# XXX: WARNING: this file is auto-generated. 
+# XXX: +# XXX: Source file: "@CONFIG_QEMU_IN@" +# XXX: Project: +# XXX: Name: @PROJECT_NAME@ +# XXX: Type: @PROJECT_TYPE@ + +[hypervisor.qemu] +path = "@QEMUTDXPATH@" +kernel = "@KERNELTDXPATH@" +image = "@IMAGETDXPATH@" +# initrd = "@INITRDPATH@" +machine_type = "@MACHINETYPE@" + +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + +# Enable confidential guest support. +# Toggling that setting may trigger different hardware features, ranging +# from memory encryption to both memory and CPU-state encryption and integrity. +# The Kata Containers runtime dynamically detects the available feature set and +# aims at enabling the largest possible one, returning an error if none is +# available, or none is supported by the hypervisor. +# +# Known limitations: +# * Does not work by design: +# - CPU Hotplug +# - Memory Hotplug +# - NVDIMM devices +# +# Default false +confidential_guest = true + +# Enable running QEMU VMM as a non-root user. +# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as +# a non-root random user. See documentation for the limitations of this mode. +# rootless = true + +# List of valid annotation names for the hypervisor +# Each member of the list is a regular expression, which is the base name +# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" +enable_annotations = @DEFENABLEANNOTATIONS@ + +# List of valid annotations values for the hypervisor +# Each member of the list is a path pattern as described by glob(3). +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@ +valid_hypervisor_paths = @QEMUTDXVALIDHYPERVISORPATHS@ + +# Optional space-separated list of options to pass to the guest kernel. +# For example, use `kernel_params = "vsyscall=emulate"` if you are having +# trouble running pre-2.15 glibc. +# +# WARNING: - any parameter specified here will take priority over the default +# parameter value of the same name used to start the virtual machine. +# Do not set values here unless you understand the impact of doing so as you +# may stop the virtual machine from booting. +# To see the list of default parameters, enable hypervisor debug, create a +# container and look for 'default-kernel-parameters' log entries. +kernel_params = "@KERNELTDXPARAMS@" + +# Path to the firmware. +# If you want that qemu uses the default firmware leave this option empty +firmware = "@FIRMWARETDVFPATH@" + +# Path to the firmware volume. +# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables +# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables +# can be customized per each user while UEFI code is kept same. +firmware_volume = "@FIRMWARETDVFVOLUMEPATH@" + +# Machine accelerators +# comma-separated list of machine accelerators to pass to the hypervisor. +# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"` +machine_accelerators="@MACHINEACCELERATORS@" + +# Qemu seccomp sandbox feature +# comma-separated list of seccomp sandbox features to control the syscall access. +# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"` +# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox +# Another note: enabling this feature may reduce performance, you may enable +# /proc/sys/net/core/bpf_jit_enable to reduce the impact. 
see https://man7.org/linux/man-pages/man8/bpfc.8.html
+#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
+
+# CPU features
+# comma-separated list of cpu features to pass to the cpu
+# For example, `cpu_features = "pmu=off,vmx=off"`
+cpu_features="@TDXCPUFEATURES@"
+
+# Default number of vCPUs per SB/VM:
+# unspecified or 0 --> will be set to @DEFVCPUS@
+# < 0 --> will be set to the actual number of physical cores
+# > 0 <= number of physical cores --> will be set to the specified number
+# > number of physical cores --> will be set to the actual number of physical cores
+default_vcpus = 1
+
+# Default maximum number of vCPUs per SB/VM:
+# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number
+# of vCPUs supported by KVM if that number is exceeded
+# > 0 <= number of physical cores --> will be set to the specified number
+# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number
+# of vCPUs supported by KVM if that number is exceeded
+# WARNING: Depending on the architecture, the maximum number of vCPUs supported by KVM is used when
+# the actual number of physical cores is greater than it.
+# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU
+# hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs
+# can be added to a SB/VM, but the memory footprint will be big. Another example, with
+# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of
+# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable,
+# unless you know what you are doing.
+# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8.
+default_maxvcpus = @DEFMAXVCPUS@
+
+# Bridges can be used to hot plug devices.
+# Limitations:
+# * Currently only pci bridges are supported
+# * Up to 30 devices per bridge can be hot plugged.
+# * Up to 5 PCI bridges can be cold plugged per VM.
+# This limitation could be a bug in qemu or in the kernel
+# Default number of bridges per SB/VM:
+# unspecified or 0 --> will be set to @DEFBRIDGES@
+# > 1 <= 5 --> will be set to the specified number
+# > 5 --> will be set to 5
+default_bridges = @DEFBRIDGES@
+
+# Default memory size in MiB for SB/VM.
+# If unspecified then it will be set to @DEFMEMSZ@ MiB.
+default_memory = @DEFMEMSZ@
+#
+# Default memory slots per SB/VM.
+# If unspecified then it will be set to @DEFMEMSLOTS@.
+# This will determine how many times memory can be hot-added to the sandbox/VM.
+#memory_slots = @DEFMEMSLOTS@
+
+# Default maximum memory in MiB per SB / VM
+# unspecified or == 0 --> will be set to the actual amount of physical RAM
+# > 0 <= amount of physical RAM --> will be set to the specified number
+# > amount of physical RAM --> will be set to the actual amount of physical RAM
+default_maxmemory = @DEFMAXMEMSZ@
+
+# This size in MiB will be added to the hypervisor's maximum memory.
+# It is the memory address space for the NVDIMM device.
+# If the block storage driver (block_device_driver) is set to "nvdimm",
+# memory_offset should be set to the size of the block device.
+# Default 0
+#memory_offset = 0
+
+# Specifies whether virtio-mem will be enabled or not.
+# Please note that this option should be used with the command
+# "echo 1 > /proc/sys/vm/overcommit_memory".
+# Default false
+#enable_virtio_mem = true
+
+# Disable block device from being used for a container's rootfs.
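To make the sizing rules above concrete, a small sandbox profile might look like this; all three numbers are illustrative choices within the documented ranges, not the generated defaults.

default_vcpus = 2
default_maxvcpus = 8
default_memory = 4096

With these values the VM starts with 2 vCPUs and 4096 MiB of RAM, with 8 as the per-sandbox vCPU ceiling.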
+# In case of a storage driver like devicemapper where a container's +# root file system is backed by a block device, the block device is passed +# directly to the hypervisor for performance reasons. +# This flag prevents the block device from being passed to the hypervisor, +# virtio-fs is used instead to pass the rootfs. +disable_block_device_use = @DEFDISABLEBLOCK@ + +# Shared file system type: +# - virtio-fs (default) +# - virtio-9p +# - virtio-fs-nydus +# - none +shared_fs = "@DEFSHAREDFS_QEMU_TDX_VIRTIOFS@" + +# Path to vhost-user-fs daemon. +virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@" + +# List of valid annotations values for the virtiofs daemon +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@ +valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@ + +# Default size of DAX cache in MiB +virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ + +# Default size of virtqueues +virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@ + +# Extra args for virtiofsd daemon +# +# Format example: +# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +# Examples: +# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"] +# +# see `virtiofsd -h` for possible options. +virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ + +# Cache mode: +# +# - never +# Metadata, data, and pathname lookup are not cached in guest. They are +# always fetched from host and any changes are immediately pushed to host. +# +# - auto +# Metadata and pathname lookup cache expires after a configured amount of +# time (default is 1 second). Data is cached while the file is open (close +# to open consistency). +# +# - always +# Metadata, data, and pathname lookup are cached in guest and never expire. +virtio_fs_cache = "@DEFVIRTIOFSCACHE@" + +# Block storage driver to be used for the hypervisor in case the container +# rootfs is backed by a block device. This is virtio-scsi, virtio-blk +# or nvdimm. +block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@" + +# aio is the I/O mechanism used by qemu +# Options: +# +# - threads +# Pthread based disk I/O. +# +# - native +# Native Linux I/O. +# +# - io_uring +# Linux io_uring API. This provides the fastest I/O operations on Linux, requires kernel>5.1 and +# qemu >=5.0. +block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" + +# Specifies cache-related options will be set to block devices or not. +# Default false +#block_device_cache_set = true + +# Specifies cache-related options for block devices. +# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. +# Default false +#block_device_cache_direct = true + +# Specifies cache-related options for block devices. +# Denotes whether flush requests for the device are ignored. +# Default false +#block_device_cache_noflush = true + +# Enable iothreads (data-plane) to be used. This causes IO to be +# handled in a separate IO thread. This is currently only implemented +# for SCSI. +# +enable_iothreads = @DEFENABLEIOTHREADS@ + +# Enable pre allocation of VM RAM, default false +# Enabling this will result in lower container density +# as all of the memory will be allocated and locked +# This is useful when you want to reserve all the memory +# upfront or in the cases where you want memory latencies +# to be very predictable +# Default false +#enable_mem_prealloc = true + +# Enable huge pages for VM RAM, default false +# Enabling this will result in the VM memory +# being allocated using huge pages. 
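Tying the shared filesystem knobs above together, a virtio-fs setup along the documented lines could look like this; the DAX cache size is an assumed example, and "auto" is one of the three documented cache modes.

shared_fs = "virtio-fs"
virtio_fs_cache_size = 1024
virtio_fs_cache = "auto"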
+# This is useful when you want to use vhost-user network +# stacks within the container. This will automatically +# result in memory pre allocation +#enable_hugepages = true + +# Enable vhost-user storage device, default false +# Enabling this will result in some Linux reserved block type +# major range 240-254 being chosen to represent vhost-user devices. +enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@ + +# The base directory specifically used for vhost-user devices. +# Its sub-path "block" is used for block devices; "block/sockets" is +# where we expect vhost-user sockets to live; "block/devices" is where +# simulated block device nodes for vhost-user devices to live. +vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" + +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: intel_iommu=on,iommu=pt +#enable_iommu = true + +# Enable IOMMU_PLATFORM, default false +# Enabling this will result in the VM device having iommu_platform=on set +#enable_iommu_platform = true + +# List of valid annotations values for the vhost user store path +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@ +valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@ + +# The timeout for reconnecting on non-server spdk sockets when the remote end goes away. +# qemu will delay this many seconds and then attempt to reconnect. +# Zero disables reconnecting, and the default is zero. +vhost_user_reconnect_timeout_sec = 0 + +# Enable file based guest memory support. The default is an empty string which +# will disable this feature. In the case of virtio-fs, this is enabled +# automatically and '/dev/shm' is used as the backing folder. +# This option will be ignored if VM templating is enabled. +#file_mem_backend = "@DEFFILEMEMBACKEND@" + +# List of valid annotations values for the file_mem_backend annotation +# The default if not set is empty (all annotations rejected.) +# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@ +valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@ + +# -pflash can add image file to VM. The arguments of it should be in format +# of ["/path/to/flash0.img", "/path/to/flash1.img"] +pflashes = [] + +# This option changes the default hypervisor and kernel parameters +# to enable debug output where available. And Debug also enable the hmp socket. +# +# Default false +#enable_debug = true + +# Disable the customizations done in the runtime when it detects +# that it is running on top a VMM. This will result in the runtime +# behaving as it would when running on bare metal. +# +#disable_nesting_checks = true + +# This is the msize used for 9p shares. It is the number of bytes +# used for 9p packet payload. +#msize_9p = @DEFMSIZE9P@ + +# If false and nvdimm is supported, use nvdimm device to plug guest image. +# Otherwise virtio-block device is used. +# +# nvdimm is not supported when `confidential_guest = true`. +# +# Default is false +#disable_image_nvdimm = true + +# VFIO devices are hotplugged on a bridge by default. +# Enable hotplugging on root bus. This may be required for devices with +# a large PCI bar, as this is a current limitation with hotplugging on +# a bridge. +# Default false +#hotplug_vfio_on_root_bus = true + +# Before hot plugging a PCIe device, you need to add a pcie_root_port device. 
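The two settings below show the combination the preceding comments describe for devices with large PCI BARs; both values are taken from the commented examples in this file rather than from the generated defaults.

hotplug_vfio_on_root_bus = true
pcie_root_port = 2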
+# Use this parameter when using some large PCI bar devices, such as an Nvidia GPU.
+# The value means the number of pcie_root_port
+# This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35"
+# Default 0
+#pcie_root_port = 2
+
+# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
+# security (vhost-net runs in ring 0) for network I/O performance.
+#disable_vhost_net = true
+
+#
+# Default entropy source.
+# The path to a host source of entropy (including a real hardware RNG)
+# /dev/urandom and /dev/random are two main options.
+# Be aware that /dev/random is a blocking source of entropy. If the host
+# runs out of entropy, the VM's boot time will increase, possibly leading to
+# startup timeouts.
+# The source of entropy /dev/urandom is non-blocking and provides a
+# generally acceptable source of entropy. It should work well for pretty much
+# all practical purposes.
+#entropy_source= "@DEFENTROPYSOURCE@"
+
+# List of valid annotations values for entropy_source
+# The default if not set is empty (all annotations rejected.)
+# Your distribution recommends: @DEFVALIDENTROPYSOURCES@
+valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
+
+# Path to OCI hook binaries in the *guest rootfs*.
+# This does not affect host-side hooks which must instead be added to
+# the OCI spec passed to the runtime.
+#
+# You can create a rootfs with hooks by customizing the osbuilder scripts:
+# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder
+#
+# Hooks must be stored in a subdirectory of guest_hook_path according to their
+# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}".
+# The agent will scan these directories for executable files and add them, in
+# lexicographical order, to the lifecycle of the guest container.
+# Hooks are executed in the runtime namespace of the guest. See the official documentation:
+# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
+# Warnings will be logged if any error is encountered while scanning for hooks,
+# but it will not abort container execution.
+#guest_hook_path = "/usr/share/oci/hooks"
+#
+# Use rx Rate Limiter to control network I/O inbound bandwidth (size in bits/sec for SB/VM).
+# In Qemu, we use classful qdiscs HTB (Hierarchy Token Bucket) to discipline traffic.
+# Default 0-sized value means unlimited rate.
+#rx_rate_limiter_max_rate = 0
+# Use tx Rate Limiter to control network I/O outbound bandwidth (size in bits/sec for SB/VM).
+# In Qemu, we use classful qdiscs HTB (Hierarchy Token Bucket) and ifb (Intermediate Functional Block)
+# to discipline traffic.
+# Default 0-sized value means unlimited rate.
+#tx_rate_limiter_max_rate = 0
+
+# Set where to save the guest memory dump file.
+# If set, when a GUEST_PANICKED event occurs,
+# guest memory will be dumped to the host filesystem under guest_memory_dump_path.
+# This directory will be created automatically if it does not exist.
+#
+# The dumped file (also called a vmcore) can be processed with crash or gdb.
+#
+# WARNING:
+# Dumping the guest's memory can take a very long time depending on the amount of
+# guest memory and can use a lot of disk space.
+#guest_memory_dump_path="/var/crash/kata"
+
+# Whether to enable paging.
+# Basically, if you want to use "gdb" rather than "crash",
+# or need the guest-virtual addresses in the ELF vmcore,
+# then you should enable paging.
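Putting the two guest-dump options together, a debugging configuration along the lines described above could be the following; the path is the commented default from this file.

guest_memory_dump_path = "/var/crash/kata"
guest_memory_dump_paging = true

With paging enabled the resulting vmcore carries guest-virtual addresses and can be inspected with gdb, as the comments note.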
+# +# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details +#guest_memory_dump_paging=false + +# Enable swap in the guest. Default false. +# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device +# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness") +# is bigger than 0. +# The size of the swap device should be +# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes. +# If swap_in_bytes is not set, the size should be memory_limit_in_bytes. +# If swap_in_bytes and memory_limit_in_bytes is not set, the size should +# be default_memory. +#enable_guest_swap = true + +# use legacy serial for guest console if available and implemented for architecture. Default false +#use_legacy_serial = true + +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + + +[factory] +# VM templating support. Once enabled, new VMs are created from template +# using vm cloning. They will share the same initial kernel, initramfs and +# agent memory by mapping it readonly. It helps speeding up new container +# creation and saves a lot of memory if there are many kata containers running +# on the same host. +# +# When disabled, new VMs are created from scratch. +# +# Note: Requires "initrd=" to be set ("image=" is not supported). +# +# Default false +#enable_template = true + +# Specifies the path of template. +# +# Default "/run/vc/vm/template" +#template_path = "/run/vc/vm/template" + +# The number of caches of VMCache: +# unspecified or == 0 --> VMCache is disabled +# > 0 --> will be set to the specified number +# +# VMCache is a function that creates VMs as caches before using it. +# It helps speed up new container creation. +# The function consists of a server and some clients communicating +# through Unix socket. The protocol is gRPC in protocols/cache/cache.proto. +# The VMCache server will create some VMs and cache them by factory cache. +# It will convert the VM to gRPC format and transport it when gets +# requestion from clients. +# Factory grpccache is the VMCache client. It will request gRPC format +# VM and convert it back to a VM. If VMCache function is enabled, +# kata-runtime will request VM from factory grpccache when it creates +# a new sandbox. +# +# Default 0 +#vm_cache_number = 0 + +# Specify the address of the Unix socket that is used by VMCache. +# +# Default /var/run/kata-containers/cache.sock +#vm_cache_endpoint = "/var/run/kata-containers/cache.sock" + +[agent.@PROJECT_TYPE@] +# If enabled, make the agent display debug-level messages. +# (default: disabled) +#enable_debug = true + +# Enable agent tracing. +# +# If enabled, the agent will generate OpenTelemetry trace spans. +# +# Notes: +# +# - If the runtime also has tracing enabled, the agent spans will be +# associated with the appropriate runtime parent span. +# - If enabled, the runtime will wait for the container to shutdown, +# increasing the container shutdown time slightly. +# +# (default: disabled) +#enable_tracing = true + +# Comma separated list of kernel modules and their parameters. 
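Relating this to the guest swap description earlier in this hunk: opting in is a single switch, and the swap device size is then derived from the "io.katacontainers.container.resource.swap_in_bytes" / memory-limit rules quoted above. A minimal sketch:

enable_guest_swap = true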
+# These modules will be loaded in the guest kernel using modprobe(8). +# The following example can be used to load two kernel modules with parameters +# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"] +# The first word is considered as the module name and the rest as its parameters. +# Container will not be started when: +# * A kernel module is specified and the modprobe command is not installed in the guest +# or it fails loading the module. +# * The module is not available in the guest or it doesn't met the guest kernel +# requirements, like architecture and version. +# +kernel_modules=[] + +# Enable debug console. + +# If enabled, user can connect guest OS running inside hypervisor +# through "kata-runtime exec " command + +#debug_console_enabled = true + +# Agent connection dialing timeout value in seconds +# (default: 60) +dial_timeout = 60 + +[runtime] +# If enabled, the runtime will log additional debug messages to the +# system log +# (default: disabled) +#enable_debug = true +# +# Internetworking model +# Determines how the VM should be connected to the +# the container network interface +# Options: +# +# - macvtap +# Used when the Container network interface can be bridged using +# macvtap. +# +# - none +# Used when customize network. Only creates a tap device. No veth pair. +# +# - tcfilter +# Uses tc filter rules to redirect traffic from the network interface +# provided by plugin to a tap interface connected to the VM. +# +internetworking_model="@DEFNETWORKMODEL_QEMU@" + +# disable guest seccomp +# Determines whether container seccomp profiles are passed to the virtual +# machine and applied by the kata agent. If set to true, seccomp is not applied +# within the guest +# (default: true) +disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ + +# vCPUs pinning settings +# if enabled, each vCPU thread will be scheduled to a fixed CPU +# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet) +# enable_vcpus_pinning = false + +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + +# If enabled, the runtime will create opentracing.io traces and spans. +# (See https://www.jaegertracing.io/docs/getting-started). +# (default: disabled) +#enable_tracing = true + +# Set the full url to the Jaeger HTTP Thrift collector. +# The default if not set will be "http://localhost:14268/api/traces" +#jaeger_endpoint = "" + +# Sets the username to be used if basic auth is required for Jaeger. +#jaeger_user = "" + +# Sets the password to be used if basic auth is required for Jaeger. +#jaeger_password = "" + +# If enabled, the runtime will not create a network namespace for shim and hypervisor processes. +# This option may have some potential impacts to your host. It should only be used when you know what you're doing. +# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only +# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge +# (like OVS) directly. 
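A concrete sketch of the only combination the disable_new_netns comments above allow; both keys are documented in this file, and enabling them is an illustrative choice, not the default.

disable_new_netns = true
internetworking_model = "none"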
+# (default: false)
+#disable_new_netns = true
+
+# If enabled, the runtime will add all the kata processes inside one dedicated cgroup.
+# The container cgroups in the host are not created, just one single cgroup per sandbox.
+# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
+# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
+# The sandbox cgroup is constrained if there is no container type annotation.
+# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
+sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
+
+# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
+# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
+# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug.
+# Compatibility for determining appropriate sandbox (VM) size:
+# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
+# does not yet support sandbox sizing annotations.
+# - When running single containers using a tool like ctr, container sizing information will be available.
+static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_TEE@
+
+# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro) into the sandbox's shared path.
+# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
+# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`.
+# These will not be exposed to the container workloads, and are only provided for potential guest services.
+sandbox_bind_mounts=@DEFBINDMOUNTS@
+
+# VFIO Mode
+# Determines how VFIO devices should be presented to the container.
+# Options:
+#
+# - vfio
+# Matches behaviour of OCI runtimes (e.g. runc) as much as
+# possible. VFIO devices will appear in the container as VFIO
+# character devices under /dev/vfio. The exact names may differ
+# from the host (they need to match the VM's IOMMU group numbers
+# rather than the host's)
+#
+# - guest-kernel
+# This is a Kata-specific behaviour that's useful in certain cases.
+# The VFIO device is managed by whatever driver in the VM kernel
+# claims it. This means it will appear as one or more device nodes
+# or network interfaces depending on the nature of the device.
+# Using this mode requires specially built workloads that know how
+# to locate the relevant device interfaces within the VM.
+#
+vfio_mode="@DEFVFIOMODE@"
+
+# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
+# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
+disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
+
+# Enabled experimental feature list, format: ["a", "b"].
+# Experimental features are features not stable enough for production,
+# they may break compatibility, and are prepared for a big version bump.
+# Supported experimental features:
+# (default: [])
+experimental=@DEFAULTEXPFEATURES@
+
+# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
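As an illustration of the VFIO mode choice discussed earlier in this hunk, selecting the Kata-specific guest-kernel behaviour is a one-line change; "guest-kernel" is one of the two documented options, and the generated default comes from @DEFVFIOMODE@.

vfio_mode = "guest-kernel"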
+# (default: false) +# enable_pprof = true + +# WARNING: All the options in the following section have not been implemented yet. +# This section was added as a placeholder. DO NOT USE IT! +[image] +# Container image service. +# +# Offload the CRI image management service to the Kata agent. +# (default: false) +#service_offload = true + +# Container image decryption keys provisioning. +# Applies only if service_offload is true. +# Keys can be provisioned locally (e.g. through a special command or +# a local file) or remotely (usually after the guest is remotely attested). +# The provision setting is a complete URL that lets the Kata agent decide +# which method to use in order to fetch the keys. +# +# Keys can be stored in a local file, in a measured and attested initrd: +#provision=data:///local/key/file +# +# Keys could be fetched through a special command or binary from the +# initrd (guest) image, e.g. a firmware call: +#provision=file:///path/to/bin/fetcher/in/guest +# +# Keys can be remotely provisioned. The Kata agent fetches them from e.g. +# a HTTPS URL: +#provision=https://my-key-broker.foo/tenant/ diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 3ec44c8b6ed0..511a63dfd6b8 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -15,8 +15,15 @@ path = "@QEMUPATH@" kernel = "@KERNELPATH@" image = "@IMAGEPATH@" +# initrd = "@INITRDPATH@" machine_type = "@MACHINETYPE@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # Enable confidential guest support. # Toggling that setting may trigger different hardware features, ranging # from memory encryption to both memory and CPU-state encryption and integrity. @@ -33,6 +40,12 @@ machine_type = "@MACHINETYPE@" # Default false # confidential_guest = true +# Choose AMD SEV-SNP confidential guests +# In case of using confidential guests on AMD hardware that supports both SEV +# and SEV-SNP, the following enables SEV-SNP guests. SEV guests are default. +# Default false +# sev_snp_guest = true + # Enable running QEMU VMM as a non-root user. # By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as # a non-root random user. See documentation for the limitations of this mode. @@ -165,6 +178,7 @@ disable_block_device_use = @DEFDISABLEBLOCK@ # - virtio-fs (default) # - virtio-9p # - virtio-fs-nydus +# - none shared_fs = "@DEFSHAREDFS_QEMU_VIRTIOFS@" # Path to vhost-user-fs daemon. @@ -178,19 +192,22 @@ valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@ # Default size of DAX cache in MiB virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ +# Default size of virtqueues +virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@ + # Extra args for virtiofsd daemon # # Format example: -# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"] +# ["--arg1=xxx", "--arg2=yyy"] # Examples: -# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"] +# Set virtiofsd log level to debug : ["--log-level=debug"] # # see `virtiofsd -h` for possible options. virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ # Cache mode: # -# - none +# - never # Metadata, data, and pathname lookup are not cached in guest. They are # always fetched from host and any changes are immediately pushed to host. # @@ -208,6 +225,20 @@ virtio_fs_cache = "@DEFVIRTIOFSCACHE@" # or nvdimm. 
block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@" +# aio is the I/O mechanism used by qemu +# Options: +# +# - threads +# Pthread based disk I/O. +# +# - native +# Native Linux I/O. +# +# - io_uring +# Linux io_uring API. This provides the fastest I/O operations on Linux, requires kernel>5.1 and +# qemu >=5.0. +block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@" + # Specifies cache-related options will be set to block devices or not. # Default false #block_device_cache_set = true @@ -271,6 +302,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" # Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@ valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@ +# The timeout for reconnecting on non-server spdk sockets when the remote end goes away. +# qemu will delay this many seconds and then attempt to reconnect. +# Zero disables reconnecting, and the default is zero. +vhost_user_reconnect_timeout_sec = 0 + # Enable file based guest memory support. The default is an empty string which # will disable this feature. In the case of virtio-fs, this is enabled # automatically and '/dev/shm' is used as the backing folder. @@ -292,6 +328,21 @@ pflashes = [] # Default false #enable_debug = true +# This option allows to add an extra HMP or QMP socket when `enable_debug = true` +# +# WARNING: Anyone with access to the extra socket can take full control of +# Qemu. This is for debugging purpose only and must *NEVER* be used in +# production. +# +# Valid values are : +# - "hmp" +# - "qmp" +# - "qmp-pretty" (same as "qmp" with pretty json formatting) +# +# If set to the empty string "", no extra monitor socket is added. This is +# the default. +#extra_monitor_socket = hmp + # Disable the customizations done in the runtime when it detects # that it is running on top a VMM. This will result in the runtime # behaving as it would when running on bare metal. @@ -317,6 +368,18 @@ pflashes = [] # Default false #hotplug_vfio_on_root_bus = true +# Enable hot-plugging of VFIO devices to a bridge-port, +# root-port or switch-port. +# The default setting is "no-port" +#hot_plug_vfio = "root-port" + +# In a confidential compute environment hot-plugging can compromise +# security. +# Enable cold-plugging of VFIO devices to a bridge-port, +# root-port or switch-port. +# The default setting is "no-port", which means disabled. +#cold_plug_vfio = "root-port" + # Before hot plugging a PCIe device, you need to add a pcie_root_port device. # Use this parameter when using some large PCI bar devices, such as Nvidia GPU # The value means the number of pcie_root_port @@ -409,6 +472,14 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@ # disable applying SELinux on the VMM process (default false) disable_selinux=@DEFDISABLESELINUX@ +# disable applying SELinux on the container process +# If set to false, the type `container_t` is applied to the container process by default. +# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built +# with `SELINUX=yes`. +# (default: true) +disable_guest_selinux=@DEFDISABLEGUESTSELINUX@ + + [factory] # VM templating support. Once enabled, new VMs are created from template # using vm cloning. 
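The extra_monitor_socket option added in this hunk only takes effect together with enable_debug; a debug-only sketch using one of the three documented values (never for production, per the warning above):

enable_debug = true
extra_monitor_socket = "qmp"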
They will share the same initial kernel, initramfs and @@ -492,8 +563,8 @@ kernel_modules=[] #debug_console_enabled = true # Agent connection dialing timeout value in seconds -# (default: 30) -#dial_timeout = 30 +# (default: 45) +dial_timeout = 45 [runtime] # If enabled, the runtime will log additional debug messages to the @@ -526,6 +597,19 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# vCPUs pinning settings +# if enabled, each vCPU thread will be scheduled to a fixed CPU +# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet) +# enable_vcpus_pinning = false + +# Apply a custom SELinux security policy to the container process inside the VM. +# This is used when you want to apply a type other than the default `container_t`, +# so general users should not uncomment and apply it. +# (format: "user:role:type") +# Note: You cannot specify MCS policy with the label because the sensitivity levels and +# categories are determined automatically by high-level container runtimes such as containerd. +#guest_selinux_label="@DEFGUESTSELINUXLABEL@" + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/go.mod b/src/runtime/go.mod index 36a2f618b981..da61107c9dc6 100644 --- a/src/runtime/go.mod +++ b/src/runtime/go.mod @@ -1,66 +1,117 @@ module github.com/kata-containers/kata-containers/src/runtime -go 1.14 +go 1.19 require ( code.cloudfoundry.org/bytefmt v0.0.0-20211005130812-5bb3c17173e5 - github.com/BurntSushi/toml v0.3.1 + github.com/BurntSushi/toml v1.2.0 github.com/blang/semver v3.5.1+incompatible github.com/blang/semver/v4 v4.0.0 - github.com/containerd/cgroups v1.0.3 + github.com/containerd/cgroups v1.0.5-0.20220625035431-cf7417bca682 github.com/containerd/console v1.0.3 - github.com/containerd/containerd v1.6.6 - github.com/containerd/cri-containerd v1.11.1-0.20190125013620-4dd6735020f5 + github.com/containerd/containerd v1.6.8 + github.com/containerd/cri-containerd v1.19.0 github.com/containerd/fifo v1.0.0 github.com/containerd/ttrpc v1.1.0 github.com/containerd/typeurl v1.0.2 github.com/containernetworking/plugins v1.1.1 + github.com/containers/podman/v4 v4.2.0 github.com/coreos/go-systemd/v22 v22.3.2 - github.com/cri-o/cri-o v1.0.0-rc2.0.20170928185954-3394b3b2d6af github.com/docker/go-units v0.4.0 - github.com/fsnotify/fsnotify v1.4.9 + github.com/fsnotify/fsnotify v1.5.4 github.com/go-ini/ini v1.28.2 - github.com/go-openapi/errors v0.18.0 - github.com/go-openapi/runtime v0.18.0 - github.com/go-openapi/strfmt v0.18.0 - github.com/go-openapi/swag v0.19.14 - github.com/go-openapi/validate v0.18.0 - github.com/godbus/dbus/v5 v5.0.6 + github.com/go-openapi/errors v0.20.2 + github.com/go-openapi/runtime v0.19.21 + github.com/go-openapi/strfmt v0.21.1 + github.com/go-openapi/swag v0.21.1 + github.com/go-openapi/validate v0.22.0 + github.com/godbus/dbus/v5 v5.1.0 github.com/gogo/protobuf v1.3.2 github.com/hashicorp/go-multierror v1.1.1 github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9 github.com/mdlayher/vsock v1.1.0 - github.com/opencontainers/runc v1.1.2 - github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 + github.com/opencontainers/runc v1.1.3 + github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab github.com/opencontainers/selinux v1.10.1 github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 github.com/pkg/errors v0.9.1 
- github.com/prometheus/client_golang v1.11.1 + github.com/prometheus/client_golang v1.12.1 github.com/prometheus/client_model v0.2.0 - github.com/prometheus/common v0.30.0 + github.com/prometheus/common v0.32.1 github.com/prometheus/procfs v0.7.3 github.com/safchain/ethtool v0.0.0-20210803160452-9aa261dae9b1 - github.com/sirupsen/logrus v1.8.1 - github.com/stretchr/testify v1.7.0 - github.com/urfave/cli v1.22.2 - github.com/vishvananda/netlink v1.1.1-0.20210924202909-187053b97868 + github.com/sirupsen/logrus v1.9.0 + github.com/stretchr/testify v1.8.0 + github.com/urfave/cli v1.22.4 + github.com/vishvananda/netlink v1.2.1-beta.2 github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20220601114329-47893b162965 go.opentelemetry.io/otel v1.3.0 go.opentelemetry.io/otel/exporters/jaeger v1.0.0 go.opentelemetry.io/otel/sdk v1.3.0 go.opentelemetry.io/otel/trace v1.3.0 - golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd - golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f - golang.org/x/sys v0.0.0-20220412211240-33da011f77ad - google.golang.org/grpc v1.43.0 + golang.org/x/oauth2 v0.0.0-20220622183110-fd043fe589d2 + golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f + google.golang.org/grpc v1.47.0 k8s.io/apimachinery v0.22.5 k8s.io/cri-api v0.23.1 ) +require ( + github.com/Microsoft/go-winio v0.5.2 // indirect + github.com/Microsoft/hcsshim v0.9.4 // indirect + github.com/PuerkitoBio/purell v1.1.1 // indirect + github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect + github.com/asaskevich/govalidator v0.0.0-20200907205600-7a23bdc65eef // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.1.2 // indirect + github.com/cilium/ebpf v0.7.0 // indirect + github.com/containerd/go-runc v1.0.0 // indirect + github.com/containernetworking/cni v1.1.2 // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect + github.com/cyphar/filepath-securejoin v0.2.3 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect + github.com/frankban/quicktest v1.13.1 // indirect + github.com/go-logr/logr v1.2.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-openapi/analysis v0.21.2 // indirect + github.com/go-openapi/jsonpointer v0.19.5 // indirect + github.com/go-openapi/jsonreference v0.19.6 // indirect + github.com/go-openapi/loads v0.21.1 // indirect + github.com/go-openapi/spec v0.20.4 // indirect + github.com/go-stack/stack v1.8.0 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.2 // indirect + github.com/hashicorp/errwrap v1.1.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/mailru/easyjson v0.7.6 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect + github.com/mdlayher/socket v0.2.0 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/moby/sys/mountinfo v0.6.2 // indirect + github.com/oklog/ulid v1.3.1 // indirect + github.com/opencontainers/go-digest v1.0.0 // indirect + github.com/opencontainers/image-spec v1.0.3-0.20220114050600-8b9d41f48198 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/rogpeppe/go-internal v1.8.1-0.20210923151022-86f73c517451 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + go.mongodb.org/mongo-driver v1.7.5 // indirect + 
go.opencensus.io v0.23.0 // indirect + golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect + golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f // indirect + golang.org/x/text v0.3.7 // indirect + google.golang.org/appengine v1.6.7 // indirect + google.golang.org/genproto v0.0.0-20220624142145-8cd45d7dbd1f // indirect + google.golang.org/protobuf v1.28.1 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) + replace ( github.com/opencontainers/image-spec => github.com/opencontainers/image-spec v1.0.2 - github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.3 + github.com/opencontainers/runc => github.com/opencontainers/runc v1.1.3 github.com/uber-go/atomic => go.uber.org/atomic v1.5.1 google.golang.org/genproto => google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8 ) diff --git a/src/runtime/go.sum b/src/runtime/go.sum index 63870b2325a2..93bac929de51 100644 --- a/src/runtime/go.sum +++ b/src/runtime/go.sum @@ -20,15 +20,35 @@ cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmW cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg= cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8= cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0= +cloud.google.com/go v0.83.0/go.mod h1:Z7MJUsANfY0pYPdw0lbnivPx4/vhy/e2FEkSkF7vAVY= +cloud.google.com/go v0.84.0/go.mod h1:RazrYuxIK6Kb7YrzzhPoLmCVzl7Sup4NrbKPg8KHSUM= +cloud.google.com/go v0.87.0/go.mod h1:TpDYlFy7vuLzZMMZ+B6iRiELaY7z/gJPaqbMx6mlWcY= +cloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aDQ= +cloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= +cloud.google.com/go v0.94.1/go.mod h1:qAlAugsXlC+JWO+Bke5vCtc9ONxjQT3drlTTnAplMW4= +cloud.google.com/go v0.97.0/go.mod h1:GF7l59pYBVlXQIBLx3a761cZ41F9bBH3JUlihCt2Udc= +cloud.google.com/go v0.99.0/go.mod h1:w0Xx2nLzqWJPuozYQX+hFfCSI8WioryfRDzkoI/Y2ZA= +cloud.google.com/go v0.100.1/go.mod h1:fs4QogzfH5n2pBXBP9vRiU+eCny7lD2vmFZy79Iuw1U= +cloud.google.com/go v0.100.2/go.mod h1:4Xra9TjzAeYHrl5+oeLlzbM2k3mjVhZh4UqTZ//w99A= +cloud.google.com/go v0.102.0/go.mod h1:oWcCzKlqJ5zgHQt9YsaeTY9KzIvjyy0ArmiBUgpQ+nc= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= +cloud.google.com/go/compute v0.1.0/go.mod h1:GAesmwr110a34z04OlxYkATPBEfVhkymfTBXtfbBFow= +cloud.google.com/go/compute v1.3.0/go.mod h1:cCZiE1NHEtai4wiufUhW8I8S1JKkAnhnQJWM7YD99wM= +cloud.google.com/go/compute v1.5.0/go.mod h1:9SMHyhJlzhlkJqrPAc839t2BZFTSk6Jdj6mkzQJeu0M= +cloud.google.com/go/compute v1.6.0/go.mod h1:T29tfhtVbq1wvAPo0E3+7vhgmkOYeXjhFvz/FMzPu0s= +cloud.google.com/go/compute v1.6.1/go.mod h1:g85FgpzFvNULZ+S8AYq87axRKuf2Kh7deLqV/jJ3thU= +cloud.google.com/go/compute v1.7.0/go.mod h1:435lt8av5oL9P3fv1OEzSbSUe+ybHXGMPQHHZWZxy9U= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod 
h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqClKRT5SZwBmk= +cloud.google.com/go/iam v0.1.0/go.mod h1:vcUNEa0pEm0qRVpmWepWaFMIAI8/hjB9mO8rNCJtF6c= +cloud.google.com/go/iam v0.3.0/go.mod h1:XzJPvDayI+9zsASAFO68Hk07u3z+f+JrT2xXNdp4bnY= +cloud.google.com/go/kms v1.4.0/go.mod h1:fajBHndQ+6ubNw6Ss2sSd+SWvjL26RNo/dr7uxsnnOA= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= @@ -38,11 +58,14 @@ cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0Zeo cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= +cloud.google.com/go/storage v1.22.1/go.mod h1:S8N1cAStu7BOeFfE8KAQzmyyLkK8p/vmRq6kuBTW58Y= code.cloudfoundry.org/bytefmt v0.0.0-20211005130812-5bb3c17173e5 h1:tM5+dn2C9xZw1RzgI6WTQW1rGqdUimKB3RFbyu4h6Hc= code.cloudfoundry.org/bytefmt v0.0.0-20211005130812-5bb3c17173e5/go.mod h1:v4VVB6oBMz/c9fRY6vZrwr5xKRWOH5NPDjQZlPk0Gbs= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= +github.com/14rcole/gopopulate v0.0.0-20180821133914-b175b219e774/go.mod h1:6/0dYRLLXyJjbkIPeeGyoJ/eKOSI0eU6eTlCBYibgd0= github.com/AdaLogics/go-fuzz-headers v0.0.0-20210715213245-6c3934b029d8/go.mod h1:CzsSbkDixRphAF5hS6wbMKq0eI6ccJRb7/A0M6JBnwg= github.com/Azure/azure-sdk-for-go v16.2.1+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= +github.com/Azure/azure-sdk-for-go v66.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= github.com/Azure/go-ansiterm v0.0.0-20210608223527-2377c96fe795/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= @@ -50,18 +73,34 @@ github.com/Azure/go-autorest v10.8.1+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSW github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= github.com/Azure/go-autorest/autorest v0.11.1/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw= github.com/Azure/go-autorest/autorest v0.11.18/go.mod h1:dSiJPy22c3u0OtOKDNttNgqpNFY/GeWa7GH/Pz56QRA= +github.com/Azure/go-autorest/autorest v0.11.24/go.mod h1:G6kyRlFnTuSbEYkQGawPfsCswgme4iYf6rfSKUDzbCc= +github.com/Azure/go-autorest/autorest v0.11.27/go.mod h1:7l8ybrIdUmGqZMTD0sRtAr8NvbHjfofbf8RSP2q7w7U= github.com/Azure/go-autorest/autorest/adal v0.9.0/go.mod h1:/c022QCutn2P7uY+/oQWWNcK9YU+MH96NgK+jErpbcg= github.com/Azure/go-autorest/autorest/adal v0.9.5/go.mod h1:B7KF7jKIeC9Mct5spmyCB/A8CG/sEz1vwIRGv/bbw7A= github.com/Azure/go-autorest/autorest/adal v0.9.13/go.mod h1:W/MM4U6nLxnIskrw4UwWzlHfGjwUS50aOsc/I3yuU8M= +github.com/Azure/go-autorest/autorest/adal v0.9.18/go.mod h1:XVVeme+LZwABT8K5Lc3hA4nAe8LDBVle26gTrguhhPQ= +github.com/Azure/go-autorest/autorest/azure/auth v0.5.11/go.mod h1:84w/uV8E37feW2NCJ08uT9VBfjfUHpgLVnG2InYD6cg= +github.com/Azure/go-autorest/autorest/azure/cli v0.4.5/go.mod 
h1:ADQAXrkgm7acgWVUNamOgh8YNrv4p27l3Wc55oVfpzg= github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74= github.com/Azure/go-autorest/autorest/mocks v0.4.0/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= +github.com/Azure/go-autorest/autorest/mocks v0.4.2/go.mod h1:Vy7OitM9Kei0i1Oj+LvyAWMXJHeKH1MVlzFugfVrmyU= +github.com/Azure/go-autorest/autorest/to v0.4.0/go.mod h1:fE8iZBn7LQR7zH/9XU2NcPR4o9jEImooCeWJcYV/zLE= +github.com/Azure/go-autorest/autorest/validation v0.3.1/go.mod h1:yhLgjC0Wda5DYXl6JAsWyUe4KVNffhoDhG0zVzUMo3E= github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= -github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/toml v0.4.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= +github.com/BurntSushi/toml v1.0.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= +github.com/BurntSushi/toml v1.1.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= +github.com/BurntSushi/toml v1.2.0 h1:Rt8g24XnyGTyglgET/PRUNlrUeu9F5L+7FilkXfZgs0= +github.com/BurntSushi/toml v1.2.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/DATA-DOG/go-sqlmock v1.5.0/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= +github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= +github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= +github.com/Masterminds/semver/v3 v3.0.3/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs= github.com/Microsoft/go-winio v0.4.11/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA= github.com/Microsoft/go-winio v0.4.14/go.mod h1:qXqCSQ3Xa7+6tgxaGTIe4Kpcdsi+P8jBhyzoq1bpyYA= github.com/Microsoft/go-winio v0.4.15-0.20190919025122-fc70bd9a86b5/go.mod h1:tTuCMEN+UleMWgg9dVx4Hu52b1bJo+59jBh3ajtinzw= @@ -70,8 +109,10 @@ github.com/Microsoft/go-winio v0.4.16/go.mod h1:XB6nPKklQyQ7GC9LdcBEcBl8PF76WugX github.com/Microsoft/go-winio v0.4.17-0.20210211115548-6eac466e5fa3/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= github.com/Microsoft/go-winio v0.4.17-0.20210324224401-5516f17a5958/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= github.com/Microsoft/go-winio v0.4.17/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= -github.com/Microsoft/go-winio v0.5.1 h1:aPJp2QD7OOrhO5tQXqQoGSJc+DjDtWTGLOmNyAm6FgY= +github.com/Microsoft/go-winio v0.5.0/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= github.com/Microsoft/go-winio v0.5.1/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= +github.com/Microsoft/go-winio v0.5.2 h1:a9IhgEQBCUEk6QCdml9CiJGhAws+YwffDHEMp1VMrpA= +github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY= github.com/Microsoft/hcsshim v0.8.6/go.mod h1:Op3hHsoHPAvb6lceZHDtd9OkTew38wNoXnJs8iY7rUg= github.com/Microsoft/hcsshim v0.8.7-0.20190325164909-8abdbb8205e4/go.mod h1:Op3hHsoHPAvb6lceZHDtd9OkTew38wNoXnJs8iY7rUg= github.com/Microsoft/hcsshim v0.8.7/go.mod 
h1:OHd7sQqRFrYd3RmSgbgji+ctCwkbq2wbEYNSzOYtcBQ= @@ -81,15 +122,18 @@ github.com/Microsoft/hcsshim v0.8.15/go.mod h1:x38A4YbHbdxJtc0sF6oIz+RG0npwSCAvn github.com/Microsoft/hcsshim v0.8.16/go.mod h1:o5/SZqmR7x9JNKsW3pu+nqHm0MF8vbA+VxGOoXdC600= github.com/Microsoft/hcsshim v0.8.20/go.mod h1:+w2gRZ5ReXQhFOrvSQeNfhrYB/dg3oDwTOcER2fw4I4= github.com/Microsoft/hcsshim v0.8.21/go.mod h1:+w2gRZ5ReXQhFOrvSQeNfhrYB/dg3oDwTOcER2fw4I4= +github.com/Microsoft/hcsshim v0.8.22/go.mod h1:91uVCVzvX2QD16sMCenoxxXo6L1wJnLMX2PSufFMtF0= github.com/Microsoft/hcsshim v0.8.23/go.mod h1:4zegtUJth7lAvFyc6cH2gGQ5B3OFQim01nnU2M8jKDg= github.com/Microsoft/hcsshim v0.9.2/go.mod h1:7pLA8lDk46WKDWlVsENo92gC0XFa8rbKfyFRBqxEbCc= -github.com/Microsoft/hcsshim v0.9.3 h1:k371PzBuRrz2b+ebGuI2nVgVhgsVX60jMfSw80NECxo= github.com/Microsoft/hcsshim v0.9.3/go.mod h1:7pLA8lDk46WKDWlVsENo92gC0XFa8rbKfyFRBqxEbCc= +github.com/Microsoft/hcsshim v0.9.4 h1:mnUj0ivWy6UzbB1uLFqKR6F+ZyiDc7j4iGgHTpO+5+I= +github.com/Microsoft/hcsshim v0.9.4/go.mod h1:7pLA8lDk46WKDWlVsENo92gC0XFa8rbKfyFRBqxEbCc= github.com/Microsoft/hcsshim/test v0.0.0-20201218223536-d3e5debf77da/go.mod h1:5hlzMzRKMLyo42nCZ9oml8AdTlq/0cvIaBv6tK1RehU= github.com/Microsoft/hcsshim/test v0.0.0-20210227013316-43a75bb4edd3/go.mod h1:mw7qgWloBUl75W/gVH3cQszUg1+gUITj7D6NY7ywVnY= github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/ProtonMail/go-crypto v0.0.0-20220517143526-88bb52951d5b/go.mod h1:z4/9nQmJSSwwds7ejkxaJwO37dru3geImFUdJlaLzQo= github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI= @@ -97,7 +141,10 @@ github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbt github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= +github.com/ReneKroon/ttlcache/v2 v2.11.0/go.mod h1:mBxvsNY+BT8qLLd6CuAJubbKo6r0jh3nb5et22bbfGY= github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ= +github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= +github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= @@ -109,11 +156,31 @@ github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kd github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod 
h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= +github.com/armon/go-metrics v0.3.9/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= +github.com/armon/go-metrics v0.3.10/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= +github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/asaskevich/govalidator v0.0.0-20180720115003-f9ffefc3facf/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= -github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a h1:idn718Q4B6AGu/h5Sxe66HYVdqdGu2l9Iebqhi/AEoA= github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= +github.com/asaskevich/govalidator v0.0.0-20200108200545-475eaeb16496/go.mod h1:oGkLhpf+kjZl6xBf758TQhh5XrAeiJv/7FRz/2spLIg= +github.com/asaskevich/govalidator v0.0.0-20200428143746-21a406dcc535/go.mod h1:oGkLhpf+kjZl6xBf758TQhh5XrAeiJv/7FRz/2spLIg= +github.com/asaskevich/govalidator v0.0.0-20200907205600-7a23bdc65eef h1:46PFijGLmAjMPwCCCo7Jf0W6f9slllCkkv7vyc1yOSg= +github.com/asaskevich/govalidator v0.0.0-20200907205600-7a23bdc65eef/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= github.com/aws/aws-sdk-go v1.15.11/go.mod h1:mFuSZ37Z9YOHbQEwBWztmVzqXrEkub65tZoCYDt7FT0= +github.com/aws/aws-sdk-go v1.44.44/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= +github.com/aws/aws-sdk-go-v2 v1.16.5/go.mod h1:Wh7MEsmEApyL5hrWzpDkba4gwAPc5/piwLVLFnCxp48= +github.com/aws/aws-sdk-go-v2/config v1.15.11/go.mod h1:mD5tNFciV7YHNjPpFYqJ6KGpoSfY107oZULvTHIxtbI= +github.com/aws/aws-sdk-go-v2/credentials v1.12.6/go.mod h1:mQgnRmBPF2S/M01W4T4Obp3ZaZB6o1s/R8cOUda9vtI= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.6/go.mod h1:ClLMcuQA/wcHPmOIfNzNI4Y1Q0oDbmEkbYhMFOzHDh8= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.12/go.mod h1:Afj/U8svX6sJ77Q+FPWMzabJ9QjbwP32YlopgKALUpg= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.6/go.mod h1:FwpAKI+FBPIELJIdmQzlLtRe8LQSOreMcM2wBsPMvvc= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.13/go.mod h1:hiM/y1XPp3DoEPhoVEYc/CZcS58dP6RKJRDFp99wdX0= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.6/go.mod h1:DxAPjquoEHf3rUHh1b9+47RAaXB8/7cB6jkzCt/GOEI= +github.com/aws/aws-sdk-go-v2/service/kms v1.17.3/go.mod h1:EKkrWWXwWYf8x3Nrm6Oix3zZP9NRBHqxw5buFGVBHA0= +github.com/aws/aws-sdk-go-v2/service/sso v1.11.9/go.mod h1:UqRD9bBt15P0ofRyDZX6CfsIqPpzeHOhZKWzgSuAzpo= +github.com/aws/aws-sdk-go-v2/service/sts v1.16.7/go.mod h1:lVxTdiiSHY3jb1aeg+BBFtDzZGSUCv6qaNOyEGCJ1AY= +github.com/aws/smithy-go v1.11.3/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= +github.com/beeker1121/goque v1.0.3-0.20191103205551-d618510128af/go.mod h1:84CWnaDz4g1tEVnFLnuBigmGK15oPohy0RfvSN8d4eg= github.com/benbjohnson/clock v1.0.3/go.mod h1:bGMdMPoPVvcYyt1gHDf4J2KE153Yf9BuiUKYMaxlTDM= github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= @@ -122,32 +189,37 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod 
h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA= -github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA= github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= +github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM= github.com/blang/semver v3.1.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= +github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ= github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/bshuster-repo/logrus-logstash-hook v0.4.1/go.mod h1:zsTqEiSzDgAa/8GZR7E1qaXrhYNDKBYy5/dWPTIflbk= +github.com/buger/goterm v1.0.4/go.mod h1:HiFWV3xnkolgrBV3mY8m0X0Pumt4zg4QhbdOzQtB8tE= github.com/buger/jsonparser v0.0.0-20180808090653-f4dd9f5a6b44/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s= github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/bugsnag/bugsnag-go v0.0.0-20141110184014-b1d153021fcd/go.mod h1:2oa8nejYd4cQ/b0hMIopN0lCRxU0bueqREvZLWFrtK8= github.com/bugsnag/osext v0.0.0-20130617224835-0dd3f918b21b/go.mod h1:obH5gd0BsqsP2LwDJ9aOkm/6J86V6lyAXCoQWGw3K50= github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE= +github.com/cenkalti/backoff/v3 v3.0.0/go.mod h1:cIeZDE3IrqwwJl6VUwCN6trj1oXrTS4rc0ij+ULvLYs= +github.com/cenkalti/backoff/v3 v3.2.2/go.mod h1:cIeZDE3IrqwwJl6VUwCN6trj1oXrTS4rc0ij+ULvLYs= github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/cenkalti/backoff/v4 v4.1.2/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= -github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d89JlCLQIfgVcNsNN0t6T2M= +github.com/checkpoint-restore/checkpointctl v0.0.0-20220321135231-33f4a66335f0/go.mod h1:67kWC1PXQLR3lM/mmNnu3Kzn7K4TSWZAGUuQP1JSngk= +github.com/checkpoint-restore/go-criu/v5 v5.2.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= +github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod 
h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -155,9 +227,10 @@ github.com/cilium/ebpf v0.0.0-20200110133405-4032b1d8aae3/go.mod h1:MA5e5Lr8slmE github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775/go.mod h1:7cR51M8ViRLIdUjrmSXlK9pkrsDlLHbO8jiB8X8JnOc= github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs= github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= -github.com/cilium/ebpf v0.6.2/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= github.com/cilium/ebpf v0.7.0 h1:1k/q3ATgxSXRdrmPfH8d7YK0GfqVsEKZAX9dQZvs56k= github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA= +github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= +github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= @@ -166,11 +239,15 @@ github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XP github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5/go.mod h1:h6jFvWxBdQXxjopDMZyH2UVceIRfR84bdzbkoKrsWNo= github.com/cockroachdb/errors v1.2.4/go.mod h1:rQD95gz6FARkaKkQXUksEje/d9a6wBJoCr5oaCLELYA= github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u985jwjWRlyHXQbwatDASoW0RMlZ/3i9yJHE2xLkI= +github.com/codahale/rfc6979 v0.0.0-20141003034818-6a90f24967eb/go.mod h1:ZjrT6AXHbDs86ZSdt/osfBi5qfexBrKUdONk989Wnk4= +github.com/container-orchestrated-devices/container-device-interface v0.4.0/go.mod h1:E1zcucIkq9P3eyNmY+68dBQsTcsXJh9cgRo2IVNScKQ= github.com/containerd/aufs v0.0.0-20200908144142-dab0cbea06f4/go.mod h1:nukgQABAEopAHvB6j7cnP5zJ+/3aVcE7hCYqvIwAHyE= github.com/containerd/aufs v0.0.0-20201003224125-76a6863f2989/go.mod h1:AkGGQs9NM2vtYHaUen+NljV0/baGCAPELGm2q9ZXpWU= github.com/containerd/aufs v0.0.0-20210316121734-20793ff83c97/go.mod h1:kL5kd6KM5TzQjR79jljyi4olc1Vrx6XBlcyj3gNv2PU= @@ -185,8 +262,9 @@ github.com/containerd/cgroups v0.0.0-20200710171044-318312a37340/go.mod h1:s5q4S github.com/containerd/cgroups v0.0.0-20200824123100-0b889c03f102/go.mod 
h1:s5q4SojHctfxANBDvMeIaIovkq29IP48TKAxnhYRxvo= github.com/containerd/cgroups v0.0.0-20210114181951-8a68de567b68/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE= github.com/containerd/cgroups v1.0.1/go.mod h1:0SJrPIenamHDcZhEcJMNBB85rHcUsw4f25ZfBiPYRkU= -github.com/containerd/cgroups v1.0.3 h1:ADZftAkglvCiD44c77s5YmMqaP2pzVCFZvBmAlBdAP4= github.com/containerd/cgroups v1.0.3/go.mod h1:/ofk34relqNjSGyqPrmEULrO4Sc8LJhvJmWbUCUKqj8= +github.com/containerd/cgroups v1.0.5-0.20220625035431-cf7417bca682 h1:d/YjAAP6A6fT0vpMhbYSDkE+K1ww/DZodOIamD8Pr/E= +github.com/containerd/cgroups v1.0.5-0.20220625035431-cf7417bca682/go.mod h1:nLNQtsF7Sl2HxNebu77i1R0oDlhiTG+kO4JTrUzo6IA= github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= github.com/containerd/console v0.0.0-20181022165439-0650fd9eeb50/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE= @@ -210,9 +288,11 @@ github.com/containerd/containerd v1.5.0-rc.0/go.mod h1:V/IXoMqNGgBlabz3tHD2TWDoT github.com/containerd/containerd v1.5.1/go.mod h1:0DOxVqwDy2iZvrZp2JUx/E+hS0UNTVn7dJnIOwtYR4g= github.com/containerd/containerd v1.5.7/go.mod h1:gyvv6+ugqY25TiXxcZC3L5yOeYgEw0QMhscqVp1AR9c= github.com/containerd/containerd v1.5.8/go.mod h1:YdFSv5bTFLpG2HIYmfqDpSYYTDX+mc5qtSuYx1YUb/s= +github.com/containerd/containerd v1.5.9/go.mod h1:fvQqCfadDGga5HZyn3j4+dx56qj2I9YwBrlSdalvJYQ= github.com/containerd/containerd v1.6.1/go.mod h1:1nJz5xCZPusx6jJU8Frfct988y0NpumIq9ODB0kLtoE= -github.com/containerd/containerd v1.6.6 h1:xJNPhbrmz8xAMDNoVjHy9YHtWwEQNS+CDkcIRh7t8Y0= github.com/containerd/containerd v1.6.6/go.mod h1:ZoP1geJldzCVY3Tonoz7b1IXk8rIX0Nltt5QE4OMNk0= +github.com/containerd/containerd v1.6.8 h1:h4dOFDwzHmqFEP754PgfgTeVXFnLiRc6kiqC7tplDJs= +github.com/containerd/containerd v1.6.8/go.mod h1:By6p5KqPK0/7/CgO/A6t/Gz+CUYUu2zf1hUaaymVXB0= github.com/containerd/continuity v0.0.0-20190426062206-aaeac12a7ffc/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= github.com/containerd/continuity v0.0.0-20190815185530-f2a389ac0a02/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= github.com/containerd/continuity v0.0.0-20191127005431-f65d91d395eb/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= @@ -222,8 +302,8 @@ github.com/containerd/continuity v0.0.0-20210208174643-50096c924a4e/go.mod h1:EX github.com/containerd/continuity v0.1.0/go.mod h1:ICJu0PwR54nI0yPEnJ6jcS+J7CZAUXrLh8lPo2knzsM= github.com/containerd/continuity v0.2.2 h1:QSqfxcn8c+12slxwu00AtzXrsami0MJb/MQs9lOLHLA= github.com/containerd/continuity v0.2.2/go.mod h1:pWygW9u7LtS1o4N/Tn0FoCFDIXZ7rxcMX7HX1Dmibvk= -github.com/containerd/cri-containerd v1.11.1-0.20190125013620-4dd6735020f5 h1:/srF029I+oDfm/qeltxCGJyJ8urmlqWGOQmQ7HvwrRc= -github.com/containerd/cri-containerd v1.11.1-0.20190125013620-4dd6735020f5/go.mod h1:wxbGdReWGCalzGOEpifoHeYCK4xAgnj4o/4bVB+9voU= +github.com/containerd/cri-containerd v1.19.0 h1:PcTvvl+SHaekCMQZFQkYjn1RKlYrK6khYbuhOeF68k0= +github.com/containerd/cri-containerd v1.19.0/go.mod h1:wxbGdReWGCalzGOEpifoHeYCK4xAgnj4o/4bVB+9voU= github.com/containerd/fifo v0.0.0-20180307165137-3d5202aec260/go.mod h1:ODA38xgv3Kuk8dQz2ZQXpnv/UZZUHUCL7pnLehbXgQI= github.com/containerd/fifo v0.0.0-20190226154929-a9fb20d87448/go.mod h1:ODA38xgv3Kuk8dQz2ZQXpnv/UZZUHUCL7pnLehbXgQI= github.com/containerd/fifo v0.0.0-20200410184934-f15a3290365b/go.mod h1:jPQ2IAeZRCYxpS/Cm1495vGFww6ecHmMk1YJH2Q5ln0= @@ -252,6 +332,10 @@ 
github.com/containerd/nri v0.0.0-20201007170849-eb1350a75164/go.mod h1:+2wGSDGFY github.com/containerd/nri v0.0.0-20210316161719-dbaa18c31c14/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY= github.com/containerd/nri v0.1.0/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY= github.com/containerd/stargz-snapshotter/estargz v0.4.1/go.mod h1:x7Q9dg9QYb4+ELgxmo4gBUeJB0tl5dqH1Sdz0nJU1QM= +github.com/containerd/stargz-snapshotter/estargz v0.9.0/go.mod h1:aE5PCyhFMwR8sbrErO5eM2GcvkyXTTJremG883D4qF0= +github.com/containerd/stargz-snapshotter/estargz v0.10.1/go.mod h1:aE5PCyhFMwR8sbrErO5eM2GcvkyXTTJremG883D4qF0= +github.com/containerd/stargz-snapshotter/estargz v0.11.4/go.mod h1:7vRJIcImfY8bpifnMjt+HTJoQxASq7T28MYbP15/Nf0= +github.com/containerd/stargz-snapshotter/estargz v0.12.0/go.mod h1:AIQ59TewBFJ4GOPEQXujcrJ/EKxh5xXZegW1rkR1P/M= github.com/containerd/ttrpc v0.0.0-20190828154514-0e0f228740de/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o= github.com/containerd/ttrpc v0.0.0-20190828172938-92c8520ef9f8/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o= github.com/containerd/ttrpc v0.0.0-20191028202541-4f1b8fe65a5c/go.mod h1:LPm1u0xBw8r8NOKoOdNMeVHSawSsltak+Ihv+etqsE8= @@ -273,18 +357,31 @@ github.com/containernetworking/cni v0.7.1/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ github.com/containernetworking/cni v0.8.0/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY= github.com/containernetworking/cni v0.8.1/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY= github.com/containernetworking/cni v1.0.1/go.mod h1:AKuhXbN5EzmD4yTNtfSsX3tPcmtrBI6QcRV0NiNt15Y= -github.com/containernetworking/cni v1.1.1 h1:ky20T7c0MvKvbMOwS/FrlbNwjEoqJEUUYfsL4b0mc4k= github.com/containernetworking/cni v1.1.1/go.mod h1:sDpYKmGVENF3s6uvMvGgldDWeG8dMxakj/u+i9ht9vw= +github.com/containernetworking/cni v1.1.2 h1:wtRGZVv7olUHMOqouPpn3cXJWpJgM6+EUl31EQbXALQ= +github.com/containernetworking/cni v1.1.2/go.mod h1:sDpYKmGVENF3s6uvMvGgldDWeG8dMxakj/u+i9ht9vw= github.com/containernetworking/plugins v0.8.6/go.mod h1:qnw5mN19D8fIwkqW7oHHYDHVlzhJpcY6TQxn/fUyDDM= github.com/containernetworking/plugins v0.9.1/go.mod h1:xP/idU2ldlzN6m4p5LmGiwRDjeJr6FLK6vuiUwoH7P8= github.com/containernetworking/plugins v1.0.1/go.mod h1:QHCfGpaTwYTbbH+nZXKVTxNBDZcxSOplJT5ico8/FLE= github.com/containernetworking/plugins v1.1.1 h1:+AGfFigZ5TiQH00vhR8qPeSatj53eNGz0C1d3wVYlHE= github.com/containernetworking/plugins v1.1.1/go.mod h1:Sr5TH/eBsGLXK/h71HeLfX19sZPp3ry5uHSkI4LPxV8= +github.com/containers/buildah v1.27.0/go.mod h1:anH3ExvDXRNP9zLQCrOc1vWb5CrhqLF/aYFim4tslvA= +github.com/containers/common v0.49.1/go.mod h1:ueM5hT0itKqCQvVJDs+EtjornAQtrHYxQJzP2gxeGIg= +github.com/containers/conmon v2.0.20+incompatible/go.mod h1:hgwZ2mtuDrppv78a/cOBNiCm6O0UMWGx1mu7P00nu5I= +github.com/containers/image/v5 v5.22.0/go.mod h1:D8Ksv2RNB8qLJ7xe1P3rgJJOSQpahA6amv2Ax++/YO4= +github.com/containers/libtrust v0.0.0-20200511145503-9c3a6c22cd9a/go.mod h1:9rfv8iPl1ZP7aqh9YA68wnZv2NUDbXdcdPHVz0pFbPY= github.com/containers/ocicrypt v1.0.1/go.mod h1:MeJDzk1RJHv89LjsH0Sp5KTY3ZYkjXO/C+bKAeWFIrc= github.com/containers/ocicrypt v1.1.0/go.mod h1:b8AOe0YR67uU8OqfVNcznfFpAzu3rdgUV4GP9qXPfu4= github.com/containers/ocicrypt v1.1.1/go.mod h1:Dm55fwWm1YZAjYRaJ94z2mfZikIyIN4B0oB3dj3jFxY= github.com/containers/ocicrypt v1.1.2/go.mod h1:Dm55fwWm1YZAjYRaJ94z2mfZikIyIN4B0oB3dj3jFxY= github.com/containers/ocicrypt v1.1.3/go.mod h1:xpdkbVAuaH3WzbEabUd5yDsl9SwJA5pABH85425Es2g= +github.com/containers/ocicrypt v1.1.5/go.mod h1:WgjxPWdTJMqYMjf3M6cuIFFA1/MpyyhIM99YInA+Rvc= 
+github.com/containers/podman/v4 v4.2.0 h1:mqQ0CtdSOTfsl6IEcO0UiA/Yi/9Yxoe/mSEC4h21CK8= +github.com/containers/podman/v4 v4.2.0/go.mod h1:sUxBZd/VXjXXsHuQURWaQaetb0Ugqd1C6y7uScgmf4o= +github.com/containers/psgo v1.7.2/go.mod h1:SLpqxsPOHtTqRygjutCPXmeU2PoEFzV3gzJplN4BMx0= +github.com/containers/storage v1.37.0/go.mod h1:kqeJeS0b7DO2ZT1nVWs0XufrmPFbgV3c+Q/45RlH6r4= +github.com/containers/storage v1.38.0/go.mod h1:lBzt28gAk5ADZuRtwdndRJyqX22vnRaXmlF+7ktfMYc= +github.com/containers/storage v1.42.0/go.mod h1:JiUJwOgOo1dr2DdOUc1MRe2GCAXABYoYmOdPF8yvH78= github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= @@ -292,60 +389,80 @@ github.com/coreos/go-iptables v0.4.5/go.mod h1:/mVI274lEDI2ns62jHCDnCyBF9Iwsmeka github.com/coreos/go-iptables v0.5.0/go.mod h1:/mVI274lEDI2ns62jHCDnCyBF9Iwsmekav8Dbxlm1MU= github.com/coreos/go-iptables v0.6.0/go.mod h1:Qe8Bv2Xik5FyTXwgIbLAnv2sWSBmvWdFETJConOQ//Q= github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc= +github.com/coreos/go-oidc/v3 v3.2.0/go.mod h1:rEJ/idjfUyfkBit1eI1fvyr+64/g9dcKpAm8MJMesvo= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-systemd v0.0.0-20161114122254-48702e0da86b/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e h1:Wf6HqHfScWJN9/ZjdUKyjop4mf3Qdd+1TvvltAvM3m8= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd/v22 v22.0.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= github.com/coreos/go-systemd/v22 v22.1.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI= github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/coreos/stream-metadata-go v0.0.0-20210225230131-70edb9eb47b3/go.mod h1:RTjQyHgO/G37oJ3qnqYK6Z4TPZ5EsaabOtfMjVXmgko= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.0 h1:EoUDS0afbrsXAZ9YQ9jdu/mZ2sXgT1/2yyNng4PGlyM= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.9/go.mod 
h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/cri-o/cri-o v1.0.0-rc2.0.20170928185954-3394b3b2d6af h1:H6nLV96F1LkWizYLQtrMtqJBrlJxnpjgisHsTsOS2HU= -github.com/cri-o/cri-o v1.0.0-rc2.0.20170928185954-3394b3b2d6af/go.mod h1:POmDVglzQ2jWTlL9ZCfZ8d1QjLhmk0oB36O8T0oG75Y= -github.com/cyphar/filepath-securejoin v0.2.2 h1:jCwT2GTP+PY5nBz3c/YL5PAIbusElVrPujOBSCj8xRg= -github.com/cyphar/filepath-securejoin v0.2.2/go.mod h1:FpkQEhXnPnOthhzymB7CGsFk2G9VLXONKD9G7QGMM+4= +github.com/cyphar/filepath-securejoin v0.2.3 h1:YX6ebbZCZP7VkM3scTTokDgBL2TY741X51MTk3ycuNI= +github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= github.com/d2g/dhcp4 v0.0.0-20170904100407-a1d1b6c41b1c/go.mod h1:Ct2BUK8SB0YC1SMSibvLzxjeJLnrYEVLULFNiHY9YfQ= github.com/d2g/dhcp4client v1.0.0/go.mod h1:j0hNfjhrt2SxUOw55nL0ATM/z4Yt3t2Kd1mW34z5W5s= github.com/d2g/dhcp4server v0.0.0-20181031114812-7d4a0a7f59a5/go.mod h1:Eo87+Kg/IX2hfWJfwxMzLyuSZyxSoAug2nGa1G2QAi8= github.com/d2g/hardwareaddr v0.0.0-20190221164911-e7d9fbe030e4/go.mod h1:bMl4RjIciD2oAxI7DmWRx6gbeqrkoLqv3MV0vzNad+I= +github.com/danieljoos/wincred v1.1.0/go.mod h1:XYlo+eRTsVA9aHGp7NGjFkPla4m+DCL7hqDjlFjiygg= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/denverdino/aliyungo v0.0.0-20190125010748-a747050bb1ba/go.mod h1:dV8lFg6daOBZbT6/BDGIz6Y3WFGn8juu6G+CQ6LHtl0= github.com/dgrijalva/jwt-go v0.0.0-20170104182250-a601269ab70c/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= +github.com/digitalocean/go-libvirt v0.0.0-20201209184759-e2a69bcd5bd1/go.mod h1:QS1XzqZLcDniNYrN7EZefq3wIyb/M2WmJbql4ZKoc1Q= +github.com/digitalocean/go-qemu v0.0.0-20210326154740-ac9e0b687001/go.mod h1:IetBE52JfFxK46p2n2Rqm+p5Gx1gpu2hRHsrbnPOWZQ= +github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE= +github.com/disiqueira/gotree/v3 v3.0.2/go.mod h1:ZuyjE4+mUQZlbpkI24AmruZKhg3VHEgPLDY8Qk+uUu8= github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E= github.com/docker/cli v0.0.0-20191017083524-a8ff7f821017/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= +github.com/docker/cli v20.10.16+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= github.com/docker/distribution v0.0.0-20190905152932-14b96e55d84c/go.mod h1:0+TTO4EOBfRPhZXAeF1Vu+W3hHZ8eLp8PgKVZlcvtFY= github.com/docker/distribution v2.7.1-0.20190205005809-0d3efadf0154+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/distribution v2.7.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= +github.com/docker/distribution v2.8.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/docker v1.4.2-0.20190924003213-a8608b5b67c7/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker v20.10.12+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= 
+github.com/docker/docker v20.10.16+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker v20.10.17+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/docker-credential-helpers v0.6.3/go.mod h1:WRaJzqw3CTB9bk10avuGsjVBZsD05qeibJ1/TYlvc0Y= +github.com/docker/docker-credential-helpers v0.6.4/go.mod h1:ofX3UI0Gz1TteYBjtgs07O36Pyasyp66D2uKT7H8W1c= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= +github.com/docker/go-connections v0.4.1-0.20210727194412-58542c764a11/go.mod h1:a6bNUGTbQBsY6VRHTr4h/rkOXjl244DyRD0tx3fgq4Q= github.com/docker/go-events v0.0.0-20170721190031-9461782956ad/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8= github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= github.com/docker/go-metrics v0.0.0-20180209012529-399ea8c73916/go.mod h1:/u0gXw0Gay3ceNrsHubL3BtdOL2fHf93USgMTe0W5dI= github.com/docker/go-metrics v0.0.1/go.mod h1:cG1hvH2utMXtqgqqYE9plW6lDxS3/5ayHzueweSI3Vw= +github.com/docker/go-plugins-helpers v0.0.0-20211224144127-6eecb7beb651/go.mod h1:LFyLie6XcDbyKGeVK6bHe+9aJTYCxWLBg5IrJZOaXKA= github.com/docker/go-units v0.3.3/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/docker/libnetwork v0.8.0-dev.2.0.20190625141545-5a177b73e316/go.mod h1:93m0aTqz6z+g32wla4l4WxTrdtvBRmVzYRkYvasA5Z8= github.com/docker/libtrust v0.0.0-20150114040149-fa567046d9b1/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE= +github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE= github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM= github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= +github.com/dtylman/scp v0.0.0-20181017070807-f3000a34aef4/go.mod h1:jN1ZaUPSNA8jm10nmaRLky84qV/iCeiHmcEf3EbP+dc= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/eggsampler/acme/v3 v3.2.1/go.mod h1:/qh0rKC/Dh7Jj+p4So7DbWmFNzC4dpcpK53r226Fhuo= github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= @@ -357,35 +474,60 @@ github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.m github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= +github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod 
h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch v4.11.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.5.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4= +github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a/go.mod h1:7Ga40egUymuWXxAe151lTNnCv97MddSOVsjpPPkityA= +github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c/go.mod h1:Yg+htXGokKKdzcwhuNDwVvN+uBxDGXJ7G/VN1d8fa64= +github.com/facebookgo/limitgroup v0.0.0-20150612190941-6abd8d71ec01/go.mod h1:ypD5nozFk9vcGw1ATYefw6jHe/jZP++Z15/+VTMcWhc= +github.com/facebookgo/muster v0.0.0-20150708232844-fd3d7953fd52/go.mod h1:yIquW87NGRw1FU5p5lEkpnt/QxoH5uPAOUlOVkAUuMg= +github.com/facebookgo/stack v0.0.0-20160209184415-751773369052/go.mod h1:UbMTZqLaRiH3MsBH8va0n7s1pQYcu3uTb8G4tygF4Zg= +github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4/go.mod h1:5tD+neXqOorC30/tWg0LCSkrqj/AR6gu8yY8/fpw1q0= +github.com/fanliao/go-promise v0.0.0-20141029170127-1890db352a72/go.mod h1:PjfxuH4FZdUyfMdtBio2lsRr1AKEaVPwelzuHuh8Lqc= +github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= +github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= +github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= +github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/flynn/go-docopt v0.0.0-20140912013429-f6dd2ebbb31e/go.mod h1:HyVoz1Mz5Co8TFO8EupIdlcpwShBmY98dkT2xeHkvEI= github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= github.com/form3tech-oss/jwt-go v3.2.3+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= -github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY= +github.com/frankban/quicktest v1.10.0/go.mod h1:ui7WezCLWMWxVWr1GETZY3smRy0G4KWq9vcPtJmFl7Y= github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= +github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU= +github.com/frankban/quicktest v1.13.1 h1:xVm/f9seEhZFL9+n5kv5XLrGwy6elc4V9v/XFY2vmd8= +github.com/frankban/quicktest v1.13.1/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU= +github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI= +github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU= +github.com/fsouza/go-dockerclient v1.7.7/go.mod h1:njNCXvoZj3sLPjf3yO0DPHf1mdLdCPDYPc14GskKA4Y= +github.com/fsouza/go-dockerclient v1.8.1/go.mod h1:zmA2ogSxRnXmbZcy0Aq7yhRoCdP/bDns/qghCK9SWtM= github.com/fullsailor/pkcs7 v0.0.0-20190404230743-d7302db945fa/go.mod 
h1:KnogPXtdwXqoenmZCw6S+25EAm2MkxbG0deNDu4cbSA= github.com/garyburd/redigo v0.0.0-20150301180006-535138d7bcd7/go.mod h1:NR3MbYisc3/PwhQ00EMzDiPmrwpPxAn5GI05/YaO1SY= github.com/getsentry/raven-go v0.2.0/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ= github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= +github.com/gin-gonic/gin v1.7.1/go.mod h1:jD2toBW3GZUr5UMcdrwQA10I7RuaFOl/SGeDjXkfUtY= github.com/globalsign/mgo v0.0.0-20180905125535-1ca0a4f7cbcb/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= -github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8 h1:DujepqpGd1hyOd7aW59XpK7Qymp8iy83xq74fLr21is= github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= +github.com/go-asn1-ber/asn1-ber v1.3.1/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= +github.com/go-gorp/gorp/v3 v3.0.2/go.mod h1:BJ3q1ejpV8cVALtcXvXaXyTOlMmJhWDxTmncaR6rwBY= github.com/go-ini/ini v1.25.4/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= github.com/go-ini/ini v1.28.2 h1:drmmYv7psRpoGZkPtPKKTB+ZFSnvmwCMfNj5o1nLh2Y= github.com/go-ini/ini v1.28.2/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= +github.com/go-ldap/ldap/v3 v3.1.10/go.mod h1:5Zun81jBTabRaI8lzN7E1JjyEl1g6zI6u9pd8luAK4Q= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= @@ -401,61 +543,158 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/analysis v0.0.0-20180825180245-b006789cd277/go.mod h1:k70tL6pCuVxPJOHXQ+wIac1FUrvNkHolPie/cLEU6hI= github.com/go-openapi/analysis v0.17.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= -github.com/go-openapi/analysis v0.17.2 h1:eYp14J1o8TTSCzndHBtsNuckikV1PfZOSnx4BcBeu0c= -github.com/go-openapi/analysis v0.17.2/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= +github.com/go-openapi/analysis v0.18.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik= +github.com/go-openapi/analysis v0.19.2/go.mod h1:3P1osvZa9jKjb8ed2TPng3f0i/UY9snX6gxi44djMjk= +github.com/go-openapi/analysis v0.19.4/go.mod h1:3P1osvZa9jKjb8ed2TPng3f0i/UY9snX6gxi44djMjk= +github.com/go-openapi/analysis v0.19.5/go.mod h1:hkEAkxagaIvIP7VTn8ygJNkd4kAYON2rCu0v0ObL0AU= +github.com/go-openapi/analysis v0.19.10/go.mod h1:qmhS3VNFxBlquFJ0RGoDtylO9y4pgTAUNE9AEEMdlJQ= +github.com/go-openapi/analysis v0.21.2 h1:hXFrOYFHUAMQdu6zwAiKKJHJQ8kqZs1ux/ru1P1wLJU= 
+github.com/go-openapi/analysis v0.21.2/go.mod h1:HZwRk4RRisyG8vx2Oe6aqeSQcoxRp47Xkp3+K6q+LdY= github.com/go-openapi/errors v0.17.0/go.mod h1:LcZQpmvG4wyF5j4IhA73wkLFQg+QJXOQHVjmcZxhka0= -github.com/go-openapi/errors v0.17.2/go.mod h1:LcZQpmvG4wyF5j4IhA73wkLFQg+QJXOQHVjmcZxhka0= -github.com/go-openapi/errors v0.18.0 h1:+RnmJ5MQccF7jwWAoMzwOpzJEspZ18ZIWfg9Z2eiXq8= github.com/go-openapi/errors v0.18.0/go.mod h1:LcZQpmvG4wyF5j4IhA73wkLFQg+QJXOQHVjmcZxhka0= +github.com/go-openapi/errors v0.19.2/go.mod h1:qX0BLWsyaKfvhluLejVpVNwNRdXZhEbTA4kxxpKBC94= +github.com/go-openapi/errors v0.19.3/go.mod h1:qX0BLWsyaKfvhluLejVpVNwNRdXZhEbTA4kxxpKBC94= +github.com/go-openapi/errors v0.19.6/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M= +github.com/go-openapi/errors v0.19.8/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M= +github.com/go-openapi/errors v0.19.9/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M= +github.com/go-openapi/errors v0.20.2 h1:dxy7PGTqEh94zj2E3h1cUmQQWiM1+aeCROfAr02EmK8= +github.com/go-openapi/errors v0.20.2/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M= github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0= github.com/go-openapi/jsonpointer v0.17.0/go.mod h1:cOnomiV+CVVwFLk0A/MExoFMjwdsUdVpsRhURCKh+3M= -github.com/go-openapi/jsonpointer v0.17.2/go.mod h1:cOnomiV+CVVwFLk0A/MExoFMjwdsUdVpsRhURCKh+3M= +github.com/go-openapi/jsonpointer v0.18.0/go.mod h1:cOnomiV+CVVwFLk0A/MExoFMjwdsUdVpsRhURCKh+3M= github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg= github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY= github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= github.com/go-openapi/jsonreference v0.0.0-20160704190145-13c6e3589ad9/go.mod h1:W3Z9FmVs9qj+KR4zFKmDPGiLdk1D9Rlm7cyMvf57TTg= github.com/go-openapi/jsonreference v0.17.0/go.mod h1:g4xxGn04lDIRh0GJb5QlpE3HfopLOL6uZrK/VgnsK9I= -github.com/go-openapi/jsonreference v0.17.2/go.mod h1:g4xxGn04lDIRh0GJb5QlpE3HfopLOL6uZrK/VgnsK9I= +github.com/go-openapi/jsonreference v0.18.0/go.mod h1:g4xxGn04lDIRh0GJb5QlpE3HfopLOL6uZrK/VgnsK9I= github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc= github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= -github.com/go-openapi/jsonreference v0.19.5 h1:1WJP/wi4OjB4iV8KVbH73rQaoialJrqv8gitZLxGLtM= github.com/go-openapi/jsonreference v0.19.5/go.mod h1:RdybgQwPxbL4UEjuAruzK1x3nE69AqPYEJeo/TWfEeg= +github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs= +github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns= github.com/go-openapi/loads v0.17.0/go.mod h1:72tmFy5wsWx89uEVddd0RjRWPZm92WRLhf7AC+0+OOU= -github.com/go-openapi/loads v0.17.2 h1:tEXYu6Xc0pevpzzQx5ghrMN9F7IVpN/+u4iD3rkYE5o= -github.com/go-openapi/loads v0.17.2/go.mod h1:72tmFy5wsWx89uEVddd0RjRWPZm92WRLhf7AC+0+OOU= +github.com/go-openapi/loads v0.18.0/go.mod h1:72tmFy5wsWx89uEVddd0RjRWPZm92WRLhf7AC+0+OOU= +github.com/go-openapi/loads v0.19.0/go.mod h1:72tmFy5wsWx89uEVddd0RjRWPZm92WRLhf7AC+0+OOU= +github.com/go-openapi/loads v0.19.2/go.mod h1:QAskZPMX5V0C2gvfkGZzJlINuP7Hx/4+ix5jWFxsNPs= +github.com/go-openapi/loads v0.19.3/go.mod h1:YVfqhUCdahYwR3f3iiwQLhicVRvLlU/WO5WPaZvcvSI= 
+github.com/go-openapi/loads v0.19.5/go.mod h1:dswLCAdonkRufe/gSUC3gN8nTSaB9uaS2es0x5/IbjY= +github.com/go-openapi/loads v0.21.1 h1:Wb3nVZpdEzDTcly8S4HMkey6fjARRzb7iEaySimlDW0= +github.com/go-openapi/loads v0.21.1/go.mod h1:/DtAMXXneXFjbQMGEtbamCZb+4x7eGwkvZCvBmwUG+g= github.com/go-openapi/runtime v0.0.0-20180920151709-4f900dc2ade9/go.mod h1:6v9a6LTXWQCdL8k1AO3cvqx5OtZY/Y9wKTgaoP6YRfA= -github.com/go-openapi/runtime v0.18.0 h1:ddoL4Uo/729XbNAS9UIsG7Oqa8R8l2edBe6Pq/i8AHM= -github.com/go-openapi/runtime v0.18.0/go.mod h1:uI6pHuxWYTy94zZxgcwJkUWa9wbIlhteGfloI10GD4U= +github.com/go-openapi/runtime v0.19.0/go.mod h1:OwNfisksmmaZse4+gpV3Ne9AyMOlP1lt4sK4FXt0O64= +github.com/go-openapi/runtime v0.19.4/go.mod h1:X277bwSUBxVlCYR3r7xgZZGKVvBd/29gLDlFGtJ8NL4= +github.com/go-openapi/runtime v0.19.15/go.mod h1:dhGWCTKRXlAfGnQG0ONViOZpjfg0m2gUt9nTQPQZuoo= +github.com/go-openapi/runtime v0.19.21 h1:81PiYus9l6fwwS4EwhJD+tQb3EPZBeWfgdAVTfFD25Q= +github.com/go-openapi/runtime v0.19.21/go.mod h1:Lm9YGCeecBnUUkFTxPC4s1+lwrkJ0pthx8YvyjCfkgk= github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nAiCcj+friV/PDoE1/3eeccG9LYBs0tYvLOWc= github.com/go-openapi/spec v0.17.0/go.mod h1:XkF/MOi14NmjsfZ8VtAKf8pIlbZzyoTvZsdfssdxcBI= -github.com/go-openapi/spec v0.17.2/go.mod h1:XkF/MOi14NmjsfZ8VtAKf8pIlbZzyoTvZsdfssdxcBI= -github.com/go-openapi/spec v0.19.3 h1:0XRyw8kguri6Yw4SxhsQA/atC88yqrk0+G4YhI2wabc= +github.com/go-openapi/spec v0.18.0/go.mod h1:XkF/MOi14NmjsfZ8VtAKf8pIlbZzyoTvZsdfssdxcBI= +github.com/go-openapi/spec v0.19.2/go.mod h1:sCxk3jxKgioEJikev4fgkNmwS+3kuYdJtcsZsD5zxMY= github.com/go-openapi/spec v0.19.3/go.mod h1:FpwSN1ksY1eteniUU7X0N/BgJ7a4WvBFVA8Lj9mJglo= +github.com/go-openapi/spec v0.19.6/go.mod h1:Hm2Jr4jv8G1ciIAo+frC/Ft+rR2kQDh8JHKHb3gWUSk= +github.com/go-openapi/spec v0.19.8/go.mod h1:Hm2Jr4jv8G1ciIAo+frC/Ft+rR2kQDh8JHKHb3gWUSk= +github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M= +github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I= github.com/go-openapi/strfmt v0.17.0/go.mod h1:P82hnJI0CXkErkXi8IKjPbNBM6lV6+5pLP5l494TcyU= -github.com/go-openapi/strfmt v0.17.2/go.mod h1:P82hnJI0CXkErkXi8IKjPbNBM6lV6+5pLP5l494TcyU= -github.com/go-openapi/strfmt v0.18.0 h1:FqqmmVCKn3di+ilU/+1m957T1CnMz3IteVUcV3aGXWA= github.com/go-openapi/strfmt v0.18.0/go.mod h1:P82hnJI0CXkErkXi8IKjPbNBM6lV6+5pLP5l494TcyU= +github.com/go-openapi/strfmt v0.19.0/go.mod h1:+uW+93UVvGGq2qGaZxdDeJqSAqBqBdl+ZPMF/cC8nDY= +github.com/go-openapi/strfmt v0.19.2/go.mod h1:0yX7dbo8mKIvc3XSKp7MNfxw4JytCfCD6+bY1AVL9LU= +github.com/go-openapi/strfmt v0.19.3/go.mod h1:0yX7dbo8mKIvc3XSKp7MNfxw4JytCfCD6+bY1AVL9LU= +github.com/go-openapi/strfmt v0.19.4/go.mod h1:eftuHTlB/dI8Uq8JJOyRlieZf+WkkxUuk0dgdHXr2Qk= +github.com/go-openapi/strfmt v0.19.5/go.mod h1:eftuHTlB/dI8Uq8JJOyRlieZf+WkkxUuk0dgdHXr2Qk= +github.com/go-openapi/strfmt v0.21.0/go.mod h1:ZRQ409bWMj+SOgXofQAGTIo2Ebu72Gs+WaRADcS5iNg= +github.com/go-openapi/strfmt v0.21.1 h1:G6s2t5V5kGCHLVbSdZ/6lI8Wm4OzoPFkc3/cjAsKQrM= +github.com/go-openapi/strfmt v0.21.1/go.mod h1:I/XVKeLc5+MM5oPNN7P6urMOpuLXEcNrCX/rPGuWb0k= github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I= github.com/go-openapi/swag v0.17.0/go.mod h1:AByQ+nYG6gQg71GINrmuDXCPWdL640yX49/kXLo40Tg= -github.com/go-openapi/swag v0.17.2/go.mod h1:AByQ+nYG6gQg71GINrmuDXCPWdL640yX49/kXLo40Tg= +github.com/go-openapi/swag v0.18.0/go.mod h1:AByQ+nYG6gQg71GINrmuDXCPWdL640yX49/kXLo40Tg= 
github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= -github.com/go-openapi/swag v0.19.14 h1:gm3vOOXfiuw5i9p5N9xJvfjvuofpyvLA9Wr6QfK5Fng= +github.com/go-openapi/swag v0.19.7/go.mod h1:ao+8BpOPyKdpQz3AOJfbeEVpLmWAvlT1IfTe5McPyhY= +github.com/go-openapi/swag v0.19.9/go.mod h1:ao+8BpOPyKdpQz3AOJfbeEVpLmWAvlT1IfTe5McPyhY= github.com/go-openapi/swag v0.19.14/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= -github.com/go-openapi/validate v0.17.2/go.mod h1:Uh4HdOzKt19xGIGm1qHf/ofbX1YQ4Y+MYsct2VUrAJ4= -github.com/go-openapi/validate v0.18.0 h1:PVXYcP1GkTl+XIAJnyJxOmK6CSG5Q1UcvoCvNO++5Kg= +github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= +github.com/go-openapi/swag v0.21.1 h1:wm0rhTb5z7qpJRHBdPOMuY4QjVUMbF6/kwoYeRAOrKU= +github.com/go-openapi/swag v0.21.1/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= github.com/go-openapi/validate v0.18.0/go.mod h1:Uh4HdOzKt19xGIGm1qHf/ofbX1YQ4Y+MYsct2VUrAJ4= +github.com/go-openapi/validate v0.19.2/go.mod h1:1tRCw7m3jtI8eNWEEliiAqUIcBztB2KDnRCRMUi7GTA= +github.com/go-openapi/validate v0.19.3/go.mod h1:90Vh6jjkTn+OT1Eefm0ZixWNFjhtOH7vS9k0lo6zwJo= +github.com/go-openapi/validate v0.19.10/go.mod h1:RKEZTUWDkxKQxN2jDT7ZnZi2bhZlbNMAuKvKB+IaGx8= +github.com/go-openapi/validate v0.22.0 h1:b0QecH6VslW/TxtpKgzpO1SNG7GU2FsaqKdP1E2T50Y= +github.com/go-openapi/validate v0.22.0/go.mod h1:rjnrwK57VJ7A8xqfpAOEKRH8yQSGUriMu5/zuPSQ1hg= +github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= +github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= +github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4= +github.com/go-redis/redis/v8 v8.11.4/go.mod h1:2Z2wHZXdQpCDXEGzqMockDpNyYvi2l4Pxt6RJr792+w= +github.com/go-rod/rod v0.107.3/go.mod h1:4SqYRUrcc4dSr9iT36YRZ4hdUAPg3A0O8RhxAMh0eCQ= +github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= +github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= +github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= +github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= +github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= +github.com/go-test/deep v1.0.2/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= +github.com/go-test/deep v1.0.8/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= +github.com/gobuffalo/attrs v0.0.0-20190224210810-a9411de4debd/go.mod h1:4duuawTqi2wkkpB4ePgWMaai6/Kc6WEz83bhFwpHzj0= +github.com/gobuffalo/attrs v0.1.0/go.mod h1:fmNpaWyHM0tRm8gCZWKx8yY9fvaNLo2PyzBNSrBZ5Hw= +github.com/gobuffalo/depgen v0.0.0-20190329151759-d478694a28d3/go.mod h1:3STtPUQYuzV0gBVOY3vy6CfMm/ljR4pABfrTeHNLHUY= +github.com/gobuffalo/depgen v0.1.0/go.mod h1:+ifsuy7fhi15RWncXQQKjWS9JPkdah5sZvtHc2RXGlg= +github.com/gobuffalo/envy v1.6.15/go.mod h1:n7DRkBerg/aorDM8kbduw5dN3oXGswK5liaSCx4T5NI= +github.com/gobuffalo/envy v1.7.0/go.mod 
h1:n7DRkBerg/aorDM8kbduw5dN3oXGswK5liaSCx4T5NI= +github.com/gobuffalo/envy v1.8.1/go.mod h1:FurDp9+EDPE4aIUS3ZLyD+7/9fpx7YRt/ukY6jIHf0w= +github.com/gobuffalo/envy v1.9.0/go.mod h1:FurDp9+EDPE4aIUS3ZLyD+7/9fpx7YRt/ukY6jIHf0w= +github.com/gobuffalo/fizz v1.10.0/go.mod h1:J2XGPO0AfJ1zKw7+2BA+6FEGAkyEsdCOLvN93WCT2WI= +github.com/gobuffalo/flect v0.1.0/go.mod h1:d2ehjJqGOH/Kjqcoz+F7jHTBbmDb38yXA598Hb50EGs= +github.com/gobuffalo/flect v0.1.1/go.mod h1:8JCgGVbRjJhVgD6399mQr4fx5rRfGKVzFjbj6RE/9UI= +github.com/gobuffalo/flect v0.1.3/go.mod h1:8JCgGVbRjJhVgD6399mQr4fx5rRfGKVzFjbj6RE/9UI= +github.com/gobuffalo/flect v0.1.5/go.mod h1:W3K3X9ksuZfir8f/LrfVtWmCDQFfayuylOJ7sz/Fj80= +github.com/gobuffalo/flect v0.2.0/go.mod h1:W3K3X9ksuZfir8f/LrfVtWmCDQFfayuylOJ7sz/Fj80= +github.com/gobuffalo/flect v0.2.1/go.mod h1:vmkQwuZYhN5Pc4ljYQZzP+1sq+NEkK+lh20jmEmX3jc= +github.com/gobuffalo/genny v0.0.0-20190329151137-27723ad26ef9/go.mod h1:rWs4Z12d1Zbf19rlsn0nurr75KqhYp52EAGGxTbBhNk= +github.com/gobuffalo/genny v0.0.0-20190403191548-3ca520ef0d9e/go.mod h1:80lIj3kVJWwOrXWWMRzzdhW3DsrdjILVil/SFKBzF28= +github.com/gobuffalo/genny v0.1.0/go.mod h1:XidbUqzak3lHdS//TPu2OgiFB+51Ur5f7CSnXZ/JDvo= +github.com/gobuffalo/genny v0.1.1/go.mod h1:5TExbEyY48pfunL4QSXxlDOmdsD44RRq4mVZ0Ex28Xk= +github.com/gobuffalo/genny/v2 v2.0.5/go.mod h1:kRkJuAw9mdI37AiEYjV4Dl+TgkBDYf8HZVjLkqe5eBg= +github.com/gobuffalo/gitgen v0.0.0-20190315122116-cc086187d211/go.mod h1:vEHJk/E9DmhejeLeNt7UVvlSGv3ziL+djtTr3yyzcOw= +github.com/gobuffalo/github_flavored_markdown v1.1.0/go.mod h1:TSpTKWcRTI0+v7W3x8dkSKMLJSUpuVitlptCkpeY8ic= +github.com/gobuffalo/gogen v0.0.0-20190315121717-8f38393713f5/go.mod h1:V9QVDIxsgKNZs6L2IYiGR8datgMhB577vzTDqypH360= +github.com/gobuffalo/gogen v0.1.0/go.mod h1:8NTelM5qd8RZ15VjQTFkAW6qOMx5wBbW4dSCS3BY8gg= +github.com/gobuffalo/gogen v0.1.1/go.mod h1:y8iBtmHmGc4qa3urIyo1shvOD8JftTtfcKi+71xfDNE= +github.com/gobuffalo/helpers v0.6.0/go.mod h1:pncVrer7x/KRvnL5aJABLAuT/RhKRR9klL6dkUOhyv8= +github.com/gobuffalo/helpers v0.6.1/go.mod h1:wInbDi0vTJKZBviURTLRMFLE4+nF2uRuuL2fnlYo7w4= +github.com/gobuffalo/logger v0.0.0-20190315122211-86e12af44bc2/go.mod h1:QdxcLw541hSGtBnhUc4gaNIXRjiDppFGaDqzbrBd3v8= +github.com/gobuffalo/logger v1.0.3/go.mod h1:SoeejUwldiS7ZsyCBphOGURmWdwUFXs0J7TCjEhjKxM= +github.com/gobuffalo/mapi v1.0.1/go.mod h1:4VAGh89y6rVOvm5A8fKFxYG+wIW6LO1FMTG9hnKStFc= +github.com/gobuffalo/mapi v1.0.2/go.mod h1:4VAGh89y6rVOvm5A8fKFxYG+wIW6LO1FMTG9hnKStFc= +github.com/gobuffalo/nulls v0.2.0/go.mod h1:w4q8RoSCEt87Q0K0sRIZWYeIxkxog5mh3eN3C/n+dUc= +github.com/gobuffalo/packd v0.0.0-20190315124812-a385830c7fc0/go.mod h1:M2Juc+hhDXf/PnmBANFCqx4DM3wRbgDvnVWeG2RIxq4= +github.com/gobuffalo/packd v0.1.0/go.mod h1:M2Juc+hhDXf/PnmBANFCqx4DM3wRbgDvnVWeG2RIxq4= +github.com/gobuffalo/packd v0.3.0/go.mod h1:zC7QkmNkYVGKPw4tHpBQ+ml7W/3tIebgeo1b36chA3Q= +github.com/gobuffalo/packd v1.0.0/go.mod h1:6VTc4htmJRFB7u1m/4LeMTWjFoYrUiBkU9Fdec9hrhI= +github.com/gobuffalo/packr/v2 v2.0.9/go.mod h1:emmyGweYTm6Kdper+iywB6YK5YzuKchGtJQZ0Odn4pQ= +github.com/gobuffalo/packr/v2 v2.2.0/go.mod h1:CaAwI0GPIAv+5wKLtv8Afwl+Cm78K/I/VCm/3ptBN+0= +github.com/gobuffalo/packr/v2 v2.8.0/go.mod h1:PDk2k3vGevNE3SwVyVRgQCCXETC9SaONCNSXT1Q8M1g= +github.com/gobuffalo/plush/v4 v4.0.0/go.mod h1:ErFS3UxKqEb8fpFJT7lYErfN/Nw6vHGiDMTjxpk5bQ0= +github.com/gobuffalo/pop/v5 v5.3.1/go.mod h1:vcEDhh6cJ3WVENqJDFt/6z7zNb7lLnlN8vj3n5G9rYA= +github.com/gobuffalo/syncx v0.0.0-20190224160051-33c29581e754/go.mod h1:HhnNqWY95UYwwW3uSASeV7vtgYkT2t16hJgV3AEPUpw= +github.com/gobuffalo/tags/v3 
v3.0.2/go.mod h1:ZQeN6TCTiwAFnS0dNcbDtSgZDwNKSpqajvVtt6mlYpA= +github.com/gobuffalo/tags/v3 v3.1.0/go.mod h1:ZQeN6TCTiwAFnS0dNcbDtSgZDwNKSpqajvVtt6mlYpA= +github.com/gobuffalo/validate/v3 v3.0.0/go.mod h1:HFpjq+AIiA2RHoQnQVTFKF/ZpUPXwyw82LgyDPxQ9r0= +github.com/gobuffalo/validate/v3 v3.1.0/go.mod h1:HFpjq+AIiA2RHoQnQVTFKF/ZpUPXwyw82LgyDPxQ9r0= github.com/godbus/dbus v0.0.0-20151105175453-c7fdd8b5cd55/go.mod h1:/YcGZj5zSblfDWMMoOzV4fas9FZnQYTkDnsGvmh2Grw= github.com/godbus/dbus v0.0.0-20180201030542-885f9cc04c9c/go.mod h1:/YcGZj5zSblfDWMMoOzV4fas9FZnQYTkDnsGvmh2Grw= -github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e h1:BWhy2j3IXJhjCbC68FptL43tDKIq8FladmaTs3Xs7Z8= github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e/go.mod h1:bBOAhwG1umN6/6ZUMtDFBMQR8jRg9O75tm9K00oMsK4= github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/godbus/dbus/v5 v5.0.6 h1:mkgN1ofwASrYnJ5W6U/BxG15eXXXjirgZc7CLqkcaro= github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= +github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/gofrs/flock v0.8.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU= +github.com/gofrs/uuid v3.2.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= github.com/gogo/googleapis v1.2.0/go.mod h1:Njal3psf3qN6dwBtQfUmBZh2ybovJ0tlu3o/AC7HYjU= github.com/gogo/googleapis v1.4.0/go.mod h1:5YRNX2z1oM5gXdAkurHa942MDgEJyk02w4OecKY87+c= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -465,6 +704,8 @@ github.com/gogo/protobuf v1.3.0/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXP github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= +github.com/golang-jwt/jwt/v4 v4.2.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -500,9 +741,14 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM= github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod 
h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= +github.com/google/certificate-transparency-go v1.0.22-0.20181127102053-c25855a82c75/go.mod h1:QeJfpSbVSfYc7RgB3gJFj9cbuQMMchQxrWXz8Ruopmg= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -515,9 +761,12 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= +github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-containerregistry v0.5.1/go.mod h1:Ct15B4yir3PLOP5jsy0GNeYVaIZs/MK/Jz5any1wFW0= +github.com/google/go-containerregistry v0.10.0/go.mod h1:C7uwbB1QUAtvnknyd3ethxJRd4gtEjU/9WLXzckfI1Y= +github.com/google/go-intervals v0.0.2/go.mod h1:MkaR3LNRfeKLPmqgJYs4E66z5InYjmCjbbr4TQlcT6Y= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= @@ -525,6 +774,7 @@ github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/ github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= +github.com/google/martian/v3 v3.2.1/go.mod h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= @@ -537,23 +787,38 @@ github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLe github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod 
h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.1.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.2.0 h1:qJYtXnJRWmpe7m/3XlyhrsLrEURqHRM2kxzoxXqyUDs= github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.0.0-20220520183353-fd19c99a87aa/go.mod h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8= +github.com/googleapis/enterprise-certificate-proxy v0.1.0/go.mod h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0= +github.com/googleapis/gax-go/v2 v2.1.1/go.mod h1:hddJymUZASv3XPyGkUpKj8pPO47Rmb0eJc8R6ouapiM= +github.com/googleapis/gax-go/v2 v2.2.0/go.mod h1:as02EH8zWkzwUoLbBaFeQ+arQaj/OthfcblKl4IGNaM= +github.com/googleapis/gax-go/v2 v2.3.0/go.mod h1:b8LNqSzNabLiUpXKkY7HAR5jr6bIT99EXz9pXxye9YM= +github.com/googleapis/gax-go/v2 v2.4.0/go.mod h1:XOTVJ59hdnfJLIP/dh8n5CGryZR2LxK9wbMD5+iXC6c= github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3ir6b65WBswg= github.com/googleapis/gnostic v0.5.1/go.mod h1:6U4PtQXGIEt/Z3h5MAT7FNofLnw9vXk2cUuW7uA/OeU= github.com/googleapis/gnostic v0.5.5/go.mod h1:7+EbHbldMins07ALC74bsA81Ovc97DwqyJO1AENw9kA= +github.com/googleapis/go-type-adapters v1.0.0/go.mod h1:zHW75FOG2aur7gAO2B+MLby+cLsWGBF62rFAi7WjWO4= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gorilla/handlers v0.0.0-20150720190736-60c7bfde3e33/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= +github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q= github.com/gorilla/mux v1.7.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= +github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/gorilla/schema v1.2.0/go.mod h1:kgLaKoK1FELgZqMAVxx/5cbj0kT+57qxUrAlIO2eleU= github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= @@ -571,27 +836,61 @@ github.com/hashicorp/errwrap v0.0.0-20141028054710-7554cd9344ce/go.mod h1:YH+1FK github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-cleanhttp v0.5.0/go.mod 
h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= +github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= +github.com/hashicorp/go-hclog v0.14.1/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= +github.com/hashicorp/go-hclog v0.16.2/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= +github.com/hashicorp/go-hclog v1.1.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= +github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= +github.com/hashicorp/go-kms-wrapping/entropy v0.1.0/go.mod h1:d1g9WGtAunDNpek8jUIEJnBlbgKS1N2Q61QkHiZyR1g= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-multierror v0.0.0-20161216184304-ed905158d874/go.mod h1:JMRHfdO9jKNzS/+BTlxCjKNQHg/jZAft8U7LloJvN7I= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/hashicorp/go-plugin v1.4.3/go.mod h1:5fGEH17QVwTTcR0zV7yhDPLLmFX9YSZ38b18Udy6vYQ= +github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= +github.com/hashicorp/go-retryablehttp v0.6.6/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY= +github.com/hashicorp/go-retryablehttp v0.7.0/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY= github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= +github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= +github.com/hashicorp/go-secure-stdlib/base62 v0.1.1/go.mod h1:EdWO6czbmthiwZ3/PUsDV+UD1D5IRU4ActiaWGwt0Yw= +github.com/hashicorp/go-secure-stdlib/mlock v0.1.1/go.mod h1:zq93CJChV6L9QTfGKtfBxKqD7BqqXx5O04A/ns2p5+I= +github.com/hashicorp/go-secure-stdlib/mlock v0.1.2/go.mod h1:zq93CJChV6L9QTfGKtfBxKqD7BqqXx5O04A/ns2p5+I= +github.com/hashicorp/go-secure-stdlib/parseutil v0.1.1/go.mod h1:QmrqtbKuxxSWTN3ETMPuB+VtEiBJ/A9XhoYGv8E1uD8= +github.com/hashicorp/go-secure-stdlib/parseutil v0.1.6/go.mod h1:QmrqtbKuxxSWTN3ETMPuB+VtEiBJ/A9XhoYGv8E1uD8= +github.com/hashicorp/go-secure-stdlib/password v0.1.1/go.mod h1:9hH302QllNwu1o2TGYtSk8I8kTAN0ca1EHpwhm5Mmzo= +github.com/hashicorp/go-secure-stdlib/strutil v0.1.1/go.mod h1:gKOamz3EwoIoJq7mlMIRBpVTAUn8qPCrEclOKKWhD3U= +github.com/hashicorp/go-secure-stdlib/strutil v0.1.2/go.mod h1:Gou2R9+il93BqX25LAKCLuM+y9U2T4hlwvT1yprcna4= +github.com/hashicorp/go-secure-stdlib/tlsutil v0.1.1/go.mod h1:l8slYwnJA26yBz+ErHpp2IRCLr0vuOMGBORIz4rRiAs= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= +github.com/hashicorp/go-sockaddr v1.0.2/go.mod h1:rB4wwRAUzs07qva3c5SdrY/NEtAUjGlgmH/UkBUC97A= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-uuid v1.0.2/go.mod 
h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/hashicorp/go-version v1.4.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= +github.com/hashicorp/vault/api v1.7.2/go.mod h1:xbfA+1AvxFseDzxxdWaL0uO99n1+tndus4GCrtouy0M= +github.com/hashicorp/vault/sdk v0.5.1/go.mod h1:DoGraE9kKGNcVgPmTuX357Fm6WAx1Okvde8Vp3dPDoU= +github.com/hashicorp/yamux v0.0.0-20180604194846-3520598351bb/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM= +github.com/hashicorp/yamux v0.0.0-20211028200310-0bc27b27de87/go.mod h1:CtWFDAQgb7dxtzFs4tWbplKIe2jSi3+5vKbgIO0SLnQ= +github.com/honeycombio/beeline-go v1.1.1/go.mod h1:kN0cfUGBMfA87DyCYbiiLoSzWsnw3bluZvNEWtatHxk= +github.com/honeycombio/libhoney-go v1.15.2/go.mod h1:JzhRPYgoBCd0rZvudrqmej4Ntx0w7AT3wAJpf5+t1WA= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/hugelgupf/socketpair v0.0.0-20190730060125-05d35a94e714/go.mod h1:2Goc3h8EklBH5mspfHFxBnEoURQCGzQQH1ga9Myjvis= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= @@ -599,22 +898,69 @@ github.com/imdario/mergo v0.3.8/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJ github.com/imdario/mergo v0.3.10/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= +github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/insomniacslk/dhcp v0.0.0-20220119180841-3c283ff8b7dd/go.mod h1:h+MxyHxRg9NH3terB1nfRIUaQEcI0XOVkdR9LNBlp8E= github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9 h1:x9HFDMDCsaxTvC4X3o0ZN6mw99dT/wYnTItGwhBRmg0= github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9/go.mod h1:RmeVYf9XrPRbRc3XIx0gLYA8qOFvNoPOfaEZduRlEp4= github.com/intel/goresctrl v0.2.0/go.mod h1:+CZdzouYFn5EsxgqAQTEzMfwKwuc0fVdMrT9FCCAVRQ= github.com/j-keck/arping v0.0.0-20160618110441-2cf9dc699c56/go.mod h1:ymszkNOg6tORTn+6F6j+Jc8TOr5osrynvN6ivFWZ2GA= github.com/j-keck/arping v1.0.2/go.mod h1:aJbELhR92bSk7tp79AWM/ftfc90EfEi2bQJrbBFOsPw= +github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo= +github.com/jackc/chunkreader/v2 
v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk= +github.com/jackc/chunkreader/v2 v2.0.1/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk= +github.com/jackc/pgconn v0.0.0-20190420214824-7e0022ef6ba3/go.mod h1:jkELnwuX+w9qN5YIfX0fl88Ehu4XC3keFuOJJk9pcnA= +github.com/jackc/pgconn v0.0.0-20190824142844-760dd75542eb/go.mod h1:lLjNuW/+OfW9/pnVKPazfWOgNfH2aPem8YQ7ilXGvJE= +github.com/jackc/pgconn v0.0.0-20190831204454-2fabfa3c18b7/go.mod h1:ZJKsE/KZfsUgOEh9hBm+xYTstcNHg7UPMVJqRfQxq4s= +github.com/jackc/pgconn v1.5.0/go.mod h1:QeD3lBfpTFe8WUnPZWN5KY/mB8FGMIYRdd8P8Jr0fAI= +github.com/jackc/pgconn v1.6.0/go.mod h1:yeseQo4xhQbgyJs2c87RAXOH2i624N0Fh1KSPJya7qo= +github.com/jackc/pgio v1.0.0/go.mod h1:oP+2QK2wFfUWgr+gxjoBH9KGBb31Eio69xUb0w5bYf8= +github.com/jackc/pgmock v0.0.0-20190831213851-13a1b77aafa2/go.mod h1:fGZlG77KXmcq05nJLRkk0+p82V8B8Dw8KN2/V9c/OAE= +github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= +github.com/jackc/pgproto3 v1.1.0/go.mod h1:eR5FA3leWg7p9aeAqi37XOTgTIbkABlvcPB3E5rlc78= +github.com/jackc/pgproto3/v2 v2.0.0-alpha1.0.20190420180111-c116219b62db/go.mod h1:bhq50y+xrl9n5mRYyCBFKkpRVTLYJVWeCc+mEAI3yXA= +github.com/jackc/pgproto3/v2 v2.0.0-alpha1.0.20190609003834-432c2951c711/go.mod h1:uH0AWtUmuShn0bcesswc4aBTWGvw0cAxIJp+6OB//Wg= +github.com/jackc/pgproto3/v2 v2.0.0-rc3/go.mod h1:ryONWYqW6dqSg1Lw6vXNMXoBJhpzvWKnT95C46ckYeM= +github.com/jackc/pgproto3/v2 v2.0.0-rc3.0.20190831210041-4c03ce451f29/go.mod h1:ryONWYqW6dqSg1Lw6vXNMXoBJhpzvWKnT95C46ckYeM= +github.com/jackc/pgproto3/v2 v2.0.1/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= +github.com/jackc/pgproto3/v2 v2.0.2/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= +github.com/jackc/pgservicefile v0.0.0-20200307190119-3430c5407db8/go.mod h1:vsD4gTJCa9TptPL8sPkXrLZ+hDuNrZCnj29CQpr4X1E= +github.com/jackc/pgtype v0.0.0-20190421001408-4ed0de4755e0/go.mod h1:hdSHsc1V01CGwFsrv11mJRHWJ6aifDLfdV3aVjFF0zg= +github.com/jackc/pgtype v0.0.0-20190824184912-ab885b375b90/go.mod h1:KcahbBH1nCMSo2DXpzsoWOAfFkdEtEJpPbVLq8eE+mc= +github.com/jackc/pgtype v0.0.0-20190828014616-a8802b16cc59/go.mod h1:MWlu30kVJrUS8lot6TQqcg7mtthZ9T0EoIBFiJcmcyw= +github.com/jackc/pgtype v1.3.0/go.mod h1:b0JqxHvPmljG+HQ5IsvQ0yqeSi4nGcDTVjFoiLDb0Ik= +github.com/jackc/pgx v3.6.2+incompatible/go.mod h1:0ZGrqGqkRlliWnWB4zKnWtjbSWbGkVEFm4TeybAXq+I= +github.com/jackc/pgx/v4 v4.0.0-20190420224344-cc3461e65d96/go.mod h1:mdxmSJJuR08CZQyj1PVQBHy9XOp5p8/SHH6a0psbY9Y= +github.com/jackc/pgx/v4 v4.0.0-20190421002000-1b8f0016e912/go.mod h1:no/Y67Jkk/9WuGR0JG/JseM9irFbnEPbuWV2EELPNuM= +github.com/jackc/pgx/v4 v4.0.0-pre1.0.20190824185557-6972a5742186/go.mod h1:X+GQnOEnf1dqHGpw7JmHqHc1NxDoalibchSk9/RWuDc= +github.com/jackc/pgx/v4 v4.6.0/go.mod h1:vPh43ZzxijXUVJ+t/EmXBtFmbFVO72cuneCT9oAlxAg= +github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= +github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= +github.com/jackc/puddle v1.1.0/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= +github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= +github.com/jhump/protoreflect v1.6.0/go.mod h1:eaTn3RZAmMBcV0fifFvlm6VHNz3wSkYyXYWUh7ymB74= +github.com/jinzhu/copier v0.3.5/go.mod h1:DfbEm0FYsaqBcKcFuvmOZb218JkPGtvSHsKg8S8hyyg= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jmespath/go-jmespath 
v0.0.0-20160803190731-bd40a432e4c7/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/jmhodges/clock v0.0.0-20160418191101-880ee4c33548/go.mod h1:hGT6jSUVzF6no3QaDSMLGLEHtHSBSefs+MgcDWnmhmo= +github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= +github.com/jmoiron/sqlx v1.3.4/go.mod h1:2BljVx/86SuTyjE+aPYlHCTNvZrnJXghYGpNiXLBMCQ= github.com/joefitzgerald/rainbow-reporter v0.1.0/go.mod h1:481CNgqmVHQZzdIbN52CupLJyoVwB10FQ/IQlF1pdL8= +github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= +github.com/jsimonetti/rtnetlink v0.0.0-20190606172950-9527aa82566a/go.mod h1:Oz+70psSo5OFh8DBl0Zv2ACw7Esh6pPUphlvZG9x7uw= +github.com/jsimonetti/rtnetlink v0.0.0-20200117123717-f846d4f6c1f4/go.mod h1:WGuG/smIU4J/54PblvSbh+xvCZmpJnFgr3ds6Z55XMQ= +github.com/jsimonetti/rtnetlink v0.0.0-20201009170750-9c6f07d100c1/go.mod h1:hqoO/u39cqLeBLebZ8fWdE96O7FxrAsRYhnVOdgHxok= +github.com/jsimonetti/rtnetlink v0.0.0-20201110080708-d2c240429e6c/go.mod h1:huN4d1phzjhlOsNIjFsw2SVRbwIHj3fJDMEU2SDPTmg= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= @@ -624,82 +970,170 @@ github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7 github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= +github.com/karrick/godirwalk v1.8.0/go.mod h1:H5KPZjojv4lE+QYImBI8xVtrBRgYrIVsaRPx4tDPEn4= +github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA= +github.com/karrick/godirwalk v1.15.3/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk= +github.com/karrick/godirwalk v1.16.1/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk= +github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress 
v1.9.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/compress v1.14.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/compress v1.15.4/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/klauspost/compress v1.15.7/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= +github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/labstack/echo/v4 v4.3.0/go.mod h1:PvmtTvhVqKDzDQy4d3bWzPjZLzom4iQbAZy2sgZ/qI8= +github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= +github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= +github.com/letsencrypt/boulder v0.0.0-20220331220046-b23ab962616e/go.mod h1:Bl3mfF2LHYepsU2XfzMceIglyByfPe1IFAXtO+p37Qk= +github.com/letsencrypt/challtestsrv v1.2.1/go.mod h1:Ur4e4FvELUXLGhkMztHOsPIsvGxD/kzSJninOrkM+zc= +github.com/letsencrypt/pkcs11key/v4 v4.0.0/go.mod h1:EFUvBDay26dErnNb70Nd0/VW3tJiIbETBPTl9ATXQag= +github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= +github.com/lib/pq v1.1.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= +github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= +github.com/lib/pq v1.3.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/linuxkit/virtsock v0.0.0-20201010232012-f8cee7dfc7a3/go.mod h1:3r6x7q95whyfWQpmGZTu3gk3v2YkMi05HEzl7Tf7YEo= 
+github.com/luna-duclos/instrumentedsql v1.1.3/go.mod h1:9J1njvFds+zN7y85EDhN9XNQLANWwZt2ULeIC8yMNYs= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.7.0/go.mod h1:KAzv3t3aY1NaHWoQz1+4F1ccyAH66Jk7yos7ldAVICs= +github.com/mailru/easyjson v0.7.1/go.mod h1:KAzv3t3aY1NaHWoQz1+4F1ccyAH66Jk7yos7ldAVICs= github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA= github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/manifoldco/promptui v0.9.0/go.mod h1:ka04sppxSGFAtxX0qhlYQjISsg9mR4GWtQEhdbn6Pgg= +github.com/markbates/errx v1.1.0/go.mod h1:PLa46Oex9KNbVDZhKel8v1OT7hD5JZ2eI7AHhA0wswc= +github.com/markbates/oncer v0.0.0-20181203154359-bf2de49a0be2/go.mod h1:Ld9puTsIW75CHf65OeIOkyKbteujpZVXDpWK6YGZbxE= +github.com/markbates/oncer v1.0.0/go.mod h1:Z59JA581E9GP6w96jai+TGqafHPW+cPfRxz2aSZ0mcI= +github.com/markbates/safe v1.0.1/go.mod h1:nAqgmRi7cY2nqMc92/bSEeQA+R4OheNU2T1kNSCBdG0= github.com/marstr/guid v1.1.0/go.mod h1:74gB1z2wpxxInTG6yaqA7KrtM0NZ+RbrcqDvYHefzho= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= +github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= +github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= +github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= +github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= 
github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= +github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-shellwords v1.0.3/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= github.com/mattn/go-shellwords v1.0.6/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y= +github.com/mattn/go-sqlite3 v1.9.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= +github.com/mattn/go-sqlite3 v1.11.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= +github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= +github.com/mattn/go-sqlite3 v2.0.3+incompatible/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI= github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= github.com/maxbrunsfeld/counterfeiter/v6 v6.2.2/go.mod h1:eD9eIE7cdwcMi9rYluz88Jz2VyhSmden33/aXg4oVIY= +github.com/mdlayher/ethernet v0.0.0-20190606142754-0394541c37b7/go.mod h1:U6ZQobyTjI/tJyq2HG+i/dfSoFUt8/aZCM+GKtmFk/Y= +github.com/mdlayher/netlink v0.0.0-20190409211403-11939a169225/go.mod h1:eQB3mZE4aiYnlUsyGGCOpPETfdQq4Jhsgf1fk3cwQaA= +github.com/mdlayher/netlink v1.0.0/go.mod h1:KxeJAFOFLG6AjpyDkQ/iIhxygIUKD+vcwqcnu43w/+M= +github.com/mdlayher/netlink v1.1.0/go.mod h1:H4WCitaheIsdF9yOYu8CFmCgQthAPIWZmcKp9uZHgmY= +github.com/mdlayher/netlink v1.1.1/go.mod h1:WTYpFb/WTvlRJAyKhZL5/uy69TDDpHHu2VZmb2XgV7o= +github.com/mdlayher/raw v0.0.0-20190606142536-fef19f00fc18/go.mod h1:7EpbotpCmVZcu+KCX4g9WaRNuu11uyhiW7+Le1dKawg= +github.com/mdlayher/raw v0.0.0-20191009151244-50f2db8cc065/go.mod h1:7EpbotpCmVZcu+KCX4g9WaRNuu11uyhiW7+Le1dKawg= github.com/mdlayher/socket v0.2.0 h1:EY4YQd6hTAg2tcXF84p5DTHazShE50u5HeBzBaNgjkA= github.com/mdlayher/socket v0.2.0/go.mod h1:QLlNPkFR88mRUNQIzRBMfXxwKal8H7u1h3bL1CV+f0E= github.com/mdlayher/vsock v1.1.0 h1:2k9udP/hUkLUOboGxXMHOk4f0GWWZwS3IuE3Ee/YYfk= github.com/mdlayher/vsock v1.1.0/go.mod h1:nsVhPsVuBBwAKh6i6PzdNoke6/TNYTjkxoRKAp/+pXs= +github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/miekg/dns v1.1.43/go.mod h1:+evo5L0630/F6ca/Z9+GAqzhjGyn8/c+TBaOyfEl0V4= +github.com/miekg/dns v1.1.45/go.mod h1:e3IlAVfNqAllflbibAZEWOXOQ+Ynzk/dDozDxY7XnME= +github.com/miekg/pkcs11 v1.0.2/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs= github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs= github.com/miekg/pkcs11 v1.1.1/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs= github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= +github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= +github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= 
github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-testing-interface v0.0.0-20171004221916-a61a99592b77/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= +github.com/mitchellh/go-testing-interface v1.14.1/go.mod h1:gfgS7OtZj6MA4U1UrDRp04twqAjfvlZyCfX3sDjEym8= +github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/mitchellh/mapstructure v1.3.2/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/mitchellh/mapstructure v1.3.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/osext v0.0.0-20151018003038-5e2d6d41470f/go.mod h1:OkQIRizQZAeMln+1tSwduZz7+Af5oFlKirV/MSYes2A= +github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs= github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= +github.com/moby/sys/mount v0.2.0/go.mod h1:aAivFE2LB3W4bACsUXChRHQ0qKWsetY4Y9V7sxOougM= +github.com/moby/sys/mount v0.3.3/go.mod h1:PBaEorSNTLG5t/+4EgukEQVlAvVEc6ZjTySwKdqp5K0= github.com/moby/sys/mountinfo v0.4.0/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A= github.com/moby/sys/mountinfo v0.4.1/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A= -github.com/moby/sys/mountinfo v0.5.0 h1:2Ks8/r6lopsxWi9m58nlwjaeSzUX9iiL1vj5qB/9ObI= github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= +github.com/moby/sys/mountinfo v0.6.1/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= +github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78= +github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= github.com/moby/sys/signal v0.6.0/go.mod h1:GQ6ObYZfqacOwTtlXvcmh9A26dVRul/hbOZn88Kg8Tg= github.com/moby/sys/symlink v0.1.0/go.mod h1:GGDODQmbFOjFsXvfLVn3+ZRxkch54RkSiGqsZeMYowQ= github.com/moby/sys/symlink v0.2.0/go.mod h1:7uZVF2dqJjG/NsClqul95CqKOBRQyYSNnJ6BMgR/gFs= github.com/moby/term v0.0.0-20200312100748-672ec06f55cd/go.mod h1:DdlQx2hp0Ss5/fLikoLlEeIYiATotOjgB//nb973jeo= github.com/moby/term v0.0.0-20210610120745-9d4ed1856297/go.mod h1:vgPCkQMyxTZ7IDy8SXRufE172gr8+K/JE/7hHFxHW3A= +github.com/moby/term 
v0.0.0-20210619224110-3f7ff695adc6/go.mod h1:E2VnQOmVuvZB6UYnnDB0qG5Nq/1tD9acaOpo6xmt0Kw= +github.com/moby/vpnkit v0.5.0/go.mod h1:KyjUrL9cb6ZSNNAUwZfqRjhwwgJ3BJN+kXh0t43WTUQ= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= +github.com/mreiferson/go-httpclient v0.0.0-20160630210159-31f0106b4474/go.mod h1:OQA4XLvDbMgS8P0CevmM4m9Q3Jq4phKUzcocxuGJ5m8= github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= @@ -712,11 +1146,16 @@ github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLA github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= +github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DVU= +github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= +github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onsi/ginkgo v0.0.0-20151202141238-7f8ab55aaf3b/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= @@ -725,12 +1164,16 @@ github.com/onsi/ginkgo v1.12.0/go.mod h1:oUhWkIvk5aDxtKvDDuw8gItl8pKl42LzjC9KZE0 github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.13.0/go.mod h1:+REjRxOmWfHCjfv9TTWB1jD1Frx4XydAD3zm1lskyM0= github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= -github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc= github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= -github.com/onsi/ginkgo/v2 v2.1.3 h1:e/3Cwtogj0HA+25nMP1jCMDIf8RtRYbGwGGuBIFztkc= 
+github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= +github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= +github.com/onsi/ginkgo/v2 v2.0.0/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= github.com/onsi/ginkgo/v2 v2.1.3/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= +github.com/onsi/ginkgo/v2 v2.1.4 h1:GNapqRSid3zijZ9H77KrgVG4/8KqiyRsxcSxe+7ApXY= +github.com/onsi/ginkgo/v2 v2.1.4/go.mod h1:um6tUpWM/cxCK3/FK8BXqEiUMUwRgSM4JXG47RKZmLU= github.com/onsi/gomega v0.0.0-20151007035656-2152b45fa28a/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= +github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= @@ -739,8 +1182,12 @@ github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1y github.com/onsi/gomega v1.10.3/go.mod h1:V9xEwhxec5O8UDM77eCW8vLymOMltsqPVYWrpDsH8xc= github.com/onsi/gomega v1.15.0/go.mod h1:cIuvLEne0aoVhAgh/O6ac0Op8WWw9H6eYCriF+tEHG0= github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= -github.com/onsi/gomega v1.17.0 h1:9Luw4uT5HTjHTN8+aNcSThgH1vdXnmdJ8xIfZ4wyTRE= github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= +github.com/onsi/gomega v1.18.1/go.mod h1:0q+aL8jAiMXy9hbwj2mr5GziHiwhAIQpFmmtT5hitRs= +github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro= +github.com/onsi/gomega v1.20.0 h1:8W0cWlwFkflGPLltQvLRB7ZVD5HuP6ng320w2IS245Q= +github.com/onsi/gomega v1.20.0/go.mod h1:DtrZpjmvpn2mPm4YWQa0/ALMDj9v4YxLgojwPeREyVo= +github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= github.com/opencontainers/go-digest v0.0.0-20170106003457-a6d0ee40d420/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= github.com/opencontainers/go-digest v0.0.0-20180430190053-c9281466c8b2/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= @@ -749,50 +1196,69 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM= github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= -github.com/opencontainers/runc v1.0.3 h1:1hbqejyQWCJBvtKAfdO0b1FmaEf2z/bxnjqbARass5k= -github.com/opencontainers/runc v1.0.3/go.mod h1:aTaHFFwQXuA71CiyxOdFFIorAoemI04suvGRQFzWTD0= +github.com/opencontainers/runc v1.1.3 h1:vIXrkId+0/J2Ymu2m7VjGvbSlAId9XNRPhn2p4b+d8w= +github.com/opencontainers/runc v1.1.3/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg= github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.2-0.20190207185410-29686dbc5559/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= 
github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc= +github.com/opencontainers/runtime-spec v1.0.3-0.20201121164853-7413a7f753e1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab h1:YQZXa3elcHgKXAa2GjVFC9M3JeP7ZPyFD1YByDx/dgQ= +github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-tools v0.0.0-20181011054405-1d69bd0f9c39/go.mod h1:r3f7wjNzSs2extwzU3Y+6pKfobzPh+kKFJ3ofN+3nfs= +github.com/opencontainers/runtime-tools v0.0.0-20190417131837-cd1349b7c47e/go.mod h1:r3f7wjNzSs2extwzU3Y+6pKfobzPh+kKFJ3ofN+3nfs= +github.com/opencontainers/runtime-tools v0.9.1-0.20220714195903-17b3287fafb7/go.mod h1:/tgP02fPXGHkU3/qKK1Y0Db4yqNyGm03vLq/mzHzcS4= github.com/opencontainers/selinux v1.6.0/go.mod h1:VVGKuOLlE7v4PJyT6h7mNWvq1rzqiriPsEqVhc+svHE= github.com/opencontainers/selinux v1.8.0/go.mod h1:RScLhm78qiWa2gbVCcGkC7tCGdgk3ogry1nUQF8Evvo= github.com/opencontainers/selinux v1.8.2/go.mod h1:MUIHuUEvKB1wtJjQdOyYRgOnLD2xAPP8dBsCoU0KuF8= +github.com/opencontainers/selinux v1.8.5/go.mod h1:HTvjPFoGMbpQsG886e3lQwnsRWtE4TC1OF3OUvG9FAo= +github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= github.com/opencontainers/selinux v1.10.1 h1:09LIPVRP3uuZGQvgR+SgMSNBd1Eb3vlRbGqQpoHsF8w= github.com/opencontainers/selinux v1.10.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= +github.com/openshift/imagebuilder v1.2.4-0.20220711175835-4151e43600df/go.mod h1:TRYHe4CH9U6nkDjxjBNM5klrLbJBrRbpJE5SaRwUBsQ= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= +github.com/ostreedev/ostree-go v0.0.0-20210805093236-719684c64e4f/go.mod h1:J6OG6YJVEWopen4avK3VNQSnALmmjvniMmni/YFYAwc= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= -github.com/pborman/uuid v1.2.0 h1:J7Q5mO4ysT1dv8hyrUGHb9+ooztCXu1D8MY8DZYsu3g= github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= +github.com/pelletier/go-toml v1.4.0/go.mod h1:PN7xzY2wHTK0K9p34ErDQMlFxa51Fk0OUruD3k1mMwo= +github.com/pelletier/go-toml v1.7.0/go.mod h1:vwGMzjaWMwyfHwgIBhI2YUM4fB6nL6lVAvS1LBMMhTE= github.com/pelletier/go-toml v1.8.1/go.mod h1:T2/BmBdy8dvIRq1a/8aqjN41wvWlN4lrapLU/GW4pbc= github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= 
+github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= +github.com/pierrec/lz4 v2.6.1+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= +github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8/go.mod h1:HKlIX3XHQyzLZPlr7++PzdhaXEj94dEiJgZDTsxEqUI= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1-0.20171018195549-f15c970de5b7/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= +github.com/poy/onpar v0.0.0-20190519213022-ee068f8ea4d1/go.mod h1:nSbFQvMj97ZyhFRSJYtut+msi4sOY6zJDGCdSc+/rZU= github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021/go.mod h1:prYjPmNq4d1NPVmpShWobRqXY3q7Vp+80DqgxxUrUIA= +github.com/proglottis/gpgme v0.1.3/go.mod h1:fPbW/EZ0LvwQtH8Hy7eixhp1eF3G39dtx7GUN+0Gmy0= github.com/prometheus/client_golang v0.0.0-20180209125602-c332b6f63c06/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.1.0/go.mod h1:I1FGZT9+L76gKKOs5djB6ezCbFQP1xR9D75/vuwEF3g= +github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.11.1 h1:+4eQaD7vAZ6DsfsxB15hbE0odUjGI5ARs9yskGu1v4s= github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= +github.com/prometheus/client_golang v1.12.1 h1:ZiaPsmm9uiBeaSMRznKsCDNtPCS0T3JVDGF+06gjBzk= +github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= github.com/prometheus/client_model v0.0.0-20171117100541-99fa1f4be8e5/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -804,10 +1270,12 @@ github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7q github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.6.0/go.mod h1:eBmuwkDJBwy6iBfxCBob6t6dR6ENT/y+J+Zk0j9GMYc= +github.com/prometheus/common v0.9.1/go.mod 
h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= -github.com/prometheus/common v0.30.0 h1:JEkYlQnpzrzQFxi6gnukFPdQ+ac82oRhzMcIduJu/Ug= github.com/prometheus/common v0.30.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= +github.com/prometheus/common v0.32.1 h1:hWIdL3N2HoUx3B8j3YN9mWor0qhY/NlEKZEaXxuIRh4= +github.com/prometheus/common v0.32.1/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= github.com/prometheus/procfs v0.0.0-20180125133057-cb4147076ac7/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= @@ -822,12 +1290,27 @@ github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1 github.com/prometheus/procfs v0.7.3 h1:4jVXhlkAyzOScmCkXBTOLRLTz8EeU+eyjrwB/EPq0VU= github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= +github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= +github.com/rogpeppe/go-internal v1.2.2/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= +github.com/rogpeppe/go-internal v1.3.2/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= +github.com/rogpeppe/go-internal v1.5.2/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= +github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= +github.com/rogpeppe/go-internal v1.8.1-0.20210923151022-86f73c517451 h1:d1PiN4RxzIFXCJTvRkvSkKqwtRAl5ZV4lATKtQI0B7I= +github.com/rogpeppe/go-internal v1.8.1-0.20210923151022-86f73c517451/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o= +github.com/rootless-containers/rootlesskit v1.0.1/go.mod h1:t2UAiYagxrJ+wmpFAUIZPcqsm4k2B7ve6g7lILKbloc= +github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= +github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU= +github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc= github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8/go.mod 
h1:Z0q5wiBQGYcxhMZ6gUqHn6pYNLypFAvaL3UvgZLR0U4= github.com/safchain/ethtool v0.0.0-20210803160452-9aa261dae9b1 h1:ZFfeKAhIQiiOrQaI3/znw0gOmYpO28Tcu1YaqMa/jtQ= github.com/safchain/ethtool v0.0.0-20210803160452-9aa261dae9b1/go.mod h1:Z0q5wiBQGYcxhMZ6gUqHn6pYNLypFAvaL3UvgZLR0U4= @@ -835,19 +1318,32 @@ github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdh github.com/sclevine/agouti v3.0.0+incompatible/go.mod h1:b4WX9W9L1sfQKXeJf1mUTLZKJ48R1S7H23Ji7oFO5Bw= github.com/sclevine/spec v1.2.0/go.mod h1:W4J29eT/Kzv7/b9IWLB055Z+qvVC9vt0Arko24q7p+U= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/seccomp/libseccomp-golang v0.9.1 h1:NJjM5DNFOs0s3kYE1WUOr6G8V97sdt46rlXTMfXGWBo= -github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo= -github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= +github.com/sebdah/goldie/v2 v2.5.3/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI= +github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= +github.com/seccomp/libseccomp-golang v0.10.0 h1:aA4bp+/Zzi0BnWZ2F1wgNBs5gTpm+na2rWM6M9YjLpY= +github.com/seccomp/libseccomp-golang v0.10.0/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= +github.com/secure-systems-lab/go-securesystemslib v0.3.1/go.mod h1:o8hhjkbNl2gOamKUA/eNW3xUrntHT9L4W89W1nfj43U= +github.com/secure-systems-lab/go-securesystemslib v0.4.0/go.mod h1:FGBZgq2tXWICsxWQW1msNf49F0Pf2Op5Htayx335Qbs= +github.com/segmentio/ksuid v1.0.4/go.mod h1:/XUiZBD3kVx5SmUOl55voK5yeAbBNNIed+2O73XgrPE= +github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= +github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= +github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= +github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24/go.mod h1:M+9NzErvs504Cn4c5DxATwIqPbtswREoFCre64PpcG4= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= +github.com/sigstore/sigstore v1.3.1-0.20220629021053-b95fc0d626c1/go.mod h1:y83NePRM98MJpbGgBgi54UZduhG0aD7lYngAVCx+i/E= github.com/sirupsen/logrus v1.0.4-0.20170822132746-89742aefa4b2/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc= github.com/sirupsen/logrus v1.0.6/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.3.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.4.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= +github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/skratchdot/open-golang 
v0.0.0-20200116055534-eef842397966/go.mod h1:sUM3LWHvSMaG192sy56D9F7CNvL7jUJVXoqM1QKLnog= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= @@ -855,15 +1351,25 @@ github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIK github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= +github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8/go.mod h1:P5HUIBuIWKbyjl083/loAegFkfbFNx5i2qEP4CNbm7E= +github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d/go.mod h1:UdhH50NIW0fCiwBSr0co2m7BnFLdv4fQTgdqdJTHFeE= +github.com/sourcegraph/syntaxhighlight v0.0.0-20170531221838-bd320f5d308e/go.mod h1:HuIsMU8RRBOtsCgI77wP899iHVBQpCmg4ErYMZB+2IA= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= +github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cobra v0.0.2-0.20171109065643-2da4a54c5cee/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= +github.com/spf13/cobra v0.0.6/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE= github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE= github.com/spf13/cobra v1.1.3/go.mod h1:pGADOWyqRD/YMrPZigI/zbliZ2wVD/23d+is3pSWzOo= +github.com/spf13/cobra v1.2.1/go.mod h1:ExllRjgxM/piMAM+3tAZvg8fsklGAf3tPfi+i8t68Nk= +github.com/spf13/cobra v1.4.0/go.mod h1:Wo4iy3BUC+X2Fybo0PDqwJIv3dNRiZLHQymsfxlB84g= +github.com/spf13/cobra v1.5.0/go.mod h1:dWXEIy2H428czQCjInthrTRUg7yKbok+2Qi/yBIJoUM= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= +github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.1-0.20171106142849-4c012f6dcd95/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= @@ -871,62 +1377,115 @@ github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnIn github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= github.com/spf13/viper v1.7.0/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= +github.com/spf13/viper v1.8.1/go.mod h1:o0Pch8wJ9BVSWGQMbra6iw0oQ5oktSIBaujf1rJH9Ns= github.com/stefanberger/go-pkcs11uri v0.0.0-20201008174630-78d3cae3a980/go.mod h1:AO3tvPzVZ/ayst6UlUKUv6rcPQInYe3IknH3jYhAKu8= 
github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= github.com/stretchr/objx v0.0.0-20180129172003-8a3f7159479f/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= +github.com/stretchr/objx v0.4.0 h1:M2gUjqZET1qApGOWNSnZ49BAIMX4F/1plDv3+l31EJ4= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/testify v0.0.0-20180303142811-b89eecf5ca5d/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= +github.com/sylabs/sif/v2 v2.7.1/go.mod h1:bBse2nEFd3yHkmq6KmAOFEWQg5LdFYiQUdVcgamxlc8= github.com/syndtr/gocapability v0.0.0-20170704070218-db04d3cc01c8/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= +github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= +github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7/go.mod h1:q4W45IWZaF22tdD+VEXcAWRA037jwmWEB5VWYORlTpc= github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I= +github.com/tchap/go-patricia v2.3.0+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I= +github.com/theupdateframework/go-tuf v0.3.0/go.mod h1:E5XP0wXitrFUHe4b8cUcAAdxBW4LbfnqF4WXXGLgWNo= +github.com/theupdateframework/go-tuf v0.3.1/go.mod h1:lhHZ3Vt2pdAh15h0Cc6gWdlI+Okn2ZznD3q/cNjd5jw= +github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4= +github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= +github.com/titanous/rocacheck v0.0.0-20171023193734-afe73141d399/go.mod h1:LdwHTNJT99C5fTAzDz0ud328OgXz+gierycbcIx2fRs= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= 
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c/go.mod h1:hzIxponao9Kjc7aWznkXaL4U4TWaDSs8zcsY4Ka08nM= +github.com/u-root/uio v0.0.0-20210528114334-82958018845c/go.mod h1:LpEX5FO/cB+WF4TYGY1V5qktpaZLkKkSegbr0V4eYXA= +github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= +github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= +github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= +github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/urfave/cli v0.0.0-20171014202726-7bc6a0acffa5/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= +github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= -github.com/urfave/cli v1.22.2 h1:gsqYFH8bb9ekPA12kRo0hfjngWQjkJPlN9R0N78BoUo= github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= +github.com/urfave/cli v1.22.4 h1:u7tSpNPPswAFymm8IehJhy4uJMlUuU/GmqSkvJ1InXA= +github.com/urfave/cli v1.22.4/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= +github.com/urfave/cli/v2 v2.5.1/go.mod h1:oDzoM7pVwz6wHn5ogWgFUU1s4VJayeQS+aEZDqXIEJs= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= +github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= +github.com/vbatts/tar-split v0.11.2/go.mod h1:vV3ZuO2yWSVsz+pfFzDG/upWH1JhjOiEaWq6kXyQ3VI= +github.com/vbauerster/mpb/v7 v7.4.2/go.mod h1:UmOiIUI8aPqWXIps0ciik3RKMdzx7+ooQpq+fBcXwBA= github.com/vishvananda/netlink v0.0.0-20181108222139-023a6dafdcdf/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk= github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= -github.com/vishvananda/netlink v1.1.1-0.20210924202909-187053b97868 h1:FFT5/l13iFxg+2dzyoiXZPmMtoclsyBKnUqTEzYpDXw= -github.com/vishvananda/netlink v1.1.1-0.20210924202909-187053b97868/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= +github.com/vishvananda/netlink v1.1.1-0.20220115184804-dd687eb2f2d4/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= +github.com/vishvananda/netlink v1.2.1-beta.2 h1:Llsql0lnQEbHj0I1OuKyp8otXp0r3q0mPkuhwHfStVs= +github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI= github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod 
h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f h1:p4VB7kIXpOQvVn1ZaTIVp+3vuYAXFe3OJEvjbUYJLaA= github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= +github.com/vmihailenco/msgpack/v4 v4.3.12/go.mod h1:gborTTJjAo/GWTqqRjrLCn9pgNN+NXzzngzBKDPIqw4= +github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= +github.com/weppos/publicsuffix-go v0.15.1-0.20210807195340-dc689ff0bb59/go.mod h1:HYux0V0Zi04bHNwOHy4cXJVz/TQjYonnF6aoYhj+3QE= +github.com/weppos/publicsuffix-go v0.15.1-0.20220329081811-9a40b608a236/go.mod h1:HYux0V0Zi04bHNwOHy4cXJVz/TQjYonnF6aoYhj+3QE= github.com/willf/bitset v1.1.11-0.20200630133818-d5bec3311243/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.0.2/go.mod h1:1WAq6h33pAW+iRreB34OORO2Nf7qel3VV3fjBj+hCSs= +github.com/xdg-go/stringprep v1.0.2/go.mod h1:8F9zXuvzgwmyT5DUm4GUfZGDdT3W+LCvS6+da4O5kxM= +github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= +github.com/xdg/stringprep v0.0.0-20180714160509-73f8eece6fdc/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= +github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= github.com/xeipuuv/gojsonschema v0.0.0-20180618132009-1d523034197f/go.mod h1:5yf86TLmAcydyeJq5YvxkGPE2fm/u4myDekKRoLuqhs= +github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= +github.com/ysmood/goob v0.4.0/go.mod h1:u6yx7ZhS4Exf2MwciFr6nIM8knHQIE22lFpWHnfql18= +github.com/ysmood/got v0.31.2/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY= +github.com/ysmood/gotrace v0.6.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM= +github.com/ysmood/gson v0.7.1/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg= +github.com/ysmood/leakless v0.7.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yvasiyarov/go-metrics v0.0.0-20140926110328-57bccd1ccd43/go.mod h1:aX5oPXxHm3bOH+xeAttToC8pqch2ScQN/JoXYupl6xs= github.com/yvasiyarov/gorelic v0.0.0-20141212073537-a9bba5b9ab50/go.mod 
h1:NUSPSUX/bi6SeDMUh6brw0nXpxHnc96TguQh0+r/ssA= github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f/go.mod h1:GlGEuHIJweS1mbCqG+7vt2nvWLzLLnRHbXz5JKd/Qbg= +github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q= +github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wKdgO/C0= +github.com/zmap/rc2 v0.0.0-20131011165748-24b9757f5521/go.mod h1:3YZ9o3WnatTIZhuOtot4IcUfzoKVjUHqu6WALIyI0nE= +github.com/zmap/zcertificate v0.0.0-20180516150559-0e3d58b1bac4/go.mod h1:5iU54tB79AMBcySS0R2XIyZBAVmeHranShAFELYx7is= +github.com/zmap/zcrypto v0.0.0-20210811211718-6f9bc4aff20f/go.mod h1:y/9hjFEub4DtQxTHp/pqticBgdYeCwL97vojV3lsvHY= +github.com/zmap/zlint/v3 v3.3.1-0.20211019173530-cb17369b4628/go.mod h1:O+4OXRfNLKqOyDl4eKZ1SBlYudKGUBGRFcv+m1KLr28= gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20220601114329-47893b162965 h1:EXE1ZsUqiUWGV5Dw2oTYpXx24ffxj0//yhTB0Ppv+4s= gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20220601114329-47893b162965/go.mod h1:TBB3sR7/jg4RCThC/cgT4fB8mAbbMO307TycfgeR59w= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= @@ -941,6 +1500,13 @@ go.etcd.io/etcd/client/v3 v3.5.0/go.mod h1:AIKXXVX/DQXtfTEqBryiLTUXwON+GuvO6Z7lL go.etcd.io/etcd/pkg/v3 v3.5.0/go.mod h1:UzJGatBQ1lXChBkQF0AuAtkRQMYnHubxAEYIrC3MSsE= go.etcd.io/etcd/raft/v3 v3.5.0/go.mod h1:UFOHSIvO/nKwd4lhkwabrTD3cqW5yVyYYf/KlD00Szc= go.etcd.io/etcd/server/v3 v3.5.0/go.mod h1:3Ah5ruV+M+7RZr0+Y/5mNLwC+eQlni+mQmOVdCRJoS4= +go.mongodb.org/mongo-driver v1.0.3/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= +go.mongodb.org/mongo-driver v1.1.1/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= +go.mongodb.org/mongo-driver v1.3.0/go.mod h1:MSWZXKOynuguX+JSvwP8i+58jYCXxbia8HS3gZBapIE= +go.mongodb.org/mongo-driver v1.3.4/go.mod h1:MSWZXKOynuguX+JSvwP8i+58jYCXxbia8HS3gZBapIE= +go.mongodb.org/mongo-driver v1.7.3/go.mod h1:NqaYOwnXWr5Pm7AOpO5QFxKJ503nbMse/R79oO62zWg= +go.mongodb.org/mongo-driver v1.7.5 h1:ny3p0reEpgsR2cfA5cjgwFZg3Cv/ofFh/8jbhGtz9VI= +go.mongodb.org/mongo-driver v1.7.5/go.mod h1:VXEWRZ6URJIkUq2SCAyapmhH0ZLRBP+FT4xhp5Zvxng= go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= @@ -954,6 +1520,8 @@ go.opentelemetry.io/contrib v0.20.0/go.mod h1:G/EtFaa6qaN7+LxqfIAT3GiZa7Wv5DTBUz go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.20.0/go.mod h1:oVGt1LRbBOBq1A5BQLlUg9UaU/54aiHw8cgjV3aWZ/E= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.28.0/go.mod h1:vEhqr0m4eTc+DWxfsXoXue2GBgV2uUwVznkGIHW/e5w= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.20.0/go.mod h1:2AboqHi0CiIZU0qwhtUfCYD1GeUzvvIXWNkhDt7ZMG4= +go.opentelemetry.io/contrib/propagators v0.19.0/go.mod h1:4QOdZClXISU5S43xZxk5tYaWcpb+lehqfKtE6PK6msE= +go.opentelemetry.io/otel v0.19.0/go.mod h1:j9bF567N9EfomkSidSfmMwIwIBuP37AMAIzVW85OxSg= go.opentelemetry.io/otel v0.20.0/go.mod h1:Y3ugLH2oa81t5QO+Lty+zXf8zC9L26ax4Nzoxm/dooo= go.opentelemetry.io/otel v1.0.0/go.mod h1:AjRVh9A5/5DE7S+mZtTR6t8vpKKryam+0lREnfmS4cg= go.opentelemetry.io/otel v1.3.0 h1:APxLf0eiBwLl+SOXiJJCVYzA1OOJNyAoV8C5RNRyy7Y= @@ -965,7 +1533,9 @@ go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.3.0/go.mod h1:VpP4/RMn go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.3.0/go.mod 
h1:hO1KLR7jcKaDDKDkvI9dP/FIhpmna5lkqPUQdEjFAM8= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.3.0/go.mod h1:keUU7UfnwWTWpJ+FWnyqmogPa82nuU5VUANFq49hlMY= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.3.0/go.mod h1:QNX1aly8ehqqX1LEa6YniTU7VY9I6R3X/oPxhGdTceE= +go.opentelemetry.io/otel/metric v0.19.0/go.mod h1:8f9fglJPRnXuskQmKpnad31lcLJ2VmNNqIsx/uIwBSc= go.opentelemetry.io/otel/metric v0.20.0/go.mod h1:598I5tYlH1vzBjn+BTuhzTCSb/9debfNp6R3s7Pr1eU= +go.opentelemetry.io/otel/oteltest v0.19.0/go.mod h1:tI4yxwh8U21v7JD6R3BcA/2+RBoTKFexE/PJ/nSO7IA= go.opentelemetry.io/otel/oteltest v0.20.0/go.mod h1:L7bgKf9ZB7qCwT9Up7i9/pn0PWIa9FqQ2IQ8LoxiGnw= go.opentelemetry.io/otel/sdk v0.20.0/go.mod h1:g/IcepuwNsoiX5Byy2nNV0ySUF1em498m7hBWC279Yc= go.opentelemetry.io/otel/sdk v1.0.0/go.mod h1:PCrDHlSy5x1kjezSdL37PhbFUMjrsLRshJ2zCzeXwbM= @@ -973,6 +1543,7 @@ go.opentelemetry.io/otel/sdk v1.3.0 h1:3278edCoH89MEJ0Ky8WQXVmDQv3FX4ZJ3Pp+9fJre go.opentelemetry.io/otel/sdk v1.3.0/go.mod h1:rIo4suHNhQwBIPg9axF8V9CA72Wz2mKF1teNrup8yzs= go.opentelemetry.io/otel/sdk/export/metric v0.20.0/go.mod h1:h7RBNMsDJ5pmI1zExLi+bJK+Dr8NQCh0qGhm1KDnNlE= go.opentelemetry.io/otel/sdk/metric v0.20.0/go.mod h1:knxiS8Xd4E/N+ZqKmUPf3gTTZ4/0TjTXukfxjzSTpHE= +go.opentelemetry.io/otel/trace v0.19.0/go.mod h1:4IXiNextNOpPnRlI4ryK69mn5iC84bjBWZQA5DXz/qg= go.opentelemetry.io/otel/trace v0.20.0/go.mod h1:6GjCW8zgDjwGHGa6GkyeB8+/5vjT16gUEi0Nf1iBdgw= go.opentelemetry.io/otel/trace v1.0.0/go.mod h1:PXTWqayeFUlJV1YDNhsJYB184+IvAH814St6o6ajzIs= go.opentelemetry.io/otel/trace v1.3.0 h1:doy8Hzb1RJ+I3yFhtDmwNc7tIyw1tNMOIsyPzp1NOGY= @@ -982,28 +1553,48 @@ go.opentelemetry.io/proto/otlp v0.11.0/go.mod h1:QpEjXPrNQzrFDZgoTo49dgHR9RYRSrg go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= +go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA= go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= +go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= +goji.io/v3 v3.0.0/go.mod h1:c02FFnNiVNCDo+DpR2IhBQpM9r5G1BG/MkHNTPUJ13U= golang.org/x/crypto v0.0.0-20171113213409-9f005a07e0d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181009213950-7c1a557ab941/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190320223903-b7391e95e576/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= 
+golang.org/x/crypto v0.0.0-20190422162423-af44ce270edf/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190530122614-20be4c3c3ed5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190617133340-57b3e21c3d56/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191122220453-ac88ee75c92c/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200323165209-0ec3e9974c59/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20201124201722-c8d3bf9c5392/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= +golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.0.0-20211117183948-ae814b36b871/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.0.0-20220131195533-30dcbda58838/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -1026,6 +1617,7 @@ golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRu golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint 
v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= @@ -1037,6 +1629,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= +golang.org/x/net v0.0.0-20180530234432-1e491301e022/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -1048,7 +1642,9 @@ golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190320064053-1272bf9dcd53/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190419010253-1f3472d942ba/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= @@ -1061,6 +1657,7 @@ golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191004110552-13f9640d40b9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191007182048-72f939374954/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -1069,14 +1666,18 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net 
v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200505041828-1ed23360d12c/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201006153459-a7d1128ccaa0/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201010224723-4f7140c49acb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= @@ -1087,14 +1688,29 @@ golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= +golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210520170846-37e1c6afe023/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210825183410-e898025ed96a/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211029224645-99673261e6eb/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211209124913-491a49abca63/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211216030914-fe4d6282115f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd h1:O7DYs+zxREGLKzKoMQrtrEacpb0ZVXA5rIwylE2Xchk= +golang.org/x/net 
v0.0.0-20220114011407-0dd24b26b47d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220325170049-de3da57026de/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220412020605-290c469a71a5/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220524220425-1d687d428aca/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220607020251-c690dde0001d/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -1106,21 +1722,34 @@ golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210402161424-2e8d93401602/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f h1:Qmd2pbz05z7z6lm0DrgQVVPuBm92jqujBKMHMOlOQEw= +golang.org/x/oauth2 v0.0.0-20210628180205-a41e5a781914/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= +golang.org/x/oauth2 v0.0.0-20220309155454-6242fa91716a/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= +golang.org/x/oauth2 v0.0.0-20220411215720-9780585627b5/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= +golang.org/x/oauth2 v0.0.0-20220524215830-622c5d57e401/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= +golang.org/x/oauth2 v0.0.0-20220608161450-d0670ef3b1eb/go.mod h1:jaDAt6Dkxork7LmZnYtzbRWj0W47D86a3TGe0YHBvmE= +golang.org/x/oauth2 v0.0.0-20220622183110-fd043fe589d2 h1:+jnHzr9VPj32ykQVai5DNahi9+NSp7yYuCsl5eAQtL0= +golang.org/x/oauth2 v0.0.0-20220622183110-fd043fe589d2/go.mod h1:jaDAt6Dkxork7LmZnYtzbRWj0W47D86a3TGe0YHBvmE= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190412183630-56d357773e84/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220513210516-0976fa681c29/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f h1:Ax0t5p6N38Ga0dThY21weqDEyz2oklo4IvDkpigvkD8= +golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1128,15 +1757,24 @@ golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190321052220-f7bb7a8bee54/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190411185658-b44545bcd369/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190418153312-f0ce4c0180be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190419153524-e8e3143a4f4a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190514135907-3a4b5fb9f71f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190522044717-8097e1b27ff5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190531175056-4c3a928424d2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190602015325-4c4f7f33c9ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606122018-79a91cf218c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1145,11 +1783,13 @@ golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190812073006-9eafafc0a87e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1159,6 +1799,7 @@ golang.org/x/sys v0.0.0-20191210023423-ac6580df4449/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200120151820-655fe14d7479/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1179,6 +1820,7 @@ golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod 
h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200814200057-3d37ad5750ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200817155316-9781c653f443/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200831180312-196b9ba8737a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1187,6 +1829,8 @@ golang.org/x/sys v0.0.0-20200916030750-2334cc1a136f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200922070232-aee5d888a860/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201009025420-dfb3f7c4e634/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201101102859-da207088b7d1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201112073958-5cba982894dd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201117170446-d9b008d0a637/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1197,34 +1841,66 @@ golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210228012217-479acdf4ea46/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210324051608-47abb6519492/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210331175145-43e1dd70ce54/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210426230700-d19ff857e887/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210525143221-35b2ab0089ea/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210820121016-41cdb8703e55/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210903071746-97244b99971b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211210111614-af8b64212486/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220204135822-1c1b9b1eba6a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220412211240-33da011f77ad h1:ntjMns5wyP/fN65tdBD4g8J5w8n015+iIIs9rtjXkY0= +golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220227234510-4e6760a101f9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220328115105-d36c6a25d886/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 
+golang.org/x/sys v0.0.0-20220422013727-9388b58f7150/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220502124256-b6088ccd6cba/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220610221304-9f5ed59c137d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220624220833-87e55d714810/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1241,8 +1917,10 @@ golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20211116232009-f0f3c7e86c11/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -1252,16 +1930,23 @@ golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3 golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools 
v0.0.0-20190329151228-23e29df326fe/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190416151739-9c9e1878f421/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190420181800-aa740d480789/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190425163242-31fd60d6bfdc/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190531172133-b3315ee88b7d/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190614205625-5aca471b1d59/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190617190820-da514acc4774/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190706070813-72ffa07ba3db/go.mod h1:jcCCGcm9btYwXyDqrUWc6MKQKKGJCWEQ3AfLSRIbEuI= golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20190823170909-c4a336ef6a2f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -1274,6 +1959,7 @@ golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200117220505-0cba7a3a9ee9/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= @@ -1282,6 +1968,7 @@ golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapK golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= +golang.org/x/tools v0.0.0-20200308013534-11ec41452d41/go.mod 
h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= @@ -1291,6 +1978,7 @@ golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20200616133436-c1934b75d054/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200711155855-7342f9734a7d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= @@ -1302,15 +1990,23 @@ golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210112230658-8b4aab62c064/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.1.6-0.20210726203631-07bc1bf47fb2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= +golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20220517211312-f3a8303e98df/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= +golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= google.golang.org/api 
v0.0.0-20160322025152-9bf6e6e569ff/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= @@ -1333,6 +2029,26 @@ google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34q google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8= google.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU= google.golang.org/api v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94= +google.golang.org/api v0.44.0/go.mod h1:EBOGZqzyhtvMDoxwS97ctnh0zUmYY6CxqXsc1AvkYD8= +google.golang.org/api v0.47.0/go.mod h1:Wbvgpq1HddcWVtzsVLyfLp8lDg6AA241LmgIL59tHXo= +google.golang.org/api v0.48.0/go.mod h1:71Pr1vy+TAZRPkPs/xlCf5SsU8WjuAWv1Pfjbtukyy4= +google.golang.org/api v0.50.0/go.mod h1:4bNT5pAuq5ji4SRZm+5QIkjny9JAyVD/3gaSihNefaw= +google.golang.org/api v0.51.0/go.mod h1:t4HdrdoNgyN5cbEfm7Lum0lcLDLiise1F8qDKX00sOU= +google.golang.org/api v0.54.0/go.mod h1:7C4bFFOvVDGXjfDTAsgGwDgAxRDeQ4X8NvUedIt6z3k= +google.golang.org/api v0.55.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE= +google.golang.org/api v0.56.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE= +google.golang.org/api v0.57.0/go.mod h1:dVPlbZyBo2/OjBpmvNdpn2GRm6rPy75jyU7bmhdrMgI= +google.golang.org/api v0.61.0/go.mod h1:xQRti5UdCmoCEqFxcz93fTl338AVqDgyaDRuOZ3hg9I= +google.golang.org/api v0.63.0/go.mod h1:gs4ij2ffTRXwuzzgJl/56BdwJaA194ijkfn++9tDuPo= +google.golang.org/api v0.67.0/go.mod h1:ShHKP8E60yPsKNw/w8w+VYaj9H6buA5UqDp8dhbQZ6g= +google.golang.org/api v0.70.0/go.mod h1:Bs4ZM2HGifEvXwd50TtW70ovgJffJYw2oRCOFU/SkfA= +google.golang.org/api v0.71.0/go.mod h1:4PyU6e6JogV1f9eA4voyrTY2batOLdgZ5qZ5HOCc4j8= +google.golang.org/api v0.74.0/go.mod h1:ZpfMZOVRMywNyvJFeqL9HRWBgAuRfSjJFpe9QtRRyDs= +google.golang.org/api v0.75.0/go.mod h1:pU9QmyHLnzlpar1Mjt4IbapUCy8J+6HD6GeELN69ljA= +google.golang.org/api v0.78.0/go.mod h1:1Sg78yoMLOhlQTeF+ARBoytAcH1NNyyl390YMy6rKmw= +google.golang.org/api v0.80.0/go.mod h1:xY3nI94gbvBrE0J6NHXhxOmW97HG7Khjkku6AFB3Hyg= +google.golang.org/api v0.84.0/go.mod h1:NTsGnUFJMYROtiquksZHBWtHfeMC7iYthki7Eq3pa8o= +google.golang.org/api v0.86.0/go.mod h1:+Sem1dnrKlrXMR/X0bPnMWyluQe4RsNoYfmNLhOIkzw= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -1345,6 +2061,7 @@ google.golang.org/cloud v0.0.0-20151119220103-975617b05ea8/go.mod h1:0H1ncTHf11K google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8 h1:Nw54tB0rB7hY/N0NQvRW8DG4Yk3Q6T9cu9RcFQDu1tc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/grpc v0.0.0-20160317175043-d3ddb4469d5a/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= +google.golang.org/grpc v1.8.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -1368,11 +2085,22 @@ google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAG google.golang.org/grpc v1.36.0/go.mod 
h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.37.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= +google.golang.org/grpc v1.37.1/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= +google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= +google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= +google.golang.org/grpc v1.40.1/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= +google.golang.org/grpc v1.41.0/go.mod h1:U3l9uK9J0sini8mHphKoXyaqDA/8VyGnDee1zzIUK6k= google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= -google.golang.org/grpc v1.43.0 h1:Eeu7bZtDZ2DpRCsLhUlcrLnvYaMK1Gz86a+hMVvELmM= google.golang.org/grpc v1.43.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= +google.golang.org/grpc v1.44.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= +google.golang.org/grpc v1.45.0/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= +google.golang.org/grpc v1.46.0/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= +google.golang.org/grpc v1.46.2/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= +google.golang.org/grpc v1.47.0 h1:9n77onPX5F3qfFCqjy9dhn8PbNQsIKeVU04J9G7umt8= +google.golang.org/grpc v1.47.0/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= +google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -1383,10 +2111,13 @@ google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= +google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +gopkg.in/alexcesaro/statsd.v2 v2.0.0/go.mod h1:i0ubccKGzBVNBpdGV5MocxyA/XlLUJzA7SLonnE4drU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20141024133853-64131543e789/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -1398,14 +2129,18 @@ gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod 
h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qS gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2/go.mod h1:Xk6kEKp8OKb+X14hQBKWaSkCsqBpgog8nAV2xsGOxlo= +gopkg.in/inconshreveable/log15.v2 v2.0.0-20180818164646-67afb5ed74ec/go.mod h1:aPpfJ7XW+gOuirDoZ8gHhLh3kZ1B08FtV2bbmy7Jv3s= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.51.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= gopkg.in/square/go-jose.v2 v2.2.2/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= gopkg.in/square/go-jose.v2 v2.3.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= +gopkg.in/square/go-jose.v2 v2.4.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= gopkg.in/square/go-jose.v2 v2.5.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= +gopkg.in/square/go-jose.v2 v2.6.0/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= @@ -1419,9 +2154,12 @@ gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= @@ -1466,6 +2204,7 @@ k8s.io/cri-api v0.23.1/go.mod h1:REJE3PSU0h/LOV1APBrupxrEJqnoxZC8KWzkBUHwrK4= k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= k8s.io/gengo v0.0.0-20201113003025-83324d819ded/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= +k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= k8s.io/klog/v2 v2.2.0/go.mod 
h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= k8s.io/klog/v2 v2.4.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= @@ -1491,3 +2230,4 @@ sigs.k8s.io/structured-merge-diff/v4 v4.0.3/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK sigs.k8s.io/structured-merge-diff/v4 v4.1.2/go.mod h1:j/nl6xW8vLS49O8YvXW1ocPhZawJtm+Yrr7PPRQ0Vg4= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= +sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= diff --git a/src/runtime/pkg/README.md b/src/runtime/pkg/README.md index b5b0d07d3224..72bf3248bbf7 100644 --- a/src/runtime/pkg/README.md +++ b/src/runtime/pkg/README.md @@ -7,4 +7,5 @@ This repository contains a number of packages in addition to the |-|-| | [`katatestutils`](katatestutils) | Unit test utilities. | | [`katautils`](katautils) | Utilities. | +| [`sev`](sev) | AMD SEV confidential guest utilities. | | [`signals`](signals) | Signal handling functions. | diff --git a/src/runtime/pkg/containerd-shim-v2/create.go b/src/runtime/pkg/containerd-shim-v2/create.go index 6b14a94c7a97..ee9bbb36b660 100644 --- a/src/runtime/pkg/containerd-shim-v2/create.go +++ b/src/runtime/pkg/containerd-shim-v2/create.go @@ -16,6 +16,7 @@ import ( "path" "path/filepath" "strconv" + "strings" "syscall" containerd_types "github.com/containerd/containerd/api/types" @@ -23,9 +24,12 @@ import ( taskAPI "github.com/containerd/containerd/runtime/v2/task" "github.com/containerd/typeurl" "github.com/kata-containers/kata-containers/src/runtime/pkg/utils" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" + "github.com/sirupsen/logrus" // only register the proto type crioption "github.com/containerd/containerd/pkg/runtimeoptions/v1" @@ -47,6 +51,28 @@ var defaultStartManagementServerFunc startManagementServerFunc = func(s *service shimLog.Info("management server started") } +func copyLayersToMounts(rootFs *vc.RootFs, spec *specs.Spec) error { + for _, o := range rootFs.Options { + if !strings.HasPrefix(o, annotations.FileSystemLayer) { + continue + } + + fields := strings.Split(o[len(annotations.FileSystemLayer):], ",") + if len(fields) < 2 { + return fmt.Errorf("Missing fields in rootfs layer: %q", o) + } + + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: "/run/kata-containers/sandbox/layers/" + filepath.Base(fields[0]), + Type: fields[1], + Source: fields[0], + Options: fields[2:], + }) + } + + return nil +} + func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*container, error) { rootFs := vc.RootFs{} if len(r.Rootfs) == 1 { @@ -62,6 +88,11 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con if err != nil { return nil, err } + + if err := copyLayersToMounts(&rootFs, ociSpec); err != nil { + return nil, err + } + containerType, err := oci.ContainerType(*ociSpec) if err != nil { return nil, err @@ -97,9 +128,10 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con } // create root span + // rootSpan will be ended when the entire trace is ended rootSpan, newCtx := katatrace.Trace(s.ctx, shimLog, "rootSpan", shimTracingTags) s.rootCtx = newCtx - defer rootSpan.End() + s.rootSpan = rootSpan // 
create span span, newCtx := katatrace.Trace(s.rootCtx, shimLog, "create", shimTracingTags) @@ -135,7 +167,7 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con katautils.HandleFactory(ctx, vci, s.config) rootless.SetRootless(s.config.HypervisorConfig.Rootless) if rootless.IsRootless() { - if err := configureNonRootHypervisor(s.config); err != nil { + if err := configureNonRootHypervisor(s.config, r.ID); err != nil { return nil, err } } @@ -266,6 +298,11 @@ func checkAndMount(s *service, r *taskAPI.CreateTaskRequest) (bool, error) { if katautils.IsBlockDevice(m.Source) && !s.config.HypervisorConfig.DisableBlockDeviceUse { return false, nil } + + if virtcontainers.HasOptionPrefix(m.Options, annotations.FileSystemLayer) { + return false, nil + } + if m.Type == vc.NydusRootFSType { // if kata + nydus, do not mount return false, nil @@ -302,13 +339,17 @@ func doMount(mounts []*containerd_types.Mount, rootfs string) error { return nil } -func configureNonRootHypervisor(runtimeConfig *oci.RuntimeConfig) error { +func configureNonRootHypervisor(runtimeConfig *oci.RuntimeConfig, sandboxId string) error { userName, err := utils.CreateVmmUser() if err != nil { return err } defer func() { if err != nil { + shimLog.WithFields(logrus.Fields{ + "user_name": userName, + "sandbox_id": sandboxId, + }).WithError(err).Warn("configure non root hypervisor failed, delete the user") if err2 := utils.RemoveVmmUser(userName); err2 != nil { shimLog.WithField("userName", userName).WithError(err).Warn("failed to remove user") } @@ -329,7 +370,14 @@ func configureNonRootHypervisor(runtimeConfig *oci.RuntimeConfig) error { return err } runtimeConfig.HypervisorConfig.Uid = uint32(uid) + runtimeConfig.HypervisorConfig.User = userName runtimeConfig.HypervisorConfig.Gid = uint32(gid) + shimLog.WithFields(logrus.Fields{ + "user_name": userName, + "uid": uid, + "gid": gid, + "sandbox_id": sandboxId, + }).Debug("successfully created a non root user for the hypervisor") userTmpDir := path.Join("/run/user/", fmt.Sprint(uid)) _, err = os.Stat(userTmpDir) diff --git a/src/runtime/pkg/containerd-shim-v2/create_test.go b/src/runtime/pkg/containerd-shim-v2/create_test.go index 121d5ea4db06..82ce7357f268 100644 --- a/src/runtime/pkg/containerd-shim-v2/create_test.go +++ b/src/runtime/pkg/containerd-shim-v2/create_test.go @@ -20,6 +20,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/assert" + "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" @@ -41,7 +42,7 @@ func TestCreateSandboxSuccess(t *testing.T) { }, } - testingImpl.CreateSandboxFunc = func(ctx context.Context, sandboxConfig vc.SandboxConfig) (vc.VCSandbox, error) { + testingImpl.CreateSandboxFunc = func(ctx context.Context, sandboxConfig vc.SandboxConfig, hookFunc func(context.Context) error) (vc.VCSandbox, error) { return sandbox, nil } @@ -307,7 +308,9 @@ func TestCreateContainerConfigFail(t *testing.T) { assert.Error(err) } -func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err error) { +func createAllRuntimeConfigFiles(dir, hypervisor string) (runtimeConfig string, err error) { + var hotPlugVFIO config.PCIePort + var coldPlugVFIO config.PCIePort if dir == "" { return "", fmt.Errorf("BUG: need 
directory") } @@ -320,36 +323,36 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err kernelPath := path.Join(dir, "kernel") kernelParams := "foo=bar xyz" imagePath := path.Join(dir, "image") - shimPath := path.Join(dir, "shim") + rootfsType := "ext4" logDir := path.Join(dir, "logs") logPath := path.Join(logDir, "runtime.log") machineType := "machineType" disableBlockDevice := true blockDeviceDriver := "virtio-scsi" enableIOThreads := true - hotplugVFIOOnRootBus := true - pcieRootPort := uint32(2) disableNewNetNs := false sharedFS := "virtio-9p" virtioFSdaemon := path.Join(dir, "virtiofsd") + hotPlugVFIO = config.BridgePort + coldPlugVFIO = config.NoPort configFileOptions := ktu.RuntimeConfigOptions{ - Hypervisor: "qemu", - HypervisorPath: hypervisorPath, - KernelPath: kernelPath, - ImagePath: imagePath, - KernelParams: kernelParams, - MachineType: machineType, - ShimPath: shimPath, - LogPath: logPath, - DisableBlock: disableBlockDevice, - BlockDeviceDriver: blockDeviceDriver, - EnableIOThreads: enableIOThreads, - HotplugVFIOOnRootBus: hotplugVFIOOnRootBus, - PCIeRootPort: pcieRootPort, - DisableNewNetNs: disableNewNetNs, - SharedFS: sharedFS, - VirtioFSDaemon: virtioFSdaemon, + Hypervisor: "qemu", + HypervisorPath: hypervisorPath, + KernelPath: kernelPath, + ImagePath: imagePath, + RootfsType: rootfsType, + KernelParams: kernelParams, + MachineType: machineType, + LogPath: logPath, + DisableBlock: disableBlockDevice, + BlockDeviceDriver: blockDeviceDriver, + EnableIOThreads: enableIOThreads, + DisableNewNetNs: disableNewNetNs, + SharedFS: sharedFS, + VirtioFSDaemon: virtioFSdaemon, + HotPlugVFIO: hotPlugVFIO, + ColdPlugVFIO: coldPlugVFIO, } runtimeConfigFileData := ktu.MakeRuntimeConfigFileData(configFileOptions) @@ -360,7 +363,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err return "", err } - files := []string{hypervisorPath, kernelPath, imagePath, shimPath} + files := []string{hypervisorPath, kernelPath, imagePath} for _, file := range files { // create the resource (which must be >0 bytes) diff --git a/src/runtime/pkg/containerd-shim-v2/metrics.go b/src/runtime/pkg/containerd-shim-v2/metrics.go index 86476ad3acd8..69ce552ebcfe 100644 --- a/src/runtime/pkg/containerd-shim-v2/metrics.go +++ b/src/runtime/pkg/containerd-shim-v2/metrics.go @@ -9,9 +9,11 @@ import ( "context" cgroupsv1 "github.com/containerd/cgroups/stats/v1" + cgroupsv2 "github.com/containerd/cgroups/v2/stats" "github.com/containerd/typeurl" google_protobuf "github.com/gogo/protobuf/types" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" ) @@ -21,7 +23,18 @@ func marshalMetrics(ctx context.Context, s *service, containerID string) (*googl return nil, err } - metrics := statsToMetrics(&stats) + isCgroupV1, err := resCtrl.IsCgroupV1() + if err != nil { + return nil, err + } + + var metrics interface{} + + if isCgroupV1 { + metrics = statsToMetricsV1(&stats) + } else { + metrics = statsToMetricsV2(&stats) + } data, err := typeurl.MarshalAny(metrics) if err != nil { @@ -31,40 +44,71 @@ func marshalMetrics(ctx context.Context, s *service, containerID string) (*googl return data, nil } -func statsToMetrics(stats *vc.ContainerStats) *cgroupsv1.Metrics { +func statsToMetricsV1(stats *vc.ContainerStats) *cgroupsv1.Metrics { metrics := &cgroupsv1.Metrics{} if stats.CgroupStats != nil { metrics = &cgroupsv1.Metrics{ - Hugetlb: 
setHugetlbStats(stats.CgroupStats.HugetlbStats), - Pids: setPidsStats(stats.CgroupStats.PidsStats), - CPU: setCPUStats(stats.CgroupStats.CPUStats), - Memory: setMemoryStats(stats.CgroupStats.MemoryStats), - Blkio: setBlkioStats(stats.CgroupStats.BlkioStats), + Hugetlb: setHugetlbStatsV1(stats.CgroupStats.HugetlbStats), + Pids: setPidsStatsV1(stats.CgroupStats.PidsStats), + CPU: setCPUStatsV1(stats.CgroupStats.CPUStats), + Memory: setMemoryStatsV1(stats.CgroupStats.MemoryStats), + Blkio: setBlkioStatsV1(stats.CgroupStats.BlkioStats), } } - metrics.Network = setNetworkStats(stats.NetworkStats) return metrics } -func setHugetlbStats(vcHugetlb map[string]vc.HugetlbStats) []*cgroupsv1.HugetlbStat { +func statsToMetricsV2(stats *vc.ContainerStats) *cgroupsv2.Metrics { + metrics := &cgroupsv2.Metrics{} + + if stats.CgroupStats != nil { + metrics = &cgroupsv2.Metrics{ + Hugetlb: setHugetlbStatsV2(stats.CgroupStats.HugetlbStats), + Pids: setPidsStatsV2(stats.CgroupStats.PidsStats), + CPU: setCPUStatsV2(stats.CgroupStats.CPUStats), + Memory: setMemoryStatsV2(stats.CgroupStats.MemoryStats), + Io: setBlkioStatsV2(stats.CgroupStats.BlkioStats), + } + } + + return metrics +} + +func setHugetlbStatsV1(vcHugetlb map[string]vc.HugetlbStats) []*cgroupsv1.HugetlbStat { var hugetlbStats []*cgroupsv1.HugetlbStat - for _, v := range vcHugetlb { + for k, v := range vcHugetlb { hugetlbStats = append( hugetlbStats, &cgroupsv1.HugetlbStat{ - Usage: v.Usage, - Max: v.MaxUsage, - Failcnt: v.Failcnt, + Usage: v.Usage, + Max: v.MaxUsage, + Failcnt: v.Failcnt, + Pagesize: k, + }) + } + + return hugetlbStats +} + +func setHugetlbStatsV2(vcHugetlb map[string]vc.HugetlbStats) []*cgroupsv2.HugeTlbStat { + var hugetlbStats []*cgroupsv2.HugeTlbStat + for k, v := range vcHugetlb { + hugetlbStats = append( + hugetlbStats, + &cgroupsv2.HugeTlbStat{ + Current: v.Usage, + Max: v.MaxUsage, + Pagesize: k, }) } return hugetlbStats } -func setPidsStats(vcPids vc.PidsStats) *cgroupsv1.PidsStat { +func setPidsStatsV1(vcPids vc.PidsStats) *cgroupsv1.PidsStat { pidsStats := &cgroupsv1.PidsStat{ Current: vcPids.Current, Limit: vcPids.Limit, @@ -73,8 +117,16 @@ func setPidsStats(vcPids vc.PidsStats) *cgroupsv1.PidsStat { return pidsStats } -func setCPUStats(vcCPU vc.CPUStats) *cgroupsv1.CPUStat { +func setPidsStatsV2(vcPids vc.PidsStats) *cgroupsv2.PidsStat { + pidsStats := &cgroupsv2.PidsStat{ + Current: vcPids.Current, + Limit: vcPids.Limit, + } + + return pidsStats +} +func setCPUStatsV1(vcCPU vc.CPUStats) *cgroupsv1.CPUStat { var perCPU []uint64 perCPU = append(perCPU, vcCPU.CPUUsage.PercpuUsage...) 
@@ -95,7 +147,20 @@ func setCPUStats(vcCPU vc.CPUStats) *cgroupsv1.CPUStat { return cpuStats } -func setMemoryStats(vcMemory vc.MemoryStats) *cgroupsv1.MemoryStat { +func setCPUStatsV2(vcCPU vc.CPUStats) *cgroupsv2.CPUStat { + cpuStats := &cgroupsv2.CPUStat{ + UsageUsec: vcCPU.CPUUsage.TotalUsage / 1000, + UserUsec: vcCPU.CPUUsage.UsageInKernelmode / 1000, + SystemUsec: vcCPU.CPUUsage.UsageInUsermode / 1000, + NrPeriods: vcCPU.ThrottlingData.Periods, + NrThrottled: vcCPU.ThrottlingData.ThrottledPeriods, + ThrottledUsec: vcCPU.ThrottlingData.ThrottledTime / 1000, + } + + return cpuStats +} + +func setMemoryStatsV1(vcMemory vc.MemoryStats) *cgroupsv1.MemoryStat { memoryStats := &cgroupsv1.MemoryStat{ Usage: &cgroupsv1.MemoryEntry{ Limit: vcMemory.Usage.Limit, @@ -145,22 +210,41 @@ func setMemoryStats(vcMemory vc.MemoryStats) *cgroupsv1.MemoryStat { return memoryStats } -func setBlkioStats(vcBlkio vc.BlkioStats) *cgroupsv1.BlkIOStat { +func setMemoryStatsV2(vcMemory vc.MemoryStats) *cgroupsv2.MemoryStat { + memoryStats := &cgroupsv2.MemoryStat{ + Usage: vcMemory.Usage.Usage, + UsageLimit: vcMemory.Usage.Limit, + SwapUsage: vcMemory.SwapUsage.Usage, + SwapLimit: vcMemory.SwapUsage.Limit, + } + + return memoryStats +} + +func setBlkioStatsV1(vcBlkio vc.BlkioStats) *cgroupsv1.BlkIOStat { blkioStats := &cgroupsv1.BlkIOStat{ - IoServiceBytesRecursive: copyBlkio(vcBlkio.IoServiceBytesRecursive), - IoServicedRecursive: copyBlkio(vcBlkio.IoServicedRecursive), - IoQueuedRecursive: copyBlkio(vcBlkio.IoQueuedRecursive), - SectorsRecursive: copyBlkio(vcBlkio.SectorsRecursive), - IoServiceTimeRecursive: copyBlkio(vcBlkio.IoServiceTimeRecursive), - IoWaitTimeRecursive: copyBlkio(vcBlkio.IoWaitTimeRecursive), - IoMergedRecursive: copyBlkio(vcBlkio.IoMergedRecursive), - IoTimeRecursive: copyBlkio(vcBlkio.IoTimeRecursive), + IoServiceBytesRecursive: copyBlkioV1(vcBlkio.IoServiceBytesRecursive), + IoServicedRecursive: copyBlkioV1(vcBlkio.IoServicedRecursive), + IoQueuedRecursive: copyBlkioV1(vcBlkio.IoQueuedRecursive), + SectorsRecursive: copyBlkioV1(vcBlkio.SectorsRecursive), + IoServiceTimeRecursive: copyBlkioV1(vcBlkio.IoServiceTimeRecursive), + IoWaitTimeRecursive: copyBlkioV1(vcBlkio.IoWaitTimeRecursive), + IoMergedRecursive: copyBlkioV1(vcBlkio.IoMergedRecursive), + IoTimeRecursive: copyBlkioV1(vcBlkio.IoTimeRecursive), } return blkioStats } -func copyBlkio(s []vc.BlkioStatEntry) []*cgroupsv1.BlkIOEntry { +func setBlkioStatsV2(vcBlkio vc.BlkioStats) *cgroupsv2.IOStat { + ioStats := &cgroupsv2.IOStat{ + Usage: copyBlkioV2(vcBlkio.IoServiceBytesRecursive), + } + + return ioStats +} + +func copyBlkioV1(s []vc.BlkioStatEntry) []*cgroupsv1.BlkIOEntry { ret := make([]*cgroupsv1.BlkIOEntry, len(s)) for i, v := range s { ret[i] = &cgroupsv1.BlkIOEntry{ @@ -174,6 +258,28 @@ func copyBlkio(s []vc.BlkioStatEntry) []*cgroupsv1.BlkIOEntry { return ret } +func copyBlkioV2(s []vc.BlkioStatEntry) []*cgroupsv2.IOEntry { + var ret []*cgroupsv2.IOEntry + item := cgroupsv2.IOEntry{} + for _, v := range s { + switch v.Op { + case "read": + item.Rbytes = v.Value + case "write": + item.Wbytes = v.Value + case "rios": + item.Rios = v.Value + case "wios": + item.Wios = v.Value + } + item.Major = v.Major + item.Minor = v.Minor + } + ret = append(ret, &item) + + return ret +} + func setNetworkStats(vcNetwork []*vc.NetworkStats) []*cgroupsv1.NetworkStat { networkStats := make([]*cgroupsv1.NetworkStat, len(vcNetwork)) for i, v := range vcNetwork { diff --git a/src/runtime/pkg/containerd-shim-v2/metrics_test.go 
b/src/runtime/pkg/containerd-shim-v2/metrics_test.go index e2a9177a207f..57842d93eaaf 100644 --- a/src/runtime/pkg/containerd-shim-v2/metrics_test.go +++ b/src/runtime/pkg/containerd-shim-v2/metrics_test.go @@ -17,8 +17,7 @@ import ( ) func TestStatNetworkMetric(t *testing.T) { - - assert := assert.New(t) + assertions := assert.New(t) var err error mockNetwork := []*vc.NetworkStats{ @@ -52,8 +51,8 @@ func TestStatNetworkMetric(t *testing.T) { }() resp, err := sandbox.StatsContainer(context.Background(), testContainerID) - assert.NoError(err) + assertions.NoError(err) - metrics := statsToMetrics(&resp) - assert.Equal(expectedNetwork, metrics.Network) + metrics := statsToMetricsV1(&resp) + assertions.Equal(expectedNetwork, metrics.Network) } diff --git a/src/runtime/pkg/containerd-shim-v2/service.go b/src/runtime/pkg/containerd-shim-v2/service.go index 9e703c9e218a..26d4c2158b03 100644 --- a/src/runtime/pkg/containerd-shim-v2/service.go +++ b/src/runtime/pkg/containerd-shim-v2/service.go @@ -28,6 +28,7 @@ import ( "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" + otelTrace "go.opentelemetry.io/otel/trace" "golang.org/x/sys/unix" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils" @@ -122,8 +123,9 @@ type exit struct { type service struct { sandbox vc.VCSandbox - ctx context.Context - rootCtx context.Context // root context for tracing + ctx context.Context + rootCtx context.Context // root context for tracing + rootSpan otelTrace.Span containers map[string]*container @@ -823,7 +825,7 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (_ *ptypes.E // Pids returns all pids inside the container // Since for kata, it cannot get the process's pid from VM, -// thus only return the Shim's pid directly. +// thus only return the hypervisor's pid directly. func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (_ *taskAPI.PidsResponse, err error) { shimLog.WithField("container", r.ID).Debug("Pids() start") defer shimLog.WithField("container", r.ID).Debug("Pids() end") @@ -925,7 +927,7 @@ func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (_ *ta return &taskAPI.ConnectResponse{ ShimPid: s.pid, - //Since kata cannot get the container's pid in VM, thus only return the shim's pid. + //Since kata cannot get the container's pid in VM, thus only return the hypervisor's pid. 
TaskPid: s.hpid, }, nil } @@ -946,6 +948,7 @@ func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (_ * s.mu.Unlock() span.End() + s.rootSpan.End() katatrace.StopTracing(s.rootCtx) return empty, nil diff --git a/src/runtime/pkg/containerd-shim-v2/shim_management.go b/src/runtime/pkg/containerd-shim-v2/shim_management.go index a6b9bdba3485..0c6d5c6e2c8f 100644 --- a/src/runtime/pkg/containerd-shim-v2/shim_management.go +++ b/src/runtime/pkg/containerd-shim-v2/shim_management.go @@ -11,10 +11,10 @@ import ( "expvar" "fmt" "io" - "io/ioutil" "net/http" "net/http/pprof" "net/url" + "os" "path/filepath" "strconv" "strings" @@ -173,7 +173,7 @@ func (s *service) serveVolumeStats(w http.ResponseWriter, r *http.Request) { } func (s *service) serveVolumeResize(w http.ResponseWriter, r *http.Request) { - body, err := ioutil.ReadAll(r.Body) + body, err := io.ReadAll(r.Body) if err != nil { shimMgtLog.WithError(err).Error("failed to read request body") w.WriteHeader(http.StatusInternalServerError) @@ -212,7 +212,7 @@ func (s *service) genericIPTablesHandler(w http.ResponseWriter, r *http.Request, switch r.Method { case http.MethodPut: - body, err := ioutil.ReadAll(r.Body) + body, err := io.ReadAll(r.Body) if err != nil { logger.WithError(err).Error("failed to read request body") w.WriteHeader(http.StatusInternalServerError) @@ -243,7 +243,7 @@ func (s *service) genericIPTablesHandler(w http.ResponseWriter, r *http.Request, func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec) { // metrics socket will under sandbox's bundle path - metricsAddress := SocketAddress(s.id) + metricsAddress := ServerSocketAddress(s.id) listener, err := cdshim.NewSocket(metricsAddress) if err != nil { @@ -307,8 +307,43 @@ func GetSandboxesStoragePath() string { return "/run/vc/sbs" } -// SocketAddress returns the address of the unix domain socket for communicating with the +// GetSandboxesStoragePathRust returns the storage path where sandboxes info are stored in runtime-rs +func GetSandboxesStoragePathRust() string { + return "/run/kata" +} + +// SocketPath returns the path of the socket using the given storagePath +func SocketPath(id string, storagePath string) string { + return filepath.Join(string(filepath.Separator), storagePath, id, "shim-monitor.sock") +} + +// SocketPathGo returns the path of the socket to be used with the go runtime +func SocketPathGo(id string) string { + return SocketPath(id, GetSandboxesStoragePath()) +} + +// SocketPathRust returns the path of the socket to be used with the rust runtime +func SocketPathRust(id string) string { + return SocketPath(id, GetSandboxesStoragePathRust()) +} + +// ServerSocketAddress returns the address of the unix domain socket the shim management endpoint +// should listen. +// NOTE: this code is only called by the go shim management implementation. +func ServerSocketAddress(id string) string { + return fmt.Sprintf("unix://%s", SocketPathGo(id)) +} + +// ClientSocketAddress returns the address of the unix domain socket for communicating with the // shim management endpoint -func SocketAddress(id string) string { - return fmt.Sprintf("unix://%s", filepath.Join(string(filepath.Separator), GetSandboxesStoragePath(), id, "shim-monitor.sock")) +// NOTE: this code allows various go clients, e.g. kata-runtime or kata-monitor commands, to +// connect to the rust shim management implementation. 
+func ClientSocketAddress(id string) string { + // get the go runtime uds path + socketPath := SocketPathGo(id) + // if the path not exist, use the rust runtime uds path instead + if _, err := os.Stat(socketPath); err != nil { + socketPath = SocketPathRust(id) + } + return fmt.Sprintf("unix://%s", socketPath) } diff --git a/src/runtime/pkg/containerd-shim-v2/stream.go b/src/runtime/pkg/containerd-shim-v2/stream.go index c20e63de82b3..123c1c1148e4 100644 --- a/src/runtime/pkg/containerd-shim-v2/stream.go +++ b/src/runtime/pkg/containerd-shim-v2/stream.go @@ -126,11 +126,11 @@ func ioCopy(shimLog *logrus.Entry, exitch, stdinCloser chan struct{}, tty *ttyIO p := bufPool.Get().(*[]byte) defer bufPool.Put(p) io.CopyBuffer(tty.io.Stdout(), stdoutPipe, *p) - wg.Done() if tty.io.Stdin() != nil { // close stdin to make the other routine stop tty.io.Stdin().Close() } + wg.Done() shimLog.Debug("stdout io stream copy exited") }() } diff --git a/src/runtime/pkg/containerd-shim-v2/stream_test.go b/src/runtime/pkg/containerd-shim-v2/stream_test.go index ea4f026ca114..c85633c88437 100644 --- a/src/runtime/pkg/containerd-shim-v2/stream_test.go +++ b/src/runtime/pkg/containerd-shim-v2/stream_test.go @@ -10,6 +10,7 @@ import ( "io" "os" "path/filepath" + "runtime" "syscall" "testing" "time" @@ -96,6 +97,10 @@ func TestNewTtyIOFifoReopen(t *testing.T) { } func TestIoCopy(t *testing.T) { + // This test fails on aarch64 regularly, temporarily skip it + if runtime.GOARCH == "arm64" { + t.Skip("Skip TestIoCopy for aarch64") + } assert := assert.New(t) ctx := context.TODO() diff --git a/src/runtime/pkg/containerd-shim-v2/wait.go b/src/runtime/pkg/containerd-shim-v2/wait.go index ebb742790d16..ecf75b8c344f 100644 --- a/src/runtime/pkg/containerd-shim-v2/wait.go +++ b/src/runtime/pkg/containerd-shim-v2/wait.go @@ -120,10 +120,12 @@ func watchSandbox(ctx context.Context, s *service) { if err == nil { return } - s.monitor = nil s.mu.Lock() defer s.mu.Unlock() + + s.monitor = nil + // sandbox malfunctioning, cleanup as much as we can shimLog.WithError(err).Warn("sandbox stopped unexpectedly") err = s.sandbox.Stop(ctx, true) diff --git a/src/runtime/pkg/device/config/config.go b/src/runtime/pkg/device/config/config.go index 61f9236b9cce..773eaaa2d5f1 100644 --- a/src/runtime/pkg/device/config/config.go +++ b/src/runtime/pkg/device/config/config.go @@ -61,6 +61,17 @@ const ( Nvdimm = "nvdimm" ) +const ( + // AIOThreads is the pthread asynchronous I/O implementation. + AIOThreads = "threads" + + // AIONative is the native Linux AIO implementation + AIONative = "native" + + // AIOUring is the Linux io_uring I/O implementation + AIOIOUring = "io_uring" +) + const ( // Virtio9P means use virtio-9p for the shared file system Virtio9P = "virtio-9p" @@ -70,12 +81,25 @@ const ( // VirtioFSNydus means use nydus for the shared file system VirtioFSNydus = "virtio-fs-nydus" + + // NoSharedFS means *no* shared file system solution will be used + // and files will be copied into the guest system. + // + // WARNING: This should be carefully used, and only used in very few + // specific cases, as any update to the mount will *NOT* be reflected + // during the lifecycle of the pod, causing issues with rotation of + // secrets, certs, or configurations via kubernetes objects like + // configMaps or secrets, as those will be copied into the guest at + // *pod* *creation* *time*. 
+ NoSharedFS = "none" ) const ( // Define the string key for DriverOptions in DeviceInfo struct FsTypeOpt = "fstype" BlockDriverOpt = "block-driver" + + VhostUserReconnectTimeOutOpt = "vhost-user-reconnect-timeout" ) const ( @@ -86,20 +110,132 @@ const ( VhostUserSCSIMajor = 242 ) +const ( + + // The timeout for reconnecting on non-server sockets when the remote end + // goes away. + // qemu will delay this many seconds and then attempt to reconnect. Zero + // disables reconnecting, and is the default. + DefaultVhostUserReconnectTimeOut = 0 +) + // Defining these as a variable instead of a const, to allow // overriding this in the tests. // SysDevPrefix is static string of /sys/dev var SysDevPrefix = "/sys/dev" -// SysIOMMUPath is static string of /sys/kernel/iommu_groups -var SysIOMMUPath = "/sys/kernel/iommu_groups" +// SysIOMMUGroupPath is static string of /sys/kernel/iommu_groups +var SysIOMMUGroupPath = "/sys/kernel/iommu_groups" // SysBusPciDevicesPath is static string of /sys/bus/pci/devices var SysBusPciDevicesPath = "/sys/bus/pci/devices" var getSysDevPath = getSysDevPathImpl +// PCIePortBusPrefix gives us the correct bus nameing dependeing on the port +// used to hot(cold)-plug the device +type PCIePortBusPrefix string + +const ( + PCIeRootPortPrefix PCIePortBusPrefix = "rp" + PCIeSwitchPortPrefix PCIePortBusPrefix = "sw" + PCIeSwitchUpstreamPortPrefix PCIePortBusPrefix = "swup" + PCIeSwitchhDownstreamPortPrefix PCIePortBusPrefix = "swdp" + PCIBridgePortPrefix PCIePortBusPrefix = "bp" +) + +func (p PCIePortBusPrefix) String() string { + switch p { + case PCIeRootPortPrefix: + fallthrough + case PCIeSwitchPortPrefix: + fallthrough + case PCIeSwitchUpstreamPortPrefix: + fallthrough + case PCIeSwitchhDownstreamPortPrefix: + fallthrough + case PCIBridgePortPrefix: + return string(p) + } + return fmt.Sprintf("", string(p)) +} + +// PCIePort distinguish only between root and switch port +type PCIePort string + +const ( + // RootPort attach VFIO devices to a root-port + RootPort PCIePort = "root-port" + // SwitchPort attach VFIO devices to a switch-port + SwitchPort = "switch-port" + // BridgePort is the default + BridgePort = "bridge-port" + // NoPort is for disabling VFIO hotplug/coldplug + NoPort = "no-port" + // InvalidPort is for invalid port + InvalidPort = "invalid-port" +) + +func (p PCIePort) String() string { + switch p { + case RootPort: + fallthrough + case SwitchPort: + fallthrough + case BridgePort: + fallthrough + case NoPort: + fallthrough + case InvalidPort: + return string(p) + } + return fmt.Sprintf("", string(p)) +} + +var PCIePortPrefixMapping = map[PCIePort]PCIePortBusPrefix{ + RootPort: PCIeRootPortPrefix, + SwitchPort: PCIeSwitchhDownstreamPortPrefix, + BridgePort: PCIBridgePortPrefix, +} + +func (p PCIePort) Invalid() bool { + switch p { + case RootPort: + fallthrough + case SwitchPort: + fallthrough + case BridgePort: + fallthrough + case NoPort: + return false + } + return true +} + +func (p PCIePort) Valid() bool { + switch p { + case RootPort: + fallthrough + case SwitchPort: + fallthrough + case BridgePort: + fallthrough + case NoPort: + return true + } + return false +} + +type PCIePortMapping map[string]bool + +var ( + // Each of this structures keeps track of the devices attached to the + // different types of PCI ports. We can deduces the Bus number from it + // and eliminate duplicates being assigned. + PCIeDevices = map[PCIePort]PCIePortMapping{} +) + // DeviceInfo is an embedded type that contains device data common to all types of devices. 
type DeviceInfo struct { // DriverOptions is specific options for each device driver @@ -145,6 +281,9 @@ type DeviceInfo struct { // ColdPlug specifies whether the device must be cold plugged (true) // or hot plugged (false). ColdPlug bool + + // Specifies the PCIe port type to which the device is attached + Port PCIePort } // BlockDrive represents a block storage drive which may be used in case the storage @@ -236,14 +375,17 @@ const ( // VFIODeviceErrorType is the error type of VFIO device VFIODeviceErrorType VFIODeviceType = iota - // VFIODeviceNormalType is a normal VFIO device type - VFIODeviceNormalType + // VFIOPCIDeviceNormalType is a normal VFIO PCI device type + VFIOPCIDeviceNormalType - // VFIODeviceMediatedType is a VFIO mediated device type - VFIODeviceMediatedType + // VFIOPCIDeviceMediatedType is a VFIO PCI mediated device type + VFIOPCIDeviceMediatedType + + // VFIOAPDeviceMediatedType is a VFIO AP mediated device type + VFIOAPDeviceMediatedType ) -// VFIODev represents a VFIO drive used for hotplugging +// VFIODev represents a VFIO PCI device used for hotplugging type VFIODev struct { // ID is used to identify this drive in the hypervisor options. ID string @@ -274,6 +416,15 @@ type VFIODev struct { // IsPCIe specifies device is PCIe or PCI IsPCIe bool + + // APDevices are the Adjunct Processor devices assigned to the mdev + APDevices []string + + // Rank identifies a device in a IOMMU group + Rank int + + // Port is the PCIe port type to which the device is attached + Port PCIePort } // RNGDev represents a random number generator device @@ -307,6 +458,11 @@ type VhostUserDeviceAttrs struct { Index int CacheSize uint32 + + QueueSize uint32 + + // Reconnect timeout for socket of vhost user block device + ReconnectTime uint32 } // GetHostPathFunc is function pointer used to mock GetHostPath in tests. @@ -324,6 +480,10 @@ func GetHostPath(devInfo DeviceInfo, vhostUserStoreEnabled bool, vhostUserStoreP return "", fmt.Errorf("Empty path provided for device") } + if devInfo.Major == -1 { + return devInfo.HostPath, nil + } + // Filter out vhost-user storage devices by device Major numbers. 
if vhostUserStoreEnabled && devInfo.DevType == "b" && (devInfo.Major == VhostUserSCSIMajor || devInfo.Major == VhostUserBlkMajor) { diff --git a/src/runtime/pkg/device/drivers/block.go b/src/runtime/pkg/device/drivers/block.go index d2e9644fde60..7107053f2440 100644 --- a/src/runtime/pkg/device/drivers/block.go +++ b/src/runtime/pkg/device/drivers/block.go @@ -60,6 +60,12 @@ func (device *BlockDevice) Attach(ctx context.Context, devReceiver api.DeviceRec return err } + hypervisorType := devReceiver.GetHypervisorType() + if hypervisorType == "acrn" { + deviceLogger().Debug("Special casing for ACRN to increment BlockIndex") + index = index + 1 + } + drive := &config.BlockDrive{ File: device.DeviceInfo.HostPath, Format: "raw", diff --git a/src/runtime/pkg/device/drivers/utils.go b/src/runtime/pkg/device/drivers/utils.go index 25f021eda030..5f76bff48e26 100644 --- a/src/runtime/pkg/device/drivers/utils.go +++ b/src/runtime/pkg/device/drivers/utils.go @@ -10,10 +10,12 @@ import ( "fmt" "os" "path/filepath" + "strconv" "strings" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/api" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" "github.com/sirupsen/logrus" ) @@ -45,9 +47,9 @@ func deviceLogger() *logrus.Entry { return api.DeviceLogger() } -// Identify PCIe device by reading the size of the PCI config space +// IsPCIeDevice identifies PCIe device by reading the size of the PCI config space // Plain PCI device have 256 bytes of config space where PCIe devices have 4K -func isPCIeDevice(bdf string) bool { +func IsPCIeDevice(bdf string) bool { if len(strings.Split(bdf, ":")) == 2 { bdf = PCIDomain + ":" + bdf } @@ -89,18 +91,139 @@ func readPCIProperty(propertyPath string) (string, error) { return strings.Split(string(buf), "\n")[0], nil } -func GetVFIODeviceType(deviceFileName string) config.VFIODeviceType { +func GetVFIODeviceType(deviceFilePath string) (config.VFIODeviceType, error) { + deviceFileName := filepath.Base(deviceFilePath) + //For example, 0000:04:00.0 tokens := strings.Split(deviceFileName, ":") - vfioDeviceType := config.VFIODeviceErrorType if len(tokens) == 3 { - vfioDeviceType = config.VFIODeviceNormalType - } else { - //For example, 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 - tokens = strings.Split(deviceFileName, "-") - if len(tokens) == 5 { - vfioDeviceType = config.VFIODeviceMediatedType + return config.VFIOPCIDeviceNormalType, nil + } + + //For example, 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 + tokens = strings.Split(deviceFileName, "-") + if len(tokens) != 5 { + return config.VFIODeviceErrorType, fmt.Errorf("Incorrect tokens found while parsing VFIO details: %s", deviceFileName) + } + + deviceSysfsDev, err := GetSysfsDev(deviceFilePath) + if err != nil { + return config.VFIODeviceErrorType, err + } + + if strings.Contains(deviceSysfsDev, vfioAPSysfsDir) { + return config.VFIOAPDeviceMediatedType, nil + } + + return config.VFIOPCIDeviceMediatedType, nil +} + +// GetSysfsDev returns the sysfsdev of mediated device +// Expected input string format is absolute path to the sysfs dev node +// eg. 
/sys/kernel/iommu_groups/0/devices/f79944e4-5a3d-11e8-99ce-479cbab002e4 +func GetSysfsDev(sysfsDevStr string) (string, error) { + return filepath.EvalSymlinks(sysfsDevStr) +} + +// GetAPVFIODevices retrieves all APQNs associated with a mediated VFIO-AP +// device +func GetAPVFIODevices(sysfsdev string) ([]string, error) { + data, err := os.ReadFile(filepath.Join(sysfsdev, "matrix")) + if err != nil { + return []string{}, err + } + // Split by newlines, omitting final newline + return strings.Split(string(data[:len(data)-1]), "\n"), nil +} + +// Ignore specific PCI devices, supply the pciClass and the bitmask to check +// against the device class, deviceBDF for meaningfull info message +func checkIgnorePCIClass(pciClass string, deviceBDF string, bitmask uint64) (bool, error) { + if pciClass == "" { + return false, nil + } + pciClassID, err := strconv.ParseUint(pciClass, 0, 32) + if err != nil { + return false, err + } + // ClassID is 16 bits, remove the two trailing zeros + pciClassID = pciClassID >> 8 + if pciClassID&bitmask == bitmask { + deviceLogger().Infof("Ignoring PCI (Host) Bridge deviceBDF %v Class %x", deviceBDF, pciClassID) + return true, nil + } + return false, nil +} + +// GetAllVFIODevicesFromIOMMUGroup returns all the VFIO devices in the IOMMU group +// We can reuse this function at various levels, sandbox, container. +func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODev, error) { + + vfioDevs := []*config.VFIODev{} + + vfioGroup := filepath.Base(device.HostPath) + iommuDevicesPath := filepath.Join(config.SysIOMMUGroupPath, vfioGroup, "devices") + + deviceFiles, err := os.ReadDir(iommuDevicesPath) + if err != nil { + return nil, err + } + + // Pass all devices in iommu group + for i, deviceFile := range deviceFiles { + //Get bdf of device eg 0000:00:1c.0 + deviceBDF, deviceSysfsDev, vfioDeviceType, err := GetVFIODetails(deviceFile.Name(), iommuDevicesPath) + if err != nil { + return nil, err + } + id := utils.MakeNameID("vfio", device.ID+strconv.Itoa(i), maxDevIDSize) + + var vfio config.VFIODev + + switch vfioDeviceType { + case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType: + // This is vfio-pci and vfio-mdev specific + pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass) + // We need to ignore Host or PCI Bridges that are in the same IOMMU group as the + // passed-through devices. One CANNOT pass-through a PCI bridge or Host bridge. 
+ // Class 0x0604 is PCI bridge, 0x0600 is Host bridge + ignorePCIDevice, err := checkIgnorePCIClass(pciClass, deviceBDF, 0x0600) + if err != nil { + return nil, err + } + if ignorePCIDevice { + continue + } + // Do not directly assign to `vfio` -- need to access field still + vfio = config.VFIODev{ + ID: id, + Type: vfioDeviceType, + BDF: deviceBDF, + SysfsDev: deviceSysfsDev, + IsPCIe: IsPCIeDevice(deviceBDF), + Class: pciClass, + Rank: -1, + Port: device.Port, + } + + case config.VFIOAPDeviceMediatedType: + devices, err := GetAPVFIODevices(deviceSysfsDev) + if err != nil { + return nil, err + } + vfio = config.VFIODev{ + ID: id, + SysfsDev: deviceSysfsDev, + Type: config.VFIOAPDeviceMediatedType, + APDevices: devices, + Port: device.Port, + } + default: + return nil, fmt.Errorf("Failed to append device: VFIO device type unrecognized") } + + vfioDevs = append(vfioDevs, &vfio) } - return vfioDeviceType + + return vfioDevs, nil } diff --git a/src/runtime/pkg/device/drivers/vfio.go b/src/runtime/pkg/device/drivers/vfio.go index 58658b0b881c..feec9c44827c 100644 --- a/src/runtime/pkg/device/drivers/vfio.go +++ b/src/runtime/pkg/device/drivers/vfio.go @@ -11,7 +11,6 @@ import ( "fmt" "os" "path/filepath" - "strconv" "strings" "github.com/sirupsen/logrus" @@ -29,11 +28,7 @@ const ( vfioRemoveIDPath = "/sys/bus/pci/drivers/vfio-pci/remove_id" iommuGroupPath = "/sys/bus/pci/devices/%s/iommu_group" vfioDevPath = "/dev/vfio/%s" - pcieRootPortPrefix = "rp" -) - -var ( - AllPCIeDevs = map[string]bool{} + vfioAPSysfsDir = "/sys/devices/vfio_ap" ) // VFIODevice is a vfio device meant to be passed to the hypervisor @@ -70,33 +65,22 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece } }() - vfioGroup := filepath.Base(device.DeviceInfo.HostPath) - iommuDevicesPath := filepath.Join(config.SysIOMMUPath, vfioGroup, "devices") - - deviceFiles, err := os.ReadDir(iommuDevicesPath) + device.VfioDevs, err = GetAllVFIODevicesFromIOMMUGroup(*device.DeviceInfo) if err != nil { return err } - // Pass all devices in iommu group - for i, deviceFile := range deviceFiles { - //Get bdf of device eg 0000:00:1c.0 - deviceBDF, deviceSysfsDev, vfioDeviceType, err := getVFIODetails(deviceFile.Name(), iommuDevicesPath) - if err != nil { - return err + for _, vfio := range device.VfioDevs { + // If vfio.Port is not set we bail out, users should set + // explicitly the port in the config file + if vfio.Port == "" { + return fmt.Errorf("cold_plug_vfio= or hot_plug_vfio= port is not set for device %s (BridgePort | RootPort | SwitchPort)", vfio.BDF) } - vfio := &config.VFIODev{ - ID: utils.MakeNameID("vfio", device.DeviceInfo.ID+strconv.Itoa(i), maxDevIDSize), - Type: vfioDeviceType, - BDF: deviceBDF, - SysfsDev: deviceSysfsDev, - IsPCIe: isPCIeDevice(deviceBDF), - Class: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass), - } - device.VfioDevs = append(device.VfioDevs, vfio) + if vfio.IsPCIe { - vfio.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs)) - AllPCIeDevs[vfio.BDF] = true + busIndex := len(config.PCIeDevices[vfio.Port]) + vfio.Bus = fmt.Sprintf("%s%d", config.PCIePortPrefixMapping[vfio.Port], busIndex) + config.PCIeDevices[vfio.Port][vfio.BDF] = true } } @@ -192,31 +176,59 @@ func (device *VFIODevice) Load(ds config.DeviceState) { device.GenericDevice.Load(ds) for _, dev := range ds.VFIODevs { - device.VfioDevs = append(device.VfioDevs, &config.VFIODev{ - ID: dev.ID, - Type: config.VFIODeviceType(dev.Type), - BDF: dev.BDF, - SysfsDev: dev.SysfsDev, - }) + var vfio 
config.VFIODev + + switch dev.Type { + case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType: + vfio = config.VFIODev{ + ID: dev.ID, + Type: config.VFIODeviceType(dev.Type), + BDF: dev.BDF, + SysfsDev: dev.SysfsDev, + } + case config.VFIOAPDeviceMediatedType: + vfio = config.VFIODev{ + ID: dev.ID, + SysfsDev: dev.SysfsDev, + } + default: + deviceLogger().WithError( + fmt.Errorf("VFIO device type unrecognized"), + ).Error("Failed to append device") + return + } + + device.VfioDevs = append(device.VfioDevs, &vfio) } } // It should implement GetAttachCount() and DeviceID() as api.Device implementation // here it shares function from *GenericDevice so we don't need duplicate codes -func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceSysfsDev string, vfioDeviceType config.VFIODeviceType, err error) { - vfioDeviceType = GetVFIODeviceType(deviceFileName) +func GetVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceSysfsDev string, vfioDeviceType config.VFIODeviceType, err error) { + sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName) + vfioDeviceType, err = GetVFIODeviceType(sysfsDevStr) + if err != nil { + return deviceBDF, deviceSysfsDev, vfioDeviceType, err + } switch vfioDeviceType { - case config.VFIODeviceNormalType: + case config.VFIOPCIDeviceNormalType: // Get bdf of device eg. 0000:00:1c.0 - deviceBDF = getBDF(deviceFileName) + // OLD IMPL: deviceBDF = getBDF(deviceFileName) + // The old implementation did not consider the case where + // vfio devices are located on different root busses. The + // kata-agent will handle the case now, here, use the full PCI addr + deviceBDF = deviceFileName // Get sysfs path used by cloud-hypervisor deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName) - case config.VFIODeviceMediatedType: + case config.VFIOPCIDeviceMediatedType: // Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4 sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName) - deviceSysfsDev, err = getSysfsDev(sysfsDevStr) - deviceBDF = getBDF(getMediatedBDF(deviceSysfsDev)) + deviceSysfsDev, err = GetSysfsDev(sysfsDevStr) + deviceBDF = GetBDF(getMediatedBDF(deviceSysfsDev)) + case config.VFIOAPDeviceMediatedType: + sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName) + deviceSysfsDev, err = GetSysfsDev(sysfsDevStr) default: err = fmt.Errorf("Incorrect tokens found while parsing vfio details: %s", deviceFileName) } @@ -236,7 +248,7 @@ func getMediatedBDF(deviceSysfsDev string) string { // getBDF returns the BDF of pci device // Expected input string format is []:[][].[] eg. 0000:02:10.0 -func getBDF(deviceSysStr string) string { +func GetBDF(deviceSysStr string) string { tokens := strings.SplitN(deviceSysStr, ":", 2) if len(tokens) == 1 { return "" @@ -244,13 +256,6 @@ func getBDF(deviceSysStr string) string { return tokens[1] } -// getSysfsDev returns the sysfsdev of mediated device -// Expected input string format is absolute path to the sysfs dev node -// eg. /sys/kernel/iommu_groups/0/devices/f79944e4-5a3d-11e8-99ce-479cbab002e4 -func getSysfsDev(sysfsDevStr string) (string, error) { - return filepath.EvalSymlinks(sysfsDevStr) -} - // BindDevicetoVFIO binds the device to vfio driver after unbinding from host. // Will be called by a network interface or a generic pcie device. 
func BindDevicetoVFIO(bdf, hostDriver, vendorDeviceID string) (string, error) { diff --git a/src/runtime/pkg/device/drivers/vfio_test.go b/src/runtime/pkg/device/drivers/vfio_test.go index 3c25a64c3c8a..9a03fa030edc 100644 --- a/src/runtime/pkg/device/drivers/vfio_test.go +++ b/src/runtime/pkg/device/drivers/vfio_test.go @@ -20,7 +20,7 @@ func TestGetVFIODetails(t *testing.T) { } data := []testData{ - {"0000:02:10.0", "02:10.0"}, + {"0000:02:10.0", "0000:02:10.0"}, {"0000:0210.0", ""}, {"f79944e4-5a3d-11e8-99ce-", ""}, {"f79944e4-5a3d-11e8-99ce", ""}, @@ -29,12 +29,12 @@ func TestGetVFIODetails(t *testing.T) { } for _, d := range data { - deviceBDF, deviceSysfsDev, vfioDeviceType, err := getVFIODetails(d.deviceStr, "") + deviceBDF, deviceSysfsDev, vfioDeviceType, err := GetVFIODetails(d.deviceStr, "") switch vfioDeviceType { - case config.VFIODeviceNormalType: + case config.VFIOPCIDeviceNormalType: assert.Equal(t, d.expectedStr, deviceBDF) - case config.VFIODeviceMediatedType: + case config.VFIOPCIDeviceMediatedType, config.VFIOAPDeviceMediatedType: assert.Equal(t, d.expectedStr, deviceSysfsDev) default: assert.NotNil(t, err) diff --git a/src/runtime/pkg/device/drivers/vhost_user_blk.go b/src/runtime/pkg/device/drivers/vhost_user_blk.go index 49c66e711758..fbf195c30c0c 100644 --- a/src/runtime/pkg/device/drivers/vhost_user_blk.go +++ b/src/runtime/pkg/device/drivers/vhost_user_blk.go @@ -8,6 +8,7 @@ package drivers import ( "context" + "strconv" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/api" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" @@ -72,17 +73,19 @@ func (device *VhostUserBlkDevice) Attach(ctx context.Context, devReceiver api.De } vAttrs := &config.VhostUserDeviceAttrs{ - DevID: utils.MakeNameID("blk", device.DeviceInfo.ID, maxDevIDSize), - SocketPath: device.DeviceInfo.HostPath, - Type: config.VhostUserBlk, - Index: index, + DevID: utils.MakeNameID("blk", device.DeviceInfo.ID, maxDevIDSize), + SocketPath: device.DeviceInfo.HostPath, + Type: config.VhostUserBlk, + Index: index, + ReconnectTime: vhostUserReconnect(device.DeviceInfo.DriverOptions), } deviceLogger().WithFields(logrus.Fields{ - "device": device.DeviceInfo.HostPath, - "SocketPath": vAttrs.SocketPath, - "Type": config.VhostUserBlk, - "Index": index, + "device": device.DeviceInfo.HostPath, + "SocketPath": vAttrs.SocketPath, + "Type": config.VhostUserBlk, + "Index": index, + "ReconnectTime": vAttrs.ReconnectTime, }).Info("Attaching device") device.VhostUserDeviceAttrs = vAttrs @@ -93,6 +96,24 @@ func (device *VhostUserBlkDevice) Attach(ctx context.Context, devReceiver api.De return nil } +func vhostUserReconnect(customOptions map[string]string) uint32 { + var vhostUserReconnectTimeout uint32 + + if customOptions == nil { + vhostUserReconnectTimeout = config.DefaultVhostUserReconnectTimeOut + } else { + reconnectTimeoutStr := customOptions[config.VhostUserReconnectTimeOutOpt] + if reconnectTimeout, err := strconv.Atoi(reconnectTimeoutStr); err != nil { + vhostUserReconnectTimeout = config.DefaultVhostUserReconnectTimeOut + deviceLogger().WithField("reconnect", reconnectTimeoutStr).WithError(err).Warn("Failed to get reconnect timeout for vhost-user-blk device") + } else { + vhostUserReconnectTimeout = uint32(reconnectTimeout) + } + } + + return vhostUserReconnectTimeout +} + func isVirtioBlkBlockDriver(customOptions map[string]string) bool { var blockDriverOption string diff --git a/src/runtime/pkg/device/manager/manager.go b/src/runtime/pkg/device/manager/manager.go index 
eed9e39f1e53..ed3708dc9ae7 100644 --- a/src/runtime/pkg/device/manager/manager.go +++ b/src/runtime/pkg/device/manager/manager.go @@ -10,6 +10,7 @@ import ( "context" "encoding/hex" "errors" + "fmt" "sync" "github.com/sirupsen/logrus" @@ -42,6 +43,8 @@ type deviceManager struct { sync.RWMutex vhostUserStoreEnabled bool + + vhostUserReconnectTimeout uint32 } func deviceLogger() *logrus.Entry { @@ -49,11 +52,12 @@ func deviceLogger() *logrus.Entry { } // NewDeviceManager creates a deviceManager object behaved as api.DeviceManager -func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserStorePath string, devices []api.Device) api.DeviceManager { +func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserStorePath string, vhostUserReconnect uint32, devices []api.Device) api.DeviceManager { dm := &deviceManager{ - vhostUserStoreEnabled: vhostUserStoreEnabled, - vhostUserStorePath: vhostUserStorePath, - devices: make(map[string]api.Device), + vhostUserStoreEnabled: vhostUserStoreEnabled, + vhostUserStorePath: vhostUserStorePath, + vhostUserReconnectTimeout: vhostUserReconnect, + devices: make(map[string]api.Device), } if blockDriver == config.VirtioMmio { dm.blockDriver = config.VirtioMmio @@ -67,7 +71,11 @@ func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserS dm.blockDriver = config.VirtioSCSI } - drivers.AllPCIeDevs = make(map[string]bool) + config.PCIeDevices = make(map[config.PCIePort]config.PCIePortMapping) + + config.PCIeDevices[config.RootPort] = make(map[string]bool) + config.PCIeDevices[config.SwitchPort] = make(map[string]bool) + config.PCIeDevices[config.BridgePort] = make(map[string]bool) for _, dev := range devices { dm.devices[dev.DeviceID()] = dev @@ -75,10 +83,21 @@ func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserS return dm } -func (dm *deviceManager) findDeviceByMajorMinor(major, minor int64) api.Device { +func (dm *deviceManager) findDevice(devInfo *config.DeviceInfo) api.Device { + // For devices with a major of -1, we use the host path to find existing instances. 
+ if devInfo.Major == -1 { + for _, dev := range dm.devices { + dma, _ := dev.GetMajorMinor() + if dma == -1 && dev.GetHostPath() == devInfo.HostPath { + return dev + } + } + return nil + } + for _, dev := range dm.devices { dma, dmi := dev.GetMajorMinor() - if dma == major && dmi == minor { + if dma == devInfo.Major && dmi == devInfo.Minor { return dev } } @@ -103,7 +122,7 @@ func (dm *deviceManager) createDevice(devInfo config.DeviceInfo) (dev api.Device } }() - if existingDev := dm.findDeviceByMajorMinor(devInfo.Major, devInfo.Minor); existingDev != nil { + if existingDev := dm.findDevice(&devInfo); existingDev != nil { return existingDev, nil } @@ -112,13 +131,14 @@ func (dm *deviceManager) createDevice(devInfo config.DeviceInfo) (dev api.Device if devInfo.ID, err = dm.newDeviceID(); err != nil { return nil, err } - if isVFIO(devInfo.HostPath) { + if IsVFIODevice(devInfo.HostPath) { return drivers.NewVFIODevice(&devInfo), nil - } else if isVhostUserBlk(devInfo) { + } else if IsVhostUserBlk(devInfo) { if devInfo.DriverOptions == nil { devInfo.DriverOptions = make(map[string]string) } devInfo.DriverOptions[config.BlockDriverOpt] = dm.blockDriver + devInfo.DriverOptions[config.VhostUserReconnectTimeOutOpt] = fmt.Sprintf("%d", dm.vhostUserReconnectTimeout) return drivers.NewVhostUserBlkDevice(&devInfo), nil } else if isBlock(devInfo) { if devInfo.DriverOptions == nil { @@ -182,12 +202,12 @@ func (dm *deviceManager) AttachDevice(ctx context.Context, id string, dr api.Dev dm.Lock() defer dm.Unlock() - d, ok := dm.devices[id] + dev, ok := dm.devices[id] if !ok { return ErrDeviceNotExist } - if err := d.Attach(ctx, dr); err != nil { + if err := dev.Attach(ctx, dr); err != nil { return err } return nil diff --git a/src/runtime/pkg/device/manager/manager_test.go b/src/runtime/pkg/device/manager/manager_test.go index 1070e6b8e203..c08cb66ab422 100644 --- a/src/runtime/pkg/device/manager/manager_test.go +++ b/src/runtime/pkg/device/manager/manager_test.go @@ -90,6 +90,100 @@ func TestNewDevice(t *testing.T) { assert.Equal(t, vfioDev.DeviceInfo.GID, uint32(2)) } +func TestAttachVFIOAPDevice(t *testing.T) { + + var err error + var ok bool + + dm := &deviceManager{ + devices: make(map[string]api.Device), + } + + tmpDir := t.TempDir() + // sys/devices/vfio_ap/matrix/f94290f8-78ac-45fb-bb22-e55e519fa64f + testSysfsAP := "/sys/devices/vfio_ap/" + testDeviceAP := "f94290f8-78ac-45fb-bb22-e55e519fa64f" + testVFIOGroup := "42" + + matrixDir := filepath.Join(tmpDir, testSysfsAP, "matrix") + err = os.MkdirAll(matrixDir, dirMode) + assert.Nil(t, err) + + deviceAPFile := filepath.Join(matrixDir, testDeviceAP) + err = os.MkdirAll(deviceAPFile, dirMode) + assert.Nil(t, err) + + matrixDeviceAPFile := filepath.Join(deviceAPFile, "matrix") + _, err = os.Create(matrixDeviceAPFile) + assert.Nil(t, err) + // create AP devices in the matrix file + APDevices := []byte("05.001f\n") + err = os.WriteFile(matrixDeviceAPFile, APDevices, 0644) + assert.Nil(t, err) + + devicesVFIOGroupDir := filepath.Join(tmpDir, testVFIOGroup, "devices") + err = os.MkdirAll(devicesVFIOGroupDir, dirMode) + assert.Nil(t, err) + + deviceAPSymlink := filepath.Join(devicesVFIOGroupDir, testDeviceAP) + err = os.Symlink(deviceAPFile, deviceAPSymlink) + assert.Nil(t, err) + + savedIOMMUPath := config.SysIOMMUGroupPath + config.SysIOMMUGroupPath = tmpDir + + savedSysBusPciDevicesPath := config.SysBusPciDevicesPath + config.SysBusPciDevicesPath = devicesVFIOGroupDir + + defer func() { + config.SysIOMMUGroupPath = savedIOMMUPath + 
config.SysBusPciDevicesPath = savedSysBusPciDevicesPath + }() + + path := filepath.Join(vfioPath, testVFIOGroup) + deviceInfo := config.DeviceInfo{ + HostPath: path, + ContainerPath: path, + DevType: "c", + ColdPlug: false, + Port: config.RootPort, + } + + device, err := dm.NewDevice(deviceInfo) + assert.Nil(t, err) + _, ok = device.(*drivers.VFIODevice) + assert.True(t, ok) + + devReceiver := &api.MockDeviceReceiver{} + err = device.Attach(context.Background(), devReceiver) + assert.Nil(t, err) + + err = device.Detach(context.Background(), devReceiver) + assert.Nil(t, err) + + // If we omit the port setting we should fail + failDm := &deviceManager{ + devices: make(map[string]api.Device), + } + + failDeviceInfo := config.DeviceInfo{ + HostPath: path, + ContainerPath: path, + DevType: "c", + ColdPlug: false, + } + + failDevice, err := failDm.NewDevice(failDeviceInfo) + assert.Nil(t, err) + _, ok = failDevice.(*drivers.VFIODevice) + assert.True(t, ok) + + failDevReceiver := &api.MockDeviceReceiver{} + err = failDevice.Attach(context.Background(), failDevReceiver) + assert.Error(t, err) + +} + func TestAttachVFIODevice(t *testing.T) { dm := &deviceManager{ blockDriver: config.VirtioBlock, @@ -116,14 +210,14 @@ func TestAttachVFIODevice(t *testing.T) { _, err = os.Create(deviceConfigFile) assert.Nil(t, err) - savedIOMMUPath := config.SysIOMMUPath - config.SysIOMMUPath = tmpDir + savedIOMMUPath := config.SysIOMMUGroupPath + config.SysIOMMUGroupPath = tmpDir savedSysBusPciDevicesPath := config.SysBusPciDevicesPath config.SysBusPciDevicesPath = devicesDir defer func() { - config.SysIOMMUPath = savedIOMMUPath + config.SysIOMMUGroupPath = savedIOMMUPath config.SysBusPciDevicesPath = savedSysBusPciDevicesPath }() @@ -132,6 +226,8 @@ func TestAttachVFIODevice(t *testing.T) { HostPath: path, ContainerPath: path, DevType: "c", + ColdPlug: false, + Port: config.RootPort, } device, err := dm.NewDevice(deviceInfo) @@ -208,7 +304,7 @@ func TestAttachBlockDevice(t *testing.T) { } func TestAttachDetachDevice(t *testing.T) { - dm := NewDeviceManager(config.VirtioSCSI, false, "", nil) + dm := NewDeviceManager(config.VirtioSCSI, false, "", 0, nil) path := "/dev/hda" deviceInfo := config.DeviceInfo{ diff --git a/src/runtime/pkg/device/manager/utils.go b/src/runtime/pkg/device/manager/utils.go index 17d14741c1dc..6658b19a414b 100644 --- a/src/runtime/pkg/device/manager/utils.go +++ b/src/runtime/pkg/device/manager/utils.go @@ -17,8 +17,15 @@ const ( vfioPath = "/dev/vfio/" ) -// isVFIO checks if the device provided is a vfio group. -func isVFIO(hostPath string) bool { +// IsVFIOControlDevice checks if the device provided is a vfio control device. +// Depending no the vfio_mode we need to know if a device is a VFIO device +// or the VFIO control device +func IsVFIOControlDevice(path string) bool { + return path == filepath.Join(vfioPath, "vfio") +} + +// IsVFIO checks if the device provided is a vfio group. +func IsVFIODevice(hostPath string) bool { // Ignore /dev/vfio/vfio character device if strings.HasPrefix(hostPath, filepath.Join(vfioPath, "vfio")) { return false @@ -37,7 +44,7 @@ func isBlock(devInfo config.DeviceInfo) bool { } // isVhostUserBlk checks if the device is a VhostUserBlk device. 
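// Illustrative sketch (not from this patch): the renamed, exported helpers now
// distinguish the VFIO control node from VFIO group nodes, which is what the
// vfio_mode handling needs. The group number below is a placeholder.
//
// Assumed import:
//   "github.com/kata-containers/kata-containers/src/runtime/pkg/device/manager"
func exampleVFIOPathChecks() {
	_ = manager.IsVFIOControlDevice("/dev/vfio/vfio") // true: the control device
	_ = manager.IsVFIODevice("/dev/vfio/vfio")        // false: control device is ignored
	_ = manager.IsVFIODevice("/dev/vfio/16")          // true: an IOMMU group node
}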
-func isVhostUserBlk(devInfo config.DeviceInfo) bool { +func IsVhostUserBlk(devInfo config.DeviceInfo) bool { return devInfo.DevType == "b" && devInfo.Major == config.VhostUserBlkMajor } diff --git a/src/runtime/pkg/device/manager/utils_test.go b/src/runtime/pkg/device/manager/utils_test.go index 273283823fad..9fbc829d7ed7 100644 --- a/src/runtime/pkg/device/manager/utils_test.go +++ b/src/runtime/pkg/device/manager/utils_test.go @@ -31,7 +31,7 @@ func TestIsVFIO(t *testing.T) { } for _, d := range data { - isVFIO := isVFIO(d.path) + isVFIO := IsVFIODevice(d.path) assert.Equal(t, d.expected, isVFIO) } } @@ -70,7 +70,7 @@ func TestIsVhostUserBlk(t *testing.T) { } for _, d := range data { - isVhostUserBlk := isVhostUserBlk( + isVhostUserBlk := IsVhostUserBlk( config.DeviceInfo{ DevType: d.devType, Major: d.major, diff --git a/src/runtime/pkg/direct-volume/utils.go b/src/runtime/pkg/direct-volume/utils.go index 8520ddd35e66..9e13a4d227a6 100644 --- a/src/runtime/pkg/direct-volume/utils.go +++ b/src/runtime/pkg/direct-volume/utils.go @@ -10,7 +10,6 @@ import ( "encoding/json" "errors" "fmt" - "io/ioutil" "os" "path/filepath" ) @@ -75,7 +74,7 @@ func Add(volumePath string, mountInfo string) error { return err } - return ioutil.WriteFile(filepath.Join(volumeDir, mountInfoFileName), []byte(mountInfo), 0600) + return os.WriteFile(filepath.Join(volumeDir, mountInfoFileName), []byte(mountInfo), 0600) } // Remove deletes the direct volume path including all the files inside it. @@ -89,7 +88,7 @@ func VolumeMountInfo(volumePath string) (*MountInfo, error) { if _, err := os.Stat(mountInfoFilePath); err != nil { return nil, err } - buf, err := ioutil.ReadFile(mountInfoFilePath) + buf, err := os.ReadFile(mountInfoFilePath) if err != nil { return nil, err } @@ -108,11 +107,11 @@ func RecordSandboxId(sandboxId string, volumePath string) error { return err } - return ioutil.WriteFile(filepath.Join(kataDirectVolumeRootPath, encodedPath, sandboxId), []byte(""), 0600) + return os.WriteFile(filepath.Join(kataDirectVolumeRootPath, encodedPath, sandboxId), []byte(""), 0600) } func GetSandboxIdForVolume(volumePath string) (string, error) { - files, err := ioutil.ReadDir(filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath)))) + files, err := os.ReadDir(filepath.Join(kataDirectVolumeRootPath, b64.URLEncoding.EncodeToString([]byte(volumePath)))) if err != nil { return "", err } diff --git a/src/runtime/pkg/govmm/qemu/examples_test.go b/src/runtime/pkg/govmm/qemu/examples_test.go index 03e52b87ac5c..6f6727effbd5 100644 --- a/src/runtime/pkg/govmm/qemu/examples_test.go +++ b/src/runtime/pkg/govmm/qemu/examples_test.go @@ -27,13 +27,16 @@ func Example() { // resources params = append(params, "-m", "370", "-smp", "cpus=2") - // LaunchCustomQemu should return as soon as the instance has launched as we - // are using the --daemonize flag. It will set up a unix domain socket - // called /tmp/qmp-socket that we can use to manage the instance. - _, err := qemu.LaunchCustomQemu(context.Background(), "", params, nil, nil, nil) + // LaunchCustomQemu should return immediately. We must then wait + // the returned process to terminate as we are using the --daemonize + // flag. + // It will set up a unix domain socket called /tmp/qmp-socket that we + // can use to manage the instance. + proc, _, err := qemu.LaunchCustomQemu(context.Background(), "", params, nil, nil, nil) if err != nil { panic(err) } + proc.Wait() // This channel will be closed when the instance dies. 
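// Illustrative sketch (not from this patch): with the reworked govmm API,
// LaunchCustomQemu no longer blocks; it returns the *exec.Cmd and a reader on
// QEMU's stderr, and with --daemonize the caller waits on the front process
// itself, as the updated example above does. Error handling is minimal here.
//
// Assumed imports: "context", "io", "os", and the govmm qemu package.
func exampleLaunchDaemonized(params []string) error {
	proc, stderr, err := qemu.LaunchCustomQemu(context.Background(), "", params, nil, nil, nil)
	if err != nil {
		return err
	}
	// Drain stderr so any early QEMU error message is not lost.
	go func() { _, _ = io.Copy(os.Stderr, stderr) }()
	return proc.Wait() // returns once the daemonizing front process exits
}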
disconnectedCh := make(chan struct{}) diff --git a/src/runtime/pkg/govmm/qemu/image.go b/src/runtime/pkg/govmm/qemu/image.go index b17efdd162cf..405ea5a0084f 100644 --- a/src/runtime/pkg/govmm/qemu/image.go +++ b/src/runtime/pkg/govmm/qemu/image.go @@ -8,7 +8,6 @@ package qemu import ( "context" "fmt" - "io/ioutil" "os" "os/exec" "path" @@ -44,12 +43,12 @@ func CreateCloudInitISO(ctx context.Context, scratchDir, isoPath string, dataDirPath, err) } - err = ioutil.WriteFile(metaDataPath, metaData, 0644) + err = os.WriteFile(metaDataPath, metaData, 0644) if err != nil { return fmt.Errorf("unable to create %s : %v", metaDataPath, err) } - err = ioutil.WriteFile(userDataPath, userData, 0644) + err = os.WriteFile(userDataPath, userData, 0644) if err != nil { return fmt.Errorf("unable to create %s : %v", userDataPath, err) } diff --git a/src/runtime/pkg/govmm/qemu/qemu.go b/src/runtime/pkg/govmm/qemu/qemu.go index 100316dd9ee9..ffb464c33d50 100644 --- a/src/runtime/pkg/govmm/qemu/qemu.go +++ b/src/runtime/pkg/govmm/qemu/qemu.go @@ -14,9 +14,9 @@ package qemu import ( - "bytes" "context" "fmt" + "io" "log" "os" "os/exec" @@ -123,6 +123,14 @@ const ( // PCIeRootPort is a PCIe Root Port, the PCIe device should be hotplugged to this port. PCIeRootPort DeviceDriver = "pcie-root-port" + // PCIeSwitchUpstreamPort is a PCIe switch upstream port + // A upstream port connects to a PCIe Root Port + PCIeSwitchUpstreamPort DeviceDriver = "x3130-upstream" + + // PCIeSwitchDownstreamPort is a PCIe switch downstream port + // PCIe devices can be hot-plugged to the downstream port. + PCIeSwitchDownstreamPort DeviceDriver = "xio3130-downstream" + // Loader is the Loader device driver. Loader DeviceDriver = "loader" @@ -133,9 +141,16 @@ const ( func isDimmSupported(config *Config) bool { switch runtime.GOARCH { case "amd64", "386", "ppc64le", "arm64": - if config != nil && config.Machine.Type == MachineTypeMicrovm { - // microvm does not support NUMA - return false + if config != nil { + if config.Machine.Type == MachineTypeMicrovm { + // microvm does not support NUMA + return false + } + if config.Knobs.MemFDPrivate { + // TDX guests rely on MemFD Private, which + // does not have NUMA support yet + return false + } } return true default: @@ -155,6 +170,9 @@ const ( // TransportMMIO is the MMIO transport for virtio devices. TransportMMIO VirtioTransport = "mmio" + + // TransportAP is the AP transport for virtio devices. + TransportAP VirtioTransport = "ap" ) // defaultTransport returns the default transport for the current combination @@ -191,6 +209,14 @@ func (transport VirtioTransport) isVirtioCCW(config *Config) bool { return transport == TransportCCW } +func (transport VirtioTransport) isVirtioAP(config *Config) bool { + if transport == "" { + transport = transport.defaultTransport(config) + } + + return transport == TransportAP +} + // getName returns the name of the current transport. func (transport VirtioTransport) getName(config *Config) string { if transport == "" { @@ -231,8 +257,12 @@ const ( // SEVGuest represents an SEV guest object SEVGuest ObjectType = "sev-guest" + // SNPGuest represents an SNP guest object + SNPGuest ObjectType = "sev-snp-guest" + // SecExecGuest represents an s390x Secure Execution (Protected Virtualization in QEMU) object SecExecGuest ObjectType = "s390-pv-guest" + // PEFGuest represent ppc64le PEF(Protected Execution Facility) object. 
PEFGuest ObjectType = "pef-guest" ) @@ -295,6 +325,8 @@ func (object Object) Valid() bool { case TDXGuest: return object.ID != "" && object.File != "" && object.DeviceID != "" case SEVGuest: + fallthrough + case SNPGuest: return object.ID != "" && object.File != "" && object.CBitPos != 0 && object.ReducedPhysBits != 0 case SecExecGuest: return object.ID != "" @@ -338,17 +370,15 @@ func (object Object) QemuParams(config *Config) []string { case TDXGuest: objectParams = append(objectParams, string(object.Type)) + objectParams = append(objectParams, "sept-ve-disable=on") objectParams = append(objectParams, fmt.Sprintf("id=%s", object.ID)) if object.Debug { objectParams = append(objectParams, "debug=on") } - deviceParams = append(deviceParams, string(object.Driver)) - deviceParams = append(deviceParams, fmt.Sprintf("id=%s", object.DeviceID)) - deviceParams = append(deviceParams, fmt.Sprintf("file=%s", object.File)) - if object.FirmwareVolume != "" { - deviceParams = append(deviceParams, fmt.Sprintf("config-firmware-volume=%s", object.FirmwareVolume)) - } + config.Bios = object.File case SEVGuest: + fallthrough + case SNPGuest: objectParams = append(objectParams, string(object.Type)) objectParams = append(objectParams, fmt.Sprintf("id=%s", object.ID)) objectParams = append(objectParams, fmt.Sprintf("cbitpos=%d", object.CBitPos)) @@ -366,7 +396,6 @@ func (object Object) QemuParams(config *Config) []string { deviceParams = append(deviceParams, string(object.Driver)) deviceParams = append(deviceParams, fmt.Sprintf("id=%s", object.DeviceID)) deviceParams = append(deviceParams, fmt.Sprintf("host-path=%s", object.File)) - } if len(deviceParams) > 0 { @@ -1140,8 +1169,11 @@ const ( // Threads is the pthread asynchronous I/O implementation. Threads BlockDeviceAIO = "threads" - // Native is the pthread asynchronous I/O implementation. + // Native is the native Linux AIO implementation. Native BlockDeviceAIO = "native" + + // IOUring is the Linux io_uring I/O implementation. + IOUring BlockDeviceAIO = "io_uring" ) const ( @@ -1326,6 +1358,7 @@ type VhostUserDevice struct { Address string //used for MAC address in net case Tag string //virtio-fs volume id for mounting inside guest CacheSize uint32 //virtio-fs DAX cache size in MiB + QueueSize uint32 //size of virtqueues SharedVersions bool //enable virtio-fs shared version metadata VhostUserType DeviceDriver @@ -1493,6 +1526,11 @@ func (vhostuserDev VhostUserDevice) QemuFSParams(config *Config) []string { deviceParams = append(deviceParams, driver) deviceParams = append(deviceParams, fmt.Sprintf("chardev=%s", vhostuserDev.CharDevID)) deviceParams = append(deviceParams, fmt.Sprintf("tag=%s", vhostuserDev.Tag)) + queueSize := uint32(1024) + if vhostuserDev.QueueSize != 0 { + queueSize = vhostuserDev.QueueSize + } + deviceParams = append(deviceParams, fmt.Sprintf("queue-size=%d", queueSize)) if vhostuserDev.CacheSize != 0 { deviceParams = append(deviceParams, fmt.Sprintf("cache-size=%dM", vhostuserDev.CacheSize)) } @@ -1669,6 +1707,106 @@ func (b PCIeRootPortDevice) Valid() bool { return true } +// PCIeSwitchUpstreamPortDevice is the port connecting to the root port +type PCIeSwitchUpstreamPortDevice struct { + ID string // format: sup{n}, n>=0 + Bus string // default is rp0 +} + +// QemuParams returns the qemu parameters built out of the PCIeSwitchUpstreamPortDevice. 
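// Illustrative sketch (not from this patch): a minimal guest object using the
// new SNPGuest type. Because SNP falls through to the SEV handling above,
// Valid() requires ID, File, CBitPos and ReducedPhysBits to be set; the file
// path and values below are placeholders.
func exampleSNPGuestObject() qemu.Object {
	return qemu.Object{
		Type:            qemu.SNPGuest, // rendered as -object sev-snp-guest,...
		ID:              "sev0",
		File:            "/usr/share/ovmf/OVMF.fd",
		CBitPos:         51,
		ReducedPhysBits: 1,
	}
}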
+func (b PCIeSwitchUpstreamPortDevice) QemuParams(config *Config) []string { + var qemuParams []string + var deviceParams []string + + driver := PCIeSwitchUpstreamPort + + deviceParams = append(deviceParams, fmt.Sprintf("%s,id=%s", driver, b.ID)) + deviceParams = append(deviceParams, fmt.Sprintf("bus=%s", b.Bus)) + + qemuParams = append(qemuParams, "-device") + qemuParams = append(qemuParams, strings.Join(deviceParams, ",")) + return qemuParams +} + +// Valid returns true if the PCIeSwitchUpstreamPortDevice structure is valid and complete. +func (b PCIeSwitchUpstreamPortDevice) Valid() bool { + if b.ID == "" { + return false + } + if b.Bus == "" { + return false + } + return true +} + +// PCIeSwitchDownstreamPortDevice is the port connecting to the root port +type PCIeSwitchDownstreamPortDevice struct { + ID string // format: sup{n}, n>=0 + Bus string // default is rp0 + Chassis string // (slot, chassis) pair is mandatory and must be unique for each downstream port, >=0, default is 0x00 + Slot string // >=0, default is 0x00 + // This to work needs patches to QEMU + BusReserve string + // Pref64 and Pref32 are not allowed to be set simultaneously + Pref64Reserve string // reserve prefetched MMIO aperture, 64-bit + Pref32Reserve string // reserve prefetched MMIO aperture, 32-bit + MemReserve string // reserve non-prefetched MMIO aperture, 32-bit *only* + IOReserve string // IO reservation + +} + +// QemuParams returns the qemu parameters built out of the PCIeSwitchUpstreamPortDevice. +func (b PCIeSwitchDownstreamPortDevice) QemuParams(config *Config) []string { + var qemuParams []string + var deviceParams []string + driver := PCIeSwitchDownstreamPort + + deviceParams = append(deviceParams, fmt.Sprintf("%s,id=%s", driver, b.ID)) + deviceParams = append(deviceParams, fmt.Sprintf("bus=%s", b.Bus)) + deviceParams = append(deviceParams, fmt.Sprintf("chassis=%s", b.Chassis)) + deviceParams = append(deviceParams, fmt.Sprintf("slot=%s", b.Slot)) + if b.BusReserve != "" { + deviceParams = append(deviceParams, fmt.Sprintf("bus-reserve=%s", b.BusReserve)) + } + + if b.Pref64Reserve != "" { + deviceParams = append(deviceParams, fmt.Sprintf("pref64-reserve=%s", b.Pref64Reserve)) + } + + if b.Pref32Reserve != "" { + deviceParams = append(deviceParams, fmt.Sprintf("pref32-reserve=%s", b.Pref32Reserve)) + } + + if b.MemReserve != "" { + deviceParams = append(deviceParams, fmt.Sprintf("mem-reserve=%s", b.MemReserve)) + } + + if b.IOReserve != "" { + deviceParams = append(deviceParams, fmt.Sprintf("io-reserve=%s", b.IOReserve)) + } + + qemuParams = append(qemuParams, "-device") + qemuParams = append(qemuParams, strings.Join(deviceParams, ",")) + return qemuParams +} + +// Valid returns true if the PCIeSwitchUpstremPortDevice structure is valid and complete. +func (b PCIeSwitchDownstreamPortDevice) Valid() bool { + if b.ID == "" { + return false + } + if b.Bus == "" { + return false + } + if b.Chassis == "" { + return false + } + if b.Slot == "" { + return false + } + return true +} + // VFIODevice represents a qemu vfio device meant for direct access by guest OS. type VFIODevice struct { // Bus-Device-Function of device @@ -1691,6 +1829,9 @@ type VFIODevice struct { // Transport is the virtio transport for this device. 
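// Illustrative sketch (not from this patch): building a PCIe switch from the
// new upstream/downstream port types. QemuParams renders them as
// -device x3130-upstream and -device xio3130-downstream; the IDs and the
// root-port bus name "rp0" are placeholders.
func examplePCIeSwitch(cfg *qemu.Config) {
	up := qemu.PCIeSwitchUpstreamPortDevice{ID: "sup0", Bus: "rp0"}
	down := qemu.PCIeSwitchDownstreamPortDevice{
		ID:      "swdp0",
		Bus:     "sup0", // hangs off the upstream port
		Chassis: "0x00",
		Slot:    "0x00",
	}
	cfg.Devices = append(cfg.Devices, up, down)
}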
Transport VirtioTransport + + // SysfsDev specifies the sysfs matrix entry for the AP device + SysfsDev string } // VFIODeviceTransport is a map of the vfio device name that corresponds to @@ -1699,11 +1840,13 @@ var VFIODeviceTransport = map[VirtioTransport]string{ TransportPCI: "vfio-pci", TransportCCW: "vfio-ccw", TransportMMIO: "vfio-device", + TransportAP: "vfio-ap", } // Valid returns true if the VFIODevice structure is valid and complete. +// s390x architecture requires SysfsDev to be set. func (vfioDev VFIODevice) Valid() bool { - return vfioDev.BDF != "" + return vfioDev.BDF != "" || vfioDev.SysfsDev != "" } // QemuParams returns the qemu parameters built out of this vfio device. @@ -1713,6 +1856,15 @@ func (vfioDev VFIODevice) QemuParams(config *Config) []string { driver := vfioDev.deviceName(config) + if vfioDev.Transport.isVirtioAP(config) { + deviceParams = append(deviceParams, fmt.Sprintf("%s,sysfsdev=%s", driver, vfioDev.SysfsDev)) + + qemuParams = append(qemuParams, "-device") + qemuParams = append(qemuParams, strings.Join(deviceParams, ",")) + + return qemuParams + } + deviceParams = append(deviceParams, fmt.Sprintf("%s,host=%s", driver, vfioDev.BDF)) if vfioDev.Transport.isVirtioPCI(config) { if vfioDev.VendorID != "" { @@ -2319,11 +2471,32 @@ const ( Unix QMPSocketType = "unix" ) -// QMPSocket represents a qemu QMP socket configuration. +// MonitorProtocol tells what protocol is used on a QMPSocket +type MonitorProtocol string + +const ( + // Socket using a human-friendly text-based protocol. + Hmp MonitorProtocol = "hmp" + + // Socket using a richer json-based protocol. + Qmp MonitorProtocol = "qmp" + + // Same as Qmp with pretty json formatting. + QmpPretty MonitorProtocol = "qmp-pretty" +) + +// QMPSocket represents a qemu QMP or HMP socket configuration. +// nolint: govet type QMPSocket struct { // Type is the socket type (e.g. "unix"). Type QMPSocketType + // Protocol is the protocol to be used on the socket. + Protocol MonitorProtocol + + // QMP listener file descriptor to be passed to qemu + FD *os.File + // Name is the socket name. Name string @@ -2336,7 +2509,8 @@ type QMPSocket struct { // Valid returns true if the QMPSocket structure is valid and complete. func (qmp QMPSocket) Valid() bool { - if qmp.Type == "" || qmp.Name == "" { + // Exactly one of Name of FD must be set. + if qmp.Type == "" || (qmp.Name == "") == (qmp.FD == nil) { return false } @@ -2344,6 +2518,10 @@ func (qmp QMPSocket) Valid() bool { return false } + if qmp.Protocol != Hmp && qmp.Protocol != Qmp && qmp.Protocol != QmpPretty { + return false + } + return true } @@ -2475,6 +2653,9 @@ type Knobs struct { // MemPrealloc will allocate all the RAM upfront MemPrealloc bool + // Private Memory FD meant for private memory map/unmap. + MemFDPrivate bool + // FileBackedMem requires Memory.Size and Memory.Path of the VM to // be set. FileBackedMem bool @@ -2604,14 +2785,12 @@ type Config struct { // PidFile is the -pidfile parameter PidFile string - // LogFile is the -D parameter - LogFile string - qemuParams []string } -// appendFDs append a list of file descriptors to the qemu configuration and -// returns a slice of offset file descriptors that will be seen by the qemu process. +// appendFDs appends a list of arbitrary file descriptors to the qemu configuration and +// returns a slice of consecutive file descriptors that will be seen by the qemu process. +// Please see the comment below for details. 
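// Illustrative sketch (not from this patch): an s390x vfio-ap passthrough
// device is now described with SysfsDev plus the new AP transport, and Valid()
// accepts it without a BDF. The matrix path matches the one used in the tests
// further below.
func exampleVFIOAPDevice() qemu.VFIODevice {
	return qemu.VFIODevice{
		SysfsDev:  "/sys/devices/vfio_ap/matrix/a297db4a-f4c2-11e6-90f6-d3b88d6c9525",
		Transport: qemu.TransportAP, // rendered as -device vfio-ap,sysfsdev=...
	}
}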
func (config *Config) appendFDs(fds []*os.File) []int { var fdInts []int @@ -2623,6 +2802,10 @@ func (config *Config) appendFDs(fds []*os.File) []int { // ExtraFiles specifies additional open files to be inherited by the // new process. It does not include standard input, standard output, or // standard error. If non-nil, entry i becomes file descriptor 3+i. + // This means that arbitrary file descriptors fd0, fd1... fdN passed in + // the array will be presented to the guest as consecutive descriptors + // 3, 4... N+3. The golang library internally relies on dup2() to do + // the renumbering. for i := range fds { fdInts = append(fdInts, oldLen+3+i) } @@ -2676,7 +2859,13 @@ func (config *Config) appendQMPSockets() { continue } - qmpParams := append([]string{}, fmt.Sprintf("%s:%s", q.Type, q.Name)) + var qmpParams []string + if q.FD != nil { + qemuFDs := config.appendFDs([]*os.File{q.FD}) + qmpParams = append([]string{}, fmt.Sprintf("%s:fd=%d", q.Type, qemuFDs[0])) + } else { + qmpParams = append([]string{}, fmt.Sprintf("%s:path=%s", q.Type, q.Name)) + } if q.Server { qmpParams = append(qmpParams, "server=on") if q.NoWait { @@ -2684,17 +2873,27 @@ func (config *Config) appendQMPSockets() { } } - config.qemuParams = append(config.qemuParams, "-qmp") + switch q.Protocol { + case Hmp: + config.qemuParams = append(config.qemuParams, "-monitor") + default: + config.qemuParams = append(config.qemuParams, fmt.Sprintf("-%s", q.Protocol)) + } + config.qemuParams = append(config.qemuParams, strings.Join(qmpParams, ",")) } } -func (config *Config) appendDevices() { +func (config *Config) appendDevices(logger QMPLog) { + if logger == nil { + logger = qmpNullLogger{} + } + for _, d := range config.Devices { if !d.Valid() { + logger.Errorf("vm device is not valid: %+v", d) continue } - config.qemuParams = append(config.qemuParams, d.QemuParams(config)...) } } @@ -2822,10 +3021,13 @@ func (config *Config) appendMemoryKnobs() { return } var objMemParam, numaMemParam string + dimmName := "dimm1" if config.Knobs.HugePages { objMemParam = "memory-backend-file,id=" + dimmName + ",size=" + config.Memory.Size + ",mem-path=/dev/hugepages" numaMemParam = "node,memdev=" + dimmName + } else if config.Knobs.MemFDPrivate { + objMemParam = "memory-backend-memfd-private,id=" + dimmName + ",size=" + config.Memory.Size } else if config.Knobs.FileBackedMem && config.Memory.Path != "" { objMemParam = "memory-backend-file,id=" + dimmName + ",size=" + config.Memory.Size + ",mem-path=" + config.Memory.Path numaMemParam = "node,memdev=" + dimmName @@ -2928,13 +3130,6 @@ func (config *Config) appendPidFile() { } } -func (config *Config) appendLogFile() { - if config.LogFile != "" { - config.qemuParams = append(config.qemuParams, "-D") - config.qemuParams = append(config.qemuParams, config.LogFile) - } -} - func (config *Config) appendFwCfg(logger QMPLog) { if logger == nil { logger = qmpNullLogger{} @@ -2954,19 +3149,15 @@ func (config *Config) appendFwCfg(logger QMPLog) { // // The Config parameter contains a set of qemu parameters and settings. // -// This function writes its log output via logger parameter. -// -// The function will block until the launched qemu process exits. "", nil -// will be returned if the launch succeeds. Otherwise a string containing -// the contents of stderr + a Go error object will be returned. -func LaunchQemu(config Config, logger QMPLog) (string, error) { +// See LaunchCustomQemu for more information. 
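// Illustrative sketch (not from this patch): QMPSocket now carries a monitor
// protocol and may reference a pre-opened socket FD instead of a path; exactly
// one of Name or FD must be set. With an FD the argument becomes
// "-qmp unix:fd=N", with a name "-qmp unix:path=...", and Protocol Hmp switches
// the flag to -monitor. The HMP socket path below is a placeholder.
func exampleQMPSockets(qmpFile *os.File) []qemu.QMPSocket {
	return []qemu.QMPSocket{
		{Type: qemu.Unix, Protocol: qemu.Qmp, FD: qmpFile, Server: true, NoWait: true},
		{Type: qemu.Unix, Protocol: qemu.Hmp, Name: "/run/vc/hmp.sock", Server: true, NoWait: true},
	}
}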
+func LaunchQemu(config Config, logger QMPLog) (*exec.Cmd, io.ReadCloser, error) { config.appendName() config.appendUUID() config.appendMachine() config.appendCPUModel() config.appendQMPSockets() config.appendMemory() - config.appendDevices() + config.appendDevices(logger) config.appendRTC() config.appendGlobalParam() config.appendPFlashParam() @@ -2977,12 +3168,11 @@ func LaunchQemu(config Config, logger QMPLog) (string, error) { config.appendIOThreads() config.appendIncoming() config.appendPidFile() - config.appendLogFile() config.appendFwCfg(logger) config.appendSeccompSandbox() if err := config.appendCPUs(); err != nil { - return "", err + return nil, nil, err } ctx := config.Ctx @@ -3013,17 +3203,16 @@ func LaunchQemu(config Config, logger QMPLog) (string, error) { // // This function writes its log output via logger parameter. // -// The function will block until the launched qemu process exits. "", nil -// will be returned if the launch succeeds. Otherwise a string containing -// the contents of stderr + a Go error object will be returned. +// The function returns cmd, reader, nil where cmd is a Go exec.Cmd object +// representing the QEMU process and reader a Go io.ReadCloser object +// connected to QEMU's stderr, if launched successfully. Otherwise +// nil, nil, err where err is a Go error object is returned. func LaunchCustomQemu(ctx context.Context, path string, params []string, fds []*os.File, - attr *syscall.SysProcAttr, logger QMPLog) (string, error) { + attr *syscall.SysProcAttr, logger QMPLog) (*exec.Cmd, io.ReadCloser, error) { if logger == nil { logger = qmpNullLogger{} } - errStr := "" - if path == "" { path = "qemu-system-x86_64" } @@ -3037,15 +3226,17 @@ func LaunchCustomQemu(ctx context.Context, path string, params []string, fds []* cmd.SysProcAttr = attr - var stderr bytes.Buffer - cmd.Stderr = &stderr + reader, err := cmd.StderrPipe() + if err != nil { + logger.Errorf("Unable to connect stderr to a pipe") + return nil, nil, err + } logger.Infof("launching %s with: %v", path, params) - err := cmd.Run() + err = cmd.Start() if err != nil { logger.Errorf("Unable to launch %s: %v", path, err) - errStr = stderr.String() - logger.Errorf("%s", errStr) + return nil, nil, err } - return errStr, err + return cmd, reader, nil } diff --git a/src/runtime/pkg/govmm/qemu/qemu_arch_base_test.go b/src/runtime/pkg/govmm/qemu/qemu_arch_base_test.go index 26b2ac547414..ec70767d7307 100644 --- a/src/runtime/pkg/govmm/qemu/qemu_arch_base_test.go +++ b/src/runtime/pkg/govmm/qemu/qemu_arch_base_test.go @@ -1,5 +1,4 @@ //go:build !s390x -// +build !s390x // Copyright contributors to the Virtual Machine Manager for Go project // diff --git a/src/runtime/pkg/govmm/qemu/qemu_test.go b/src/runtime/pkg/govmm/qemu/qemu_test.go index ae4030fb70e9..e1cb2a2d192f 100644 --- a/src/runtime/pkg/govmm/qemu/qemu_test.go +++ b/src/runtime/pkg/govmm/qemu/qemu_test.go @@ -7,7 +7,6 @@ package qemu import ( "fmt" - "io/ioutil" "os" "reflect" "strings" @@ -35,7 +34,7 @@ func testConfigAppend(config *Config, structure interface{}, expected string, t case Device: config.Devices = []Device{s} - config.appendDevices() + config.appendDevices(nil) case Knobs: config.Knobs = s @@ -186,8 +185,8 @@ func TestAppendDeviceNetwork(t *testing.T) { } func TestAppendDeviceNetworkMq(t *testing.T) { - foo, _ := ioutil.TempFile(os.TempDir(), "govmm-qemu-test") - bar, _ := ioutil.TempFile(os.TempDir(), "govmm-qemu-test") + foo, _ := os.CreateTemp(os.TempDir(), "govmm-qemu-test") + bar, _ := os.CreateTemp(os.TempDir(), 
"govmm-qemu-test") defer func() { _ = foo.Close() @@ -633,6 +632,29 @@ func TestAppendMemoryFileBackedMemPrealloc(t *testing.T) { testConfigAppend(conf, knobs, memString+" "+knobsString, t) } +func TestAppendMemoryBackedMemFdPrivate(t *testing.T) { + conf := &Config{ + Memory: Memory{ + Size: "1G", + Slots: 8, + }, + } + memString := "-m 1G,slots=8" + testConfigAppend(conf, conf.Memory, memString, t) + + knobs := Knobs{ + MemFDPrivate: true, + MemShared: false, + } + objMemString := "-object memory-backend-memfd-private,id=dimm1,size=1G" + memBackendString := "-machine memory-backend=dimm1" + + knobsString := objMemString + " " + knobsString += memBackendString + + testConfigAppend(conf, knobs, memString+" "+knobsString, t) +} + func TestNoRebootKnob(t *testing.T) { conf := &Config{} @@ -699,15 +721,16 @@ func TestFailToAppendCPUs(t *testing.T) { } } -var qmpSingleSocketServerString = "-qmp unix:cc-qmp,server=on,wait=off" -var qmpSingleSocketString = "-qmp unix:cc-qmp" +var qmpSingleSocketServerString = "-qmp unix:path=cc-qmp,server=on,wait=off" +var qmpSingleSocketString = "-qmp unix:path=cc-qmp" func TestAppendSingleQMPSocketServer(t *testing.T) { qmp := QMPSocket{ - Type: "unix", - Name: "cc-qmp", - Server: true, - NoWait: true, + Type: "unix", + Name: "cc-qmp", + Server: true, + NoWait: true, + Protocol: Qmp, } testAppend(qmp, qmpSingleSocketServerString, t) @@ -715,29 +738,53 @@ func TestAppendSingleQMPSocketServer(t *testing.T) { func TestAppendSingleQMPSocket(t *testing.T) { qmp := QMPSocket{ - Type: Unix, - Name: "cc-qmp", - Server: false, + Type: Unix, + Name: "cc-qmp", + Server: false, + Protocol: Qmp, } testAppend(qmp, qmpSingleSocketString, t) } -var qmpSocketServerString = "-qmp unix:cc-qmp-1,server=on,wait=off -qmp unix:cc-qmp-2,server=on,wait=off" +var qmpSocketServerFdString = "-qmp unix:fd=3,server=on,wait=off" + +func TestAppendQMPSocketServerFd(t *testing.T) { + foo, _ := os.CreateTemp(os.TempDir(), "govmm-qemu-test") + + defer func() { + _ = foo.Close() + _ = os.Remove(foo.Name()) + }() + + qmp := QMPSocket{ + Type: "unix", + FD: foo, + Server: true, + NoWait: true, + Protocol: Qmp, + } + + testAppend(qmp, qmpSocketServerFdString, t) +} + +var qmpSocketServerString = "-qmp unix:path=cc-qmp-1,server=on,wait=off -qmp unix:path=cc-qmp-2,server=on,wait=off" func TestAppendQMPSocketServer(t *testing.T) { qmp := []QMPSocket{ { - Type: "unix", - Name: "cc-qmp-1", - Server: true, - NoWait: true, + Type: "unix", + Name: "cc-qmp-1", + Server: true, + NoWait: true, + Protocol: Qmp, }, { - Type: "unix", - Name: "cc-qmp-2", - Server: true, - NoWait: true, + Type: "unix", + Name: "cc-qmp-2", + Server: true, + NoWait: true, + Protocol: Qmp, }, } @@ -745,8 +792,7 @@ func TestAppendQMPSocketServer(t *testing.T) { } var pidfile = "/run/vc/vm/iamsandboxid/pidfile" -var logfile = "/run/vc/vm/iamsandboxid/logfile" -var qemuString = "-name cc-qemu -cpu host -uuid " + agentUUID + " -pidfile " + pidfile + " -D " + logfile +var qemuString = "-name cc-qemu -cpu host -uuid " + agentUUID + " -pidfile " + pidfile func TestAppendStrings(t *testing.T) { config := Config{ @@ -755,14 +801,12 @@ func TestAppendStrings(t *testing.T) { UUID: agentUUID, CPUModel: "host", PidFile: pidfile, - LogFile: logfile, } config.appendName() config.appendCPUModel() config.appendUUID() config.appendPidFile() - config.appendLogFile() result := strings.Join(config.qemuParams, " ") if result != qemuString { @@ -890,7 +934,7 @@ func TestBadQMPSockets(t *testing.T) { func TestBadDevices(t *testing.T) { c := &Config{} - 
c.appendDevices() + c.appendDevices(nil) if len(c.qemuParams) != 0 { t.Errorf("Expected empty qemuParams, found %s", c.qemuParams) } @@ -942,7 +986,7 @@ func TestBadDevices(t *testing.T) { }, } - c.appendDevices() + c.appendDevices(nil) if len(c.qemuParams) != 0 { t.Errorf("Expected empty qemuParams, found %s", c.qemuParams) } diff --git a/src/runtime/pkg/govmm/qemu/qmp.go b/src/runtime/pkg/govmm/qemu/qmp.go index 9bf091af841e..8241d917290d 100644 --- a/src/runtime/pkg/govmm/qemu/qmp.go +++ b/src/runtime/pkg/govmm/qemu/qmp.go @@ -702,6 +702,16 @@ func QMPStart(ctx context.Context, socket string, cfg QMPConfig, disconnectedCh return nil, nil, err } + return QMPStartWithConn(ctx, conn, cfg, disconnectedCh) +} + +// Same as QMPStart but with a pre-established connection +func QMPStartWithConn(ctx context.Context, conn net.Conn, cfg QMPConfig, disconnectedCh chan struct{}) (*QMP, *QMPVersion, error) { + if conn == nil { + close(disconnectedCh) + return nil, nil, fmt.Errorf("invalid connection") + } + connectedCh := make(chan *QMPVersion) q := startQMPLoop(conn, cfg, connectedCh, disconnectedCh) @@ -771,30 +781,33 @@ func (q *QMP) ExecuteQuit(ctx context.Context) error { return q.executeCommand(ctx, "quit", nil, nil) } -func (q *QMP) blockdevAddBaseArgs(driver, device, blockdevID string, ro bool) (map[string]interface{}, map[string]interface{}) { - var args map[string]interface{} - +func (q *QMP) blockdevAddBaseArgs(driver string, blockDevice *BlockDevice) map[string]interface{} { blockdevArgs := map[string]interface{}{ "driver": "raw", - "read-only": ro, + "read-only": blockDevice.ReadOnly, "file": map[string]interface{}{ "driver": driver, - "filename": device, + "filename": blockDevice.File, + "aio": string(blockDevice.AIO), }, } - blockdevArgs["node-name"] = blockdevID - args = blockdevArgs + blockdevArgs["node-name"] = blockDevice.ID - return args, blockdevArgs + return blockdevArgs } // ExecuteBlockdevAdd sends a blockdev-add to the QEMU instance. device is the // path of the device to add, e.g., /dev/rdb0, and blockdevID is an identifier // used to name the device. As this identifier will be passed directly to QMP, // it must obey QMP's naming rules, e,g., it must start with a letter. -func (q *QMP) ExecuteBlockdevAdd(ctx context.Context, device, blockdevID string, ro bool) error { - args, _ := q.blockdevAddBaseArgs("host_device", device, blockdevID, ro) +func (q *QMP) ExecuteBlockdevAdd(ctx context.Context, blockDevice *BlockDevice) error { + var args map[string]interface{} + if fi, err := os.Stat(blockDevice.File); err == nil && fi.Mode().IsRegular() { + args = q.blockdevAddBaseArgs("file", blockDevice) + } else { + args = q.blockdevAddBaseArgs("host_device", blockDevice) + } return q.executeCommand(ctx, "blockdev-add", args, nil) } @@ -806,29 +819,29 @@ func (q *QMP) ExecuteBlockdevAdd(ctx context.Context, device, blockdevID string, // direct denotes whether use of O_DIRECT (bypass the host page cache) // is enabled. noFlush denotes whether flush requests for the device are // ignored. 
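// Illustrative sketch (not from this patch): blockdev-add now takes the whole
// BlockDevice, so the AIO backend travels with the node definition, and regular
// files are added with the "file" driver while block nodes keep "host_device".
// The node name and device path are placeholders.
//
// Assumed imports: "context" and the govmm qemu package.
func exampleBlockdevAdd(ctx context.Context, q *qemu.QMP) error {
	dev := qemu.BlockDevice{
		ID:       "drive_vol0",
		File:     "/dev/rbd0",
		ReadOnly: false,
		AIO:      qemu.IOUring, // newly added io_uring backend
	}
	return q.ExecuteBlockdevAdd(ctx, &dev)
}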
-func (q *QMP) ExecuteBlockdevAddWithCache(ctx context.Context, device, blockdevID string, direct, noFlush, ro bool) error { - args, blockdevArgs := q.blockdevAddBaseArgs("host_device", device, blockdevID, ro) +func (q *QMP) ExecuteBlockdevAddWithCache(ctx context.Context, blockDevice *BlockDevice, direct, noFlush bool) error { + blockdevArgs := q.blockdevAddBaseArgs("host_device", blockDevice) blockdevArgs["cache"] = map[string]interface{}{ "direct": direct, "no-flush": noFlush, } - return q.executeCommand(ctx, "blockdev-add", args, nil) + return q.executeCommand(ctx, "blockdev-add", blockdevArgs, nil) } // ExecuteBlockdevAddWithDriverCache has three one parameter driver // than ExecuteBlockdevAddWithCache. // Parameter driver can set the driver of block device. -func (q *QMP) ExecuteBlockdevAddWithDriverCache(ctx context.Context, driver, device, blockdevID string, direct, noFlush, ro bool) error { - args, blockdevArgs := q.blockdevAddBaseArgs(driver, device, blockdevID, ro) +func (q *QMP) ExecuteBlockdevAddWithDriverCache(ctx context.Context, driver string, blockDevice *BlockDevice, direct, noFlush bool) error { + blockdevArgs := q.blockdevAddBaseArgs(driver, blockDevice) blockdevArgs["cache"] = map[string]interface{}{ "direct": direct, "no-flush": noFlush, } - return q.executeCommand(ctx, "blockdev-add", args, nil) + return q.executeCommand(ctx, "blockdev-add", blockdevArgs, nil) } // ExecuteDeviceAdd adds the guest portion of a device to a QEMU instance @@ -1209,10 +1222,11 @@ func (q *QMP) ExecutePCIVFIOMediatedDeviceAdd(ctx context.Context, devID, sysfsd } // ExecuteAPVFIOMediatedDeviceAdd adds a VFIO mediated AP device to a QEMU instance using the device_add command. -func (q *QMP) ExecuteAPVFIOMediatedDeviceAdd(ctx context.Context, sysfsdev string) error { +func (q *QMP) ExecuteAPVFIOMediatedDeviceAdd(ctx context.Context, sysfsdev string, devID string) error { args := map[string]interface{}{ "driver": VfioAP, "sysfsdev": sysfsdev, + "id": devID, } return q.executeCommand(ctx, "device_add", args, nil) } @@ -1516,7 +1530,7 @@ func (q *QMP) ExecuteGetFD(ctx context.Context, fdname string, fd *os.File) erro // ExecuteCharDevUnixSocketAdd adds a character device using as backend a unix socket, // id is an identifier for the device, path specifies the local path of the unix socket, // wait is to block waiting for a client to connect, server specifies that the socket is a listening socket. -func (q *QMP) ExecuteCharDevUnixSocketAdd(ctx context.Context, id, path string, wait, server bool) error { +func (q *QMP) ExecuteCharDevUnixSocketAdd(ctx context.Context, id, path string, wait, server bool, reconnect uint32) error { data := map[string]interface{}{ "server": server, "addr": map[string]interface{}{ @@ -1532,6 +1546,10 @@ func (q *QMP) ExecuteCharDevUnixSocketAdd(ctx context.Context, id, path string, data["wait"] = wait } + if reconnect > 0 { + data["reconnect"] = reconnect + } + args := map[string]interface{}{ "id": id, "backend": map[string]interface{}{ diff --git a/src/runtime/pkg/govmm/qemu/qmp_test.go b/src/runtime/pkg/govmm/qemu/qmp_test.go index 23114a0d72cc..06738a40d671 100644 --- a/src/runtime/pkg/govmm/qemu/qmp_test.go +++ b/src/runtime/pkg/govmm/qemu/qmp_test.go @@ -273,6 +273,22 @@ func TestQMPStartBadPath(t *testing.T) { <-disconnectedCh } +// Checks that a call to QMPStartWithConn with a nil connection exits gracefully. +// +// We call QMPStartWithConn with a nil connection. +// +// An error should be returned and the disconnected channel should be closed. 
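// Illustrative sketch (not from this patch): the chardev helper gained a
// reconnect argument (seconds, 0 disables it) and vfio-ap mediated hotplug now
// takes an explicit device id, mirroring the updated tests below. The socket
// path and ids are placeholders.
func exampleQMPHotplug(ctx context.Context, q *qemu.QMP) error {
	if err := q.ExecuteCharDevUnixSocketAdd(ctx, "char-vub0", "/run/vub0.sock", false, true, 5); err != nil {
		return err
	}
	sysfsDev := "/sys/devices/vfio_ap/matrix/a297db4a-f4c2-11e6-90f6-d3b88d6c9525"
	return q.ExecuteAPVFIOMediatedDeviceAdd(ctx, sysfsDev, "vfio-ap0")
}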
+func TestQMPStartWithConnNil(t *testing.T) { + cfg := QMPConfig{Logger: qmpTestLogger{}} + disconnectedCh := make(chan struct{}) + q, _, err := QMPStartWithConn(context.Background(), nil, cfg, disconnectedCh) + if err == nil { + t.Errorf("Expected error") + q.Shutdown() + } + <-disconnectedCh +} + // Checks that the qmp_capabilities command is correctly sent. // // We start a QMPLoop, send the qmp_capabilities command and stop the @@ -400,8 +416,13 @@ func TestQMPBlockdevAdd(t *testing.T) { cfg := QMPConfig{Logger: qmpTestLogger{}} q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh) q.version = checkVersion(t, connectedCh) - err := q.ExecuteBlockdevAdd(context.Background(), "/dev/rbd0", - fmt.Sprintf("drive_%s", volumeUUID), false) + dev := BlockDevice{ + ID: fmt.Sprintf("drive_%s", volumeUUID), + File: "/dev/rbd0", + ReadOnly: false, + AIO: Native, + } + err := q.ExecuteBlockdevAdd(context.Background(), &dev) if err != nil { t.Fatalf("Unexpected error %v", err) } @@ -424,8 +445,13 @@ func TestQMPBlockdevAddWithCache(t *testing.T) { cfg := QMPConfig{Logger: qmpTestLogger{}} q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh) q.version = checkVersion(t, connectedCh) - err := q.ExecuteBlockdevAddWithCache(context.Background(), "/dev/rbd0", - fmt.Sprintf("drive_%s", volumeUUID), true, true, false) + dev := BlockDevice{ + ID: fmt.Sprintf("drive_%s", volumeUUID), + File: "/dev/rbd0", + ReadOnly: false, + AIO: Native, + } + err := q.ExecuteBlockdevAddWithCache(context.Background(), &dev, true, true) if err != nil { t.Fatalf("Unexpected error %v", err) } @@ -1102,7 +1128,7 @@ func TestQMPAPVFIOMediatedDeviceAdd(t *testing.T) { q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh) checkVersion(t, connectedCh) sysfsDev := "/sys/devices/vfio_ap/matrix/a297db4a-f4c2-11e6-90f6-d3b88d6c9525" - err := q.ExecuteAPVFIOMediatedDeviceAdd(context.Background(), sysfsDev) + err := q.ExecuteAPVFIOMediatedDeviceAdd(context.Background(), sysfsDev, "test-id") if err != nil { t.Fatalf("Unexpected error %v", err) } @@ -1419,7 +1445,7 @@ func TestExecuteCharDevUnixSocketAdd(t *testing.T) { cfg := QMPConfig{Logger: qmpTestLogger{}} q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh) checkVersion(t, connectedCh) - err := q.ExecuteCharDevUnixSocketAdd(context.Background(), "foo", "foo.sock", false, true) + err := q.ExecuteCharDevUnixSocketAdd(context.Background(), "foo", "foo.sock", false, true, 1) if err != nil { t.Fatalf("Unexpected error %v", err) } diff --git a/src/runtime/pkg/govmm/vmm_arm64.go b/src/runtime/pkg/govmm/vmm_arm64.go index a01cd683f9c5..216851d54b55 100644 --- a/src/runtime/pkg/govmm/vmm_arm64.go +++ b/src/runtime/pkg/govmm/vmm_arm64.go @@ -6,11 +6,11 @@ package govmm -//In qemu, maximum number of vCPUs depends on the GIC version, or on how -//many redistributors we can fit into the memory map. -//related codes are under github.com/qemu/qemu/hw/arm/virt.c(Line 135 and 1306 in stable-2.11) -//for now, qemu only supports v2 and v3, we treat v4 as v3 based on -//backward compatibility. +// In qemu, maximum number of vCPUs depends on the GIC version, or on how +// many redistributors we can fit into the memory map. +// related codes are under github.com/qemu/qemu/hw/arm/virt.c(Line 135 and 1306 in stable-2.11) +// for now, qemu only supports v2 and v3, we treat v4 as v3 based on +// backward compatibility. 
var gicList = map[uint32]uint32{ uint32(2): uint32(8), uint32(3): uint32(123), diff --git a/src/runtime/pkg/hypervisors/hypervisor_state.go b/src/runtime/pkg/hypervisors/hypervisor_state.go index b1f58e7fa669..7384cca5e464 100644 --- a/src/runtime/pkg/hypervisors/hypervisor_state.go +++ b/src/runtime/pkg/hypervisors/hypervisor_state.go @@ -5,6 +5,8 @@ package hypervisors +import "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" + // Bridge is a bridge where devices can be hot plugged type Bridge struct { // DeviceAddr contains information about devices plugged and its address in the bridge @@ -28,7 +30,6 @@ type CPUDevice struct { type HypervisorState struct { BlockIndexMap map[int]struct{} - // Type of hypervisor, E.g. qemu/firecracker/acrn. Type string UUID string @@ -44,7 +45,6 @@ type HypervisorState struct { HotpluggedMemory int VirtiofsDaemonPid int Pid int - PCIeRootPort int - - HotplugVFIOOnRootBus bool + HotPlugVFIO config.PCIePort + ColdPlugVFIO config.PCIePort } diff --git a/src/runtime/pkg/kata-monitor/metrics.go b/src/runtime/pkg/kata-monitor/metrics.go index 98ecb68f0970..e45a8f19dcc3 100644 --- a/src/runtime/pkg/kata-monitor/metrics.go +++ b/src/runtime/pkg/kata-monitor/metrics.go @@ -114,25 +114,32 @@ func (km *KataMonitor) ProcessMetricsRequest(w http.ResponseWriter, r *http.Requ writer = gz } - // create encoder to encode metrics. - encoder := expfmt.NewEncoder(writer, contentType) - - // gather metrics collected for management agent. - mfs, err := prometheus.DefaultGatherer.Gather() + filterFamilies, err := getFilterFamilyFromReq(r) if err != nil { - monitorLog.WithError(err).Error("failed to Gather metrics from prometheus.DefaultGatherer") - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) return } - // encode metric gathered in current process - if err := encodeMetricFamily(mfs, encoder); err != nil { - monitorLog.WithError(err).Warnf("failed to encode metrics") + // create encoder to encode metrics. + encoder := expfmt.NewEncoder(writer, contentType) + + if len(filterFamilies) == 0 { + // gather metrics collected for management agent. + mfs, err := prometheus.DefaultGatherer.Gather() + if err != nil { + monitorLog.WithError(err).Error("failed to Gather metrics from prometheus.DefaultGatherer") + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(err.Error())) + return + } + + // encode metric gathered in current process + if err := encodeMetricFamily(mfs, encoder); err != nil { + monitorLog.WithError(err).Warnf("failed to encode metrics") + } } // aggregate sandboxes metrics and write to response by encoder - if err := km.aggregateSandboxMetrics(encoder); err != nil { + if err := km.aggregateSandboxMetrics(encoder, filterFamilies); err != nil { monitorLog.WithError(err).Errorf("failed aggregateSandboxMetrics") scrapeFailedCount.Inc() } @@ -155,7 +162,7 @@ func encodeMetricFamily(mfs []*dto.MetricFamily, encoder expfmt.Encoder) error { } // aggregateSandboxMetrics will get metrics from one sandbox and do some process -func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error { +func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder, filterFamilies []string) error { // get all kata sandboxes from cache sandboxes := km.sandboxCache.getSandboxList() // save running kata pods as a metrics. @@ -230,9 +237,21 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error { } // write metrics to response. 
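// Illustrative sketch (not from this patch): the metrics endpoint can now be
// narrowed with a comma-separated filter_family query parameter, matched as a
// prefix against metric family names. The monitor address and the family names
// below are placeholders.
//
// Assumed import: "net/http".
func exampleFilteredScrape() (*http.Response, error) {
	return http.Get("http://127.0.0.1:8090/metrics?filter_family=kata_shim,kata_hypervisor")
}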
- for _, mf := range metricsMap { - if err := encoder.Encode(mf); err != nil { - return err + if len(filterFamilies) > 0 { + for _, filterName := range filterFamilies { + for fullName, mf := range metricsMap { + if strings.HasPrefix(fullName, filterName) { + if err := encoder.Encode(mf); err != nil { + return err + } + } + } + } + } else { + for _, mf := range metricsMap { + if err := encoder.Encode(mf); err != nil { + return err + } } } return nil diff --git a/src/runtime/pkg/kata-monitor/pprof.go b/src/runtime/pkg/kata-monitor/pprof.go index 0d768e428d40..afaae85567fd 100644 --- a/src/runtime/pkg/kata-monitor/pprof.go +++ b/src/runtime/pkg/kata-monitor/pprof.go @@ -32,7 +32,7 @@ func (km *KataMonitor) composeSocketAddress(r *http.Request) (string, error) { return "", err } - return shim.SocketAddress(sandbox), nil + return shim.ClientSocketAddress(sandbox), nil } func (km *KataMonitor) proxyRequest(w http.ResponseWriter, r *http.Request, diff --git a/src/runtime/pkg/kata-monitor/shim_client.go b/src/runtime/pkg/kata-monitor/shim_client.go index 388ac6fff5b5..3730c8af0adb 100644 --- a/src/runtime/pkg/kata-monitor/shim_client.go +++ b/src/runtime/pkg/kata-monitor/shim_client.go @@ -8,6 +8,7 @@ package katamonitor import ( "fmt" "net/http" + "strings" "time" shim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2" @@ -36,3 +37,11 @@ func getSandboxIDFromReq(r *http.Request) (string, error) { func getSandboxFS() string { return shim.GetSandboxesStoragePath() } + +func getFilterFamilyFromReq(r *http.Request) ([]string, error) { + filterFamilies := r.URL.Query().Get("filter_family") + if filterFamilies != "" { + return strings.Split(filterFamilies, ","), nil + } + return nil, nil +} diff --git a/src/runtime/pkg/katatestutils/constraints.go b/src/runtime/pkg/katatestutils/constraints.go index 48b3e5d3b30f..93bb67064b63 100644 --- a/src/runtime/pkg/katatestutils/constraints.go +++ b/src/runtime/pkg/katatestutils/constraints.go @@ -87,15 +87,16 @@ func getKernelVersion() (string, error) { // Examples of actual kernel versions which can be made into valid semver // format by calling this function: // -// centos: 3.10.0-957.12.1.el7.x86_64 -// fedora: 5.0.9-200.fc29.x86_64 +// centos: 3.10.0-957.12.1.el7.x86_64 +// fedora: 5.0.9-200.fc29.x86_64 // // For some self compiled kernel, the kernel version will be with "+" as its suffix // For example: -// 5.12.0-rc4+ +// +// 5.12.0-rc4+ +// // These kernel version can't be parsed by the current lib and lead to panic // therefore the '+' should be removed. -// func fixKernelVersion(version string) string { version = strings.Replace(version, "_", "-", -1) return strings.Replace(version, "+", "", -1) diff --git a/src/runtime/pkg/katatestutils/constraints_api.go b/src/runtime/pkg/katatestutils/constraints_api.go index ce27ee424a63..4a8e5ab19a54 100644 --- a/src/runtime/pkg/katatestutils/constraints_api.go +++ b/src/runtime/pkg/katatestutils/constraints_api.go @@ -84,12 +84,12 @@ func NewTestConstraint(debug bool) TestConstraint { // // Notes: // -// - Constraints are applied in the order specified. -// - A constraint type (user, kernel) can only be specified once. -// - If the function fails to determine whether it can check the constraints, -// it will panic. Since this is facility is used for testing, this seems like -// the best approach as it unburdens the caller from checking for an error -// (which should never be ignored). +// - Constraints are applied in the order specified. 
+// - A constraint type (user, kernel) can only be specified once. +// - If the function fails to determine whether it can check the constraints, +// it will panic. Since this is facility is used for testing, this seems like +// the best approach as it unburdens the caller from checking for an error +// (which should never be ignored). func (tc *TestConstraint) NotValid(constraints ...Constraint) bool { if len(constraints) == 0 { panic("need atleast one constraint") diff --git a/src/runtime/pkg/katatestutils/utils.go b/src/runtime/pkg/katatestutils/utils.go index 5676f4451c0a..041a2ec5ed99 100644 --- a/src/runtime/pkg/katatestutils/utils.go +++ b/src/runtime/pkg/katatestutils/utils.go @@ -14,6 +14,7 @@ import ( "strconv" "testing" + "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/assert" ) @@ -211,18 +212,20 @@ type RuntimeConfigOptions struct { DefaultGuestHookPath string KernelPath string ImagePath string + RootfsType string KernelParams string MachineType string - ShimPath string LogPath string BlockDeviceDriver string + BlockDeviceAIO string SharedFS string VirtioFSDaemon string JaegerEndpoint string JaegerUser string JaegerPassword string PFlash []string - PCIeRootPort uint32 + HotPlugVFIO config.PCIePort + ColdPlugVFIO config.PCIePort DefaultVCPUCount uint32 DefaultMaxVCPUCount uint32 DefaultMemSize uint32 @@ -230,12 +233,10 @@ type RuntimeConfigOptions struct { DefaultMsize9p uint32 DisableBlock bool EnableIOThreads bool - HotplugVFIOOnRootBus bool DisableNewNetNs bool HypervisorDebug bool RuntimeDebug bool RuntimeTrace bool - ShimDebug bool AgentDebug bool AgentTrace bool EnablePprof bool @@ -305,26 +306,24 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string { path = "` + config.HypervisorPath + `" kernel = "` + config.KernelPath + `" block_device_driver = "` + config.BlockDeviceDriver + `" + block_device_aio = "` + config.BlockDeviceAIO + `" kernel_params = "` + config.KernelParams + `" image = "` + config.ImagePath + `" + rootfs_type = "` + config.RootfsType + `" machine_type = "` + config.MachineType + `" default_vcpus = ` + strconv.FormatUint(uint64(config.DefaultVCPUCount), 10) + ` default_maxvcpus = ` + strconv.FormatUint(uint64(config.DefaultMaxVCPUCount), 10) + ` default_memory = ` + strconv.FormatUint(uint64(config.DefaultMemSize), 10) + ` disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + ` enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + ` - hotplug_vfio_on_root_bus = ` + strconv.FormatBool(config.HotplugVFIOOnRootBus) + ` - pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + ` + cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `" + hot_plug_vfio = "` + config.HotPlugVFIO.String() + `" msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + ` enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + ` guest_hook_path = "` + config.DefaultGuestHookPath + `" shared_fs = "` + config.SharedFS + `" virtio_fs_daemon = "` + config.VirtioFSDaemon + `" - [shim.kata] - path = "` + config.ShimPath + `" - enable_debug = ` + strconv.FormatBool(config.ShimDebug) + ` - [agent.kata] enable_debug = ` + strconv.FormatBool(config.AgentDebug) + ` enable_tracing = ` + strconv.FormatBool(config.AgentTrace) + ` diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 2aad22bd8505..58faec56c796 100644 --- 
a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -9,6 +9,11 @@ package katautils +import ( + config "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" + govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" +) + // name is the name of the runtime var NAME = "@RUNTIME_NAME@" @@ -44,6 +49,7 @@ var defaultJailerPath = "/usr/bin/jailer" var defaultImagePath = "/usr/share/kata-containers/kata-containers.img" var defaultKernelPath = "/usr/share/kata-containers/vmlinuz.container" var defaultInitrdPath = "/usr/share/kata-containers/kata-containers-initrd.img" +var defaultRootfsType = "ext4" var defaultFirmwarePath = "" var defaultFirmwareVolumePath = "" var defaultMachineAccelerators = "" @@ -63,6 +69,7 @@ const defaultBridgesCount uint32 = 1 const defaultInterNetworkingModel = "tcfilter" const defaultDisableBlockDeviceUse bool = false const defaultBlockDeviceDriver = "virtio-scsi" +const defaultBlockDeviceAIO string = "io_uring" const defaultBlockDeviceCacheSet bool = false const defaultBlockDeviceCacheDirect bool = false const defaultBlockDeviceCacheNoflush bool = false @@ -73,21 +80,23 @@ const defaultEnableIOMMU bool = false const defaultEnableIOMMUPlatform bool = false const defaultFileBackedMemRootDir string = "" const defaultEnableDebug bool = false +const defaultExtraMonitorSocket govmmQemu.MonitorProtocol = "" const defaultDisableNestingChecks bool = false const defaultMsize9p uint32 = 8192 -const defaultHotplugVFIOOnRootBus bool = false -const defaultPCIeRootPort = 0 const defaultEntropySource = "/dev/urandom" const defaultGuestHookPath string = "" -const defaultVirtioFSCacheMode = "none" +const defaultVirtioFSCacheMode = "never" const defaultDisableImageNvdimm = false const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/" +const defaultVhostUserDeviceReconnect = 0 const defaultRxRateLimiterMaxRate = uint64(0) const defaultTxRateLimiterMaxRate = uint64(0) const defaultConfidentialGuest = false +const defaultSevSnpGuest = false const defaultGuestSwap = false const defaultRootlessHypervisor = false const defaultDisableSeccomp = false +const defaultDisableGuestSeLinux = true const defaultVfioMode = "guest-kernel" const defaultLegacySerial = false @@ -98,3 +107,6 @@ const defaultVMCacheEndpoint string = "/var/run/kata-containers/cache.sock" // Default config file used by stateless systems. var defaultRuntimeConfiguration = "@CONFIG_PATH@" + +const defaultHotPlugVFIO = config.NoPort +const defaultColdPlugVFIO = config.NoPort diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 0903c8ea9ed0..9dc0ff57c0e4 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -10,7 +10,6 @@ package katautils import ( "errors" "fmt" - "io/ioutil" "os" "path/filepath" "reflect" @@ -23,6 +22,7 @@ import ( govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" @@ -38,11 +38,11 @@ const ( // tables). 
The names of these tables are in dotted ("nested table") // form: // -// [.] +// [.] // // The components are hypervisor, and agent. For example, // -// [agent.kata] +// [agent.kata] // // The currently supported types are listed below: const ( @@ -51,6 +51,7 @@ const ( clhHypervisorTableType = "clh" qemuHypervisorTableType = "qemu" acrnHypervisorTableType = "acrn" + dragonballHypervisorTableType = "dragonball" // the maximum amount of PCI bridges that can be cold plugged in a VM maxPCIBridges uint32 = 5 @@ -59,9 +60,9 @@ const ( type tomlConfig struct { Hypervisor map[string]hypervisor Agent map[string]agent - Runtime runtime Image image Factory factory + Runtime runtime } type image struct { @@ -77,81 +78,88 @@ type factory struct { } type hypervisor struct { - Path string `toml:"path"` - JailerPath string `toml:"jailer_path"` - Kernel string `toml:"kernel"` - CtlPath string `toml:"ctlpath"` - Initrd string `toml:"initrd"` - Image string `toml:"image"` - Firmware string `toml:"firmware"` - FirmwareVolume string `toml:"firmware_volume"` - MachineAccelerators string `toml:"machine_accelerators"` - CPUFeatures string `toml:"cpu_features"` - KernelParams string `toml:"kernel_params"` - MachineType string `toml:"machine_type"` - BlockDeviceDriver string `toml:"block_device_driver"` - EntropySource string `toml:"entropy_source"` - SharedFS string `toml:"shared_fs"` - VirtioFSDaemon string `toml:"virtio_fs_daemon"` - VirtioFSCache string `toml:"virtio_fs_cache"` - VhostUserStorePath string `toml:"vhost_user_store_path"` - FileBackedMemRootDir string `toml:"file_mem_backend"` - GuestHookPath string `toml:"guest_hook_path"` - GuestMemoryDumpPath string `toml:"guest_memory_dump_path"` - SeccompSandbox string `toml:"seccompsandbox"` - HypervisorPathList []string `toml:"valid_hypervisor_paths"` - JailerPathList []string `toml:"valid_jailer_paths"` - CtlPathList []string `toml:"valid_ctlpaths"` - VirtioFSDaemonList []string `toml:"valid_virtio_fs_daemon_paths"` - VirtioFSExtraArgs []string `toml:"virtio_fs_extra_args"` - PFlashList []string `toml:"pflashes"` - VhostUserStorePathList []string `toml:"valid_vhost_user_store_paths"` - FileBackedMemRootList []string `toml:"valid_file_mem_backends"` - EntropySourceList []string `toml:"valid_entropy_sources"` - EnableAnnotations []string `toml:"enable_annotations"` - RxRateLimiterMaxRate uint64 `toml:"rx_rate_limiter_max_rate"` - TxRateLimiterMaxRate uint64 `toml:"tx_rate_limiter_max_rate"` - MemOffset uint64 `toml:"memory_offset"` - DiskRateLimiterBwMaxRate int64 `toml:"disk_rate_limiter_bw_max_rate"` - DiskRateLimiterBwOneTimeBurst int64 `toml:"disk_rate_limiter_bw_one_time_burst"` - DiskRateLimiterOpsMaxRate int64 `toml:"disk_rate_limiter_ops_max_rate"` - DiskRateLimiterOpsOneTimeBurst int64 `toml:"disk_rate_limiter_ops_one_time_burst"` - NetRateLimiterBwMaxRate int64 `toml:"net_rate_limiter_bw_max_rate"` - NetRateLimiterBwOneTimeBurst int64 `toml:"net_rate_limiter_bw_one_time_burst"` - NetRateLimiterOpsMaxRate int64 `toml:"net_rate_limiter_ops_max_rate"` - NetRateLimiterOpsOneTimeBurst int64 `toml:"net_rate_limiter_ops_one_time_burst"` - VirtioFSCacheSize uint32 `toml:"virtio_fs_cache_size"` - DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"` - MemorySize uint32 `toml:"default_memory"` - MemSlots uint32 `toml:"memory_slots"` - DefaultMaxMemorySize uint64 `toml:"default_maxmemory"` - DefaultBridges uint32 `toml:"default_bridges"` - Msize9p uint32 `toml:"msize_9p"` - PCIeRootPort uint32 `toml:"pcie_root_port"` - NumVCPUs int32 `toml:"default_vcpus"` - 
BlockDeviceCacheSet bool `toml:"block_device_cache_set"` - BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"` - BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"` - EnableVhostUserStore bool `toml:"enable_vhost_user_store"` - DisableBlockDeviceUse bool `toml:"disable_block_device_use"` - MemPrealloc bool `toml:"enable_mem_prealloc"` - HugePages bool `toml:"enable_hugepages"` - VirtioMem bool `toml:"enable_virtio_mem"` - IOMMU bool `toml:"enable_iommu"` - IOMMUPlatform bool `toml:"enable_iommu_platform"` - Debug bool `toml:"enable_debug"` - DisableNestingChecks bool `toml:"disable_nesting_checks"` - EnableIOThreads bool `toml:"enable_iothreads"` - DisableImageNvdimm bool `toml:"disable_image_nvdimm"` - HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"` - DisableVhostNet bool `toml:"disable_vhost_net"` - GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"` - ConfidentialGuest bool `toml:"confidential_guest"` - GuestSwap bool `toml:"enable_guest_swap"` - Rootless bool `toml:"rootless"` - DisableSeccomp bool `toml:"disable_seccomp"` - DisableSeLinux bool `toml:"disable_selinux"` - LegacySerial bool `toml:"use_legacy_serial"` + Path string `toml:"path"` + JailerPath string `toml:"jailer_path"` + Kernel string `toml:"kernel"` + CtlPath string `toml:"ctlpath"` + Initrd string `toml:"initrd"` + Image string `toml:"image"` + RootfsType string `toml:"rootfs_type"` + Firmware string `toml:"firmware"` + FirmwareVolume string `toml:"firmware_volume"` + MachineAccelerators string `toml:"machine_accelerators"` + CPUFeatures string `toml:"cpu_features"` + KernelParams string `toml:"kernel_params"` + MachineType string `toml:"machine_type"` + BlockDeviceDriver string `toml:"block_device_driver"` + EntropySource string `toml:"entropy_source"` + SharedFS string `toml:"shared_fs"` + VirtioFSDaemon string `toml:"virtio_fs_daemon"` + VirtioFSCache string `toml:"virtio_fs_cache"` + VhostUserStorePath string `toml:"vhost_user_store_path"` + FileBackedMemRootDir string `toml:"file_mem_backend"` + GuestHookPath string `toml:"guest_hook_path"` + GuestMemoryDumpPath string `toml:"guest_memory_dump_path"` + SeccompSandbox string `toml:"seccompsandbox"` + BlockDeviceAIO string `toml:"block_device_aio"` + HypervisorPathList []string `toml:"valid_hypervisor_paths"` + JailerPathList []string `toml:"valid_jailer_paths"` + CtlPathList []string `toml:"valid_ctlpaths"` + VirtioFSDaemonList []string `toml:"valid_virtio_fs_daemon_paths"` + VirtioFSExtraArgs []string `toml:"virtio_fs_extra_args"` + PFlashList []string `toml:"pflashes"` + VhostUserStorePathList []string `toml:"valid_vhost_user_store_paths"` + FileBackedMemRootList []string `toml:"valid_file_mem_backends"` + EntropySourceList []string `toml:"valid_entropy_sources"` + EnableAnnotations []string `toml:"enable_annotations"` + RxRateLimiterMaxRate uint64 `toml:"rx_rate_limiter_max_rate"` + TxRateLimiterMaxRate uint64 `toml:"tx_rate_limiter_max_rate"` + MemOffset uint64 `toml:"memory_offset"` + DefaultMaxMemorySize uint64 `toml:"default_maxmemory"` + DiskRateLimiterBwMaxRate int64 `toml:"disk_rate_limiter_bw_max_rate"` + DiskRateLimiterBwOneTimeBurst int64 `toml:"disk_rate_limiter_bw_one_time_burst"` + DiskRateLimiterOpsMaxRate int64 `toml:"disk_rate_limiter_ops_max_rate"` + DiskRateLimiterOpsOneTimeBurst int64 `toml:"disk_rate_limiter_ops_one_time_burst"` + NetRateLimiterBwMaxRate int64 `toml:"net_rate_limiter_bw_max_rate"` + NetRateLimiterBwOneTimeBurst int64 `toml:"net_rate_limiter_bw_one_time_burst"` + 
NetRateLimiterOpsMaxRate int64 `toml:"net_rate_limiter_ops_max_rate"` + NetRateLimiterOpsOneTimeBurst int64 `toml:"net_rate_limiter_ops_one_time_burst"` + VirtioFSCacheSize uint32 `toml:"virtio_fs_cache_size"` + VirtioFSQueueSize uint32 `toml:"virtio_fs_queue_size"` + DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"` + MemorySize uint32 `toml:"default_memory"` + MemSlots uint32 `toml:"memory_slots"` + DefaultBridges uint32 `toml:"default_bridges"` + Msize9p uint32 `toml:"msize_9p"` + NumVCPUs int32 `toml:"default_vcpus"` + BlockDeviceCacheSet bool `toml:"block_device_cache_set"` + BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"` + BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"` + EnableVhostUserStore bool `toml:"enable_vhost_user_store"` + VhostUserDeviceReconnect uint32 `toml:"vhost_user_reconnect_timeout_sec"` + DisableBlockDeviceUse bool `toml:"disable_block_device_use"` + MemPrealloc bool `toml:"enable_mem_prealloc"` + HugePages bool `toml:"enable_hugepages"` + VirtioMem bool `toml:"enable_virtio_mem"` + IOMMU bool `toml:"enable_iommu"` + IOMMUPlatform bool `toml:"enable_iommu_platform"` + Debug bool `toml:"enable_debug"` + DisableNestingChecks bool `toml:"disable_nesting_checks"` + EnableIOThreads bool `toml:"enable_iothreads"` + DisableImageNvdimm bool `toml:"disable_image_nvdimm"` + HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"` + ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"` + DisableVhostNet bool `toml:"disable_vhost_net"` + GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"` + ConfidentialGuest bool `toml:"confidential_guest"` + SevSnpGuest bool `toml:"sev_snp_guest"` + GuestSwap bool `toml:"enable_guest_swap"` + Rootless bool `toml:"rootless"` + DisableSeccomp bool `toml:"disable_seccomp"` + DisableSeLinux bool `toml:"disable_selinux"` + DisableGuestSeLinux bool `toml:"disable_guest_selinux"` + LegacySerial bool `toml:"use_legacy_serial"` + ExtraMonitorSocket govmmQemu.MonitorProtocol `toml:"extra_monitor_socket"` } type runtime struct { @@ -160,12 +168,14 @@ type runtime struct { JaegerUser string `toml:"jaeger_user"` JaegerPassword string `toml:"jaeger_password"` VfioMode string `toml:"vfio_mode"` + GuestSeLinuxLabel string `toml:"guest_selinux_label"` SandboxBindMounts []string `toml:"sandbox_bind_mounts"` Experimental []string `toml:"experimental"` - Debug bool `toml:"enable_debug"` Tracing bool `toml:"enable_tracing"` DisableNewNetNs bool `toml:"disable_new_netns"` DisableGuestSeccomp bool `toml:"disable_guest_seccomp"` + EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"` + Debug bool `toml:"enable_debug"` SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` EnablePprof bool `toml:"enable_pprof"` @@ -254,6 +264,16 @@ func (h hypervisor) image() (string, error) { return ResolvePath(p) } +func (h hypervisor) rootfsType() (string, error) { + p := h.RootfsType + + if p == "" { + p = "ext4" + } + + return p, nil +} + func (h hypervisor) firmware() (string, error) { p := h.Firmware @@ -267,6 +287,19 @@ func (h hypervisor) firmware() (string, error) { return ResolvePath(p) } +func (h hypervisor) coldPlugVFIO() config.PCIePort { + if h.ColdPlugVFIO == "" { + return defaultColdPlugVFIO + } + return h.ColdPlugVFIO +} +func (h hypervisor) hotPlugVFIO() config.PCIePort { + if h.HotPlugVFIO == "" { + return defaultHotPlugVFIO + } + return h.HotPlugVFIO +} + func (h hypervisor) firmwareVolume() (string, error) { p := h.FirmwareVolume @@ -468,8 +501,40 @@ func (h 
hypervisor) blockDeviceDriver() (string, error) { return "", fmt.Errorf("Invalid hypervisor block storage driver %v specified (supported drivers: %v)", h.BlockDeviceDriver, supportedBlockDrivers) } +func (h hypervisor) blockDeviceAIO() (string, error) { + supportedBlockAIO := []string{config.AIOIOUring, config.AIONative, config.AIOThreads} + + if h.BlockDeviceAIO == "" { + return defaultBlockDeviceAIO, nil + } + + for _, b := range supportedBlockAIO { + if b == h.BlockDeviceAIO { + return h.BlockDeviceAIO, nil + } + } + + return "", fmt.Errorf("Invalid hypervisor block storage I/O mechanism %v specified (supported AIO: %v)", h.BlockDeviceAIO, supportedBlockAIO) +} + +func (h hypervisor) extraMonitorSocket() (govmmQemu.MonitorProtocol, error) { + supportedExtraMonitor := []govmmQemu.MonitorProtocol{govmmQemu.Hmp, govmmQemu.Qmp, govmmQemu.QmpPretty} + + if h.ExtraMonitorSocket == "" { + return "", nil + } + + for _, extra := range supportedExtraMonitor { + if extra == h.ExtraMonitorSocket { + return extra, nil + } + } + + return "", fmt.Errorf("Invalid hypervisor extra monitor socket %v specified (supported values: %v)", h.ExtraMonitorSocket, supportedExtraMonitor) +} + func (h hypervisor) sharedFS() (string, error) { - supportedSharedFS := []string{config.Virtio9P, config.VirtioFS, config.VirtioFSNydus} + supportedSharedFS := []string{config.Virtio9P, config.VirtioFS, config.VirtioFSNydus, config.NoSharedFS} if h.SharedFS == "" { return config.VirtioFS, nil @@ -625,6 +690,11 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return vc.HypervisorConfig{}, err } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + firmware, err := h.firmware() if err != nil { return vc.HypervisorConfig{}, err @@ -648,8 +718,9 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { KernelPath: kernel, InitrdPath: initrd, ImagePath: image, + RootfsType: rootfsType, FirmwarePath: firmware, - KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), + KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), NumVCPUs: h.defaultVCPUs(), DefaultMaxVCPUs: h.defaultMaxVCPUs(), MemorySize: h.defaultMemSz(), @@ -670,6 +741,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { TxRateLimiterMaxRate: txRateLimiterMaxRate, EnableAnnotations: h.EnableAnnotations, DisableSeLinux: h.DisableSeLinux, + DisableGuestSeLinux: true, // Guest SELinux is not supported in Firecracker }, nil } @@ -694,6 +766,11 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return vc.HypervisorConfig{}, err } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + pflashes, err := h.PFlash() if err != nil { return vc.HypervisorConfig{}, err @@ -722,11 +799,26 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { kataUtilsLogger.Info("Setting 'disable_image_nvdimm = true' as microvm does not support NVDIMM") } + // Nvdimm can only be support when UEFI/ACPI is enabled on arm64, otherwise disable it. 
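+	// (Presumably: without a firmware image and without any pflash entries there
+	// is no UEFI/ACPI description available on arm64, so the guest image is
+	// attached as a regular block device instead of as an NVDIMM.)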
+ if goruntime.GOARCH == "arm64" && firmware == "" { + if p, err := h.PFlash(); err == nil { + if len(p) == 0 { + h.DisableImageNvdimm = true + kataUtilsLogger.Info("Setting 'disable_image_nvdimm = true' if there is no firmware specified") + } + } + } + blockDriver, err := h.blockDeviceDriver() if err != nil { return vc.HypervisorConfig{}, err } + blockAIO, err := h.blockDeviceAIO() + if err != nil { + return vc.HypervisorConfig{}, err + } + sharedFS, err := h.sharedFS() if err != nil { return vc.HypervisorConfig{}, err @@ -744,18 +836,24 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { rxRateLimiterMaxRate := h.getRxRateLimiterCfg() txRateLimiterMaxRate := h.getTxRateLimiterCfg() + extraMonitorSocket, err := h.extraMonitorSocket() + if err != nil { + return vc.HypervisorConfig{}, err + } + return vc.HypervisorConfig{ HypervisorPath: hypervisor, HypervisorPathList: h.HypervisorPathList, KernelPath: kernel, InitrdPath: initrd, ImagePath: image, + RootfsType: rootfsType, FirmwarePath: firmware, FirmwareVolumePath: firmwareVolume, PFlash: pflashes, MachineAccelerators: machineAccelerators, CPUFeatures: cpuFeatures, - KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), + KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), HypervisorMachineType: machineType, NumVCPUs: h.defaultVCPUs(), DefaultMaxVCPUs: h.defaultMaxVCPUs(), @@ -773,6 +871,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { VirtioFSDaemonList: h.VirtioFSDaemonList, VirtioFSCacheSize: h.VirtioFSCacheSize, VirtioFSCache: h.defaultVirtioFSCache(), + VirtioFSQueueSize: h.VirtioFSQueueSize, VirtioFSExtraArgs: h.VirtioFSExtraArgs, MemPrealloc: h.MemPrealloc, HugePages: h.HugePages, @@ -783,14 +882,15 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { Debug: h.Debug, DisableNestingChecks: h.DisableNestingChecks, BlockDeviceDriver: blockDriver, + BlockDeviceAIO: blockAIO, BlockDeviceCacheSet: h.BlockDeviceCacheSet, BlockDeviceCacheDirect: h.BlockDeviceCacheDirect, BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush, EnableIOThreads: h.EnableIOThreads, Msize9p: h.msize9p(), DisableImageNvdimm: h.DisableImageNvdimm, - HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus, - PCIeRootPort: h.PCIeRootPort, + HotPlugVFIO: h.hotPlugVFIO(), + ColdPlugVFIO: h.coldPlugVFIO(), DisableVhostNet: h.DisableVhostNet, EnableVhostUserStore: h.EnableVhostUserStore, VhostUserStorePath: h.vhostUserStorePath(), @@ -803,10 +903,13 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { GuestMemoryDumpPath: h.GuestMemoryDumpPath, GuestMemoryDumpPaging: h.GuestMemoryDumpPaging, ConfidentialGuest: h.ConfidentialGuest, + SevSnpGuest: h.SevSnpGuest, GuestSwap: h.GuestSwap, Rootless: h.Rootless, LegacySerial: h.LegacySerial, DisableSeLinux: h.DisableSeLinux, + DisableGuestSeLinux: h.DisableGuestSeLinux, + ExtraMonitorSocket: extraMonitorSocket, }, nil } @@ -836,6 +939,11 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { errors.New("image must be defined in the configuration file") } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + firmware, err := h.firmware() if err != nil { return vc.HypervisorConfig{}, err @@ -853,10 +961,11 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { HypervisorPathList: h.HypervisorPathList, KernelPath: kernel, ImagePath: image, + RootfsType: rootfsType, HypervisorCtlPath: hypervisorctl, HypervisorCtlPathList: 
h.CtlPathList, FirmwarePath: firmware, - KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), + KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), NumVCPUs: h.defaultVCPUs(), DefaultMaxVCPUs: h.defaultMaxVCPUs(), MemorySize: h.defaultMemSz(), @@ -873,6 +982,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { GuestHookPath: h.guestHookPath(), DisableSeLinux: h.DisableSeLinux, EnableAnnotations: h.EnableAnnotations, + DisableGuestSeLinux: true, // Guest SELinux is not supported in ACRN }, nil } @@ -902,6 +1012,11 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { errors.New("image or initrd must be defined in the configuration file") } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + firmware, err := h.firmware() if err != nil { return vc.HypervisorConfig{}, err @@ -921,11 +1036,12 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return vc.HypervisorConfig{}, err } - if sharedFS != config.VirtioFS && sharedFS != config.VirtioFSNydus { - return vc.HypervisorConfig{}, errors.New("clh only support virtio-fs or virtio-fs-nydus") + if sharedFS != config.VirtioFS && sharedFS != config.VirtioFSNydus && sharedFS != config.NoSharedFS { + return vc.HypervisorConfig{}, + fmt.Errorf("Cloud Hypervisor does not support %s shared filesystem option", sharedFS) } - if h.VirtioFSDaemon == "" { + if (sharedFS == config.VirtioFS || sharedFS == config.VirtioFSNydus) && h.VirtioFSDaemon == "" { return vc.HypervisorConfig{}, fmt.Errorf("cannot enable %s without daemon path in configuration file", sharedFS) } @@ -936,9 +1052,10 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { KernelPath: kernel, InitrdPath: initrd, ImagePath: image, + RootfsType: rootfsType, FirmwarePath: firmware, MachineAccelerators: machineAccelerators, - KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), + KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), HypervisorMachineType: machineType, NumVCPUs: h.defaultVCPUs(), DefaultMaxVCPUs: h.defaultMaxVCPUs(), @@ -965,11 +1082,10 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { BlockDeviceDriver: blockDriver, BlockDeviceCacheSet: h.BlockDeviceCacheSet, BlockDeviceCacheDirect: h.BlockDeviceCacheDirect, - BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush, EnableIOThreads: h.EnableIOThreads, Msize9p: h.msize9p(), - HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus, - PCIeRootPort: h.PCIeRootPort, + ColdPlugVFIO: h.coldPlugVFIO(), + HotPlugVFIO: h.hotPlugVFIO(), DisableVhostNet: true, GuestHookPath: h.guestHookPath(), VirtioFSExtraArgs: h.VirtioFSExtraArgs, @@ -977,7 +1093,9 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { EnableAnnotations: h.EnableAnnotations, DisableSeccomp: h.DisableSeccomp, ConfidentialGuest: h.ConfidentialGuest, + Rootless: h.Rootless, DisableSeLinux: h.DisableSeLinux, + DisableGuestSeLinux: h.DisableGuestSeLinux, NetRateLimiterBwMaxRate: h.getNetRateLimiterBwMaxRate(), NetRateLimiterBwOneTimeBurst: h.getNetRateLimiterBwOneTimeBurst(), NetRateLimiterOpsMaxRate: h.getNetRateLimiterOpsMaxRate(), @@ -989,6 +1107,38 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { }, nil } +func newDragonballHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { + kernel, err := h.kernel() + if err != nil { + return vc.HypervisorConfig{}, err + } + + image, err := h.image() + if err != nil { + 
return vc.HypervisorConfig{}, err + } + + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + + kernelParams := h.kernelParams() + + return vc.HypervisorConfig{ + KernelPath: kernel, + ImagePath: image, + RootfsType: rootfsType, + KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), + NumVCPUs: h.defaultVCPUs(), + DefaultMaxVCPUs: h.defaultMaxVCPUs(), + MemorySize: h.defaultMemSz(), + MemSlots: h.defaultMemSlots(), + EntropySource: h.GetEntropySource(), + Debug: h.Debug, + }, nil +} + func newFactoryConfig(f factory) (oci.FactoryConfig, error) { if f.TemplatePath == "" { f.TemplatePath = defaultTemplatePath @@ -1022,6 +1172,9 @@ func updateRuntimeConfigHypervisor(configPath string, tomlConf tomlConfig, confi case clhHypervisorTableType: config.HypervisorType = vc.ClhHypervisor hConfig, err = newClhHypervisorConfig(hypervisor) + case dragonballHypervisorTableType: + config.HypervisorType = vc.DragonballHypervisor + hConfig, err = newDragonballHypervisorConfig(hypervisor) } if err != nil { @@ -1129,50 +1282,56 @@ func updateRuntimeConfig(configPath string, tomlConf tomlConfig, config *oci.Run func GetDefaultHypervisorConfig() vc.HypervisorConfig { return vc.HypervisorConfig{ - HypervisorPath: defaultHypervisorPath, - JailerPath: defaultJailerPath, - KernelPath: defaultKernelPath, - ImagePath: defaultImagePath, - InitrdPath: defaultInitrdPath, - FirmwarePath: defaultFirmwarePath, - FirmwareVolumePath: defaultFirmwareVolumePath, - MachineAccelerators: defaultMachineAccelerators, - CPUFeatures: defaultCPUFeatures, - HypervisorMachineType: defaultMachineType, - NumVCPUs: defaultVCPUCount, - DefaultMaxVCPUs: defaultMaxVCPUCount, - MemorySize: defaultMemSize, - MemOffset: defaultMemOffset, - VirtioMem: defaultVirtioMem, - DisableBlockDeviceUse: defaultDisableBlockDeviceUse, - DefaultBridges: defaultBridgesCount, - MemPrealloc: defaultEnableMemPrealloc, - HugePages: defaultEnableHugePages, - IOMMU: defaultEnableIOMMU, - IOMMUPlatform: defaultEnableIOMMUPlatform, - FileBackedMemRootDir: defaultFileBackedMemRootDir, - Debug: defaultEnableDebug, - DisableNestingChecks: defaultDisableNestingChecks, - BlockDeviceDriver: defaultBlockDeviceDriver, - BlockDeviceCacheSet: defaultBlockDeviceCacheSet, - BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect, - BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush, - EnableIOThreads: defaultEnableIOThreads, - Msize9p: defaultMsize9p, - HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus, - PCIeRootPort: defaultPCIeRootPort, - GuestHookPath: defaultGuestHookPath, - VhostUserStorePath: defaultVhostUserStorePath, - VirtioFSCache: defaultVirtioFSCacheMode, - DisableImageNvdimm: defaultDisableImageNvdimm, - RxRateLimiterMaxRate: defaultRxRateLimiterMaxRate, - TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate, - SGXEPCSize: defaultSGXEPCSize, - ConfidentialGuest: defaultConfidentialGuest, - GuestSwap: defaultGuestSwap, - Rootless: defaultRootlessHypervisor, - DisableSeccomp: defaultDisableSeccomp, - LegacySerial: defaultLegacySerial, + HypervisorPath: defaultHypervisorPath, + JailerPath: defaultJailerPath, + KernelPath: defaultKernelPath, + ImagePath: defaultImagePath, + InitrdPath: defaultInitrdPath, + RootfsType: defaultRootfsType, + FirmwarePath: defaultFirmwarePath, + FirmwareVolumePath: defaultFirmwareVolumePath, + MachineAccelerators: defaultMachineAccelerators, + CPUFeatures: defaultCPUFeatures, + HypervisorMachineType: defaultMachineType, + NumVCPUs: defaultVCPUCount, + DefaultMaxVCPUs: 
defaultMaxVCPUCount, + MemorySize: defaultMemSize, + MemOffset: defaultMemOffset, + VirtioMem: defaultVirtioMem, + DisableBlockDeviceUse: defaultDisableBlockDeviceUse, + DefaultBridges: defaultBridgesCount, + MemPrealloc: defaultEnableMemPrealloc, + HugePages: defaultEnableHugePages, + IOMMU: defaultEnableIOMMU, + IOMMUPlatform: defaultEnableIOMMUPlatform, + FileBackedMemRootDir: defaultFileBackedMemRootDir, + Debug: defaultEnableDebug, + ExtraMonitorSocket: defaultExtraMonitorSocket, + DisableNestingChecks: defaultDisableNestingChecks, + BlockDeviceDriver: defaultBlockDeviceDriver, + BlockDeviceAIO: defaultBlockDeviceAIO, + BlockDeviceCacheSet: defaultBlockDeviceCacheSet, + BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect, + BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush, + EnableIOThreads: defaultEnableIOThreads, + Msize9p: defaultMsize9p, + ColdPlugVFIO: defaultColdPlugVFIO, + HotPlugVFIO: defaultHotPlugVFIO, + GuestHookPath: defaultGuestHookPath, + VhostUserStorePath: defaultVhostUserStorePath, + VhostUserDeviceReconnect: defaultVhostUserDeviceReconnect, + VirtioFSCache: defaultVirtioFSCacheMode, + DisableImageNvdimm: defaultDisableImageNvdimm, + RxRateLimiterMaxRate: defaultRxRateLimiterMaxRate, + TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate, + SGXEPCSize: defaultSGXEPCSize, + ConfidentialGuest: defaultConfidentialGuest, + SevSnpGuest: defaultSevSnpGuest, + GuestSwap: defaultGuestSwap, + Rootless: defaultRootlessHypervisor, + DisableSeccomp: defaultDisableSeccomp, + DisableGuestSeLinux: defaultDisableGuestSeLinux, + LegacySerial: defaultLegacySerial, } } @@ -1259,7 +1418,8 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat } config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp - + config.EnableVCPUsPinning = tomlConf.Runtime.EnableVCPUsPinning + config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs @@ -1353,7 +1513,7 @@ func decodeDropIns(mainConfigPath string, tomlConf *tomlConfig) error { configDir := filepath.Dir(mainConfigPath) dropInDir := filepath.Join(configDir, "config.d") - files, err := ioutil.ReadDir(dropInDir) + files, err := os.ReadDir(dropInDir) if err != nil { if !os.IsNotExist(err) { return fmt.Errorf("error reading %q directory: %s", dropInDir, err) @@ -1528,9 +1688,60 @@ func checkConfig(config oci.RuntimeConfig) error { return err } + hotPlugVFIO := config.HypervisorConfig.HotPlugVFIO + coldPlugVFIO := config.HypervisorConfig.ColdPlugVFIO + machineType := config.HypervisorConfig.HypervisorMachineType + hypervisorType := config.HypervisorType + if err := checkPCIeConfig(coldPlugVFIO, hotPlugVFIO, machineType, hypervisorType); err != nil { + return err + } + return nil } +// checkPCIeConfig ensures the PCIe configuration is valid. 
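+// Hot-plug and cold-plug are mutually exclusive: at most one of them may be
+// set to a port. For example (illustrative calls, mirroring the unit tests
+// added in this patch):
+//
+//	checkPCIeConfig(config.RootPort, config.NoPort, "q35", virtcontainers.QemuHypervisor)   // nil
+//	checkPCIeConfig(config.RootPort, config.RootPort, "q35", virtcontainers.QemuHypervisor) // error
+//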
+// Only allow one of the following settings for cold-plug: +// no-port, root-port, switch-port +func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineType string, hypervisorType virtcontainers.HypervisorType) error { + if hypervisorType != virtcontainers.QemuHypervisor && hypervisorType != virtcontainers.ClhHypervisor { + kataUtilsLogger.Warn("Advanced PCIe Topology only available for QEMU/CLH hypervisor, ignoring hot(cold)_vfio_port setting") + return nil + } + + if coldPlug != config.NoPort && hotPlug != config.NoPort { + return fmt.Errorf("invalid hot-plug=%s and cold-plug=%s settings, only one of them can be set", coldPlug, hotPlug) + } + if coldPlug == config.NoPort && hotPlug == config.NoPort { + return nil + } + // Currently only QEMU q35,virt support advanced PCIe topologies + // firecracker, dragonball do not have right now any PCIe support + if machineType != "q35" && machineType != "virt" { + return nil + } + if hypervisorType == virtcontainers.ClhHypervisor { + if coldPlug != config.NoPort { + return fmt.Errorf("cold-plug not supported on CLH") + } + if hotPlug != config.RootPort { + return fmt.Errorf("only hot-plug=%s supported on CLH", config.RootPort) + } + } + + var port config.PCIePort + if coldPlug != config.NoPort { + port = coldPlug + } + if hotPlug != config.NoPort { + port = hotPlug + } + if port == config.BridgePort || port == config.RootPort || port == config.SwitchPort { + return nil + } + return fmt.Errorf("invalid vfio_port=%s setting, allowed values %s, %s, %s, %s", + coldPlug, config.NoPort, config.BridgePort, config.RootPort, config.SwitchPort) +} + // checkNetNsConfig performs sanity checks on disable_new_netns config. // Because it is an expert option and conflicts with some other common configs. func checkNetNsConfig(config oci.RuntimeConfig) error { diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 86619ab13d79..8b892343707c 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -13,10 +13,12 @@ import ( "path" "path/filepath" "reflect" + goruntime "runtime" "strings" "syscall" "testing" + "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" @@ -61,27 +63,30 @@ func createConfig(configPath string, fileData string) error { // createAllRuntimeConfigFiles creates all files necessary to call // loadConfiguration(). 
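+// (The returned value is renamed from "config" to "testConfig" below,
+// presumably to avoid clashing with the newly imported device "config" package.)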
-func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConfig, err error) { +func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntimeConfig, err error) { if dir == "" { - return config, fmt.Errorf("BUG: need directory") + return testConfig, fmt.Errorf("BUG: need directory") } if hypervisor == "" { - return config, fmt.Errorf("BUG: need hypervisor") + return testConfig, fmt.Errorf("BUG: need hypervisor") } - + var hotPlugVFIO config.PCIePort + var coldPlugVFIO config.PCIePort hypervisorPath := path.Join(dir, "hypervisor") kernelPath := path.Join(dir, "kernel") kernelParams := "foo=bar xyz" imagePath := path.Join(dir, "image") + rootfsType := "ext4" logDir := path.Join(dir, "logs") logPath := path.Join(logDir, "runtime.log") machineType := "machineType" disableBlockDevice := true blockDeviceDriver := "virtio-scsi" + blockDeviceAIO := "io_uring" enableIOThreads := true - hotplugVFIOOnRootBus := true - pcieRootPort := uint32(2) + hotPlugVFIO = config.NoPort + coldPlugVFIO = config.BridgePort disableNewNetNs := false sharedFS := "virtio-9p" virtioFSdaemon := path.Join(dir, "virtiofsd") @@ -93,15 +98,17 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: kernelParams, MachineType: machineType, LogPath: logPath, DefaultGuestHookPath: defaultGuestHookPath, DisableBlock: disableBlockDevice, BlockDeviceDriver: blockDeviceDriver, + BlockDeviceAIO: blockDeviceAIO, EnableIOThreads: enableIOThreads, - HotplugVFIOOnRootBus: hotplugVFIOOnRootBus, - PCIeRootPort: pcieRootPort, + HotPlugVFIO: hotPlugVFIO, + ColdPlugVFIO: coldPlugVFIO, DisableNewNetNs: disableNewNetNs, DefaultVCPUCount: defaultVCPUCount, DefaultMaxVCPUCount: defaultMaxVCPUCount, @@ -126,7 +133,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf configPath := path.Join(dir, "runtime.toml") err = createConfig(configPath, runtimeConfigFileData) if err != nil { - return config, err + return testConfig, err } configPathLink := path.Join(filepath.Dir(configPath), "link-to-configuration.toml") @@ -134,7 +141,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf // create a link to the config file err = syscall.Symlink(configPath, configPathLink) if err != nil { - return config, err + return testConfig, err } files := []string{hypervisorPath, kernelPath, imagePath} @@ -143,7 +150,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf // create the resource (which must be >0 bytes) err := WriteFile(file, "foo", testFileMode) if err != nil { - return config, err + return testConfig, err } } @@ -151,7 +158,8 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, - KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), + RootfsType: rootfsType, + KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), HypervisorMachineType: machineType, NumVCPUs: defaultVCPUCount, DefaultMaxVCPUs: getCurrentCpuNum(), @@ -159,10 +167,11 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf DefaultMaxMemorySize: maxMemory, DisableBlockDeviceUse: disableBlockDevice, BlockDeviceDriver: defaultBlockDeviceDriver, + BlockDeviceAIO: defaultBlockDeviceAIO, DefaultBridges: defaultBridgesCount, EnableIOThreads: enableIOThreads, - 
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus, - PCIeRootPort: pcieRootPort, + HotPlugVFIO: hotPlugVFIO, + ColdPlugVFIO: coldPlugVFIO, Msize9p: defaultMsize9p, MemSlots: defaultMemSlots, EntropySource: defaultEntropySource, @@ -175,6 +184,10 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf SGXEPCSize: epcSize, } + if goruntime.GOARCH == "arm64" && len(hypervisorConfig.PFlash) == 0 && hypervisorConfig.FirmwarePath == "" { + hypervisorConfig.DisableImageNvdimm = true + } + agentConfig := vc.KataAgentConfig{ LongLiveConn: true, } @@ -201,10 +214,10 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf err = SetKernelParams(&runtimeConfig) if err != nil { - return config, err + return testConfig, err } - config = testRuntimeConfig{ + rtimeConfig := testRuntimeConfig{ RuntimeConfig: runtimeConfig, RuntimeConfigFile: configPath, ConfigPath: configPath, @@ -213,7 +226,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf LogPath: logPath, } - return config, nil + return rtimeConfig, nil } // testLoadConfiguration accepts an optional function that can be used @@ -539,6 +552,7 @@ func TestMinimalRuntimeConfig(t *testing.T) { KernelPath: defaultKernelPath, ImagePath: defaultImagePath, InitrdPath: defaultInitrdPath, + RootfsType: defaultRootfsType, HypervisorMachineType: defaultMachineType, NumVCPUs: defaultVCPUCount, DefaultMaxVCPUs: defaultMaxVCPUCount, @@ -550,6 +564,10 @@ func TestMinimalRuntimeConfig(t *testing.T) { GuestHookPath: defaultGuestHookPath, VhostUserStorePath: defaultVhostUserStorePath, VirtioFSCache: defaultVirtioFSCacheMode, + BlockDeviceAIO: defaultBlockDeviceAIO, + DisableGuestSeLinux: defaultDisableGuestSeLinux, + HotPlugVFIO: defaultHotPlugVFIO, + ColdPlugVFIO: defaultColdPlugVFIO, } expectedAgentConfig := vc.KataAgentConfig{ @@ -583,16 +601,16 @@ func TestMinimalRuntimeConfig(t *testing.T) { func TestNewQemuHypervisorConfig(t *testing.T) { dir := t.TempDir() - + var coldPlugVFIO config.PCIePort hypervisorPath := path.Join(dir, "hypervisor") kernelPath := path.Join(dir, "kernel") imagePath := path.Join(dir, "image") machineType := "machineType" disableBlock := true enableIOThreads := true - hotplugVFIOOnRootBus := true - pcieRootPort := uint32(2) + coldPlugVFIO = config.BridgePort orgVHostVSockDevicePath := utils.VHostVSockDevicePath + blockDeviceAIO := "io_uring" defer func() { utils.VHostVSockDevicePath = orgVHostVSockDevicePath }() @@ -608,12 +626,12 @@ func TestNewQemuHypervisorConfig(t *testing.T) { MachineType: machineType, DisableBlockDeviceUse: disableBlock, EnableIOThreads: enableIOThreads, - HotplugVFIOOnRootBus: hotplugVFIOOnRootBus, - PCIeRootPort: pcieRootPort, + ColdPlugVFIO: coldPlugVFIO, RxRateLimiterMaxRate: rxRateLimiterMaxRate, TxRateLimiterMaxRate: txRateLimiterMaxRate, SharedFS: "virtio-fs", VirtioFSDaemon: filepath.Join(dir, "virtiofsd"), + BlockDeviceAIO: blockDeviceAIO, } files := []string{hypervisorPath, kernelPath, imagePath} @@ -659,14 +677,6 @@ func TestNewQemuHypervisorConfig(t *testing.T) { t.Errorf("Expected value for enable IOThreads %v, got %v", enableIOThreads, config.EnableIOThreads) } - if config.HotplugVFIOOnRootBus != hotplugVFIOOnRootBus { - t.Errorf("Expected value for HotplugVFIOOnRootBus %v, got %v", hotplugVFIOOnRootBus, config.HotplugVFIOOnRootBus) - } - - if config.PCIeRootPort != pcieRootPort { - t.Errorf("Expected value for PCIeRootPort %v, got %v", pcieRootPort, config.PCIeRootPort) - } - if config.RxRateLimiterMaxRate != 
rxRateLimiterMaxRate { t.Errorf("Expected value for rx rate limiter %v, got %v", rxRateLimiterMaxRate, config.RxRateLimiterMaxRate) } @@ -674,6 +684,11 @@ func TestNewQemuHypervisorConfig(t *testing.T) { if config.TxRateLimiterMaxRate != txRateLimiterMaxRate { t.Errorf("Expected value for tx rate limiter %v, got %v", txRateLimiterMaxRate, config.TxRateLimiterMaxRate) } + + if config.BlockDeviceAIO != blockDeviceAIO { + t.Errorf("Expected value for BlockDeviceAIO %v, got %v", blockDeviceAIO, config.BlockDeviceAIO) + } + } func TestNewFirecrackerHypervisorConfig(t *testing.T) { @@ -783,8 +798,6 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) { machineType := "machineType" disableBlock := true enableIOThreads := true - hotplugVFIOOnRootBus := true - pcieRootPort := uint32(2) hypervisor := hypervisor{ Path: hypervisorPath, @@ -794,8 +807,6 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) { MachineType: machineType, DisableBlockDeviceUse: disableBlock, EnableIOThreads: enableIOThreads, - HotplugVFIOOnRootBus: hotplugVFIOOnRootBus, - PCIeRootPort: pcieRootPort, } _, err := newQemuHypervisorConfig(hypervisor) @@ -1230,9 +1241,9 @@ func TestDefaultVirtioFSCache(t *testing.T) { cache = h.defaultVirtioFSCache() assert.Equal("always", cache) - h.VirtioFSCache = "none" + h.VirtioFSCache = "never" cache = h.defaultVirtioFSCache() - assert.Equal("none", cache) + assert.Equal("never", cache) } func TestDefaultFirmware(t *testing.T) { diff --git a/src/runtime/pkg/katautils/create.go b/src/runtime/pkg/katautils/create.go index ffcaa07154c4..bd5808deba6c 100644 --- a/src/runtime/pkg/katautils/create.go +++ b/src/runtime/pkg/katautils/create.go @@ -18,6 +18,7 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" vf "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/factory" + vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" specs "github.com/opencontainers/runtime-spec/specs-go" ) @@ -162,17 +163,23 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo ociSpec.Annotations["nerdctl/network-namespace"] = sandboxConfig.NetworkConfig.NetworkID sandboxConfig.Annotations["nerdctl/network-namespace"] = ociSpec.Annotations["nerdctl/network-namespace"] - // Run pre-start OCI hooks, in the runtime namespace. - if err := PreStartHooks(ctx, ociSpec, containerID, bundlePath); err != nil { - return nil, vc.Process{}, err - } + // The value of this annotation is sent to the sandbox using SetPolicy. + delete(ociSpec.Annotations, vcAnnotations.Policy) + delete(sandboxConfig.Annotations, vcAnnotations.Policy) - // Run create runtime OCI hooks, in the runtime namespace. - if err := CreateRuntimeHooks(ctx, ociSpec, containerID, bundlePath); err != nil { - return nil, vc.Process{}, err - } + sandbox, err := vci.CreateSandbox(ctx, sandboxConfig, func(ctx context.Context) error { + // Run pre-start OCI hooks, in the runtime namespace. + if err := PreStartHooks(ctx, ociSpec, containerID, bundlePath); err != nil { + return err + } + + // Run create runtime OCI hooks, in the runtime namespace. 
+ if err := CreateRuntimeHooks(ctx, ociSpec, containerID, bundlePath); err != nil { + return err + } - sandbox, err := vci.CreateSandbox(ctx, sandboxConfig) + return nil + }) if err != nil { return nil, vc.Process{}, err } @@ -226,6 +233,9 @@ func CreateContainer(ctx context.Context, sandbox vc.VCSandbox, ociSpec specs.Sp katatrace.AddTags(span, "container_id", containerID) defer span.End() + // The value of this annotation is sent to the sandbox using SetPolicy. + delete(ociSpec.Annotations, vcAnnotations.Policy) + ociSpec = SetEphemeralStorageType(ociSpec, disableGuestEmptyDir) contConfig, err := oci.ContainerConfig(ociSpec, bundlePath, containerID, disableOutput) @@ -255,6 +265,12 @@ func CreateContainer(ctx context.Context, sandbox vc.VCSandbox, ociSpec specs.Sp return vc.Process{}, err } + hid, err := sandbox.GetHypervisorPid() + if err != nil { + return vc.Process{}, err + } + ctx = context.WithValue(ctx, vc.HypervisorPidKey{}, hid) + // Run pre-start OCI hooks. err = EnterNetNS(sandbox.GetNetNs(), func() error { return PreStartHooks(ctx, ociSpec, containerID, bundlePath) diff --git a/src/runtime/pkg/katautils/create_test.go b/src/runtime/pkg/katautils/create_test.go index b1e4cf2a9006..903e68d95dea 100644 --- a/src/runtime/pkg/katautils/create_test.go +++ b/src/runtime/pkg/katautils/create_test.go @@ -18,8 +18,10 @@ import ( "syscall" "testing" + config "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/vcmock" @@ -274,7 +276,7 @@ func TestCreateSandboxAnnotations(t *testing.T) { rootFs := vc.RootFs{Mounted: true} - testingImpl.CreateSandboxFunc = func(ctx context.Context, sandboxConfig vc.SandboxConfig) (vc.VCSandbox, error) { + testingImpl.CreateSandboxFunc = func(ctx context.Context, sandboxConfig vc.SandboxConfig, hookFunc func(context.Context) error) (vc.VCSandbox, error) { return &vcmock.Sandbox{ MockID: testSandboxID, MockContainers: []*vcmock.Container{ @@ -419,3 +421,32 @@ func TestCreateContainer(t *testing.T) { assert.NoError(err) } } + +func TestVfioChecksClh(t *testing.T) { + assert := assert.New(t) + + // Check valid CLH vfio configs + f := func(coldPlug, hotPlug config.PCIePort) error { + return checkPCIeConfig(coldPlug, hotPlug, defaultMachineType, virtcontainers.ClhHypervisor) + } + assert.NoError(f(config.NoPort, config.NoPort)) + assert.NoError(f(config.NoPort, config.RootPort)) + assert.Error(f(config.RootPort, config.RootPort)) + assert.Error(f(config.RootPort, config.NoPort)) + assert.Error(f(config.NoPort, config.SwitchPort)) +} + +func TestVfioCheckQemu(t *testing.T) { + assert := assert.New(t) + + // Check valid Qemu vfio configs + f := func(coldPlug, hotPlug config.PCIePort) error { + return checkPCIeConfig(coldPlug, hotPlug, defaultMachineType, virtcontainers.QemuHypervisor) + } + + assert.NoError(f(config.NoPort, config.NoPort)) + assert.NoError(f(config.RootPort, config.NoPort)) + assert.NoError(f(config.NoPort, config.RootPort)) + assert.Error(f(config.RootPort, config.RootPort)) + assert.Error(f(config.SwitchPort, config.RootPort)) +} diff --git a/src/runtime/pkg/katautils/hook.go 
b/src/runtime/pkg/katautils/hook.go index 50ac95cb8546..8ed6361ae18f 100644 --- a/src/runtime/pkg/katautils/hook.go +++ b/src/runtime/pkg/katautils/hook.go @@ -17,6 +17,7 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" syscallWrapper "github.com/kata-containers/kata-containers/src/runtime/pkg/syscall" + vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" ) @@ -38,8 +39,16 @@ func runHook(ctx context.Context, spec specs.Spec, hook specs.Hook, cid, bundleP defer span.End() katatrace.AddTags(span, "path", hook.Path, "args", hook.Args) + pid, ok := ctx.Value(vc.HypervisorPidKey{}).(int) + if !ok || pid == 0 { + hookLogger().Info("no hypervisor pid") + + pid = syscallWrapper.Gettid() + } + hookLogger().Infof("hypervisor pid %v", pid) + state := specs.State{ - Pid: syscallWrapper.Gettid(), + Pid: pid, Bundle: bundlePath, ID: cid, Annotations: spec.Annotations, diff --git a/src/runtime/pkg/katautils/network_linux.go b/src/runtime/pkg/katautils/network_linux.go index fd37c660565b..f2691330cfd4 100644 --- a/src/runtime/pkg/katautils/network_linux.go +++ b/src/runtime/pkg/katautils/network_linux.go @@ -11,7 +11,6 @@ import ( "fmt" "os" "path/filepath" - goruntime "runtime" "strings" "github.com/containernetworking/plugins/pkg/ns" @@ -27,30 +26,7 @@ const procMountInfoFile = "/proc/self/mountinfo" // into runtime.LockOSThread(), meaning it won't be executed in a // different thread than the one expected by the caller. func EnterNetNS(networkID string, cb func() error) error { - if networkID == "" { - return cb() - } - - goruntime.LockOSThread() - defer goruntime.UnlockOSThread() - - currentNS, err := ns.GetCurrentNS() - if err != nil { - return err - } - defer currentNS.Close() - - targetNS, err := ns.GetNS(networkID) - if err != nil { - return err - } - - if err := targetNS.Set(); err != nil { - return err - } - defer currentNS.Set() - - return cb() + return vc.EnterNetNS(networkID, cb) } // SetupNetworkNamespace create a network namespace diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 055acb9b097a..21864d94bd51 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -8,6 +8,7 @@ package oci import ( "context" + "encoding/base64" "encoding/json" "errors" "fmt" @@ -19,7 +20,7 @@ import ( "syscall" ctrAnnotations "github.com/containerd/containerd/pkg/cri/annotations" - crioAnnotations "github.com/cri-o/cri-o/pkg/annotations" + podmanAnnotations "github.com/containers/podman/v4/pkg/annotations" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" "k8s.io/apimachinery/pkg/api/resource" @@ -46,17 +47,17 @@ var ( // CRIContainerTypeKeyList lists all the CRI keys that could define // the container type from annotations in the config.json. - CRIContainerTypeKeyList = []string{ctrAnnotations.ContainerType, crioAnnotations.ContainerType, dockershimAnnotations.ContainerTypeLabelKey} + CRIContainerTypeKeyList = []string{ctrAnnotations.ContainerType, podmanAnnotations.ContainerType, dockershimAnnotations.ContainerTypeLabelKey} // CRISandboxNameKeyList lists all the CRI keys that could define // the sandbox ID (sandbox ID) from annotations in the config.json. 
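+	// (These constants are now taken from the podman annotations package rather
+	// than the CRI-O one; the CRI-O detection logic below is otherwise unchanged.)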
- CRISandboxNameKeyList = []string{ctrAnnotations.SandboxID, crioAnnotations.SandboxID, dockershimAnnotations.SandboxIDLabelKey} + CRISandboxNameKeyList = []string{ctrAnnotations.SandboxID, podmanAnnotations.SandboxID, dockershimAnnotations.SandboxIDLabelKey} // CRIContainerTypeList lists all the maps from CRI ContainerTypes annotations // to a virtcontainers ContainerType. CRIContainerTypeList = []annotationContainerType{ - {crioAnnotations.ContainerTypeSandbox, vc.PodSandbox}, - {crioAnnotations.ContainerTypeContainer, vc.PodContainer}, + {podmanAnnotations.ContainerTypeSandbox, vc.PodSandbox}, + {podmanAnnotations.ContainerTypeContainer, vc.PodContainer}, {ctrAnnotations.ContainerTypeSandbox, vc.PodSandbox}, {ctrAnnotations.ContainerTypeContainer, vc.PodContainer}, {dockershimAnnotations.ContainerTypeLabelSandbox, vc.PodSandbox}, @@ -128,6 +129,12 @@ type RuntimeConfig struct { //Determines if seccomp should be applied inside guest DisableGuestSeccomp bool + // EnableVCPUsPinning controls whether each vCPU thread should be scheduled to a fixed CPU + EnableVCPUsPinning bool + + //SELinux security context applied to the container process inside guest. + GuestSeLinuxLabel string + // Sandbox sizing information which, if provided, indicates the size of // the sandbox needed for the workload(s) SandboxCPUs uint32 @@ -439,7 +446,7 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, return err } - if err := addHypervisporNetworkOverrides(ocispec, config); err != nil { + if err := addHypervisorNetworkOverrides(ocispec, config); err != nil { return err } @@ -447,6 +454,10 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, return err } + if err := addHypervisorHotColdPlugVfioOverrides(ocispec, config); err != nil { + return err + } + if value, ok := ocispec.Annotations[vcAnnotations.MachineType]; ok { if value != "" { config.HypervisorConfig.HypervisorMachineType = value @@ -466,6 +477,18 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, config.HypervisorConfig.VhostUserStorePath = value } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableVhostUserStore).setBool(func(enable bool) { + config.HypervisorConfig.EnableVhostUserStore = enable + }); err != nil { + return err + } + + if err := newAnnotationConfiguration(ocispec, vcAnnotations.VhostUserDeviceReconnect).setUint(func(reconnect uint64) { + config.HypervisorConfig.VhostUserDeviceReconnect = uint32(reconnect) + }); err != nil { + return err + } + if value, ok := ocispec.Annotations[vcAnnotations.GuestHookPath]; ok { if value != "" { config.HypervisorConfig.GuestHookPath = value @@ -478,24 +501,12 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, return err } - if err := newAnnotationConfiguration(ocispec, vcAnnotations.HotplugVFIOOnRootBus).setBool(func(hotplugVFIOOnRootBus bool) { - config.HypervisorConfig.HotplugVFIOOnRootBus = hotplugVFIOOnRootBus - }); err != nil { - return err - } - if err := newAnnotationConfiguration(ocispec, vcAnnotations.UseLegacySerial).setBool(func(useLegacySerial bool) { config.HypervisorConfig.LegacySerial = useLegacySerial }); err != nil { return err } - if err := newAnnotationConfiguration(ocispec, vcAnnotations.PCIeRootPort).setUint(func(pcieRootPort uint64) { - config.HypervisorConfig.PCIeRootPort = uint32(pcieRootPort) - }); err != nil { - return err - } - if value, ok := ocispec.Annotations[vcAnnotations.EntropySource]; ok { if 
!checkPathIsInGlobs(runtime.HypervisorConfig.EntropySourceList, value) { return fmt.Errorf("entropy source %v required from annotation is not valid", value) @@ -558,6 +569,37 @@ func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, ru return nil } +func addHypervisorPCIePortOverride(value string) (config.PCIePort, error) { + if value == "" { + return config.NoPort, nil + } + port := config.PCIePort(value) + if port.Invalid() { + return config.InvalidPort, fmt.Errorf("Invalid PCIe port \"%v\" specified in annotation", value) + } + return port, nil +} + +func addHypervisorHotColdPlugVfioOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { + + var err error + if value, ok := ocispec.Annotations[vcAnnotations.HotPlugVFIO]; ok { + if sbConfig.HypervisorConfig.HotPlugVFIO, err = addHypervisorPCIePortOverride(value); err != nil { + return err + } + // If hot-plug is specified disable cold-plug and vice versa + sbConfig.HypervisorConfig.ColdPlugVFIO = config.NoPort + } + if value, ok := ocispec.Annotations[vcAnnotations.ColdPlugVFIO]; ok { + if sbConfig.HypervisorConfig.ColdPlugVFIO, err = addHypervisorPCIePortOverride(value); err != nil { + return err + } + // If cold-plug is specified disable hot-plug and vice versa + sbConfig.HypervisorConfig.HotPlugVFIO = config.NoPort + } + return nil +} + func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error { if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMemory).setUintWithCheck(func(memorySz uint64) error { @@ -683,6 +725,22 @@ func addHypervisorBlockOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) } } + if value, ok := ocispec.Annotations[vcAnnotations.BlockDeviceAIO]; ok { + supportedAIO := []string{config.AIONative, config.AIOThreads, config.AIOIOUring} + + valid := false + for _, b := range supportedAIO { + if b == value { + sbConfig.HypervisorConfig.BlockDeviceAIO = value + valid = true + } + } + + if !valid { + return fmt.Errorf("Invalid AIO mechanism %v specified in annotation (supported IO mechanism : %v)", value, supportedAIO) + } + } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.DisableBlockDeviceUse).setBool(func(disableBlockDeviceUse bool) { sbConfig.HypervisorConfig.DisableBlockDeviceUse = disableBlockDeviceUse }); err != nil { @@ -767,7 +825,7 @@ func addHypervisorVirtioFsOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConf }) } -func addHypervisporNetworkOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { +func addHypervisorNetworkOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { if value, ok := ocispec.Annotations[vcAnnotations.CPUFeatures]; ok { if value != "" { sbConfig.HypervisorConfig.CPUFeatures = value @@ -805,6 +863,12 @@ func addRuntimeConfigOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, r return err } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableVCPUsPinning).setBool(func(enableVCPUsPinning bool) { + sbConfig.EnableVCPUsPinning = enableVCPUsPinning + }); err != nil { + return err + } + if value, ok := ocispec.Annotations[vcAnnotations.Experimental]; ok { features := strings.Split(value, " ") sbConfig.Experimental = []exp.Feature{} @@ -845,10 +909,24 @@ func addRuntimeConfigOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, r func addAgentConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig) error { c := config.AgentConfig + updateConfig := false if value, ok := ocispec.Annotations[vcAnnotations.KernelModules]; ok { 
modules := strings.Split(value, KernelModulesSeparator) c.KernelModules = modules + updateConfig = true + } + + if value, ok := ocispec.Annotations[vcAnnotations.Policy]; ok { + if decoded_rules, err := base64.StdEncoding.DecodeString(value); err == nil { + c.Policy = string(decoded_rules) + updateConfig = true + } else { + return err + } + } + + if updateConfig { config.AgentConfig = c } @@ -923,6 +1001,10 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st DisableGuestSeccomp: runtime.DisableGuestSeccomp, + EnableVCPUsPinning: runtime.EnableVCPUsPinning, + + GuestSeLinuxLabel: runtime.GuestSeLinuxLabel, + Experimental: runtime.Experimental, } @@ -1047,8 +1129,8 @@ func getShmSize(c vc.ContainerConfig) (uint64, error) { // IsCRIOContainerManager check if a Pod is created from CRI-O func IsCRIOContainerManager(spec *specs.Spec) bool { - if val, ok := spec.Annotations[crioAnnotations.ContainerType]; ok { - if val == crioAnnotations.ContainerTypeSandbox || val == crioAnnotations.ContainerTypeContainer { + if val, ok := spec.Annotations[podmanAnnotations.ContainerType]; ok { + if val == podmanAnnotations.ContainerTypeSandbox || val == podmanAnnotations.ContainerTypeContainer { return true } } diff --git a/src/runtime/pkg/oci/utils_test.go b/src/runtime/pkg/oci/utils_test.go index b5e7440b0caf..4eeaedd11513 100644 --- a/src/runtime/pkg/oci/utils_test.go +++ b/src/runtime/pkg/oci/utils_test.go @@ -16,7 +16,7 @@ import ( "testing" ctrAnnotations "github.com/containerd/containerd/pkg/cri/annotations" - crioAnnotations "github.com/cri-o/cri-o/pkg/annotations" + podmanAnnotations "github.com/containers/podman/v4/pkg/annotations" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/assert" "golang.org/x/sys/unix" @@ -224,22 +224,22 @@ func TestContainerType(t *testing.T) { }, { description: "crio unexpected annotation, expect error", - annotationKey: crioAnnotations.ContainerType, + annotationKey: podmanAnnotations.ContainerType, annotationValue: "foo", expectedType: vc.UnknownContainerType, expectedErr: true, }, { description: "crio sandbox", - annotationKey: crioAnnotations.ContainerType, - annotationValue: string(crioAnnotations.ContainerTypeSandbox), + annotationKey: podmanAnnotations.ContainerType, + annotationValue: string(podmanAnnotations.ContainerTypeSandbox), expectedType: vc.PodSandbox, expectedErr: false, }, { description: "crio container", - annotationKey: crioAnnotations.ContainerType, - annotationValue: string(crioAnnotations.ContainerTypeContainer), + annotationKey: podmanAnnotations.ContainerType, + annotationValue: string(podmanAnnotations.ContainerTypeContainer), expectedType: vc.PodContainer, expectedErr: false, }, @@ -287,7 +287,7 @@ func TestSandboxIDSuccessful(t *testing.T) { assert := assert.New(t) ociSpec.Annotations = map[string]string{ - crioAnnotations.SandboxID: testSandboxID, + podmanAnnotations.SandboxID: testSandboxID, } sandboxID, err := SandboxID(ociSpec) @@ -599,7 +599,7 @@ func TestContainerPipeSizeAnnotation(t *testing.T) { func TestAddHypervisorAnnotations(t *testing.T) { assert := assert.New(t) - config := vc.SandboxConfig{ + sbConfig := vc.SandboxConfig{ Annotations: make(map[string]string), } @@ -628,8 +628,8 @@ func TestAddHypervisorAnnotations(t *testing.T) { runtimeConfig.HypervisorConfig.VirtioFSDaemonList = []string{"/bin/*ls*"} ocispec.Annotations[vcAnnotations.KernelParams] = "vsyscall=emulate iommu=on" - addHypervisorConfigOverrides(ocispec, &config, runtimeConfig) - 
assert.Exactly(expectedHyperConfig, config.HypervisorConfig) + addHypervisorConfigOverrides(ocispec, &sbConfig, runtimeConfig) + assert.Exactly(expectedHyperConfig, sbConfig.HypervisorConfig) ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1" ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1" @@ -642,6 +642,7 @@ func TestAddHypervisorAnnotations(t *testing.T) { ocispec.Annotations[vcAnnotations.HugePages] = "true" ocispec.Annotations[vcAnnotations.IOMMU] = "true" ocispec.Annotations[vcAnnotations.BlockDeviceDriver] = "virtio-scsi" + ocispec.Annotations[vcAnnotations.BlockDeviceAIO] = "io_uring" ocispec.Annotations[vcAnnotations.DisableBlockDeviceUse] = "true" ocispec.Annotations[vcAnnotations.EnableIOThreads] = "true" ocispec.Annotations[vcAnnotations.BlockDeviceCacheSet] = "true" @@ -649,7 +650,7 @@ func TestAddHypervisorAnnotations(t *testing.T) { ocispec.Annotations[vcAnnotations.BlockDeviceCacheNoflush] = "true" ocispec.Annotations[vcAnnotations.SharedFS] = "virtio-fs" ocispec.Annotations[vcAnnotations.VirtioFSDaemon] = "/bin/false" - ocispec.Annotations[vcAnnotations.VirtioFSCache] = "/home/cache" + ocispec.Annotations[vcAnnotations.VirtioFSCache] = "auto" ocispec.Annotations[vcAnnotations.VirtioFSExtraArgs] = "[ \"arg0\", \"arg1\" ]" ocispec.Annotations[vcAnnotations.Msize9p] = "512" ocispec.Annotations[vcAnnotations.MachineType] = "q35" @@ -658,8 +659,8 @@ func TestAddHypervisorAnnotations(t *testing.T) { ocispec.Annotations[vcAnnotations.DisableVhostNet] = "true" ocispec.Annotations[vcAnnotations.GuestHookPath] = "/usr/bin/" ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true" - ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus] = "true" - ocispec.Annotations[vcAnnotations.PCIeRootPort] = "2" + ocispec.Annotations[vcAnnotations.ColdPlugVFIO] = config.BridgePort + ocispec.Annotations[vcAnnotations.HotPlugVFIO] = config.NoPort ocispec.Annotations[vcAnnotations.IOMMUPlatform] = "true" ocispec.Annotations[vcAnnotations.SGXEPC] = "64Mi" ocispec.Annotations[vcAnnotations.UseLegacySerial] = "true" @@ -667,54 +668,57 @@ func TestAddHypervisorAnnotations(t *testing.T) { ocispec.Annotations[vcAnnotations.RxRateLimiterMaxRate] = "10000000" ocispec.Annotations[vcAnnotations.TxRateLimiterMaxRate] = "10000000" - addAnnotations(ocispec, &config, runtimeConfig) - assert.Equal(config.HypervisorConfig.NumVCPUs, uint32(1)) - assert.Equal(config.HypervisorConfig.DefaultMaxVCPUs, uint32(1)) - assert.Equal(config.HypervisorConfig.MemorySize, uint32(1024)) - assert.Equal(config.HypervisorConfig.MemSlots, uint32(20)) - assert.Equal(config.HypervisorConfig.MemOffset, uint64(512)) - assert.Equal(config.HypervisorConfig.VirtioMem, true) - assert.Equal(config.HypervisorConfig.MemPrealloc, true) - assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm") - assert.Equal(config.HypervisorConfig.HugePages, true) - assert.Equal(config.HypervisorConfig.IOMMU, true) - assert.Equal(config.HypervisorConfig.BlockDeviceDriver, "virtio-scsi") - assert.Equal(config.HypervisorConfig.DisableBlockDeviceUse, true) - assert.Equal(config.HypervisorConfig.EnableIOThreads, true) - assert.Equal(config.HypervisorConfig.BlockDeviceCacheSet, true) - assert.Equal(config.HypervisorConfig.BlockDeviceCacheDirect, true) - assert.Equal(config.HypervisorConfig.BlockDeviceCacheNoflush, true) - assert.Equal(config.HypervisorConfig.SharedFS, "virtio-fs") - assert.Equal(config.HypervisorConfig.VirtioFSDaemon, "/bin/false") - assert.Equal(config.HypervisorConfig.VirtioFSCache, "/home/cache") - 
assert.ElementsMatch(config.HypervisorConfig.VirtioFSExtraArgs, [2]string{"arg0", "arg1"}) - assert.Equal(config.HypervisorConfig.Msize9p, uint32(512)) - assert.Equal(config.HypervisorConfig.HypervisorMachineType, "q35") - assert.Equal(config.HypervisorConfig.MachineAccelerators, "nofw") - assert.Equal(config.HypervisorConfig.CPUFeatures, "pmu=off") - assert.Equal(config.HypervisorConfig.DisableVhostNet, true) - assert.Equal(config.HypervisorConfig.GuestHookPath, "/usr/bin/") - assert.Equal(config.HypervisorConfig.DisableImageNvdimm, true) - assert.Equal(config.HypervisorConfig.HotplugVFIOOnRootBus, true) - assert.Equal(config.HypervisorConfig.PCIeRootPort, uint32(2)) - assert.Equal(config.HypervisorConfig.IOMMUPlatform, true) - assert.Equal(config.HypervisorConfig.SGXEPCSize, int64(67108864)) - assert.Equal(config.HypervisorConfig.LegacySerial, true) - assert.Equal(config.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000)) - assert.Equal(config.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000)) + err := addAnnotations(ocispec, &sbConfig, runtimeConfig) + assert.NoError(err) + + assert.Equal(sbConfig.HypervisorConfig.NumVCPUs, uint32(1)) + assert.Equal(sbConfig.HypervisorConfig.DefaultMaxVCPUs, uint32(1)) + assert.Equal(sbConfig.HypervisorConfig.MemorySize, uint32(1024)) + assert.Equal(sbConfig.HypervisorConfig.MemSlots, uint32(20)) + assert.Equal(sbConfig.HypervisorConfig.MemOffset, uint64(512)) + assert.Equal(sbConfig.HypervisorConfig.VirtioMem, true) + assert.Equal(sbConfig.HypervisorConfig.MemPrealloc, true) + assert.Equal(sbConfig.HypervisorConfig.FileBackedMemRootDir, "/dev/shm") + assert.Equal(sbConfig.HypervisorConfig.HugePages, true) + assert.Equal(sbConfig.HypervisorConfig.IOMMU, true) + assert.Equal(sbConfig.HypervisorConfig.BlockDeviceDriver, "virtio-scsi") + assert.Equal(sbConfig.HypervisorConfig.BlockDeviceAIO, "io_uring") + assert.Equal(sbConfig.HypervisorConfig.DisableBlockDeviceUse, true) + assert.Equal(sbConfig.HypervisorConfig.EnableIOThreads, true) + assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheSet, true) + assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheDirect, true) + assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheNoflush, true) + assert.Equal(sbConfig.HypervisorConfig.SharedFS, "virtio-fs") + assert.Equal(sbConfig.HypervisorConfig.VirtioFSDaemon, "/bin/false") + assert.Equal(sbConfig.HypervisorConfig.VirtioFSCache, "auto") + assert.ElementsMatch(sbConfig.HypervisorConfig.VirtioFSExtraArgs, [2]string{"arg0", "arg1"}) + assert.Equal(sbConfig.HypervisorConfig.Msize9p, uint32(512)) + assert.Equal(sbConfig.HypervisorConfig.HypervisorMachineType, "q35") + assert.Equal(sbConfig.HypervisorConfig.MachineAccelerators, "nofw") + assert.Equal(sbConfig.HypervisorConfig.CPUFeatures, "pmu=off") + assert.Equal(sbConfig.HypervisorConfig.DisableVhostNet, true) + assert.Equal(sbConfig.HypervisorConfig.GuestHookPath, "/usr/bin/") + assert.Equal(sbConfig.HypervisorConfig.DisableImageNvdimm, true) + assert.Equal(string(sbConfig.HypervisorConfig.ColdPlugVFIO), string(config.BridgePort)) + assert.Equal(string(sbConfig.HypervisorConfig.HotPlugVFIO), string(config.NoPort)) + assert.Equal(sbConfig.HypervisorConfig.IOMMUPlatform, true) + assert.Equal(sbConfig.HypervisorConfig.SGXEPCSize, int64(67108864)) + assert.Equal(sbConfig.HypervisorConfig.LegacySerial, true) + assert.Equal(sbConfig.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000)) + assert.Equal(sbConfig.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000)) // In case an absurd large value is 
provided, the config value if not over-ridden ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "655536" - err := addAnnotations(ocispec, &config, runtimeConfig) + err = addAnnotations(ocispec, &sbConfig, runtimeConfig) assert.Error(err) ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "-1" - err = addAnnotations(ocispec, &config, runtimeConfig) + err = addAnnotations(ocispec, &sbConfig, runtimeConfig) assert.Error(err) ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1" ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "-1" - err = addAnnotations(ocispec, &config, runtimeConfig) + err = addAnnotations(ocispec, &sbConfig, runtimeConfig) assert.Error(err) ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1" @@ -883,15 +887,15 @@ func TestIsCRIOContainerManager(t *testing.T) { result bool }{ { - annotations: map[string]string{crioAnnotations.ContainerType: "abc"}, + annotations: map[string]string{podmanAnnotations.ContainerType: "abc"}, result: false, }, { - annotations: map[string]string{crioAnnotations.ContainerType: crioAnnotations.ContainerTypeSandbox}, + annotations: map[string]string{podmanAnnotations.ContainerType: podmanAnnotations.ContainerTypeSandbox}, result: true, }, { - annotations: map[string]string{crioAnnotations.ContainerType: crioAnnotations.ContainerTypeContainer}, + annotations: map[string]string{podmanAnnotations.ContainerType: podmanAnnotations.ContainerTypeContainer}, result: true, }, } diff --git a/src/runtime/pkg/resourcecontrol/cgroups.go b/src/runtime/pkg/resourcecontrol/cgroups.go index 4210392d2a35..be8e9dc97369 100644 --- a/src/runtime/pkg/resourcecontrol/cgroups.go +++ b/src/runtime/pkg/resourcecontrol/cgroups.go @@ -1,5 +1,4 @@ //go:build linux -// +build linux // Copyright (c) 2021-2022 Apple Inc. // @@ -15,16 +14,21 @@ import ( "sync" "github.com/containerd/cgroups" - v1 "github.com/containerd/cgroups/stats/v1" + cgroupsv2 "github.com/containerd/cgroups/v2" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" ) -// prepend a kata specific string to oci cgroup path to -// form a different cgroup path, thus cAdvisor couldn't -// find kata containers cgroup path on host to prevent it -// from grabbing the stats data. -const CgroupKataPrefix = "kata" +const ( + // prepend a kata specific string to oci cgroup path to + // form a different cgroup path, thus cAdvisor couldn't + // find kata containers cgroup path on host to prevent it + // from grabbing the stats data. + CgroupKataPrefix = "kata" + + // cgroup v2 mount point + unifiedMountpoint = "/sys/fs/cgroup" +) func RenameCgroupPath(path string) (string, error) { if path == "" { @@ -34,11 +38,10 @@ func RenameCgroupPath(path string) (string, error) { cgroupPathDir := filepath.Dir(path) cgroupPathName := fmt.Sprintf("%s_%s", CgroupKataPrefix, filepath.Base(path)) return filepath.Join(cgroupPathDir, cgroupPathName), nil - } type LinuxCgroup struct { - cgroup cgroups.Cgroup + cgroup interface{} path string cpusets *specs.LinuxCPU devices []specs.LinuxDeviceCgroup @@ -64,7 +67,8 @@ func sandboxDevices() []specs.LinuxDeviceCgroup { // In order to run Virtual Machines and create virtqueues, hypervisors // need access to certain character devices in the host, like kvm and vhost-net. 
hypervisorDevices := []string{ - "/dev/kvm", // To run virtual machines + "/dev/kvm", // To run virtual machines with KVM + "/dev/mshv", // To run virtual machines with Hyper-V "/dev/vhost-net", // To create virtqueues "/dev/vfio/vfio", // To access VFIO devices "/dev/vhost-vsock", // To interact with vsock if @@ -128,15 +132,29 @@ func sandboxDevices() []specs.LinuxDeviceCgroup { func NewResourceController(path string, resources *specs.LinuxResources) (ResourceController, error) { var err error + var cgroup interface{} + var cgroupPath string - cgroupPath, err := ValidCgroupPath(path, IsSystemdCgroup(path)) - if err != nil { - return nil, err - } - - cgroup, err := cgroups.New(cgroups.V1, cgroups.StaticPath(cgroupPath), resources) - if err != nil { - return nil, err + if cgroups.Mode() == cgroups.Legacy || cgroups.Mode() == cgroups.Hybrid { + cgroupPath, err = ValidCgroupPathV1(path, IsSystemdCgroup(path)) + if err != nil { + return nil, err + } + cgroup, err = cgroups.New(cgroups.V1, cgroups.StaticPath(cgroupPath), resources) + if err != nil { + return nil, err + } + } else if cgroups.Mode() == cgroups.Unified { + cgroupPath, err = ValidCgroupPathV2(path, IsSystemdCgroup(path)) + if err != nil { + return nil, err + } + cgroup, err = cgroupsv2.NewManager(unifiedMountpoint, cgroupPath, cgroupsv2.ToResources(resources)) + if err != nil { + return nil, err + } + } else { + return nil, ErrCgroupMode } return &LinuxCgroup{ @@ -148,40 +166,56 @@ func NewResourceController(path string, resources *specs.LinuxResources) (Resour } func NewSandboxResourceController(path string, resources *specs.LinuxResources, sandboxCgroupOnly bool) (ResourceController, error) { - var cgroup cgroups.Cgroup sandboxResources := *resources sandboxResources.Devices = append(sandboxResources.Devices, sandboxDevices()...) // Currently we know to handle systemd cgroup path only when it's the only cgroup (no overhead group), hence, - // if sandboxCgroupOnly is not true we treat it as cgroupfs path as it used to be, although it may be incorrect + // if sandboxCgroupOnly is not true we treat it as cgroupfs path as it used to be, although it may be incorrect. if !IsSystemdCgroup(path) || !sandboxCgroupOnly { return NewResourceController(path, &sandboxResources) } + var cgroup interface{} + slice, unit, err := getSliceAndUnit(path) if err != nil { return nil, err } - // github.com/containerd/cgroups doesn't support creating a scope unit with - // v1 cgroups against systemd, the following interacts directly with systemd - // to create the cgroup and then load it using containerd's api - err = createCgroupsSystemd(slice, unit, uint32(os.Getpid())) // adding runtime process, it makes calling setupCgroups redundant - if err != nil { - return nil, err - } - cgHierarchy, cgPath, err := cgroupHierarchy(path) - if err != nil { + //github.com/containerd/cgroups doesn't support creating a scope unit with + //v1 and v2 cgroups against systemd, the following interacts directly with systemd + //to create the cgroup and then load it using containerd's api. 
+	//adding runtime process, it makes calling setupCgroups redundant
+	if err = createCgroupsSystemd(slice, unit, os.Getpid()); err != nil {
 		return nil, err
 	}
 
-	cgHierarchy, cgPath, err := cgroupHierarchy(path)
-	if err != nil {
+	// Create systemd cgroup
+	if cgroups.Mode() == cgroups.Legacy || cgroups.Mode() == cgroups.Hybrid {
+		cgHierarchy, cgPath, err := cgroupHierarchy(path)
+		if err != nil {
+			return nil, err
+		}
 
-	// load created cgroup and update with resources
-	if cgroup, err = cgroups.Load(cgHierarchy, cgPath); err == nil {
-		err = cgroup.Update(&sandboxResources)
-	}
+		// load created cgroup and update with resources
+		cg, err := cgroups.Load(cgHierarchy, cgPath)
+		if err != nil {
+			return nil, err
+		}
+		if err := cg.Update(&sandboxResources); err != nil {
+			return nil, err
+		}
+		cgroup = cg
 
-	if err != nil {
-		return nil, err
+	} else if cgroups.Mode() == cgroups.Unified {
+		// load created cgroup and update with resources
+		cg, err := cgroupsv2.LoadSystemd(slice, unit)
+		if err != nil {
+			return nil, err
+		}
+		if err := cg.Update(cgroupsv2.ToResources(&sandboxResources)); err != nil {
+			return nil, err
+		}
+		cgroup = cg
+	} else {
+		return nil, ErrCgroupMode
 	}
 
 	return &LinuxCgroup{
@@ -193,14 +227,38 @@
 }
 
 func LoadResourceController(path string) (ResourceController, error) {
-	cgHierarchy, cgPath, err := cgroupHierarchy(path)
-	if err != nil {
-		return nil, err
-	}
+	var err error
+	var cgroup interface{}
 
-	cgroup, err := cgroups.Load(cgHierarchy, cgPath)
-	if err != nil {
-		return nil, err
+	// load created cgroup and update with resources
+	if cgroups.Mode() == cgroups.Legacy || cgroups.Mode() == cgroups.Hybrid {
+		cgHierarchy, cgPath, err := cgroupHierarchy(path)
+		if err != nil {
+			return nil, err
+		}
+
+		cgroup, err = cgroups.Load(cgHierarchy, cgPath)
+		if err != nil {
+			return nil, err
+		}
+	} else if cgroups.Mode() == cgroups.Unified {
+		if IsSystemdCgroup(path) {
+			slice, unit, err := getSliceAndUnit(path)
+			if err != nil {
+				return nil, err
+			}
+			cgroup, err = cgroupsv2.LoadSystemd(slice, unit)
+			if err != nil {
+				return nil, err
+			}
+		} else {
+			cgroup, err = cgroupsv2.LoadManager(unifiedMountpoint, path)
+			if err != nil {
+				return nil, err
+			}
+		}
+	} else {
+		return nil, ErrCgroupMode
 	}
 
 	return &LinuxCgroup{
@@ -214,37 +272,86 @@ func (c *LinuxCgroup) Logger() *logrus.Entry {
 }
 
 func (c *LinuxCgroup) Delete() error {
-	return c.cgroup.Delete()
+	switch cg := c.cgroup.(type) {
+	case cgroups.Cgroup:
+		return cg.Delete()
+	case *cgroupsv2.Manager:
+		if IsSystemdCgroup(c.ID()) {
+			if err := cg.DeleteSystemd(); err != nil {
+				return err
+			}
+		}
+		return cg.Delete()
+	default:
+		return ErrCgroupMode
+	}
 }
 
-func (c *LinuxCgroup) Stat() (*v1.Metrics, error) {
-	return c.cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist))
+func (c *LinuxCgroup) Stat() (interface{}, error) {
+	switch cg := c.cgroup.(type) {
+	case cgroups.Cgroup:
+		return cg.Stat(cgroups.IgnoreNotExist)
+	case *cgroupsv2.Manager:
+		return cg.Stat()
+	default:
+		return nil, ErrCgroupMode
+	}
 }
 
 func (c *LinuxCgroup) AddProcess(pid int, subsystems ...string) error {
-	return c.cgroup.Add(cgroups.Process{Pid: pid})
+	switch cg := c.cgroup.(type) {
+	case cgroups.Cgroup:
+		return cg.AddProc(uint64(pid))
+	case *cgroupsv2.Manager:
+		return cg.AddProc(uint64(pid))
+	default:
+		return ErrCgroupMode
+	}
 }
 
 func (c *LinuxCgroup) AddThread(pid int, subsystems ...string) error {
-	return c.cgroup.AddTask(cgroups.Process{Pid: pid})
+	switch cg := c.cgroup.(type) {
+	case cgroups.Cgroup:
+		return cg.AddTask(cgroups.Process{Pid: pid})
+	case *cgroupsv2.Manager:
return cg.AddProc(uint64(pid)) + default: + return ErrCgroupMode + } } func (c *LinuxCgroup) Update(resources *specs.LinuxResources) error { - return c.cgroup.Update(resources) + switch cg := c.cgroup.(type) { + case cgroups.Cgroup: + return cg.Update(resources) + case *cgroupsv2.Manager: + return cg.Update(cgroupsv2.ToResources(resources)) + default: + return ErrCgroupMode + } } func (c *LinuxCgroup) MoveTo(path string) error { - cgHierarchy, cgPath, err := cgroupHierarchy(path) - if err != nil { - return err - } - - newCgroup, err := cgroups.Load(cgHierarchy, cgPath) - if err != nil { - return err + switch cg := c.cgroup.(type) { + case cgroups.Cgroup: + cgHierarchy, cgPath, err := cgroupHierarchy(path) + if err != nil { + return err + } + newCgroup, err := cgroups.Load(cgHierarchy, cgPath) + if err != nil { + return err + } + return cg.MoveTo(newCgroup) + case *cgroupsv2.Manager: + newCgroup, err := cgroupsv2.LoadManager(unifiedMountpoint, path) + if err != nil { + return err + } + return cg.MoveTo(newCgroup) + default: + return ErrCgroupMode } - - return c.cgroup.MoveTo(newCgroup) } func (c *LinuxCgroup) AddDevice(deviceHostPath string) error { @@ -258,10 +365,21 @@ func (c *LinuxCgroup) AddDevice(deviceHostPath string) error { c.devices = append(c.devices, deviceResource) - if err := c.cgroup.Update(&specs.LinuxResources{ - Devices: c.devices, - }); err != nil { - return err + switch cg := c.cgroup.(type) { + case cgroups.Cgroup: + if err := cg.Update(&specs.LinuxResources{ + Devices: c.devices, + }); err != nil { + return err + } + case *cgroupsv2.Manager: + if err := cg.Update(cgroupsv2.ToResources(&specs.LinuxResources{ + Devices: c.devices, + })); err != nil { + return err + } + default: + return ErrCgroupMode } return nil @@ -284,10 +402,21 @@ func (c *LinuxCgroup) RemoveDevice(deviceHostPath string) error { } } - if err := c.cgroup.Update(&specs.LinuxResources{ - Devices: c.devices, - }); err != nil { - return err + switch cg := c.cgroup.(type) { + case cgroups.Cgroup: + if err := cg.Update(&specs.LinuxResources{ + Devices: c.devices, + }); err != nil { + return err + } + case *cgroupsv2.Manager: + if err := cg.Update(cgroupsv2.ToResources(&specs.LinuxResources{ + Devices: c.devices, + })); err != nil { + return err + } + default: + return ErrCgroupMode } return nil @@ -315,9 +444,18 @@ func (c *LinuxCgroup) UpdateCpuSet(cpuset, memset string) error { c.cpusets.Mems = memset } - return c.cgroup.Update(&specs.LinuxResources{ - CPU: c.cpusets, - }) + switch cg := c.cgroup.(type) { + case cgroups.Cgroup: + return cg.Update(&specs.LinuxResources{ + CPU: c.cpusets, + }) + case *cgroupsv2.Manager: + return cg.Update(cgroupsv2.ToResources(&specs.LinuxResources{ + CPU: c.cpusets, + })) + default: + return ErrCgroupMode + } } func (c *LinuxCgroup) Type() ResourceControllerType { diff --git a/src/runtime/pkg/resourcecontrol/cgroups_darwin.go b/src/runtime/pkg/resourcecontrol/cgroups_darwin.go new file mode 100644 index 000000000000..50cde8e5d0b3 --- /dev/null +++ b/src/runtime/pkg/resourcecontrol/cgroups_darwin.go @@ -0,0 +1,86 @@ +// Copyright (c) 2023 Apple Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +// + +package resourcecontrol + +import ( + "errors" + + "github.com/opencontainers/runtime-spec/specs-go" +) + +type DarwinResourceController struct{} + +func RenameCgroupPath(path string) (string, error) { + return "", errors.New("RenameCgroupPath not supported on Darwin") +} + +func NewResourceController(path string, resources *specs.LinuxResources) (ResourceController, error) { + return &DarwinResourceController{}, nil +} + +func NewSandboxResourceController(path string, resources *specs.LinuxResources, sandboxCgroupOnly bool) (ResourceController, error) { + return &DarwinResourceController{}, nil +} + +func LoadResourceController(path string) (ResourceController, error) { + return &DarwinResourceController{}, nil +} + +func (c *DarwinResourceController) Delete() error { + return nil +} + +func (c *DarwinResourceController) Stat() (interface{}, error) { + return nil, nil +} + +func (c *DarwinResourceController) AddProcess(pid int, subsystems ...string) error { + return nil +} + +func (c *DarwinResourceController) AddThread(pid int, subsystems ...string) error { + return nil +} + +func (c *DarwinResourceController) AddTask(pid int, subsystems ...string) error { + return nil +} + +func (c *DarwinResourceController) Update(resources *specs.LinuxResources) error { + return nil +} + +func (c *DarwinResourceController) MoveTo(path string) error { + return nil +} + +func (c *DarwinResourceController) ID() string { + return "" +} + +func (c *DarwinResourceController) Parent() string { + return "" +} + +func (c *DarwinResourceController) Type() ResourceControllerType { + return DarwinResourceControllerType +} + +func (c *DarwinResourceController) AddDevice(deviceHostPath string) error { + return nil +} + +func (c *DarwinResourceController) RemoveDevice(deviceHostPath string) error { + return nil +} + +func (c *DarwinResourceController) UpdateCpuSet(cpuset, memset string) error { + return nil +} + +func (c *DarwinResourceController) Path() string { + return "" +} diff --git a/src/runtime/pkg/resourcecontrol/controller.go b/src/runtime/pkg/resourcecontrol/controller.go index 11dafd0e28aa..e59767f5c829 100644 --- a/src/runtime/pkg/resourcecontrol/controller.go +++ b/src/runtime/pkg/resourcecontrol/controller.go @@ -6,7 +6,6 @@ package resourcecontrol import ( - v1 "github.com/containerd/cgroups/stats/v1" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" ) @@ -26,7 +25,8 @@ func SetLogger(logger *logrus.Entry) { type ResourceControllerType string const ( - LinuxCgroups ResourceControllerType = "cgroups" + LinuxCgroups ResourceControllerType = "cgroups" + DarwinResourceControllerType ResourceControllerType = "darwin" ) // String converts a resource type to a string. @@ -56,7 +56,7 @@ type ResourceController interface { Delete() error // Stat returns the statistics for the controller. - Stat() (*v1.Metrics, error) + Stat() (interface{}, error) // AddProcess adds a process to a set of controllers. 
AddProcess(int, ...string) error diff --git a/src/runtime/pkg/resourcecontrol/utils.go b/src/runtime/pkg/resourcecontrol/utils.go index 835cb54c47f5..449a89e9aa42 100644 --- a/src/runtime/pkg/resourcecontrol/utils.go +++ b/src/runtime/pkg/resourcecontrol/utils.go @@ -6,6 +6,7 @@ package resourcecontrol import ( + "errors" "fmt" "strings" @@ -14,6 +15,10 @@ import ( "golang.org/x/sys/unix" ) +var ( + ErrCgroupMode = errors.New("cgroup controller type error") +) + func DeviceToCgroupDeviceRule(device string) (*devices.Rule, error) { var st unix.Stat_t deviceRule := devices.Rule{ diff --git a/src/runtime/pkg/resourcecontrol/utils_darwin.go b/src/runtime/pkg/resourcecontrol/utils_darwin.go new file mode 100644 index 000000000000..86c50ae78344 --- /dev/null +++ b/src/runtime/pkg/resourcecontrol/utils_darwin.go @@ -0,0 +1,10 @@ +// Copyright (c) 2023 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package resourcecontrol + +func SetThreadAffinity(threadID int, cpuSetSlice []int) error { + return nil +} diff --git a/src/runtime/pkg/resourcecontrol/utils_linux.go b/src/runtime/pkg/resourcecontrol/utils_linux.go index f72890889feb..0acbc6c6afed 100644 --- a/src/runtime/pkg/resourcecontrol/utils_linux.go +++ b/src/runtime/pkg/resourcecontrol/utils_linux.go @@ -15,14 +15,15 @@ import ( systemdDbus "github.com/coreos/go-systemd/v22/dbus" "github.com/godbus/dbus/v5" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" + "golang.org/x/sys/unix" ) // DefaultResourceControllerID runtime-determined location in the cgroups hierarchy. const DefaultResourceControllerID = "/vc" -// ValidCgroupPath returns a valid cgroup path. +// ValidCgroupPathV1 returns a valid cgroup path for cgroup v1. // see https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path -func ValidCgroupPath(path string, systemdCgroup bool) (string, error) { +func ValidCgroupPathV1(path string, systemdCgroup bool) (string, error) { if IsSystemdCgroup(path) { return path, nil } @@ -43,6 +44,30 @@ func ValidCgroupPath(path string, systemdCgroup bool) (string, error) { return filepath.Join(DefaultResourceControllerID, filepath.Clean("/"+path)), nil } +// ValidCgroupPathV2 returns a valid cgroup path for cgroup v2. +// see https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path +func ValidCgroupPathV2(path string, systemdCgroup bool) (string, error) { + // In cgroup v2,path must be a "clean" absolute path starts with "/". + if IsSystemdCgroup(path) { + return filepath.Join("/", path), nil + } + + if systemdCgroup { + return "", fmt.Errorf("malformed systemd path '%v': expected to be of form 'slice:prefix:name'", path) + } + + // In the case of an absolute path (starting with /), the runtime MUST + // take the path to be relative to the cgroups mount point. + if filepath.IsAbs(path) { + return filepath.Clean(path), nil + } + + // In the case of a relative path (not starting with /), the runtime MAY + // interpret the path relative to a runtime-determined location in the cgroups hierarchy. 
+ // clean up path and return a new path relative to DefaultResourceControllerID + return filepath.Join(DefaultResourceControllerID, filepath.Clean("/"+path)), nil +} + func newProperty(name string, units interface{}) systemdDbus.Property { return systemdDbus.Property{ Name: name, @@ -68,7 +93,7 @@ func cgroupHierarchy(path string) (cgroups.Hierarchy, cgroups.Path, error) { } } -func createCgroupsSystemd(slice string, unit string, pid uint32) error { +func createCgroupsSystemd(slice string, unit string, pid int) error { ctx := context.TODO() conn, err := systemdDbus.NewWithContext(ctx) if err != nil { @@ -84,14 +109,19 @@ func createCgroupsSystemd(slice string, unit string, pid uint32) error { newProperty("IOAccounting", true), } - // https://github.com/opencontainers/runc/blob/master/docs/systemd.md - if strings.HasSuffix(unit, ".scope") { - // It's a scope, which we put into a Slice=. - properties = append(properties, systemdDbus.PropSlice(slice)) - properties = append(properties, newProperty("Delegate", true)) - properties = append(properties, systemdDbus.PropPids(pid)) + if strings.HasSuffix(unit, ".slice") { + // If we create a slice, the parent is defined via a Wants=. + properties = append(properties, systemdDbus.PropWants(slice)) } else { - return fmt.Errorf("Failed to create cgroups with systemd: unit %s is not a scope", unit) + // Otherwise it's a scope, which we put into a Slice=. + properties = append(properties, systemdDbus.PropSlice(slice)) + } + + // Assume scopes always support delegation (supported since systemd v218). + properties = append(properties, newProperty("Delegate", true)) + + if pid != -1 { + properties = append(properties, systemdDbus.PropPids(uint32(pid))) } ch := make(chan string) @@ -112,3 +142,27 @@ func getSliceAndUnit(cgroupPath string) (string, string, error) { return "", "", fmt.Errorf("Path: %s is not valid systemd's cgroups path", cgroupPath) } + +func IsCgroupV1() (bool, error) { + if cgroups.Mode() == cgroups.Legacy || cgroups.Mode() == cgroups.Hybrid { + return true, nil + } else if cgroups.Mode() == cgroups.Unified { + return false, nil + } else { + return false, ErrCgroupMode + } +} + +func SetThreadAffinity(threadID int, cpuSetSlice []int) error { + unixCPUSet := unix.CPUSet{} + + for _, cpuId := range cpuSetSlice { + unixCPUSet.Set(cpuId) + } + + if err := unix.SchedSetaffinity(threadID, &unixCPUSet); err != nil { + return fmt.Errorf("failed to set vcpu thread %d affinity to cpu %d: %v", threadID, cpuSetSlice, err) + } + + return nil +} diff --git a/src/runtime/pkg/resourcecontrol/utils_linux_test.go b/src/runtime/pkg/resourcecontrol/utils_linux_test.go index 2e40c4d83595..804f6253a8b2 100644 --- a/src/runtime/pkg/resourcecontrol/utils_linux_test.go +++ b/src/runtime/pkg/resourcecontrol/utils_linux_test.go @@ -41,7 +41,7 @@ func TestIsSystemdCgroup(t *testing.T) { } } -func TestValidCgroupPath(t *testing.T) { +func TestValidCgroupPathV1(t *testing.T) { assert := assert.New(t) for _, t := range []struct { @@ -62,12 +62,14 @@ func TestValidCgroupPath(t *testing.T) { {"/../hi/foo", false, false}, {"o / m /../ g", false, false}, {"/overhead/foobar", false, false}, + {"/kata/afhts2e5d4g5s", false, false}, + {"/kubepods/besteffort/podxxx-afhts2e5d4g5s/kata_afhts2e5d4g5s", false, false}, {"/sys/fs/cgroup/cpu/sandbox/kata_foobar", false, false}, + {"kata_overhead/afhts2e5d4g5s", false, false}, // invalid systemd paths {"o / m /../ g", true, true}, {"slice:kata", true, true}, - {"/kata/afhts2e5d4g5s", true, true}, {"a:b:c:d", true, true}, {":::", true, 
true}, {"", true, true}, @@ -83,7 +85,7 @@ func TestValidCgroupPath(t *testing.T) { {"x.slice:kata:55555", true, false}, {"system.slice:kata:afhts2e5d4g5s", true, false}, } { - path, err := ValidCgroupPath(t.path, t.systemdCgroup) + path, err := ValidCgroupPathV1(t.path, t.systemdCgroup) if t.error { assert.Error(err) continue @@ -106,6 +108,73 @@ func TestValidCgroupPath(t *testing.T) { } +func TestValidCgroupPathV2(t *testing.T) { + assert := assert.New(t) + + for _, t := range []struct { + path string + systemdCgroup bool + error bool + }{ + // empty paths + {"../../../", false, false}, + {"../", false, false}, + {".", false, false}, + {"../../../", false, false}, + {"./../", false, false}, + + // valid no-systemd paths + {"../../../foo", false, false}, + {"/../hi", false, false}, + {"/../hi/foo", false, false}, + {"o / m /../ g", false, false}, + {"/overhead/foobar", false, false}, + {"/kata/afhts2e5d4g5s", false, false}, + {"/kubepods/besteffort/podxxx-afhts2e5d4g5s/kata_afhts2e5d4g5s", false, false}, + {"/sys/fs/cgroup/cpu/sandbox/kata_foobar", false, false}, + {"kata_overhead/afhts2e5d4g5s", false, false}, + + // invalid systemd paths + {"o / m /../ g", true, true}, + {"slice:kata", true, true}, + {"a:b:c:d", true, true}, + {":::", true, true}, + {"", true, true}, + {":", true, true}, + {"::", true, true}, + {":::", true, true}, + {"a:b", true, true}, + {"a:b:", true, true}, + {":a:b", true, true}, + {"@:@:@", true, true}, + + // valid systemd paths + {"x.slice:kata:55555", true, false}, + {"system.slice:kata:afhts2e5d4g5s", true, false}, + } { + path, err := ValidCgroupPathV2(t.path, t.systemdCgroup) + if t.error { + assert.Error(err) + continue + } else { + assert.NoError(err) + } + + if filepath.IsAbs(t.path) { + cleanPath := filepath.Dir(filepath.Clean(t.path)) + assert.True(strings.HasPrefix(path, cleanPath), + "%v should have prefix %v", path, cleanPath) + } else if t.systemdCgroup { + assert.Equal(filepath.Join("/", t.path), path) + } else { + assert.True( + strings.HasPrefix(path, DefaultResourceControllerID), + "%v should have prefix /%v", path, DefaultResourceControllerID) + } + } + +} + func TestDeviceToCgroupDeviceRule(t *testing.T) { assert := assert.New(t) diff --git a/src/runtime/pkg/sev/README.md b/src/runtime/pkg/sev/README.md new file mode 100644 index 000000000000..8e864f46fa64 --- /dev/null +++ b/src/runtime/pkg/sev/README.md @@ -0,0 +1,14 @@ +# AMD SEV confidential guest utilities + +This package provides utilities for launching AMD SEV confidential guests. + +## Calculating expected launch digests + +The `CalculateLaunchDigest` function can be used to calculate the expected +SHA-256 of an SEV confidential guest given its firmware, kernel, initrd, and +kernel command-line. + +### Unit test data + +The [`testdata`](testdata) directory contains file used for testing +`CalculateLaunchDigest`. 
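To make the README's description concrete, here is a minimal usage sketch for `CalculateLaunchDigest`; the import path follows the runtime module layout and the firmware/kernel/initrd paths are illustrative placeholders, not values taken from this patch:

```go
package main

import (
	"encoding/hex"
	"fmt"
	"log"

	// Assumed import path for the new package under src/runtime.
	"github.com/kata-containers/kata-containers/src/runtime/pkg/sev"
)

func main() {
	// Placeholder paths; pass "" as the kernel path to skip the
	// kernel-hashes table for guests that are not direct-booted.
	digest, err := sev.CalculateLaunchDigest(
		"/path/to/AMDSEV-OVMF.fd", // firmware
		"/path/to/vmlinuz",        // kernel
		"/path/to/initrd.img",     // initrd
		"console=ttyS0")           // kernel command line
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(hex.EncodeToString(digest[:]))
}
```

The hex-encoded result is what a key broker such as `simple-kbs` would compare against the reported launch measurement during pre-attestation.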
diff --git a/src/runtime/pkg/sev/kbs/kbs.go b/src/runtime/pkg/sev/kbs/kbs.go new file mode 100644 index 000000000000..3c7e20134aac --- /dev/null +++ b/src/runtime/pkg/sev/kbs/kbs.go @@ -0,0 +1,33 @@ +// Copyright contributors to AMD SEV/-ES in Go +// +// SPDX-License-Identifier: Apache-2.0 +// + +// Package kbs can be used interact with simple-kbs, the key broker +// server for SEV and SEV-ES pre-attestation + +package kbs + +const ( + Offline = "offline" + OfflineSecretType = "bundle" + OfflineSecretGuid = "e6f5a162-d67f-4750-a67c-5d065f2a9910" + Online = "online" + OnlineBootParam = "online_sev_kbc" + OnlineSecretType = "connection" + OnlineSecretGuid = "1ee27366-0c87-43a6-af48-28543eaf7cb0" +) + +type GuestPreAttestationConfig struct { + Proxy string + Keyset string + LaunchId string + KernelPath string + InitrdPath string + FwPath string + KernelParameters string + CertChainPath string + SecretType string + SecretGuid string + Policy uint32 +} diff --git a/src/runtime/pkg/sev/ovmf.go b/src/runtime/pkg/sev/ovmf.go new file mode 100644 index 000000000000..9c6947abefd5 --- /dev/null +++ b/src/runtime/pkg/sev/ovmf.go @@ -0,0 +1,101 @@ +// Copyright contributors to AMD SEV/-ES in Go +// +// SPDX-License-Identifier: Apache-2.0 + +package sev + +import ( + "bytes" + "encoding/binary" + "errors" + "os" +) + +// GUID 96b582de-1fb2-45f7-baea-a366c55a082d +var ovmfTableFooterGuid = guidLE{0xde, 0x82, 0xb5, 0x96, 0xb2, 0x1f, 0xf7, 0x45, 0xba, 0xea, 0xa3, 0x66, 0xc5, 0x5a, 0x08, 0x2d} + +// GUID 00f771de-1a7e-4fcb-890e-68c77e2fb44e +var sevEsResetBlockGuid = guidLE{0xde, 0x71, 0xf7, 0x00, 0x7e, 0x1a, 0xcb, 0x4f, 0x89, 0x0e, 0x68, 0xc7, 0x7e, 0x2f, 0xb4, 0x4e} + +type ovmfFooterTableEntry struct { + Size uint16 + Guid guidLE +} + +type ovmf struct { + table map[guidLE][]byte +} + +func NewOvmf(filename string) (ovmf, error) { + buf, err := os.ReadFile(filename) + if err != nil { + return ovmf{}, err + } + table, err := parseFooterTable(buf) + if err != nil { + return ovmf{}, err + } + return ovmf{table}, nil +} + +// Parse the OVMF footer table and return a map from GUID to entry value +func parseFooterTable(data []byte) (map[guidLE][]byte, error) { + table := make(map[guidLE][]byte) + + buf := new(bytes.Buffer) + err := binary.Write(buf, binary.LittleEndian, ovmfFooterTableEntry{}) + if err != nil { + return table, err + } + entryHeaderSize := buf.Len() + + // The OVMF table ends 32 bytes before the end of the firmware binary + startOfFooterTable := len(data) - 32 - entryHeaderSize + footerBytes := bytes.NewReader(data[startOfFooterTable:]) + var footer ovmfFooterTableEntry + err = binary.Read(footerBytes, binary.LittleEndian, &footer) + if err != nil { + return table, err + } + if footer.Guid != ovmfTableFooterGuid { + // No OVMF footer table + return table, nil + } + tableSize := int(footer.Size) - entryHeaderSize + if tableSize < 0 { + return table, nil + } + tableBytes := data[(startOfFooterTable - tableSize):startOfFooterTable] + for len(tableBytes) >= entryHeaderSize { + tsize := len(tableBytes) + entryBytes := bytes.NewReader(tableBytes[tsize-entryHeaderSize:]) + var entry ovmfFooterTableEntry + err := binary.Read(entryBytes, binary.LittleEndian, &entry) + if err != nil { + return table, err + } + if int(entry.Size) < entryHeaderSize { + return table, errors.New("Invalid entry size") + } + entryData := tableBytes[tsize-int(entry.Size) : tsize-entryHeaderSize] + table[entry.Guid] = entryData + tableBytes = tableBytes[:tsize-int(entry.Size)] + } + return table, nil +} + +func (o *ovmf) 
tableItem(guid guidLE) ([]byte, error) { + value, ok := o.table[guid] + if !ok { + return []byte{}, errors.New("OVMF footer table entry not found") + } + return value, nil +} + +func (o *ovmf) sevEsResetEip() (uint32, error) { + value, err := o.tableItem(sevEsResetBlockGuid) + if err != nil { + return 0, err + } + return binary.LittleEndian.Uint32(value), nil +} diff --git a/src/runtime/pkg/sev/sev.go b/src/runtime/pkg/sev/sev.go new file mode 100644 index 000000000000..bdf73cf6038e --- /dev/null +++ b/src/runtime/pkg/sev/sev.go @@ -0,0 +1,203 @@ +// Copyright contributors to AMD SEV/-ES in Go +// +// SPDX-License-Identifier: Apache-2.0 +// + +// Package sev can be used to compute the expected hash values for +// SEV/-ES pre-launch attestation +package sev + +import ( + "bytes" + "crypto/sha256" + "encoding/binary" + "io" + "os" +) + +type guidLE [16]byte + +// The following definitions must be identical to those in QEMU target/i386/sev.c + +// GUID: 9438d606-4f22-4cc9-b479-a793d411fd21 +var sevHashTableHeaderGuid = guidLE{0x06, 0xd6, 0x38, 0x94, 0x22, 0x4f, 0xc9, 0x4c, 0xb4, 0x79, 0xa7, 0x93, 0xd4, 0x11, 0xfd, 0x21} + +// GUID: 4de79437-abd2-427f-b835-d5b172d2045b +var sevKernelEntryGuid = guidLE{0x37, 0x94, 0xe7, 0x4d, 0xd2, 0xab, 0x7f, 0x42, 0xb8, 0x35, 0xd5, 0xb1, 0x72, 0xd2, 0x04, 0x5b} + +// GUID: 44baf731-3a2f-4bd7-9af1-41e29169781d +var sevInitrdEntryGuid = guidLE{0x31, 0xf7, 0xba, 0x44, 0x2f, 0x3a, 0xd7, 0x4b, 0x9a, 0xf1, 0x41, 0xe2, 0x91, 0x69, 0x78, 0x1d} + +// GUID: 97d02dd8-bd20-4c94-aa78-e7714d36ab2a +var sevCmdlineEntryGuid = guidLE{0xd8, 0x2d, 0xd0, 0x97, 0x20, 0xbd, 0x94, 0x4c, 0xaa, 0x78, 0xe7, 0x71, 0x4d, 0x36, 0xab, 0x2a} + +type sevHashTableEntry struct { + entryGuid guidLE + length uint16 + hash [sha256.Size]byte +} + +type sevHashTable struct { + tableGuid guidLE + length uint16 + cmdline sevHashTableEntry + initrd sevHashTableEntry + kernel sevHashTableEntry +} + +type paddedSevHashTable struct { + table sevHashTable + padding [8]byte +} + +func fileSha256(filename string) (res [sha256.Size]byte, err error) { + f, err := os.Open(filename) + if err != nil { + return res, err + } + defer f.Close() + + digest := sha256.New() + if _, err := io.Copy(digest, f); err != nil { + return res, err + } + + copy(res[:], digest.Sum(nil)) + return res, nil +} + +func constructSevHashesTable(kernelPath, initrdPath, cmdline string) ([]byte, error) { + kernelHash, err := fileSha256(kernelPath) + if err != nil { + return []byte{}, err + } + + initrdHash, err := fileSha256(initrdPath) + if err != nil { + return []byte{}, err + } + + cmdlineHash := sha256.Sum256(append([]byte(cmdline), 0)) + + buf := new(bytes.Buffer) + err = binary.Write(buf, binary.LittleEndian, sevHashTableEntry{}) + if err != nil { + return []byte{}, err + } + entrySize := uint16(buf.Len()) + + buf = new(bytes.Buffer) + err = binary.Write(buf, binary.LittleEndian, sevHashTable{}) + if err != nil { + return []byte{}, err + } + tableSize := uint16(buf.Len()) + + ht := paddedSevHashTable{ + table: sevHashTable{ + tableGuid: sevHashTableHeaderGuid, + length: tableSize, + cmdline: sevHashTableEntry{ + entryGuid: sevCmdlineEntryGuid, + length: entrySize, + hash: cmdlineHash, + }, + initrd: sevHashTableEntry{ + entryGuid: sevInitrdEntryGuid, + length: entrySize, + hash: initrdHash, + }, + kernel: sevHashTableEntry{ + entryGuid: sevKernelEntryGuid, + length: entrySize, + hash: kernelHash, + }, + }, + padding: [8]byte{0, 0, 0, 0, 0, 0, 0, 0}, + } + + htBuf := new(bytes.Buffer) + err = binary.Write(htBuf, binary.LittleEndian, 
ht) + if err != nil { + return []byte{}, err + } + return htBuf.Bytes(), nil +} + +// CalculateLaunchDigest returns the sha256 encoded SEV launch digest based off +// the current firmware, kernel, initrd, and the kernel cmdline +func CalculateLaunchDigest(firmwarePath, kernelPath, initrdPath, cmdline string) (res [sha256.Size]byte, err error) { + f, err := os.Open(firmwarePath) + if err != nil { + return res, err + } + defer f.Close() + + digest := sha256.New() + if _, err := io.Copy(digest, f); err != nil { + return res, err + } + + // When used for confidential containers in kata-containers, kernelPath + // is always set (direct boot). However, this current package can also + // be used by other programs which may calculate launch digests of + // arbitrary SEV guests without SEV kernel hashes table. + if kernelPath != "" { + ht, err := constructSevHashesTable(kernelPath, initrdPath, cmdline) + if err != nil { + return res, err + } + digest.Write(ht) + } + + copy(res[:], digest.Sum(nil)) + return res, nil +} + +// CalculateSEVESLaunchDigest returns the sha256 encoded SEV-ES launch digest +// based off the current firmware, kernel, initrd, and the kernel cmdline, and +// the number of vcpus and their type +func CalculateSEVESLaunchDigest(vcpus int, vcpuSig VCPUSig, firmwarePath, kernelPath, initrdPath, cmdline string) (res [sha256.Size]byte, err error) { + f, err := os.Open(firmwarePath) + if err != nil { + return res, err + } + defer f.Close() + + digest := sha256.New() + if _, err := io.Copy(digest, f); err != nil { + return res, err + } + + // When used for confidential containers in kata-containers, kernelPath + // is always set (direct boot). However, this current package can also + // be used by other programs which may calculate launch digests of + // arbitrary SEV guests without SEV kernel hashes table. 
+ if kernelPath != "" { + ht, err := constructSevHashesTable(kernelPath, initrdPath, cmdline) + if err != nil { + return res, err + } + digest.Write(ht) + } + + o, err := NewOvmf(firmwarePath) + if err != nil { + return res, err + } + resetEip, err := o.sevEsResetEip() + if err != nil { + return res, err + } + v := vmsaBuilder{uint64(resetEip), vcpuSig} + for i := 0; i < vcpus; i++ { + vmsaPage, err := v.buildPage(i) + if err != nil { + return res, err + } + digest.Write(vmsaPage) + } + + copy(res[:], digest.Sum(nil)) + return res, nil +} diff --git a/src/runtime/pkg/sev/sev_test.go b/src/runtime/pkg/sev/sev_test.go new file mode 100644 index 000000000000..68a82ea90d76 --- /dev/null +++ b/src/runtime/pkg/sev/sev_test.go @@ -0,0 +1,54 @@ +// Copyright contributors to AMD SEV/-ES in Go +// +// SPDX-License-Identifier: Apache-2.0 + +package sev + +import ( + "encoding/hex" + "testing" +) + +func TestCalculateLaunchDigestWithoutKernelHashes(t *testing.T) { + ld, err := CalculateLaunchDigest("testdata/ovmf_suffix.bin", "", "", "") + if err != nil { + t.Fatalf("unexpected err value: %s", err) + } + hexld := hex.EncodeToString(ld[:]) + if hexld != "b184e06e012366fd7b33ebfb361a515d05f00d354dca07b36abbc1e1e177ced5" { + t.Fatalf("wrong measurement: %s", hexld) + } +} + +func TestCalculateLaunchDigestWithKernelHashes(t *testing.T) { + ld, err := CalculateLaunchDigest("testdata/ovmf_suffix.bin", "/dev/null", "/dev/null", "") + if err != nil { + t.Fatalf("unexpected err value: %s", err) + } + hexld := hex.EncodeToString(ld[:]) + if hexld != "d59d7696efd7facfaa653758586e6120c4b6eaec3e327771d278cc6a44786ba5" { + t.Fatalf("wrong measurement: %s", hexld) + } +} + +func TestCalculateLaunchDigestWithKernelHashesSevEs(t *testing.T) { + ld, err := CalculateSEVESLaunchDigest(1, SigEpycV4, "testdata/ovmf_suffix.bin", "/dev/null", "/dev/null", "") + if err != nil { + t.Fatalf("unexpected err value: %s", err) + } + hexld := hex.EncodeToString(ld[:]) + if hexld != "7e5c26fb454621eb466978b4d0242b3c04b44a034de7fc0a2d8dac60ea2b6403" { + t.Fatalf("wrong measurement: %s", hexld) + } +} + +func TestCalculateLaunchDigestWithKernelHashesSevEsAndSmp(t *testing.T) { + ld, err := CalculateSEVESLaunchDigest(4, SigEpycV4, "testdata/ovmf_suffix.bin", "/dev/null", "/dev/null", "") + if err != nil { + t.Fatalf("unexpected err value: %s", err) + } + hexld := hex.EncodeToString(ld[:]) + if hexld != "b2111b0051fc3a06ec216899b2c78da99fb9d56c6ff2e8261dd3fe6cff79ecbc" { + t.Fatalf("wrong measurement: %s", hexld) + } +} diff --git a/src/runtime/pkg/sev/testdata/README.md b/src/runtime/pkg/sev/testdata/README.md new file mode 100644 index 000000000000..34554dc8e2cd --- /dev/null +++ b/src/runtime/pkg/sev/testdata/README.md @@ -0,0 +1,9 @@ +# sev/testdata + +The `ovmf_suffix.bin` contains the last 4KB of the `OVMF.fd` binary from edk2's +`OvmfPkg/AmdSev/AmdSevX64.dsc` build. To save space, we committed only the +last 4KB instead of the the full 4MB binary. + +The end of the file contains a GUIDed footer table with entries that hold the +SEV-ES AP reset vector address, which is needed in order to compute VMSAs for +SEV-ES guests. 
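Because SEV-ES also measures one VMSA page per vCPU (built from the AP reset vector stored in this footer table), the expected digest depends on the vCPU count and CPU signature as well. A minimal sketch of the SEV-ES variant, again with an assumed import path and placeholder file paths:

```go
package main

import (
	"encoding/hex"
	"fmt"
	"log"

	// Assumed import path for the new package under src/runtime.
	"github.com/kata-containers/kata-containers/src/runtime/pkg/sev"
)

func main() {
	// The vCPU count and signature must match the guest configuration;
	// sev.SigEpycMilan is equivalent to sev.NewVCPUSig(25, 1, 1).
	digest, err := sev.CalculateSEVESLaunchDigest(4, sev.SigEpycMilan,
		"/path/to/AMDSEV-OVMF.fd", // firmware carrying the SEV-ES reset block
		"/path/to/vmlinuz",        // kernel
		"/path/to/initrd.img",     // initrd
		"console=ttyS0")           // kernel command line
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(hex.EncodeToString(digest[:]))
}
```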
diff --git a/src/runtime/pkg/sev/testdata/ovmf_suffix.bin b/src/runtime/pkg/sev/testdata/ovmf_suffix.bin new file mode 100644 index 000000000000..cc6d7ca7f087 Binary files /dev/null and b/src/runtime/pkg/sev/testdata/ovmf_suffix.bin differ diff --git a/src/runtime/pkg/sev/vcpu_sigs.go b/src/runtime/pkg/sev/vcpu_sigs.go new file mode 100644 index 000000000000..9cee59e2b18f --- /dev/null +++ b/src/runtime/pkg/sev/vcpu_sigs.go @@ -0,0 +1,76 @@ +// Copyright contributors to AMD SEV/-ES in Go +// +// SPDX-License-Identifier: Apache-2.0 + +package sev + +type VCPUSig uint64 + +const ( + // 'EPYC': family=23, model=1, stepping=2 + SigEpyc VCPUSig = 0x800f12 + + // 'EPYC-v1': family=23, model=1, stepping=2 + SigEpycV1 VCPUSig = 0x800f12 + + // 'EPYC-v2': family=23, model=1, stepping=2 + SigEpycV2 VCPUSig = 0x800f12 + + // 'EPYC-IBPB': family=23, model=1, stepping=2 + SigEpycIBPB VCPUSig = 0x800f12 + + // 'EPYC-v3': family=23, model=1, stepping=2 + SigEpycV3 VCPUSig = 0x800f12 + + // 'EPYC-v4': family=23, model=1, stepping=2 + SigEpycV4 VCPUSig = 0x800f12 + + // 'EPYC-Rome': family=23, model=49, stepping=0 + SigEpycRome VCPUSig = 0x830f10 + + // 'EPYC-Rome-v1': family=23, model=49, stepping=0 + SigEpycRomeV1 VCPUSig = 0x830f10 + + // 'EPYC-Rome-v2': family=23, model=49, stepping=0 + SigEpycRomeV2 VCPUSig = 0x830f10 + + // 'EPYC-Rome-v3': family=23, model=49, stepping=0 + SigEpycRomeV3 VCPUSig = 0x830f10 + + // 'EPYC-Milan': family=25, model=1, stepping=1 + SigEpycMilan VCPUSig = 0xa00f11 + + // 'EPYC-Milan-v1': family=25, model=1, stepping=1 + SigEpycMilanV1 VCPUSig = 0xa00f11 + + // 'EPYC-Milan-v2': family=25, model=1, stepping=1 + SigEpycMilanV2 VCPUSig = 0xa00f11 +) + +// NewVCPUSig computes the CPU signature (32-bit value) from the given family, +// model, and stepping. 
+// +// This computation is described in AMD's CPUID Specification, publication #25481 +// https://www.amd.com/system/files/TechDocs/25481.pdf +// See section: CPUID Fn0000_0001_EAX Family, Model, Stepping Identifiers +func NewVCPUSig(family, model, stepping uint32) VCPUSig { + var family_low, family_high uint32 + if family > 0xf { + family_low = 0xf + family_high = (family - 0x0f) & 0xff + } else { + family_low = family + family_high = 0 + } + + model_low := model & 0xf + model_high := (model >> 4) & 0xf + + stepping_low := stepping & 0xf + + return VCPUSig((family_high << 20) | + (model_high << 16) | + (family_low << 8) | + (model_low << 4) | + stepping_low) +} diff --git a/src/runtime/pkg/sev/vcpu_sigs_test.go b/src/runtime/pkg/sev/vcpu_sigs_test.go new file mode 100644 index 000000000000..70f84875097f --- /dev/null +++ b/src/runtime/pkg/sev/vcpu_sigs_test.go @@ -0,0 +1,21 @@ +// Copyright contributors to AMD SEV/-ES in Go +// +// SPDX-License-Identifier: Apache-2.0 + +package sev + +import ( + "testing" +) + +func TestNewVCPUSig(t *testing.T) { + if NewVCPUSig(23, 1, 2) != SigEpyc { + t.Errorf("wrong EPYC CPU signature") + } + if NewVCPUSig(23, 49, 0) != SigEpycRome { + t.Errorf("wrong EPYC-Rome CPU signature") + } + if NewVCPUSig(25, 1, 1) != SigEpycMilan { + t.Errorf("wrong EPYC-Milan CPU signature") + } +} diff --git a/src/runtime/pkg/sev/vmsa.go b/src/runtime/pkg/sev/vmsa.go new file mode 100644 index 000000000000..c2bbc4122b11 --- /dev/null +++ b/src/runtime/pkg/sev/vmsa.go @@ -0,0 +1,172 @@ +// Copyright contributors to AMD SEV/-ES in Go +// +// SPDX-License-Identifier: Apache-2.0 + +package sev + +import ( + "bytes" + "encoding/binary" +) + +// VMCB Segment (struct vmcb_seg in the linux kernel) +type vmcbSeg struct { + selector uint16 + attrib uint16 + limit uint32 + base uint64 +} + +// VMSA page +// +// The names of the fields are taken from struct sev_es_work_area in the linux kernel: +// https://github.com/AMDESE/linux/blob/sev-snp-v12/arch/x86/include/asm/svm.h#L318 +// (following the definitions in AMD APM Vol 2 Table B-4) +type sevEsSaveArea struct { + es vmcbSeg + cs vmcbSeg + ss vmcbSeg + ds vmcbSeg + fs vmcbSeg + gs vmcbSeg + gdtr vmcbSeg + ldtr vmcbSeg + idtr vmcbSeg + tr vmcbSeg + vmpl0_ssp uint64 // nolint: unused + vmpl1_ssp uint64 // nolint: unused + vmpl2_ssp uint64 // nolint: unused + vmpl3_ssp uint64 // nolint: unused + u_cet uint64 // nolint: unused + reserved_1 [2]uint8 // nolint: unused + vmpl uint8 // nolint: unused + cpl uint8 // nolint: unused + reserved_2 [4]uint8 // nolint: unused + efer uint64 + reserved_3 [104]uint8 // nolint: unused + xss uint64 // nolint: unused + cr4 uint64 + cr3 uint64 // nolint: unused + cr0 uint64 + dr7 uint64 + dr6 uint64 + rflags uint64 + rip uint64 + dr0 uint64 // nolint: unused + dr1 uint64 // nolint: unused + dr2 uint64 // nolint: unused + dr3 uint64 // nolint: unused + dr0_addr_mask uint64 // nolint: unused + dr1_addr_mask uint64 // nolint: unused + dr2_addr_mask uint64 // nolint: unused + dr3_addr_mask uint64 // nolint: unused + reserved_4 [24]uint8 // nolint: unused + rsp uint64 // nolint: unused + s_cet uint64 // nolint: unused + ssp uint64 // nolint: unused + isst_addr uint64 // nolint: unused + rax uint64 // nolint: unused + star uint64 // nolint: unused + lstar uint64 // nolint: unused + cstar uint64 // nolint: unused + sfmask uint64 // nolint: unused + kernel_gs_base uint64 // nolint: unused + sysenter_cs uint64 // nolint: unused + sysenter_esp uint64 // nolint: unused + sysenter_eip uint64 // nolint: unused + cr2 
uint64 // nolint: unused + reserved_5 [32]uint8 // nolint: unused + g_pat uint64 + dbgctrl uint64 // nolint: unused + br_from uint64 // nolint: unused + br_to uint64 // nolint: unused + last_excp_from uint64 // nolint: unused + last_excp_to uint64 // nolint: unused + reserved_7 [80]uint8 // nolint: unused + pkru uint32 // nolint: unused + reserved_8 [20]uint8 // nolint: unused + reserved_9 uint64 // nolint: unused + rcx uint64 // nolint: unused + rdx uint64 + rbx uint64 // nolint: unused + reserved_10 uint64 // nolint: unused + rbp uint64 // nolint: unused + rsi uint64 // nolint: unused + rdi uint64 // nolint: unused + r8 uint64 // nolint: unused + r9 uint64 // nolint: unused + r10 uint64 // nolint: unused + r11 uint64 // nolint: unused + r12 uint64 // nolint: unused + r13 uint64 // nolint: unused + r14 uint64 // nolint: unused + r15 uint64 // nolint: unused + reserved_11 [16]uint8 // nolint: unused + guest_exit_info_1 uint64 // nolint: unused + guest_exit_info_2 uint64 // nolint: unused + guest_exit_int_info uint64 // nolint: unused + guest_nrip uint64 // nolint: unused + sev_features uint64 + vintr_ctrl uint64 // nolint: unused + guest_exit_code uint64 // nolint: unused + virtual_tom uint64 // nolint: unused + tlb_id uint64 // nolint: unused + pcpu_id uint64 // nolint: unused + event_inj uint64 // nolint: unused + xcr0 uint64 + reserved_12 [16]uint8 // nolint: unused + x87_dp uint64 // nolint: unused + mxcsr uint32 // nolint: unused + x87_ftw uint16 // nolint: unused + x87_fsw uint16 // nolint: unused + x87_fcw uint16 // nolint: unused + x87_fop uint16 // nolint: unused + x87_ds uint16 // nolint: unused + x87_cs uint16 // nolint: unused + x87_rip uint64 // nolint: unused + fpreg_x87 [80]uint8 // nolint: unused + fpreg_xmm [256]uint8 // nolint: unused + fpreg_ymm [256]uint8 // nolint: unused + unused [2448]uint8 // nolint: unused +} + +type vmsaBuilder struct { + apEIP uint64 + vcpuSig VCPUSig +} + +func (v *vmsaBuilder) buildPage(i int) ([]byte, error) { + eip := uint64(0xfffffff0) // BSP (first vcpu) + if i > 0 { + eip = v.apEIP + } + saveArea := sevEsSaveArea{ + es: vmcbSeg{0, 0x93, 0xffff, 0}, + cs: vmcbSeg{0xf000, 0x9b, 0xffff, eip & 0xffff0000}, + ss: vmcbSeg{0, 0x93, 0xffff, 0}, + ds: vmcbSeg{0, 0x93, 0xffff, 0}, + fs: vmcbSeg{0, 0x93, 0xffff, 0}, + gs: vmcbSeg{0, 0x93, 0xffff, 0}, + gdtr: vmcbSeg{0, 0, 0xffff, 0}, + idtr: vmcbSeg{0, 0, 0xffff, 0}, + ldtr: vmcbSeg{0, 0x82, 0xffff, 0}, + tr: vmcbSeg{0, 0x8b, 0xffff, 0}, + efer: 0x1000, // KVM enables EFER_SVME + cr4: 0x40, // KVM enables X86_CR4_MCE + cr0: 0x10, + dr7: 0x400, + dr6: 0xffff0ff0, + rflags: 0x2, + rip: eip & 0xffff, + g_pat: 0x7040600070406, // PAT MSR: See AMD APM Vol 2, Section A.3 + rdx: uint64(v.vcpuSig), + sev_features: 0, // SEV-ES + xcr0: 0x1, + } + page := new(bytes.Buffer) + err := binary.Write(page, binary.LittleEndian, saveArea) + if err != nil { + return []byte{}, err + } + return page.Bytes(), nil +} diff --git a/src/runtime/pkg/signals/signals.go b/src/runtime/pkg/signals/signals.go index a405ad09de07..f9dd18c97760 100644 --- a/src/runtime/pkg/signals/signals.go +++ b/src/runtime/pkg/signals/signals.go @@ -23,21 +23,6 @@ var signalLog = logrus.WithField("default-signal-logger", true) // or a fatal signal is received. var CrashOnError = false -// List of handled signals. -// -// The value is true if receiving the signal should be fatal. 
-var handledSignalsMap = map[syscall.Signal]bool{ - syscall.SIGABRT: true, - syscall.SIGBUS: true, - syscall.SIGILL: true, - syscall.SIGQUIT: true, - syscall.SIGSEGV: true, - syscall.SIGSTKFLT: true, - syscall.SIGSYS: true, - syscall.SIGTRAP: true, - syscall.SIGUSR1: false, -} - // DieCb is the callback function type that needs to be defined for every call // into the Die() function. This callback will be run as the first function of // the Die() implementation. diff --git a/src/runtime/pkg/signals/signals_darwin.go b/src/runtime/pkg/signals/signals_darwin.go new file mode 100644 index 000000000000..d6143bb91b7b --- /dev/null +++ b/src/runtime/pkg/signals/signals_darwin.go @@ -0,0 +1,22 @@ +// Copyright (c) 2023 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package signals + +import "syscall" + +// List of handled signals. +// +// The value is true if receiving the signal should be fatal. +var handledSignalsMap = map[syscall.Signal]bool{ + syscall.SIGABRT: true, + syscall.SIGBUS: true, + syscall.SIGILL: true, + syscall.SIGQUIT: true, + syscall.SIGSEGV: true, + syscall.SIGSYS: true, + syscall.SIGTRAP: true, + syscall.SIGUSR1: false, +} diff --git a/src/runtime/pkg/signals/signals_linux.go b/src/runtime/pkg/signals/signals_linux.go new file mode 100644 index 000000000000..1d4fae5fb18e --- /dev/null +++ b/src/runtime/pkg/signals/signals_linux.go @@ -0,0 +1,23 @@ +// Copyright 2018 Intel Corporation. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package signals + +import "syscall" + +// List of handled signals. +// +// The value is true if receiving the signal should be fatal. +var handledSignalsMap = map[syscall.Signal]bool{ + syscall.SIGABRT: true, + syscall.SIGBUS: true, + syscall.SIGILL: true, + syscall.SIGQUIT: true, + syscall.SIGSEGV: true, + syscall.SIGSTKFLT: true, + syscall.SIGSYS: true, + syscall.SIGTRAP: true, + syscall.SIGUSR1: false, +} diff --git a/src/runtime/pkg/signals/signals_test.go b/src/runtime/pkg/signals/signals_test.go index fab5578290d2..f13e02b04346 100644 --- a/src/runtime/pkg/signals/signals_test.go +++ b/src/runtime/pkg/signals/signals_test.go @@ -7,7 +7,9 @@ package signals import ( "bytes" + "errors" "os" + "os/exec" "reflect" goruntime "runtime" "sort" @@ -135,3 +137,69 @@ func TestSignalBacktrace(t *testing.T) { assert.True(strings.Contains(b, "contention:")) assert.True(strings.Contains(b, `level=error`)) } + +func TestSignalHandlePanic(t *testing.T) { + assert := assert.New(t) + + savedLog := signalLog + defer func() { + signalLog = savedLog + }() + + signalLog = logrus.WithFields(logrus.Fields{ + "name": "name", + "pid": os.Getpid(), + "source": "throttler", + "test-logger": true}) + + // Create buffer to save logger output. + buf := &bytes.Buffer{} + + savedOut := signalLog.Logger.Out + defer func() { + signalLog.Logger.Out = savedOut + }() + + // Capture output to buffer. 
+ signalLog.Logger.Out = buf + + HandlePanic(nil) + + b := buf.String() + assert.True(len(b) == 0) +} + +func TestSignalHandlePanicWithError(t *testing.T) { + assert := assert.New(t) + + if os.Getenv("CALL_EXIT") != "1" { + cmd := exec.Command(os.Args[0], "-test.run=TestSignalHandlePanicWithError") + cmd.Env = append(os.Environ(), "CALL_EXIT=1") + + err := cmd.Run() + assert.True(err != nil) + + exitError, ok := err.(*exec.ExitError) + assert.True(ok) + assert.True(exitError.ExitCode() == 1) + + return + } + + signalLog = logrus.WithFields(logrus.Fields{ + "name": "name", + "pid": os.Getpid(), + "source": "throttler", + "test-logger": true}) + + // Create buffer to save logger output. + buf := &bytes.Buffer{} + + // Capture output to buffer. + signalLog.Logger.Out = buf + + dieCallBack := func() {} + defer HandlePanic(dieCallBack) + e := errors.New("test-panic") + panic(e) +} diff --git a/src/runtime/pkg/types/types_test.go b/src/runtime/pkg/types/types_test.go index 6da2c9bcea4a..5b8b489e6d91 100644 --- a/src/runtime/pkg/types/types_test.go +++ b/src/runtime/pkg/types/types_test.go @@ -1,7 +1,6 @@ // Copyright (c) 2020 Ant Group // // SPDX-License-Identifier: Apache-2.0 -// package types import ( diff --git a/src/runtime/pkg/utils/schedcore.go b/src/runtime/pkg/utils/schedcore.go index c35fecef4a11..c395b2bd47ae 100644 --- a/src/runtime/pkg/utils/schedcore.go +++ b/src/runtime/pkg/utils/schedcore.go @@ -5,10 +5,6 @@ package utils -import ( - "golang.org/x/sys/unix" -) - // PidType is the type of provided pid value and how it should be treated type PidType int @@ -24,13 +20,3 @@ const ( // ProcessGroup affects all processes in the group ProcessGroup PidType = pidTypeProcessGroupId ) - -// Create a new sched core domain -func Create(t PidType) error { - return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_CREATE, 0, uintptr(t), 0) -} - -// ShareFrom shares the sched core domain from the provided pid -func ShareFrom(pid uint64, t PidType) error { - return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_SHARE_FROM, uintptr(pid), uintptr(t), 0) -} diff --git a/src/runtime/pkg/utils/schedcore_linux.go b/src/runtime/pkg/utils/schedcore_linux.go new file mode 100644 index 000000000000..73c4d8f55e28 --- /dev/null +++ b/src/runtime/pkg/utils/schedcore_linux.go @@ -0,0 +1,20 @@ +// Copyright (c) 2023 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package utils + +import ( + "golang.org/x/sys/unix" +) + +// Create a new sched core domain +func Create(t PidType) error { + return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_CREATE, 0, uintptr(t), 0) +} + +// ShareFrom shares the sched core domain from the provided pid +func ShareFrom(pid uint64, t PidType) error { + return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_SHARE_FROM, uintptr(pid), uintptr(t), 0) +} diff --git a/src/runtime/pkg/utils/schedcore_other.go b/src/runtime/pkg/utils/schedcore_other.go new file mode 100644 index 000000000000..d19c70dee8d9 --- /dev/null +++ b/src/runtime/pkg/utils/schedcore_other.go @@ -0,0 +1,22 @@ +// Copyright (c) 2023 Apple Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +// + +//go:build !linux + +package utils + +import ( + "errors" +) + +// Create a new sched core domain +func Create(t PidType) error { + return errors.New("schedcore not available on non-Linux platforms") +} + +// ShareFrom shares the sched core domain from the provided pid +func ShareFrom(pid uint64, t PidType) error { + return errors.New("schedcore not available on non-Linux platforms") +} diff --git a/src/runtime/pkg/utils/shimclient/shim_management_client.go b/src/runtime/pkg/utils/shimclient/shim_management_client.go index 3f9e68650713..28ef3708de8f 100644 --- a/src/runtime/pkg/utils/shimclient/shim_management_client.go +++ b/src/runtime/pkg/utils/shimclient/shim_management_client.go @@ -9,7 +9,6 @@ import ( "bytes" "fmt" "io" - "io/ioutil" "net" "net/http" "time" @@ -20,7 +19,7 @@ import ( // BuildShimClient builds and returns an http client for communicating with the provided sandbox func BuildShimClient(sandboxID string, timeout time.Duration) (*http.Client, error) { - return buildUnixSocketClient(shim.SocketAddress(sandboxID), timeout) + return buildUnixSocketClient(shim.ClientSocketAddress(sandboxID), timeout) } // buildUnixSocketClient build http client for Unix socket @@ -91,7 +90,7 @@ func DoPut(sandboxID string, timeoutInSeconds time.Duration, urlPath, contentTyp }() if resp.StatusCode != http.StatusOK { - data, _ := ioutil.ReadAll(resp.Body) + data, _ := io.ReadAll(resp.Body) return fmt.Errorf("error sending put: url: %s, status code: %d, response data: %s", urlPath, resp.StatusCode, string(data)) } @@ -117,7 +116,7 @@ func DoPost(sandboxID string, timeoutInSeconds time.Duration, urlPath, contentTy }() if resp.StatusCode != http.StatusOK { - data, _ := ioutil.ReadAll(resp.Body) + data, _ := io.ReadAll(resp.Body) return fmt.Errorf("error sending post: url: %s, status code: %d, response data: %s", urlPath, resp.StatusCode, string(data)) } diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.mod b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.mod deleted file mode 100644 index dfc6cce21f3c..000000000000 --- a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.mod +++ /dev/null @@ -1,8 +0,0 @@ -module code.cloudfoundry.org/bytefmt - -go 1.16 - -require ( - github.com/onsi/ginkgo v1.16.4 - github.com/onsi/gomega v1.16.0 -) diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.sum b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.sum deleted file mode 100644 index a881b853964e..000000000000 --- a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.sum +++ /dev/null @@ -1,93 +0,0 @@ -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= -github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= -github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod 
h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= -github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= -github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= -github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= -github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc= -github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= -github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= -github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= -github.com/onsi/gomega v1.16.0 h1:6gjqkI8iiRHMvdccRJM8rVKjCWk6ZIm6FTm3ddIe4/c= -github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20210428140749-89ef3d95e781 h1:DzZ89McO9/gWPsQXS/FVKAlG02ZjaQ6AlZRBimEYOd0= -golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= -golang.org/x/sync 
v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 
-google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/.gitignore b/src/runtime/vendor/github.com/BurntSushi/toml/.gitignore index 0cd3800377d4..fe79e3adda29 100644 --- a/src/runtime/vendor/github.com/BurntSushi/toml/.gitignore +++ b/src/runtime/vendor/github.com/BurntSushi/toml/.gitignore @@ -1,5 +1,2 @@ -TAGS -tags -.*.swp -tomlcheck/tomlcheck -toml.test +/toml.test +/toml-test diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/.travis.yml b/src/runtime/vendor/github.com/BurntSushi/toml/.travis.yml deleted file mode 100644 index 8b8afc4f0e00..000000000000 --- a/src/runtime/vendor/github.com/BurntSushi/toml/.travis.yml +++ /dev/null @@ -1,15 +0,0 @@ -language: go -go: - - 1.1 - - 1.2 - - 1.3 - - 1.4 - - 1.5 - - 1.6 - - tip -install: - - go install ./... - - go get github.com/BurntSushi/toml-test -script: - - export PATH="$PATH:$HOME/gopath/bin" - - make test diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/COMPATIBLE b/src/runtime/vendor/github.com/BurntSushi/toml/COMPATIBLE deleted file mode 100644 index 6efcfd0ce55e..000000000000 --- a/src/runtime/vendor/github.com/BurntSushi/toml/COMPATIBLE +++ /dev/null @@ -1,3 +0,0 @@ -Compatible with TOML version -[v0.4.0](https://github.com/toml-lang/toml/blob/v0.4.0/versions/en/toml-v0.4.0.md) - diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/Makefile b/src/runtime/vendor/github.com/BurntSushi/toml/Makefile deleted file mode 100644 index 3600848d331a..000000000000 --- a/src/runtime/vendor/github.com/BurntSushi/toml/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -install: - go install ./... - -test: install - go test -v - toml-test toml-test-decoder - toml-test -encoder toml-test-encoder - -fmt: - gofmt -w *.go */*.go - colcheck *.go */*.go - -tags: - find ./ -name '*.go' -print0 | xargs -0 gotags > TAGS - -push: - git push origin master - git push github master - diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/README.md b/src/runtime/vendor/github.com/BurntSushi/toml/README.md index 7c1b37ecc7a0..3651cfa96092 100644 --- a/src/runtime/vendor/github.com/BurntSushi/toml/README.md +++ b/src/runtime/vendor/github.com/BurntSushi/toml/README.md @@ -1,46 +1,26 @@ -## TOML parser and encoder for Go with reflection - TOML stands for Tom's Obvious, Minimal Language. This Go package provides a -reflection interface similar to Go's standard library `json` and `xml` -packages. 
This package also supports the `encoding.TextUnmarshaler` and -`encoding.TextMarshaler` interfaces so that you can define custom data -representations. (There is an example of this below.) - -Spec: https://github.com/toml-lang/toml +reflection interface similar to Go's standard library `json` and `xml` packages. -Compatible with TOML version -[v0.4.0](https://github.com/toml-lang/toml/blob/master/versions/en/toml-v0.4.0.md) +Compatible with TOML version [v1.0.0](https://toml.io/en/v1.0.0). -Documentation: https://godoc.org/github.com/BurntSushi/toml +Documentation: https://godocs.io/github.com/BurntSushi/toml -Installation: +See the [releases page](https://github.com/BurntSushi/toml/releases) for a +changelog; this information is also in the git tag annotations (e.g. `git show +v0.4.0`). -```bash -go get github.com/BurntSushi/toml -``` +This library requires Go 1.13 or newer; add it to your go.mod with: -Try the toml validator: + % go get github.com/BurntSushi/toml@latest -```bash -go get github.com/BurntSushi/toml/cmd/tomlv -tomlv some-toml-file.toml -``` +It also comes with a TOML validator CLI tool: -[![Build Status](https://travis-ci.org/BurntSushi/toml.svg?branch=master)](https://travis-ci.org/BurntSushi/toml) [![GoDoc](https://godoc.org/github.com/BurntSushi/toml?status.svg)](https://godoc.org/github.com/BurntSushi/toml) - -### Testing - -This package passes all tests in -[toml-test](https://github.com/BurntSushi/toml-test) for both the decoder -and the encoder. + % go install github.com/BurntSushi/toml/cmd/tomlv@latest + % tomlv some-toml-file.toml ### Examples - -This package works similarly to how the Go standard library handles `XML` -and `JSON`. Namely, data is loaded into Go values via reflection. - -For the simplest example, consider some TOML file as just a list of keys -and values: +For the simplest example, consider some TOML file as just a list of keys and +values: ```toml Age = 25 @@ -50,29 +30,23 @@ Perfection = [ 6, 28, 496, 8128 ] DOB = 1987-07-05T05:45:00Z ``` -Which could be defined in Go as: +Which can be decoded with: ```go type Config struct { - Age int - Cats []string - Pi float64 - Perfection []int - DOB time.Time // requires `import time` + Age int + Cats []string + Pi float64 + Perfection []int + DOB time.Time } -``` -And then decoded with: - -```go var conf Config -if _, err := toml.Decode(tomlData, &conf); err != nil { - // handle error -} +_, err := toml.Decode(tomlData, &conf) ``` -You can also use struct tags if your struct field name doesn't map to a TOML -key value directly: +You can also use struct tags if your struct field name doesn't map to a TOML key +value directly: ```toml some_key_NAME = "wat" @@ -80,139 +54,67 @@ some_key_NAME = "wat" ```go type TOML struct { - ObscureKey string `toml:"some_key_NAME"` + ObscureKey string `toml:"some_key_NAME"` } ``` -### Using the `encoding.TextUnmarshaler` interface +Beware that like other decoders **only exported fields** are considered when +encoding and decoding; private fields are silently ignored. 
-Here's an example that automatically parses duration strings into -`time.Duration` values: +### Using the `Marshaler` and `encoding.TextUnmarshaler` interfaces +Here's an example that automatically parses values in a `mail.Address`: ```toml -[[song]] -name = "Thunder Road" -duration = "4m49s" - -[[song]] -name = "Stairway to Heaven" -duration = "8m03s" -``` - -Which can be decoded with: - -```go -type song struct { - Name string - Duration duration -} -type songs struct { - Song []song -} -var favorites songs -if _, err := toml.Decode(blob, &favorites); err != nil { - log.Fatal(err) -} - -for _, s := range favorites.Song { - fmt.Printf("%s (%s)\n", s.Name, s.Duration) -} +contacts = [ + "Donald Duck ", + "Scrooge McDuck ", +] ``` -And you'll also need a `duration` type that satisfies the -`encoding.TextUnmarshaler` interface: +Can be decoded with: ```go -type duration struct { - time.Duration +// Create address type which satisfies the encoding.TextUnmarshaler interface. +type address struct { + *mail.Address } -func (d *duration) UnmarshalText(text []byte) error { +func (a *address) UnmarshalText(text []byte) error { var err error - d.Duration, err = time.ParseDuration(string(text)) + a.Address, err = mail.ParseAddress(string(text)) return err } -``` - -### More complex usage - -Here's an example of how to load the example from the official spec page: - -```toml -# This is a TOML document. Boom. - -title = "TOML Example" - -[owner] -name = "Tom Preston-Werner" -organization = "GitHub" -bio = "GitHub Cofounder & CEO\nLikes tater tots and beer." -dob = 1979-05-27T07:32:00Z # First class dates? Why not? - -[database] -server = "192.168.1.1" -ports = [ 8001, 8001, 8002 ] -connection_max = 5000 -enabled = true - -[servers] - - # You can indent as you please. Tabs or spaces. TOML don't care. - [servers.alpha] - ip = "10.0.0.1" - dc = "eqdc10" - - [servers.beta] - ip = "10.0.0.2" - dc = "eqdc10" - -[clients] -data = [ ["gamma", "delta"], [1, 2] ] # just an update to make sure parsers support it - -# Line breaks are OK when inside arrays -hosts = [ - "alpha", - "omega" -] -``` - -And the corresponding Go types are: - -```go -type tomlConfig struct { - Title string - Owner ownerInfo - DB database `toml:"database"` - Servers map[string]server - Clients clients -} -type ownerInfo struct { - Name string - Org string `toml:"organization"` - Bio string - DOB time.Time -} - -type database struct { - Server string - Ports []int - ConnMax int `toml:"connection_max"` - Enabled bool -} - -type server struct { - IP string - DC string -} - -type clients struct { - Data [][]interface{} - Hosts []string +// Decode it. +func decode() { + blob := ` + contacts = [ + "Donald Duck ", + "Scrooge McDuck ", + ] + ` + + var contacts struct { + Contacts []address + } + + _, err := toml.Decode(blob, &contacts) + if err != nil { + log.Fatal(err) + } + + for _, c := range contacts.Contacts { + fmt.Printf("%#v\n", c.Address) + } + + // Output: + // &mail.Address{Name:"Donald Duck", Address:"donald@duckburg.com"} + // &mail.Address{Name:"Scrooge McDuck", Address:"scrooge@duckburg.com"} } ``` -Note that a case insensitive match will be tried if an exact match can't be -found. +To target TOML specifically you can implement `UnmarshalTOML` TOML interface in +a similar way. -A working example of the above can be found in `_examples/example.{go,toml}`. +### More complex usage +See the [`_example/`](/_example) directory for a more complex example. 
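To make the decoding rules described in the updated README concrete for this patch, here is a minimal, hypothetical sketch (the `runtimeConfig` type, its field names, and the TOML keys are illustrative assumptions, not part of this change); it relies only on `toml.Decode`, struct tags, and the string-based `time.Duration` parsing documented in the updated `decode.go` below:

```go
package main

import (
	"fmt"
	"log"
	"time"

	"github.com/BurntSushi/toml"
)

// runtimeConfig is a hypothetical config struct, used only to illustrate
// struct-tag key mapping and time.Duration decoding from a string.
type runtimeConfig struct {
	SandboxID   string        `toml:"sandbox_id"`
	DialTimeout time.Duration `toml:"dial_timeout"`
}

func main() {
	const blob = `
sandbox_id   = "abc123"
dial_timeout = "30s"
`
	var cfg runtimeConfig
	md, err := toml.Decode(blob, &cfg)
	if err != nil {
		log.Fatal(err)
	}
	// md.Undecoded lists keys in the document that were not mapped into
	// cfg, which helps catch typos in configuration files.
	fmt.Println(cfg.SandboxID, cfg.DialTimeout, md.Undecoded())
}
```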
diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/decode.go b/src/runtime/vendor/github.com/BurntSushi/toml/decode.go index b0fd51d5b6ea..09523315b83b 100644 --- a/src/runtime/vendor/github.com/BurntSushi/toml/decode.go +++ b/src/runtime/vendor/github.com/BurntSushi/toml/decode.go @@ -1,146 +1,188 @@ package toml import ( + "bytes" + "encoding" + "encoding/json" "fmt" "io" "io/ioutil" "math" + "os" "reflect" + "strconv" "strings" "time" ) -func e(format string, args ...interface{}) error { - return fmt.Errorf("toml: "+format, args...) -} - // Unmarshaler is the interface implemented by objects that can unmarshal a // TOML description of themselves. type Unmarshaler interface { UnmarshalTOML(interface{}) error } -// Unmarshal decodes the contents of `p` in TOML format into a pointer `v`. -func Unmarshal(p []byte, v interface{}) error { - _, err := Decode(string(p), v) +// Unmarshal decodes the contents of `data` in TOML format into a pointer `v`. +func Unmarshal(data []byte, v interface{}) error { + _, err := NewDecoder(bytes.NewReader(data)).Decode(v) return err } +// Decode the TOML data in to the pointer v. +// +// See the documentation on Decoder for a description of the decoding process. +func Decode(data string, v interface{}) (MetaData, error) { + return NewDecoder(strings.NewReader(data)).Decode(v) +} + +// DecodeFile is just like Decode, except it will automatically read the +// contents of the file at path and decode it for you. +func DecodeFile(path string, v interface{}) (MetaData, error) { + fp, err := os.Open(path) + if err != nil { + return MetaData{}, err + } + defer fp.Close() + return NewDecoder(fp).Decode(v) +} + // Primitive is a TOML value that hasn't been decoded into a Go value. -// When using the various `Decode*` functions, the type `Primitive` may -// be given to any value, and its decoding will be delayed. // -// A `Primitive` value can be decoded using the `PrimitiveDecode` function. +// This type can be used for any value, which will cause decoding to be delayed. +// You can use the PrimitiveDecode() function to "manually" decode these values. // -// The underlying representation of a `Primitive` value is subject to change. -// Do not rely on it. +// NOTE: The underlying representation of a `Primitive` value is subject to +// change. Do not rely on it. // -// N.B. Primitive values are still parsed, so using them will only avoid -// the overhead of reflection. They can be useful when you don't know the -// exact type of TOML data until run time. +// NOTE: Primitive values are still parsed, so using them will only avoid the +// overhead of reflection. They can be useful when you don't know the exact type +// of TOML data until runtime. type Primitive struct { undecoded interface{} context Key } -// DEPRECATED! -// -// Use MetaData.PrimitiveDecode instead. -func PrimitiveDecode(primValue Primitive, v interface{}) error { - md := MetaData{decoded: make(map[string]bool)} - return md.unify(primValue.undecoded, rvalue(v)) -} +// The significand precision for float32 and float64 is 24 and 53 bits; this is +// the range a natural number can be stored in a float without loss of data. +const ( + maxSafeFloat32Int = 16777215 // 2^24-1 + maxSafeFloat64Int = int64(9007199254740991) // 2^53-1 +) -// PrimitiveDecode is just like the other `Decode*` functions, except it -// decodes a TOML value that has already been parsed. Valid primitive values -// can *only* be obtained from values filled by the decoder functions, -// including this method. 
(i.e., `v` may contain more `Primitive` -// values.) +// Decoder decodes TOML data. // -// Meta data for primitive values is included in the meta data returned by -// the `Decode*` functions with one exception: keys returned by the Undecoded -// method will only reflect keys that were decoded. Namely, any keys hidden -// behind a Primitive will be considered undecoded. Executing this method will -// update the undecoded keys in the meta data. (See the example.) -func (md *MetaData) PrimitiveDecode(primValue Primitive, v interface{}) error { - md.context = primValue.context - defer func() { md.context = nil }() - return md.unify(primValue.undecoded, rvalue(v)) -} - -// Decode will decode the contents of `data` in TOML format into a pointer -// `v`. +// TOML tables correspond to Go structs or maps (dealer's choice – they can be +// used interchangeably). // -// TOML hashes correspond to Go structs or maps. (Dealer's choice. They can be -// used interchangeably.) +// TOML table arrays correspond to either a slice of structs or a slice of maps. // -// TOML arrays of tables correspond to either a slice of structs or a slice -// of maps. +// TOML datetimes correspond to Go time.Time values. Local datetimes are parsed +// in the local timezone. // -// TOML datetimes correspond to Go `time.Time` values. +// time.Duration types are treated as nanoseconds if the TOML value is an +// integer, or they're parsed with time.ParseDuration() if they're strings. // -// All other TOML types (float, string, int, bool and array) correspond -// to the obvious Go types. +// All other TOML types (float, string, int, bool and array) correspond to the +// obvious Go types. // -// An exception to the above rules is if a type implements the -// encoding.TextUnmarshaler interface. In this case, any primitive TOML value -// (floats, strings, integers, booleans and datetimes) will be converted to -// a byte string and given to the value's UnmarshalText method. See the -// Unmarshaler example for a demonstration with time duration strings. +// An exception to the above rules is if a type implements the TextUnmarshaler +// interface, in which case any primitive TOML value (floats, strings, integers, +// booleans, datetimes) will be converted to a []byte and given to the value's +// UnmarshalText method. See the Unmarshaler example for a demonstration with +// email addresses. // // Key mapping // -// TOML keys can map to either keys in a Go map or field names in a Go -// struct. The special `toml` struct tag may be used to map TOML keys to -// struct fields that don't match the key name exactly. (See the example.) -// A case insensitive match to struct names will be tried if an exact match -// can't be found. +// TOML keys can map to either keys in a Go map or field names in a Go struct. +// The special `toml` struct tag can be used to map TOML keys to struct fields +// that don't match the key name exactly (see the example). A case insensitive +// match to struct names will be tried if an exact match can't be found. // -// The mapping between TOML values and Go values is loose. That is, there -// may exist TOML values that cannot be placed into your representation, and -// there may be parts of your representation that do not correspond to -// TOML values. This loose mapping can be made stricter by using the IsDefined -// and/or Undecoded methods on the MetaData returned. +// The mapping between TOML values and Go values is loose. 
That is, there may +// exist TOML values that cannot be placed into your representation, and there +// may be parts of your representation that do not correspond to TOML values. +// This loose mapping can be made stricter by using the IsDefined and/or +// Undecoded methods on the MetaData returned. // -// This decoder will not handle cyclic types. If a cyclic type is passed, -// `Decode` will not terminate. -func Decode(data string, v interface{}) (MetaData, error) { +// This decoder does not handle cyclic types. Decode will not terminate if a +// cyclic type is passed. +type Decoder struct { + r io.Reader +} + +// NewDecoder creates a new Decoder. +func NewDecoder(r io.Reader) *Decoder { + return &Decoder{r: r} +} + +var ( + unmarshalToml = reflect.TypeOf((*Unmarshaler)(nil)).Elem() + unmarshalText = reflect.TypeOf((*encoding.TextUnmarshaler)(nil)).Elem() + primitiveType = reflect.TypeOf((*Primitive)(nil)).Elem() +) + +// Decode TOML data in to the pointer `v`. +func (dec *Decoder) Decode(v interface{}) (MetaData, error) { rv := reflect.ValueOf(v) if rv.Kind() != reflect.Ptr { - return MetaData{}, e("Decode of non-pointer %s", reflect.TypeOf(v)) + s := "%q" + if reflect.TypeOf(v) == nil { + s = "%v" + } + + return MetaData{}, fmt.Errorf("toml: cannot decode to non-pointer "+s, reflect.TypeOf(v)) } if rv.IsNil() { - return MetaData{}, e("Decode of nil %s", reflect.TypeOf(v)) - } - p, err := parse(data) - if err != nil { - return MetaData{}, err + return MetaData{}, fmt.Errorf("toml: cannot decode to nil value of %q", reflect.TypeOf(v)) } - md := MetaData{ - p.mapping, p.types, p.ordered, - make(map[string]bool, len(p.ordered)), nil, + + // Check if this is a supported type: struct, map, interface{}, or something + // that implements UnmarshalTOML or UnmarshalText. + rv = indirect(rv) + rt := rv.Type() + if rv.Kind() != reflect.Struct && rv.Kind() != reflect.Map && + !(rv.Kind() == reflect.Interface && rv.NumMethod() == 0) && + !rt.Implements(unmarshalToml) && !rt.Implements(unmarshalText) { + return MetaData{}, fmt.Errorf("toml: cannot decode to type %s", rt) } - return md, md.unify(p.mapping, indirect(rv)) -} -// DecodeFile is just like Decode, except it will automatically read the -// contents of the file at `fpath` and decode it for you. -func DecodeFile(fpath string, v interface{}) (MetaData, error) { - bs, err := ioutil.ReadFile(fpath) + // TODO: parser should read from io.Reader? Or at the very least, make it + // read from []byte rather than string + data, err := ioutil.ReadAll(dec.r) if err != nil { return MetaData{}, err } - return Decode(string(bs), v) -} -// DecodeReader is just like Decode, except it will consume all bytes -// from the reader and decode it for you. -func DecodeReader(r io.Reader, v interface{}) (MetaData, error) { - bs, err := ioutil.ReadAll(r) + p, err := parse(string(data)) if err != nil { return MetaData{}, err } - return Decode(string(bs), v) + + md := MetaData{ + mapping: p.mapping, + keyInfo: p.keyInfo, + keys: p.ordered, + decoded: make(map[string]struct{}, len(p.ordered)), + context: nil, + data: data, + } + return md, md.unify(p.mapping, rv) +} + +// PrimitiveDecode is just like the other `Decode*` functions, except it +// decodes a TOML value that has already been parsed. Valid primitive values +// can *only* be obtained from values filled by the decoder functions, +// including this method. (i.e., `v` may contain more `Primitive` +// values.) 
+// +// Meta data for primitive values is included in the meta data returned by +// the `Decode*` functions with one exception: keys returned by the Undecoded +// method will only reflect keys that were decoded. Namely, any keys hidden +// behind a Primitive will be considered undecoded. Executing this method will +// update the undecoded keys in the meta data. (See the example.) +func (md *MetaData) PrimitiveDecode(primValue Primitive, v interface{}) error { + md.context = primValue.context + defer func() { md.context = nil }() + return md.unify(primValue.undecoded, rvalue(v)) } // unify performs a sort of type unification based on the structure of `rv`, @@ -149,9 +191,9 @@ func DecodeReader(r io.Reader, v interface{}) (MetaData, error) { // Any type mismatch produces an error. Finding a type that we don't know // how to handle produces an unsupported type error. func (md *MetaData) unify(data interface{}, rv reflect.Value) error { - // Special case. Look for a `Primitive` value. - if rv.Type() == reflect.TypeOf((*Primitive)(nil)).Elem() { + // TODO: #76 would make this superfluous after implemented. + if rv.Type() == primitiveType { // Save the undecoded data and the key context into the primitive // value. context := make(Key, len(md.context)) @@ -163,36 +205,24 @@ func (md *MetaData) unify(data interface{}, rv reflect.Value) error { return nil } - // Special case. Unmarshaler Interface support. - if rv.CanAddr() { - if v, ok := rv.Addr().Interface().(Unmarshaler); ok { - return v.UnmarshalTOML(data) - } + rvi := rv.Interface() + if v, ok := rvi.(Unmarshaler); ok { + return v.UnmarshalTOML(data) } - - // Special case. Handle time.Time values specifically. - // TODO: Remove this code when we decide to drop support for Go 1.1. - // This isn't necessary in Go 1.2 because time.Time satisfies the encoding - // interfaces. - if rv.Type().AssignableTo(rvalue(time.Time{}).Type()) { - return md.unifyDatetime(data, rv) - } - - // Special case. Look for a value satisfying the TextUnmarshaler interface. - if v, ok := rv.Interface().(TextUnmarshaler); ok { + if v, ok := rvi.(encoding.TextUnmarshaler); ok { return md.unifyText(data, v) } - // BUG(burntsushi) + + // TODO: // The behavior here is incorrect whenever a Go type satisfies the - // encoding.TextUnmarshaler interface but also corresponds to a TOML - // hash or array. In particular, the unmarshaler should only be applied - // to primitive TOML values. But at this point, it will be applied to - // all kinds of values and produce an incorrect error whenever those values - // are hashes or arrays (including arrays of tables). + // encoding.TextUnmarshaler interface but also corresponds to a TOML hash or + // array. In particular, the unmarshaler should only be applied to primitive + // TOML values. But at this point, it will be applied to all kinds of values + // and produce an incorrect error whenever those values are hashes or arrays + // (including arrays of tables). k := rv.Kind() - // laziness if k >= reflect.Int && k <= reflect.Uint64 { return md.unifyInt(data, rv) } @@ -218,17 +248,14 @@ func (md *MetaData) unify(data interface{}, rv reflect.Value) error { case reflect.Bool: return md.unifyBool(data, rv) case reflect.Interface: - // we only support empty interfaces. - if rv.NumMethod() > 0 { - return e("unsupported type %s", rv.Type()) + if rv.NumMethod() > 0 { // Only support empty interfaces are supported. 
+ return md.e("unsupported type %s", rv.Type()) } return md.unifyAnything(data, rv) - case reflect.Float32: - fallthrough - case reflect.Float64: + case reflect.Float32, reflect.Float64: return md.unifyFloat64(data, rv) } - return e("unsupported type %s", rv.Kind()) + return md.e("unsupported type %s", rv.Kind()) } func (md *MetaData) unifyStruct(mapping interface{}, rv reflect.Value) error { @@ -237,7 +264,7 @@ func (md *MetaData) unifyStruct(mapping interface{}, rv reflect.Value) error { if mapping == nil { return nil } - return e("type mismatch for %s: expected table but found %T", + return md.e("type mismatch for %s: expected table but found %T", rv.Type().String(), mapping) } @@ -259,17 +286,18 @@ func (md *MetaData) unifyStruct(mapping interface{}, rv reflect.Value) error { for _, i := range f.index { subv = indirect(subv.Field(i)) } + if isUnifiable(subv) { - md.decoded[md.context.add(key).String()] = true + md.decoded[md.context.add(key).String()] = struct{}{} md.context = append(md.context, key) - if err := md.unify(datum, subv); err != nil { + + err := md.unify(datum, subv) + if err != nil { return err } md.context = md.context[0 : len(md.context)-1] } else if f.name != "" { - // Bad user! No soup for you! - return e("cannot write unexported field %s.%s", - rv.Type().String(), f.name) + return md.e("cannot write unexported field %s.%s", rv.Type().String(), f.name) } } } @@ -277,28 +305,43 @@ func (md *MetaData) unifyStruct(mapping interface{}, rv reflect.Value) error { } func (md *MetaData) unifyMap(mapping interface{}, rv reflect.Value) error { + keyType := rv.Type().Key().Kind() + if keyType != reflect.String && keyType != reflect.Interface { + return fmt.Errorf("toml: cannot decode to a map with non-string key type (%s in %q)", + keyType, rv.Type()) + } + tmap, ok := mapping.(map[string]interface{}) if !ok { if tmap == nil { return nil } - return badtype("map", mapping) + return md.badtype("map", mapping) } if rv.IsNil() { rv.Set(reflect.MakeMap(rv.Type())) } for k, v := range tmap { - md.decoded[md.context.add(k).String()] = true + md.decoded[md.context.add(k).String()] = struct{}{} md.context = append(md.context, k) - rvkey := indirect(reflect.New(rv.Type().Key())) rvval := reflect.Indirect(reflect.New(rv.Type().Elem())) - if err := md.unify(v, rvval); err != nil { + + err := md.unify(v, indirect(rvval)) + if err != nil { return err } md.context = md.context[0 : len(md.context)-1] - rvkey.SetString(k) + rvkey := indirect(reflect.New(rv.Type().Key())) + + switch keyType { + case reflect.Interface: + rvkey.Set(reflect.ValueOf(k)) + case reflect.String: + rvkey.SetString(k) + } + rv.SetMapIndex(rvkey, rvval) } return nil @@ -310,12 +353,10 @@ func (md *MetaData) unifyArray(data interface{}, rv reflect.Value) error { if !datav.IsValid() { return nil } - return badtype("slice", data) + return md.badtype("slice", data) } - sliceLen := datav.Len() - if sliceLen != rv.Len() { - return e("expected array length %d; got TOML array of length %d", - rv.Len(), sliceLen) + if l := datav.Len(); l != rv.Len() { + return md.e("expected array length %d; got TOML array of length %d", rv.Len(), l) } return md.unifySliceArray(datav, rv) } @@ -326,7 +367,7 @@ func (md *MetaData) unifySlice(data interface{}, rv reflect.Value) error { if !datav.IsValid() { return nil } - return badtype("slice", data) + return md.badtype("slice", data) } n := datav.Len() if rv.IsNil() || rv.Cap() < n { @@ -337,37 +378,45 @@ func (md *MetaData) unifySlice(data interface{}, rv reflect.Value) error { } func (md 
*MetaData) unifySliceArray(data, rv reflect.Value) error { - sliceLen := data.Len() - for i := 0; i < sliceLen; i++ { - v := data.Index(i).Interface() - sliceval := indirect(rv.Index(i)) - if err := md.unify(v, sliceval); err != nil { + l := data.Len() + for i := 0; i < l; i++ { + err := md.unify(data.Index(i).Interface(), indirect(rv.Index(i))) + if err != nil { return err } } return nil } -func (md *MetaData) unifyDatetime(data interface{}, rv reflect.Value) error { - if _, ok := data.(time.Time); ok { - rv.Set(reflect.ValueOf(data)) +func (md *MetaData) unifyString(data interface{}, rv reflect.Value) error { + _, ok := rv.Interface().(json.Number) + if ok { + if i, ok := data.(int64); ok { + rv.SetString(strconv.FormatInt(i, 10)) + } else if f, ok := data.(float64); ok { + rv.SetString(strconv.FormatFloat(f, 'f', -1, 64)) + } else { + return md.badtype("string", data) + } return nil } - return badtype("time.Time", data) -} -func (md *MetaData) unifyString(data interface{}, rv reflect.Value) error { if s, ok := data.(string); ok { rv.SetString(s) return nil } - return badtype("string", data) + return md.badtype("string", data) } func (md *MetaData) unifyFloat64(data interface{}, rv reflect.Value) error { + rvk := rv.Kind() + if num, ok := data.(float64); ok { - switch rv.Kind() { + switch rvk { case reflect.Float32: + if num < -math.MaxFloat32 || num > math.MaxFloat32 { + return md.parseErr(errParseRange{i: num, size: rvk.String()}) + } fallthrough case reflect.Float64: rv.SetFloat(num) @@ -376,54 +425,60 @@ func (md *MetaData) unifyFloat64(data interface{}, rv reflect.Value) error { } return nil } - return badtype("float", data) + + if num, ok := data.(int64); ok { + if (rvk == reflect.Float32 && (num < -maxSafeFloat32Int || num > maxSafeFloat32Int)) || + (rvk == reflect.Float64 && (num < -maxSafeFloat64Int || num > maxSafeFloat64Int)) { + return md.parseErr(errParseRange{i: num, size: rvk.String()}) + } + rv.SetFloat(float64(num)) + return nil + } + + return md.badtype("float", data) } func (md *MetaData) unifyInt(data interface{}, rv reflect.Value) error { - if num, ok := data.(int64); ok { - if rv.Kind() >= reflect.Int && rv.Kind() <= reflect.Int64 { - switch rv.Kind() { - case reflect.Int, reflect.Int64: - // No bounds checking necessary. - case reflect.Int8: - if num < math.MinInt8 || num > math.MaxInt8 { - return e("value %d is out of range for int8", num) - } - case reflect.Int16: - if num < math.MinInt16 || num > math.MaxInt16 { - return e("value %d is out of range for int16", num) - } - case reflect.Int32: - if num < math.MinInt32 || num > math.MaxInt32 { - return e("value %d is out of range for int32", num) - } + _, ok := rv.Interface().(time.Duration) + if ok { + // Parse as string duration, and fall back to regular integer parsing + // (as nanosecond) if this is not a string. + if s, ok := data.(string); ok { + dur, err := time.ParseDuration(s) + if err != nil { + return md.parseErr(errParseDuration{s}) } - rv.SetInt(num) - } else if rv.Kind() >= reflect.Uint && rv.Kind() <= reflect.Uint64 { - unum := uint64(num) - switch rv.Kind() { - case reflect.Uint, reflect.Uint64: - // No bounds checking necessary. 
- case reflect.Uint8: - if num < 0 || unum > math.MaxUint8 { - return e("value %d is out of range for uint8", num) - } - case reflect.Uint16: - if num < 0 || unum > math.MaxUint16 { - return e("value %d is out of range for uint16", num) - } - case reflect.Uint32: - if num < 0 || unum > math.MaxUint32 { - return e("value %d is out of range for uint32", num) - } - } - rv.SetUint(unum) - } else { - panic("unreachable") + rv.SetInt(int64(dur)) + return nil } - return nil } - return badtype("integer", data) + + num, ok := data.(int64) + if !ok { + return md.badtype("integer", data) + } + + rvk := rv.Kind() + switch { + case rvk >= reflect.Int && rvk <= reflect.Int64: + if (rvk == reflect.Int8 && (num < math.MinInt8 || num > math.MaxInt8)) || + (rvk == reflect.Int16 && (num < math.MinInt16 || num > math.MaxInt16)) || + (rvk == reflect.Int32 && (num < math.MinInt32 || num > math.MaxInt32)) { + return md.parseErr(errParseRange{i: num, size: rvk.String()}) + } + rv.SetInt(num) + case rvk >= reflect.Uint && rvk <= reflect.Uint64: + unum := uint64(num) + if rvk == reflect.Uint8 && (num < 0 || unum > math.MaxUint8) || + rvk == reflect.Uint16 && (num < 0 || unum > math.MaxUint16) || + rvk == reflect.Uint32 && (num < 0 || unum > math.MaxUint32) { + return md.parseErr(errParseRange{i: num, size: rvk.String()}) + } + rv.SetUint(unum) + default: + panic("unreachable") + } + return nil } func (md *MetaData) unifyBool(data interface{}, rv reflect.Value) error { @@ -431,7 +486,7 @@ func (md *MetaData) unifyBool(data interface{}, rv reflect.Value) error { rv.SetBool(b) return nil } - return badtype("boolean", data) + return md.badtype("boolean", data) } func (md *MetaData) unifyAnything(data interface{}, rv reflect.Value) error { @@ -439,10 +494,16 @@ func (md *MetaData) unifyAnything(data interface{}, rv reflect.Value) error { return nil } -func (md *MetaData) unifyText(data interface{}, v TextUnmarshaler) error { +func (md *MetaData) unifyText(data interface{}, v encoding.TextUnmarshaler) error { var s string switch sdata := data.(type) { - case TextMarshaler: + case Marshaler: + text, err := sdata.MarshalTOML() + if err != nil { + return err + } + s = string(text) + case encoding.TextMarshaler: text, err := sdata.MarshalText() if err != nil { return err @@ -459,7 +520,7 @@ func (md *MetaData) unifyText(data interface{}, v TextUnmarshaler) error { case float64: s = fmt.Sprintf("%f", sdata) default: - return badtype("primitive (string-like)", data) + return md.badtype("primitive (string-like)", data) } if err := v.UnmarshalText([]byte(s)); err != nil { return err @@ -467,22 +528,54 @@ func (md *MetaData) unifyText(data interface{}, v TextUnmarshaler) error { return nil } +func (md *MetaData) badtype(dst string, data interface{}) error { + return md.e("incompatible types: TOML value has type %T; destination has type %s", data, dst) +} + +func (md *MetaData) parseErr(err error) error { + k := md.context.String() + return ParseError{ + LastKey: k, + Position: md.keyInfo[k].pos, + Line: md.keyInfo[k].pos.Line, + err: err, + input: string(md.data), + } +} + +func (md *MetaData) e(format string, args ...interface{}) error { + f := "toml: " + if len(md.context) > 0 { + f = fmt.Sprintf("toml: (last key %q): ", md.context) + p := md.keyInfo[md.context.String()].pos + if p.Line > 0 { + f = fmt.Sprintf("toml: line %d (last key %q): ", p.Line, md.context) + } + } + return fmt.Errorf(f+format, args...) +} + // rvalue returns a reflect.Value of `v`. All pointers are resolved. 
func rvalue(v interface{}) reflect.Value { return indirect(reflect.ValueOf(v)) } // indirect returns the value pointed to by a pointer. -// Pointers are followed until the value is not a pointer. -// New values are allocated for each nil pointer. // -// An exception to this rule is if the value satisfies an interface of -// interest to us (like encoding.TextUnmarshaler). +// Pointers are followed until the value is not a pointer. New values are +// allocated for each nil pointer. +// +// An exception to this rule is if the value satisfies an interface of interest +// to us (like encoding.TextUnmarshaler). func indirect(v reflect.Value) reflect.Value { if v.Kind() != reflect.Ptr { if v.CanSet() { pv := v.Addr() - if _, ok := pv.Interface().(TextUnmarshaler); ok { + pvi := pv.Interface() + if _, ok := pvi.(encoding.TextUnmarshaler); ok { + return pv + } + if _, ok := pvi.(Unmarshaler); ok { return pv } } @@ -498,12 +591,12 @@ func isUnifiable(rv reflect.Value) bool { if rv.CanSet() { return true } - if _, ok := rv.Interface().(TextUnmarshaler); ok { + rvi := rv.Interface() + if _, ok := rvi.(encoding.TextUnmarshaler); ok { + return true + } + if _, ok := rvi.(Unmarshaler); ok { return true } return false } - -func badtype(expected string, data interface{}) error { - return e("cannot load TOML value of type %T into a Go %s", data, expected) -} diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/decode_go116.go b/src/runtime/vendor/github.com/BurntSushi/toml/decode_go116.go new file mode 100644 index 000000000000..eddfb641b862 --- /dev/null +++ b/src/runtime/vendor/github.com/BurntSushi/toml/decode_go116.go @@ -0,0 +1,19 @@ +//go:build go1.16 +// +build go1.16 + +package toml + +import ( + "io/fs" +) + +// DecodeFS is just like Decode, except it will automatically read the contents +// of the file at `path` from a fs.FS instance. +func DecodeFS(fsys fs.FS, path string, v interface{}) (MetaData, error) { + fp, err := fsys.Open(path) + if err != nil { + return MetaData{}, err + } + defer fp.Close() + return NewDecoder(fp).Decode(v) +} diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/decode_meta.go b/src/runtime/vendor/github.com/BurntSushi/toml/decode_meta.go deleted file mode 100644 index b9914a6798cf..000000000000 --- a/src/runtime/vendor/github.com/BurntSushi/toml/decode_meta.go +++ /dev/null @@ -1,121 +0,0 @@ -package toml - -import "strings" - -// MetaData allows access to meta information about TOML data that may not -// be inferrable via reflection. In particular, whether a key has been defined -// and the TOML type of a key. -type MetaData struct { - mapping map[string]interface{} - types map[string]tomlType - keys []Key - decoded map[string]bool - context Key // Used only during decoding. -} - -// IsDefined returns true if the key given exists in the TOML data. The key -// should be specified hierarchially. e.g., -// -// // access the TOML key 'a.b.c' -// IsDefined("a", "b", "c") -// -// IsDefined will return false if an empty key given. Keys are case sensitive. -func (md *MetaData) IsDefined(key ...string) bool { - if len(key) == 0 { - return false - } - - var hash map[string]interface{} - var ok bool - var hashOrVal interface{} = md.mapping - for _, k := range key { - if hash, ok = hashOrVal.(map[string]interface{}); !ok { - return false - } - if hashOrVal, ok = hash[k]; !ok { - return false - } - } - return true -} - -// Type returns a string representation of the type of the key specified. 
-// -// Type will return the empty string if given an empty key or a key that -// does not exist. Keys are case sensitive. -func (md *MetaData) Type(key ...string) string { - fullkey := strings.Join(key, ".") - if typ, ok := md.types[fullkey]; ok { - return typ.typeString() - } - return "" -} - -// Key is the type of any TOML key, including key groups. Use (MetaData).Keys -// to get values of this type. -type Key []string - -func (k Key) String() string { - return strings.Join(k, ".") -} - -func (k Key) maybeQuotedAll() string { - var ss []string - for i := range k { - ss = append(ss, k.maybeQuoted(i)) - } - return strings.Join(ss, ".") -} - -func (k Key) maybeQuoted(i int) string { - quote := false - for _, c := range k[i] { - if !isBareKeyChar(c) { - quote = true - break - } - } - if quote { - return "\"" + strings.Replace(k[i], "\"", "\\\"", -1) + "\"" - } - return k[i] -} - -func (k Key) add(piece string) Key { - newKey := make(Key, len(k)+1) - copy(newKey, k) - newKey[len(k)] = piece - return newKey -} - -// Keys returns a slice of every key in the TOML data, including key groups. -// Each key is itself a slice, where the first element is the top of the -// hierarchy and the last is the most specific. -// -// The list will have the same order as the keys appeared in the TOML data. -// -// All keys returned are non-empty. -func (md *MetaData) Keys() []Key { - return md.keys -} - -// Undecoded returns all keys that have not been decoded in the order in which -// they appear in the original TOML document. -// -// This includes keys that haven't been decoded because of a Primitive value. -// Once the Primitive value is decoded, the keys will be considered decoded. -// -// Also note that decoding into an empty interface will result in no decoding, -// and so no keys will be considered decoded. -// -// In this sense, the Undecoded keys correspond to keys in the TOML document -// that do not have a concrete type in your representation. -func (md *MetaData) Undecoded() []Key { - undecoded := make([]Key, 0, len(md.keys)) - for _, key := range md.keys { - if !md.decoded[key.String()] { - undecoded = append(undecoded, key) - } - } - return undecoded -} diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/deprecated.go b/src/runtime/vendor/github.com/BurntSushi/toml/deprecated.go new file mode 100644 index 000000000000..c6af3f239ddf --- /dev/null +++ b/src/runtime/vendor/github.com/BurntSushi/toml/deprecated.go @@ -0,0 +1,21 @@ +package toml + +import ( + "encoding" + "io" +) + +// Deprecated: use encoding.TextMarshaler +type TextMarshaler encoding.TextMarshaler + +// Deprecated: use encoding.TextUnmarshaler +type TextUnmarshaler encoding.TextUnmarshaler + +// Deprecated: use MetaData.PrimitiveDecode. +func PrimitiveDecode(primValue Primitive, v interface{}) error { + md := MetaData{decoded: make(map[string]struct{})} + return md.unify(primValue.undecoded, rvalue(v)) +} + +// Deprecated: use NewDecoder(reader).Decode(&value). +func DecodeReader(r io.Reader, v interface{}) (MetaData, error) { return NewDecoder(r).Decode(v) } diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/doc.go b/src/runtime/vendor/github.com/BurntSushi/toml/doc.go index b371f396edca..099c4a77d2d3 100644 --- a/src/runtime/vendor/github.com/BurntSushi/toml/doc.go +++ b/src/runtime/vendor/github.com/BurntSushi/toml/doc.go @@ -1,27 +1,13 @@ /* -Package toml provides facilities for decoding and encoding TOML configuration -files via reflection. 
There is also support for delaying decoding with -the Primitive type, and querying the set of keys in a TOML document with the -MetaData type. +Package toml implements decoding and encoding of TOML files. -The specification implemented: https://github.com/toml-lang/toml +This package supports TOML v1.0.0, as listed on https://toml.io -The sub-command github.com/BurntSushi/toml/cmd/tomlv can be used to verify -whether a file is a valid TOML document. It can also be used to print the -type of each key in a TOML document. +There is also support for delaying decoding with the Primitive type, and +querying the set of keys in a TOML document with the MetaData type. -Testing - -There are two important types of tests used for this package. The first is -contained inside '*_test.go' files and uses the standard Go unit testing -framework. These tests are primarily devoted to holistically testing the -decoder and encoder. - -The second type of testing is used to verify the implementation's adherence -to the TOML specification. These tests have been factored into their own -project: https://github.com/BurntSushi/toml-test - -The reason the tests are in a separate project is so that they can be used by -any implementation of TOML. Namely, it is language agnostic. +The github.com/BurntSushi/toml/cmd/tomlv package implements a TOML validator, +and can be used to verify if TOML document is valid. It can also be used to +print the type of each key. */ package toml diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/encode.go b/src/runtime/vendor/github.com/BurntSushi/toml/encode.go index d905c21a2466..dc8568d1b9b4 100644 --- a/src/runtime/vendor/github.com/BurntSushi/toml/encode.go +++ b/src/runtime/vendor/github.com/BurntSushi/toml/encode.go @@ -2,57 +2,127 @@ package toml import ( "bufio" + "encoding" + "encoding/json" "errors" "fmt" "io" + "math" "reflect" "sort" "strconv" "strings" "time" + + "github.com/BurntSushi/toml/internal" ) type tomlEncodeError struct{ error } var ( - errArrayMixedElementTypes = errors.New( - "toml: cannot encode array with mixed element types") - errArrayNilElement = errors.New( - "toml: cannot encode array with nil element") - errNonString = errors.New( - "toml: cannot encode a map with non-string key type") - errAnonNonStruct = errors.New( - "toml: cannot encode an anonymous field that is not a struct") - errArrayNoTable = errors.New( - "toml: TOML array element cannot contain a table") - errNoKey = errors.New( - "toml: top-level values must be Go maps or structs") - errAnything = errors.New("") // used in testing + errArrayNilElement = errors.New("toml: cannot encode array with nil element") + errNonString = errors.New("toml: cannot encode a map with non-string key type") + errNoKey = errors.New("toml: top-level values must be Go maps or structs") + errAnything = errors.New("") // used in testing ) -var quotedReplacer = strings.NewReplacer( - "\t", "\\t", - "\n", "\\n", - "\r", "\\r", +var dblQuotedReplacer = strings.NewReplacer( "\"", "\\\"", "\\", "\\\\", + "\x00", `\u0000`, + "\x01", `\u0001`, + "\x02", `\u0002`, + "\x03", `\u0003`, + "\x04", `\u0004`, + "\x05", `\u0005`, + "\x06", `\u0006`, + "\x07", `\u0007`, + "\b", `\b`, + "\t", `\t`, + "\n", `\n`, + "\x0b", `\u000b`, + "\f", `\f`, + "\r", `\r`, + "\x0e", `\u000e`, + "\x0f", `\u000f`, + "\x10", `\u0010`, + "\x11", `\u0011`, + "\x12", `\u0012`, + "\x13", `\u0013`, + "\x14", `\u0014`, + "\x15", `\u0015`, + "\x16", `\u0016`, + "\x17", `\u0017`, + "\x18", `\u0018`, + "\x19", `\u0019`, + "\x1a", `\u001a`, + "\x1b", 
`\u001b`, + "\x1c", `\u001c`, + "\x1d", `\u001d`, + "\x1e", `\u001e`, + "\x1f", `\u001f`, + "\x7f", `\u007f`, +) + +var ( + marshalToml = reflect.TypeOf((*Marshaler)(nil)).Elem() + marshalText = reflect.TypeOf((*encoding.TextMarshaler)(nil)).Elem() + timeType = reflect.TypeOf((*time.Time)(nil)).Elem() ) -// Encoder controls the encoding of Go values to a TOML document to some -// io.Writer. +// Marshaler is the interface implemented by types that can marshal themselves +// into valid TOML. +type Marshaler interface { + MarshalTOML() ([]byte, error) +} + +// Encoder encodes a Go to a TOML document. +// +// The mapping between Go values and TOML values should be precisely the same as +// for the Decode* functions. +// +// time.Time is encoded as a RFC 3339 string, and time.Duration as its string +// representation. +// +// The toml.Marshaler and encoder.TextMarshaler interfaces are supported to +// encoding the value as custom TOML. +// +// If you want to write arbitrary binary data then you will need to use +// something like base64 since TOML does not have any binary types. +// +// When encoding TOML hashes (Go maps or structs), keys without any sub-hashes +// are encoded first. +// +// Go maps will be sorted alphabetically by key for deterministic output. +// +// The toml struct tag can be used to provide the key name; if omitted the +// struct field name will be used. If the "omitempty" option is present the +// following value will be skipped: +// +// - arrays, slices, maps, and string with len of 0 +// - struct with all zero values +// - bool false +// +// If omitzero is given all int and float types with a value of 0 will be +// skipped. +// +// Encoding Go values without a corresponding TOML representation will return an +// error. Examples of this includes maps with non-string keys, slices with nil +// elements, embedded non-struct types, and nested slices containing maps or +// structs. (e.g. [][]map[string]string is not allowed but []map[string]string +// is okay, as is []map[string][]string). // -// The indentation level can be controlled with the Indent field. +// NOTE: only exported keys are encoded due to the use of reflection. Unexported +// keys are silently discarded. type Encoder struct { - // A single indentation level. By default it is two spaces. + // String to use for a single indentation level; default is two spaces. Indent string - // hasWritten is whether we have written any output to w yet. - hasWritten bool w *bufio.Writer + hasWritten bool // written any output to w yet? } -// NewEncoder returns a TOML encoder that encodes Go values to the io.Writer -// given. By default, a single indentation level is 2 spaces. +// NewEncoder create a new Encoder. func NewEncoder(w io.Writer) *Encoder { return &Encoder{ w: bufio.NewWriter(w), @@ -60,29 +130,10 @@ func NewEncoder(w io.Writer) *Encoder { } } -// Encode writes a TOML representation of the Go value to the underlying -// io.Writer. If the value given cannot be encoded to a valid TOML document, -// then an error is returned. +// Encode writes a TOML representation of the Go value to the Encoder's writer. // -// The mapping between Go values and TOML values should be precisely the same -// as for the Decode* functions. Similarly, the TextMarshaler interface is -// supported by encoding the resulting bytes as strings. (If you want to write -// arbitrary binary data then you will need to use something like base64 since -// TOML does not have any binary types.) 
-// -// When encoding TOML hashes (i.e., Go maps or structs), keys without any -// sub-hashes are encoded first. -// -// If a Go map is encoded, then its keys are sorted alphabetically for -// deterministic output. More control over this behavior may be provided if -// there is demand for it. -// -// Encoding Go values without a corresponding TOML representation---like map -// types with non-string keys---will cause an error to be returned. Similarly -// for mixed arrays/slices, arrays/slices with nil elements, embedded -// non-struct types and nested slices containing maps or structs. -// (e.g., [][]map[string]string is not allowed but []map[string]string is OK -// and so is []map[string][]string.) +// An error is returned if the value given cannot be encoded to a valid TOML +// document. func (enc *Encoder) Encode(v interface{}) error { rv := eindirect(reflect.ValueOf(v)) if err := enc.safeEncode(Key([]string{}), rv); err != nil { @@ -106,13 +157,15 @@ func (enc *Encoder) safeEncode(key Key, rv reflect.Value) (err error) { } func (enc *Encoder) encode(key Key, rv reflect.Value) { - // Special case. Time needs to be in ISO8601 format. - // Special case. If we can marshal the type to text, then we used that. - // Basically, this prevents the encoder for handling these types as - // generic structs (or whatever the underlying type of a TextMarshaler is). - switch rv.Interface().(type) { - case time.Time, TextMarshaler: - enc.keyEqElement(key, rv) + // If we can marshal the type to text, then we use that. This prevents the + // encoder for handling these types as generic structs (or whatever the + // underlying type of a TextMarshaler is). + switch { + case isMarshaler(rv): + enc.writeKeyValue(key, rv, false) + return + case rv.Type() == primitiveType: // TODO: #76 would make this superfluous after implemented. + enc.encode(key, reflect.ValueOf(rv.Interface().(Primitive).undecoded)) return } @@ -123,12 +176,12 @@ func (enc *Encoder) encode(key Key, rv reflect.Value) { reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.String, reflect.Bool: - enc.keyEqElement(key, rv) + enc.writeKeyValue(key, rv, false) case reflect.Array, reflect.Slice: if typeEqual(tomlArrayHash, tomlTypeOfGo(rv)) { enc.eArrayOfTables(key, rv) } else { - enc.keyEqElement(key, rv) + enc.writeKeyValue(key, rv, false) } case reflect.Interface: if rv.IsNil() { @@ -148,55 +201,114 @@ func (enc *Encoder) encode(key Key, rv reflect.Value) { case reflect.Struct: enc.eTable(key, rv) default: - panic(e("unsupported type for key '%s': %s", key, k)) + encPanic(fmt.Errorf("unsupported type for key '%s': %s", key, k)) } } -// eElement encodes any value that can be an array element (primitives and -// arrays). +// eElement encodes any value that can be an array element. func (enc *Encoder) eElement(rv reflect.Value) { switch v := rv.Interface().(type) { - case time.Time: - // Special case time.Time as a primitive. Has to come before - // TextMarshaler below because time.Time implements - // encoding.TextMarshaler, but we need to always use UTC. - enc.wf(v.UTC().Format("2006-01-02T15:04:05Z")) + case time.Time: // Using TextMarshaler adds extra quotes, which we don't want. 
+ format := time.RFC3339Nano + switch v.Location() { + case internal.LocalDatetime: + format = "2006-01-02T15:04:05.999999999" + case internal.LocalDate: + format = "2006-01-02" + case internal.LocalTime: + format = "15:04:05.999999999" + } + switch v.Location() { + default: + enc.wf(v.Format(format)) + case internal.LocalDatetime, internal.LocalDate, internal.LocalTime: + enc.wf(v.In(time.UTC).Format(format)) + } return - case TextMarshaler: - // Special case. Use text marshaler if it's available for this value. - if s, err := v.MarshalText(); err != nil { + case Marshaler: + s, err := v.MarshalTOML() + if err != nil { encPanic(err) - } else { - enc.writeQuoted(string(s)) } + if s == nil { + encPanic(errors.New("MarshalTOML returned nil and no error")) + } + enc.w.Write(s) + return + case encoding.TextMarshaler: + s, err := v.MarshalText() + if err != nil { + encPanic(err) + } + if s == nil { + encPanic(errors.New("MarshalText returned nil and no error")) + } + enc.writeQuoted(string(s)) + return + case time.Duration: + enc.writeQuoted(v.String()) return + case json.Number: + n, _ := rv.Interface().(json.Number) + + if n == "" { /// Useful zero value. + enc.w.WriteByte('0') + return + } else if v, err := n.Int64(); err == nil { + enc.eElement(reflect.ValueOf(v)) + return + } else if v, err := n.Float64(); err == nil { + enc.eElement(reflect.ValueOf(v)) + return + } + encPanic(errors.New(fmt.Sprintf("Unable to convert \"%s\" to neither int64 nor float64", n))) } + switch rv.Kind() { + case reflect.Ptr: + enc.eElement(rv.Elem()) + return + case reflect.String: + enc.writeQuoted(rv.String()) case reflect.Bool: enc.wf(strconv.FormatBool(rv.Bool())) - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, - reflect.Int64: + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: enc.wf(strconv.FormatInt(rv.Int(), 10)) - case reflect.Uint, reflect.Uint8, reflect.Uint16, - reflect.Uint32, reflect.Uint64: + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: enc.wf(strconv.FormatUint(rv.Uint(), 10)) case reflect.Float32: - enc.wf(floatAddDecimal(strconv.FormatFloat(rv.Float(), 'f', -1, 32))) + f := rv.Float() + if math.IsNaN(f) { + enc.wf("nan") + } else if math.IsInf(f, 0) { + enc.wf("%cinf", map[bool]byte{true: '-', false: '+'}[math.Signbit(f)]) + } else { + enc.wf(floatAddDecimal(strconv.FormatFloat(f, 'f', -1, 32))) + } case reflect.Float64: - enc.wf(floatAddDecimal(strconv.FormatFloat(rv.Float(), 'f', -1, 64))) + f := rv.Float() + if math.IsNaN(f) { + enc.wf("nan") + } else if math.IsInf(f, 0) { + enc.wf("%cinf", map[bool]byte{true: '-', false: '+'}[math.Signbit(f)]) + } else { + enc.wf(floatAddDecimal(strconv.FormatFloat(f, 'f', -1, 64))) + } case reflect.Array, reflect.Slice: enc.eArrayOrSliceElement(rv) + case reflect.Struct: + enc.eStruct(nil, rv, true) + case reflect.Map: + enc.eMap(nil, rv, true) case reflect.Interface: enc.eElement(rv.Elem()) - case reflect.String: - enc.writeQuoted(rv.String()) default: - panic(e("unexpected primitive type: %s", rv.Kind())) + encPanic(fmt.Errorf("unexpected type: %T", rv.Interface())) } } -// By the TOML spec, all floats must have a decimal with at least one -// number on either side. +// By the TOML spec, all floats must have a decimal with at least one number on +// either side. 
func floatAddDecimal(fstr string) string { if !strings.Contains(fstr, ".") { return fstr + ".0" @@ -205,14 +317,14 @@ func floatAddDecimal(fstr string) string { } func (enc *Encoder) writeQuoted(s string) { - enc.wf("\"%s\"", quotedReplacer.Replace(s)) + enc.wf("\"%s\"", dblQuotedReplacer.Replace(s)) } func (enc *Encoder) eArrayOrSliceElement(rv reflect.Value) { length := rv.Len() enc.wf("[") for i := 0; i < length; i++ { - elem := rv.Index(i) + elem := eindirect(rv.Index(i)) enc.eElement(elem) if i != length-1 { enc.wf(", ") @@ -226,44 +338,43 @@ func (enc *Encoder) eArrayOfTables(key Key, rv reflect.Value) { encPanic(errNoKey) } for i := 0; i < rv.Len(); i++ { - trv := rv.Index(i) + trv := eindirect(rv.Index(i)) if isNil(trv) { continue } - panicIfInvalidKey(key) enc.newline() - enc.wf("%s[[%s]]", enc.indentStr(key), key.maybeQuotedAll()) + enc.wf("%s[[%s]]", enc.indentStr(key), key) enc.newline() - enc.eMapOrStruct(key, trv) + enc.eMapOrStruct(key, trv, false) } } func (enc *Encoder) eTable(key Key, rv reflect.Value) { - panicIfInvalidKey(key) if len(key) == 1 { // Output an extra newline between top-level tables. // (The newline isn't written if nothing else has been written though.) enc.newline() } if len(key) > 0 { - enc.wf("%s[%s]", enc.indentStr(key), key.maybeQuotedAll()) + enc.wf("%s[%s]", enc.indentStr(key), key) enc.newline() } - enc.eMapOrStruct(key, rv) + enc.eMapOrStruct(key, rv, false) } -func (enc *Encoder) eMapOrStruct(key Key, rv reflect.Value) { - switch rv := eindirect(rv); rv.Kind() { +func (enc *Encoder) eMapOrStruct(key Key, rv reflect.Value, inline bool) { + switch rv.Kind() { case reflect.Map: - enc.eMap(key, rv) + enc.eMap(key, rv, inline) case reflect.Struct: - enc.eStruct(key, rv) + enc.eStruct(key, rv, inline) default: + // Should never happen? panic("eTable: unhandled reflect.Value Kind: " + rv.Kind().String()) } } -func (enc *Encoder) eMap(key Key, rv reflect.Value) { +func (enc *Encoder) eMap(key Key, rv reflect.Value, inline bool) { rt := rv.Type() if rt.Key().Kind() != reflect.String { encPanic(errNonString) @@ -274,118 +385,179 @@ func (enc *Encoder) eMap(key Key, rv reflect.Value) { var mapKeysDirect, mapKeysSub []string for _, mapKey := range rv.MapKeys() { k := mapKey.String() - if typeIsHash(tomlTypeOfGo(rv.MapIndex(mapKey))) { + if typeIsTable(tomlTypeOfGo(eindirect(rv.MapIndex(mapKey)))) { mapKeysSub = append(mapKeysSub, k) } else { mapKeysDirect = append(mapKeysDirect, k) } } - var writeMapKeys = func(mapKeys []string) { + var writeMapKeys = func(mapKeys []string, trailC bool) { sort.Strings(mapKeys) - for _, mapKey := range mapKeys { - mrv := rv.MapIndex(reflect.ValueOf(mapKey)) - if isNil(mrv) { - // Don't write anything for nil fields. 
+ for i, mapKey := range mapKeys { + val := eindirect(rv.MapIndex(reflect.ValueOf(mapKey))) + if isNil(val) { continue } - enc.encode(key.add(mapKey), mrv) + + if inline { + enc.writeKeyValue(Key{mapKey}, val, true) + if trailC || i != len(mapKeys)-1 { + enc.wf(", ") + } + } else { + enc.encode(key.add(mapKey), val) + } } } - writeMapKeys(mapKeysDirect) - writeMapKeys(mapKeysSub) + + if inline { + enc.wf("{") + } + writeMapKeys(mapKeysDirect, len(mapKeysSub) > 0) + writeMapKeys(mapKeysSub, false) + if inline { + enc.wf("}") + } +} + +const is32Bit = (32 << (^uint(0) >> 63)) == 32 + +func pointerTo(t reflect.Type) reflect.Type { + if t.Kind() == reflect.Ptr { + return pointerTo(t.Elem()) + } + return t } -func (enc *Encoder) eStruct(key Key, rv reflect.Value) { +func (enc *Encoder) eStruct(key Key, rv reflect.Value, inline bool) { // Write keys for fields directly under this key first, because if we write - // a field that creates a new table, then all keys under it will be in that + // a field that creates a new table then all keys under it will be in that // table (not the one we're writing here). - rt := rv.Type() - var fieldsDirect, fieldsSub [][]int - var addFields func(rt reflect.Type, rv reflect.Value, start []int) + // + // Fields is a [][]int: for fieldsDirect this always has one entry (the + // struct index). For fieldsSub it contains two entries: the parent field + // index from tv, and the field indexes for the fields of the sub. + var ( + rt = rv.Type() + fieldsDirect, fieldsSub [][]int + addFields func(rt reflect.Type, rv reflect.Value, start []int) + ) addFields = func(rt reflect.Type, rv reflect.Value, start []int) { for i := 0; i < rt.NumField(); i++ { f := rt.Field(i) - // skip unexported fields - if f.PkgPath != "" && !f.Anonymous { + isEmbed := f.Anonymous && pointerTo(f.Type).Kind() == reflect.Struct + if f.PkgPath != "" && !isEmbed { /// Skip unexported fields. + continue + } + opts := getOptions(f.Tag) + if opts.skip { continue } - frv := rv.Field(i) - if f.Anonymous { - t := f.Type - switch t.Kind() { - case reflect.Struct: - // Treat anonymous struct fields with - // tag names as though they are not - // anonymous, like encoding/json does. - if getOptions(f.Tag).name == "" { - addFields(t, frv, f.Index) - continue - } - case reflect.Ptr: - if t.Elem().Kind() == reflect.Struct && - getOptions(f.Tag).name == "" { - if !frv.IsNil() { - addFields(t.Elem(), frv.Elem(), f.Index) - } - continue - } - // Fall through to the normal field encoding logic below - // for non-struct anonymous fields. + + frv := eindirect(rv.Field(i)) + + // Treat anonymous struct fields with tag names as though they are + // not anonymous, like encoding/json does. + // + // Non-struct anonymous fields use the normal encoding logic. + if isEmbed { + if getOptions(f.Tag).name == "" && frv.Kind() == reflect.Struct { + addFields(frv.Type(), frv, append(start, f.Index...)) + continue } } - if typeIsHash(tomlTypeOfGo(frv)) { + if typeIsTable(tomlTypeOfGo(frv)) { fieldsSub = append(fieldsSub, append(start, f.Index...)) } else { - fieldsDirect = append(fieldsDirect, append(start, f.Index...)) + // Copy so it works correct on 32bit archs; not clear why this + // is needed. See #314, and https://www.reddit.com/r/golang/comments/pnx8v4 + // This also works fine on 64bit, but 32bit archs are somewhat + // rare and this is a wee bit faster. 
+ if is32Bit { + copyStart := make([]int, len(start)) + copy(copyStart, start) + fieldsDirect = append(fieldsDirect, append(copyStart, f.Index...)) + } else { + fieldsDirect = append(fieldsDirect, append(start, f.Index...)) + } } } } addFields(rt, rv, nil) - var writeFields = func(fields [][]int) { + writeFields := func(fields [][]int) { for _, fieldIndex := range fields { - sft := rt.FieldByIndex(fieldIndex) - sf := rv.FieldByIndex(fieldIndex) - if isNil(sf) { - // Don't write anything for nil fields. + fieldType := rt.FieldByIndex(fieldIndex) + fieldVal := eindirect(rv.FieldByIndex(fieldIndex)) + + if isNil(fieldVal) { /// Don't write anything for nil fields. continue } - opts := getOptions(sft.Tag) + opts := getOptions(fieldType.Tag) if opts.skip { continue } - keyName := sft.Name + keyName := fieldType.Name if opts.name != "" { keyName = opts.name } - if opts.omitempty && isEmpty(sf) { + if opts.omitempty && isEmpty(fieldVal) { continue } - if opts.omitzero && isZero(sf) { + if opts.omitzero && isZero(fieldVal) { continue } - enc.encode(key.add(keyName), sf) + if inline { + enc.writeKeyValue(Key{keyName}, fieldVal, true) + if fieldIndex[0] != len(fields)-1 { + enc.wf(", ") + } + } else { + enc.encode(key.add(keyName), fieldVal) + } } } + + if inline { + enc.wf("{") + } writeFields(fieldsDirect) writeFields(fieldsSub) + if inline { + enc.wf("}") + } } -// tomlTypeName returns the TOML type name of the Go value's type. It is -// used to determine whether the types of array elements are mixed (which is -// forbidden). If the Go value is nil, then it is illegal for it to be an array -// element, and valueIsNil is returned as true. - -// Returns the TOML type of a Go value. The type may be `nil`, which means -// no concrete TOML type could be found. +// tomlTypeOfGo returns the TOML type name of the Go value's type. +// +// It is used to determine whether the types of array elements are mixed (which +// is forbidden). If the Go value is nil, then it is illegal for it to be an +// array element, and valueIsNil is returned as true. +// +// The type may be `nil`, which means no concrete TOML type could be found. func tomlTypeOfGo(rv reflect.Value) tomlType { if isNil(rv) || !rv.IsValid() { return nil } + + if rv.Kind() == reflect.Struct { + if rv.Type() == timeType { + return tomlDatetime + } + if isMarshaler(rv) { + return tomlString + } + return tomlHash + } + + if isMarshaler(rv) { + return tomlString + } + switch rv.Kind() { case reflect.Bool: return tomlBool @@ -397,7 +569,7 @@ func tomlTypeOfGo(rv reflect.Value) tomlType { case reflect.Float32, reflect.Float64: return tomlFloat case reflect.Array, reflect.Slice: - if typeEqual(tomlHash, tomlArrayType(rv)) { + if isTableArray(rv) { return tomlArrayHash } return tomlArray @@ -407,54 +579,35 @@ func tomlTypeOfGo(rv reflect.Value) tomlType { return tomlString case reflect.Map: return tomlHash - case reflect.Struct: - switch rv.Interface().(type) { - case time.Time: - return tomlDatetime - case TextMarshaler: - return tomlString - default: - return tomlHash - } default: - panic("unexpected reflect.Kind: " + rv.Kind().String()) + encPanic(errors.New("unsupported type: " + rv.Kind().String())) + panic("unreachable") } } -// tomlArrayType returns the element type of a TOML array. The type returned -// may be nil if it cannot be determined (e.g., a nil slice or a zero length -// slize). 
This function may also panic if it finds a type that cannot be -// expressed in TOML (such as nil elements, heterogeneous arrays or directly -// nested arrays of tables). -func tomlArrayType(rv reflect.Value) tomlType { - if isNil(rv) || !rv.IsValid() || rv.Len() == 0 { - return nil - } - firstType := tomlTypeOfGo(rv.Index(0)) - if firstType == nil { - encPanic(errArrayNilElement) +func isMarshaler(rv reflect.Value) bool { + return rv.Type().Implements(marshalText) || rv.Type().Implements(marshalToml) +} + +// isTableArray reports if all entries in the array or slice are a table. +func isTableArray(arr reflect.Value) bool { + if isNil(arr) || !arr.IsValid() || arr.Len() == 0 { + return false } - rvlen := rv.Len() - for i := 1; i < rvlen; i++ { - elem := rv.Index(i) - switch elemType := tomlTypeOfGo(elem); { - case elemType == nil: + ret := true + for i := 0; i < arr.Len(); i++ { + tt := tomlTypeOfGo(eindirect(arr.Index(i))) + // Don't allow nil. + if tt == nil { encPanic(errArrayNilElement) - case !typeEqual(firstType, elemType): - encPanic(errArrayMixedElementTypes) } - } - // If we have a nested array, then we must make sure that the nested - // array contains ONLY primitives. - // This checks arbitrarily nested arrays. - if typeEqual(firstType, tomlArray) || typeEqual(firstType, tomlArrayHash) { - nest := tomlArrayType(eindirect(rv.Index(0))) - if typeEqual(nest, tomlHash) || typeEqual(nest, tomlArrayHash) { - encPanic(errArrayNoTable) + + if ret && !typeEqual(tomlHash, tt) { + ret = false } } - return firstType + return ret } type tagOptions struct { @@ -499,6 +652,8 @@ func isEmpty(rv reflect.Value) bool { switch rv.Kind() { case reflect.Array, reflect.Slice, reflect.Map, reflect.String: return rv.Len() == 0 + case reflect.Struct: + return reflect.Zero(rv.Type()).Interface() == rv.Interface() case reflect.Bool: return !rv.Bool() } @@ -511,18 +666,32 @@ func (enc *Encoder) newline() { } } -func (enc *Encoder) keyEqElement(key Key, val reflect.Value) { +// Write a key/value pair: +// +// key = +// +// This is also used for "k = v" in inline tables; so something like this will +// be written in three calls: +// +// ┌────────────────────┐ +// │ ┌───┐ ┌─────┐│ +// v v v v vv +// key = {k = v, k2 = v2} +// +func (enc *Encoder) writeKeyValue(key Key, val reflect.Value, inline bool) { if len(key) == 0 { encPanic(errNoKey) } - panicIfInvalidKey(key) enc.wf("%s%s = ", enc.indentStr(key), key.maybeQuoted(len(key)-1)) enc.eElement(val) - enc.newline() + if !inline { + enc.newline() + } } func (enc *Encoder) wf(format string, v ...interface{}) { - if _, err := fmt.Fprintf(enc.w, format, v...); err != nil { + _, err := fmt.Fprintf(enc.w, format, v...) + if err != nil { encPanic(err) } enc.hasWritten = true @@ -536,13 +705,25 @@ func encPanic(err error) { panic(tomlEncodeError{err}) } +// Resolve any level of pointers to the actual value (e.g. **string → string). func eindirect(v reflect.Value) reflect.Value { - switch v.Kind() { - case reflect.Ptr, reflect.Interface: - return eindirect(v.Elem()) - default: + if v.Kind() != reflect.Ptr && v.Kind() != reflect.Interface { + if isMarshaler(v) { + return v + } + if v.CanAddr() { /// Special case for marshalers; see #358. 
+ if pv := v.Addr(); isMarshaler(pv) { + return pv + } + } + return v + } + + if v.IsNil() { return v } + + return eindirect(v.Elem()) } func isNil(rv reflect.Value) bool { @@ -553,16 +734,3 @@ func isNil(rv reflect.Value) bool { return false } } - -func panicIfInvalidKey(key Key) { - for _, k := range key { - if len(k) == 0 { - encPanic(e("Key '%s' is not a valid table name. Key names "+ - "cannot be empty.", key.maybeQuotedAll())) - } - } -} - -func isValidKeyName(s string) bool { - return len(s) != 0 -} diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/encoding_types.go b/src/runtime/vendor/github.com/BurntSushi/toml/encoding_types.go deleted file mode 100644 index d36e1dd6002b..000000000000 --- a/src/runtime/vendor/github.com/BurntSushi/toml/encoding_types.go +++ /dev/null @@ -1,19 +0,0 @@ -// +build go1.2 - -package toml - -// In order to support Go 1.1, we define our own TextMarshaler and -// TextUnmarshaler types. For Go 1.2+, we just alias them with the -// standard library interfaces. - -import ( - "encoding" -) - -// TextMarshaler is a synonym for encoding.TextMarshaler. It is defined here -// so that Go 1.1 can be supported. -type TextMarshaler encoding.TextMarshaler - -// TextUnmarshaler is a synonym for encoding.TextUnmarshaler. It is defined -// here so that Go 1.1 can be supported. -type TextUnmarshaler encoding.TextUnmarshaler diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/encoding_types_1.1.go b/src/runtime/vendor/github.com/BurntSushi/toml/encoding_types_1.1.go deleted file mode 100644 index e8d503d04690..000000000000 --- a/src/runtime/vendor/github.com/BurntSushi/toml/encoding_types_1.1.go +++ /dev/null @@ -1,18 +0,0 @@ -// +build !go1.2 - -package toml - -// These interfaces were introduced in Go 1.2, so we add them manually when -// compiling for Go 1.1. - -// TextMarshaler is a synonym for encoding.TextMarshaler. It is defined here -// so that Go 1.1 can be supported. -type TextMarshaler interface { - MarshalText() (text []byte, err error) -} - -// TextUnmarshaler is a synonym for encoding.TextUnmarshaler. It is defined -// here so that Go 1.1 can be supported. -type TextUnmarshaler interface { - UnmarshalText(text []byte) error -} diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/error.go b/src/runtime/vendor/github.com/BurntSushi/toml/error.go new file mode 100644 index 000000000000..2ac24e77eb8c --- /dev/null +++ b/src/runtime/vendor/github.com/BurntSushi/toml/error.go @@ -0,0 +1,276 @@ +package toml + +import ( + "fmt" + "strings" +) + +// ParseError is returned when there is an error parsing the TOML syntax. +// +// For example invalid syntax, duplicate keys, etc. +// +// In addition to the error message itself, you can also print detailed location +// information with context by using ErrorWithPosition(): +// +// toml: error: Key 'fruit' was already created and cannot be used as an array. 
+// +// At line 4, column 2-7: +// +// 2 | fruit = [] +// 3 | +// 4 | [[fruit]] # Not allowed +// ^^^^^ +// +// Furthermore, the ErrorWithUsage() can be used to print the above with some +// more detailed usage guidance: +// +// toml: error: newlines not allowed within inline tables +// +// At line 1, column 18: +// +// 1 | x = [{ key = 42 # +// ^ +// +// Error help: +// +// Inline tables must always be on a single line: +// +// table = {key = 42, second = 43} +// +// It is invalid to split them over multiple lines like so: +// +// # INVALID +// table = { +// key = 42, +// second = 43 +// } +// +// Use regular for this: +// +// [table] +// key = 42 +// second = 43 +type ParseError struct { + Message string // Short technical message. + Usage string // Longer message with usage guidance; may be blank. + Position Position // Position of the error + LastKey string // Last parsed key, may be blank. + Line int // Line the error occurred. Deprecated: use Position. + + err error + input string +} + +// Position of an error. +type Position struct { + Line int // Line number, starting at 1. + Start int // Start of error, as byte offset starting at 0. + Len int // Lenght in bytes. +} + +func (pe ParseError) Error() string { + msg := pe.Message + if msg == "" { // Error from errorf() + msg = pe.err.Error() + } + + if pe.LastKey == "" { + return fmt.Sprintf("toml: line %d: %s", pe.Position.Line, msg) + } + return fmt.Sprintf("toml: line %d (last key %q): %s", + pe.Position.Line, pe.LastKey, msg) +} + +// ErrorWithUsage() returns the error with detailed location context. +// +// See the documentation on ParseError. +func (pe ParseError) ErrorWithPosition() string { + if pe.input == "" { // Should never happen, but just in case. + return pe.Error() + } + + var ( + lines = strings.Split(pe.input, "\n") + col = pe.column(lines) + b = new(strings.Builder) + ) + + msg := pe.Message + if msg == "" { + msg = pe.err.Error() + } + + // TODO: don't show control characters as literals? This may not show up + // well everywhere. + + if pe.Position.Len == 1 { + fmt.Fprintf(b, "toml: error: %s\n\nAt line %d, column %d:\n\n", + msg, pe.Position.Line, col+1) + } else { + fmt.Fprintf(b, "toml: error: %s\n\nAt line %d, column %d-%d:\n\n", + msg, pe.Position.Line, col, col+pe.Position.Len) + } + if pe.Position.Line > 2 { + fmt.Fprintf(b, "% 7d | %s\n", pe.Position.Line-2, lines[pe.Position.Line-3]) + } + if pe.Position.Line > 1 { + fmt.Fprintf(b, "% 7d | %s\n", pe.Position.Line-1, lines[pe.Position.Line-2]) + } + fmt.Fprintf(b, "% 7d | %s\n", pe.Position.Line, lines[pe.Position.Line-1]) + fmt.Fprintf(b, "% 10s%s%s\n", "", strings.Repeat(" ", col), strings.Repeat("^", pe.Position.Len)) + return b.String() +} + +// ErrorWithUsage() returns the error with detailed location context and usage +// guidance. +// +// See the documentation on ParseError. +func (pe ParseError) ErrorWithUsage() string { + m := pe.ErrorWithPosition() + if u, ok := pe.err.(interface{ Usage() string }); ok && u.Usage() != "" { + lines := strings.Split(strings.TrimSpace(u.Usage()), "\n") + for i := range lines { + if lines[i] != "" { + lines[i] = " " + lines[i] + } + } + return m + "Error help:\n\n" + strings.Join(lines, "\n") + "\n" + } + return m +} + +func (pe ParseError) column(lines []string) int { + var pos, col int + for i := range lines { + ll := len(lines[i]) + 1 // +1 for the removed newline + if pos+ll >= pe.Position.Start { + col = pe.Position.Start - pos + if col < 0 { // Should never happen, but just in case. 
+ col = 0 + } + break + } + pos += ll + } + + return col +} + +type ( + errLexControl struct{ r rune } + errLexEscape struct{ r rune } + errLexUTF8 struct{ b byte } + errLexInvalidNum struct{ v string } + errLexInvalidDate struct{ v string } + errLexInlineTableNL struct{} + errLexStringNL struct{} + errParseRange struct { + i interface{} // int or float + size string // "int64", "uint16", etc. + } + errParseDuration struct{ d string } +) + +func (e errLexControl) Error() string { + return fmt.Sprintf("TOML files cannot contain control characters: '0x%02x'", e.r) +} +func (e errLexControl) Usage() string { return "" } + +func (e errLexEscape) Error() string { return fmt.Sprintf(`invalid escape in string '\%c'`, e.r) } +func (e errLexEscape) Usage() string { return usageEscape } +func (e errLexUTF8) Error() string { return fmt.Sprintf("invalid UTF-8 byte: 0x%02x", e.b) } +func (e errLexUTF8) Usage() string { return "" } +func (e errLexInvalidNum) Error() string { return fmt.Sprintf("invalid number: %q", e.v) } +func (e errLexInvalidNum) Usage() string { return "" } +func (e errLexInvalidDate) Error() string { return fmt.Sprintf("invalid date: %q", e.v) } +func (e errLexInvalidDate) Usage() string { return "" } +func (e errLexInlineTableNL) Error() string { return "newlines not allowed within inline tables" } +func (e errLexInlineTableNL) Usage() string { return usageInlineNewline } +func (e errLexStringNL) Error() string { return "strings cannot contain newlines" } +func (e errLexStringNL) Usage() string { return usageStringNewline } +func (e errParseRange) Error() string { return fmt.Sprintf("%v is out of range for %s", e.i, e.size) } +func (e errParseRange) Usage() string { return usageIntOverflow } +func (e errParseDuration) Error() string { return fmt.Sprintf("invalid duration: %q", e.d) } +func (e errParseDuration) Usage() string { return usageDuration } + +const usageEscape = ` +A '\' inside a "-delimited string is interpreted as an escape character. + +The following escape sequences are supported: +\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX + +To prevent a '\' from being recognized as an escape character, use either: + +- a ' or '''-delimited string; escape characters aren't processed in them; or +- write two backslashes to get a single backslash: '\\'. + +If you're trying to add a Windows path (e.g. "C:\Users\martin") then using '/' +instead of '\' will usually also work: "C:/Users/martin". +` + +const usageInlineNewline = ` +Inline tables must always be on a single line: + + table = {key = 42, second = 43} + +It is invalid to split them over multiple lines like so: + + # INVALID + table = { + key = 42, + second = 43 + } + +Use regular for this: + + [table] + key = 42 + second = 43 +` + +const usageStringNewline = ` +Strings must always be on a single line, and cannot span more than one line: + + # INVALID + string = "Hello, + world!" + +Instead use """ or ''' to split strings over multiple lines: + + string = """Hello, + world!""" +` + +const usageIntOverflow = ` +This number is too large; this may be an error in the TOML, but it can also be a +bug in the program that uses too small of an integer. 
+ +The maximum and minimum values are: + + size │ lowest │ highest + ───────┼────────────────┼────────── + int8 │ -128 │ 127 + int16 │ -32,768 │ 32,767 + int32 │ -2,147,483,648 │ 2,147,483,647 + int64 │ -9.2 × 10¹⁷ │ 9.2 × 10¹⁷ + uint8 │ 0 │ 255 + uint16 │ 0 │ 65535 + uint32 │ 0 │ 4294967295 + uint64 │ 0 │ 1.8 × 10¹⁸ + +int refers to int32 on 32-bit systems and int64 on 64-bit systems. +` + +const usageDuration = ` +A duration must be as "number", without any spaces. Valid units are: + + ns nanoseconds (billionth of a second) + us, µs microseconds (millionth of a second) + ms milliseconds (thousands of a second) + s seconds + m minutes + h hours + +You can combine multiple units; for example "5m10s" for 5 minutes and 10 +seconds. +` diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/internal/tz.go b/src/runtime/vendor/github.com/BurntSushi/toml/internal/tz.go new file mode 100644 index 000000000000..022f15bc2b81 --- /dev/null +++ b/src/runtime/vendor/github.com/BurntSushi/toml/internal/tz.go @@ -0,0 +1,36 @@ +package internal + +import "time" + +// Timezones used for local datetime, date, and time TOML types. +// +// The exact way times and dates without a timezone should be interpreted is not +// well-defined in the TOML specification and left to the implementation. These +// defaults to current local timezone offset of the computer, but this can be +// changed by changing these variables before decoding. +// +// TODO: +// Ideally we'd like to offer people the ability to configure the used timezone +// by setting Decoder.Timezone and Encoder.Timezone; however, this is a bit +// tricky: the reason we use three different variables for this is to support +// round-tripping – without these specific TZ names we wouldn't know which +// format to use. +// +// There isn't a good way to encode this right now though, and passing this sort +// of information also ties in to various related issues such as string format +// encoding, encoding of comments, etc. +// +// So, for the time being, just put this in internal until we can write a good +// comprehensive API for doing all of this. +// +// The reason they're exported is because they're referred from in e.g. +// internal/tag. +// +// Note that this behaviour is valid according to the TOML spec as the exact +// behaviour is left up to implementations. +var ( + localOffset = func() int { _, o := time.Now().Zone(); return o }() + LocalDatetime = time.FixedZone("datetime-local", localOffset) + LocalDate = time.FixedZone("date-local", localOffset) + LocalTime = time.FixedZone("time-local", localOffset) +) diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/lex.go b/src/runtime/vendor/github.com/BurntSushi/toml/lex.go index e0a742a8870f..28ed4dd353c5 100644 --- a/src/runtime/vendor/github.com/BurntSushi/toml/lex.go +++ b/src/runtime/vendor/github.com/BurntSushi/toml/lex.go @@ -2,6 +2,8 @@ package toml import ( "fmt" + "reflect" + "runtime" "strings" "unicode" "unicode/utf8" @@ -29,33 +31,20 @@ const ( itemArrayTableStart itemArrayTableEnd itemKeyStart + itemKeyEnd itemCommentStart itemInlineTableStart itemInlineTableEnd ) -const ( - eof = 0 - comma = ',' - tableStart = '[' - tableEnd = ']' - arrayTableStart = '[' - arrayTableEnd = ']' - tableSep = '.' 
- keySep = '=' - arrayStart = '[' - arrayEnd = ']' - commentStart = '#' - stringStart = '"' - stringEnd = '"' - rawStringStart = '\'' - rawStringEnd = '\'' - inlineTableStart = '{' - inlineTableEnd = '}' -) +const eof = 0 type stateFn func(lx *lexer) stateFn +func (p Position) String() string { + return fmt.Sprintf("at line %d; start %d; length %d", p.Line, p.Start, p.Len) +} + type lexer struct { input string start int @@ -64,26 +53,26 @@ type lexer struct { state stateFn items chan item - // Allow for backing up up to three runes. - // This is necessary because TOML contains 3-rune tokens (""" and '''). - prevWidths [3]int - nprev int // how many of prevWidths are in use - // If we emit an eof, we can still back up, but it is not OK to call - // next again. - atEOF bool + // Allow for backing up up to 4 runes. This is necessary because TOML + // contains 3-rune tokens (""" and '''). + prevWidths [4]int + nprev int // how many of prevWidths are in use + atEOF bool // If we emit an eof, we can still back up, but it is not OK to call next again. // A stack of state functions used to maintain context. - // The idea is to reuse parts of the state machine in various places. - // For example, values can appear at the top level or within arbitrarily - // nested arrays. The last state on the stack is used after a value has - // been lexed. Similarly for comments. + // + // The idea is to reuse parts of the state machine in various places. For + // example, values can appear at the top level or within arbitrarily nested + // arrays. The last state on the stack is used after a value has been lexed. + // Similarly for comments. stack []stateFn } type item struct { - typ itemType - val string - line int + typ itemType + val string + err error + pos Position } func (lx *lexer) nextItem() item { @@ -93,6 +82,7 @@ func (lx *lexer) nextItem() item { return item default: lx.state = lx.state(lx) + //fmt.Printf(" STATE %-24s current: %-10s stack: %s\n", lx.state, lx.current(), lx.stack) } } } @@ -101,9 +91,9 @@ func lex(input string) *lexer { lx := &lexer{ input: input, state: lexTop, - line: 1, items: make(chan item, 10), stack: make([]stateFn, 0, 10), + line: 1, } return lx } @@ -125,19 +115,36 @@ func (lx *lexer) current() string { return lx.input[lx.start:lx.pos] } +func (lx lexer) getPos() Position { + p := Position{ + Line: lx.line, + Start: lx.start, + Len: lx.pos - lx.start, + } + if p.Len <= 0 { + p.Len = 1 + } + return p +} + func (lx *lexer) emit(typ itemType) { - lx.items <- item{typ, lx.current(), lx.line} + // Needed for multiline strings ending with an incomplete UTF-8 sequence. 
+ if lx.start > lx.pos { + lx.error(errLexUTF8{lx.input[lx.pos]}) + return + } + lx.items <- item{typ: typ, pos: lx.getPos(), val: lx.current()} lx.start = lx.pos } func (lx *lexer) emitTrim(typ itemType) { - lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line} + lx.items <- item{typ: typ, pos: lx.getPos(), val: strings.TrimSpace(lx.current())} lx.start = lx.pos } func (lx *lexer) next() (r rune) { if lx.atEOF { - panic("next called after EOF") + panic("BUG in lexer: next called after EOF") } if lx.pos >= len(lx.input) { lx.atEOF = true @@ -147,12 +154,25 @@ func (lx *lexer) next() (r rune) { if lx.input[lx.pos] == '\n' { lx.line++ } + lx.prevWidths[3] = lx.prevWidths[2] lx.prevWidths[2] = lx.prevWidths[1] lx.prevWidths[1] = lx.prevWidths[0] - if lx.nprev < 3 { + if lx.nprev < 4 { lx.nprev++ } + r, w := utf8.DecodeRuneInString(lx.input[lx.pos:]) + if r == utf8.RuneError { + lx.error(errLexUTF8{lx.input[lx.pos]}) + return utf8.RuneError + } + + // Note: don't use peek() here, as this calls next(). + if isControl(r) || (r == '\r' && (len(lx.input)-1 == lx.pos || lx.input[lx.pos+1] != '\n')) { + lx.errorControlChar(r) + return utf8.RuneError + } + lx.prevWidths[0] = w lx.pos += w return r @@ -163,19 +183,21 @@ func (lx *lexer) ignore() { lx.start = lx.pos } -// backup steps back one rune. Can be called only twice between calls to next. +// backup steps back one rune. Can be called 4 times between calls to next. func (lx *lexer) backup() { if lx.atEOF { lx.atEOF = false return } if lx.nprev < 1 { - panic("backed up too far") + panic("BUG in lexer: backed up too far") } w := lx.prevWidths[0] lx.prevWidths[0] = lx.prevWidths[1] lx.prevWidths[1] = lx.prevWidths[2] + lx.prevWidths[2] = lx.prevWidths[3] lx.nprev-- + lx.pos -= w if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' { lx.line-- @@ -211,18 +233,58 @@ func (lx *lexer) skip(pred func(rune) bool) { } } -// errorf stops all lexing by emitting an error and returning `nil`. +// error stops all lexing by emitting an error and returning `nil`. +// // Note that any value that is a character is escaped if it's a special // character (newlines, tabs, etc.). +func (lx *lexer) error(err error) stateFn { + if lx.atEOF { + return lx.errorPrevLine(err) + } + lx.items <- item{typ: itemError, pos: lx.getPos(), err: err} + return nil +} + +// errorfPrevline is like error(), but sets the position to the last column of +// the previous line. +// +// This is so that unexpected EOF or NL errors don't show on a new blank line. +func (lx *lexer) errorPrevLine(err error) stateFn { + pos := lx.getPos() + pos.Line-- + pos.Len = 1 + pos.Start = lx.pos - 1 + lx.items <- item{typ: itemError, pos: pos, err: err} + return nil +} + +// errorPos is like error(), but allows explicitly setting the position. +func (lx *lexer) errorPos(start, length int, err error) stateFn { + pos := lx.getPos() + pos.Start = start + pos.Len = length + lx.items <- item{typ: itemError, pos: pos, err: err} + return nil +} + +// errorf is like error, and creates a new error. 
func (lx *lexer) errorf(format string, values ...interface{}) stateFn { - lx.items <- item{ - itemError, - fmt.Sprintf(format, values...), - lx.line, + if lx.atEOF { + pos := lx.getPos() + pos.Line-- + pos.Len = 1 + pos.Start = lx.pos - 1 + lx.items <- item{typ: itemError, pos: pos, err: fmt.Errorf(format, values...)} + return nil } + lx.items <- item{typ: itemError, pos: lx.getPos(), err: fmt.Errorf(format, values...)} return nil } +func (lx *lexer) errorControlChar(cc rune) stateFn { + return lx.errorPos(lx.pos-1, 1, errLexControl{cc}) +} + // lexTop consumes elements at the top level of TOML data. func lexTop(lx *lexer) stateFn { r := lx.next() @@ -230,10 +292,10 @@ func lexTop(lx *lexer) stateFn { return lexSkip(lx, lexTop) } switch r { - case commentStart: + case '#': lx.push(lexTop) return lexCommentStart - case tableStart: + case '[': return lexTableStart case eof: if lx.pos > lx.start { @@ -256,7 +318,7 @@ func lexTop(lx *lexer) stateFn { func lexTopEnd(lx *lexer) stateFn { r := lx.next() switch { - case r == commentStart: + case r == '#': // a comment will read to a newline for us. lx.push(lexTop) return lexCommentStart @@ -269,8 +331,9 @@ func lexTopEnd(lx *lexer) stateFn { lx.emit(itemEOF) return nil } - return lx.errorf("expected a top-level item to end with a newline, "+ - "comment, or EOF, but got %q instead", r) + return lx.errorf( + "expected a top-level item to end with a newline, comment, or EOF, but got %q instead", + r) } // lexTable lexes the beginning of a table. Namely, it makes sure that @@ -279,7 +342,7 @@ func lexTopEnd(lx *lexer) stateFn { // It also handles the case that this is an item in an array of tables. // e.g., '[[name]]'. func lexTableStart(lx *lexer) stateFn { - if lx.peek() == arrayTableStart { + if lx.peek() == '[' { lx.next() lx.emit(itemArrayTableStart) lx.push(lexArrayTableEnd) @@ -296,9 +359,8 @@ func lexTableEnd(lx *lexer) stateFn { } func lexArrayTableEnd(lx *lexer) stateFn { - if r := lx.next(); r != arrayTableEnd { - return lx.errorf("expected end of table array name delimiter %q, "+ - "but got %q instead", arrayTableEnd, r) + if r := lx.next(); r != ']' { + return lx.errorf("expected end of table array name delimiter ']', but got %q instead", r) } lx.emit(itemArrayTableEnd) return lexTopEnd @@ -307,31 +369,18 @@ func lexArrayTableEnd(lx *lexer) stateFn { func lexTableNameStart(lx *lexer) stateFn { lx.skip(isWhitespace) switch r := lx.peek(); { - case r == tableEnd || r == eof: - return lx.errorf("unexpected end of table name " + - "(table names cannot be empty)") - case r == tableSep: - return lx.errorf("unexpected table separator " + - "(table names cannot be empty)") - case r == stringStart || r == rawStringStart: + case r == ']' || r == eof: + return lx.errorf("unexpected end of table name (table names cannot be empty)") + case r == '.': + return lx.errorf("unexpected table separator (table names cannot be empty)") + case r == '"' || r == '\'': lx.ignore() lx.push(lexTableNameEnd) - return lexValue // reuse string lexing + return lexQuotedName default: - return lexBareTableName - } -} - -// lexBareTableName lexes the name of a table. It assumes that at least one -// valid character for the table has already been read. 
-func lexBareTableName(lx *lexer) stateFn { - r := lx.next() - if isBareKeyChar(r) { - return lexBareTableName + lx.push(lexTableNameEnd) + return lexBareName } - lx.backup() - lx.emit(itemText) - return lexTableNameEnd } // lexTableNameEnd reads the end of a piece of a table name, optionally @@ -341,69 +390,107 @@ func lexTableNameEnd(lx *lexer) stateFn { switch r := lx.next(); { case isWhitespace(r): return lexTableNameEnd - case r == tableSep: + case r == '.': lx.ignore() return lexTableNameStart - case r == tableEnd: + case r == ']': return lx.pop() default: - return lx.errorf("expected '.' or ']' to end table name, "+ - "but got %q instead", r) + return lx.errorf("expected '.' or ']' to end table name, but got %q instead", r) } } -// lexKeyStart consumes a key name up until the first non-whitespace character. -// lexKeyStart will ignore whitespace. -func lexKeyStart(lx *lexer) stateFn { - r := lx.peek() +// lexBareName lexes one part of a key or table. +// +// It assumes that at least one valid character for the table has already been +// read. +// +// Lexes only one part, e.g. only 'a' inside 'a.b'. +func lexBareName(lx *lexer) stateFn { + r := lx.next() + if isBareKeyChar(r) { + return lexBareName + } + lx.backup() + lx.emit(itemText) + return lx.pop() +} + +// lexBareName lexes one part of a key or table. +// +// It assumes that at least one valid character for the table has already been +// read. +// +// Lexes only one part, e.g. only '"a"' inside '"a".b'. +func lexQuotedName(lx *lexer) stateFn { + r := lx.next() switch { - case r == keySep: - return lx.errorf("unexpected key separator %q", keySep) - case isWhitespace(r) || isNL(r): - lx.next() - return lexSkip(lx, lexKeyStart) - case r == stringStart || r == rawStringStart: - lx.ignore() - lx.emit(itemKeyStart) - lx.push(lexKeyEnd) - return lexValue // reuse string lexing + case isWhitespace(r): + return lexSkip(lx, lexValue) + case r == '"': + lx.ignore() // ignore the '"' + return lexString + case r == '\'': + lx.ignore() // ignore the "'" + return lexRawString + case r == eof: + return lx.errorf("unexpected EOF; expected value") default: + return lx.errorf("expected value but found %q instead", r) + } +} + +// lexKeyStart consumes all key parts until a '='. +func lexKeyStart(lx *lexer) stateFn { + lx.skip(isWhitespace) + switch r := lx.peek(); { + case r == '=' || r == eof: + return lx.errorf("unexpected '=': key name appears blank") + case r == '.': + return lx.errorf("unexpected '.': keys cannot start with a '.'") + case r == '"' || r == '\'': lx.ignore() + fallthrough + default: // Bare key lx.emit(itemKeyStart) - return lexBareKey + return lexKeyNameStart } } -// lexBareKey consumes the text of a bare key. Assumes that the first character -// (which is not whitespace) has not yet been consumed. 
-func lexBareKey(lx *lexer) stateFn { - switch r := lx.next(); { - case isBareKeyChar(r): - return lexBareKey - case isWhitespace(r): - lx.backup() - lx.emit(itemText) - return lexKeyEnd - case r == keySep: - lx.backup() - lx.emit(itemText) - return lexKeyEnd +func lexKeyNameStart(lx *lexer) stateFn { + lx.skip(isWhitespace) + switch r := lx.peek(); { + case r == '=' || r == eof: + return lx.errorf("unexpected '='") + case r == '.': + return lx.errorf("unexpected '.'") + case r == '"' || r == '\'': + lx.ignore() + lx.push(lexKeyEnd) + return lexQuotedName default: - return lx.errorf("bare keys cannot contain %q", r) + lx.push(lexKeyEnd) + return lexBareName } } // lexKeyEnd consumes the end of a key and trims whitespace (up to the key // separator). func lexKeyEnd(lx *lexer) stateFn { + lx.skip(isWhitespace) switch r := lx.next(); { - case r == keySep: - return lexSkip(lx, lexValue) case isWhitespace(r): return lexSkip(lx, lexKeyEnd) + case r == eof: + return lx.errorf("unexpected EOF; expected key separator '='") + case r == '.': + lx.ignore() + return lexKeyNameStart + case r == '=': + lx.emit(itemKeyEnd) + return lexSkip(lx, lexValue) default: - return lx.errorf("expected key separator %q, but got %q instead", - keySep, r) + return lx.errorf("expected '.' or '=', but got %q instead", r) } } @@ -422,17 +509,17 @@ func lexValue(lx *lexer) stateFn { return lexNumberOrDateStart } switch r { - case arrayStart: + case '[': lx.ignore() lx.emit(itemArray) return lexArrayValue - case inlineTableStart: + case '{': lx.ignore() lx.emit(itemInlineTableStart) return lexInlineTableValue - case stringStart: - if lx.accept(stringStart) { - if lx.accept(stringStart) { + case '"': + if lx.accept('"') { + if lx.accept('"') { lx.ignore() // Ignore """ return lexMultilineString } @@ -440,9 +527,9 @@ func lexValue(lx *lexer) stateFn { } lx.ignore() // ignore the '"' return lexString - case rawStringStart: - if lx.accept(rawStringStart) { - if lx.accept(rawStringStart) { + case '\'': + if lx.accept('\'') { + if lx.accept('\'') { lx.ignore() // Ignore """ return lexMultilineRawString } @@ -450,10 +537,15 @@ func lexValue(lx *lexer) stateFn { } lx.ignore() // ignore the "'" return lexRawString - case '+', '-': - return lexNumberStart case '.': // special error case, be kind to users return lx.errorf("floats must start with a digit, not '.'") + case 'i', 'n': + if (lx.accept('n') && lx.accept('f')) || (lx.accept('a') && lx.accept('n')) { + lx.emit(itemFloat) + return lx.pop() + } + case '-', '+': + return lexDecimalNumberStart } if unicode.IsLetter(r) { // Be permissive here; lexBool will give a nice error if the @@ -463,6 +555,9 @@ func lexValue(lx *lexer) stateFn { lx.backup() return lexBool } + if r == eof { + return lx.errorf("unexpected EOF; expected value") + } return lx.errorf("expected value but found %q instead", r) } @@ -473,14 +568,12 @@ func lexArrayValue(lx *lexer) stateFn { switch { case isWhitespace(r) || isNL(r): return lexSkip(lx, lexArrayValue) - case r == commentStart: + case r == '#': lx.push(lexArrayValue) return lexCommentStart - case r == comma: + case r == ',': return lx.errorf("unexpected comma") - case r == arrayEnd: - // NOTE(caleb): The spec isn't clear about whether you can have - // a trailing comma or not, so we'll allow it. + case r == ']': return lexArrayEnd } @@ -493,23 +586,20 @@ func lexArrayValue(lx *lexer) stateFn { // the next value (or the end of the array): it ignores whitespace and newlines // and expects either a ',' or a ']'. 
func lexArrayValueEnd(lx *lexer) stateFn { - r := lx.next() - switch { + switch r := lx.next(); { case isWhitespace(r) || isNL(r): return lexSkip(lx, lexArrayValueEnd) - case r == commentStart: + case r == '#': lx.push(lexArrayValueEnd) return lexCommentStart - case r == comma: + case r == ',': lx.ignore() return lexArrayValue // move on to the next value - case r == arrayEnd: + case r == ']': return lexArrayEnd + default: + return lx.errorf("expected a comma (',') or array terminator (']'), but got %s", runeOrEOF(r)) } - return lx.errorf( - "expected a comma or array terminator %q, but got %q instead", - arrayEnd, r, - ) } // lexArrayEnd finishes the lexing of an array. @@ -528,13 +618,13 @@ func lexInlineTableValue(lx *lexer) stateFn { case isWhitespace(r): return lexSkip(lx, lexInlineTableValue) case isNL(r): - return lx.errorf("newlines not allowed within inline tables") - case r == commentStart: + return lx.errorPrevLine(errLexInlineTableNL{}) + case r == '#': lx.push(lexInlineTableValue) return lexCommentStart - case r == comma: + case r == ',': return lx.errorf("unexpected comma") - case r == inlineTableEnd: + case r == '}': return lexInlineTableEnd } lx.backup() @@ -546,23 +636,33 @@ func lexInlineTableValue(lx *lexer) stateFn { // key/value pair and the next pair (or the end of the table): // it ignores whitespace and expects either a ',' or a '}'. func lexInlineTableValueEnd(lx *lexer) stateFn { - r := lx.next() - switch { + switch r := lx.next(); { case isWhitespace(r): return lexSkip(lx, lexInlineTableValueEnd) case isNL(r): - return lx.errorf("newlines not allowed within inline tables") - case r == commentStart: + return lx.errorPrevLine(errLexInlineTableNL{}) + case r == '#': lx.push(lexInlineTableValueEnd) return lexCommentStart - case r == comma: + case r == ',': lx.ignore() + lx.skip(isWhitespace) + if lx.peek() == '}' { + return lx.errorf("trailing comma not allowed in inline tables") + } return lexInlineTableValue - case r == inlineTableEnd: + case r == '}': return lexInlineTableEnd + default: + return lx.errorf("expected a comma or an inline table terminator '}', but got %s instead", runeOrEOF(r)) } - return lx.errorf("expected a comma or an inline table terminator %q, "+ - "but got %q instead", inlineTableEnd, r) +} + +func runeOrEOF(r rune) string { + if r == eof { + return "end of file" + } + return "'" + string(r) + "'" } // lexInlineTableEnd finishes the lexing of an inline table. @@ -579,13 +679,13 @@ func lexString(lx *lexer) stateFn { r := lx.next() switch { case r == eof: - return lx.errorf("unexpected EOF") + return lx.errorf(`unexpected EOF; expected '"'`) case isNL(r): - return lx.errorf("strings cannot contain newlines") + return lx.errorPrevLine(errLexStringNL{}) case r == '\\': lx.push(lexString) return lexStringEscape - case r == stringEnd: + case r == '"': lx.backup() lx.emit(itemString) lx.next() @@ -598,19 +698,47 @@ func lexString(lx *lexer) stateFn { // lexMultilineString consumes the inner contents of a string. It assumes that // the beginning '"""' has already been consumed and ignored. func lexMultilineString(lx *lexer) stateFn { - switch lx.next() { + r := lx.next() + switch r { + default: + return lexMultilineString case eof: - return lx.errorf("unexpected EOF") + return lx.errorf(`unexpected EOF; expected '"""'`) case '\\': return lexMultilineStringEscape - case stringEnd: - if lx.accept(stringEnd) { - if lx.accept(stringEnd) { - lx.backup() + case '"': + /// Found " → try to read two more "". 
+ if lx.accept('"') { + if lx.accept('"') { + /// Peek ahead: the string can contain " and "", including at the + /// end: """str""""" + /// 6 or more at the end, however, is an error. + if lx.peek() == '"' { + /// Check if we already lexed 5 's; if so we have 6 now, and + /// that's just too many man! + /// + /// Second check is for the edge case: + /// + /// two quotes allowed. + /// vv + /// """lol \"""""" + /// ^^ ^^^---- closing three + /// escaped + /// + /// But ugly, but it works + if strings.HasSuffix(lx.current(), `"""""`) && !strings.HasSuffix(lx.current(), `\"""""`) { + return lx.errorf(`unexpected '""""""'`) + } + lx.backup() + lx.backup() + return lexMultilineString + } + + lx.backup() /// backup: don't include the """ in the item. lx.backup() lx.backup() lx.emit(itemMultilineString) - lx.next() + lx.next() /// Read over ''' again and discard it. lx.next() lx.next() lx.ignore() @@ -618,8 +746,8 @@ func lexMultilineString(lx *lexer) stateFn { } lx.backup() } + return lexMultilineString } - return lexMultilineString } // lexRawString consumes a raw string. Nothing can be escaped in such a string. @@ -627,35 +755,54 @@ func lexMultilineString(lx *lexer) stateFn { func lexRawString(lx *lexer) stateFn { r := lx.next() switch { + default: + return lexRawString case r == eof: - return lx.errorf("unexpected EOF") + return lx.errorf(`unexpected EOF; expected "'"`) case isNL(r): - return lx.errorf("strings cannot contain newlines") - case r == rawStringEnd: + return lx.errorPrevLine(errLexStringNL{}) + case r == '\'': lx.backup() lx.emit(itemRawString) lx.next() lx.ignore() return lx.pop() } - return lexRawString } // lexMultilineRawString consumes a raw string. Nothing can be escaped in such // a string. It assumes that the beginning "'''" has already been consumed and // ignored. func lexMultilineRawString(lx *lexer) stateFn { - switch lx.next() { + r := lx.next() + switch r { + default: + return lexMultilineRawString case eof: - return lx.errorf("unexpected EOF") - case rawStringEnd: - if lx.accept(rawStringEnd) { - if lx.accept(rawStringEnd) { - lx.backup() + return lx.errorf(`unexpected EOF; expected "'''"`) + case '\'': + /// Found ' → try to read two more ''. + if lx.accept('\'') { + if lx.accept('\'') { + /// Peek ahead: the string can contain ' and '', including at the + /// end: '''str''''' + /// 6 or more at the end, however, is an error. + if lx.peek() == '\'' { + /// Check if we already lexed 5 's; if so we have 6 now, and + /// that's just too many man! + if strings.HasSuffix(lx.current(), "'''''") { + return lx.errorf(`unexpected "''''''"`) + } + lx.backup() + lx.backup() + return lexMultilineRawString + } + + lx.backup() /// backup: don't include the ''' in the item. lx.backup() lx.backup() lx.emit(itemRawMultilineString) - lx.next() + lx.next() /// Read over ''' again and discard it. lx.next() lx.next() lx.ignore() @@ -663,15 +810,14 @@ func lexMultilineRawString(lx *lexer) stateFn { } lx.backup() } + return lexMultilineRawString } - return lexMultilineRawString } // lexMultilineStringEscape consumes an escaped character. It assumes that the // preceding '\\' has already been consumed. func lexMultilineStringEscape(lx *lexer) stateFn { - // Handle the special case first: - if isNL(lx.next()) { + if isNL(lx.next()) { /// \ escaping newline. return lexMultilineString } lx.backup() @@ -694,6 +840,10 @@ func lexStringEscape(lx *lexer) stateFn { fallthrough case '"': fallthrough + case ' ', '\t': + // Inside """ .. 
""" strings you can use \ to escape newlines, and any + // amount of whitespace can be between the \ and \n. + fallthrough case '\\': return lx.pop() case 'u': @@ -701,9 +851,7 @@ func lexStringEscape(lx *lexer) stateFn { case 'U': return lexLongUnicodeEscape } - return lx.errorf("invalid escape character %q; only the following "+ - "escape characters are allowed: "+ - `\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r) + return lx.error(errLexEscape{r}) } func lexShortUnicodeEscape(lx *lexer) stateFn { @@ -711,8 +859,9 @@ func lexShortUnicodeEscape(lx *lexer) stateFn { for i := 0; i < 4; i++ { r = lx.next() if !isHexadecimal(r) { - return lx.errorf(`expected four hexadecimal digits after '\u', `+ - "but got %q instead", lx.current()) + return lx.errorf( + `expected four hexadecimal digits after '\u', but got %q instead`, + lx.current()) } } return lx.pop() @@ -723,28 +872,33 @@ func lexLongUnicodeEscape(lx *lexer) stateFn { for i := 0; i < 8; i++ { r = lx.next() if !isHexadecimal(r) { - return lx.errorf(`expected eight hexadecimal digits after '\U', `+ - "but got %q instead", lx.current()) + return lx.errorf( + `expected eight hexadecimal digits after '\U', but got %q instead`, + lx.current()) } } return lx.pop() } -// lexNumberOrDateStart consumes either an integer, a float, or datetime. +// lexNumberOrDateStart processes the first character of a value which begins +// with a digit. It exists to catch values starting with '0', so that +// lexBaseNumberOrDate can differentiate base prefixed integers from other +// types. func lexNumberOrDateStart(lx *lexer) stateFn { r := lx.next() - if isDigit(r) { - return lexNumberOrDate - } switch r { - case '_': - return lexNumber - case 'e', 'E': - return lexFloat - case '.': - return lx.errorf("floats must start with a digit, not '.'") + case '0': + return lexBaseNumberOrDate } - return lx.errorf("expected a digit but got %q", r) + + if !isDigit(r) { + // The only way to reach this state is if the value starts + // with a digit, so specifically treat anything else as an + // error. + return lx.errorf("expected a digit but got %q", r) + } + + return lexNumberOrDate } // lexNumberOrDate consumes either an integer, float or datetime. @@ -754,10 +908,10 @@ func lexNumberOrDate(lx *lexer) stateFn { return lexNumberOrDate } switch r { - case '-': + case '-', ':': return lexDatetime case '_': - return lexNumber + return lexDecimalNumber case '.', 'e', 'E': return lexFloat } @@ -775,41 +929,156 @@ func lexDatetime(lx *lexer) stateFn { return lexDatetime } switch r { - case '-', 'T', ':', '.', 'Z', '+': + case '-', ':', 'T', 't', ' ', '.', 'Z', 'z', '+': return lexDatetime } lx.backup() - lx.emit(itemDatetime) + lx.emitTrim(itemDatetime) return lx.pop() } -// lexNumberStart consumes either an integer or a float. It assumes that a sign -// has already been read, but that *no* digits have been consumed. -// lexNumberStart will move to the appropriate integer or float states. -func lexNumberStart(lx *lexer) stateFn { - // We MUST see a digit. Even floats have to start with a digit. +// lexHexInteger consumes a hexadecimal integer after seeing the '0x' prefix. +func lexHexInteger(lx *lexer) stateFn { r := lx.next() - if !isDigit(r) { - if r == '.' { - return lx.errorf("floats must start with a digit, not '.'") + if isHexadecimal(r) { + return lexHexInteger + } + switch r { + case '_': + return lexHexInteger + } + + lx.backup() + lx.emit(itemInteger) + return lx.pop() +} + +// lexOctalInteger consumes an octal integer after seeing the '0o' prefix. 
+func lexOctalInteger(lx *lexer) stateFn { + r := lx.next() + if isOctal(r) { + return lexOctalInteger + } + switch r { + case '_': + return lexOctalInteger + } + + lx.backup() + lx.emit(itemInteger) + return lx.pop() +} + +// lexBinaryInteger consumes a binary integer after seeing the '0b' prefix. +func lexBinaryInteger(lx *lexer) stateFn { + r := lx.next() + if isBinary(r) { + return lexBinaryInteger + } + switch r { + case '_': + return lexBinaryInteger + } + + lx.backup() + lx.emit(itemInteger) + return lx.pop() +} + +// lexDecimalNumber consumes a decimal float or integer. +func lexDecimalNumber(lx *lexer) stateFn { + r := lx.next() + if isDigit(r) { + return lexDecimalNumber + } + switch r { + case '.', 'e', 'E': + return lexFloat + case '_': + return lexDecimalNumber + } + + lx.backup() + lx.emit(itemInteger) + return lx.pop() +} + +// lexDecimalNumber consumes the first digit of a number beginning with a sign. +// It assumes the sign has already been consumed. Values which start with a sign +// are only allowed to be decimal integers or floats. +// +// The special "nan" and "inf" values are also recognized. +func lexDecimalNumberStart(lx *lexer) stateFn { + r := lx.next() + + // Special error cases to give users better error messages + switch r { + case 'i': + if !lx.accept('n') || !lx.accept('f') { + return lx.errorf("invalid float: '%s'", lx.current()) } - return lx.errorf("expected a digit but got %q", r) + lx.emit(itemFloat) + return lx.pop() + case 'n': + if !lx.accept('a') || !lx.accept('n') { + return lx.errorf("invalid float: '%s'", lx.current()) + } + lx.emit(itemFloat) + return lx.pop() + case '0': + p := lx.peek() + switch p { + case 'b', 'o', 'x': + return lx.errorf("cannot use sign with non-decimal numbers: '%s%c'", lx.current(), p) + } + case '.': + return lx.errorf("floats must start with a digit, not '.'") } - return lexNumber + + if isDigit(r) { + return lexDecimalNumber + } + + return lx.errorf("expected a digit but got %q", r) } -// lexNumber consumes an integer or a float after seeing the first digit. -func lexNumber(lx *lexer) stateFn { +// lexBaseNumberOrDate differentiates between the possible values which +// start with '0'. It assumes that before reaching this state, the initial '0' +// has been consumed. +func lexBaseNumberOrDate(lx *lexer) stateFn { r := lx.next() + // Note: All datetimes start with at least two digits, so we don't + // handle date characters (':', '-', etc.) here. if isDigit(r) { - return lexNumber + return lexNumberOrDate } switch r { case '_': - return lexNumber + // Can only be decimal, because there can't be an underscore + // between the '0' and the base designator, and dates can't + // contain underscores. + return lexDecimalNumber case '.', 'e', 'E': return lexFloat + case 'b': + r = lx.peek() + if !isBinary(r) { + lx.errorf("not a binary number: '%s%c'", lx.current(), r) + } + return lexBinaryInteger + case 'o': + r = lx.peek() + if !isOctal(r) { + lx.errorf("not an octal number: '%s%c'", lx.current(), r) + } + return lexOctalInteger + case 'x': + r = lx.peek() + if !isHexadecimal(r) { + lx.errorf("not a hexidecimal number: '%s%c'", lx.current(), r) + } + return lexHexInteger } lx.backup() @@ -867,49 +1136,31 @@ func lexCommentStart(lx *lexer) stateFn { // It will consume *up to* the first newline character, and pass control // back to the last state on the stack. 
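The lexer states above (lexHexInteger, lexOctalInteger, lexBinaryInteger, lexDecimalNumberStart) cover the TOML 1.0 numeric forms: base prefixes, digit-group underscores, and the signed inf/nan specials. A minimal sketch of how those literals surface through the package's public Decode API; the document text, struct, and field names are illustrative and not part of this change:

package main

import (
	"fmt"

	"github.com/BurntSushi/toml"
)

func main() {
	// Each literal below is routed through one of the new lexer states:
	// 0x… via lexHexInteger, 0o… via lexOctalInteger, 0b… via
	// lexBinaryInteger, and the signed special values via
	// lexDecimalNumberStart.
	doc := `
hex = 0xDEAD_BEEF
oct = 0o755
bin = 0b1101
dec = 1_000_000
low = -inf
nan = nan
`
	var v struct {
		Hex, Oct, Bin, Dec int64
		Low, Nan           float64
	}
	if _, err := toml.Decode(doc, &v); err != nil {
		fmt.Println("decode error:", err)
		return
	}
	fmt.Println(v.Hex, v.Oct, v.Bin, v.Dec, v.Low, v.Nan)
}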
func lexComment(lx *lexer) stateFn { - r := lx.peek() - if isNL(r) || r == eof { + switch r := lx.next(); { + case isNL(r) || r == eof: + lx.backup() lx.emit(itemText) return lx.pop() + default: + return lexComment } - lx.next() - return lexComment } // lexSkip ignores all slurped input and moves on to the next state. func lexSkip(lx *lexer, nextState stateFn) stateFn { - return func(lx *lexer) stateFn { - lx.ignore() - return nextState - } -} - -// isWhitespace returns true if `r` is a whitespace character according -// to the spec. -func isWhitespace(r rune) bool { - return r == '\t' || r == ' ' -} - -func isNL(r rune) bool { - return r == '\n' || r == '\r' -} - -func isDigit(r rune) bool { - return r >= '0' && r <= '9' -} - -func isHexadecimal(r rune) bool { - return (r >= '0' && r <= '9') || - (r >= 'a' && r <= 'f') || - (r >= 'A' && r <= 'F') + lx.ignore() + return nextState } -func isBareKeyChar(r rune) bool { - return (r >= 'A' && r <= 'Z') || - (r >= 'a' && r <= 'z') || - (r >= '0' && r <= '9') || - r == '_' || - r == '-' +func (s stateFn) String() string { + name := runtime.FuncForPC(reflect.ValueOf(s).Pointer()).Name() + if i := strings.LastIndexByte(name, '.'); i > -1 { + name = name[i+1:] + } + if s == nil { + name = "" + } + return name + "()" } func (itype itemType) String() string { @@ -938,12 +1189,18 @@ func (itype itemType) String() string { return "TableEnd" case itemKeyStart: return "KeyStart" + case itemKeyEnd: + return "KeyEnd" case itemArray: return "Array" case itemArrayEnd: return "ArrayEnd" case itemCommentStart: return "CommentStart" + case itemInlineTableStart: + return "InlineTableStart" + case itemInlineTableEnd: + return "InlineTableEnd" } panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype))) } @@ -951,3 +1208,26 @@ func (itype itemType) String() string { func (item item) String() string { return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val) } + +func isWhitespace(r rune) bool { return r == '\t' || r == ' ' } +func isNL(r rune) bool { return r == '\n' || r == '\r' } +func isControl(r rune) bool { // Control characters except \t, \r, \n + switch r { + case '\t', '\r', '\n': + return false + default: + return (r >= 0x00 && r <= 0x1f) || r == 0x7f + } +} +func isDigit(r rune) bool { return r >= '0' && r <= '9' } +func isBinary(r rune) bool { return r == '0' || r == '1' } +func isOctal(r rune) bool { return r >= '0' && r <= '7' } +func isHexadecimal(r rune) bool { + return (r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F') +} +func isBareKeyChar(r rune) bool { + return (r >= 'A' && r <= 'Z') || + (r >= 'a' && r <= 'z') || + (r >= '0' && r <= '9') || + r == '_' || r == '-' +} diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/meta.go b/src/runtime/vendor/github.com/BurntSushi/toml/meta.go new file mode 100644 index 000000000000..d284f2a0c8a9 --- /dev/null +++ b/src/runtime/vendor/github.com/BurntSushi/toml/meta.go @@ -0,0 +1,121 @@ +package toml + +import ( + "strings" +) + +// MetaData allows access to meta information about TOML data that's not +// accessible otherwise. +// +// It allows checking if a key is defined in the TOML data, whether any keys +// were undecoded, and the TOML type of a key. +type MetaData struct { + context Key // Used only during decoding. + + keyInfo map[string]keyInfo + mapping map[string]interface{} + keys []Key + decoded map[string]struct{} + data []byte // Input file; for errors. +} + +// IsDefined reports if the key exists in the TOML data. 
+// +// The key should be specified hierarchically, for example to access the TOML +// key "a.b.c" you would use IsDefined("a", "b", "c"). Keys are case sensitive. +// +// Returns false for an empty key. +func (md *MetaData) IsDefined(key ...string) bool { + if len(key) == 0 { + return false + } + + var ( + hash map[string]interface{} + ok bool + hashOrVal interface{} = md.mapping + ) + for _, k := range key { + if hash, ok = hashOrVal.(map[string]interface{}); !ok { + return false + } + if hashOrVal, ok = hash[k]; !ok { + return false + } + } + return true +} + +// Type returns a string representation of the type of the key specified. +// +// Type will return the empty string if given an empty key or a key that does +// not exist. Keys are case sensitive. +func (md *MetaData) Type(key ...string) string { + if ki, ok := md.keyInfo[Key(key).String()]; ok { + return ki.tomlType.typeString() + } + return "" +} + +// Keys returns a slice of every key in the TOML data, including key groups. +// +// Each key is itself a slice, where the first element is the top of the +// hierarchy and the last is the most specific. The list will have the same +// order as the keys appeared in the TOML data. +// +// All keys returned are non-empty. +func (md *MetaData) Keys() []Key { + return md.keys +} + +// Undecoded returns all keys that have not been decoded in the order in which +// they appear in the original TOML document. +// +// This includes keys that haven't been decoded because of a Primitive value. +// Once the Primitive value is decoded, the keys will be considered decoded. +// +// Also note that decoding into an empty interface will result in no decoding, +// and so no keys will be considered decoded. +// +// In this sense, the Undecoded keys correspond to keys in the TOML document +// that do not have a concrete type in your representation. +func (md *MetaData) Undecoded() []Key { + undecoded := make([]Key, 0, len(md.keys)) + for _, key := range md.keys { + if _, ok := md.decoded[key.String()]; !ok { + undecoded = append(undecoded, key) + } + } + return undecoded +} + +// Key represents any TOML key, including key groups. Use (MetaData).Keys to get +// values of this type. +type Key []string + +func (k Key) String() string { + ss := make([]string, len(k)) + for i := range k { + ss[i] = k.maybeQuoted(i) + } + return strings.Join(ss, ".") +} + +func (k Key) maybeQuoted(i int) string { + if k[i] == "" { + return `""` + } + for _, c := range k[i] { + if !isBareKeyChar(c) { + return `"` + dblQuotedReplacer.Replace(k[i]) + `"` + } + } + return k[i] +} + +func (k Key) add(piece string) Key { + newKey := make(Key, len(k)+1) + copy(newKey, k) + newKey[len(k)] = piece + return newKey +} diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/parse.go b/src/runtime/vendor/github.com/BurntSushi/toml/parse.go index 50869ef9266e..d2542d6f926f 100644 --- a/src/runtime/vendor/github.com/BurntSushi/toml/parse.go +++ b/src/runtime/vendor/github.com/BurntSushi/toml/parse.go @@ -5,54 +5,69 @@ import ( "strconv" "strings" "time" - "unicode" "unicode/utf8" + + "github.com/BurntSushi/toml/internal" ) type parser struct { - mapping map[string]interface{} - types map[string]tomlType - lx *lexer - - // A list of keys in the order that they appear in the TOML data. - ordered []Key - - // the full key for the current hash in scope - context Key + lx *lexer + context Key // Full key for the current hash in scope. + currentKey string // Base key name for everything except hashes. 
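The meta.go file added above exposes MetaData and Key for querying what was actually present in the input. A small usage sketch, assuming the package is imported as github.com/BurntSushi/toml; the config document and struct here are made up for illustration:

package main

import (
	"fmt"

	"github.com/BurntSushi/toml"
)

func main() {
	doc := `
[server]
host = "127.0.0.1"
port = 8080
`
	var cfg struct {
		Server struct{ Host string }
	}
	md, err := toml.Decode(doc, &cfg)
	if err != nil {
		fmt.Println("decode error:", err)
		return
	}
	fmt.Println(md.IsDefined("server", "host")) // true
	fmt.Println(md.Type("server", "port"))      // Integer
	// "server.port" has no matching struct field, so it is reported as
	// undecoded; Key.String() quotes key parts only when needed.
	for _, k := range md.Undecoded() {
		fmt.Println("undecoded:", k.String())
	}
}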
+ pos Position // Current position in the TOML file. - // the base key name for everything except hashes - currentKey string + ordered []Key // List of keys in the order that they appear in the TOML data. - // rough approximation of line number - approxLine int - - // A map of 'key.group.names' to whether they were created implicitly. - implicits map[string]bool + keyInfo map[string]keyInfo // Map keyname → info about the TOML key. + mapping map[string]interface{} // Map keyname → key value. + implicits map[string]struct{} // Record implicit keys (e.g. "key.group.names"). } -type parseError string - -func (pe parseError) Error() string { - return string(pe) +type keyInfo struct { + pos Position + tomlType tomlType } func parse(data string) (p *parser, err error) { defer func() { if r := recover(); r != nil { - var ok bool - if err, ok = r.(parseError); ok { + if pErr, ok := r.(ParseError); ok { + pErr.input = data + err = pErr return } panic(r) } }() + // Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString() + // which mangles stuff. + if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { + data = data[2:] + } + + // Examine first few bytes for NULL bytes; this probably means it's a UTF-16 + // file (second byte in surrogate pair being NULL). Again, do this here to + // avoid having to deal with UTF-8/16 stuff in the lexer. + ex := 6 + if len(data) < 6 { + ex = len(data) + } + if i := strings.IndexRune(data[:ex], 0); i > -1 { + return nil, ParseError{ + Message: "files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8", + Position: Position{Line: 1, Start: i, Len: 1}, + Line: 1, + input: data, + } + } + p = &parser{ + keyInfo: make(map[string]keyInfo), mapping: make(map[string]interface{}), - types: make(map[string]tomlType), lx: lex(data), ordered: make([]Key, 0), - implicits: make(map[string]bool), + implicits: make(map[string]struct{}), } for { item := p.next() @@ -65,20 +80,57 @@ func parse(data string) (p *parser, err error) { return p, nil } +func (p *parser) panicErr(it item, err error) { + panic(ParseError{ + err: err, + Position: it.pos, + Line: it.pos.Len, + LastKey: p.current(), + }) +} + +func (p *parser) panicItemf(it item, format string, v ...interface{}) { + panic(ParseError{ + Message: fmt.Sprintf(format, v...), + Position: it.pos, + Line: it.pos.Len, + LastKey: p.current(), + }) +} + func (p *parser) panicf(format string, v ...interface{}) { - msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s", - p.approxLine, p.current(), fmt.Sprintf(format, v...)) - panic(parseError(msg)) + panic(ParseError{ + Message: fmt.Sprintf(format, v...), + Position: p.pos, + Line: p.pos.Line, + LastKey: p.current(), + }) } func (p *parser) next() item { it := p.lx.nextItem() + //fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.pos.Line, it.val) if it.typ == itemError { - p.panicf("%s", it.val) + if it.err != nil { + panic(ParseError{ + Position: it.pos, + Line: it.pos.Line, + LastKey: p.current(), + err: it.err, + }) + } + + p.panicItemf(it, "%s", it.val) } return it } +func (p *parser) nextPos() item { + it := p.next() + p.pos = it.pos + return it +} + func (p *parser) bug(format string, v ...interface{}) { panic(fmt.Sprintf("BUG: "+format+"\n\n", v...)) } @@ -97,44 +149,60 @@ func (p *parser) assertEqual(expected, got itemType) { func (p *parser) topLevel(item item) { switch item.typ { - case itemCommentStart: - p.approxLine = item.line + case itemCommentStart: // # .. 
p.expect(itemText) - case itemTableStart: - kg := p.next() - p.approxLine = kg.line + case itemTableStart: // [ .. ] + name := p.nextPos() var key Key - for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() { - key = append(key, p.keyString(kg)) + for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() { + key = append(key, p.keyString(name)) } - p.assertEqual(itemTableEnd, kg.typ) + p.assertEqual(itemTableEnd, name.typ) - p.establishContext(key, false) - p.setType("", tomlHash) + p.addContext(key, false) + p.setType("", tomlHash, item.pos) p.ordered = append(p.ordered, key) - case itemArrayTableStart: - kg := p.next() - p.approxLine = kg.line + case itemArrayTableStart: // [[ .. ]] + name := p.nextPos() var key Key - for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() { - key = append(key, p.keyString(kg)) + for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() { + key = append(key, p.keyString(name)) } - p.assertEqual(itemArrayTableEnd, kg.typ) + p.assertEqual(itemArrayTableEnd, name.typ) - p.establishContext(key, true) - p.setType("", tomlArrayHash) + p.addContext(key, true) + p.setType("", tomlArrayHash, item.pos) p.ordered = append(p.ordered, key) - case itemKeyStart: - kname := p.next() - p.approxLine = kname.line - p.currentKey = p.keyString(kname) - - val, typ := p.value(p.next()) - p.setValue(p.currentKey, val) - p.setType(p.currentKey, typ) + case itemKeyStart: // key = .. + outerContext := p.context + /// Read all the key parts (e.g. 'a' and 'b' in 'a.b') + k := p.nextPos() + var key Key + for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() { + key = append(key, p.keyString(k)) + } + p.assertEqual(itemKeyEnd, k.typ) + + /// The current key is the last part. + p.currentKey = key[len(key)-1] + + /// All the other parts (if any) are the context; need to set each part + /// as implicit. + context := key[:len(key)-1] + for i := range context { + p.addImplicitContext(append(p.context, context[i:i+1]...)) + } + + /// Set value. + vItem := p.next() + val, typ := p.value(vItem, false) + p.set(p.currentKey, val, typ, vItem.pos) p.ordered = append(p.ordered, p.context.add(p.currentKey)) + + /// Remove the context we added (preserving any context from [tbl] lines). + p.context = outerContext p.currentKey = "" default: p.bug("Unexpected type at top level: %s", item.typ) @@ -148,180 +216,261 @@ func (p *parser) keyString(it item) string { return it.val case itemString, itemMultilineString, itemRawString, itemRawMultilineString: - s, _ := p.value(it) + s, _ := p.value(it, false) return s.(string) default: p.bug("Unexpected key type: %s", it.typ) - panic("unreachable") } + panic("unreachable") } +var datetimeRepl = strings.NewReplacer( + "z", "Z", + "t", "T", + " ", "T") + // value translates an expected value from the lexer into a Go value wrapped // as an empty interface. 
-func (p *parser) value(it item) (interface{}, tomlType) { +func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) { switch it.typ { case itemString: - return p.replaceEscapes(it.val), p.typeOfPrimitive(it) + return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it) case itemMultilineString: - trimmed := stripFirstNewline(stripEscapedWhitespace(it.val)) - return p.replaceEscapes(trimmed), p.typeOfPrimitive(it) + return p.replaceEscapes(it, stripFirstNewline(p.stripEscapedNewlines(it.val))), p.typeOfPrimitive(it) case itemRawString: return it.val, p.typeOfPrimitive(it) case itemRawMultilineString: return stripFirstNewline(it.val), p.typeOfPrimitive(it) + case itemInteger: + return p.valueInteger(it) + case itemFloat: + return p.valueFloat(it) case itemBool: switch it.val { case "true": return true, p.typeOfPrimitive(it) case "false": return false, p.typeOfPrimitive(it) + default: + p.bug("Expected boolean value, but got '%s'.", it.val) } - p.bug("Expected boolean value, but got '%s'.", it.val) - case itemInteger: - if !numUnderscoresOK(it.val) { - p.panicf("Invalid integer %q: underscores must be surrounded by digits", - it.val) - } - val := strings.Replace(it.val, "_", "", -1) - num, err := strconv.ParseInt(val, 10, 64) - if err != nil { - // Distinguish integer values. Normally, it'd be a bug if the lexer - // provides an invalid integer, but it's possible that the number is - // out of range of valid values (which the lexer cannot determine). - // So mark the former as a bug but the latter as a legitimate user - // error. - if e, ok := err.(*strconv.NumError); ok && - e.Err == strconv.ErrRange { - - p.panicf("Integer '%s' is out of the range of 64-bit "+ - "signed integers.", it.val) - } else { - p.bug("Expected integer value, but got '%s'.", it.val) - } + case itemDatetime: + return p.valueDatetime(it) + case itemArray: + return p.valueArray(it) + case itemInlineTableStart: + return p.valueInlineTable(it, parentIsArray) + default: + p.bug("Unexpected value type: %s", it.typ) + } + panic("unreachable") +} + +func (p *parser) valueInteger(it item) (interface{}, tomlType) { + if !numUnderscoresOK(it.val) { + p.panicItemf(it, "Invalid integer %q: underscores must be surrounded by digits", it.val) + } + if numHasLeadingZero(it.val) { + p.panicItemf(it, "Invalid integer %q: cannot have leading zeroes", it.val) + } + + num, err := strconv.ParseInt(it.val, 0, 64) + if err != nil { + // Distinguish integer values. Normally, it'd be a bug if the lexer + // provides an invalid integer, but it's possible that the number is + // out of range of valid values (which the lexer cannot determine). + // So mark the former as a bug but the latter as a legitimate user + // error. 
+ if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { + p.panicErr(it, errParseRange{i: it.val, size: "int64"}) + } else { + p.bug("Expected integer value, but got '%s'.", it.val) } - return num, p.typeOfPrimitive(it) - case itemFloat: - parts := strings.FieldsFunc(it.val, func(r rune) bool { - switch r { - case '.', 'e', 'E': - return true - } - return false - }) - for _, part := range parts { - if !numUnderscoresOK(part) { - p.panicf("Invalid float %q: underscores must be "+ - "surrounded by digits", it.val) - } + } + return num, p.typeOfPrimitive(it) +} + +func (p *parser) valueFloat(it item) (interface{}, tomlType) { + parts := strings.FieldsFunc(it.val, func(r rune) bool { + switch r { + case '.', 'e', 'E': + return true } - if !numPeriodsOK(it.val) { - // As a special case, numbers like '123.' or '1.e2', - // which are valid as far as Go/strconv are concerned, - // must be rejected because TOML says that a fractional - // part consists of '.' followed by 1+ digits. - p.panicf("Invalid float %q: '.' must be followed "+ - "by one or more digits", it.val) - } - val := strings.Replace(it.val, "_", "", -1) - num, err := strconv.ParseFloat(val, 64) - if err != nil { - if e, ok := err.(*strconv.NumError); ok && - e.Err == strconv.ErrRange { - - p.panicf("Float '%s' is out of the range of 64-bit "+ - "IEEE-754 floating-point numbers.", it.val) - } else { - p.panicf("Invalid float value: %q", it.val) - } + return false + }) + for _, part := range parts { + if !numUnderscoresOK(part) { + p.panicItemf(it, "Invalid float %q: underscores must be surrounded by digits", it.val) } - return num, p.typeOfPrimitive(it) - case itemDatetime: - var t time.Time - var ok bool - var err error - for _, format := range []string{ - "2006-01-02T15:04:05Z07:00", - "2006-01-02T15:04:05", - "2006-01-02", - } { - t, err = time.ParseInLocation(format, it.val, time.Local) - if err == nil { - ok = true - break - } + } + if len(parts) > 0 && numHasLeadingZero(parts[0]) { + p.panicItemf(it, "Invalid float %q: cannot have leading zeroes", it.val) + } + if !numPeriodsOK(it.val) { + // As a special case, numbers like '123.' or '1.e2', + // which are valid as far as Go/strconv are concerned, + // must be rejected because TOML says that a fractional + // part consists of '.' followed by 1+ digits. + p.panicItemf(it, "Invalid float %q: '.' must be followed by one or more digits", it.val) + } + val := strings.Replace(it.val, "_", "", -1) + if val == "+nan" || val == "-nan" { // Go doesn't support this, but TOML spec does. 
+ val = "nan" + } + num, err := strconv.ParseFloat(val, 64) + if err != nil { + if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { + p.panicErr(it, errParseRange{i: it.val, size: "float64"}) + } else { + p.panicItemf(it, "Invalid float value: %q", it.val) } - if !ok { - p.panicf("Invalid TOML Datetime: %q.", it.val) + } + return num, p.typeOfPrimitive(it) +} + +var dtTypes = []struct { + fmt string + zone *time.Location +}{ + {time.RFC3339Nano, time.Local}, + {"2006-01-02T15:04:05.999999999", internal.LocalDatetime}, + {"2006-01-02", internal.LocalDate}, + {"15:04:05.999999999", internal.LocalTime}, +} + +func (p *parser) valueDatetime(it item) (interface{}, tomlType) { + it.val = datetimeRepl.Replace(it.val) + var ( + t time.Time + ok bool + err error + ) + for _, dt := range dtTypes { + t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone) + if err == nil { + ok = true + break } - return t, p.typeOfPrimitive(it) - case itemArray: - array := make([]interface{}, 0) - types := make([]tomlType, 0) + } + if !ok { + p.panicItemf(it, "Invalid TOML Datetime: %q.", it.val) + } + return t, p.typeOfPrimitive(it) +} - for it = p.next(); it.typ != itemArrayEnd; it = p.next() { - if it.typ == itemCommentStart { - p.expect(itemText) - continue - } +func (p *parser) valueArray(it item) (interface{}, tomlType) { + p.setType(p.currentKey, tomlArray, it.pos) - val, typ := p.value(it) - array = append(array, val) - types = append(types, typ) + var ( + types []tomlType + + // Initialize to a non-nil empty slice. This makes it consistent with + // how S = [] decodes into a non-nil slice inside something like struct + // { S []string }. See #338 + array = []interface{}{} + ) + for it = p.next(); it.typ != itemArrayEnd; it = p.next() { + if it.typ == itemCommentStart { + p.expect(itemText) + continue } - return array, p.typeOfArray(types) - case itemInlineTableStart: - var ( - hash = make(map[string]interface{}) - outerContext = p.context - outerKey = p.currentKey - ) - p.context = append(p.context, p.currentKey) - p.currentKey = "" - for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() { - if it.typ != itemKeyStart { - p.bug("Expected key start but instead found %q, around line %d", - it.val, p.approxLine) - } - if it.typ == itemCommentStart { - p.expect(itemText) - continue - } + val, typ := p.value(it, true) + array = append(array, val) + types = append(types, typ) - // retrieve key - k := p.next() - p.approxLine = k.line - kname := p.keyString(k) + // XXX: types isn't used here, we need it to record the accurate type + // information. + // + // Not entirely sure how to best store this; could use "key[0]", + // "key[1]" notation, or maybe store it on the Array type? + } + return array, tomlArray +} + +func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tomlType) { + var ( + hash = make(map[string]interface{}) + outerContext = p.context + outerKey = p.currentKey + ) + + p.context = append(p.context, p.currentKey) + prevContext := p.context + p.currentKey = "" + + p.addImplicit(p.context) + p.addContext(p.context, parentIsArray) - // retrieve value - p.currentKey = kname - val, typ := p.value(p.next()) - // make sure we keep metadata up to date - p.setType(kname, typ) - p.ordered = append(p.ordered, p.context.add(p.currentKey)) - hash[kname] = val + /// Loop over all table key/value pairs. 
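The comment in valueArray above notes that an empty array now decodes into a non-nil empty slice (issue #338). A brief sketch of that behaviour as seen from the caller side; the struct is illustrative only:

package main

import (
	"fmt"

	"github.com/BurntSushi/toml"
)

func main() {
	var v struct{ S []string }
	// Per the comment in valueArray above, `S = []` is expected to decode
	// into a non-nil empty slice rather than leaving v.S nil.
	if _, err := toml.Decode(`S = []`, &v); err != nil {
		fmt.Println("decode error:", err)
		return
	}
	fmt.Println(v.S != nil, len(v.S)) // true 0
}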
+ for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() { + if it.typ == itemCommentStart { + p.expect(itemText) + continue } - p.context = outerContext - p.currentKey = outerKey - return hash, tomlHash + + /// Read all key parts. + k := p.nextPos() + var key Key + for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() { + key = append(key, p.keyString(k)) + } + p.assertEqual(itemKeyEnd, k.typ) + + /// The current key is the last part. + p.currentKey = key[len(key)-1] + + /// All the other parts (if any) are the context; need to set each part + /// as implicit. + context := key[:len(key)-1] + for i := range context { + p.addImplicitContext(append(p.context, context[i:i+1]...)) + } + + /// Set the value. + val, typ := p.value(p.next(), false) + p.set(p.currentKey, val, typ, it.pos) + p.ordered = append(p.ordered, p.context.add(p.currentKey)) + hash[p.currentKey] = val + + /// Restore context. + p.context = prevContext } - p.bug("Unexpected value type: %s", it.typ) - panic("unreachable") + p.context = outerContext + p.currentKey = outerKey + return hash, tomlHash +} + +// numHasLeadingZero checks if this number has leading zeroes, allowing for '0', +// +/- signs, and base prefixes. +func numHasLeadingZero(s string) bool { + if len(s) > 1 && s[0] == '0' && !(s[1] == 'b' || s[1] == 'o' || s[1] == 'x') { // Allow 0b, 0o, 0x + return true + } + if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' { + return true + } + return false } // numUnderscoresOK checks whether each underscore in s is surrounded by // characters that are not underscores. func numUnderscoresOK(s string) bool { + switch s { + case "nan", "+nan", "-nan", "inf", "-inf", "+inf": + return true + } accept := false for _, r := range s { if r == '_' { if !accept { return false } - accept = false - continue } - accept = true + + // isHexadecimal is a superset of all the permissable characters + // surrounding an underscore. + accept = isHexadecimal(r) } return accept } @@ -338,13 +487,12 @@ func numPeriodsOK(s string) bool { return !period } -// establishContext sets the current context of the parser, -// where the context is either a hash or an array of hashes. Which one is -// set depends on the value of the `array` parameter. +// Set the current context of the parser, where the context is either a hash or +// an array of hashes, depending on the value of the `array` parameter. // // Establishing the context also makes sure that the key isn't a duplicate, and // will create implicit hashes automatically. -func (p *parser) establishContext(key Key, array bool) { +func (p *parser) addContext(key Key, array bool) { var ok bool // Always start at the top level and drill down for our context. @@ -383,7 +531,7 @@ func (p *parser) establishContext(key Key, array bool) { // list of tables for it. k := key[len(key)-1] if _, ok := hashContext[k]; !ok { - hashContext[k] = make([]map[string]interface{}, 0, 5) + hashContext[k] = make([]map[string]interface{}, 0, 4) } // Add a new table. 
But make sure the key hasn't already been used @@ -391,8 +539,7 @@ func (p *parser) establishContext(key Key, array bool) { if hash, ok := hashContext[k].([]map[string]interface{}); ok { hashContext[k] = append(hash, make(map[string]interface{})) } else { - p.panicf("Key '%s' was already created and cannot be used as "+ - "an array.", keyContext) + p.panicf("Key '%s' was already created and cannot be used as an array.", key) } } else { p.setValue(key[len(key)-1], make(map[string]interface{})) @@ -400,15 +547,23 @@ func (p *parser) establishContext(key Key, array bool) { p.context = append(p.context, key[len(key)-1]) } +// set calls setValue and setType. +func (p *parser) set(key string, val interface{}, typ tomlType, pos Position) { + p.setValue(key, val) + p.setType(key, typ, pos) + +} + // setValue sets the given key to the given value in the current context. // It will make sure that the key hasn't already been defined, account for // implicit key groups. func (p *parser) setValue(key string, value interface{}) { - var tmpHash interface{} - var ok bool - - hash := p.mapping - keyContext := make(Key, 0) + var ( + tmpHash interface{} + ok bool + hash = p.mapping + keyContext Key + ) for _, k := range p.context { keyContext = append(keyContext, k) if tmpHash, ok = hash[k]; !ok { @@ -422,24 +577,26 @@ func (p *parser) setValue(key string, value interface{}) { case map[string]interface{}: hash = t default: - p.bug("Expected hash to have type 'map[string]interface{}', but "+ - "it has '%T' instead.", tmpHash) + p.panicf("Key '%s' has already been defined.", keyContext) } } keyContext = append(keyContext, key) if _, ok := hash[key]; ok { - // Typically, if the given key has already been set, then we have - // to raise an error since duplicate keys are disallowed. However, - // it's possible that a key was previously defined implicitly. In this - // case, it is allowed to be redefined concretely. (See the - // `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.) + // Normally redefining keys isn't allowed, but the key could have been + // defined implicitly and it's allowed to be redefined concretely. (See + // the `valid/implicit-and-explicit-after.toml` in toml-test) // // But we have to make sure to stop marking it as an implicit. (So that // another redefinition provokes an error.) // // Note that since it has already been defined (as a hash), we don't // want to overwrite it. So our business is done. + if p.isArray(keyContext) { + p.removeImplicit(keyContext) + hash[key] = value + return + } if p.isImplicit(keyContext) { p.removeImplicit(keyContext) return @@ -449,40 +606,39 @@ func (p *parser) setValue(key string, value interface{}) { // key, which is *always* wrong. p.panicf("Key '%s' has already been defined.", keyContext) } + hash[key] = value } -// setType sets the type of a particular value at a given key. -// It should be called immediately AFTER setValue. +// setType sets the type of a particular value at a given key. It should be +// called immediately AFTER setValue. // // Note that if `key` is empty, then the type given will be applied to the // current context (which is either a table or an array of tables). -func (p *parser) setType(key string, typ tomlType) { +func (p *parser) setType(key string, typ tomlType, pos Position) { keyContext := make(Key, 0, len(p.context)+1) - for _, k := range p.context { - keyContext = append(keyContext, k) - } + keyContext = append(keyContext, p.context...) 
if len(key) > 0 { // allow type setting for hashes keyContext = append(keyContext, key) } - p.types[keyContext.String()] = typ -} - -// addImplicit sets the given Key as having been created implicitly. -func (p *parser) addImplicit(key Key) { - p.implicits[key.String()] = true -} - -// removeImplicit stops tagging the given key as having been implicitly -// created. -func (p *parser) removeImplicit(key Key) { - p.implicits[key.String()] = false + // Special case to make empty keys ("" = 1) work. + // Without it it will set "" rather than `""`. + // TODO: why is this needed? And why is this only needed here? + if len(keyContext) == 0 { + keyContext = Key{""} + } + p.keyInfo[keyContext.String()] = keyInfo{tomlType: typ, pos: pos} } -// isImplicit returns true if the key group pointed to by the key was created -// implicitly. -func (p *parser) isImplicit(key Key) bool { - return p.implicits[key.String()] +// Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and +// "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly). +func (p *parser) addImplicit(key Key) { p.implicits[key.String()] = struct{}{} } +func (p *parser) removeImplicit(key Key) { delete(p.implicits, key.String()) } +func (p *parser) isImplicit(key Key) bool { _, ok := p.implicits[key.String()]; return ok } +func (p *parser) isArray(key Key) bool { return p.keyInfo[key.String()].tomlType == tomlArray } +func (p *parser) addImplicitContext(key Key) { + p.addImplicit(key) + p.addContext(key, false) } // current returns the full key name of the current context. @@ -497,24 +653,62 @@ func (p *parser) current() string { } func stripFirstNewline(s string) string { - if len(s) == 0 || s[0] != '\n' { - return s + if len(s) > 0 && s[0] == '\n' { + return s[1:] } - return s[1:] + if len(s) > 1 && s[0] == '\r' && s[1] == '\n' { + return s[2:] + } + return s } -func stripEscapedWhitespace(s string) string { - esc := strings.Split(s, "\\\n") - if len(esc) > 1 { - for i := 1; i < len(esc); i++ { - esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace) +// Remove newlines inside triple-quoted strings if a line ends with "\". +func (p *parser) stripEscapedNewlines(s string) string { + split := strings.Split(s, "\n") + if len(split) < 1 { + return s + } + + escNL := false // Keep track of the last non-blank line was escaped. 
+ for i, line := range split { + line = strings.TrimRight(line, " \t\r") + + if len(line) == 0 || line[len(line)-1] != '\\' { + split[i] = strings.TrimRight(split[i], "\r") + if !escNL && i != len(split)-1 { + split[i] += "\n" + } + continue + } + + escBS := true + for j := len(line) - 1; j >= 0 && line[j] == '\\'; j-- { + escBS = !escBS + } + if escNL { + line = strings.TrimLeft(line, " \t\r") + } + escNL = !escBS + + if escBS { + split[i] += "\n" + continue + } + + if i == len(split)-1 { + p.panicf("invalid escape: '\\ '") + } + + split[i] = line[:len(line)-1] // Remove \ + if len(split)-1 > i { + split[i+1] = strings.TrimLeft(split[i+1], " \t\r") } } - return strings.Join(esc, "") + return strings.Join(split, "") } -func (p *parser) replaceEscapes(str string) string { - var replaced []rune +func (p *parser) replaceEscapes(it item, str string) string { + replaced := make([]rune, 0, len(str)) s := []byte(str) r := 0 for r < len(s) { @@ -532,7 +726,8 @@ func (p *parser) replaceEscapes(str string) string { switch s[r] { default: p.bug("Expected valid escape code after \\, but got %q.", s[r]) - return "" + case ' ', '\t': + p.panicItemf(it, "invalid escape: '\\%c'", s[r]) case 'b': replaced = append(replaced, rune(0x0008)) r += 1 @@ -558,14 +753,14 @@ func (p *parser) replaceEscapes(str string) string { // At this point, we know we have a Unicode escape of the form // `uXXXX` at [r, r+5). (Because the lexer guarantees this // for us.) - escaped := p.asciiEscapeToUnicode(s[r+1 : r+5]) + escaped := p.asciiEscapeToUnicode(it, s[r+1:r+5]) replaced = append(replaced, escaped) r += 5 case 'U': // At this point, we know we have a Unicode escape of the form // `uXXXX` at [r, r+9). (Because the lexer guarantees this // for us.) - escaped := p.asciiEscapeToUnicode(s[r+1 : r+9]) + escaped := p.asciiEscapeToUnicode(it, s[r+1:r+9]) replaced = append(replaced, escaped) r += 9 } @@ -573,20 +768,14 @@ func (p *parser) replaceEscapes(str string) string { return string(replaced) } -func (p *parser) asciiEscapeToUnicode(bs []byte) rune { +func (p *parser) asciiEscapeToUnicode(it item, bs []byte) rune { s := string(bs) hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32) if err != nil { - p.bug("Could not parse '%s' as a hexadecimal number, but the "+ - "lexer claims it's OK: %s", s, err) + p.bug("Could not parse '%s' as a hexadecimal number, but the lexer claims it's OK: %s", s, err) } if !utf8.ValidRune(rune(hex)) { - p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s) + p.panicItemf(it, "Escaped character '\\u%s' is not valid UTF-8.", s) } return rune(hex) } - -func isStringType(ty itemType) bool { - return ty == itemString || ty == itemMultilineString || - ty == itemRawString || ty == itemRawMultilineString -} diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/session.vim b/src/runtime/vendor/github.com/BurntSushi/toml/session.vim deleted file mode 100644 index 562164be0603..000000000000 --- a/src/runtime/vendor/github.com/BurntSushi/toml/session.vim +++ /dev/null @@ -1 +0,0 @@ -au BufWritePost *.go silent!make tags > /dev/null 2>&1 diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/type_check.go b/src/runtime/vendor/github.com/BurntSushi/toml/type_check.go deleted file mode 100644 index c73f8afc1a6d..000000000000 --- a/src/runtime/vendor/github.com/BurntSushi/toml/type_check.go +++ /dev/null @@ -1,91 +0,0 @@ -package toml - -// tomlType represents any Go type that corresponds to a TOML type. 
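stripEscapedNewlines and replaceEscapes above implement the TOML rule that a line-ending backslash inside a basic multiline string removes the newline and any following whitespace. A short sketch of the observable effect, using an illustrative document:

package main

import (
	"fmt"

	"github.com/BurntSushi/toml"
)

func main() {
	// The trailing backslashes below are line-ending backslashes: the
	// newline and the leading whitespace of the next line are stripped,
	// so the three physical lines decode to one logical sentence.
	doc := `
msg = """\
       The quick brown \
       fox jumps over \
       the lazy dog."""
`
	var v struct{ Msg string }
	if _, err := toml.Decode(doc, &v); err != nil {
		fmt.Println("decode error:", err)
		return
	}
	fmt.Printf("%q\n", v.Msg) // "The quick brown fox jumps over the lazy dog."
}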
-// While the first draft of the TOML spec has a simplistic type system that -// probably doesn't need this level of sophistication, we seem to be militating -// toward adding real composite types. -type tomlType interface { - typeString() string -} - -// typeEqual accepts any two types and returns true if they are equal. -func typeEqual(t1, t2 tomlType) bool { - if t1 == nil || t2 == nil { - return false - } - return t1.typeString() == t2.typeString() -} - -func typeIsHash(t tomlType) bool { - return typeEqual(t, tomlHash) || typeEqual(t, tomlArrayHash) -} - -type tomlBaseType string - -func (btype tomlBaseType) typeString() string { - return string(btype) -} - -func (btype tomlBaseType) String() string { - return btype.typeString() -} - -var ( - tomlInteger tomlBaseType = "Integer" - tomlFloat tomlBaseType = "Float" - tomlDatetime tomlBaseType = "Datetime" - tomlString tomlBaseType = "String" - tomlBool tomlBaseType = "Bool" - tomlArray tomlBaseType = "Array" - tomlHash tomlBaseType = "Hash" - tomlArrayHash tomlBaseType = "ArrayHash" -) - -// typeOfPrimitive returns a tomlType of any primitive value in TOML. -// Primitive values are: Integer, Float, Datetime, String and Bool. -// -// Passing a lexer item other than the following will cause a BUG message -// to occur: itemString, itemBool, itemInteger, itemFloat, itemDatetime. -func (p *parser) typeOfPrimitive(lexItem item) tomlType { - switch lexItem.typ { - case itemInteger: - return tomlInteger - case itemFloat: - return tomlFloat - case itemDatetime: - return tomlDatetime - case itemString: - return tomlString - case itemMultilineString: - return tomlString - case itemRawString: - return tomlString - case itemRawMultilineString: - return tomlString - case itemBool: - return tomlBool - } - p.bug("Cannot infer primitive type of lex item '%s'.", lexItem) - panic("unreachable") -} - -// typeOfArray returns a tomlType for an array given a list of types of its -// values. -// -// In the current spec, if an array is homogeneous, then its type is always -// "Array". If the array is not homogeneous, an error is generated. -func (p *parser) typeOfArray(types []tomlType) tomlType { - // Empty arrays are cool. - if len(types) == 0 { - return tomlArray - } - - theType := types[0] - for _, t := range types[1:] { - if !typeEqual(theType, t) { - p.panicf("Array contains values of type '%s' and '%s', but "+ - "arrays must be homogeneous.", theType, t) - } - } - return tomlArray -} diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/type_fields.go b/src/runtime/vendor/github.com/BurntSushi/toml/type_fields.go index 608997c22f68..254ca82e5494 100644 --- a/src/runtime/vendor/github.com/BurntSushi/toml/type_fields.go +++ b/src/runtime/vendor/github.com/BurntSushi/toml/type_fields.go @@ -70,8 +70,8 @@ func typeFields(t reflect.Type) []field { next := []field{{typ: t}} // Count of queued names for current level and the next. - count := map[reflect.Type]int{} - nextCount := map[reflect.Type]int{} + var count map[reflect.Type]int + var nextCount map[reflect.Type]int // Types already visited at an earlier level. visited := map[reflect.Type]bool{} diff --git a/src/runtime/vendor/github.com/BurntSushi/toml/type_toml.go b/src/runtime/vendor/github.com/BurntSushi/toml/type_toml.go new file mode 100644 index 000000000000..4e90d77373b9 --- /dev/null +++ b/src/runtime/vendor/github.com/BurntSushi/toml/type_toml.go @@ -0,0 +1,70 @@ +package toml + +// tomlType represents any Go type that corresponds to a TOML type. 
+// While the first draft of the TOML spec has a simplistic type system that +// probably doesn't need this level of sophistication, we seem to be militating +// toward adding real composite types. +type tomlType interface { + typeString() string +} + +// typeEqual accepts any two types and returns true if they are equal. +func typeEqual(t1, t2 tomlType) bool { + if t1 == nil || t2 == nil { + return false + } + return t1.typeString() == t2.typeString() +} + +func typeIsTable(t tomlType) bool { + return typeEqual(t, tomlHash) || typeEqual(t, tomlArrayHash) +} + +type tomlBaseType string + +func (btype tomlBaseType) typeString() string { + return string(btype) +} + +func (btype tomlBaseType) String() string { + return btype.typeString() +} + +var ( + tomlInteger tomlBaseType = "Integer" + tomlFloat tomlBaseType = "Float" + tomlDatetime tomlBaseType = "Datetime" + tomlString tomlBaseType = "String" + tomlBool tomlBaseType = "Bool" + tomlArray tomlBaseType = "Array" + tomlHash tomlBaseType = "Hash" + tomlArrayHash tomlBaseType = "ArrayHash" +) + +// typeOfPrimitive returns a tomlType of any primitive value in TOML. +// Primitive values are: Integer, Float, Datetime, String and Bool. +// +// Passing a lexer item other than the following will cause a BUG message +// to occur: itemString, itemBool, itemInteger, itemFloat, itemDatetime. +func (p *parser) typeOfPrimitive(lexItem item) tomlType { + switch lexItem.typ { + case itemInteger: + return tomlInteger + case itemFloat: + return tomlFloat + case itemDatetime: + return tomlDatetime + case itemString: + return tomlString + case itemMultilineString: + return tomlString + case itemRawString: + return tomlString + case itemRawMultilineString: + return tomlString + case itemBool: + return tomlBool + } + p.bug("Cannot infer primitive type of lex item '%s'.", lexItem) + panic("unreachable") +} diff --git a/src/runtime/vendor/github.com/Microsoft/go-winio/file.go b/src/runtime/vendor/github.com/Microsoft/go-winio/file.go index 0385e4108129..293ab54c80c3 100644 --- a/src/runtime/vendor/github.com/Microsoft/go-winio/file.go +++ b/src/runtime/vendor/github.com/Microsoft/go-winio/file.go @@ -1,3 +1,4 @@ +//go:build windows // +build windows package winio @@ -143,6 +144,11 @@ func (f *win32File) Close() error { return nil } +// IsClosed checks if the file has been closed +func (f *win32File) IsClosed() bool { + return f.closing.isSet() +} + // prepareIo prepares for a new IO operation. // The caller must call f.wg.Done() when the IO is finished, prior to Close() returning. 
func (f *win32File) prepareIo() (*ioOperation, error) { diff --git a/src/runtime/vendor/github.com/Microsoft/go-winio/go.mod b/src/runtime/vendor/github.com/Microsoft/go-winio/go.mod deleted file mode 100644 index 98a8dea0e7e1..000000000000 --- a/src/runtime/vendor/github.com/Microsoft/go-winio/go.mod +++ /dev/null @@ -1,9 +0,0 @@ -module github.com/Microsoft/go-winio - -go 1.12 - -require ( - github.com/pkg/errors v0.9.1 - github.com/sirupsen/logrus v1.7.0 - golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c -) diff --git a/src/runtime/vendor/github.com/Microsoft/go-winio/go.sum b/src/runtime/vendor/github.com/Microsoft/go-winio/go.sum deleted file mode 100644 index aa6ad3b571af..000000000000 --- a/src/runtime/vendor/github.com/Microsoft/go-winio/go.sum +++ /dev/null @@ -1,14 +0,0 @@ -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM= -github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037 h1:YyJpGZS1sBuBCzLAR1VEpK193GlqGZbnPFnPV/5Rsb4= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c h1:VwygUrnw9jn88c4u8GD3rZQbqrP/tgas88tPUbBxQrk= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/src/runtime/vendor/github.com/Microsoft/go-winio/hvsock.go b/src/runtime/vendor/github.com/Microsoft/go-winio/hvsock.go index b632f8f8bb98..b2b644d002aa 100644 --- a/src/runtime/vendor/github.com/Microsoft/go-winio/hvsock.go +++ b/src/runtime/vendor/github.com/Microsoft/go-winio/hvsock.go @@ -1,3 +1,4 @@ +//go:build windows // +build windows package winio @@ -252,15 +253,23 @@ func (conn *HvsockConn) Close() error { return conn.sock.Close() } +func (conn *HvsockConn) IsClosed() bool { + return conn.sock.IsClosed() +} + func (conn *HvsockConn) shutdown(how int) error { - err := syscall.Shutdown(conn.sock.handle, syscall.SHUT_RD) + if conn.IsClosed() { + return ErrFileClosed + } + + err := syscall.Shutdown(conn.sock.handle, how) if err != nil { return os.NewSyscallError("shutdown", err) } return nil } -// CloseRead shuts down the read end of the socket. +// CloseRead shuts down the read end of the socket, preventing future read operations. func (conn *HvsockConn) CloseRead() error { err := conn.shutdown(syscall.SHUT_RD) if err != nil { @@ -269,8 +278,8 @@ func (conn *HvsockConn) CloseRead() error { return nil } -// CloseWrite shuts down the write end of the socket, notifying the other endpoint that -// no more data will be written. +// CloseWrite shuts down the write end of the socket, preventing future write operations and +// notifying the other endpoint that no more data will be written. 
func (conn *HvsockConn) CloseWrite() error { err := conn.shutdown(syscall.SHUT_WR) if err != nil { diff --git a/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/guid/guid.go b/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/guid/guid.go index f497c0e39178..2d9161e2deee 100644 --- a/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/guid/guid.go +++ b/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/guid/guid.go @@ -14,8 +14,6 @@ import ( "encoding/binary" "fmt" "strconv" - - "golang.org/x/sys/windows" ) // Variant specifies which GUID variant (or "type") of the GUID. It determines @@ -41,13 +39,6 @@ type Version uint8 var _ = (encoding.TextMarshaler)(GUID{}) var _ = (encoding.TextUnmarshaler)(&GUID{}) -// GUID represents a GUID/UUID. It has the same structure as -// golang.org/x/sys/windows.GUID so that it can be used with functions expecting -// that type. It is defined as its own type so that stringification and -// marshaling can be supported. The representation matches that used by native -// Windows code. -type GUID windows.GUID - // NewV4 returns a new version 4 (pseudorandom) GUID, as defined by RFC 4122. func NewV4() (GUID, error) { var b [16]byte diff --git a/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/guid/guid_nonwindows.go b/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/guid/guid_nonwindows.go new file mode 100644 index 000000000000..f64d828c0ba4 --- /dev/null +++ b/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/guid/guid_nonwindows.go @@ -0,0 +1,15 @@ +// +build !windows + +package guid + +// GUID represents a GUID/UUID. It has the same structure as +// golang.org/x/sys/windows.GUID so that it can be used with functions expecting +// that type. It is defined as its own type as that is only available to builds +// targeted at `windows`. The representation matches that used by native Windows +// code. +type GUID struct { + Data1 uint32 + Data2 uint16 + Data3 uint16 + Data4 [8]byte +} diff --git a/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/guid/guid_windows.go b/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/guid/guid_windows.go new file mode 100644 index 000000000000..83617f4eee9a --- /dev/null +++ b/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/guid/guid_windows.go @@ -0,0 +1,10 @@ +package guid + +import "golang.org/x/sys/windows" + +// GUID represents a GUID/UUID. It has the same structure as +// golang.org/x/sys/windows.GUID so that it can be used with functions expecting +// that type. It is defined as its own type so that stringification and +// marshaling can be supported. The representation matches that used by native +// Windows code. +type GUID windows.GUID diff --git a/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/security/grantvmgroupaccess.go b/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/security/grantvmgroupaccess.go index fca241590cca..602920786c90 100644 --- a/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/security/grantvmgroupaccess.go +++ b/src/runtime/vendor/github.com/Microsoft/go-winio/pkg/security/grantvmgroupaccess.go @@ -3,11 +3,10 @@ package security import ( + "fmt" "os" "syscall" "unsafe" - - "github.com/pkg/errors" ) type ( @@ -72,7 +71,7 @@ func GrantVmGroupAccess(name string) error { // Stat (to determine if `name` is a directory). s, err := os.Stat(name) if err != nil { - return errors.Wrapf(err, "%s os.Stat %s", gvmga, name) + return fmt.Errorf("%s os.Stat %s: %w", gvmga, name, err) } // Get a handle to the file/directory. Must defer Close on success. 
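The hunks above replace pkg/errors.Wrapf with fmt.Errorf and the %w verb, which keeps the wrapped error inspectable with the standard errors package. A hedged sketch of that pattern; the helper, path, and message prefix are illustrative, not the vendored code itself:

package main

import (
	"errors"
	"fmt"
	"io/fs"
	"os"
)

// statWrapped mimics the fmt.Errorf("...: %w", ...) pattern used above;
// the "GrantVmGroupAccess" prefix and the file name are illustrative only.
func statWrapped(name string) error {
	if _, err := os.Stat(name); err != nil {
		return fmt.Errorf("GrantVmGroupAccess: os.Stat %s: %w", name, err)
	}
	return nil
}

func main() {
	err := statWrapped("does-not-exist.vhdx")
	// %w keeps the error chain intact, so callers can still match the
	// underlying cause through the added context.
	fmt.Println(errors.Is(err, fs.ErrNotExist)) // true
	fmt.Println(err)
}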
@@ -88,7 +87,7 @@ func GrantVmGroupAccess(name string) error { sd := uintptr(0) origDACL := uintptr(0) if err := getSecurityInfo(fd, uint32(ot), uint32(si), nil, nil, &origDACL, nil, &sd); err != nil { - return errors.Wrapf(err, "%s GetSecurityInfo %s", gvmga, name) + return fmt.Errorf("%s GetSecurityInfo %s: %w", gvmga, name, err) } defer syscall.LocalFree((syscall.Handle)(unsafe.Pointer(sd))) @@ -102,7 +101,7 @@ func GrantVmGroupAccess(name string) error { // And finally use SetSecurityInfo to apply the updated DACL. if err := setSecurityInfo(fd, uint32(ot), uint32(si), uintptr(0), uintptr(0), newDACL, uintptr(0)); err != nil { - return errors.Wrapf(err, "%s SetSecurityInfo %s", gvmga, name) + return fmt.Errorf("%s SetSecurityInfo %s: %w", gvmga, name, err) } return nil @@ -120,7 +119,7 @@ func createFile(name string, isDir bool) (syscall.Handle, error) { } fd, err := syscall.CreateFile(&namep[0], da, sm, nil, syscall.OPEN_EXISTING, fa, 0) if err != nil { - return 0, errors.Wrapf(err, "%s syscall.CreateFile %s", gvmga, name) + return 0, fmt.Errorf("%s syscall.CreateFile %s: %w", gvmga, name, err) } return fd, nil } @@ -131,7 +130,7 @@ func generateDACLWithAcesAdded(name string, isDir bool, origDACL uintptr) (uintp // Generate pointers to the SIDs based on the string SIDs sid, err := syscall.StringToSid(sidVmGroup) if err != nil { - return 0, errors.Wrapf(err, "%s syscall.StringToSid %s %s", gvmga, name, sidVmGroup) + return 0, fmt.Errorf("%s syscall.StringToSid %s %s: %w", gvmga, name, sidVmGroup, err) } inheritance := inheritModeNoInheritance @@ -154,7 +153,7 @@ func generateDACLWithAcesAdded(name string, isDir bool, origDACL uintptr) (uintp modifiedDACL := uintptr(0) if err := setEntriesInAcl(uintptr(uint32(1)), uintptr(unsafe.Pointer(&eaArray[0])), origDACL, &modifiedDACL); err != nil { - return 0, errors.Wrapf(err, "%s SetEntriesInAcl %s", gvmga, name) + return 0, fmt.Errorf("%s SetEntriesInAcl %s: %w", gvmga, name, err) } return modifiedDACL, nil diff --git a/src/runtime/vendor/github.com/Microsoft/go-winio/vhd/vhd.go b/src/runtime/vendor/github.com/Microsoft/go-winio/vhd/vhd.go index a33a36c0ffba..f7f78fc23048 100644 --- a/src/runtime/vendor/github.com/Microsoft/go-winio/vhd/vhd.go +++ b/src/runtime/vendor/github.com/Microsoft/go-winio/vhd/vhd.go @@ -1,3 +1,4 @@ +//go:build windows // +build windows package vhd @@ -7,14 +8,13 @@ import ( "syscall" "github.com/Microsoft/go-winio/pkg/guid" - "github.com/pkg/errors" "golang.org/x/sys/windows" ) //go:generate go run mksyscall_windows.go -output zvhd_windows.go vhd.go //sys createVirtualDisk(virtualStorageType *VirtualStorageType, path string, virtualDiskAccessMask uint32, securityDescriptor *uintptr, createVirtualDiskFlags uint32, providerSpecificFlags uint32, parameters *CreateVirtualDiskParameters, overlapped *syscall.Overlapped, handle *syscall.Handle) (win32err error) = virtdisk.CreateVirtualDisk -//sys openVirtualDisk(virtualStorageType *VirtualStorageType, path string, virtualDiskAccessMask uint32, openVirtualDiskFlags uint32, parameters *OpenVirtualDiskParameters, handle *syscall.Handle) (win32err error) = virtdisk.OpenVirtualDisk +//sys openVirtualDisk(virtualStorageType *VirtualStorageType, path string, virtualDiskAccessMask uint32, openVirtualDiskFlags uint32, parameters *openVirtualDiskParameters, handle *syscall.Handle) (win32err error) = virtdisk.OpenVirtualDisk //sys attachVirtualDisk(handle syscall.Handle, securityDescriptor *uintptr, attachVirtualDiskFlag uint32, providerSpecificFlags uint32, parameters 
*AttachVirtualDiskParameters, overlapped *syscall.Overlapped) (win32err error) = virtdisk.AttachVirtualDisk //sys detachVirtualDisk(handle syscall.Handle, detachVirtualDiskFlags uint32, providerSpecificFlags uint32) (win32err error) = virtdisk.DetachVirtualDisk //sys getVirtualDiskPhysicalPath(handle syscall.Handle, diskPathSizeInBytes *uint32, buffer *uint16) (win32err error) = virtdisk.GetVirtualDiskPhysicalPath @@ -62,13 +62,27 @@ type OpenVirtualDiskParameters struct { Version2 OpenVersion2 } +// The higher level `OpenVersion2` struct uses bools to refer to `GetInfoOnly` and `ReadOnly` for ease of use. However, +// the internal windows structure uses `BOOLS` aka int32s for these types. `openVersion2` is used for translating +// `OpenVersion2` fields to the correct windows internal field types on the `Open____` methods. +type openVersion2 struct { + getInfoOnly int32 + readOnly int32 + resiliencyGUID guid.GUID +} + +type openVirtualDiskParameters struct { + version uint32 + version2 openVersion2 +} + type AttachVersion2 struct { RestrictedOffset uint64 RestrictedLength uint64 } type AttachVirtualDiskParameters struct { - Version uint32 // Must always be set to 2 + Version uint32 Version2 AttachVersion2 } @@ -146,16 +160,13 @@ func CreateVhdx(path string, maxSizeInGb, blockSizeInMb uint32) error { return err } - if err := syscall.CloseHandle(handle); err != nil { - return err - } - return nil + return syscall.CloseHandle(handle) } // DetachVirtualDisk detaches a virtual hard disk by handle. func DetachVirtualDisk(handle syscall.Handle) (err error) { if err := detachVirtualDisk(handle, 0, 0); err != nil { - return errors.Wrap(err, "failed to detach virtual disk") + return fmt.Errorf("failed to detach virtual disk: %w", err) } return nil } @@ -185,7 +196,7 @@ func AttachVirtualDisk(handle syscall.Handle, attachVirtualDiskFlag AttachVirtua parameters, nil, ); err != nil { - return errors.Wrap(err, "failed to attach virtual disk") + return fmt.Errorf("failed to attach virtual disk: %w", err) } return nil } @@ -209,7 +220,7 @@ func AttachVhd(path string) (err error) { AttachVirtualDiskFlagNone, ¶ms, ); err != nil { - return errors.Wrap(err, "failed to attach virtual disk") + return fmt.Errorf("failed to attach virtual disk: %w", err) } return nil } @@ -234,19 +245,35 @@ func OpenVirtualDiskWithParameters(vhdPath string, virtualDiskAccessMask Virtual var ( handle syscall.Handle defaultType VirtualStorageType + getInfoOnly int32 + readOnly int32 ) if parameters.Version != 2 { return handle, fmt.Errorf("only version 2 VHDs are supported, found version: %d", parameters.Version) } + if parameters.Version2.GetInfoOnly { + getInfoOnly = 1 + } + if parameters.Version2.ReadOnly { + readOnly = 1 + } + params := &openVirtualDiskParameters{ + version: parameters.Version, + version2: openVersion2{ + getInfoOnly, + readOnly, + parameters.Version2.ResiliencyGUID, + }, + } if err := openVirtualDisk( &defaultType, vhdPath, uint32(virtualDiskAccessMask), uint32(openVirtualDiskFlags), - parameters, + params, &handle, ); err != nil { - return 0, errors.Wrap(err, "failed to open virtual disk") + return 0, fmt.Errorf("failed to open virtual disk: %w", err) } return handle, nil } @@ -272,7 +299,7 @@ func CreateVirtualDisk(path string, virtualDiskAccessMask VirtualDiskAccessMask, nil, &handle, ); err != nil { - return handle, errors.Wrap(err, "failed to create virtual disk") + return handle, fmt.Errorf("failed to create virtual disk: %w", err) } return handle, nil } @@ -290,7 +317,7 @@ func 
GetVirtualDiskPhysicalPath(handle syscall.Handle) (_ string, err error) { &diskPathSizeInBytes, &diskPhysicalPathBuf[0], ); err != nil { - return "", errors.Wrap(err, "failed to get disk physical path") + return "", fmt.Errorf("failed to get disk physical path: %w", err) } return windows.UTF16ToString(diskPhysicalPathBuf[:]), nil } @@ -314,10 +341,10 @@ func CreateDiffVhd(diffVhdPath, baseVhdPath string, blockSizeInMB uint32) error createParams, ) if err != nil { - return fmt.Errorf("failed to create differencing vhd: %s", err) + return fmt.Errorf("failed to create differencing vhd: %w", err) } if err := syscall.CloseHandle(vhdHandle); err != nil { - return fmt.Errorf("failed to close differencing vhd handle: %s", err) + return fmt.Errorf("failed to close differencing vhd handle: %w", err) } return nil } diff --git a/src/runtime/vendor/github.com/Microsoft/go-winio/vhd/zvhd_windows.go b/src/runtime/vendor/github.com/Microsoft/go-winio/vhd/zvhd_windows.go index 7fb5f3651b95..1d7498db3bee 100644 --- a/src/runtime/vendor/github.com/Microsoft/go-winio/vhd/zvhd_windows.go +++ b/src/runtime/vendor/github.com/Microsoft/go-winio/vhd/zvhd_windows.go @@ -88,7 +88,7 @@ func getVirtualDiskPhysicalPath(handle syscall.Handle, diskPathSizeInBytes *uint return } -func openVirtualDisk(virtualStorageType *VirtualStorageType, path string, virtualDiskAccessMask uint32, openVirtualDiskFlags uint32, parameters *OpenVirtualDiskParameters, handle *syscall.Handle) (win32err error) { +func openVirtualDisk(virtualStorageType *VirtualStorageType, path string, virtualDiskAccessMask uint32, openVirtualDiskFlags uint32, parameters *openVirtualDiskParameters, handle *syscall.Handle) (win32err error) { var _p0 *uint16 _p0, win32err = syscall.UTF16PtrFromString(path) if win32err != nil { @@ -97,7 +97,7 @@ func openVirtualDisk(virtualStorageType *VirtualStorageType, path string, virtua return _openVirtualDisk(virtualStorageType, _p0, virtualDiskAccessMask, openVirtualDiskFlags, parameters, handle) } -func _openVirtualDisk(virtualStorageType *VirtualStorageType, path *uint16, virtualDiskAccessMask uint32, openVirtualDiskFlags uint32, parameters *OpenVirtualDiskParameters, handle *syscall.Handle) (win32err error) { +func _openVirtualDisk(virtualStorageType *VirtualStorageType, path *uint16, virtualDiskAccessMask uint32, openVirtualDiskFlags uint32, parameters *openVirtualDiskParameters, handle *syscall.Handle) (win32err error) { r0, _, _ := syscall.Syscall6(procOpenVirtualDisk.Addr(), 6, uintptr(unsafe.Pointer(virtualStorageType)), uintptr(unsafe.Pointer(path)), uintptr(virtualDiskAccessMask), uintptr(openVirtualDiskFlags), uintptr(unsafe.Pointer(parameters)), uintptr(unsafe.Pointer(handle))) if r0 != 0 { win32err = syscall.Errno(r0) diff --git a/src/runtime/vendor/github.com/Microsoft/hcsshim/go.mod b/src/runtime/vendor/github.com/Microsoft/hcsshim/go.mod deleted file mode 100644 index 9c60dd302511..000000000000 --- a/src/runtime/vendor/github.com/Microsoft/hcsshim/go.mod +++ /dev/null @@ -1,39 +0,0 @@ -module github.com/Microsoft/hcsshim - -go 1.13 - -require ( - github.com/BurntSushi/toml v0.3.1 - github.com/Microsoft/go-winio v0.4.17 - github.com/cenkalti/backoff/v4 v4.1.1 - github.com/containerd/cgroups v1.0.1 - github.com/containerd/console v1.0.2 - github.com/containerd/containerd v1.5.7 - github.com/containerd/go-runc v1.0.0 - github.com/containerd/ttrpc v1.1.0 - github.com/containerd/typeurl v1.0.2 - github.com/gogo/protobuf v1.3.2 - github.com/golang/mock v1.6.0 - github.com/google/go-cmp v0.5.6 - 
github.com/google/go-containerregistry v0.5.1 - github.com/linuxkit/virtsock v0.0.0-20201010232012-f8cee7dfc7a3 - github.com/mattn/go-shellwords v1.0.6 - github.com/opencontainers/runc v1.0.2 - github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 - github.com/pkg/errors v0.9.1 - github.com/sirupsen/logrus v1.8.1 - github.com/urfave/cli v1.22.2 - github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852 - github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae - go.etcd.io/bbolt v1.3.6 - go.opencensus.io v0.22.3 - golang.org/x/net v0.0.0-20210825183410-e898025ed96a // indirect - golang.org/x/sync v0.0.0-20210220032951-036812b2e83c - golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e - google.golang.org/grpc v1.40.0 -) - -replace ( - google.golang.org/genproto => google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63 - google.golang.org/grpc => google.golang.org/grpc v1.27.1 -) diff --git a/src/runtime/vendor/github.com/Microsoft/hcsshim/go.sum b/src/runtime/vendor/github.com/Microsoft/hcsshim/go.sum deleted file mode 100644 index 93c37657f3cf..000000000000 --- a/src/runtime/vendor/github.com/Microsoft/hcsshim/go.sum +++ /dev/null @@ -1,993 +0,0 @@ -bazil.org/fuse v0.0.0-20160811212531-371fbbdaa898/go.mod h1:Xbm+BRKSBEpa4q4hTSxohYNQpsxXPbPry4JJWOB3LB8= -cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= -cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= -cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= -cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= -cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= -cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= -cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= -cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= -cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= -cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= -cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= -cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= -cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= -cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= -cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= -cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= -cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= -cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= -cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= -cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= -cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= -dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -github.com/Azure/azure-sdk-for-go v16.2.1+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= -github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod 
h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= -github.com/Azure/go-autorest v10.8.1+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= -github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= -github.com/Azure/go-autorest/autorest v0.11.1/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw= -github.com/Azure/go-autorest/autorest/adal v0.9.0/go.mod h1:/c022QCutn2P7uY+/oQWWNcK9YU+MH96NgK+jErpbcg= -github.com/Azure/go-autorest/autorest/adal v0.9.5/go.mod h1:B7KF7jKIeC9Mct5spmyCB/A8CG/sEz1vwIRGv/bbw7A= -github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74= -github.com/Azure/go-autorest/autorest/mocks v0.4.0/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= -github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= -github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= -github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= -github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/Microsoft/go-winio v0.4.11/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA= -github.com/Microsoft/go-winio v0.4.14/go.mod h1:qXqCSQ3Xa7+6tgxaGTIe4Kpcdsi+P8jBhyzoq1bpyYA= -github.com/Microsoft/go-winio v0.4.15-0.20190919025122-fc70bd9a86b5/go.mod h1:tTuCMEN+UleMWgg9dVx4Hu52b1bJo+59jBh3ajtinzw= -github.com/Microsoft/go-winio v0.4.16-0.20201130162521-d1ffc52c7331/go.mod h1:XB6nPKklQyQ7GC9LdcBEcBl8PF76WugXOPRXwdLnMv0= -github.com/Microsoft/go-winio v0.4.16/go.mod h1:XB6nPKklQyQ7GC9LdcBEcBl8PF76WugXOPRXwdLnMv0= -github.com/Microsoft/go-winio v0.4.17-0.20210211115548-6eac466e5fa3/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= -github.com/Microsoft/go-winio v0.4.17-0.20210324224401-5516f17a5958/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= -github.com/Microsoft/go-winio v0.4.17 h1:iT12IBVClFevaf8PuVyi3UmZOVh4OqnaLxDTW2O6j3w= -github.com/Microsoft/go-winio v0.4.17/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= -github.com/Microsoft/hcsshim v0.8.6/go.mod h1:Op3hHsoHPAvb6lceZHDtd9OkTew38wNoXnJs8iY7rUg= -github.com/Microsoft/hcsshim v0.8.7-0.20190325164909-8abdbb8205e4/go.mod h1:Op3hHsoHPAvb6lceZHDtd9OkTew38wNoXnJs8iY7rUg= -github.com/Microsoft/hcsshim v0.8.7/go.mod h1:OHd7sQqRFrYd3RmSgbgji+ctCwkbq2wbEYNSzOYtcBQ= -github.com/Microsoft/hcsshim v0.8.9/go.mod h1:5692vkUqntj1idxauYlpoINNKeqCiG6Sg38RRsjT5y8= -github.com/Microsoft/hcsshim v0.8.14/go.mod h1:NtVKoYxQuTLx6gEq0L96c9Ju4JbRJ4nY2ow3VK6a9Lg= -github.com/Microsoft/hcsshim v0.8.15/go.mod h1:x38A4YbHbdxJtc0sF6oIz+RG0npwSCAvn69iY6URG00= -github.com/Microsoft/hcsshim v0.8.16/go.mod h1:o5/SZqmR7x9JNKsW3pu+nqHm0MF8vbA+VxGOoXdC600= -github.com/Microsoft/hcsshim v0.8.21/go.mod h1:+w2gRZ5ReXQhFOrvSQeNfhrYB/dg3oDwTOcER2fw4I4= -github.com/Microsoft/hcsshim/test v0.0.0-20201218223536-d3e5debf77da/go.mod h1:5hlzMzRKMLyo42nCZ9oml8AdTlq/0cvIaBv6tK1RehU= -github.com/Microsoft/hcsshim/test v0.0.0-20210227013316-43a75bb4edd3/go.mod h1:mw7qgWloBUl75W/gVH3cQszUg1+gUITj7D6NY7ywVnY= -github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= -github.com/OneOfOne/xxhash v1.2.2/go.mod 
h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= -github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= -github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= -github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= -github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= -github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ= -github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/alexflint/go-filemutex v0.0.0-20171022225611-72bdc8eae2ae/go.mod h1:CgnQgUtFrFz9mxFNtED3jI5tLDjKlOM+oUF/sTk6ps0= -github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= -github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= -github.com/aws/aws-sdk-go v1.15.11/go.mod h1:mFuSZ37Z9YOHbQEwBWztmVzqXrEkub65tZoCYDt7FT0= -github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= -github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= -github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= -github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= -github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA= -github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= -github.com/blang/semver v3.1.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= -github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= -github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= -github.com/bshuster-repo/logrus-logstash-hook v0.4.1/go.mod h1:zsTqEiSzDgAa/8GZR7E1qaXrhYNDKBYy5/dWPTIflbk= -github.com/buger/jsonparser v0.0.0-20180808090653-f4dd9f5a6b44/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s= -github.com/bugsnag/bugsnag-go v0.0.0-20141110184014-b1d153021fcd/go.mod h1:2oa8nejYd4cQ/b0hMIopN0lCRxU0bueqREvZLWFrtK8= -github.com/bugsnag/osext v0.0.0-20130617224835-0dd3f918b21b/go.mod h1:obH5gd0BsqsP2LwDJ9aOkm/6J86V6lyAXCoQWGw3K50= -github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE= -github.com/cenkalti/backoff/v4 v4.1.1 h1:G2HAfAmvm/GcKan2oOQpBXOd2tT2G57ZnZGWa1PxPBQ= -github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= -github.com/cespare/xxhash/v2 
v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/checkpoint-restore/go-criu/v4 v4.1.0/go.mod h1:xUQBLp4RLc5zJtWY++yjOoMoB5lihDt7fai+75m+rGw= -github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d89JlCLQIfgVcNsNN0t6T2M= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= -github.com/cilium/ebpf v0.0.0-20200110133405-4032b1d8aae3/go.mod h1:MA5e5Lr8slmEg9bt0VpxxWqJlO4iwu3FBdHUzV7wQVg= -github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775/go.mod h1:7cR51M8ViRLIdUjrmSXlK9pkrsDlLHbO8jiB8X8JnOc= -github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs= -github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= -github.com/cilium/ebpf v0.6.2/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= -github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= -github.com/containerd/aufs v0.0.0-20200908144142-dab0cbea06f4/go.mod h1:nukgQABAEopAHvB6j7cnP5zJ+/3aVcE7hCYqvIwAHyE= -github.com/containerd/aufs v0.0.0-20201003224125-76a6863f2989/go.mod h1:AkGGQs9NM2vtYHaUen+NljV0/baGCAPELGm2q9ZXpWU= -github.com/containerd/aufs v0.0.0-20210316121734-20793ff83c97/go.mod h1:kL5kd6KM5TzQjR79jljyi4olc1Vrx6XBlcyj3gNv2PU= -github.com/containerd/aufs v1.0.0/go.mod h1:kL5kd6KM5TzQjR79jljyi4olc1Vrx6XBlcyj3gNv2PU= -github.com/containerd/btrfs v0.0.0-20201111183144-404b9149801e/go.mod h1:jg2QkJcsabfHugurUvvPhS3E08Oxiuh5W/g1ybB4e0E= -github.com/containerd/btrfs v0.0.0-20210316141732-918d888fb676/go.mod h1:zMcX3qkXTAi9GI50+0HOeuV8LU2ryCE/V2vG/ZBiTss= -github.com/containerd/btrfs v1.0.0/go.mod h1:zMcX3qkXTAi9GI50+0HOeuV8LU2ryCE/V2vG/ZBiTss= -github.com/containerd/cgroups v0.0.0-20190717030353-c4b9ac5c7601/go.mod h1:X9rLEHIqSf/wfK8NsPqxJmeZgW4pcfzdXITDrUSJ6uI= -github.com/containerd/cgroups v0.0.0-20190919134610-bf292b21730f/go.mod h1:OApqhQ4XNSNC13gXIwDjhOQxjWa/NxkwZXJ1EvqT0ko= -github.com/containerd/cgroups v0.0.0-20200531161412-0dbf7f05ba59/go.mod h1:pA0z1pT8KYB3TCXK/ocprsh7MAkoW8bZVzPdih9snmM= -github.com/containerd/cgroups v0.0.0-20200710171044-318312a37340/go.mod h1:s5q4SojHctfxANBDvMeIaIovkq29IP48TKAxnhYRxvo= -github.com/containerd/cgroups v0.0.0-20200824123100-0b889c03f102/go.mod h1:s5q4SojHctfxANBDvMeIaIovkq29IP48TKAxnhYRxvo= -github.com/containerd/cgroups v0.0.0-20210114181951-8a68de567b68/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE= -github.com/containerd/cgroups v1.0.1 h1:iJnMvco9XGvKUvNQkv88bE4uJXxRQH18efbKo9w5vHQ= -github.com/containerd/cgroups v1.0.1/go.mod h1:0SJrPIenamHDcZhEcJMNBB85rHcUsw4f25ZfBiPYRkU= -github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= -github.com/containerd/console v0.0.0-20181022165439-0650fd9eeb50/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= -github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE= -github.com/containerd/console v1.0.1/go.mod h1:XUsP6YE/mKtz6bxc+I8UiKKTP04qjQL4qcS3XoQ5xkw= -github.com/containerd/console v1.0.2 h1:Pi6D+aZXM+oUw1czuKgH5IJ+y0jhYcwBJfx5/Ghn9dE= -github.com/containerd/console v1.0.2/go.mod h1:ytZPjGgY2oeTkAONYafi2kSj0aYggsf8acV1PGKCbzQ= -github.com/containerd/containerd 
v1.2.10/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.3.0-beta.2.0.20190828155532-0293cbd26c69/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.3.0/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.3.1-0.20191213020239-082f7e3aed57/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.3.2/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.4.0-beta.2.0.20200729163537-40b22ef07410/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.4.1/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.4.3/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= -github.com/containerd/containerd v1.5.0-beta.1/go.mod h1:5HfvG1V2FsKesEGQ17k5/T7V960Tmcumvqn8Mc+pCYQ= -github.com/containerd/containerd v1.5.0-beta.3/go.mod h1:/wr9AVtEM7x9c+n0+stptlo/uBBoBORwEx6ardVcmKU= -github.com/containerd/containerd v1.5.0-beta.4/go.mod h1:GmdgZd2zA2GYIBZ0w09ZvgqEq8EfBp/m3lcVZIvPHhI= -github.com/containerd/containerd v1.5.0-rc.0/go.mod h1:V/IXoMqNGgBlabz3tHD2TWDoTJseu1FGOKuoA4nNb2s= -github.com/containerd/containerd v1.5.1/go.mod h1:0DOxVqwDy2iZvrZp2JUx/E+hS0UNTVn7dJnIOwtYR4g= -github.com/containerd/containerd v1.5.7 h1:rQyoYtj4KddB3bxG6SAqd4+08gePNyJjRqvOIfV3rkM= -github.com/containerd/containerd v1.5.7/go.mod h1:gyvv6+ugqY25TiXxcZC3L5yOeYgEw0QMhscqVp1AR9c= -github.com/containerd/continuity v0.0.0-20190426062206-aaeac12a7ffc/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= -github.com/containerd/continuity v0.0.0-20190815185530-f2a389ac0a02/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= -github.com/containerd/continuity v0.0.0-20191127005431-f65d91d395eb/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= -github.com/containerd/continuity v0.0.0-20200710164510-efbc4488d8fe/go.mod h1:cECdGN1O8G9bgKTlLhuPJimka6Xb/Gg7vYzCTNVxhvo= -github.com/containerd/continuity v0.0.0-20201208142359-180525291bb7/go.mod h1:kR3BEg7bDFaEddKm54WSmrol1fKWDU1nKYkgrcgZT7Y= -github.com/containerd/continuity v0.0.0-20210208174643-50096c924a4e/go.mod h1:EXlVlkqNba9rJe3j7w3Xa924itAMLgZH4UD/Q4PExuQ= -github.com/containerd/continuity v0.1.0 h1:UFRRY5JemiAhPZrr/uE0n8fMTLcZsUvySPr1+D7pgr8= -github.com/containerd/continuity v0.1.0/go.mod h1:ICJu0PwR54nI0yPEnJ6jcS+J7CZAUXrLh8lPo2knzsM= -github.com/containerd/fifo v0.0.0-20180307165137-3d5202aec260/go.mod h1:ODA38xgv3Kuk8dQz2ZQXpnv/UZZUHUCL7pnLehbXgQI= -github.com/containerd/fifo v0.0.0-20190226154929-a9fb20d87448/go.mod h1:ODA38xgv3Kuk8dQz2ZQXpnv/UZZUHUCL7pnLehbXgQI= -github.com/containerd/fifo v0.0.0-20200410184934-f15a3290365b/go.mod h1:jPQ2IAeZRCYxpS/Cm1495vGFww6ecHmMk1YJH2Q5ln0= -github.com/containerd/fifo v0.0.0-20201026212402-0724c46b320c/go.mod h1:jPQ2IAeZRCYxpS/Cm1495vGFww6ecHmMk1YJH2Q5ln0= -github.com/containerd/fifo v0.0.0-20210316144830-115abcc95a1d/go.mod h1:ocF/ME1SX5b1AOlWi9r677YJmCPSwwWnQ9O123vzpE4= -github.com/containerd/fifo v1.0.0 h1:6PirWBr9/L7GDamKr+XM0IeUFXu5mf3M/BPpH9gaLBU= -github.com/containerd/fifo v1.0.0/go.mod h1:ocF/ME1SX5b1AOlWi9r677YJmCPSwwWnQ9O123vzpE4= -github.com/containerd/go-cni v1.0.1/go.mod h1:+vUpYxKvAF72G9i1WoDOiPGRtQpqsNW/ZHtSlv++smU= -github.com/containerd/go-cni v1.0.2/go.mod h1:nrNABBHzu0ZwCug9Ije8hL2xBCYh/pjfMb1aZGrrohk= -github.com/containerd/go-runc v0.0.0-20180907222934-5a6d9f37cfa3/go.mod h1:IV7qH3hrUgRmyYrtgEeGWJfWbgcHL9CSRruz2Vqcph0= 
-github.com/containerd/go-runc v0.0.0-20190911050354-e029b79d8cda/go.mod h1:IV7qH3hrUgRmyYrtgEeGWJfWbgcHL9CSRruz2Vqcph0= -github.com/containerd/go-runc v0.0.0-20200220073739-7016d3ce2328/go.mod h1:PpyHrqVs8FTi9vpyHwPwiNEGaACDxT/N/pLcvMSRA9g= -github.com/containerd/go-runc v0.0.0-20201020171139-16b287bc67d0/go.mod h1:cNU0ZbCgCQVZK4lgG3P+9tn9/PaJNmoDXPpoJhDR+Ok= -github.com/containerd/go-runc v1.0.0 h1:oU+lLv1ULm5taqgV/CJivypVODI4SUz1znWjv3nNYS0= -github.com/containerd/go-runc v1.0.0/go.mod h1:cNU0ZbCgCQVZK4lgG3P+9tn9/PaJNmoDXPpoJhDR+Ok= -github.com/containerd/imgcrypt v1.0.1/go.mod h1:mdd8cEPW7TPgNG4FpuP3sGBiQ7Yi/zak9TYCG3juvb0= -github.com/containerd/imgcrypt v1.0.4-0.20210301171431-0ae5c75f59ba/go.mod h1:6TNsg0ctmizkrOgXRNQjAPFWpMYRWuiB6dSF4Pfa5SA= -github.com/containerd/imgcrypt v1.1.1-0.20210312161619-7ed62a527887/go.mod h1:5AZJNI6sLHJljKuI9IHnw1pWqo/F0nGDOuR9zgTs7ow= -github.com/containerd/imgcrypt v1.1.1/go.mod h1:xpLnwiQmEUJPvQoAapeb2SNCxz7Xr6PJrXQb0Dpc4ms= -github.com/containerd/nri v0.0.0-20201007170849-eb1350a75164/go.mod h1:+2wGSDGFYfE5+So4M5syatU0N0f0LbWpuqyMi4/BE8c= -github.com/containerd/nri v0.0.0-20210316161719-dbaa18c31c14/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY= -github.com/containerd/nri v0.1.0/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY= -github.com/containerd/stargz-snapshotter/estargz v0.4.1 h1:5e7heayhB7CcgdTkqfZqrNaNv15gABwr3Q2jBTbLlt4= -github.com/containerd/stargz-snapshotter/estargz v0.4.1/go.mod h1:x7Q9dg9QYb4+ELgxmo4gBUeJB0tl5dqH1Sdz0nJU1QM= -github.com/containerd/ttrpc v0.0.0-20190828154514-0e0f228740de/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o= -github.com/containerd/ttrpc v0.0.0-20190828172938-92c8520ef9f8/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o= -github.com/containerd/ttrpc v0.0.0-20191028202541-4f1b8fe65a5c/go.mod h1:LPm1u0xBw8r8NOKoOdNMeVHSawSsltak+Ihv+etqsE8= -github.com/containerd/ttrpc v1.0.1/go.mod h1:UAxOpgT9ziI0gJrmKvgcZivgxOp8iFPSk8httJEt98Y= -github.com/containerd/ttrpc v1.0.2/go.mod h1:UAxOpgT9ziI0gJrmKvgcZivgxOp8iFPSk8httJEt98Y= -github.com/containerd/ttrpc v1.1.0 h1:GbtyLRxb0gOLR0TYQWt3O6B0NvT8tMdorEHqIQo/lWI= -github.com/containerd/ttrpc v1.1.0/go.mod h1:XX4ZTnoOId4HklF4edwc4DcqskFZuvXB1Evzy5KFQpQ= -github.com/containerd/typeurl v0.0.0-20180627222232-a93fcdb778cd/go.mod h1:Cm3kwCdlkCfMSHURc+r6fwoGH6/F1hH3S4sg0rLFWPc= -github.com/containerd/typeurl v0.0.0-20190911142611-5eb25027c9fd/go.mod h1:GeKYzf2pQcqv7tJ0AoCuuhtnqhva5LNU3U+OyKxxJpk= -github.com/containerd/typeurl v1.0.1/go.mod h1:TB1hUtrpaiO88KEK56ijojHS1+NeF0izUACaJW2mdXg= -github.com/containerd/typeurl v1.0.2 h1:Chlt8zIieDbzQFzXzAeBEF92KhExuE4p9p92/QmY7aY= -github.com/containerd/typeurl v1.0.2/go.mod h1:9trJWW2sRlGub4wZJRTW83VtbOLS6hwcDZXTn6oPz9s= -github.com/containerd/zfs v0.0.0-20200918131355-0a33824f23a2/go.mod h1:8IgZOBdv8fAgXddBT4dBXJPtxyRsejFIpXoklgxgEjw= -github.com/containerd/zfs v0.0.0-20210301145711-11e8f1707f62/go.mod h1:A9zfAbMlQwE+/is6hi0Xw8ktpL+6glmqZYtevJgaB8Y= -github.com/containerd/zfs v0.0.0-20210315114300-dde8f0fda960/go.mod h1:m+m51S1DvAP6r3FcmYCp54bQ34pyOwTieQDNRIRHsFY= -github.com/containerd/zfs v0.0.0-20210324211415-d5c4544f0433/go.mod h1:m+m51S1DvAP6r3FcmYCp54bQ34pyOwTieQDNRIRHsFY= -github.com/containerd/zfs v1.0.0/go.mod h1:m+m51S1DvAP6r3FcmYCp54bQ34pyOwTieQDNRIRHsFY= -github.com/containernetworking/cni v0.7.1/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY= -github.com/containernetworking/cni v0.8.0/go.mod h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY= -github.com/containernetworking/cni v0.8.1/go.mod 
h1:LGwApLUm2FpoOfxTDEeq8T9ipbpZ61X79hmU3w8FmsY= -github.com/containernetworking/plugins v0.8.6/go.mod h1:qnw5mN19D8fIwkqW7oHHYDHVlzhJpcY6TQxn/fUyDDM= -github.com/containernetworking/plugins v0.9.1/go.mod h1:xP/idU2ldlzN6m4p5LmGiwRDjeJr6FLK6vuiUwoH7P8= -github.com/containers/ocicrypt v1.0.1/go.mod h1:MeJDzk1RJHv89LjsH0Sp5KTY3ZYkjXO/C+bKAeWFIrc= -github.com/containers/ocicrypt v1.1.0/go.mod h1:b8AOe0YR67uU8OqfVNcznfFpAzu3rdgUV4GP9qXPfu4= -github.com/containers/ocicrypt v1.1.1/go.mod h1:Dm55fwWm1YZAjYRaJ94z2mfZikIyIN4B0oB3dj3jFxY= -github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= -github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= -github.com/coreos/go-iptables v0.4.5/go.mod h1:/mVI274lEDI2ns62jHCDnCyBF9Iwsmekav8Dbxlm1MU= -github.com/coreos/go-iptables v0.5.0/go.mod h1:/mVI274lEDI2ns62jHCDnCyBF9Iwsmekav8Dbxlm1MU= -github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc= -github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd v0.0.0-20161114122254-48702e0da86b/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e h1:Wf6HqHfScWJN9/ZjdUKyjop4mf3Qdd+1TvvltAvM3m8= -github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd/v22 v22.0.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= -github.com/coreos/go-systemd/v22 v22.1.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= -github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI= -github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.0 h1:EoUDS0afbrsXAZ9YQ9jdu/mZ2sXgT1/2yyNng4PGlyM= -github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/cyphar/filepath-securejoin v0.2.2/go.mod h1:FpkQEhXnPnOthhzymB7CGsFk2G9VLXONKD9G7QGMM+4= -github.com/d2g/dhcp4 v0.0.0-20170904100407-a1d1b6c41b1c/go.mod h1:Ct2BUK8SB0YC1SMSibvLzxjeJLnrYEVLULFNiHY9YfQ= -github.com/d2g/dhcp4client v1.0.0/go.mod h1:j0hNfjhrt2SxUOw55nL0ATM/z4Yt3t2Kd1mW34z5W5s= -github.com/d2g/dhcp4server v0.0.0-20181031114812-7d4a0a7f59a5/go.mod h1:Eo87+Kg/IX2hfWJfwxMzLyuSZyxSoAug2nGa1G2QAi8= -github.com/d2g/hardwareaddr v0.0.0-20190221164911-e7d9fbe030e4/go.mod h1:bMl4RjIciD2oAxI7DmWRx6gbeqrkoLqv3MV0vzNad+I= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 
-github.com/denverdino/aliyungo v0.0.0-20190125010748-a747050bb1ba/go.mod h1:dV8lFg6daOBZbT6/BDGIz6Y3WFGn8juu6G+CQ6LHtl0= -github.com/dgrijalva/jwt-go v0.0.0-20170104182250-a601269ab70c/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= -github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E= -github.com/docker/cli v0.0.0-20191017083524-a8ff7f821017 h1:2HQmlpI3yI9deH18Q6xiSOIjXD4sLI55Y/gfpa8/558= -github.com/docker/cli v0.0.0-20191017083524-a8ff7f821017/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= -github.com/docker/distribution v0.0.0-20190905152932-14b96e55d84c/go.mod h1:0+TTO4EOBfRPhZXAeF1Vu+W3hHZ8eLp8PgKVZlcvtFY= -github.com/docker/distribution v2.7.1-0.20190205005809-0d3efadf0154+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= -github.com/docker/distribution v2.7.1+incompatible h1:a5mlkVzth6W5A4fOsS3D2EO5BUmsJpcB+cRlLU7cSug= -github.com/docker/distribution v2.7.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= -github.com/docker/docker v1.4.2-0.20190924003213-a8608b5b67c7 h1:Cvj7S8I4Xpx78KAl6TwTmMHuHlZ/0SM60NUneGJQ7IE= -github.com/docker/docker v1.4.2-0.20190924003213-a8608b5b67c7/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/docker/docker-credential-helpers v0.6.3 h1:zI2p9+1NQYdnG6sMU26EX4aVGlqbInSQxQXLvzJ4RPQ= -github.com/docker/docker-credential-helpers v0.6.3/go.mod h1:WRaJzqw3CTB9bk10avuGsjVBZsD05qeibJ1/TYlvc0Y= -github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= -github.com/docker/go-events v0.0.0-20170721190031-9461782956ad/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= -github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= -github.com/docker/go-metrics v0.0.0-20180209012529-399ea8c73916/go.mod h1:/u0gXw0Gay3ceNrsHubL3BtdOL2fHf93USgMTe0W5dI= -github.com/docker/go-metrics v0.0.1/go.mod h1:cG1hvH2utMXtqgqqYE9plW6lDxS3/5ayHzueweSI3Vw= -github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= -github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/docker/libtrust v0.0.0-20150114040149-fa567046d9b1/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE= -github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM= -github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= -github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= -github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= -github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= 
-github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= -github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= -github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= -github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= -github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= -github.com/fullsailor/pkcs7 v0.0.0-20190404230743-d7302db945fa/go.mod h1:KnogPXtdwXqoenmZCw6S+25EAm2MkxbG0deNDu4cbSA= -github.com/garyburd/redigo v0.0.0-20150301180006-535138d7bcd7/go.mod h1:NR3MbYisc3/PwhQ00EMzDiPmrwpPxAn5GI05/YaO1SY= -github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-ini/ini v1.25.4/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= -github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= -github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= -github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= -github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= -github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0= -github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg= -github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= -github.com/go-openapi/jsonreference v0.0.0-20160704190145-13c6e3589ad9/go.mod h1:W3Z9FmVs9qj+KR4zFKmDPGiLdk1D9Rlm7cyMvf57TTg= -github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc= -github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= -github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nAiCcj+friV/PDoE1/3eeccG9LYBs0tYvLOWc= -github.com/go-openapi/spec v0.19.3/go.mod h1:FpwSN1ksY1eteniUU7X0N/BgJ7a4WvBFVA8Lj9mJglo= -github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I= -github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= -github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= -github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/godbus/dbus v0.0.0-20151105175453-c7fdd8b5cd55/go.mod h1:/YcGZj5zSblfDWMMoOzV4fas9FZnQYTkDnsGvmh2Grw= -github.com/godbus/dbus v0.0.0-20180201030542-885f9cc04c9c/go.mod h1:/YcGZj5zSblfDWMMoOzV4fas9FZnQYTkDnsGvmh2Grw= -github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e 
h1:BWhy2j3IXJhjCbC68FptL43tDKIq8FladmaTs3Xs7Z8= -github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e/go.mod h1:bBOAhwG1umN6/6ZUMtDFBMQR8jRg9O75tm9K00oMsK4= -github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/godbus/dbus/v5 v5.0.4 h1:9349emZab16e7zQvpmsbtjc18ykshndd8y2PG3sgJbA= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gogo/googleapis v1.2.0/go.mod h1:Njal3psf3qN6dwBtQfUmBZh2ybovJ0tlu3o/AC7HYjU= -github.com/gogo/googleapis v1.4.0/go.mod h1:5YRNX2z1oM5gXdAkurHa942MDgEJyk02w4OecKY87+c= -github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= -github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= -github.com/gogo/protobuf v1.3.0/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= -github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= -github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= -github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc= -github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf 
v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= -github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-containerregistry v0.5.1 h1:/+mFTs4AlwsJ/mJe8NDtKb7BxLtbZFpcn8vDsneEkwQ= -github.com/google/go-containerregistry v0.5.1/go.mod h1:Ct15B4yir3PLOP5jsy0GNeYVaIZs/MK/Jz5any1wFW0= -github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= -github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid 
v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= -github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= -github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3ir6b65WBswg= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= -github.com/gorilla/handlers v0.0.0-20150720190736-60c7bfde3e33/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= -github.com/gorilla/mux v1.7.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= -github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= -github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= -github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/hashicorp/errwrap v0.0.0-20141028054710-7554cd9344ce/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-multierror v0.0.0-20161216184304-ed905158d874/go.mod h1:JMRHfdO9jKNzS/+BTlxCjKNQHg/jZAft8U7LloJvN7I= -github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= -github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= -github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= -github.com/imdario/mergo v0.3.8/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= -github.com/imdario/mergo v0.3.10/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= -github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= -github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/j-keck/arping v0.0.0-20160618110441-2cf9dc699c56/go.mod h1:ymszkNOg6tORTn+6F6j+Jc8TOr5osrynvN6ivFWZ2GA= -github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= -github.com/jmespath/go-jmespath v0.0.0-20160803190731-bd40a432e4c7/go.mod 
h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= -github.com/joefitzgerald/rainbow-reporter v0.1.0/go.mod h1:481CNgqmVHQZzdIbN52CupLJyoVwB10FQ/IQlF1pdL8= -github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= -github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= -github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= -github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= -github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/linuxkit/virtsock v0.0.0-20201010232012-f8cee7dfc7a3 h1:jUp75lepDg0phMUJBCmvaeFDldD2N3S1lBuPwUTszio= -github.com/linuxkit/virtsock v0.0.0-20201010232012-f8cee7dfc7a3/go.mod h1:3r6x7q95whyfWQpmGZTu3gk3v2YkMi05HEzl7Tf7YEo= -github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= -github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.7.0/go.mod h1:KAzv3t3aY1NaHWoQz1+4F1ccyAH66Jk7yos7ldAVICs= -github.com/marstr/guid v1.1.0/go.mod h1:74gB1z2wpxxInTG6yaqA7KrtM0NZ+RbrcqDvYHefzho= -github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= 
-github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= -github.com/mattn/go-shellwords v1.0.3/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= -github.com/mattn/go-shellwords v1.0.6 h1:9Jok5pILi5S1MnDirGVTufYGtksUs/V2BWUP3ZkeUUI= -github.com/mattn/go-shellwords v1.0.6/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= -github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= -github.com/maxbrunsfeld/counterfeiter/v6 v6.2.2/go.mod h1:eD9eIE7cdwcMi9rYluz88Jz2VyhSmden33/aXg4oVIY= -github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs= -github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4= -github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/osext v0.0.0-20151018003038-5e2d6d41470f/go.mod h1:OkQIRizQZAeMln+1tSwduZz7+Af5oFlKirV/MSYes2A= -github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= -github.com/moby/sys/mountinfo v0.4.0/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A= -github.com/moby/sys/mountinfo v0.4.1 h1:1O+1cHA1aujwEwwVMa2Xm2l+gIpUHyd3+D+d7LZh1kM= -github.com/moby/sys/mountinfo v0.4.1/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A= -github.com/moby/sys/symlink v0.1.0/go.mod h1:GGDODQmbFOjFsXvfLVn3+ZRxkch54RkSiGqsZeMYowQ= -github.com/moby/term v0.0.0-20200312100748-672ec06f55cd/go.mod h1:DdlQx2hp0Ss5/fLikoLlEeIYiATotOjgB//nb973jeo= -github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= -github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= -github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= -github.com/ncw/swift v1.0.47/go.mod h1:23YIA4yWVnGwv2dQlN4bB7egfYX6YLn0Yo/S6zZO/ZM= -github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= -github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= -github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= -github.com/olekukonko/tablewriter 
v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= -github.com/onsi/ginkgo v0.0.0-20151202141238-7f8ab55aaf3b/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.12.0/go.mod h1:oUhWkIvk5aDxtKvDDuw8gItl8pKl42LzjC9KZE0HfGg= -github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= -github.com/onsi/gomega v0.0.0-20151007035656-2152b45fa28a/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= -github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= -github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= -github.com/onsi/gomega v1.9.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA= -github.com/onsi/gomega v1.10.3/go.mod h1:V9xEwhxec5O8UDM77eCW8vLymOMltsqPVYWrpDsH8xc= -github.com/opencontainers/go-digest v0.0.0-20170106003457-a6d0ee40d420/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= -github.com/opencontainers/go-digest v0.0.0-20180430190053-c9281466c8b2/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= -github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= -github.com/opencontainers/go-digest v1.0.0-rc1.0.20180430190053-c9281466c8b2/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= -github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= -github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= -github.com/opencontainers/image-spec v1.0.0/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= -github.com/opencontainers/image-spec v1.0.1 h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVojFA6h/TRcI= -github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= -github.com/opencontainers/runc v0.0.0-20190115041553-12f6a991201f/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= -github.com/opencontainers/runc v0.1.1/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= -github.com/opencontainers/runc v1.0.0-rc8.0.20190926000215-3e425f80a8c9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= -github.com/opencontainers/runc v1.0.0-rc9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= -github.com/opencontainers/runc v1.0.0-rc93/go.mod h1:3NOsor4w32B2tC0Zbl8Knk4Wg84SM2ImC1fxBuqJ/H0= -github.com/opencontainers/runc v1.0.2 h1:opHZMaswlyxz1OuGpBE53Dwe4/xF7EZTY0A2L/FpCOg= -github.com/opencontainers/runc v1.0.2/go.mod h1:aTaHFFwQXuA71CiyxOdFFIorAoemI04suvGRQFzWTD0= -github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/runtime-spec v1.0.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/runtime-spec 
v1.0.2-0.20190207185410-29686dbc5559/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc= -github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opencontainers/runtime-tools v0.0.0-20181011054405-1d69bd0f9c39/go.mod h1:r3f7wjNzSs2extwzU3Y+6pKfobzPh+kKFJ3ofN+3nfs= -github.com/opencontainers/selinux v1.6.0/go.mod h1:VVGKuOLlE7v4PJyT6h7mNWvq1rzqiriPsEqVhc+svHE= -github.com/opencontainers/selinux v1.8.0/go.mod h1:RScLhm78qiWa2gbVCcGkC7tCGdgk3ogry1nUQF8Evvo= -github.com/opencontainers/selinux v1.8.2/go.mod h1:MUIHuUEvKB1wtJjQdOyYRgOnLD2xAPP8dBsCoU0KuF8= -github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= -github.com/pelletier/go-toml v1.8.1/go.mod h1:T2/BmBdy8dvIRq1a/8aqjN41wvWlN4lrapLU/GW4pbc= -github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.8.1-0.20171018195549-f15c970de5b7/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021/go.mod h1:prYjPmNq4d1NPVmpShWobRqXY3q7Vp+80DqgxxUrUIA= -github.com/prometheus/client_golang v0.0.0-20180209125602-c332b6f63c06/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= -github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.1.0/go.mod h1:I1FGZT9+L76gKKOs5djB6ezCbFQP1xR9D75/vuwEF3g= -github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= -github.com/prometheus/client_model v0.0.0-20171117100541-99fa1f4be8e5/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/common v0.0.0-20180110214958-89604d197083/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= -github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= -github.com/prometheus/common v0.4.0/go.mod 
h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.6.0/go.mod h1:eBmuwkDJBwy6iBfxCBob6t6dR6ENT/y+J+Zk0j9GMYc= -github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= -github.com/prometheus/procfs v0.0.0-20180125133057-cb4147076ac7/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.0-20190522114515-bc1a522cf7b1/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.3/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= -github.com/prometheus/procfs v0.0.5/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= -github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= -github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= -github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= -github.com/prometheus/procfs v0.6.0 h1:mxy4L2jP6qMonqmq+aTtOx1ifVWUgG/TAmntgbh3xv4= -github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= -github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= -github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= -github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8/go.mod h1:Z0q5wiBQGYcxhMZ6gUqHn6pYNLypFAvaL3UvgZLR0U4= -github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= -github.com/sclevine/spec v1.2.0/go.mod h1:W4J29eT/Kzv7/b9IWLB055Z+qvVC9vt0Arko24q7p+U= -github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo= -github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/sirupsen/logrus v1.0.4-0.20170822132746-89742aefa4b2/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc= -github.com/sirupsen/logrus v1.0.6/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc= -github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= -github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= -github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= -github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= -github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod 
h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= -github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= -github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= -github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= -github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= -github.com/spf13/cobra v0.0.2-0.20171109065643-2da4a54c5cee/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE= -github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= -github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/spf13/pflag v1.0.1-0.20171106142849-4c012f6dcd95/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= -github.com/stefanberger/go-pkcs11uri v0.0.0-20201008174630-78d3cae3a980/go.mod h1:AO3tvPzVZ/ayst6UlUKUv6rcPQInYe3IknH3jYhAKu8= -github.com/stretchr/objx v0.0.0-20180129172003-8a3f7159479f/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= -github.com/stretchr/testify v0.0.0-20180303142811-b89eecf5ca5d/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/syndtr/gocapability v0.0.0-20170704070218-db04d3cc01c8/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= -github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= -github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= -github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I= -github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= -github.com/urfave/cli v0.0.0-20171014202726-7bc6a0acffa5/go.mod 
h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= -github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= -github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= -github.com/urfave/cli v1.22.2 h1:gsqYFH8bb9ekPA12kRo0hfjngWQjkJPlN9R0N78BoUo= -github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= -github.com/vishvananda/netlink v0.0.0-20181108222139-023a6dafdcdf/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk= -github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= -github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852 h1:cPXZWzzG0NllBLdjWoD1nDfaqu98YMv+OneaKc8sPOA= -github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= -github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI= -github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= -github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae h1:4hwBBUfQCFe3Cym0ZtKyq7L16eZUtYKs+BaHDN6mAns= -github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= -github.com/willf/bitset v1.1.11-0.20200630133818-d5bec3311243/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= -github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI= -github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= -github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= -github.com/xeipuuv/gojsonschema v0.0.0-20180618132009-1d523034197f/go.mod h1:5yf86TLmAcydyeJq5YvxkGPE2fm/u4myDekKRoLuqhs= -github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= -github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -github.com/yvasiyarov/go-metrics v0.0.0-20140926110328-57bccd1ccd43/go.mod h1:aX5oPXxHm3bOH+xeAttToC8pqch2ScQN/JoXYupl6xs= -github.com/yvasiyarov/gorelic v0.0.0-20141212073537-a9bba5b9ab50/go.mod h1:NUSPSUX/bi6SeDMUh6brw0nXpxHnc96TguQh0+r/ssA= -github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f/go.mod h1:GlGEuHIJweS1mbCqG+7vt2nvWLzLLnRHbXz5JKd/Qbg= -go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= -go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= -go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= -go.etcd.io/etcd v0.5.0-alpha.5.0.20200910180754-dd1b699fc489/go.mod h1:yVHk9ub3CSBatqGNg7GRmsnfLWtoW60w4eDYfh7vHDg= -go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk= -go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= -go.opencensus.io v0.22.0/go.mod 
h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= -go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opencensus.io v0.22.3 h1:8sGtKOrtQqkN1bp2AtX+misvLIlOmsEsNd+9NIcPEm8= -go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= -go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -golang.org/x/crypto v0.0.0-20171113213409-9f005a07e0d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20181009213950-7c1a557ab941/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= -golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= -golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= -golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= -golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= -golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod 
h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= -golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= -golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= -golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= -golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181011144130-49bb7cea24b1/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190619014844-b5b0513f8c1b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net 
v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20191004110552-13f9640d40b9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/net v0.0.0-20201006153459-a7d1128ccaa0/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.0.0-20210825183410-e898025ed96a h1:bRuuGXV8wwSdGTB+CtJf+FjgO1APK1CoO39T4BN/XBw= -golang.org/x/net v0.0.0-20210825183410-e898025ed96a/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190514135907-3a4b5fb9f71f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190522044717-8097e1b27ff5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190602015325-4c4f7f33c9ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190812073006-9eafafc0a87e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys 
v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191210023423-ac6580df4449/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200120151820-655fe14d7479/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200817155316-9781c653f443/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200916030750-2334cc1a136f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200922070232-aee5d888a860/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201112073958-5cba982894dd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201117170446-d9b008d0a637/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201202213521-69691e467435/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210324051608-47abb6519492/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
-golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210426230700-d19ff857e887/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e h1:fLOSk5Q00efkSvAm+4xcoXD+RRmLmmulPn5I3Y9F2EM= -golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190614205625-5aca471b1d59/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools 
v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190706070813-72ffa07ba3db/go.mod h1:jcCCGcm9btYwXyDqrUWc6MKQKKGJCWEQ3AfLSRIbEuI= -golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= -golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200505023115-26f46d2f7ef8/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200616133436-c1934b75d054/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200916195026-c9a70fc28ce3/go.mod h1:z6u4i615ZeAfBE4XtMziQW1fSVJXACjjbWkB/mvPzlU= -golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 
h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/api v0.0.0-20160322025152-9bf6e6e569ff/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= -google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= -google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= -google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= -google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/cloud v0.0.0-20151119220103-975617b05ea8/go.mod h1:0H1ncTHf11KCFhTc/+EFRbzSCOZx+VUbRMk55Yv5MYk= -google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63 h1:YzfoEYWbODU5Fbt37+h7X16BWQbad7Q4S6gclTKFXM8= -google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/grpc v1.27.1 h1:zvIju4sqAGvwKspUQOhwnpcqSbzi7/H6QomNNjTL4sk= -google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U= -gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod 
h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20141024133853-64131543e789/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= -gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= -gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= -gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2/go.mod h1:Xk6kEKp8OKb+X14hQBKWaSkCsqBpgog8nAV2xsGOxlo= -gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= -gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= -gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= -gopkg.in/square/go-jose.v2 v2.2.2/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= -gopkg.in/square/go-jose.v2 v2.3.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= -gopkg.in/square/go-jose.v2 v2.5.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= -gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= -gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= -gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= -gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= -gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= -honnef.co/go/tools v0.0.1-2020.1.3/go.mod 
h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -k8s.io/api v0.20.1/go.mod h1:KqwcCVogGxQY3nBlRpwt+wpAMF/KjaCc7RpywacvqUo= -k8s.io/api v0.20.4/go.mod h1:++lNL1AJMkDymriNniQsWRkMDzRaX2Y/POTUi8yvqYQ= -k8s.io/api v0.20.6/go.mod h1:X9e8Qag6JV/bL5G6bU8sdVRltWKmdHsFUGS3eVndqE8= -k8s.io/apimachinery v0.20.1/go.mod h1:WlLqWAHZGg07AeltaI0MV5uk1Omp8xaN0JGLY6gkRpU= -k8s.io/apimachinery v0.20.4/go.mod h1:WlLqWAHZGg07AeltaI0MV5uk1Omp8xaN0JGLY6gkRpU= -k8s.io/apimachinery v0.20.6/go.mod h1:ejZXtW1Ra6V1O5H8xPBGz+T3+4gfkTCeExAHKU57MAc= -k8s.io/apiserver v0.20.1/go.mod h1:ro5QHeQkgMS7ZGpvf4tSMx6bBOgPfE+f52KwvXfScaU= -k8s.io/apiserver v0.20.4/go.mod h1:Mc80thBKOyy7tbvFtB4kJv1kbdD0eIH8k8vianJcbFM= -k8s.io/apiserver v0.20.6/go.mod h1:QIJXNt6i6JB+0YQRNcS0hdRHJlMhflFmsBDeSgT1r8Q= -k8s.io/client-go v0.20.1/go.mod h1:/zcHdt1TeWSd5HoUe6elJmHSQ6uLLgp4bIJHVEuy+/Y= -k8s.io/client-go v0.20.4/go.mod h1:LiMv25ND1gLUdBeYxBIwKpkSC5IsozMMmOOeSJboP+k= -k8s.io/client-go v0.20.6/go.mod h1:nNQMnOvEUEsOzRRFIIkdmYOjAZrC8bgq0ExboWSU1I0= -k8s.io/code-generator v0.19.7/go.mod h1:lwEq3YnLYb/7uVXLorOJfxg+cUu2oihFhHZ0n9NIla0= -k8s.io/component-base v0.20.1/go.mod h1:guxkoJnNoh8LNrbtiQOlyp2Y2XFCZQmrcg2n/DeYNLk= -k8s.io/component-base v0.20.4/go.mod h1:t4p9EdiagbVCJKrQ1RsA5/V4rFQNDfRlevJajlGwgjI= -k8s.io/component-base v0.20.6/go.mod h1:6f1MPBAeI+mvuts3sIdtpjljHWBQ2cIy38oBIWMYnrM= -k8s.io/cri-api v0.17.3/go.mod h1:X1sbHmuXhwaHs9xxYffLqJogVsnI+f6cPRcgPel7ywM= -k8s.io/cri-api v0.20.1/go.mod h1:2JRbKt+BFLTjtrILYVqQK5jqhI+XNdF6UiGMgczeBCI= -k8s.io/cri-api v0.20.4/go.mod h1:2JRbKt+BFLTjtrILYVqQK5jqhI+XNdF6UiGMgczeBCI= -k8s.io/cri-api v0.20.6/go.mod h1:ew44AjNXwyn1s0U4xCKGodU7J1HzBeZ1MpGrpa5r8Yc= -k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= -k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= -k8s.io/gengo v0.0.0-20201113003025-83324d819ded/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= -k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= -k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= -k8s.io/klog/v2 v2.4.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= -k8s.io/kube-openapi v0.0.0-20200805222855-6aeccd4b50c6/go.mod h1:UuqjUnNftUyPE5H64/qeyjQoUZhGpeFDVdxjTeEVN2o= -k8s.io/kube-openapi v0.0.0-20201113171705-d219536bb9fd/go.mod h1:WOJ3KddDSol4tAGcJo0Tvi+dK12EcqSLqcWsryKMpfM= -k8s.io/kubernetes v1.13.0/go.mod h1:ocZa8+6APFNC2tX1DZASIbocyYT5jHzqFVsY5aoB7Jk= -k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= -rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= -rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= -rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.14/go.mod h1:LEScyzhFmoF5pso/YSeBstl57mOzx9xlU9n85RGrDQg= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.15/go.mod h1:LEScyzhFmoF5pso/YSeBstl57mOzx9xlU9n85RGrDQg= -sigs.k8s.io/structured-merge-diff/v4 v4.0.1/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw= -sigs.k8s.io/structured-merge-diff/v4 v4.0.2/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw= -sigs.k8s.io/structured-merge-diff/v4 v4.0.3/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw= -sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= -sigs.k8s.io/yaml v1.2.0/go.mod 
h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= diff --git a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/jobobject/iocp.go b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/jobobject/iocp.go index 3d640ac7bdd6..5d6acd69e618 100644 --- a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/jobobject/iocp.go +++ b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/jobobject/iocp.go @@ -57,7 +57,7 @@ func pollIOCP(ctx context.Context, iocpHandle windows.Handle) { }).Warn("failed to parse job object message") continue } - if err := msq.Write(notification); err == queue.ErrQueueClosed { + if err := msq.Enqueue(notification); err == queue.ErrQueueClosed { // Write will only return an error when the queue is closed. // The only time a queue would ever be closed is when we call `Close` on // the job it belongs to which also removes it from the jobMap, so something diff --git a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/jobobject/jobobject.go b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/jobobject/jobobject.go index 9c2726416418..c9fdd921a7f8 100644 --- a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/jobobject/jobobject.go +++ b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/jobobject/jobobject.go @@ -68,6 +68,9 @@ type Options struct { // `UseNTVariant` specifies if we should use the `Nt` variant of Open/CreateJobObject. // Defaults to false. UseNTVariant bool + // `IOTracking` enables tracking I/O statistics on the job object. More specifically this + // calls SetInformationJobObject with the JobObjectIoAttribution class. + EnableIOTracking bool } // Create creates a job object. @@ -134,6 +137,12 @@ func Create(ctx context.Context, options *Options) (_ *JobObject, err error) { job.mq = mq } + if options.EnableIOTracking { + if err := enableIOTracking(jobHandle); err != nil { + return nil, err + } + } + return job, nil } @@ -235,7 +244,7 @@ func (job *JobObject) PollNotification() (interface{}, error) { if job.mq == nil { return nil, ErrNotRegistered } - return job.mq.ReadOrWait() + return job.mq.Dequeue() } // UpdateProcThreadAttribute updates the passed in ProcThreadAttributeList to contain what is necessary to @@ -330,7 +339,7 @@ func (job *JobObject) Pids() ([]uint32, error) { err := winapi.QueryInformationJobObject( job.handle, winapi.JobObjectBasicProcessIdList, - uintptr(unsafe.Pointer(&info)), + unsafe.Pointer(&info), uint32(unsafe.Sizeof(info)), nil, ) @@ -356,7 +365,7 @@ func (job *JobObject) Pids() ([]uint32, error) { if err = winapi.QueryInformationJobObject( job.handle, winapi.JobObjectBasicProcessIdList, - uintptr(unsafe.Pointer(&buf[0])), + unsafe.Pointer(&buf[0]), uint32(len(buf)), nil, ); err != nil { @@ -384,7 +393,7 @@ func (job *JobObject) QueryMemoryStats() (*winapi.JOBOBJECT_MEMORY_USAGE_INFORMA if err := winapi.QueryInformationJobObject( job.handle, winapi.JobObjectMemoryUsageInformation, - uintptr(unsafe.Pointer(&info)), + unsafe.Pointer(&info), uint32(unsafe.Sizeof(info)), nil, ); err != nil { @@ -406,7 +415,7 @@ func (job *JobObject) QueryProcessorStats() (*winapi.JOBOBJECT_BASIC_ACCOUNTING_ if err := winapi.QueryInformationJobObject( job.handle, winapi.JobObjectBasicAccountingInformation, - uintptr(unsafe.Pointer(&info)), + unsafe.Pointer(&info), uint32(unsafe.Sizeof(info)), nil, ); err != nil { @@ -415,7 +424,9 @@ func (job *JobObject) QueryProcessorStats() (*winapi.JOBOBJECT_BASIC_ACCOUNTING_ return &info, nil } -// QueryStorageStats gets the storage (I/O) stats for the job object. 
+// QueryStorageStats gets the storage (I/O) stats for the job object. This call will error +// if either `EnableIOTracking` wasn't set to true on creation of the job, or SetIOTracking() +// hasn't been called since creation of the job. func (job *JobObject) QueryStorageStats() (*winapi.JOBOBJECT_IO_ATTRIBUTION_INFORMATION, error) { job.handleLock.RLock() defer job.handleLock.RUnlock() @@ -430,7 +441,7 @@ func (job *JobObject) QueryStorageStats() (*winapi.JOBOBJECT_IO_ATTRIBUTION_INFO if err := winapi.QueryInformationJobObject( job.handle, winapi.JobObjectIoAttribution, - uintptr(unsafe.Pointer(&info)), + unsafe.Pointer(&info), uint32(unsafe.Sizeof(info)), nil, ); err != nil { @@ -476,7 +487,7 @@ func (job *JobObject) QueryPrivateWorkingSet() (uint64, error) { status := winapi.NtQueryInformationProcess( h, winapi.ProcessVmCounters, - uintptr(unsafe.Pointer(&vmCounters)), + unsafe.Pointer(&vmCounters), uint32(unsafe.Sizeof(vmCounters)), nil, ) @@ -497,3 +508,31 @@ func (job *JobObject) QueryPrivateWorkingSet() (uint64, error) { return jobWorkingSetSize, nil } + +// SetIOTracking enables IO tracking for processes in the job object. +// This enables use of the QueryStorageStats method. +func (job *JobObject) SetIOTracking() error { + job.handleLock.RLock() + defer job.handleLock.RUnlock() + + if job.handle == 0 { + return ErrAlreadyClosed + } + + return enableIOTracking(job.handle) +} + +func enableIOTracking(job windows.Handle) error { + info := winapi.JOBOBJECT_IO_ATTRIBUTION_INFORMATION{ + ControlFlags: winapi.JOBOBJECT_IO_ATTRIBUTION_CONTROL_ENABLE, + } + if _, err := windows.SetInformationJobObject( + job, + winapi.JobObjectIoAttribution, + uintptr(unsafe.Pointer(&info)), + uint32(unsafe.Sizeof(info)), + ); err != nil { + return fmt.Errorf("failed to enable IO tracking on job object: %w", err) + } + return nil +} diff --git a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/jobobject/limits.go b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/jobobject/limits.go index 4be297788e02..4efde292c49d 100644 --- a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/jobobject/limits.go +++ b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/jobobject/limits.go @@ -202,7 +202,7 @@ func (job *JobObject) getExtendedInformation() (*windows.JOBOBJECT_EXTENDED_LIMI if err := winapi.QueryInformationJobObject( job.handle, windows.JobObjectExtendedLimitInformation, - uintptr(unsafe.Pointer(&info)), + unsafe.Pointer(&info), uint32(unsafe.Sizeof(info)), nil, ); err != nil { @@ -224,7 +224,7 @@ func (job *JobObject) getCPURateControlInformation() (*winapi.JOBOBJECT_CPU_RATE if err := winapi.QueryInformationJobObject( job.handle, windows.JobObjectCpuRateControlInformation, - uintptr(unsafe.Pointer(&info)), + unsafe.Pointer(&info), uint32(unsafe.Sizeof(info)), nil, ); err != nil { diff --git a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/queue/mq.go b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/queue/mq.go index e177c9a62997..4eb9bb9f1f39 100644 --- a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/queue/mq.go +++ b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/queue/mq.go @@ -5,10 +5,7 @@ import ( "sync" ) -var ( - ErrQueueClosed = errors.New("the queue is closed for reading and writing") - ErrQueueEmpty = errors.New("the queue is empty") -) +var ErrQueueClosed = errors.New("the queue is closed for reading and writing") // MessageQueue represents a threadsafe message queue to be used to retrieve or // write messages to. 
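To make the jobobject changes above concrete, here is a minimal, hypothetical Go sketch (not part of this diff) of how the new I/O-tracking surface is meant to be used: opt in with the new `EnableIOTracking` option at creation time, or call `SetIOTracking()` afterwards, and then read the counters back with `QueryStorageStats()`. It assumes caller code that can import the internal `jobobject` package, i.e. code living inside hcsshim itself.

// Illustrative only -- not part of the vendored diff. A minimal sketch, under the
// assumption that the caller can import the internal jobobject package.
package main

import (
	"context"
	"fmt"

	"github.com/Microsoft/hcsshim/internal/jobobject"
)

func main() {
	// Opt in to I/O attribution when the job object is created.
	job, err := jobobject.Create(context.Background(), &jobobject.Options{
		EnableIOTracking: true,
	})
	if err != nil {
		panic(err)
	}
	defer job.Close()

	// Tracking could also be switched on later with job.SetIOTracking().
	// Per the doc comment above, QueryStorageStats errors if neither opt-in happened.
	stats, err := job.QueryStorageStats()
	if err != nil {
		panic(err)
	}
	fmt.Printf("I/O attribution: %+v\n", stats)
}

Surfacing the opt-in both as an `Options` field and as the separate `SetIOTracking()` method lets callers enable attribution either up front or on an already-created job before querying stats.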
@@ -29,8 +26,8 @@ func NewMessageQueue() *MessageQueue { } } -// Write writes `msg` to the queue. -func (mq *MessageQueue) Write(msg interface{}) error { +// Enqueue writes `msg` to the queue. +func (mq *MessageQueue) Enqueue(msg interface{}) error { mq.m.Lock() defer mq.m.Unlock() @@ -43,55 +40,37 @@ func (mq *MessageQueue) Write(msg interface{}) error { return nil } -// Read will read a value from the queue if available, otherwise return an error. -func (mq *MessageQueue) Read() (interface{}, error) { +// Dequeue will read a value from the queue and remove it. If the queue +// is empty, this will block until the queue is closed or a value gets enqueued. +func (mq *MessageQueue) Dequeue() (interface{}, error) { mq.m.Lock() defer mq.m.Unlock() - if mq.closed { - return nil, ErrQueueClosed - } - if mq.isEmpty() { - return nil, ErrQueueEmpty + + for !mq.closed && mq.size() == 0 { + mq.c.Wait() } - val := mq.messages[0] - mq.messages[0] = nil - mq.messages = mq.messages[1:] - return val, nil -} -// ReadOrWait will read a value from the queue if available, else it will wait for a -// value to become available. This will block forever if nothing gets written or until -// the queue gets closed. -func (mq *MessageQueue) ReadOrWait() (interface{}, error) { - mq.m.Lock() + // We got woken up, check if it's because the queue got closed. if mq.closed { - mq.m.Unlock() return nil, ErrQueueClosed } - if mq.isEmpty() { - for !mq.closed && mq.isEmpty() { - mq.c.Wait() - } - mq.m.Unlock() - return mq.Read() - } + val := mq.messages[0] mq.messages[0] = nil mq.messages = mq.messages[1:] - mq.m.Unlock() return val, nil } -// IsEmpty returns if the queue is empty -func (mq *MessageQueue) IsEmpty() bool { +// Size returns the size of the queue. +func (mq *MessageQueue) Size() int { mq.m.RLock() defer mq.m.RUnlock() - return len(mq.messages) == 0 + return mq.size() } -// Nonexported empty check that doesn't lock so we can call this in Read and Write. -func (mq *MessageQueue) isEmpty() bool { - return len(mq.messages) == 0 +// Nonexported size check to check if the queue is empty inside already locked functions. +func (mq *MessageQueue) size() int { + return len(mq.messages) } // Close closes the queue for future writes or reads. Any attempts to read or write from the @@ -99,13 +78,15 @@ func (mq *MessageQueue) isEmpty() bool { func (mq *MessageQueue) Close() { mq.m.Lock() defer mq.m.Unlock() - // Already closed + + // Already closed, noop if mq.closed { return } + mq.messages = nil mq.closed = true - // If there's anybody currently waiting on a value from ReadOrWait, we need to + // If there's anybody currently waiting on a value from Dequeue, we need to // broadcast so the read(s) can return ErrQueueClosed. 
mq.c.Broadcast() } diff --git a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/jobobject.go b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/jobobject.go index 479649db3634..7eb13f8f0a83 100644 --- a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/jobobject.go +++ b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/jobobject.go @@ -175,7 +175,7 @@ type JOBOBJECT_ASSOCIATE_COMPLETION_PORT struct { // LPDWORD lpReturnLength // ); // -//sys QueryInformationJobObject(jobHandle windows.Handle, infoClass uint32, jobObjectInfo uintptr, jobObjectInformationLength uint32, lpReturnLength *uint32) (err error) = kernel32.QueryInformationJobObject +//sys QueryInformationJobObject(jobHandle windows.Handle, infoClass uint32, jobObjectInfo unsafe.Pointer, jobObjectInformationLength uint32, lpReturnLength *uint32) (err error) = kernel32.QueryInformationJobObject // HANDLE OpenJobObjectW( // DWORD dwDesiredAccess, diff --git a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/process.go b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/process.go index 5f9e03fd28e8..222529f433a5 100644 --- a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/process.go +++ b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/process.go @@ -18,7 +18,7 @@ const ProcessVmCounters = 3 // [out, optional] PULONG ReturnLength // ); // -//sys NtQueryInformationProcess(processHandle windows.Handle, processInfoClass uint32, processInfo uintptr, processInfoLength uint32, returnLength *uint32) (status uint32) = ntdll.NtQueryInformationProcess +//sys NtQueryInformationProcess(processHandle windows.Handle, processInfoClass uint32, processInfo unsafe.Pointer, processInfoLength uint32, returnLength *uint32) (status uint32) = ntdll.NtQueryInformationProcess // typedef struct _VM_COUNTERS_EX // { diff --git a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/system.go b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/system.go index 327f57d7c296..78fe01a4b412 100644 --- a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/system.go +++ b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/system.go @@ -12,7 +12,8 @@ const STATUS_INFO_LENGTH_MISMATCH = 0xC0000004 // ULONG SystemInformationLength, // PULONG ReturnLength // ); -//sys NtQuerySystemInformation(systemInfoClass int, systemInformation uintptr, systemInfoLength uint32, returnLength *uint32) (status uint32) = ntdll.NtQuerySystemInformation +// +//sys NtQuerySystemInformation(systemInfoClass int, systemInformation unsafe.Pointer, systemInfoLength uint32, returnLength *uint32) (status uint32) = ntdll.NtQuerySystemInformation type SYSTEM_PROCESS_INFORMATION struct { NextEntryOffset uint32 // ULONG diff --git a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/zsyscall_windows.go b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/zsyscall_windows.go index 39fb3e1adc08..1f16cf0b8e15 100644 --- a/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/zsyscall_windows.go +++ b/src/runtime/vendor/github.com/Microsoft/hcsshim/internal/winapi/zsyscall_windows.go @@ -100,7 +100,7 @@ func resizePseudoConsole(hPc windows.Handle, size uint32) (hr error) { return } -func NtQuerySystemInformation(systemInfoClass int, systemInformation uintptr, systemInfoLength uint32, returnLength *uint32) (status uint32) { +func NtQuerySystemInformation(systemInfoClass int, systemInformation unsafe.Pointer, 
systemInfoLength uint32, returnLength *uint32) (status uint32) { r0, _, _ := syscall.Syscall6(procNtQuerySystemInformation.Addr(), 4, uintptr(systemInfoClass), uintptr(systemInformation), uintptr(systemInfoLength), uintptr(unsafe.Pointer(returnLength)), 0, 0) status = uint32(r0) return @@ -152,7 +152,7 @@ func IsProcessInJob(procHandle windows.Handle, jobHandle windows.Handle, result return } -func QueryInformationJobObject(jobHandle windows.Handle, infoClass uint32, jobObjectInfo uintptr, jobObjectInformationLength uint32, lpReturnLength *uint32) (err error) { +func QueryInformationJobObject(jobHandle windows.Handle, infoClass uint32, jobObjectInfo unsafe.Pointer, jobObjectInformationLength uint32, lpReturnLength *uint32) (err error) { r1, _, e1 := syscall.Syscall6(procQueryInformationJobObject.Addr(), 5, uintptr(jobHandle), uintptr(infoClass), uintptr(jobObjectInfo), uintptr(jobObjectInformationLength), uintptr(unsafe.Pointer(lpReturnLength)), 0) if r1 == 0 { if e1 != 0 { @@ -244,7 +244,7 @@ func LocalFree(ptr uintptr) { return } -func NtQueryInformationProcess(processHandle windows.Handle, processInfoClass uint32, processInfo uintptr, processInfoLength uint32, returnLength *uint32) (status uint32) { +func NtQueryInformationProcess(processHandle windows.Handle, processInfoClass uint32, processInfo unsafe.Pointer, processInfoLength uint32, returnLength *uint32) (status uint32) { r0, _, _ := syscall.Syscall6(procNtQueryInformationProcess.Addr(), 5, uintptr(processHandle), uintptr(processInfoClass), uintptr(processInfo), uintptr(processInfoLength), uintptr(unsafe.Pointer(returnLength)), 0) status = uint32(r0) return diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/.gitignore b/src/runtime/vendor/github.com/asaskevich/govalidator/.gitignore new file mode 100644 index 000000000000..8d69a9418aa3 --- /dev/null +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/.gitignore @@ -0,0 +1,15 @@ +bin/ +.idea/ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/.travis.yml b/src/runtime/vendor/github.com/asaskevich/govalidator/.travis.yml index e29f8eef5efd..bb83c6670df6 100644 --- a/src/runtime/vendor/github.com/asaskevich/govalidator/.travis.yml +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/.travis.yml @@ -1,14 +1,12 @@ language: go - +dist: xenial go: - - 1.1 - - 1.2 - - 1.3 - - 1.4 - - 1.5 - - 1.6 - - tip + - '1.10' + - '1.11' + - '1.12' + - '1.13' + - 'tip' -notifications: - email: - - bwatas@gmail.com +script: + - go test -coverpkg=./... -coverprofile=coverage.info -timeout=5s + - bash <(curl -s https://codecov.io/bash) diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/CODE_OF_CONDUCT.md b/src/runtime/vendor/github.com/asaskevich/govalidator/CODE_OF_CONDUCT.md new file mode 100644 index 000000000000..4b462b0d81b1 --- /dev/null +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/CODE_OF_CONDUCT.md @@ -0,0 +1,43 @@ +# Contributor Code of Conduct + +This project adheres to [The Code Manifesto](http://codemanifesto.com) +as its guidelines for contributor interactions. + +## The Code Manifesto + +We want to work in an ecosystem that empowers developers to reach their +potential — one that encourages growth and effective collaboration. A space +that is safe for all. 
+ +A space such as this benefits everyone that participates in it. It encourages +new developers to enter our field. It is through discussion and collaboration +that we grow, and through growth that we improve. + +In the effort to create such a place, we hold to these values: + +1. **Discrimination limits us.** This includes discrimination on the basis of + race, gender, sexual orientation, gender identity, age, nationality, + technology and any other arbitrary exclusion of a group of people. +2. **Boundaries honor us.** Your comfort levels are not everyone’s comfort + levels. Remember that, and if brought to your attention, heed it. +3. **We are our biggest assets.** None of us were born masters of our trade. + Each of us has been helped along the way. Return that favor, when and where + you can. +4. **We are resources for the future.** As an extension of #3, share what you + know. Make yourself a resource to help those that come after you. +5. **Respect defines us.** Treat others as you wish to be treated. Make your + discussions, criticisms and debates from a position of respectfulness. Ask + yourself, is it true? Is it necessary? Is it constructive? Anything less is + unacceptable. +6. **Reactions require grace.** Angry responses are valid, but abusive language + and vindictive actions are toxic. When something happens that offends you, + handle it assertively, but be respectful. Escalate reasonably, and try to + allow the offender an opportunity to explain themselves, and possibly + correct the issue. +7. **Opinions are just that: opinions.** Each and every one of us, due to our + background and upbringing, have varying opinions. That is perfectly + acceptable. Remember this: if you respect your own opinions, you should + respect the opinions of others. +8. **To err is human.** You might not intend it, but mistakes do happen and + contribute to build experience. Tolerate honest mistakes, and don't + hesitate to apologize if you make one yourself. diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/CONTRIBUTING.md b/src/runtime/vendor/github.com/asaskevich/govalidator/CONTRIBUTING.md index f0f7e3a8add0..7ed268a1edd9 100644 --- a/src/runtime/vendor/github.com/asaskevich/govalidator/CONTRIBUTING.md +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/CONTRIBUTING.md @@ -11,7 +11,7 @@ If you don't know what to do, there are some features and functions that need to - [ ] Update actual [list of functions](https://github.com/asaskevich/govalidator#list-of-functions) - [ ] Update [list of validators](https://github.com/asaskevich/govalidator#validatestruct-2) that available for `ValidateStruct` and add new - [ ] Implement new validators: `IsFQDN`, `IsIMEI`, `IsPostalCode`, `IsISIN`, `IsISRC` etc -- [ ] Implement [validation by maps](https://github.com/asaskevich/govalidator/issues/224) +- [x] Implement [validation by maps](https://github.com/asaskevich/govalidator/issues/224) - [ ] Implement fuzzing testing - [ ] Implement some struct/map/array utilities - [ ] Implement map/array validation @@ -37,7 +37,7 @@ Anyone can file an expense. If the expense makes sense for the development of th ### Contributors Thank you to all the people who have already contributed to govalidator! 
- + ### Backers diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/LICENSE b/src/runtime/vendor/github.com/asaskevich/govalidator/LICENSE index 2f9a31fadf67..cacba9102400 100644 --- a/src/runtime/vendor/github.com/asaskevich/govalidator/LICENSE +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2014 Alex Saskevich +Copyright (c) 2014-2020 Alex Saskevich Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/README.md b/src/runtime/vendor/github.com/asaskevich/govalidator/README.md index 40f9a87811b3..39121ea8e37c 100644 --- a/src/runtime/vendor/github.com/asaskevich/govalidator/README.md +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/README.md @@ -1,7 +1,8 @@ govalidator =========== -[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/asaskevich/govalidator?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) [![GoDoc](https://godoc.org/github.com/asaskevich/govalidator?status.png)](https://godoc.org/github.com/asaskevich/govalidator) [![Coverage Status](https://img.shields.io/coveralls/asaskevich/govalidator.svg)](https://coveralls.io/r/asaskevich/govalidator?branch=master) [![wercker status](https://app.wercker.com/status/1ec990b09ea86c910d5f08b0e02c6043/s "wercker status")](https://app.wercker.com/project/bykey/1ec990b09ea86c910d5f08b0e02c6043) -[![Build Status](https://travis-ci.org/asaskevich/govalidator.svg?branch=master)](https://travis-ci.org/asaskevich/govalidator) [![Go Report Card](https://goreportcard.com/badge/github.com/asaskevich/govalidator)](https://goreportcard.com/report/github.com/asaskevich/govalidator) [![GoSearch](http://go-search.org/badge?id=github.com%2Fasaskevich%2Fgovalidator)](http://go-search.org/view?id=github.com%2Fasaskevich%2Fgovalidator) [![Backers on Open Collective](https://opencollective.com/govalidator/backers/badge.svg)](#backers) [![Sponsors on Open Collective](https://opencollective.com/govalidator/sponsors/badge.svg)](#sponsors) [![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fasaskevich%2Fgovalidator.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2Fasaskevich%2Fgovalidator?ref=badge_shield) +[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/asaskevich/govalidator?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) [![GoDoc](https://godoc.org/github.com/asaskevich/govalidator?status.png)](https://godoc.org/github.com/asaskevich/govalidator) +[![Build Status](https://travis-ci.org/asaskevich/govalidator.svg?branch=master)](https://travis-ci.org/asaskevich/govalidator) +[![Coverage](https://codecov.io/gh/asaskevich/govalidator/branch/master/graph/badge.svg)](https://codecov.io/gh/asaskevich/govalidator) [![Go Report Card](https://goreportcard.com/badge/github.com/asaskevich/govalidator)](https://goreportcard.com/report/github.com/asaskevich/govalidator) [![GoSearch](http://go-search.org/badge?id=github.com%2Fasaskevich%2Fgovalidator)](http://go-search.org/view?id=github.com%2Fasaskevich%2Fgovalidator) [![Backers on Open Collective](https://opencollective.com/govalidator/backers/badge.svg)](#backers) [![Sponsors on Open Collective](https://opencollective.com/govalidator/sponsors/badge.svg)](#sponsors) [![FOSSA 
Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fasaskevich%2Fgovalidator.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2Fasaskevich%2Fgovalidator?ref=badge_shield) A package of validators and sanitizers for strings, structs and collections. Based on [validator.js](https://github.com/chriso/validator.js). @@ -13,7 +14,7 @@ Type the following command in your terminal: or you can get specified release of the package with `gopkg.in`: - go get gopkg.in/asaskevich/govalidator.v4 + go get gopkg.in/asaskevich/govalidator.v10 After it the package is ready to use. @@ -83,14 +84,14 @@ This was changed to prevent data races when accessing custom validators. import "github.com/asaskevich/govalidator" // before -govalidator.CustomTypeTagMap["customByteArrayValidator"] = CustomTypeValidator(func(i interface{}, o interface{}) bool { +govalidator.CustomTypeTagMap["customByteArrayValidator"] = func(i interface{}, o interface{}) bool { // ... -}) +} // after -govalidator.CustomTypeTagMap.Set("customByteArrayValidator", CustomTypeValidator(func(i interface{}, o interface{}) bool { +govalidator.CustomTypeTagMap.Set("customByteArrayValidator", func(i interface{}, o interface{}) bool { // ... -})) +}) ``` #### List of functions: @@ -108,23 +109,34 @@ func Filter(array []interface{}, iterator ConditionIterator) []interface{} func Find(array []interface{}, iterator ConditionIterator) interface{} func GetLine(s string, index int) (string, error) func GetLines(s string) []string -func InRange(value, left, right float64) bool +func HasLowerCase(str string) bool +func HasUpperCase(str string) bool +func HasWhitespace(str string) bool +func HasWhitespaceOnly(str string) bool +func InRange(value interface{}, left interface{}, right interface{}) bool +func InRangeFloat32(value, left, right float32) bool +func InRangeFloat64(value, left, right float64) bool +func InRangeInt(value, left, right interface{}) bool func IsASCII(str string) bool func IsAlpha(str string) bool func IsAlphanumeric(str string) bool func IsBase64(str string) bool func IsByteLength(str string, min, max int) bool func IsCIDR(str string) bool +func IsCRC32(str string) bool +func IsCRC32b(str string) bool func IsCreditCard(str string) bool func IsDNSName(str string) bool func IsDataURI(str string) bool func IsDialString(str string) bool func IsDivisibleBy(str, num string) bool func IsEmail(str string) bool +func IsExistingEmail(email string) bool func IsFilePath(str string) (bool, int) func IsFloat(str string) bool func IsFullWidth(str string) bool func IsHalfWidth(str string) bool +func IsHash(str string, algorithm string) bool func IsHexadecimal(str string) bool func IsHexcolor(str string) bool func IsHost(str string) bool @@ -136,22 +148,27 @@ func IsISBN10(str string) bool func IsISBN13(str string) bool func IsISO3166Alpha2(str string) bool func IsISO3166Alpha3(str string) bool +func IsISO4217(str string) bool func IsISO693Alpha2(str string) bool func IsISO693Alpha3b(str string) bool -func IsISO4217(str string) bool func IsIn(str string, params ...string) bool +func IsInRaw(str string, params ...string) bool func IsInt(str string) bool func IsJSON(str string) bool func IsLatitude(str string) bool func IsLongitude(str string) bool func IsLowerCase(str string) bool func IsMAC(str string) bool +func IsMD4(str string) bool +func IsMD5(str string) bool +func IsMagnetURI(str string) bool func IsMongoID(str string) bool func IsMultibyte(str string) bool func IsNatural(value float64) bool func IsNegative(value float64) bool 
func IsNonNegative(value float64) bool func IsNonPositive(value float64) bool +func IsNotNull(str string) bool func IsNull(str string) bool func IsNumeric(str string) bool func IsPort(str string) bool @@ -162,9 +179,21 @@ func IsRFC3339WithoutZone(str string) bool func IsRGBcolor(str string) bool func IsRequestURI(rawurl string) bool func IsRequestURL(rawurl string) bool +func IsRipeMD128(str string) bool +func IsRipeMD160(str string) bool +func IsRsaPub(str string, params ...string) bool +func IsRsaPublicKey(str string, keylen int) bool +func IsSHA1(str string) bool +func IsSHA256(str string) bool +func IsSHA384(str string) bool +func IsSHA512(str string) bool func IsSSN(str string) bool func IsSemver(str string) bool +func IsTiger128(str string) bool +func IsTiger160(str string) bool +func IsTiger192(str string) bool func IsTime(str string, format string) bool +func IsType(v interface{}, params ...string) bool func IsURL(str string) bool func IsUTFDigit(str string) bool func IsUTFLetter(str string) bool @@ -174,16 +203,20 @@ func IsUUID(str string) bool func IsUUIDv3(str string) bool func IsUUIDv4(str string) bool func IsUUIDv5(str string) bool +func IsUnixTime(str string) bool func IsUpperCase(str string) bool func IsVariableWidth(str string) bool func IsWhole(value float64) bool func LeftTrim(str, chars string) string func Map(array []interface{}, iterator ResultIterator) []interface{} func Matches(str, pattern string) bool +func MaxStringLength(str string, params ...string) bool +func MinStringLength(str string, params ...string) bool func NormalizeEmail(str string) (string, error) func PadBoth(str string, padStr string, padLen int) string func PadLeft(str string, padStr string, padLen int) string func PadRight(str string, padStr string, padLen int) string +func PrependPathToErrors(err error, path string) error func Range(str string, params ...string) bool func RemoveTags(s string) string func ReplacePattern(str, pattern, replace string) string @@ -192,18 +225,21 @@ func RightTrim(str, chars string) string func RuneLength(str string, params ...string) bool func SafeFileName(str string) string func SetFieldsRequiredByDefault(value bool) +func SetNilPtrAllowedByRequired(value bool) func Sign(value float64) float64 func StringLength(str string, params ...string) bool func StringMatches(s string, params ...string) bool func StripLow(str string, keepNewLines bool) string func ToBoolean(str string) (bool, error) func ToFloat(str string) (float64, error) -func ToInt(str string) (int64, error) +func ToInt(value interface{}) (res int64, err error) func ToJSON(obj interface{}) (string, error) func ToString(obj interface{}) string func Trim(str, chars string) string func Truncate(str string, length int, ending string) string +func TruncatingErrorf(str string, args ...interface{}) error func UnderscoreToCamelCase(s string) string +func ValidateMap(inputMap map[string]interface{}, validationMap map[string]interface{}) (bool, error) func ValidateStruct(s interface{}) (bool, error) func WhiteList(str, chars string) string type ConditionIterator @@ -214,6 +250,8 @@ type Errors func (es Errors) Error() string func (es Errors) Errors() []error type ISO3166Entry +type ISO693Entry +type InterfaceParamValidator type Iterator type ParamValidator type ResultIterator @@ -227,6 +265,27 @@ type Validator ```go println(govalidator.IsURL(`http://user@pass:domain.com/path/page`)) ``` +###### IsType +```go +println(govalidator.IsType("Bob", "string")) +println(govalidator.IsType(1, "int")) +i := 1 
+println(govalidator.IsType(&i, "*int")) +``` + +IsType can be used through the tag `type` which is essential for map validation: +```go +type User struct { + Name string `valid:"type(string)"` + Age int `valid:"type(int)"` + Meta interface{} `valid:"type(string)"` +} +result, err := govalidator.ValidateStruct(User{"Bob", 20, "meta"}) +if err != nil { + println("error: " + err.Error()) +} +println(result) +``` ###### ToString ```go type User struct { @@ -334,6 +393,13 @@ Validators with parameters "matches(pattern)": StringMatches, "in(string1|string2|...|stringN)": IsIn, "rsapub(keylength)" : IsRsaPub, +"minstringlength(int): MinStringLength, +"maxstringlength(int): MaxStringLength, +``` +Validators with parameters for any type + +```go +"type(type)": IsType, ``` And here is small example of usage: @@ -370,6 +436,41 @@ if err != nil { } println(result) ``` +###### ValidateMap [#2](https://github.com/asaskevich/govalidator/pull/338) +If you want to validate maps, you can use the map to be validated and a validation map that contain the same tags used in ValidateStruct, both maps have to be in the form `map[string]interface{}` + +So here is small example of usage: +```go +var mapTemplate = map[string]interface{}{ + "name":"required,alpha", + "family":"required,alpha", + "email":"required,email", + "cell-phone":"numeric", + "address":map[string]interface{}{ + "line1":"required,alphanum", + "line2":"alphanum", + "postal-code":"numeric", + }, +} + +var inputMap = map[string]interface{}{ + "name":"Bob", + "family":"Smith", + "email":"foo@bar.baz", + "address":map[string]interface{}{ + "line1":"", + "line2":"", + "postal-code":"", + }, +} + +result, err := govalidator.ValidateMap(inputMap, mapTemplate) +if err != nil { + println("error: " + err.Error()) +} +println(result) +``` + ###### WhiteList ```go // Remove all characters from string ignoring characters between "a" and "z" @@ -389,7 +490,7 @@ type StructWithCustomByteArray struct { CustomMinLength int `valid:"-"` } -govalidator.CustomTypeTagMap.Set("customByteArrayValidator", CustomTypeValidator(func(i interface{}, context interface{}) bool { +govalidator.CustomTypeTagMap.Set("customByteArrayValidator", func(i interface{}, context interface{}) bool { switch v := context.(type) { // you can type switch on the context interface being validated case StructWithCustomByteArray: // you can check and validate against some other field in the context, @@ -409,14 +510,25 @@ govalidator.CustomTypeTagMap.Set("customByteArrayValidator", CustomTypeValidator } } return false -})) -govalidator.CustomTypeTagMap.Set("customMinLengthValidator", CustomTypeValidator(func(i interface{}, context interface{}) bool { +}) +govalidator.CustomTypeTagMap.Set("customMinLengthValidator", func(i interface{}, context interface{}) bool { switch v := context.(type) { // this validates a field against the value in another field, i.e. dependent validation case StructWithCustomByteArray: return len(v.ID) >= v.CustomMinLength } return false -})) +}) +``` + +###### Loop over Error() +By default .Error() returns all errors in a single String. 
To access each error you can do this: +```go + if err != nil { + errs := err.(govalidator.Errors).Errors() + for _, e := range errs { + fmt.Println(e.Error()) + } + } ``` ###### Custom error messages @@ -445,7 +557,7 @@ If you don't know what to do, there are some features and functions that need to - [ ] Update actual [list of functions](https://github.com/asaskevich/govalidator#list-of-functions) - [ ] Update [list of validators](https://github.com/asaskevich/govalidator#validatestruct-2) that available for `ValidateStruct` and add new - [ ] Implement new validators: `IsFQDN`, `IsIMEI`, `IsPostalCode`, `IsISIN`, `IsISRC` etc -- [ ] Implement [validation by maps](https://github.com/asaskevich/govalidator/issues/224) +- [x] Implement [validation by maps](https://github.com/asaskevich/govalidator/issues/224) - [ ] Implement fuzzing testing - [ ] Implement some struct/map/array utilities - [ ] Implement map/array validation @@ -475,7 +587,7 @@ This project exists thanks to all the people who contribute. [[Contribute](CONTR * [Matt Sanford](https://github.com/mzsanford) * [Simon ccl1115](https://github.com/ccl1115) - + ### Backers @@ -504,4 +616,4 @@ Support this project by becoming a sponsor. Your logo will show up here with a l ## License -[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fasaskevich%2Fgovalidator.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fasaskevich%2Fgovalidator?ref=badge_large) \ No newline at end of file +[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fasaskevich%2Fgovalidator.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fasaskevich%2Fgovalidator?ref=badge_large) diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/arrays.go b/src/runtime/vendor/github.com/asaskevich/govalidator/arrays.go index 5bace2654d3b..3e1da7cb480e 100644 --- a/src/runtime/vendor/github.com/asaskevich/govalidator/arrays.go +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/arrays.go @@ -9,6 +9,35 @@ type ResultIterator func(interface{}, int) interface{} // ConditionIterator is the function that accepts element of slice/array and its index and returns boolean type ConditionIterator func(interface{}, int) bool +// ReduceIterator is the function that accepts two element of slice/array and returns result of merging those values +type ReduceIterator func(interface{}, interface{}) interface{} + +// Some validates that any item of array corresponds to ConditionIterator. Returns boolean. +func Some(array []interface{}, iterator ConditionIterator) bool { + res := false + for index, data := range array { + res = res || iterator(data, index) + } + return res +} + +// Every validates that every item of array corresponds to ConditionIterator. Returns boolean. 
+func Every(array []interface{}, iterator ConditionIterator) bool { + res := true + for index, data := range array { + res = res && iterator(data, index) + } + return res +} + +// Reduce boils down a list of values into a single value by ReduceIterator +func Reduce(array []interface{}, iterator ReduceIterator, initialValue interface{}) interface{} { + for _, data := range array { + initialValue = iterator(initialValue, data) + } + return initialValue +} + // Each iterates over the slice and apply Iterator to every item func Each(array []interface{}, iterator Iterator) { for index, data := range array { diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/converter.go b/src/runtime/vendor/github.com/asaskevich/govalidator/converter.go index cf1e5d569ba0..d68e990fc256 100644 --- a/src/runtime/vendor/github.com/asaskevich/govalidator/converter.go +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/converter.go @@ -10,7 +10,7 @@ import ( // ToString convert the input to a string. func ToString(obj interface{}) string { res := fmt.Sprintf("%v", obj) - return string(res) + return res } // ToJSON convert the input to a valid JSON string @@ -23,12 +23,27 @@ func ToJSON(obj interface{}) (string, error) { } // ToFloat convert the input string to a float, or 0.0 if the input is not a float. -func ToFloat(str string) (float64, error) { - res, err := strconv.ParseFloat(str, 64) - if err != nil { - res = 0.0 +func ToFloat(value interface{}) (res float64, err error) { + val := reflect.ValueOf(value) + + switch value.(type) { + case int, int8, int16, int32, int64: + res = float64(val.Int()) + case uint, uint8, uint16, uint32, uint64: + res = float64(val.Uint()) + case float32, float64: + res = val.Float() + case string: + res, err = strconv.ParseFloat(val.String(), 64) + if err != nil { + res = 0 + } + default: + err = fmt.Errorf("ToInt: unknown interface type %T", value) + res = 0 } - return res, err + + return } // ToInt convert the input string or any int type to an integer type 64, or 0 if the input is not an integer. @@ -40,6 +55,8 @@ func ToInt(value interface{}) (res int64, err error) { res = val.Int() case uint, uint8, uint16, uint32, uint64: res = int64(val.Uint()) + case float32, float64: + res = int64(val.Float()) case string: if IsInt(val.String()) { res, err = strconv.ParseInt(val.String(), 0, 64) @@ -47,11 +64,11 @@ func ToInt(value interface{}) (res int64, err error) { res = 0 } } else { - err = fmt.Errorf("math: square root of negative number %g", value) + err = fmt.Errorf("ToInt: invalid numeric format %g", value) res = 0 } default: - err = fmt.Errorf("math: square root of negative number %g", value) + err = fmt.Errorf("ToInt: unknown interface type %T", value) res = 0 } diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/doc.go b/src/runtime/vendor/github.com/asaskevich/govalidator/doc.go new file mode 100644 index 000000000000..55dce62dc8c3 --- /dev/null +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/doc.go @@ -0,0 +1,3 @@ +package govalidator + +// A package of validators and sanitizers for strings, structures and collections. 
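The arrays.go hunk above adds three new iteration helpers to the vendored govalidator package (Some, Every, Reduce) but carries no usage example. The snippet below is a minimal, illustrative sketch, not part of the patch itself, showing how a caller could use them, assuming the helpers are exported exactly as defined in that hunk:

```go
package main

import (
	"fmt"

	"github.com/asaskevich/govalidator"
)

func main() {
	nums := []interface{}{1, 2, 3, 4}

	// Some: true if any element satisfies the condition.
	anyEven := govalidator.Some(nums, func(v interface{}, _ int) bool {
		return v.(int)%2 == 0
	})

	// Every: true only if all elements satisfy the condition.
	allPositive := govalidator.Every(nums, func(v interface{}, _ int) bool {
		return v.(int) > 0
	})

	// Reduce: fold the slice into a single value, starting from the
	// supplied initial value (0 here); the accumulator is the first argument.
	sum := govalidator.Reduce(nums, func(acc, v interface{}) interface{} {
		return acc.(int) + v.(int)
	}, 0)

	fmt.Println(anyEven, allPositive, sum) // true true 10
}
```

Note that, as written in the vendored implementation, Some and Every walk the entire slice rather than short-circuiting once the result is known.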
diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/error.go b/src/runtime/vendor/github.com/asaskevich/govalidator/error.go index 655b750cb8f6..1da2336f47ee 100644 --- a/src/runtime/vendor/github.com/asaskevich/govalidator/error.go +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/error.go @@ -1,6 +1,9 @@ package govalidator -import "strings" +import ( + "sort" + "strings" +) // Errors is an array of multiple errors and conforms to the error interface. type Errors []error @@ -15,6 +18,7 @@ func (es Errors) Error() string { for _, e := range es { errs = append(errs, e.Error()) } + sort.Strings(errs) return strings.Join(errs, ";") } diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/numerics.go b/src/runtime/vendor/github.com/asaskevich/govalidator/numerics.go index 7e6c652e140c..5041d9e86844 100644 --- a/src/runtime/vendor/github.com/asaskevich/govalidator/numerics.go +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/numerics.go @@ -2,7 +2,6 @@ package govalidator import ( "math" - "reflect" ) // Abs returns absolute value of number @@ -41,7 +40,7 @@ func IsNonPositive(value float64) bool { return value <= 0 } -// InRange returns true if value lies between left and right border +// InRangeInt returns true if value lies between left and right border func InRangeInt(value, left, right interface{}) bool { value64, _ := ToInt(value) left64, _ := ToInt(left) @@ -52,7 +51,7 @@ func InRangeInt(value, left, right interface{}) bool { return value64 >= left64 && value64 <= right64 } -// InRange returns true if value lies between left and right border +// InRangeFloat32 returns true if value lies between left and right border func InRangeFloat32(value, left, right float32) bool { if left > right { left, right = right, left @@ -60,7 +59,7 @@ func InRangeFloat32(value, left, right float32) bool { return value >= left && value <= right } -// InRange returns true if value lies between left and right border +// InRangeFloat64 returns true if value lies between left and right border func InRangeFloat64(value, left, right float64) bool { if left > right { left, right = right, left @@ -68,20 +67,24 @@ func InRangeFloat64(value, left, right float64) bool { return value >= left && value <= right } -// InRange returns true if value lies between left and right border, generic type to handle int, float32 or float64, all types must the same type +// InRange returns true if value lies between left and right border, generic type to handle int, float32, float64 and string. +// All types must the same type. 
+// False if value doesn't lie in range or if it incompatible or not comparable func InRange(value interface{}, left interface{}, right interface{}) bool { - - reflectValue := reflect.TypeOf(value).Kind() - reflectLeft := reflect.TypeOf(left).Kind() - reflectRight := reflect.TypeOf(right).Kind() - - if reflectValue == reflect.Int && reflectLeft == reflect.Int && reflectRight == reflect.Int { - return InRangeInt(value.(int), left.(int), right.(int)) - } else if reflectValue == reflect.Float32 && reflectLeft == reflect.Float32 && reflectRight == reflect.Float32 { - return InRangeFloat32(value.(float32), left.(float32), right.(float32)) - } else if reflectValue == reflect.Float64 && reflectLeft == reflect.Float64 && reflectRight == reflect.Float64 { - return InRangeFloat64(value.(float64), left.(float64), right.(float64)) - } else { + switch value.(type) { + case int: + intValue, _ := ToInt(value) + intLeft, _ := ToInt(left) + intRight, _ := ToInt(right) + return InRangeInt(intValue, intLeft, intRight) + case float32, float64: + intValue, _ := ToFloat(value) + intLeft, _ := ToFloat(left) + intRight, _ := ToFloat(right) + return InRangeFloat64(intValue, intLeft, intRight) + case string: + return value.(string) >= left.(string) && value.(string) <= right.(string) + default: return false } } diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/patterns.go b/src/runtime/vendor/github.com/asaskevich/govalidator/patterns.go index 61a05d438e18..106ed94f80ad 100644 --- a/src/runtime/vendor/github.com/asaskevich/govalidator/patterns.go +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/patterns.go @@ -4,49 +4,52 @@ import "regexp" // Basic regular expressions for validating strings const ( - Email string = "^(((([a-zA-Z]|\\d|[!#\\$%&'\\*\\+\\-\\/=\\?\\^_`{\\|}~]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])+(\\.([a-zA-Z]|\\d|[!#\\$%&'\\*\\+\\-\\/=\\?\\^_`{\\|}~]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])+)*)|((\\x22)((((\\x20|\\x09)*(\\x0d\\x0a))?(\\x20|\\x09)+)?(([\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f]|\\x21|[\\x23-\\x5b]|[\\x5d-\\x7e]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])|(\\([\\x01-\\x09\\x0b\\x0c\\x0d-\\x7f]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}]))))*(((\\x20|\\x09)*(\\x0d\\x0a))?(\\x20|\\x09)+)?(\\x22)))@((([a-zA-Z]|\\d|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])|(([a-zA-Z]|\\d|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])([a-zA-Z]|\\d|-|\\.|_|~|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])*([a-zA-Z]|\\d|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])))\\.)+(([a-zA-Z]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])|(([a-zA-Z]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])([a-zA-Z]|\\d|-|_|~|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])*([a-zA-Z]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])))\\.?$" - CreditCard string = "^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35\\d{3})\\d{11})$" - ISBN10 string = "^(?:[0-9]{9}X|[0-9]{10})$" - ISBN13 string = "^(?:[0-9]{13})$" - UUID3 string = "^[0-9a-f]{8}-[0-9a-f]{4}-3[0-9a-f]{3}-[0-9a-f]{4}-[0-9a-f]{12}$" - UUID4 string = "^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" - UUID5 string = "^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" - UUID string = 
"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" - Alpha string = "^[a-zA-Z]+$" - Alphanumeric string = "^[a-zA-Z0-9]+$" - Numeric string = "^[0-9]+$" - Int string = "^(?:[-+]?(?:0|[1-9][0-9]*))$" - Float string = "^(?:[-+]?(?:[0-9]+))?(?:\\.[0-9]*)?(?:[eE][\\+\\-]?(?:[0-9]+))?$" - Hexadecimal string = "^[0-9a-fA-F]+$" - Hexcolor string = "^#?([0-9a-fA-F]{3}|[0-9a-fA-F]{6})$" - RGBcolor string = "^rgb\\(\\s*(0|[1-9]\\d?|1\\d\\d?|2[0-4]\\d|25[0-5])\\s*,\\s*(0|[1-9]\\d?|1\\d\\d?|2[0-4]\\d|25[0-5])\\s*,\\s*(0|[1-9]\\d?|1\\d\\d?|2[0-4]\\d|25[0-5])\\s*\\)$" - ASCII string = "^[\x00-\x7F]+$" - Multibyte string = "[^\x00-\x7F]" - FullWidth string = "[^\u0020-\u007E\uFF61-\uFF9F\uFFA0-\uFFDC\uFFE8-\uFFEE0-9a-zA-Z]" - HalfWidth string = "[\u0020-\u007E\uFF61-\uFF9F\uFFA0-\uFFDC\uFFE8-\uFFEE0-9a-zA-Z]" - Base64 string = "^(?:[A-Za-z0-9+\\/]{4})*(?:[A-Za-z0-9+\\/]{2}==|[A-Za-z0-9+\\/]{3}=|[A-Za-z0-9+\\/]{4})$" - PrintableASCII string = "^[\x20-\x7E]+$" - DataURI string = "^data:.+\\/(.+);base64$" - Latitude string = "^[-+]?([1-8]?\\d(\\.\\d+)?|90(\\.0+)?)$" - Longitude string = "^[-+]?(180(\\.0+)?|((1[0-7]\\d)|([1-9]?\\d))(\\.\\d+)?)$" - DNSName string = `^([a-zA-Z0-9_]{1}[a-zA-Z0-9_-]{0,62}){1}(\.[a-zA-Z0-9_]{1}[a-zA-Z0-9_-]{0,62})*[\._]?$` - IP string = `(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))` - URLSchema string = `((ftp|tcp|udp|wss?|https?):\/\/)` - URLUsername string = `(\S+(:\S*)?@)` - URLPath string = `((\/|\?|#)[^\s]*)` - URLPort string = `(:(\d{1,5}))` - URLIP string = `([1-9]\d?|1\d\d|2[01]\d|22[0-3])(\.(1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.([0-9]\d?|1\d\d|2[0-4]\d|25[0-4]))` - URLSubdomain string = `((www\.)|([a-zA-Z0-9]+([-_\.]?[a-zA-Z0-9])*[a-zA-Z0-9]\.[a-zA-Z0-9]+))` - URL string = `^` + URLSchema + `?` + URLUsername + `?` + `((` + URLIP + `|(\[` + IP + `\])|(([a-zA-Z0-9]([a-zA-Z0-9-_]+)?[a-zA-Z0-9]([-\.][a-zA-Z0-9]+)*)|(` + URLSubdomain + `?))?(([a-zA-Z\x{00a1}-\x{ffff}0-9]+-?-?)*[a-zA-Z\x{00a1}-\x{ffff}0-9]+)(?:\.([a-zA-Z\x{00a1}-\x{ffff}]{1,}))?))\.?` + URLPort + `?` + URLPath + `?$` - SSN string = `^\d{3}[- ]?\d{2}[- ]?\d{4}$` - WinPath string = `^[a-zA-Z]:\\(?:[^\\/:*?"<>|\r\n]+\\)*[^\\/:*?"<>|\r\n]*$` - UnixPath string = `^(/[^/\x00]*)+/?$` - Semver string = "^v?(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)(-(0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(\\.(0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\\+[0-9a-zA-Z-]+(\\.[0-9a-zA-Z-]+)*)?$" - tagName string = "valid" - hasLowerCase string = ".*[[:lower:]]" - hasUpperCase string = ".*[[:upper:]]" - hasWhitespace string = ".*[[:space:]]" - hasWhitespaceOnly string = "^[[:space:]]+$" + Email string = 
"^(((([a-zA-Z]|\\d|[!#\\$%&'\\*\\+\\-\\/=\\?\\^_`{\\|}~]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])+(\\.([a-zA-Z]|\\d|[!#\\$%&'\\*\\+\\-\\/=\\?\\^_`{\\|}~]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])+)*)|((\\x22)((((\\x20|\\x09)*(\\x0d\\x0a))?(\\x20|\\x09)+)?(([\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f]|\\x21|[\\x23-\\x5b]|[\\x5d-\\x7e]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])|(\\([\\x01-\\x09\\x0b\\x0c\\x0d-\\x7f]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}]))))*(((\\x20|\\x09)*(\\x0d\\x0a))?(\\x20|\\x09)+)?(\\x22)))@((([a-zA-Z]|\\d|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])|(([a-zA-Z]|\\d|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])([a-zA-Z]|\\d|-|\\.|_|~|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])*([a-zA-Z]|\\d|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])))\\.)+(([a-zA-Z]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])|(([a-zA-Z]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])([a-zA-Z]|\\d|-|_|~|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])*([a-zA-Z]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])))\\.?$" + CreditCard string = "^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|(222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[0-9]{12}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35\\d{3})\\d{11}|6[27][0-9]{14})$" + ISBN10 string = "^(?:[0-9]{9}X|[0-9]{10})$" + ISBN13 string = "^(?:[0-9]{13})$" + UUID3 string = "^[0-9a-f]{8}-[0-9a-f]{4}-3[0-9a-f]{3}-[0-9a-f]{4}-[0-9a-f]{12}$" + UUID4 string = "^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + UUID5 string = "^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + UUID string = "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" + Alpha string = "^[a-zA-Z]+$" + Alphanumeric string = "^[a-zA-Z0-9]+$" + Numeric string = "^[0-9]+$" + Int string = "^(?:[-+]?(?:0|[1-9][0-9]*))$" + Float string = "^(?:[-+]?(?:[0-9]+))?(?:\\.[0-9]*)?(?:[eE][\\+\\-]?(?:[0-9]+))?$" + Hexadecimal string = "^[0-9a-fA-F]+$" + Hexcolor string = "^#?([0-9a-fA-F]{3}|[0-9a-fA-F]{6})$" + RGBcolor string = "^rgb\\(\\s*(0|[1-9]\\d?|1\\d\\d?|2[0-4]\\d|25[0-5])\\s*,\\s*(0|[1-9]\\d?|1\\d\\d?|2[0-4]\\d|25[0-5])\\s*,\\s*(0|[1-9]\\d?|1\\d\\d?|2[0-4]\\d|25[0-5])\\s*\\)$" + ASCII string = "^[\x00-\x7F]+$" + Multibyte string = "[^\x00-\x7F]" + FullWidth string = "[^\u0020-\u007E\uFF61-\uFF9F\uFFA0-\uFFDC\uFFE8-\uFFEE0-9a-zA-Z]" + HalfWidth string = "[\u0020-\u007E\uFF61-\uFF9F\uFFA0-\uFFDC\uFFE8-\uFFEE0-9a-zA-Z]" + Base64 string = "^(?:[A-Za-z0-9+\\/]{4})*(?:[A-Za-z0-9+\\/]{2}==|[A-Za-z0-9+\\/]{3}=|[A-Za-z0-9+\\/]{4})$" + PrintableASCII string = "^[\x20-\x7E]+$" + DataURI string = "^data:.+\\/(.+);base64$" + MagnetURI string = "^magnet:\\?xt=urn:[a-zA-Z0-9]+:[a-zA-Z0-9]{32,40}&dn=.+&tr=.+$" + Latitude string = "^[-+]?([1-8]?\\d(\\.\\d+)?|90(\\.0+)?)$" + Longitude string = "^[-+]?(180(\\.0+)?|((1[0-7]\\d)|([1-9]?\\d))(\\.\\d+)?)$" + DNSName string = `^([a-zA-Z0-9_]{1}[a-zA-Z0-9_-]{0,62}){1}(\.[a-zA-Z0-9_]{1}[a-zA-Z0-9_-]{0,62})*[\._]?$` + IP string = 
`(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))` + URLSchema string = `((ftp|tcp|udp|wss?|https?):\/\/)` + URLUsername string = `(\S+(:\S*)?@)` + URLPath string = `((\/|\?|#)[^\s]*)` + URLPort string = `(:(\d{1,5}))` + URLIP string = `([1-9]\d?|1\d\d|2[01]\d|22[0-3]|24\d|25[0-5])(\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])){2}(?:\.([0-9]\d?|1\d\d|2[0-4]\d|25[0-5]))` + URLSubdomain string = `((www\.)|([a-zA-Z0-9]+([-_\.]?[a-zA-Z0-9])*[a-zA-Z0-9]\.[a-zA-Z0-9]+))` + URL = `^` + URLSchema + `?` + URLUsername + `?` + `((` + URLIP + `|(\[` + IP + `\])|(([a-zA-Z0-9]([a-zA-Z0-9-_]+)?[a-zA-Z0-9]([-\.][a-zA-Z0-9]+)*)|(` + URLSubdomain + `?))?(([a-zA-Z\x{00a1}-\x{ffff}0-9]+-?-?)*[a-zA-Z\x{00a1}-\x{ffff}0-9]+)(?:\.([a-zA-Z\x{00a1}-\x{ffff}]{1,}))?))\.?` + URLPort + `?` + URLPath + `?$` + SSN string = `^\d{3}[- ]?\d{2}[- ]?\d{4}$` + WinPath string = `^[a-zA-Z]:\\(?:[^\\/:*?"<>|\r\n]+\\)*[^\\/:*?"<>|\r\n]*$` + UnixPath string = `^(/[^/\x00]*)+/?$` + Semver string = "^v?(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)(-(0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(\\.(0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\\+[0-9a-zA-Z-]+(\\.[0-9a-zA-Z-]+)*)?$" + tagName string = "valid" + hasLowerCase string = ".*[[:lower:]]" + hasUpperCase string = ".*[[:upper:]]" + hasWhitespace string = ".*[[:space:]]" + hasWhitespaceOnly string = "^[[:space:]]+$" + IMEI string = "^[0-9a-f]{14}$|^\\d{15}$|^\\d{18}$" + IMSI string = "^\\d{14,15}$" ) // Used by IsFilePath func @@ -60,42 +63,45 @@ const ( ) var ( - userRegexp = regexp.MustCompile("^[a-zA-Z0-9!#$%&'*+/=?^_`{|}~.-]+$") - hostRegexp = regexp.MustCompile("^[^\\s]+\\.[^\\s]+$") - userDotRegexp = regexp.MustCompile("(^[.]{1})|([.]{1}$)|([.]{2,})") - rxEmail = regexp.MustCompile(Email) - rxCreditCard = regexp.MustCompile(CreditCard) - rxISBN10 = regexp.MustCompile(ISBN10) - rxISBN13 = regexp.MustCompile(ISBN13) - rxUUID3 = regexp.MustCompile(UUID3) - rxUUID4 = regexp.MustCompile(UUID4) - rxUUID5 = regexp.MustCompile(UUID5) - rxUUID = regexp.MustCompile(UUID) - rxAlpha = regexp.MustCompile(Alpha) - rxAlphanumeric = regexp.MustCompile(Alphanumeric) - rxNumeric = regexp.MustCompile(Numeric) - rxInt = regexp.MustCompile(Int) - rxFloat = regexp.MustCompile(Float) - rxHexadecimal = regexp.MustCompile(Hexadecimal) - rxHexcolor = regexp.MustCompile(Hexcolor) - rxRGBcolor = regexp.MustCompile(RGBcolor) - rxASCII = regexp.MustCompile(ASCII) - rxPrintableASCII = regexp.MustCompile(PrintableASCII) - rxMultibyte = regexp.MustCompile(Multibyte) - rxFullWidth = regexp.MustCompile(FullWidth) - rxHalfWidth = regexp.MustCompile(HalfWidth) - rxBase64 = regexp.MustCompile(Base64) - rxDataURI = regexp.MustCompile(DataURI) - rxLatitude = regexp.MustCompile(Latitude) - rxLongitude = regexp.MustCompile(Longitude) - rxDNSName = regexp.MustCompile(DNSName) - rxURL = regexp.MustCompile(URL) - rxSSN = regexp.MustCompile(SSN) - rxWinPath = regexp.MustCompile(WinPath) - rxUnixPath = 
regexp.MustCompile(UnixPath) - rxSemver = regexp.MustCompile(Semver) - rxHasLowerCase = regexp.MustCompile(hasLowerCase) - rxHasUpperCase = regexp.MustCompile(hasUpperCase) - rxHasWhitespace = regexp.MustCompile(hasWhitespace) - rxHasWhitespaceOnly = regexp.MustCompile(hasWhitespaceOnly) + userRegexp = regexp.MustCompile("^[a-zA-Z0-9!#$%&'*+/=?^_`{|}~.-]+$") + hostRegexp = regexp.MustCompile("^[^\\s]+\\.[^\\s]+$") + userDotRegexp = regexp.MustCompile("(^[.]{1})|([.]{1}$)|([.]{2,})") + rxEmail = regexp.MustCompile(Email) + rxCreditCard = regexp.MustCompile(CreditCard) + rxISBN10 = regexp.MustCompile(ISBN10) + rxISBN13 = regexp.MustCompile(ISBN13) + rxUUID3 = regexp.MustCompile(UUID3) + rxUUID4 = regexp.MustCompile(UUID4) + rxUUID5 = regexp.MustCompile(UUID5) + rxUUID = regexp.MustCompile(UUID) + rxAlpha = regexp.MustCompile(Alpha) + rxAlphanumeric = regexp.MustCompile(Alphanumeric) + rxNumeric = regexp.MustCompile(Numeric) + rxInt = regexp.MustCompile(Int) + rxFloat = regexp.MustCompile(Float) + rxHexadecimal = regexp.MustCompile(Hexadecimal) + rxHexcolor = regexp.MustCompile(Hexcolor) + rxRGBcolor = regexp.MustCompile(RGBcolor) + rxASCII = regexp.MustCompile(ASCII) + rxPrintableASCII = regexp.MustCompile(PrintableASCII) + rxMultibyte = regexp.MustCompile(Multibyte) + rxFullWidth = regexp.MustCompile(FullWidth) + rxHalfWidth = regexp.MustCompile(HalfWidth) + rxBase64 = regexp.MustCompile(Base64) + rxDataURI = regexp.MustCompile(DataURI) + rxMagnetURI = regexp.MustCompile(MagnetURI) + rxLatitude = regexp.MustCompile(Latitude) + rxLongitude = regexp.MustCompile(Longitude) + rxDNSName = regexp.MustCompile(DNSName) + rxURL = regexp.MustCompile(URL) + rxSSN = regexp.MustCompile(SSN) + rxWinPath = regexp.MustCompile(WinPath) + rxUnixPath = regexp.MustCompile(UnixPath) + rxSemver = regexp.MustCompile(Semver) + rxHasLowerCase = regexp.MustCompile(hasLowerCase) + rxHasUpperCase = regexp.MustCompile(hasUpperCase) + rxHasWhitespace = regexp.MustCompile(hasWhitespace) + rxHasWhitespaceOnly = regexp.MustCompile(hasWhitespaceOnly) + rxIMEI = regexp.MustCompile(IMEI) + rxIMSI = regexp.MustCompile(IMSI) ) diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/types.go b/src/runtime/vendor/github.com/asaskevich/govalidator/types.go index 4f7e9274ade0..54218bf05a2f 100644 --- a/src/runtime/vendor/github.com/asaskevich/govalidator/types.go +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/types.go @@ -14,8 +14,11 @@ type Validator func(str string) bool // The second parameter should be the context (in the case of validating a struct: the whole object being validated). type CustomTypeValidator func(i interface{}, o interface{}) bool -// ParamValidator is a wrapper for validator functions that accepts additional parameters. +// ParamValidator is a wrapper for validator functions that accept additional parameters. type ParamValidator func(str string, params ...string) bool + +// InterfaceParamValidator is a wrapper for functions that accept variants parameters for an interface value +type InterfaceParamValidator func(in interface{}, params ...string) bool type tagOptionsMap map[string]tagOption func (t tagOptionsMap) orderedKeys() []string { @@ -46,26 +49,40 @@ type UnsupportedTypeError struct { // It implements the methods to sort by string. 
type stringValues []reflect.Value +// InterfaceParamTagMap is a map of functions accept variants parameters for an interface value +var InterfaceParamTagMap = map[string]InterfaceParamValidator{ + "type": IsType, +} + +// InterfaceParamTagRegexMap maps interface param tags to their respective regexes. +var InterfaceParamTagRegexMap = map[string]*regexp.Regexp{ + "type": regexp.MustCompile(`^type\((.*)\)$`), +} + // ParamTagMap is a map of functions accept variants parameters var ParamTagMap = map[string]ParamValidator{ - "length": ByteLength, - "range": Range, - "runelength": RuneLength, - "stringlength": StringLength, - "matches": StringMatches, - "in": isInRaw, - "rsapub": IsRsaPub, + "length": ByteLength, + "range": Range, + "runelength": RuneLength, + "stringlength": StringLength, + "matches": StringMatches, + "in": IsInRaw, + "rsapub": IsRsaPub, + "minstringlength": MinStringLength, + "maxstringlength": MaxStringLength, } // ParamTagRegexMap maps param tags to their respective regexes. var ParamTagRegexMap = map[string]*regexp.Regexp{ - "range": regexp.MustCompile("^range\\((\\d+)\\|(\\d+)\\)$"), - "length": regexp.MustCompile("^length\\((\\d+)\\|(\\d+)\\)$"), - "runelength": regexp.MustCompile("^runelength\\((\\d+)\\|(\\d+)\\)$"), - "stringlength": regexp.MustCompile("^stringlength\\((\\d+)\\|(\\d+)\\)$"), - "in": regexp.MustCompile(`^in\((.*)\)`), - "matches": regexp.MustCompile(`^matches\((.+)\)$`), - "rsapub": regexp.MustCompile("^rsapub\\((\\d+)\\)$"), + "range": regexp.MustCompile("^range\\((\\d+)\\|(\\d+)\\)$"), + "length": regexp.MustCompile("^length\\((\\d+)\\|(\\d+)\\)$"), + "runelength": regexp.MustCompile("^runelength\\((\\d+)\\|(\\d+)\\)$"), + "stringlength": regexp.MustCompile("^stringlength\\((\\d+)\\|(\\d+)\\)$"), + "in": regexp.MustCompile(`^in\((.*)\)`), + "matches": regexp.MustCompile(`^matches\((.+)\)$`), + "rsapub": regexp.MustCompile("^rsapub\\((\\d+)\\)$"), + "minstringlength": regexp.MustCompile("^minstringlength\\((\\d+)\\)$"), + "maxstringlength": regexp.MustCompile("^maxstringlength\\((\\d+)\\)$"), } type customTypeTagMap struct { @@ -114,6 +131,7 @@ var TagMap = map[string]Validator{ "int": IsInt, "float": IsFloat, "null": IsNull, + "notnull": IsNotNull, "uuid": IsUUID, "uuidv3": IsUUIDv3, "uuidv4": IsUUIDv4, @@ -146,6 +164,7 @@ var TagMap = map[string]Validator{ "ISO3166Alpha2": IsISO3166Alpha2, "ISO3166Alpha3": IsISO3166Alpha3, "ISO4217": IsISO4217, + "IMEI": IsIMEI, } // ISO3166Entry stores country codes @@ -430,10 +449,10 @@ var ISO4217List = []string{ "PAB", "PEN", "PGK", "PHP", "PKR", "PLN", "PYG", "QAR", "RON", "RSD", "RUB", "RWF", - "SAR", "SBD", "SCR", "SDG", "SEK", "SGD", "SHP", "SLL", "SOS", "SRD", "SSP", "STD", "SVC", "SYP", "SZL", + "SAR", "SBD", "SCR", "SDG", "SEK", "SGD", "SHP", "SLL", "SOS", "SRD", "SSP", "STD", "STN", "SVC", "SYP", "SZL", "THB", "TJS", "TMT", "TND", "TOP", "TRY", "TTD", "TWD", "TZS", - "UAH", "UGX", "USD", "USN", "UYI", "UYU", "UZS", - "VEF", "VND", "VUV", + "UAH", "UGX", "USD", "USN", "UYI", "UYU", "UYW", "UZS", + "VEF", "VES", "VND", "VUV", "WST", "XAF", "XAG", "XAU", "XBA", "XBB", "XBC", "XBD", "XCD", "XDR", "XOF", "XPD", "XPF", "XPT", "XSU", "XTS", "XUA", "XXX", "YER", diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/utils.go b/src/runtime/vendor/github.com/asaskevich/govalidator/utils.go index a0b706a743ce..f4c30f824a22 100644 --- a/src/runtime/vendor/github.com/asaskevich/govalidator/utils.go +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/utils.go @@ -12,20 +12,20 @@ import ( 
"unicode/utf8" ) -// Contains check if the string contains the substring. +// Contains checks if the string contains the substring. func Contains(str, substring string) bool { return strings.Contains(str, substring) } -// Matches check if string matches the pattern (pattern is regular expression) +// Matches checks if string matches the pattern (pattern is regular expression) // In case of error return false func Matches(str, pattern string) bool { match, _ := regexp.MatchString(pattern, str) return match } -// LeftTrim trim characters from the left-side of the input. -// If second argument is empty, it's will be remove leading spaces. +// LeftTrim trims characters from the left side of the input. +// If second argument is empty, it will remove leading spaces. func LeftTrim(str, chars string) string { if chars == "" { return strings.TrimLeftFunc(str, unicode.IsSpace) @@ -34,8 +34,8 @@ func LeftTrim(str, chars string) string { return r.ReplaceAllString(str, "") } -// RightTrim trim characters from the right-side of the input. -// If second argument is empty, it's will be remove spaces. +// RightTrim trims characters from the right side of the input. +// If second argument is empty, it will remove trailing spaces. func RightTrim(str, chars string) string { if chars == "" { return strings.TrimRightFunc(str, unicode.IsSpace) @@ -44,27 +44,27 @@ func RightTrim(str, chars string) string { return r.ReplaceAllString(str, "") } -// Trim trim characters from both sides of the input. -// If second argument is empty, it's will be remove spaces. +// Trim trims characters from both sides of the input. +// If second argument is empty, it will remove spaces. func Trim(str, chars string) string { return LeftTrim(RightTrim(str, chars), chars) } -// WhiteList remove characters that do not appear in the whitelist. +// WhiteList removes characters that do not appear in the whitelist. func WhiteList(str, chars string) string { pattern := "[^" + chars + "]+" r, _ := regexp.Compile(pattern) return r.ReplaceAllString(str, "") } -// BlackList remove characters that appear in the blacklist. +// BlackList removes characters that appear in the blacklist. func BlackList(str, chars string) string { pattern := "[" + chars + "]+" r, _ := regexp.Compile(pattern) return r.ReplaceAllString(str, "") } -// StripLow remove characters with a numerical value < 32 and 127, mostly control characters. +// StripLow removes characters with a numerical value < 32 and 127, mostly control characters. // If keep_new_lines is true, newline characters are preserved (\n and \r, hex 0xA and 0xD). func StripLow(str string, keepNewLines bool) string { chars := "" @@ -76,13 +76,13 @@ func StripLow(str string, keepNewLines bool) string { return BlackList(str, chars) } -// ReplacePattern replace regular expression pattern in string +// ReplacePattern replaces regular expression pattern in string func ReplacePattern(str, pattern, replace string) string { r, _ := regexp.Compile(pattern) return r.ReplaceAllString(str, replace) } -// Escape replace <, >, & and " with HTML entities. +// Escape replaces <, >, & and " with HTML entities. 
var Escape = html.EscapeString func addSegment(inrune, segment []rune) []rune { @@ -120,7 +120,7 @@ func CamelCaseToUnderscore(str string) string { return string(output) } -// Reverse return reversed string +// Reverse returns reversed string func Reverse(s string) string { r := []rune(s) for i, j := 0, len(r)-1; i < j; i, j = i+1, j-1 { @@ -129,12 +129,12 @@ func Reverse(s string) string { return string(r) } -// GetLines split string by "\n" and return array of lines +// GetLines splits string by "\n" and return array of lines func GetLines(s string) []string { return strings.Split(s, "\n") } -// GetLine return specified line of multiline string +// GetLine returns specified line of multiline string func GetLine(s string, index int) (string, error) { lines := GetLines(s) if index < 0 || index >= len(lines) { @@ -143,12 +143,12 @@ func GetLine(s string, index int) (string, error) { return lines[index], nil } -// RemoveTags remove all tags from HTML string +// RemoveTags removes all tags from HTML string func RemoveTags(s string) string { return ReplacePattern(s, "<[^>]*>", "") } -// SafeFileName return safe string that can be used in file names +// SafeFileName returns safe string that can be used in file names func SafeFileName(str string) string { name := strings.ToLower(str) name = path.Clean(path.Base(name)) @@ -210,23 +210,23 @@ func Truncate(str string, length int, ending string) string { return str } -// PadLeft pad left side of string if size of string is less then indicated pad length +// PadLeft pads left side of a string if size of string is less then indicated pad length func PadLeft(str string, padStr string, padLen int) string { return buildPadStr(str, padStr, padLen, true, false) } -// PadRight pad right side of string if size of string is less then indicated pad length +// PadRight pads right side of a string if size of string is less then indicated pad length func PadRight(str string, padStr string, padLen int) string { return buildPadStr(str, padStr, padLen, false, true) } -// PadBoth pad sides of string if size of string is less then indicated pad length +// PadBoth pads both sides of a string if size of string is less then indicated pad length func PadBoth(str string, padStr string, padLen int) string { return buildPadStr(str, padStr, padLen, true, true) } -// PadString either left, right or both sides, not the padding string can be unicode and more then one -// character +// PadString either left, right or both sides. +// Note that padding string can be unicode and more then one character func buildPadStr(str string, padStr string, padLen int, padLeft bool, padRight bool) string { // When padded length is less then the current string size diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/validator.go b/src/runtime/vendor/github.com/asaskevich/govalidator/validator.go index b18bbcb4c99f..5c918fc4bc7d 100644 --- a/src/runtime/vendor/github.com/asaskevich/govalidator/validator.go +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/validator.go @@ -32,7 +32,7 @@ var ( const maxURLRuneCount = 2083 const minURLRuneCount = 3 -const RF3339WithoutZone = "2006-01-02T15:04:05" +const rfc3339WithoutZone = "2006-01-02T15:04:05" // SetFieldsRequiredByDefault causes validation to fail when struct fields // do not include validations or are not explicitly marked as exempt (using `valid:"-"` or `valid:"email,optional"`). @@ -63,13 +63,13 @@ func SetNilPtrAllowedByRequired(value bool) { nilPtrAllowedByRequired = value } -// IsEmail check if the string is an email. 
+// IsEmail checks if the string is an email. func IsEmail(str string) bool { // TODO uppercase letters are not supported return rxEmail.MatchString(str) } -// IsExistingEmail check if the string is an email of existing domain +// IsExistingEmail checks if the string is an email of existing domain func IsExistingEmail(email string) bool { if len(email) < 6 || len(email) > 254 { @@ -84,13 +84,13 @@ func IsExistingEmail(email string) bool { if len(user) > 64 { return false } - if userDotRegexp.MatchString(user) || !userRegexp.MatchString(user) || !hostRegexp.MatchString(host) { - return false - } switch host { case "localhost", "example.com": return true } + if userDotRegexp.MatchString(user) || !userRegexp.MatchString(user) || !hostRegexp.MatchString(host) { + return false + } if _, err := net.LookupMX(host); err != nil { if _, err := net.LookupIP(host); err != nil { return false @@ -100,7 +100,7 @@ func IsExistingEmail(email string) bool { return true } -// IsURL check if the string is an URL. +// IsURL checks if the string is an URL. func IsURL(str string) bool { if str == "" || utf8.RuneCountInString(str) >= maxURLRuneCount || len(str) <= minURLRuneCount || strings.HasPrefix(str, ".") { return false @@ -124,7 +124,7 @@ func IsURL(str string) bool { return rxURL.MatchString(str) } -// IsRequestURL check if the string rawurl, assuming +// IsRequestURL checks if the string rawurl, assuming // it was received in an HTTP request, is a valid // URL confirm to RFC 3986 func IsRequestURL(rawurl string) bool { @@ -138,7 +138,7 @@ func IsRequestURL(rawurl string) bool { return true } -// IsRequestURI check if the string rawurl, assuming +// IsRequestURI checks if the string rawurl, assuming // it was received in an HTTP request, is an // absolute URI or an absolute path. func IsRequestURI(rawurl string) bool { @@ -146,7 +146,7 @@ func IsRequestURI(rawurl string) bool { return err == nil } -// IsAlpha check if the string contains only letters (a-zA-Z). Empty string is valid. +// IsAlpha checks if the string contains only letters (a-zA-Z). Empty string is valid. func IsAlpha(str string) bool { if IsNull(str) { return true @@ -154,7 +154,7 @@ func IsAlpha(str string) bool { return rxAlpha.MatchString(str) } -//IsUTFLetter check if the string contains only unicode letter characters. +//IsUTFLetter checks if the string contains only unicode letter characters. //Similar to IsAlpha but for all languages. Empty string is valid. func IsUTFLetter(str string) bool { if IsNull(str) { @@ -170,7 +170,7 @@ func IsUTFLetter(str string) bool { } -// IsAlphanumeric check if the string contains only letters and numbers. Empty string is valid. +// IsAlphanumeric checks if the string contains only letters and numbers. Empty string is valid. func IsAlphanumeric(str string) bool { if IsNull(str) { return true @@ -178,7 +178,7 @@ func IsAlphanumeric(str string) bool { return rxAlphanumeric.MatchString(str) } -// IsUTFLetterNumeric check if the string contains only unicode letters and numbers. Empty string is valid. +// IsUTFLetterNumeric checks if the string contains only unicode letters and numbers. Empty string is valid. func IsUTFLetterNumeric(str string) bool { if IsNull(str) { return true @@ -192,7 +192,7 @@ func IsUTFLetterNumeric(str string) bool { } -// IsNumeric check if the string contains only numbers. Empty string is valid. +// IsNumeric checks if the string contains only numbers. Empty string is valid. 
func IsNumeric(str string) bool { if IsNull(str) { return true @@ -200,7 +200,7 @@ func IsNumeric(str string) bool { return rxNumeric.MatchString(str) } -// IsUTFNumeric check if the string contains only unicode numbers of any kind. +// IsUTFNumeric checks if the string contains only unicode numbers of any kind. // Numbers can be 0-9 but also Fractions ¾,Roman Ⅸ and Hangzhou 〩. Empty string is valid. func IsUTFNumeric(str string) bool { if IsNull(str) { @@ -222,7 +222,7 @@ func IsUTFNumeric(str string) bool { } -// IsUTFDigit check if the string contains only unicode radix-10 decimal digits. Empty string is valid. +// IsUTFDigit checks if the string contains only unicode radix-10 decimal digits. Empty string is valid. func IsUTFDigit(str string) bool { if IsNull(str) { return true @@ -243,22 +243,22 @@ func IsUTFDigit(str string) bool { } -// IsHexadecimal check if the string is a hexadecimal number. +// IsHexadecimal checks if the string is a hexadecimal number. func IsHexadecimal(str string) bool { return rxHexadecimal.MatchString(str) } -// IsHexcolor check if the string is a hexadecimal color. +// IsHexcolor checks if the string is a hexadecimal color. func IsHexcolor(str string) bool { return rxHexcolor.MatchString(str) } -// IsRGBcolor check if the string is a valid RGB color in form rgb(RRR, GGG, BBB). +// IsRGBcolor checks if the string is a valid RGB color in form rgb(RRR, GGG, BBB). func IsRGBcolor(str string) bool { return rxRGBcolor.MatchString(str) } -// IsLowerCase check if the string is lowercase. Empty string is valid. +// IsLowerCase checks if the string is lowercase. Empty string is valid. func IsLowerCase(str string) bool { if IsNull(str) { return true @@ -266,7 +266,7 @@ func IsLowerCase(str string) bool { return str == strings.ToLower(str) } -// IsUpperCase check if the string is uppercase. Empty string is valid. +// IsUpperCase checks if the string is uppercase. Empty string is valid. func IsUpperCase(str string) bool { if IsNull(str) { return true @@ -274,7 +274,7 @@ func IsUpperCase(str string) bool { return str == strings.ToUpper(str) } -// HasLowerCase check if the string contains at least 1 lowercase. Empty string is valid. +// HasLowerCase checks if the string contains at least 1 lowercase. Empty string is valid. func HasLowerCase(str string) bool { if IsNull(str) { return true @@ -282,7 +282,7 @@ func HasLowerCase(str string) bool { return rxHasLowerCase.MatchString(str) } -// HasUpperCase check if the string contians as least 1 uppercase. Empty string is valid. +// HasUpperCase checks if the string contains as least 1 uppercase. Empty string is valid. func HasUpperCase(str string) bool { if IsNull(str) { return true @@ -290,7 +290,7 @@ func HasUpperCase(str string) bool { return rxHasUpperCase.MatchString(str) } -// IsInt check if the string is an integer. Empty string is valid. +// IsInt checks if the string is an integer. Empty string is valid. func IsInt(str string) bool { if IsNull(str) { return true @@ -298,12 +298,12 @@ func IsInt(str string) bool { return rxInt.MatchString(str) } -// IsFloat check if the string is a float. +// IsFloat checks if the string is a float. func IsFloat(str string) bool { return str != "" && rxFloat.MatchString(str) } -// IsDivisibleBy check if the string is a number that's divisible by another. +// IsDivisibleBy checks if the string is a number that's divisible by another. // If second argument is not valid integer or zero, it's return false. 
// Otherwise, if first argument is not valid integer or zero, it's return true (Invalid string converts to zero). func IsDivisibleBy(str, num string) bool { @@ -316,47 +316,52 @@ func IsDivisibleBy(str, num string) bool { return (p == 0) || (p%q == 0) } -// IsNull check if the string is null. +// IsNull checks if the string is null. func IsNull(str string) bool { return len(str) == 0 } +// IsNotNull checks if the string is not null. +func IsNotNull(str string) bool { + return !IsNull(str) +} + // HasWhitespaceOnly checks the string only contains whitespace func HasWhitespaceOnly(str string) bool { - return len(str) > 0 && rxHasWhitespaceOnly.MatchString(str) + return len(str) > 0 && rxHasWhitespaceOnly.MatchString(str) } // HasWhitespace checks if the string contains any whitespace func HasWhitespace(str string) bool { - return len(str) > 0 && rxHasWhitespace.MatchString(str) + return len(str) > 0 && rxHasWhitespace.MatchString(str) } -// IsByteLength check if the string's length (in bytes) falls in a range. +// IsByteLength checks if the string's length (in bytes) falls in a range. func IsByteLength(str string, min, max int) bool { return len(str) >= min && len(str) <= max } -// IsUUIDv3 check if the string is a UUID version 3. +// IsUUIDv3 checks if the string is a UUID version 3. func IsUUIDv3(str string) bool { return rxUUID3.MatchString(str) } -// IsUUIDv4 check if the string is a UUID version 4. +// IsUUIDv4 checks if the string is a UUID version 4. func IsUUIDv4(str string) bool { return rxUUID4.MatchString(str) } -// IsUUIDv5 check if the string is a UUID version 5. +// IsUUIDv5 checks if the string is a UUID version 5. func IsUUIDv5(str string) bool { return rxUUID5.MatchString(str) } -// IsUUID check if the string is a UUID (version 3, 4 or 5). +// IsUUID checks if the string is a UUID (version 3, 4 or 5). func IsUUID(str string) bool { return rxUUID.MatchString(str) } -// IsCreditCard check if the string is a credit card. +// IsCreditCard checks if the string is a credit card. func IsCreditCard(str string) bool { sanitized := notNumberRegexp.ReplaceAllString(str, "") if !rxCreditCard.MatchString(sanitized) { @@ -372,7 +377,7 @@ func IsCreditCard(str string) bool { if shouldDouble { tmpNum *= 2 if tmpNum >= 10 { - sum += ((tmpNum % 10) + 1) + sum += (tmpNum % 10) + 1 } else { sum += tmpNum } @@ -385,18 +390,18 @@ func IsCreditCard(str string) bool { return sum%10 == 0 } -// IsISBN10 check if the string is an ISBN version 10. +// IsISBN10 checks if the string is an ISBN version 10. func IsISBN10(str string) bool { return IsISBN(str, 10) } -// IsISBN13 check if the string is an ISBN version 13. +// IsISBN13 checks if the string is an ISBN version 13. func IsISBN13(str string) bool { return IsISBN(str, 13) } -// IsISBN check if the string is an ISBN (version 10 or 13). -// If version value is not equal to 10 or 13, it will be check both variants. +// IsISBN checks if the string is an ISBN (version 10 or 13). +// If version value is not equal to 10 or 13, it will be checks both variants. func IsISBN(str string, version int) bool { sanitized := whiteSpacesAndMinus.ReplaceAllString(str, "") var checksum int32 @@ -430,13 +435,13 @@ func IsISBN(str string, version int) bool { return IsISBN(str, 10) || IsISBN(str, 13) } -// IsJSON check if the string is valid JSON (note: uses json.Unmarshal). +// IsJSON checks if the string is valid JSON (note: uses json.Unmarshal). 
func IsJSON(str string) bool { var js json.RawMessage return json.Unmarshal([]byte(str), &js) == nil } -// IsMultibyte check if the string contains one or more multibyte chars. Empty string is valid. +// IsMultibyte checks if the string contains one or more multibyte chars. Empty string is valid. func IsMultibyte(str string) bool { if IsNull(str) { return true @@ -444,7 +449,7 @@ func IsMultibyte(str string) bool { return rxMultibyte.MatchString(str) } -// IsASCII check if the string contains ASCII chars only. Empty string is valid. +// IsASCII checks if the string contains ASCII chars only. Empty string is valid. func IsASCII(str string) bool { if IsNull(str) { return true @@ -452,7 +457,7 @@ func IsASCII(str string) bool { return rxASCII.MatchString(str) } -// IsPrintableASCII check if the string contains printable ASCII chars only. Empty string is valid. +// IsPrintableASCII checks if the string contains printable ASCII chars only. Empty string is valid. func IsPrintableASCII(str string) bool { if IsNull(str) { return true @@ -460,7 +465,7 @@ func IsPrintableASCII(str string) bool { return rxPrintableASCII.MatchString(str) } -// IsFullWidth check if the string contains any full-width chars. Empty string is valid. +// IsFullWidth checks if the string contains any full-width chars. Empty string is valid. func IsFullWidth(str string) bool { if IsNull(str) { return true @@ -468,7 +473,7 @@ func IsFullWidth(str string) bool { return rxFullWidth.MatchString(str) } -// IsHalfWidth check if the string contains any half-width chars. Empty string is valid. +// IsHalfWidth checks if the string contains any half-width chars. Empty string is valid. func IsHalfWidth(str string) bool { if IsNull(str) { return true @@ -476,7 +481,7 @@ func IsHalfWidth(str string) bool { return rxHalfWidth.MatchString(str) } -// IsVariableWidth check if the string contains a mixture of full and half-width chars. Empty string is valid. +// IsVariableWidth checks if the string contains a mixture of full and half-width chars. Empty string is valid. func IsVariableWidth(str string) bool { if IsNull(str) { return true @@ -484,12 +489,12 @@ func IsVariableWidth(str string) bool { return rxHalfWidth.MatchString(str) && rxFullWidth.MatchString(str) } -// IsBase64 check if a string is base64 encoded. +// IsBase64 checks if a string is base64 encoded. func IsBase64(str string) bool { return rxBase64.MatchString(str) } -// IsFilePath check is a string is Win or Unix file path and returns it's type. +// IsFilePath checks is a string is Win or Unix file path and returns it's type. func IsFilePath(str string) (bool, int) { if rxWinPath.MatchString(str) { //check windows path limit see: @@ -513,6 +518,11 @@ func IsDataURI(str string) bool { return IsBase64(dataURI[1]) } +// IsMagnetURI checks if a string is valid magnet URI +func IsMagnetURI(str string) bool { + return rxMagnetURI.MatchString(str) +} + // IsISO3166Alpha2 checks if a string is valid two-letter country code func IsISO3166Alpha2(str string) bool { for _, entry := range ISO3166List { @@ -565,7 +575,7 @@ func IsDNSName(str string) bool { // IsHash checks if a string is a hash of type algorithm. 
// Algorithm is one of ['md4', 'md5', 'sha1', 'sha256', 'sha384', 'sha512', 'ripemd128', 'ripemd160', 'tiger128', 'tiger160', 'tiger192', 'crc32', 'crc32b'] func IsHash(str string, algorithm string) bool { - len := "0" + var len string algo := strings.ToLower(algorithm) if algo == "crc32" || algo == "crc32b" { @@ -589,9 +599,73 @@ func IsHash(str string, algorithm string) bool { return Matches(str, "^[a-f0-9]{"+len+"}$") } +// IsSHA512 checks is a string is a SHA512 hash. Alias for `IsHash(str, "sha512")` +func IsSHA512(str string) bool { + return IsHash(str, "sha512") +} + +// IsSHA384 checks is a string is a SHA384 hash. Alias for `IsHash(str, "sha384")` +func IsSHA384(str string) bool { + return IsHash(str, "sha384") +} + +// IsSHA256 checks is a string is a SHA256 hash. Alias for `IsHash(str, "sha256")` +func IsSHA256(str string) bool { + return IsHash(str, "sha256") +} + +// IsTiger192 checks is a string is a Tiger192 hash. Alias for `IsHash(str, "tiger192")` +func IsTiger192(str string) bool { + return IsHash(str, "tiger192") +} + +// IsTiger160 checks is a string is a Tiger160 hash. Alias for `IsHash(str, "tiger160")` +func IsTiger160(str string) bool { + return IsHash(str, "tiger160") +} + +// IsRipeMD160 checks is a string is a RipeMD160 hash. Alias for `IsHash(str, "ripemd160")` +func IsRipeMD160(str string) bool { + return IsHash(str, "ripemd160") +} + +// IsSHA1 checks is a string is a SHA-1 hash. Alias for `IsHash(str, "sha1")` +func IsSHA1(str string) bool { + return IsHash(str, "sha1") +} + +// IsTiger128 checks is a string is a Tiger128 hash. Alias for `IsHash(str, "tiger128")` +func IsTiger128(str string) bool { + return IsHash(str, "tiger128") +} + +// IsRipeMD128 checks is a string is a RipeMD128 hash. Alias for `IsHash(str, "ripemd128")` +func IsRipeMD128(str string) bool { + return IsHash(str, "ripemd128") +} + +// IsCRC32 checks is a string is a CRC32 hash. Alias for `IsHash(str, "crc32")` +func IsCRC32(str string) bool { + return IsHash(str, "crc32") +} + +// IsCRC32b checks is a string is a CRC32b hash. Alias for `IsHash(str, "crc32b")` +func IsCRC32b(str string) bool { + return IsHash(str, "crc32b") +} + +// IsMD5 checks is a string is a MD5 hash. Alias for `IsHash(str, "md5")` +func IsMD5(str string) bool { + return IsHash(str, "md5") +} + +// IsMD4 checks is a string is a MD4 hash. Alias for `IsHash(str, "md4")` +func IsMD4(str string) bool { + return IsHash(str, "md4") +} + // IsDialString validates the given string for usage with the various Dial() functions func IsDialString(str string) bool { - if h, p, err := net.SplitHostPort(str); err == nil && h != "" && p != "" && (IsDNSName(h) || IsIP(h)) && IsPort(p) { return true } @@ -599,7 +673,7 @@ func IsDialString(str string) bool { return false } -// IsIP checks if a string is either IP version 4 or 6. +// IsIP checks if a string is either IP version 4 or 6. Alias for `net.ParseIP` func IsIP(str string) bool { return net.ParseIP(str) != nil } @@ -612,25 +686,25 @@ func IsPort(str string) bool { return false } -// IsIPv4 check if the string is an IP version 4. +// IsIPv4 checks if the string is an IP version 4. func IsIPv4(str string) bool { ip := net.ParseIP(str) return ip != nil && strings.Contains(str, ".") } -// IsIPv6 check if the string is an IP version 6. +// IsIPv6 checks if the string is an IP version 6. 
func IsIPv6(str string) bool { ip := net.ParseIP(str) return ip != nil && strings.Contains(str, ":") } -// IsCIDR check if the string is an valid CIDR notiation (IPV4 & IPV6) +// IsCIDR checks if the string is an valid CIDR notiation (IPV4 & IPV6) func IsCIDR(str string) bool { _, _, err := net.ParseCIDR(str) return err == nil } -// IsMAC check if a string is valid MAC address. +// IsMAC checks if a string is valid MAC address. // Possible MAC formats: // 01:23:45:67:89:ab // 01:23:45:67:89:ab:cd:ef @@ -648,22 +722,70 @@ func IsHost(str string) bool { return IsIP(str) || IsDNSName(str) } -// IsMongoID check if the string is a valid hex-encoded representation of a MongoDB ObjectId. +// IsMongoID checks if the string is a valid hex-encoded representation of a MongoDB ObjectId. func IsMongoID(str string) bool { return rxHexadecimal.MatchString(str) && (len(str) == 24) } -// IsLatitude check if a string is valid latitude. +// IsLatitude checks if a string is valid latitude. func IsLatitude(str string) bool { return rxLatitude.MatchString(str) } -// IsLongitude check if a string is valid longitude. +// IsLongitude checks if a string is valid longitude. func IsLongitude(str string) bool { return rxLongitude.MatchString(str) } -// IsRsaPublicKey check if a string is valid public key with provided length +// IsIMEI checks if a string is valid IMEI +func IsIMEI(str string) bool { + return rxIMEI.MatchString(str) +} + +// IsIMSI checks if a string is valid IMSI +func IsIMSI(str string) bool { + if !rxIMSI.MatchString(str) { + return false + } + + mcc, err := strconv.ParseInt(str[0:3], 10, 32) + if err != nil { + return false + } + + switch mcc { + case 202, 204, 206, 208, 212, 213, 214, 216, 218, 219: + case 220, 221, 222, 226, 228, 230, 231, 232, 234, 235: + case 238, 240, 242, 244, 246, 247, 248, 250, 255, 257: + case 259, 260, 262, 266, 268, 270, 272, 274, 276, 278: + case 280, 282, 283, 284, 286, 288, 289, 290, 292, 293: + case 294, 295, 297, 302, 308, 310, 311, 312, 313, 314: + case 315, 316, 330, 332, 334, 338, 340, 342, 344, 346: + case 348, 350, 352, 354, 356, 358, 360, 362, 363, 364: + case 365, 366, 368, 370, 372, 374, 376, 400, 401, 402: + case 404, 405, 406, 410, 412, 413, 414, 415, 416, 417: + case 418, 419, 420, 421, 422, 424, 425, 426, 427, 428: + case 429, 430, 431, 432, 434, 436, 437, 438, 440, 441: + case 450, 452, 454, 455, 456, 457, 460, 461, 466, 467: + case 470, 472, 502, 505, 510, 514, 515, 520, 525, 528: + case 530, 536, 537, 539, 540, 541, 542, 543, 544, 545: + case 546, 547, 548, 549, 550, 551, 552, 553, 554, 555: + case 602, 603, 604, 605, 606, 607, 608, 609, 610, 611: + case 612, 613, 614, 615, 616, 617, 618, 619, 620, 621: + case 622, 623, 624, 625, 626, 627, 628, 629, 630, 631: + case 632, 633, 634, 635, 636, 637, 638, 639, 640, 641: + case 642, 643, 645, 646, 647, 648, 649, 650, 651, 652: + case 653, 654, 655, 657, 658, 659, 702, 704, 706, 708: + case 710, 712, 714, 716, 722, 724, 730, 732, 734, 736: + case 738, 740, 742, 744, 746, 748, 750, 995: + return true + default: + return false + } + return true +} + +// IsRsaPublicKey checks if a string is valid public key with provided length func IsRsaPublicKey(str string, keylen int) bool { bb := bytes.NewBufferString(str) pemBytes, err := ioutil.ReadAll(bb) @@ -717,7 +839,7 @@ func toJSONName(tag string) string { return name } -func PrependPathToErrors(err error, path string) error { +func prependPathToErrors(err error, path string) error { switch err2 := err.(type) { case Error: err2.Path = append([]string{path}, 
err2.Path...) @@ -725,16 +847,125 @@ func PrependPathToErrors(err error, path string) error { case Errors: errors := err2.Errors() for i, err3 := range errors { - errors[i] = PrependPathToErrors(err3, path) + errors[i] = prependPathToErrors(err3, path) } return err2 } - fmt.Println(err) return err } +// ValidateArray performs validation according to condition iterator that validates every element of the array +func ValidateArray(array []interface{}, iterator ConditionIterator) bool { + return Every(array, iterator) +} + +// ValidateMap use validation map for fields. +// result will be equal to `false` if there are any errors. +// s is the map containing the data to be validated. +// m is the validation map in the form: +// map[string]interface{}{"name":"required,alpha","address":map[string]interface{}{"line1":"required,alphanum"}} +func ValidateMap(s map[string]interface{}, m map[string]interface{}) (bool, error) { + if s == nil { + return true, nil + } + result := true + var err error + var errs Errors + var index int + val := reflect.ValueOf(s) + for key, value := range s { + presentResult := true + validator, ok := m[key] + if !ok { + presentResult = false + var err error + err = fmt.Errorf("all map keys has to be present in the validation map; got %s", key) + err = prependPathToErrors(err, key) + errs = append(errs, err) + } + valueField := reflect.ValueOf(value) + mapResult := true + typeResult := true + structResult := true + resultField := true + switch subValidator := validator.(type) { + case map[string]interface{}: + var err error + if v, ok := value.(map[string]interface{}); !ok { + mapResult = false + err = fmt.Errorf("map validator has to be for the map type only; got %s", valueField.Type().String()) + err = prependPathToErrors(err, key) + errs = append(errs, err) + } else { + mapResult, err = ValidateMap(v, subValidator) + if err != nil { + mapResult = false + err = prependPathToErrors(err, key) + errs = append(errs, err) + } + } + case string: + if (valueField.Kind() == reflect.Struct || + (valueField.Kind() == reflect.Ptr && valueField.Elem().Kind() == reflect.Struct)) && + subValidator != "-" { + var err error + structResult, err = ValidateStruct(valueField.Interface()) + if err != nil { + err = prependPathToErrors(err, key) + errs = append(errs, err) + } + } + resultField, err = typeCheck(valueField, reflect.StructField{ + Name: key, + PkgPath: "", + Type: val.Type(), + Tag: reflect.StructTag(fmt.Sprintf("%s:%q", tagName, subValidator)), + Offset: 0, + Index: []int{index}, + Anonymous: false, + }, val, nil) + if err != nil { + errs = append(errs, err) + } + case nil: + // already handlerd when checked before + default: + typeResult = false + err = fmt.Errorf("map validator has to be either map[string]interface{} or string; got %s", valueField.Type().String()) + err = prependPathToErrors(err, key) + errs = append(errs, err) + } + result = result && presentResult && typeResult && resultField && structResult && mapResult + index++ + } + // checks required keys + requiredResult := true + for key, value := range m { + if schema, ok := value.(string); ok { + tags := parseTagIntoMap(schema) + if required, ok := tags["required"]; ok { + if _, ok := s[key]; !ok { + requiredResult = false + if required.customErrorMessage != "" { + err = Error{key, fmt.Errorf(required.customErrorMessage), true, "required", []string{}} + } else { + err = Error{key, fmt.Errorf("required field missing"), false, "required", []string{}} + } + errs = append(errs, err) + } + } + } + } + + if len(errs) > 0 
{ + err = errs + } + return result && requiredResult, err +} + // ValidateStruct use tags for fields. // result will be equal to `false` if there are any errors. +// todo currently there is no guarantee that errors will be returned in predictable order (tests may to fail) func ValidateStruct(s interface{}) (bool, error) { if s == nil { return true, nil @@ -766,7 +997,7 @@ func ValidateStruct(s interface{}) (bool, error) { var err error structResult, err = ValidateStruct(valueField.Interface()) if err != nil { - err = PrependPathToErrors(err, typeField.Name) + err = prependPathToErrors(err, typeField.Name) errs = append(errs, err) } } @@ -803,6 +1034,42 @@ func ValidateStruct(s interface{}) (bool, error) { return result, err } +// ValidateStructAsync performs async validation of the struct and returns results through the channels +func ValidateStructAsync(s interface{}) (<-chan bool, <-chan error) { + res := make(chan bool) + errors := make(chan error) + + go func() { + defer close(res) + defer close(errors) + + isValid, isFailed := ValidateStruct(s) + + res <- isValid + errors <- isFailed + }() + + return res, errors +} + +// ValidateMapAsync performs async validation of the map and returns results through the channels +func ValidateMapAsync(s map[string]interface{}, m map[string]interface{}) (<-chan bool, <-chan error) { + res := make(chan bool) + errors := make(chan error) + + go func() { + defer close(res) + defer close(errors) + + isValid, isFailed := ValidateMap(s, m) + + res <- isValid + errors <- isFailed + }() + + return res, errors +} + // parseTagIntoMap parses a struct tag `valid:required~Some error message,length(2|3)` into map[string]string{"required": "Some error message", "length(2|3)": ""} func parseTagIntoMap(tag string) tagOptionsMap { optionsMap := make(tagOptionsMap) @@ -851,28 +1118,45 @@ func IsSSN(str string) bool { return rxSSN.MatchString(str) } -// IsSemver check if string is valid semantic version +// IsSemver checks if string is valid semantic version func IsSemver(str string) bool { return rxSemver.MatchString(str) } -// IsTime check if string is valid according to given format +// IsType checks if interface is of some type +func IsType(v interface{}, params ...string) bool { + if len(params) == 1 { + typ := params[0] + return strings.Replace(reflect.TypeOf(v).String(), " ", "", -1) == strings.Replace(typ, " ", "", -1) + } + return false +} + +// IsTime checks if string is valid according to given format func IsTime(str string, format string) bool { _, err := time.Parse(format, str) return err == nil } -// IsRFC3339 check if string is valid timestamp value according to RFC3339 +// IsUnixTime checks if string is valid unix timestamp value +func IsUnixTime(str string) bool { + if _, err := strconv.Atoi(str); err == nil { + return true + } + return false +} + +// IsRFC3339 checks if string is valid timestamp value according to RFC3339 func IsRFC3339(str string) bool { return IsTime(str, time.RFC3339) } -// IsRFC3339WithoutZone check if string is valid timestamp value according to RFC3339 which excludes the timezone. +// IsRFC3339WithoutZone checks if string is valid timestamp value according to RFC3339 which excludes the timezone. 
func IsRFC3339WithoutZone(str string) bool { - return IsTime(str, RF3339WithoutZone) + return IsTime(str, rfc3339WithoutZone) } -// IsISO4217 check if string is valid ISO currency code +// IsISO4217 checks if string is valid ISO currency code func IsISO4217(str string) bool { for _, currency := range ISO4217List { if str == currency { @@ -883,7 +1167,7 @@ func IsISO4217(str string) bool { return false } -// ByteLength check string's length +// ByteLength checks string's length func ByteLength(str string, params ...string) bool { if len(params) == 2 { min, _ := ToInt(params[0]) @@ -894,13 +1178,13 @@ func ByteLength(str string, params ...string) bool { return false } -// RuneLength check string's length +// RuneLength checks string's length // Alias for StringLength func RuneLength(str string, params ...string) bool { return StringLength(str, params...) } -// IsRsaPub check whether string is valid RSA key +// IsRsaPub checks whether string is valid RSA key // Alias for IsRsaPublicKey func IsRsaPub(str string, params ...string) bool { if len(params) == 1 { @@ -920,7 +1204,7 @@ func StringMatches(s string, params ...string) bool { return false } -// StringLength check string's length (including multi byte strings) +// StringLength checks string's length (including multi byte strings) func StringLength(str string, params ...string) bool { if len(params) == 2 { @@ -933,7 +1217,31 @@ func StringLength(str string, params ...string) bool { return false } -// Range check string's length +// MinStringLength checks string's minimum length (including multi byte strings) +func MinStringLength(str string, params ...string) bool { + + if len(params) == 1 { + strLength := utf8.RuneCountInString(str) + min, _ := ToInt(params[0]) + return strLength >= int(min) + } + + return false +} + +// MaxStringLength checks string's maximum length (including multi byte strings) +func MaxStringLength(str string, params ...string) bool { + + if len(params) == 1 { + strLength := utf8.RuneCountInString(str) + max, _ := ToInt(params[0]) + return strLength <= int(max) + } + + return false +} + +// Range checks string's length func Range(str string, params ...string) bool { if len(params) == 2 { value, _ := ToFloat(str) @@ -945,7 +1253,8 @@ func Range(str string, params ...string) bool { return false } -func isInRaw(str string, params ...string) bool { +// IsInRaw checks if string is in list of allowed values +func IsInRaw(str string, params ...string) bool { if len(params) == 1 { rawParams := params[0] @@ -957,7 +1266,7 @@ func isInRaw(str string, params ...string) bool { return false } -// IsIn check if string str is a member of the set of strings params +// IsIn checks if string str is a member of the set of strings params func IsIn(str string, params ...string) bool { for _, param := range params { if str == param { @@ -995,7 +1304,7 @@ func typeCheck(v reflect.Value, t reflect.StructField, o reflect.Value, options tag := t.Tag.Get(tagName) - // Check if the field should be ignored + // checks if the field should be ignored switch tag { case "": if v.Kind() != reflect.Slice && v.Kind() != reflect.Map { @@ -1015,7 +1324,7 @@ func typeCheck(v reflect.Value, t reflect.StructField, o reflect.Value, options } if isEmptyValue(v) { - // an empty value is not validated, check only required + // an empty value is not validated, checks only required isValid, resultErr = checkRequired(v, t, options) for key := range options { delete(options, key) @@ -1062,26 +1371,65 @@ func typeCheck(v reflect.Value, t reflect.StructField, o 
reflect.Value, options }() } + for _, validatorSpec := range optionsOrder { + validatorStruct := options[validatorSpec] + var negate bool + validator := validatorSpec + customMsgExists := len(validatorStruct.customErrorMessage) > 0 + + // checks whether the tag looks like '!something' or 'something' + if validator[0] == '!' { + validator = validator[1:] + negate = true + } + + // checks for interface param validators + for key, value := range InterfaceParamTagRegexMap { + ps := value.FindStringSubmatch(validator) + if len(ps) == 0 { + continue + } + + validatefunc, ok := InterfaceParamTagMap[key] + if !ok { + continue + } + + delete(options, validatorSpec) + + field := fmt.Sprint(v) + if result := validatefunc(v.Interface(), ps[1:]...); (!result && !negate) || (result && negate) { + if customMsgExists { + return false, Error{t.Name, TruncatingErrorf(validatorStruct.customErrorMessage, field, validator), customMsgExists, stripParams(validatorSpec), []string{}} + } + if negate { + return false, Error{t.Name, fmt.Errorf("%s does validate as %s", field, validator), customMsgExists, stripParams(validatorSpec), []string{}} + } + return false, Error{t.Name, fmt.Errorf("%s does not validate as %s", field, validator), customMsgExists, stripParams(validatorSpec), []string{}} + } + } + } + switch v.Kind() { case reflect.Bool, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.Float32, reflect.Float64, reflect.String: - // for each tag option check the map of validator functions + // for each tag option checks the map of validator functions for _, validatorSpec := range optionsOrder { validatorStruct := options[validatorSpec] var negate bool validator := validatorSpec customMsgExists := len(validatorStruct.customErrorMessage) > 0 - // Check whether the tag looks like '!something' or 'something' + // checks whether the tag looks like '!something' or 'something' if validator[0] == '!' 
{ validator = validator[1:] negate = true } - // Check for param validators + // checks for param validators for key, value := range ParamTagRegexMap { ps := value.FindStringSubmatch(validator) if len(ps) == 0 { @@ -1121,10 +1469,10 @@ func typeCheck(v reflect.Value, t reflect.StructField, o reflect.Value, options delete(options, validatorSpec) switch v.Kind() { - case reflect.String, - reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, - reflect.Float32, reflect.Float64: + case reflect.String, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Float32, reflect.Float64: field := fmt.Sprint(v) // make value into string, then validate with regex if result := validatefunc(field); !result && !negate || result && negate { if customMsgExists { @@ -1162,7 +1510,7 @@ func typeCheck(v reflect.Value, t reflect.StructField, o reflect.Value, options } else { resultItem, err = ValidateStruct(v.MapIndex(k).Interface()) if err != nil { - err = PrependPathToErrors(err, t.Name+"."+sv[i].Interface().(string)) + err = prependPathToErrors(err, t.Name+"."+sv[i].Interface().(string)) return false, err } } @@ -1182,7 +1530,7 @@ func typeCheck(v reflect.Value, t reflect.StructField, o reflect.Value, options } else { resultItem, err = ValidateStruct(v.Index(i).Interface()) if err != nil { - err = PrependPathToErrors(err, t.Name+"."+strconv.Itoa(i)) + err = prependPathToErrors(err, t.Name+"."+strconv.Itoa(i)) return false, err } } @@ -1196,13 +1544,13 @@ func typeCheck(v reflect.Value, t reflect.StructField, o reflect.Value, options } return ValidateStruct(v.Interface()) case reflect.Ptr: - // If the value is a pointer then check its element + // If the value is a pointer then checks its element if v.IsNil() { return true, nil } return typeCheck(v.Elem(), t, o, options) case reflect.Struct: - return ValidateStruct(v.Interface()) + return true, nil default: return false, &UnsupportedTypeError{v.Type()} } @@ -1212,6 +1560,7 @@ func stripParams(validatorString string) string { return paramsRegexp.ReplaceAllString(validatorString, "") } +// isEmptyValue checks whether value empty or not func isEmptyValue(v reflect.Value) bool { switch v.Kind() { case reflect.String, reflect.Array: @@ -1252,11 +1601,11 @@ func ErrorsByField(e error) map[string]string { } // prototype for ValidateStruct - switch e.(type) { + switch e := e.(type) { case Error: - m[e.(Error).Name] = e.(Error).Err.Error() + m[e.Name] = e.Err.Error() case Errors: - for _, item := range e.(Errors).Errors() { + for _, item := range e.Errors() { n := ErrorsByField(item) for k, v := range n { m[k] = v diff --git a/src/runtime/vendor/github.com/asaskevich/govalidator/wercker.yml b/src/runtime/vendor/github.com/asaskevich/govalidator/wercker.yml index cac7a5fcf063..bc5f7b0864bd 100644 --- a/src/runtime/vendor/github.com/asaskevich/govalidator/wercker.yml +++ b/src/runtime/vendor/github.com/asaskevich/govalidator/wercker.yml @@ -12,4 +12,4 @@ build: - script: name: go test code: | - go test -race ./... + go test -race -v ./... 
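For context, the govalidator update vendored above introduces several new entry points (for example `ValidateMap`, `ValidateStructAsync`, `ValidateMapAsync`, `MinStringLength`, and `MaxStringLength`). Below is a minimal, illustrative sketch of how the new map-based validation and length helpers could be exercised from runtime code; it is not part of the vendored diff, and the example data, schema, and printed results are assumptions rather than anything taken from this patch:

```go
package main

import (
	"fmt"

	"github.com/asaskevich/govalidator"
)

func main() {
	// ValidateMap (added in this update) checks a data map against a
	// schema map of validator tags, mirroring the tag syntax used by
	// ValidateStruct. Example data and schema here are illustrative.
	data := map[string]interface{}{
		"name":  "kata",
		"email": "dev@example.com",
	}
	schema := map[string]interface{}{
		"name":  "required,alpha",
		"email": "required,email",
	}
	ok, err := govalidator.ValidateMap(data, schema)
	fmt.Println(ok, err) // expected: true <nil>

	// The update also adds simple rune-length helpers whose parameters
	// are passed as strings, matching the rest of the package's API.
	fmt.Println(govalidator.MinStringLength("kata", "3")) // true  (4 >= 3)
	fmt.Println(govalidator.MaxStringLength("kata", "3")) // false (4 > 3)
}
```

Per the hunk above, the async variants (`ValidateStructAsync`, `ValidateMapAsync`) run the same checks in a goroutine and deliver the boolean result and the error over two unbuffered channels, in that order.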
diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/.gitignore b/src/runtime/vendor/github.com/bits-and-blooms/bitset/.gitignore deleted file mode 100644 index 5c204d28b0e3..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/.gitignore +++ /dev/null @@ -1,26 +0,0 @@ -# Compiled Object files, Static and Dynamic libs (Shared Objects) -*.o -*.a -*.so - -# Folders -_obj -_test - -# Architecture specific extensions/prefixes -*.[568vq] -[568vq].out - -*.cgo1.go -*.cgo2.c -_cgo_defun.c -_cgo_gotypes.go -_cgo_export.* - -_testmain.go - -*.exe -*.test -*.prof - -target diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/.travis.yml b/src/runtime/vendor/github.com/bits-and-blooms/bitset/.travis.yml deleted file mode 100644 index 094aa5ce070c..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/.travis.yml +++ /dev/null @@ -1,37 +0,0 @@ -language: go - -sudo: false - -branches: - except: - - release - -branches: - only: - - master - - travis - -go: - - "1.11.x" - - tip - -matrix: - allow_failures: - - go: tip - -before_install: - - if [ -n "$GH_USER" ]; then git config --global github.user ${GH_USER}; fi; - - if [ -n "$GH_TOKEN" ]; then git config --global github.token ${GH_TOKEN}; fi; - - go get github.com/mattn/goveralls - -before_script: - - make deps - -script: - - make qa - -after_failure: - - cat ./target/test/report.xml - -after_success: - - if [ "$TRAVIS_GO_VERSION" = "1.11.1" ]; then $HOME/gopath/bin/goveralls -covermode=count -coverprofile=target/report/coverage.out -service=travis-ci; fi; diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/LICENSE b/src/runtime/vendor/github.com/bits-and-blooms/bitset/LICENSE deleted file mode 100644 index 59cab8a939be..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2014 Will Fitzgerald. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/README.md b/src/runtime/vendor/github.com/bits-and-blooms/bitset/README.md deleted file mode 100644 index 97e83071e41a..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/README.md +++ /dev/null @@ -1,93 +0,0 @@ -# bitset - -*Go language library to map between non-negative integers and boolean values* - -[![Test](https://github.com/bits-and-blooms/bitset/workflows/Test/badge.svg)](https://github.com/willf/bitset/actions?query=workflow%3ATest) -[![Go Report Card](https://goreportcard.com/badge/github.com/willf/bitset)](https://goreportcard.com/report/github.com/willf/bitset) -[![PkgGoDev](https://pkg.go.dev/badge/github.com/bits-and-blooms/bitset?tab=doc)](https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc) - - -## Description - -Package bitset implements bitsets, a mapping between non-negative integers and boolean values. -It should be more efficient than map[uint] bool. - -It provides methods for setting, clearing, flipping, and testing individual integers. - -But it also provides set intersection, union, difference, complement, and symmetric operations, as well as tests to check whether any, all, or no bits are set, and querying a bitset's current length and number of positive bits. - -BitSets are expanded to the size of the largest set bit; the memory allocation is approximately Max bits, where Max is the largest set bit. BitSets are never shrunk. On creation, a hint can be given for the number of bits that will be used. - -Many of the methods, including Set, Clear, and Flip, return a BitSet pointer, which allows for chaining. - -### Example use: - -```go -package main - -import ( - "fmt" - "math/rand" - - "github.com/bits-and-blooms/bitset" -) - -func main() { - fmt.Printf("Hello from BitSet!\n") - var b bitset.BitSet - // play some Go Fish - for i := 0; i < 100; i++ { - card1 := uint(rand.Intn(52)) - card2 := uint(rand.Intn(52)) - b.Set(card1) - if b.Test(card2) { - fmt.Println("Go Fish!") - } - b.Clear(card1) - } - - // Chaining - b.Set(10).Set(11) - - for i, e := b.NextSet(0); e; i, e = b.NextSet(i + 1) { - fmt.Println("The following bit is set:", i) - } - if b.Intersection(bitset.New(100).Set(10)).Count() == 1 { - fmt.Println("Intersection works.") - } else { - fmt.Println("Intersection doesn't work???") - } -} -``` - -As an alternative to BitSets, one should check out the 'big' package, which provides a (less set-theoretical) view of bitsets. - -Package documentation is at: https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc - -## Memory Usage - -The memory usage of a bitset using N bits is at least N/8 bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring). - -## Implementation Note - -Go 1.9 introduced a native `math/bits` library. We provide backward compatibility to Go 1.7, which might be removed. - -It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `unit64`). If so, the version will be bumped. 
- -## Installation - -```bash -go get github.com/bits-and-blooms/bitset -``` - -## Contributing - -If you wish to contribute to this project, please branch and issue a pull request against master ("[GitHub Flow](https://guides.github.com/introduction/flow/)") - -## Running all tests - -Before committing the code, please check if it passes tests, has adequate coverage, etc. -```bash -go test -go test -cover -``` diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/azure-pipelines.yml b/src/runtime/vendor/github.com/bits-and-blooms/bitset/azure-pipelines.yml deleted file mode 100644 index f9b295918404..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/azure-pipelines.yml +++ /dev/null @@ -1,39 +0,0 @@ -# Go -# Build your Go project. -# Add steps that test, save build artifacts, deploy, and more: -# https://docs.microsoft.com/azure/devops/pipelines/languages/go - -trigger: -- master - -pool: - vmImage: 'Ubuntu-16.04' - -variables: - GOBIN: '$(GOPATH)/bin' # Go binaries path - GOROOT: '/usr/local/go1.11' # Go installation path - GOPATH: '$(system.defaultWorkingDirectory)/gopath' # Go workspace path - modulePath: '$(GOPATH)/src/github.com/$(build.repository.name)' # Path to the module's code - -steps: -- script: | - mkdir -p '$(GOBIN)' - mkdir -p '$(GOPATH)/pkg' - mkdir -p '$(modulePath)' - shopt -s extglob - shopt -s dotglob - mv !(gopath) '$(modulePath)' - echo '##vso[task.prependpath]$(GOBIN)' - echo '##vso[task.prependpath]$(GOROOT)/bin' - displayName: 'Set up the Go workspace' - -- script: | - go version - go get -v -t -d ./... - if [ -f Gopkg.toml ]; then - curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh - dep ensure - fi - go build -v . - workingDirectory: '$(modulePath)' - displayName: 'Get dependencies, then build' diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/bitset.go b/src/runtime/vendor/github.com/bits-and-blooms/bitset/bitset.go deleted file mode 100644 index d688806a54b8..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/bitset.go +++ /dev/null @@ -1,952 +0,0 @@ -/* -Package bitset implements bitsets, a mapping -between non-negative integers and boolean values. It should be more -efficient than map[uint] bool. - -It provides methods for setting, clearing, flipping, and testing -individual integers. - -But it also provides set intersection, union, difference, -complement, and symmetric operations, as well as tests to -check whether any, all, or no bits are set, and querying a -bitset's current length and number of positive bits. - -BitSets are expanded to the size of the largest set bit; the -memory allocation is approximately Max bits, where Max is -the largest set bit. BitSets are never shrunk. On creation, -a hint can be given for the number of bits that will be used. - -Many of the methods, including Set,Clear, and Flip, return -a BitSet pointer, which allows for chaining. - -Example use: - - import "bitset" - var b BitSet - b.Set(10).Set(11) - if b.Test(1000) { - b.Clear(1000) - } - if B.Intersection(bitset.New(100).Set(10)).Count() > 1 { - fmt.Println("Intersection works.") - } - -As an alternative to BitSets, one should check out the 'big' package, -which provides a (less set-theoretical) view of bitsets. 
- -*/ -package bitset - -import ( - "bufio" - "bytes" - "encoding/base64" - "encoding/binary" - "encoding/json" - "errors" - "fmt" - "io" - "strconv" -) - -// the wordSize of a bit set -const wordSize = uint(64) - -// log2WordSize is lg(wordSize) -const log2WordSize = uint(6) - -// allBits has every bit set -const allBits uint64 = 0xffffffffffffffff - -// default binary BigEndian -var binaryOrder binary.ByteOrder = binary.BigEndian - -// default json encoding base64.URLEncoding -var base64Encoding = base64.URLEncoding - -// Base64StdEncoding Marshal/Unmarshal BitSet with base64.StdEncoding(Default: base64.URLEncoding) -func Base64StdEncoding() { base64Encoding = base64.StdEncoding } - -// LittleEndian Marshal/Unmarshal Binary as Little Endian(Default: binary.BigEndian) -func LittleEndian() { binaryOrder = binary.LittleEndian } - -// A BitSet is a set of bits. The zero value of a BitSet is an empty set of length 0. -type BitSet struct { - length uint - set []uint64 -} - -// Error is used to distinguish errors (panics) generated in this package. -type Error string - -// safeSet will fixup b.set to be non-nil and return the field value -func (b *BitSet) safeSet() []uint64 { - if b.set == nil { - b.set = make([]uint64, wordsNeeded(0)) - } - return b.set -} - -// From is a constructor used to create a BitSet from an array of integers -func From(buf []uint64) *BitSet { - return &BitSet{uint(len(buf)) * 64, buf} -} - -// Bytes returns the bitset as array of integers -func (b *BitSet) Bytes() []uint64 { - return b.set -} - -// wordsNeeded calculates the number of words needed for i bits -func wordsNeeded(i uint) int { - if i > (Cap() - wordSize + 1) { - return int(Cap() >> log2WordSize) - } - return int((i + (wordSize - 1)) >> log2WordSize) -} - -// New creates a new BitSet with a hint that length bits will be required -func New(length uint) (bset *BitSet) { - defer func() { - if r := recover(); r != nil { - bset = &BitSet{ - 0, - make([]uint64, 0), - } - } - }() - - bset = &BitSet{ - length, - make([]uint64, wordsNeeded(length)), - } - - return bset -} - -// Cap returns the total possible capacity, or number of bits -func Cap() uint { - return ^uint(0) -} - -// Len returns the number of bits in the BitSet. -// Note the difference to method Count, see example. -func (b *BitSet) Len() uint { - return b.length -} - -// extendSetMaybe adds additional words to incorporate new bits if needed -func (b *BitSet) extendSetMaybe(i uint) { - if i >= b.length { // if we need more bits, make 'em - if i >= Cap() { - panic("You are exceeding the capacity") - } - nsize := wordsNeeded(i + 1) - if b.set == nil { - b.set = make([]uint64, nsize) - } else if cap(b.set) >= nsize { - b.set = b.set[:nsize] // fast resize - } else if len(b.set) < nsize { - newset := make([]uint64, nsize, 2*nsize) // increase capacity 2x - copy(newset, b.set) - b.set = newset - } - b.length = i + 1 - } -} - -// Test whether bit i is set. -func (b *BitSet) Test(i uint) bool { - if i >= b.length { - return false - } - return b.set[i>>log2WordSize]&(1<<(i&(wordSize-1))) != 0 -} - -// Set bit i to 1, the capacity of the bitset is automatically -// increased accordingly. -// If i>= Cap(), this function will panic. -// Warning: using a very large value for 'i' -// may lead to a memory shortage and a panic: the caller is responsible -// for providing sensible parameters in line with their memory capacity. 
-func (b *BitSet) Set(i uint) *BitSet { - b.extendSetMaybe(i) - b.set[i>>log2WordSize] |= 1 << (i & (wordSize - 1)) - return b -} - -// Clear bit i to 0 -func (b *BitSet) Clear(i uint) *BitSet { - if i >= b.length { - return b - } - b.set[i>>log2WordSize] &^= 1 << (i & (wordSize - 1)) - return b -} - -// SetTo sets bit i to value. -// If i>= Cap(), this function will panic. -// Warning: using a very large value for 'i' -// may lead to a memory shortage and a panic: the caller is responsible -// for providing sensible parameters in line with their memory capacity. -func (b *BitSet) SetTo(i uint, value bool) *BitSet { - if value { - return b.Set(i) - } - return b.Clear(i) -} - -// Flip bit at i. -// If i>= Cap(), this function will panic. -// Warning: using a very large value for 'i' -// may lead to a memory shortage and a panic: the caller is responsible -// for providing sensible parameters in line with their memory capacity. -func (b *BitSet) Flip(i uint) *BitSet { - if i >= b.length { - return b.Set(i) - } - b.set[i>>log2WordSize] ^= 1 << (i & (wordSize - 1)) - return b -} - -// FlipRange bit in [start, end). -// If end>= Cap(), this function will panic. -// Warning: using a very large value for 'end' -// may lead to a memory shortage and a panic: the caller is responsible -// for providing sensible parameters in line with their memory capacity. -func (b *BitSet) FlipRange(start, end uint) *BitSet { - if start >= end { - return b - } - - b.extendSetMaybe(end - 1) - var startWord uint = start >> log2WordSize - var endWord uint = end >> log2WordSize - b.set[startWord] ^= ^(^uint64(0) << (start & (wordSize - 1))) - for i := startWord; i < endWord; i++ { - b.set[i] = ^b.set[i] - } - b.set[endWord] ^= ^uint64(0) >> (-end & (wordSize - 1)) - return b -} - -// Shrink shrinks BitSet so that the provided value is the last possible -// set value. It clears all bits > the provided index and reduces the size -// and length of the set. -// -// Note that the parameter value is not the new length in bits: it is the -// maximal value that can be stored in the bitset after the function call. -// The new length in bits is the parameter value + 1. Thus it is not possible -// to use this function to set the length to 0, the minimal value of the length -// after this function call is 1. -// -// A new slice is allocated to store the new bits, so you may see an increase in -// memory usage until the GC runs. Normally this should not be a problem, but if you -// have an extremely large BitSet its important to understand that the old BitSet will -// remain in memory until the GC frees it. -func (b *BitSet) Shrink(lastbitindex uint) *BitSet { - length := lastbitindex + 1 - idx := wordsNeeded(length) - if idx > len(b.set) { - return b - } - shrunk := make([]uint64, idx) - copy(shrunk, b.set[:idx]) - b.set = shrunk - b.length = length - b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1)))) - return b -} - -// Compact shrinks BitSet to so that we preserve all set bits, while minimizing -// memory usage. Compact calls Shrink. -func (b *BitSet) Compact() *BitSet { - idx := len(b.set) - 1 - for ; idx >= 0 && b.set[idx] == 0; idx-- { - } - newlength := uint((idx + 1) << log2WordSize) - if newlength >= b.length { - return b // nothing to do - } - if newlength > 0 { - return b.Shrink(newlength - 1) - } - // We preserve one word - return b.Shrink(63) -} - -// InsertAt takes an index which indicates where a bit should be -// inserted. 
Then it shifts all the bits in the set to the left by 1, starting -// from the given index position, and sets the index position to 0. -// -// Depending on the size of your BitSet, and where you are inserting the new entry, -// this method could be extremely slow and in some cases might cause the entire BitSet -// to be recopied. -func (b *BitSet) InsertAt(idx uint) *BitSet { - insertAtElement := (idx >> log2WordSize) - - // if length of set is a multiple of wordSize we need to allocate more space first - if b.isLenExactMultiple() { - b.set = append(b.set, uint64(0)) - } - - var i uint - for i = uint(len(b.set) - 1); i > insertAtElement; i-- { - // all elements above the position where we want to insert can simply by shifted - b.set[i] <<= 1 - - // we take the most significant bit of the previous element and set it as - // the least significant bit of the current element - b.set[i] |= (b.set[i-1] & 0x8000000000000000) >> 63 - } - - // generate a mask to extract the data that we need to shift left - // within the element where we insert a bit - dataMask := ^(uint64(1)< 0x40000 { - buffer.WriteString("...") - break - } - buffer.WriteString(strconv.FormatInt(int64(i), 10)) - i, e = b.NextSet(i + 1) - if e { - buffer.WriteString(",") - } - } - buffer.WriteString("}") - return buffer.String() -} - -// DeleteAt deletes the bit at the given index position from -// within the bitset -// All the bits residing on the left of the deleted bit get -// shifted right by 1 -// The running time of this operation may potentially be -// relatively slow, O(length) -func (b *BitSet) DeleteAt(i uint) *BitSet { - // the index of the slice element where we'll delete a bit - deleteAtElement := i >> log2WordSize - - // generate a mask for the data that needs to be shifted right - // within that slice element that gets modified - dataMask := ^((uint64(1) << (i & (wordSize - 1))) - 1) - - // extract the data that we'll shift right from the slice element - data := b.set[deleteAtElement] & dataMask - - // set the masked area to 0 while leaving the rest as it is - b.set[deleteAtElement] &= ^dataMask - - // shift the previously extracted data to the right and then - // set it in the previously masked area - b.set[deleteAtElement] |= (data >> 1) & dataMask - - // loop over all the consecutive slice elements to copy each - // lowest bit into the highest position of the previous element, - // then shift the entire content to the right by 1 - for i := int(deleteAtElement) + 1; i < len(b.set); i++ { - b.set[i-1] |= (b.set[i] & 1) << 63 - b.set[i] >>= 1 - } - - b.length = b.length - 1 - - return b -} - -// NextSet returns the next bit set from the specified index, -// including possibly the current index -// along with an error code (true = valid, false = no set bit found) -// for i,e := v.NextSet(0); e; i,e = v.NextSet(i + 1) {...} -// -// Users concerned with performance may want to use NextSetMany to -// retrieve several values at once. -func (b *BitSet) NextSet(i uint) (uint, bool) { - x := int(i >> log2WordSize) - if x >= len(b.set) { - return 0, false - } - w := b.set[x] - w = w >> (i & (wordSize - 1)) - if w != 0 { - return i + trailingZeroes64(w), true - } - x = x + 1 - for x < len(b.set) { - if b.set[x] != 0 { - return uint(x)*wordSize + trailingZeroes64(b.set[x]), true - } - x = x + 1 - - } - return 0, false -} - -// NextSetMany returns many next bit sets from the specified index, -// including possibly the current index and up to cap(buffer). 
-// If the returned slice has len zero, then no more set bits were found -// -// buffer := make([]uint, 256) // this should be reused -// j := uint(0) -// j, buffer = bitmap.NextSetMany(j, buffer) -// for ; len(buffer) > 0; j, buffer = bitmap.NextSetMany(j,buffer) { -// for k := range buffer { -// do something with buffer[k] -// } -// j += 1 -// } -// -// -// It is possible to retrieve all set bits as follow: -// -// indices := make([]uint, bitmap.Count()) -// bitmap.NextSetMany(0, indices) -// -// However if bitmap.Count() is large, it might be preferable to -// use several calls to NextSetMany, for performance reasons. -func (b *BitSet) NextSetMany(i uint, buffer []uint) (uint, []uint) { - myanswer := buffer - capacity := cap(buffer) - x := int(i >> log2WordSize) - if x >= len(b.set) || capacity == 0 { - return 0, myanswer[:0] - } - skip := i & (wordSize - 1) - word := b.set[x] >> skip - myanswer = myanswer[:capacity] - size := int(0) - for word != 0 { - r := trailingZeroes64(word) - t := word & ((^word) + 1) - myanswer[size] = r + i - size++ - if size == capacity { - goto End - } - word = word ^ t - } - x++ - for idx, word := range b.set[x:] { - for word != 0 { - r := trailingZeroes64(word) - t := word & ((^word) + 1) - myanswer[size] = r + (uint(x+idx) << 6) - size++ - if size == capacity { - goto End - } - word = word ^ t - } - } -End: - if size > 0 { - return myanswer[size-1], myanswer[:size] - } - return 0, myanswer[:0] -} - -// NextClear returns the next clear bit from the specified index, -// including possibly the current index -// along with an error code (true = valid, false = no bit found i.e. all bits are set) -func (b *BitSet) NextClear(i uint) (uint, bool) { - x := int(i >> log2WordSize) - if x >= len(b.set) { - return 0, false - } - w := b.set[x] - w = w >> (i & (wordSize - 1)) - wA := allBits >> (i & (wordSize - 1)) - index := i + trailingZeroes64(^w) - if w != wA && index < b.length { - return index, true - } - x++ - for x < len(b.set) { - index = uint(x)*wordSize + trailingZeroes64(^b.set[x]) - if b.set[x] != allBits && index < b.length { - return index, true - } - x++ - } - return 0, false -} - -// ClearAll clears the entire BitSet -func (b *BitSet) ClearAll() *BitSet { - if b != nil && b.set != nil { - for i := range b.set { - b.set[i] = 0 - } - } - return b -} - -// wordCount returns the number of words used in a bit set -func (b *BitSet) wordCount() int { - return len(b.set) -} - -// Clone this BitSet -func (b *BitSet) Clone() *BitSet { - c := New(b.length) - if b.set != nil { // Clone should not modify current object - copy(c.set, b.set) - } - return c -} - -// Copy into a destination BitSet -// Returning the size of the destination BitSet -// like array copy -func (b *BitSet) Copy(c *BitSet) (count uint) { - if c == nil { - return - } - if b.set != nil { // Copy should not modify current object - copy(c.set, b.set) - } - count = c.length - if b.length < c.length { - count = b.length - } - return -} - -// Count (number of set bits). -// Also known as "popcount" or "population count". -func (b *BitSet) Count() uint { - if b != nil && b.set != nil { - return uint(popcntSlice(b.set)) - } - return 0 -} - -// Equal tests the equivalence of two BitSets. 
-// False if they are of different sizes, otherwise true -// only if all the same bits are set -func (b *BitSet) Equal(c *BitSet) bool { - if c == nil || b == nil { - return c == b - } - if b.length != c.length { - return false - } - if b.length == 0 { // if they have both length == 0, then could have nil set - return true - } - // testing for equality shoud not transform the bitset (no call to safeSet) - - for p, v := range b.set { - if c.set[p] != v { - return false - } - } - return true -} - -func panicIfNull(b *BitSet) { - if b == nil { - panic(Error("BitSet must not be null")) - } -} - -// Difference of base set and other set -// This is the BitSet equivalent of &^ (and not) -func (b *BitSet) Difference(compare *BitSet) (result *BitSet) { - panicIfNull(b) - panicIfNull(compare) - result = b.Clone() // clone b (in case b is bigger than compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) - } - for i := 0; i < l; i++ { - result.set[i] = b.set[i] &^ compare.set[i] - } - return -} - -// DifferenceCardinality computes the cardinality of the differnce -func (b *BitSet) DifferenceCardinality(compare *BitSet) uint { - panicIfNull(b) - panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) - } - cnt := uint64(0) - cnt += popcntMaskSlice(b.set[:l], compare.set[:l]) - cnt += popcntSlice(b.set[l:]) - return uint(cnt) -} - -// InPlaceDifference computes the difference of base set and other set -// This is the BitSet equivalent of &^ (and not) -func (b *BitSet) InPlaceDifference(compare *BitSet) { - panicIfNull(b) - panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) - } - for i := 0; i < l; i++ { - b.set[i] &^= compare.set[i] - } -} - -// Convenience function: return two bitsets ordered by -// increasing length. Note: neither can be nil -func sortByLength(a *BitSet, b *BitSet) (ap *BitSet, bp *BitSet) { - if a.length <= b.length { - ap, bp = a, b - } else { - ap, bp = b, a - } - return -} - -// Intersection of base set and other set -// This is the BitSet equivalent of & (and) -func (b *BitSet) Intersection(compare *BitSet) (result *BitSet) { - panicIfNull(b) - panicIfNull(compare) - b, compare = sortByLength(b, compare) - result = New(b.length) - for i, word := range b.set { - result.set[i] = word & compare.set[i] - } - return -} - -// IntersectionCardinality computes the cardinality of the union -func (b *BitSet) IntersectionCardinality(compare *BitSet) uint { - panicIfNull(b) - panicIfNull(compare) - b, compare = sortByLength(b, compare) - cnt := popcntAndSlice(b.set, compare.set) - return uint(cnt) -} - -// InPlaceIntersection destructively computes the intersection of -// base set and the compare set. 
-// This is the BitSet equivalent of & (and) -func (b *BitSet) InPlaceIntersection(compare *BitSet) { - panicIfNull(b) - panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) - } - for i := 0; i < l; i++ { - b.set[i] &= compare.set[i] - } - for i := l; i < len(b.set); i++ { - b.set[i] = 0 - } - if compare.length > 0 { - b.extendSetMaybe(compare.length - 1) - } -} - -// Union of base set and other set -// This is the BitSet equivalent of | (or) -func (b *BitSet) Union(compare *BitSet) (result *BitSet) { - panicIfNull(b) - panicIfNull(compare) - b, compare = sortByLength(b, compare) - result = compare.Clone() - for i, word := range b.set { - result.set[i] = word | compare.set[i] - } - return -} - -// UnionCardinality computes the cardinality of the uniton of the base set -// and the compare set. -func (b *BitSet) UnionCardinality(compare *BitSet) uint { - panicIfNull(b) - panicIfNull(compare) - b, compare = sortByLength(b, compare) - cnt := popcntOrSlice(b.set, compare.set) - if len(compare.set) > len(b.set) { - cnt += popcntSlice(compare.set[len(b.set):]) - } - return uint(cnt) -} - -// InPlaceUnion creates the destructive union of base set and compare set. -// This is the BitSet equivalent of | (or). -func (b *BitSet) InPlaceUnion(compare *BitSet) { - panicIfNull(b) - panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) - } - if compare.length > 0 { - b.extendSetMaybe(compare.length - 1) - } - for i := 0; i < l; i++ { - b.set[i] |= compare.set[i] - } - if len(compare.set) > l { - for i := l; i < len(compare.set); i++ { - b.set[i] = compare.set[i] - } - } -} - -// SymmetricDifference of base set and other set -// This is the BitSet equivalent of ^ (xor) -func (b *BitSet) SymmetricDifference(compare *BitSet) (result *BitSet) { - panicIfNull(b) - panicIfNull(compare) - b, compare = sortByLength(b, compare) - // compare is bigger, so clone it - result = compare.Clone() - for i, word := range b.set { - result.set[i] = word ^ compare.set[i] - } - return -} - -// SymmetricDifferenceCardinality computes the cardinality of the symmetric difference -func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) uint { - panicIfNull(b) - panicIfNull(compare) - b, compare = sortByLength(b, compare) - cnt := popcntXorSlice(b.set, compare.set) - if len(compare.set) > len(b.set) { - cnt += popcntSlice(compare.set[len(b.set):]) - } - return uint(cnt) -} - -// InPlaceSymmetricDifference creates the destructive SymmetricDifference of base set and other set -// This is the BitSet equivalent of ^ (xor) -func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) { - panicIfNull(b) - panicIfNull(compare) - l := int(compare.wordCount()) - if l > int(b.wordCount()) { - l = int(b.wordCount()) - } - if compare.length > 0 { - b.extendSetMaybe(compare.length - 1) - } - for i := 0; i < l; i++ { - b.set[i] ^= compare.set[i] - } - if len(compare.set) > l { - for i := l; i < len(compare.set); i++ { - b.set[i] = compare.set[i] - } - } -} - -// Is the length an exact multiple of word sizes? 
-func (b *BitSet) isLenExactMultiple() bool { - return b.length%wordSize == 0 -} - -// Clean last word by setting unused bits to 0 -func (b *BitSet) cleanLastWord() { - if !b.isLenExactMultiple() { - b.set[len(b.set)-1] &= allBits >> (wordSize - b.length%wordSize) - } -} - -// Complement computes the (local) complement of a biset (up to length bits) -func (b *BitSet) Complement() (result *BitSet) { - panicIfNull(b) - result = New(b.length) - for i, word := range b.set { - result.set[i] = ^word - } - result.cleanLastWord() - return -} - -// All returns true if all bits are set, false otherwise. Returns true for -// empty sets. -func (b *BitSet) All() bool { - panicIfNull(b) - return b.Count() == b.length -} - -// None returns true if no bit is set, false otherwise. Returns true for -// empty sets. -func (b *BitSet) None() bool { - panicIfNull(b) - if b != nil && b.set != nil { - for _, word := range b.set { - if word > 0 { - return false - } - } - return true - } - return true -} - -// Any returns true if any bit is set, false otherwise -func (b *BitSet) Any() bool { - panicIfNull(b) - return !b.None() -} - -// IsSuperSet returns true if this is a superset of the other set -func (b *BitSet) IsSuperSet(other *BitSet) bool { - for i, e := other.NextSet(0); e; i, e = other.NextSet(i + 1) { - if !b.Test(i) { - return false - } - } - return true -} - -// IsStrictSuperSet returns true if this is a strict superset of the other set -func (b *BitSet) IsStrictSuperSet(other *BitSet) bool { - return b.Count() > other.Count() && b.IsSuperSet(other) -} - -// DumpAsBits dumps a bit set as a string of bits -func (b *BitSet) DumpAsBits() string { - if b.set == nil { - return "." - } - buffer := bytes.NewBufferString("") - i := len(b.set) - 1 - for ; i >= 0; i-- { - fmt.Fprintf(buffer, "%064b.", b.set[i]) - } - return buffer.String() -} - -// BinaryStorageSize returns the binary storage requirements -func (b *BitSet) BinaryStorageSize() int { - return binary.Size(uint64(0)) + binary.Size(b.set) -} - -// WriteTo writes a BitSet to a stream -func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { - length := uint64(b.length) - - // Write length - err := binary.Write(stream, binaryOrder, length) - if err != nil { - return 0, err - } - - // Write set - err = binary.Write(stream, binaryOrder, b.set) - return int64(b.BinaryStorageSize()), err -} - -// ReadFrom reads a BitSet from a stream written using WriteTo -func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { - var length uint64 - - // Read length first - err := binary.Read(stream, binaryOrder, &length) - if err != nil { - return 0, err - } - newset := New(uint(length)) - - if uint64(newset.length) != length { - return 0, errors.New("unmarshalling error: type mismatch") - } - - // Read remaining bytes as set - err = binary.Read(stream, binaryOrder, newset.set) - if err != nil { - return 0, err - } - - *b = *newset - return int64(b.BinaryStorageSize()), nil -} - -// MarshalBinary encodes a BitSet into a binary form and returns the result. -func (b *BitSet) MarshalBinary() ([]byte, error) { - var buf bytes.Buffer - writer := bufio.NewWriter(&buf) - - _, err := b.WriteTo(writer) - if err != nil { - return []byte{}, err - } - - err = writer.Flush() - - return buf.Bytes(), err -} - -// UnmarshalBinary decodes the binary form generated by MarshalBinary. 
-func (b *BitSet) UnmarshalBinary(data []byte) error { - buf := bytes.NewReader(data) - reader := bufio.NewReader(buf) - - _, err := b.ReadFrom(reader) - - return err -} - -// MarshalJSON marshals a BitSet as a JSON structure -func (b *BitSet) MarshalJSON() ([]byte, error) { - buffer := bytes.NewBuffer(make([]byte, 0, b.BinaryStorageSize())) - _, err := b.WriteTo(buffer) - if err != nil { - return nil, err - } - - // URLEncode all bytes - return json.Marshal(base64Encoding.EncodeToString(buffer.Bytes())) -} - -// UnmarshalJSON unmarshals a BitSet from JSON created using MarshalJSON -func (b *BitSet) UnmarshalJSON(data []byte) error { - // Unmarshal as string - var s string - err := json.Unmarshal(data, &s) - if err != nil { - return err - } - - // URLDecode string - buf, err := base64Encoding.DecodeString(s) - if err != nil { - return err - } - - _, err = b.ReadFrom(bytes.NewReader(buf)) - return err -} diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/go.mod b/src/runtime/vendor/github.com/bits-and-blooms/bitset/go.mod deleted file mode 100644 index c43e4522b7f9..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/go.mod +++ /dev/null @@ -1,3 +0,0 @@ -module github.com/bits-and-blooms/bitset - -go 1.14 diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt.go b/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt.go deleted file mode 100644 index 76577a838284..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt.go +++ /dev/null @@ -1,53 +0,0 @@ -package bitset - -// bit population count, take from -// https://code.google.com/p/go/issues/detail?id=4988#c11 -// credit: https://code.google.com/u/arnehormann/ -func popcount(x uint64) (n uint64) { - x -= (x >> 1) & 0x5555555555555555 - x = (x>>2)&0x3333333333333333 + x&0x3333333333333333 - x += x >> 4 - x &= 0x0f0f0f0f0f0f0f0f - x *= 0x0101010101010101 - return x >> 56 -} - -func popcntSliceGo(s []uint64) uint64 { - cnt := uint64(0) - for _, x := range s { - cnt += popcount(x) - } - return cnt -} - -func popcntMaskSliceGo(s, m []uint64) uint64 { - cnt := uint64(0) - for i := range s { - cnt += popcount(s[i] &^ m[i]) - } - return cnt -} - -func popcntAndSliceGo(s, m []uint64) uint64 { - cnt := uint64(0) - for i := range s { - cnt += popcount(s[i] & m[i]) - } - return cnt -} - -func popcntOrSliceGo(s, m []uint64) uint64 { - cnt := uint64(0) - for i := range s { - cnt += popcount(s[i] | m[i]) - } - return cnt -} - -func popcntXorSliceGo(s, m []uint64) uint64 { - cnt := uint64(0) - for i := range s { - cnt += popcount(s[i] ^ m[i]) - } - return cnt -} diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt_19.go b/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt_19.go deleted file mode 100644 index fc8ff4f367c2..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt_19.go +++ /dev/null @@ -1,45 +0,0 @@ -// +build go1.9 - -package bitset - -import "math/bits" - -func popcntSlice(s []uint64) uint64 { - var cnt int - for _, x := range s { - cnt += bits.OnesCount64(x) - } - return uint64(cnt) -} - -func popcntMaskSlice(s, m []uint64) uint64 { - var cnt int - for i := range s { - cnt += bits.OnesCount64(s[i] &^ m[i]) - } - return uint64(cnt) -} - -func popcntAndSlice(s, m []uint64) uint64 { - var cnt int - for i := range s { - cnt += bits.OnesCount64(s[i] & m[i]) - } - return uint64(cnt) -} - -func popcntOrSlice(s, m []uint64) uint64 { - var cnt int - for i := range s { - cnt += bits.OnesCount64(s[i] | m[i]) - } - 
return uint64(cnt) -} - -func popcntXorSlice(s, m []uint64) uint64 { - var cnt int - for i := range s { - cnt += bits.OnesCount64(s[i] ^ m[i]) - } - return uint64(cnt) -} diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go b/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go deleted file mode 100644 index 4cf64f24ad03..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go +++ /dev/null @@ -1,68 +0,0 @@ -// +build !go1.9 -// +build amd64,!appengine - -package bitset - -// *** the following functions are defined in popcnt_amd64.s - -//go:noescape - -func hasAsm() bool - -// useAsm is a flag used to select the GO or ASM implementation of the popcnt function -var useAsm = hasAsm() - -//go:noescape - -func popcntSliceAsm(s []uint64) uint64 - -//go:noescape - -func popcntMaskSliceAsm(s, m []uint64) uint64 - -//go:noescape - -func popcntAndSliceAsm(s, m []uint64) uint64 - -//go:noescape - -func popcntOrSliceAsm(s, m []uint64) uint64 - -//go:noescape - -func popcntXorSliceAsm(s, m []uint64) uint64 - -func popcntSlice(s []uint64) uint64 { - if useAsm { - return popcntSliceAsm(s) - } - return popcntSliceGo(s) -} - -func popcntMaskSlice(s, m []uint64) uint64 { - if useAsm { - return popcntMaskSliceAsm(s, m) - } - return popcntMaskSliceGo(s, m) -} - -func popcntAndSlice(s, m []uint64) uint64 { - if useAsm { - return popcntAndSliceAsm(s, m) - } - return popcntAndSliceGo(s, m) -} - -func popcntOrSlice(s, m []uint64) uint64 { - if useAsm { - return popcntOrSliceAsm(s, m) - } - return popcntOrSliceGo(s, m) -} - -func popcntXorSlice(s, m []uint64) uint64 { - if useAsm { - return popcntXorSliceAsm(s, m) - } - return popcntXorSliceGo(s, m) -} diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.s b/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.s deleted file mode 100644 index 666c0dcc17f5..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.s +++ /dev/null @@ -1,104 +0,0 @@ -// +build !go1.9 -// +build amd64,!appengine - -TEXT ·hasAsm(SB),4,$0-1 -MOVQ $1, AX -CPUID -SHRQ $23, CX -ANDQ $1, CX -MOVB CX, ret+0(FP) -RET - -#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2 - -TEXT ·popcntSliceAsm(SB),4,$0-32 -XORQ AX, AX -MOVQ s+0(FP), SI -MOVQ s_len+8(FP), CX -TESTQ CX, CX -JZ popcntSliceEnd -popcntSliceLoop: -BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX -ADDQ DX, AX -ADDQ $8, SI -LOOP popcntSliceLoop -popcntSliceEnd: -MOVQ AX, ret+24(FP) -RET - -TEXT ·popcntMaskSliceAsm(SB),4,$0-56 -XORQ AX, AX -MOVQ s+0(FP), SI -MOVQ s_len+8(FP), CX -TESTQ CX, CX -JZ popcntMaskSliceEnd -MOVQ m+24(FP), DI -popcntMaskSliceLoop: -MOVQ (DI), DX -NOTQ DX -ANDQ (SI), DX -POPCNTQ_DX_DX -ADDQ DX, AX -ADDQ $8, SI -ADDQ $8, DI -LOOP popcntMaskSliceLoop -popcntMaskSliceEnd: -MOVQ AX, ret+48(FP) -RET - -TEXT ·popcntAndSliceAsm(SB),4,$0-56 -XORQ AX, AX -MOVQ s+0(FP), SI -MOVQ s_len+8(FP), CX -TESTQ CX, CX -JZ popcntAndSliceEnd -MOVQ m+24(FP), DI -popcntAndSliceLoop: -MOVQ (DI), DX -ANDQ (SI), DX -POPCNTQ_DX_DX -ADDQ DX, AX -ADDQ $8, SI -ADDQ $8, DI -LOOP popcntAndSliceLoop -popcntAndSliceEnd: -MOVQ AX, ret+48(FP) -RET - -TEXT ·popcntOrSliceAsm(SB),4,$0-56 -XORQ AX, AX -MOVQ s+0(FP), SI -MOVQ s_len+8(FP), CX -TESTQ CX, CX -JZ popcntOrSliceEnd -MOVQ m+24(FP), DI -popcntOrSliceLoop: -MOVQ (DI), DX -ORQ (SI), DX -POPCNTQ_DX_DX -ADDQ DX, AX -ADDQ $8, SI -ADDQ $8, DI -LOOP popcntOrSliceLoop -popcntOrSliceEnd: -MOVQ AX, 
ret+48(FP) -RET - -TEXT ·popcntXorSliceAsm(SB),4,$0-56 -XORQ AX, AX -MOVQ s+0(FP), SI -MOVQ s_len+8(FP), CX -TESTQ CX, CX -JZ popcntXorSliceEnd -MOVQ m+24(FP), DI -popcntXorSliceLoop: -MOVQ (DI), DX -XORQ (SI), DX -POPCNTQ_DX_DX -ADDQ DX, AX -ADDQ $8, SI -ADDQ $8, DI -LOOP popcntXorSliceLoop -popcntXorSliceEnd: -MOVQ AX, ret+48(FP) -RET diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go b/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go deleted file mode 100644 index 21e0ff7b4fc5..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go +++ /dev/null @@ -1,24 +0,0 @@ -// +build !go1.9 -// +build !amd64 appengine - -package bitset - -func popcntSlice(s []uint64) uint64 { - return popcntSliceGo(s) -} - -func popcntMaskSlice(s, m []uint64) uint64 { - return popcntMaskSliceGo(s, m) -} - -func popcntAndSlice(s, m []uint64) uint64 { - return popcntAndSliceGo(s, m) -} - -func popcntOrSlice(s, m []uint64) uint64 { - return popcntOrSliceGo(s, m) -} - -func popcntXorSlice(s, m []uint64) uint64 { - return popcntXorSliceGo(s, m) -} diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go b/src/runtime/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go deleted file mode 100644 index c52b61be9fc2..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go +++ /dev/null @@ -1,14 +0,0 @@ -// +build !go1.9 - -package bitset - -var deBruijn = [...]byte{ - 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, - 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, - 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, - 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, -} - -func trailingZeroes64(v uint64) uint { - return uint(deBruijn[((v&-v)*0x03f79d71b4ca8b09)>>58]) -} diff --git a/src/runtime/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go b/src/runtime/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go deleted file mode 100644 index 36a988e714d1..000000000000 --- a/src/runtime/vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go +++ /dev/null @@ -1,9 +0,0 @@ -// +build go1.9 - -package bitset - -import "math/bits" - -func trailingZeroes64(v uint64) uint { - return uint(bits.TrailingZeros64(v)) -} diff --git a/src/runtime/vendor/github.com/blang/semver/v4/go.mod b/src/runtime/vendor/github.com/blang/semver/v4/go.mod deleted file mode 100644 index 06d26221870d..000000000000 --- a/src/runtime/vendor/github.com/blang/semver/v4/go.mod +++ /dev/null @@ -1,3 +0,0 @@ -module github.com/blang/semver/v4 - -go 1.14 diff --git a/src/runtime/vendor/github.com/cespare/xxhash/v2/go.mod b/src/runtime/vendor/github.com/cespare/xxhash/v2/go.mod deleted file mode 100644 index 49f67608bf6b..000000000000 --- a/src/runtime/vendor/github.com/cespare/xxhash/v2/go.mod +++ /dev/null @@ -1,3 +0,0 @@ -module github.com/cespare/xxhash/v2 - -go 1.11 diff --git a/src/runtime/vendor/github.com/cilium/ebpf/go.mod b/src/runtime/vendor/github.com/cilium/ebpf/go.mod deleted file mode 100644 index f5edf690ab06..000000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/go.mod +++ /dev/null @@ -1,9 +0,0 @@ -module github.com/cilium/ebpf - -go 1.16 - -require ( - github.com/frankban/quicktest v1.11.3 - github.com/google/go-cmp v0.5.4 - golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34 -) diff --git a/src/runtime/vendor/github.com/cilium/ebpf/go.sum b/src/runtime/vendor/github.com/cilium/ebpf/go.sum deleted 
file mode 100644 index 1ef5a4767e81..000000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/go.sum +++ /dev/null @@ -1,13 +0,0 @@ -github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY= -github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= -github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34 h1:GkvMjFtXUmahfDtashnc1mnrCtuBVcwse5QV2lUk/tI= -golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/src/runtime/vendor/github.com/containerd/cgroups/README.md b/src/runtime/vendor/github.com/containerd/cgroups/README.md index fc2c7a9be4d2..d2073af3abc8 100644 --- a/src/runtime/vendor/github.com/containerd/cgroups/README.md +++ b/src/runtime/vendor/github.com/containerd/cgroups/README.md @@ -9,7 +9,7 @@ Go package for creating, managing, inspecting, and destroying cgroups. The resources format for settings on the cgroup uses the OCI runtime-spec found [here](https://github.com/opencontainers/runtime-spec). -## Examples +## Examples (v1) ### Create a new cgroup @@ -26,7 +26,7 @@ uses the v1 implementation of cgroups. ```go shares := uint64(100) control, err := cgroups.New(cgroups.V1, cgroups.StaticPath("/test"), &specs.LinuxResources{ - CPU: &specs.CPU{ + CPU: &specs.LinuxCPU{ Shares: &shares, }, }) @@ -58,7 +58,7 @@ if err := control.Add(cgroups.Process{Pid:1234}); err != nil { } ``` -### Update the cgroup +### Update the cgroup To update the resources applied in the cgroup @@ -133,6 +133,61 @@ event := cgroups.OOMEvent() efd, err := control.RegisterMemoryEvent(event) ``` +## Examples (v2/unified) + +### Check that the current system is running cgroups v2 + +```go +var cgroupV2 bool +if cgroups.Mode() == cgroups.Unified { + cgroupV2 = true +} +``` + +### Create a new cgroup + +This creates a new systemd v2 cgroup slice. Systemd slices consider ["-" a special character](https://www.freedesktop.org/software/systemd/man/systemd.slice.html), +so the resulting slice would be located here on disk: + +* /sys/fs/cgroup/my.slice/my-cgroup.slice/my-cgroup-abc.slice + +```go +import ( + cgroupsv2 "github.com/containerd/cgroups/v2" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +res := cgroupsv2.Resources{} +// dummy PID of -1 is used for creating a "general slice" to be used as a parent cgroup. 
+// see https://github.com/containerd/cgroups/blob/1df78138f1e1e6ee593db155c6b369466f577651/v2/manager.go#L732-L735 +m, err := cgroupsv2.NewSystemd("/", "my-cgroup-abc.slice", -1, &res) +if err != nil { + return err +} +``` + +### Load an existing cgroup + +```go +m, err := cgroupsv2.LoadSystemd("/", "my-cgroup-abc.slice") +if err != nil { + return err +} +``` + +### Delete a cgroup + +```go +m, err := cgroupsv2.LoadSystemd("/", "my-cgroup-abc.slice") +if err != nil { + return err +} +err = m.DeleteSystemd() +if err != nil { + return err +} +``` + ### Attention All static path should not include `/sys/fs/cgroup/` prefix, it should start with your own cgroups name diff --git a/src/runtime/vendor/github.com/containerd/cgroups/Vagrantfile b/src/runtime/vendor/github.com/containerd/cgroups/Vagrantfile index 4596ad8a7d45..9a4aac8cb344 100644 --- a/src/runtime/vendor/github.com/containerd/cgroups/Vagrantfile +++ b/src/runtime/vendor/github.com/containerd/cgroups/Vagrantfile @@ -3,19 +3,19 @@ Vagrant.configure("2") do |config| # Fedora box is used for testing cgroup v2 support - config.vm.box = "fedora/32-cloud-base" + config.vm.box = "fedora/35-cloud-base" config.vm.provider :virtualbox do |v| - v.memory = 2048 + v.memory = 4096 v.cpus = 2 end config.vm.provider :libvirt do |v| - v.memory = 2048 + v.memory = 4096 v.cpus = 2 end config.vm.provision "shell", inline: <<-SHELL set -eux -o pipefail # configuration - GO_VERSION="1.15" + GO_VERSION="1.17.7" # install gcc and Golang dnf -y install gcc diff --git a/src/runtime/vendor/github.com/containerd/cgroups/go.mod b/src/runtime/vendor/github.com/containerd/cgroups/go.mod deleted file mode 100644 index 80d3f6ea630f..000000000000 --- a/src/runtime/vendor/github.com/containerd/cgroups/go.mod +++ /dev/null @@ -1,18 +0,0 @@ -module github.com/containerd/cgroups - -go 1.16 - -require ( - github.com/cilium/ebpf v0.4.0 - github.com/coreos/go-systemd/v22 v22.3.2 - github.com/cpuguy83/go-md2man/v2 v2.0.0 // indirect - github.com/docker/go-units v0.4.0 - github.com/godbus/dbus/v5 v5.0.4 - github.com/gogo/protobuf v1.3.2 - github.com/opencontainers/runtime-spec v1.0.2 - github.com/sirupsen/logrus v1.8.1 - github.com/stretchr/testify v1.7.0 - github.com/urfave/cli v1.22.2 - go.uber.org/goleak v1.1.12 - golang.org/x/sys v0.0.0-20210510120138-977fb7262007 -) diff --git a/src/runtime/vendor/github.com/containerd/cgroups/go.sum b/src/runtime/vendor/github.com/containerd/cgroups/go.sum deleted file mode 100644 index cda30b49f6c5..000000000000 --- a/src/runtime/vendor/github.com/containerd/cgroups/go.sum +++ /dev/null @@ -1,98 +0,0 @@ -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/cilium/ebpf v0.4.0 h1:QlHdikaxALkqWasW8hAC1mfR0jdmvbfaBdBPFmRSglA= -github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= -github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI= -github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.0 h1:EoUDS0afbrsXAZ9YQ9jdu/mZ2sXgT1/2yyNng4PGlyM= -github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 
-github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= -github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY= -github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= -github.com/godbus/dbus/v5 v5.0.4 h1:9349emZab16e7zQvpmsbtjc18ykshndd8y2PG3sgJbA= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0= -github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= -github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/urfave/cli v1.22.2 h1:gsqYFH8bb9ekPA12kRo0hfjngWQjkJPlN9R0N78BoUo= -github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA= -go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= -golang.org/x/crypto 
v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007 h1:gG67DSER+11cZvqIMb8S8bt0vZtiN6xWYARwirrOSfE= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools 
v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.1.5 h1:ouewzE6p+/VEB31YYnTbEJdi8pFqKp4P4n85vwo3DHA= -golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/src/runtime/vendor/github.com/containerd/cgroups/utils.go b/src/runtime/vendor/github.com/containerd/cgroups/utils.go index 2297980d93ce..217138975682 100644 --- a/src/runtime/vendor/github.com/containerd/cgroups/utils.go +++ b/src/runtime/vendor/github.com/containerd/cgroups/utils.go @@ -261,21 +261,28 @@ func parseKV(raw string) (string, uint64, error) { // "pids": "/user.slice/user-1000.slice" // etc. // -// Note that for cgroup v2 unified hierarchy, there are no per-controller -// cgroup paths, so the resulting map will have a single element where the key -// is empty string ("") and the value is the cgroup path the is in. +// The resulting map does not have an element for cgroup v2 unified hierarchy. +// Use ParseCgroupFileUnified to get the unified path. func ParseCgroupFile(path string) (map[string]string, error) { + x, _, err := ParseCgroupFileUnified(path) + return x, err +} + +// ParseCgroupFileUnified returns legacy subsystem paths as the first value, +// and returns the unified path as the second value. 
+func ParseCgroupFileUnified(path string) (map[string]string, string, error) { f, err := os.Open(path) if err != nil { - return nil, err + return nil, "", err } defer f.Close() - return parseCgroupFromReader(f) + return parseCgroupFromReaderUnified(f) } -func parseCgroupFromReader(r io.Reader) (map[string]string, error) { +func parseCgroupFromReaderUnified(r io.Reader) (map[string]string, string, error) { var ( cgroups = make(map[string]string) + unified = "" s = bufio.NewScanner(r) ) for s.Scan() { @@ -284,18 +291,20 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) { parts = strings.SplitN(text, ":", 3) ) if len(parts) < 3 { - return nil, fmt.Errorf("invalid cgroup entry: %q", text) + return nil, unified, fmt.Errorf("invalid cgroup entry: %q", text) } for _, subs := range strings.Split(parts[1], ",") { - if subs != "" { + if subs == "" { + unified = parts[2] + } else { cgroups[subs] = parts[2] } } } if err := s.Err(); err != nil { - return nil, err + return nil, unified, err } - return cgroups, nil + return cgroups, unified, nil } func getCgroupDestination(subsystem string) (string, error) { diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/cpu.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/cpu.go new file mode 100644 index 000000000000..65282ff082d2 --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/cpu.go @@ -0,0 +1,83 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "math" + "strconv" + "strings" +) + +type CPUMax string + +func NewCPUMax(quota *int64, period *uint64) CPUMax { + max := "max" + if quota != nil { + max = strconv.FormatInt(*quota, 10) + } + return CPUMax(strings.Join([]string{max, strconv.FormatUint(*period, 10)}, " ")) +} + +type CPU struct { + Weight *uint64 + Max CPUMax + Cpus string + Mems string +} + +func (c CPUMax) extractQuotaAndPeriod() (int64, uint64) { + var ( + quota int64 + period uint64 + ) + values := strings.Split(string(c), " ") + if values[0] == "max" { + quota = math.MaxInt64 + } else { + quota, _ = strconv.ParseInt(values[0], 10, 64) + } + period, _ = strconv.ParseUint(values[1], 10, 64) + return quota, period +} + +func (r *CPU) Values() (o []Value) { + if r.Weight != nil { + o = append(o, Value{ + filename: "cpu.weight", + value: *r.Weight, + }) + } + if r.Max != "" { + o = append(o, Value{ + filename: "cpu.max", + value: r.Max, + }) + } + if r.Cpus != "" { + o = append(o, Value{ + filename: "cpuset.cpus", + value: r.Cpus, + }) + } + if r.Mems != "" { + o = append(o, Value{ + filename: "cpuset.mems", + value: r.Mems, + }) + } + return o +} diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/devicefilter.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/devicefilter.go new file mode 100644 index 000000000000..0882036c2dcd --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/devicefilter.go @@ -0,0 +1,200 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Devicefilter containes eBPF device filter program +// +// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c +// +// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano) +// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397 +// +// This particular Go implementation based on runc version +// https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go + +package v2 + +import ( + "errors" + "fmt" + "math" + + "github.com/cilium/ebpf/asm" + "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" +) + +const ( + // license string format is same as kernel MODULE_LICENSE macro + license = "Apache" +) + +// DeviceFilter returns eBPF device filter program and its license string +func DeviceFilter(devices []specs.LinuxDeviceCgroup) (asm.Instructions, string, error) { + p := &program{} + p.init() + for i := len(devices) - 1; i >= 0; i-- { + if err := p.appendDevice(devices[i]); err != nil { + return nil, "", err + } + } + insts, err := p.finalize() + return insts, license, err +} + +type program struct { + insts asm.Instructions + hasWildCard bool + blockID int +} + +func (p *program) init() { + // struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423 + /* + u32 access_type + u32 major + u32 minor + */ + // R2 <- type (lower 16 bit of u32 access_type at R1[0]) + p.insts = append(p.insts, + asm.LoadMem(asm.R2, asm.R1, 0, asm.Half)) + + // R3 <- access (upper 16 bit of u32 access_type at R1[0]) + p.insts = append(p.insts, + asm.LoadMem(asm.R3, asm.R1, 0, asm.Word), + // RSh: bitwise shift right + asm.RSh.Imm32(asm.R3, 16)) + + // R4 <- major (u32 major at R1[4]) + p.insts = append(p.insts, + asm.LoadMem(asm.R4, asm.R1, 4, asm.Word)) + + // R5 <- minor (u32 minor at R1[8]) + p.insts = append(p.insts, + asm.LoadMem(asm.R5, asm.R1, 8, asm.Word)) +} + +// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element. 
+func (p *program) appendDevice(dev specs.LinuxDeviceCgroup) error { + if p.blockID < 0 { + return errors.New("the program is finalized") + } + if p.hasWildCard { + // All entries after wildcard entry are ignored + return nil + } + + bpfType := int32(-1) + hasType := true + switch dev.Type { + case string('c'): + bpfType = int32(unix.BPF_DEVCG_DEV_CHAR) + case string('b'): + bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK) + case string('a'): + hasType = false + default: + // if not specified in OCI json, typ is set to DeviceTypeAll + return fmt.Errorf("invalid DeviceType %q", dev.Type) + } + if *dev.Major > math.MaxUint32 { + return fmt.Errorf("invalid major %d", *dev.Major) + } + if *dev.Minor > math.MaxUint32 { + return fmt.Errorf("invalid minor %d", *dev.Major) + } + hasMajor := *dev.Major >= 0 // if not specified in OCI json, major is set to -1 + hasMinor := *dev.Minor >= 0 + bpfAccess := int32(0) + for _, r := range dev.Access { + switch r { + case 'r': + bpfAccess |= unix.BPF_DEVCG_ACC_READ + case 'w': + bpfAccess |= unix.BPF_DEVCG_ACC_WRITE + case 'm': + bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD + default: + return fmt.Errorf("unknown device access %v", r) + } + } + // If the access is rwm, skip the check. + hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD) + + blockSym := fmt.Sprintf("block-%d", p.blockID) + nextBlockSym := fmt.Sprintf("block-%d", p.blockID+1) + prevBlockLastIdx := len(p.insts) - 1 + if hasType { + p.insts = append(p.insts, + // if (R2 != bpfType) goto next + asm.JNE.Imm(asm.R2, bpfType, nextBlockSym), + ) + } + if hasAccess { + p.insts = append(p.insts, + // if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next + asm.Mov.Reg32(asm.R1, asm.R3), + asm.And.Imm32(asm.R1, bpfAccess), + asm.JEq.Imm(asm.R1, 0, nextBlockSym), + ) + } + if hasMajor { + p.insts = append(p.insts, + // if (R4 != major) goto next + asm.JNE.Imm(asm.R4, int32(*dev.Major), nextBlockSym), + ) + } + if hasMinor { + p.insts = append(p.insts, + // if (R5 != minor) goto next + asm.JNE.Imm(asm.R5, int32(*dev.Minor), nextBlockSym), + ) + } + if !hasType && !hasAccess && !hasMajor && !hasMinor { + p.hasWildCard = true + } + p.insts = append(p.insts, acceptBlock(dev.Allow)...) + // set blockSym to the first instruction we added in this iteration + p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym) + p.blockID++ + return nil +} + +func (p *program) finalize() (asm.Instructions, error) { + if p.hasWildCard { + // acceptBlock with asm.Return() is already inserted + return p.insts, nil + } + blockSym := fmt.Sprintf("block-%d", p.blockID) + p.insts = append(p.insts, + // R0 <- 0 + asm.Mov.Imm32(asm.R0, 0).Sym(blockSym), + asm.Return(), + ) + p.blockID = -1 + return p.insts, nil +} + +func acceptBlock(accept bool) asm.Instructions { + v := int32(0) + if accept { + v = 1 + } + return []asm.Instruction{ + // R0 <- v + asm.Mov.Imm32(asm.R0, v), + asm.Return(), + } +} diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/ebpf.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/ebpf.go new file mode 100644 index 000000000000..45bf5f99e371 --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/ebpf.go @@ -0,0 +1,96 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "fmt" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/link" + "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" +) + +// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/ directory. +// +// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 . +// +// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92 +func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) { + nilCloser := func() error { + return nil + } + spec := &ebpf.ProgramSpec{ + Type: ebpf.CGroupDevice, + Instructions: insts, + License: license, + } + prog, err := ebpf.NewProgram(spec) + if err != nil { + return nilCloser, err + } + err = link.RawAttachProgram(link.RawAttachProgramOptions{ + Target: dirFD, + Program: prog, + Attach: ebpf.AttachCGroupDevice, + Flags: unix.BPF_F_ALLOW_MULTI, + }) + if err != nil { + return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err) + } + closer := func() error { + err = link.RawDetachProgram(link.RawDetachProgramOptions{ + Target: dirFD, + Program: prog, + Attach: ebpf.AttachCGroupDevice, + }) + if err != nil { + return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err) + } + return nil + } + return closer, nil +} + +func isRWM(cgroupPermissions string) bool { + r := false + w := false + m := false + for _, rn := range cgroupPermissions { + switch rn { + case 'r': + r = true + case 'w': + w = true + case 'm': + m = true + } + } + return r && w && m +} + +// the logic is from runc +// https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/fs/devices_v2.go#L44 +func canSkipEBPFError(devices []specs.LinuxDeviceCgroup) bool { + for _, dev := range devices { + if dev.Allow || !isRWM(dev.Access) { + return false + } + } + return true +} diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/errors.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/errors.go new file mode 100644 index 000000000000..eeae362b2790 --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/errors.go @@ -0,0 +1,26 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package v2 + +import ( + "errors" +) + +var ( + ErrInvalidFormat = errors.New("cgroups: parsing file with invalid format failed") + ErrInvalidGroupPath = errors.New("cgroups: invalid group path") +) diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/hugetlb.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/hugetlb.go new file mode 100644 index 000000000000..16b35bd780be --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/hugetlb.go @@ -0,0 +1,37 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import "strings" + +type HugeTlb []HugeTlbEntry + +type HugeTlbEntry struct { + HugePageSize string + Limit uint64 +} + +func (r *HugeTlb) Values() (o []Value) { + for _, e := range *r { + o = append(o, Value{ + filename: strings.Join([]string{"hugetlb", e.HugePageSize, "max"}, "."), + value: e.Limit, + }) + } + + return o +} diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/io.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/io.go new file mode 100644 index 000000000000..70078d576ec3 --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/io.go @@ -0,0 +1,64 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import "fmt" + +type IOType string + +const ( + ReadBPS IOType = "rbps" + WriteBPS IOType = "wbps" + ReadIOPS IOType = "riops" + WriteIOPS IOType = "wiops" +) + +type BFQ struct { + Weight uint16 +} + +type Entry struct { + Type IOType + Major int64 + Minor int64 + Rate uint64 +} + +func (e Entry) String() string { + return fmt.Sprintf("%d:%d %s=%d", e.Major, e.Minor, e.Type, e.Rate) +} + +type IO struct { + BFQ BFQ + Max []Entry +} + +func (i *IO) Values() (o []Value) { + if i.BFQ.Weight != 0 { + o = append(o, Value{ + filename: "io.bfq.weight", + value: i.BFQ.Weight, + }) + } + for _, e := range i.Max { + o = append(o, Value{ + filename: "io.max", + value: e.String(), + }) + } + return o +} diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/manager.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/manager.go new file mode 100644 index 000000000000..a03b00e08051 --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/manager.go @@ -0,0 +1,879 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "bufio" + "context" + "errors" + "fmt" + "io/ioutil" + "math" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" + "time" + + "github.com/containerd/cgroups/v2/stats" + + systemdDbus "github.com/coreos/go-systemd/v22/dbus" + "github.com/godbus/dbus/v5" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +const ( + subtreeControl = "cgroup.subtree_control" + controllersFile = "cgroup.controllers" + defaultCgroup2Path = "/sys/fs/cgroup" + defaultSlice = "system.slice" +) + +var ( + canDelegate bool +) + +type Event struct { + Low uint64 + High uint64 + Max uint64 + OOM uint64 + OOMKill uint64 +} + +// Resources for a cgroups v2 unified hierarchy +type Resources struct { + CPU *CPU + Memory *Memory + Pids *Pids + IO *IO + RDMA *RDMA + HugeTlb *HugeTlb + // When len(Devices) is zero, devices are not controlled + Devices []specs.LinuxDeviceCgroup +} + +// Values returns the raw filenames and values that +// can be written to the unified hierarchy +func (r *Resources) Values() (o []Value) { + if r.CPU != nil { + o = append(o, r.CPU.Values()...) + } + if r.Memory != nil { + o = append(o, r.Memory.Values()...) + } + if r.Pids != nil { + o = append(o, r.Pids.Values()...) + } + if r.IO != nil { + o = append(o, r.IO.Values()...) + } + if r.RDMA != nil { + o = append(o, r.RDMA.Values()...) + } + if r.HugeTlb != nil { + o = append(o, r.HugeTlb.Values()...) 
+ } + return o +} + +// EnabledControllers returns the list of all not nil resource controllers +func (r *Resources) EnabledControllers() (c []string) { + if r.CPU != nil { + c = append(c, "cpu") + c = append(c, "cpuset") + } + if r.Memory != nil { + c = append(c, "memory") + } + if r.Pids != nil { + c = append(c, "pids") + } + if r.IO != nil { + c = append(c, "io") + } + if r.RDMA != nil { + c = append(c, "rdma") + } + if r.HugeTlb != nil { + c = append(c, "hugetlb") + } + return +} + +// Value of a cgroup setting +type Value struct { + filename string + value interface{} +} + +// write the value to the full, absolute path, of a unified hierarchy +func (c *Value) write(path string, perm os.FileMode) error { + var data []byte + switch t := c.value.(type) { + case uint64: + data = []byte(strconv.FormatUint(t, 10)) + case uint16: + data = []byte(strconv.FormatUint(uint64(t), 10)) + case int64: + data = []byte(strconv.FormatInt(t, 10)) + case []byte: + data = t + case string: + data = []byte(t) + case CPUMax: + data = []byte(t) + default: + return ErrInvalidFormat + } + + // Retry writes on EINTR; see: + // https://github.com/golang/go/issues/38033 + for { + err := ioutil.WriteFile( + filepath.Join(path, c.filename), + data, + perm, + ) + if err == nil { + return nil + } else if !errors.Is(err, syscall.EINTR) { + return err + } + } +} + +func writeValues(path string, values []Value) error { + for _, o := range values { + if err := o.write(path, defaultFilePerm); err != nil { + return err + } + } + return nil +} + +func NewManager(mountpoint string, group string, resources *Resources) (*Manager, error) { + if resources == nil { + return nil, errors.New("resources reference is nil") + } + if err := VerifyGroupPath(group); err != nil { + return nil, err + } + path := filepath.Join(mountpoint, group) + if err := os.MkdirAll(path, defaultDirPerm); err != nil { + return nil, err + } + m := Manager{ + unifiedMountpoint: mountpoint, + path: path, + } + if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil { + // clean up cgroup dir on failure + os.Remove(path) + return nil, err + } + if err := setResources(path, resources); err != nil { + os.Remove(path) + return nil, err + } + return &m, nil +} + +func LoadManager(mountpoint string, group string) (*Manager, error) { + if err := VerifyGroupPath(group); err != nil { + return nil, err + } + path := filepath.Join(mountpoint, group) + return &Manager{ + unifiedMountpoint: mountpoint, + path: path, + }, nil +} + +type Manager struct { + unifiedMountpoint string + path string +} + +func setResources(path string, resources *Resources) error { + if resources != nil { + if err := writeValues(path, resources.Values()); err != nil { + return err + } + if err := setDevices(path, resources.Devices); err != nil { + return err + } + } + return nil +} + +func (c *Manager) RootControllers() ([]string, error) { + b, err := ioutil.ReadFile(filepath.Join(c.unifiedMountpoint, controllersFile)) + if err != nil { + return nil, err + } + return strings.Fields(string(b)), nil +} + +func (c *Manager) Controllers() ([]string, error) { + b, err := ioutil.ReadFile(filepath.Join(c.path, controllersFile)) + if err != nil { + return nil, err + } + return strings.Fields(string(b)), nil +} + +func (c *Manager) Update(resources *Resources) error { + return setResources(c.path, resources) +} + +type ControllerToggle int + +const ( + Enable ControllerToggle = iota + 1 + Disable +) + +func toggleFunc(controllers []string, prefix string) []string { + out := 
make([]string, len(controllers)) + for i, c := range controllers { + out[i] = prefix + c + } + return out +} + +func (c *Manager) ToggleControllers(controllers []string, t ControllerToggle) error { + // when c.path is like /foo/bar/baz, the following files need to be written: + // * /sys/fs/cgroup/cgroup.subtree_control + // * /sys/fs/cgroup/foo/cgroup.subtree_control + // * /sys/fs/cgroup/foo/bar/cgroup.subtree_control + // Note that /sys/fs/cgroup/foo/bar/baz/cgroup.subtree_control does not need to be written. + split := strings.Split(c.path, "/") + var lastErr error + for i := range split { + f := strings.Join(split[:i], "/") + if !strings.HasPrefix(f, c.unifiedMountpoint) || f == c.path { + continue + } + filePath := filepath.Join(f, subtreeControl) + if err := c.writeSubtreeControl(filePath, controllers, t); err != nil { + // When running as rootless, the user may face EPERM on parent groups, but it is neglible when the + // controller is already written. + // So we only return the last error. + lastErr = fmt.Errorf("failed to write subtree controllers %+v to %q: %w", controllers, filePath, err) + } else { + lastErr = nil + } + } + return lastErr +} + +func (c *Manager) writeSubtreeControl(filePath string, controllers []string, t ControllerToggle) error { + f, err := os.OpenFile(filePath, os.O_WRONLY, 0) + if err != nil { + return err + } + defer f.Close() + switch t { + case Enable: + controllers = toggleFunc(controllers, "+") + case Disable: + controllers = toggleFunc(controllers, "-") + } + _, err = f.WriteString(strings.Join(controllers, " ")) + return err +} + +func (c *Manager) NewChild(name string, resources *Resources) (*Manager, error) { + if strings.HasPrefix(name, "/") { + return nil, errors.New("name must be relative") + } + path := filepath.Join(c.path, name) + if err := os.MkdirAll(path, defaultDirPerm); err != nil { + return nil, err + } + m := Manager{ + unifiedMountpoint: c.unifiedMountpoint, + path: path, + } + if resources != nil { + if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil { + // clean up cgroup dir on failure + os.Remove(path) + return nil, err + } + } + if err := setResources(path, resources); err != nil { + // clean up cgroup dir on failure + os.Remove(path) + return nil, err + } + return &m, nil +} + +func (c *Manager) AddProc(pid uint64) error { + v := Value{ + filename: cgroupProcs, + value: pid, + } + return writeValues(c.path, []Value{v}) +} + +func (c *Manager) Delete() error { + return remove(c.path) +} + +func (c *Manager) Procs(recursive bool) ([]uint64, error) { + var processes []uint64 + err := filepath.Walk(c.path, func(p string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !recursive && info.IsDir() { + if p == c.path { + return nil + } + return filepath.SkipDir + } + _, name := filepath.Split(p) + if name != cgroupProcs { + return nil + } + procs, err := parseCgroupProcsFile(p) + if err != nil { + return err + } + processes = append(processes, procs...) 
+ return nil + }) + return processes, err +} + +func (c *Manager) MoveTo(destination *Manager) error { + processes, err := c.Procs(true) + if err != nil { + return err + } + for _, p := range processes { + if err := destination.AddProc(p); err != nil { + if strings.Contains(err.Error(), "no such process") { + continue + } + return err + } + } + return nil +} + +var singleValueFiles = []string{ + "pids.current", + "pids.max", +} + +func (c *Manager) Stat() (*stats.Metrics, error) { + controllers, err := c.Controllers() + if err != nil { + return nil, err + } + out := make(map[string]interface{}) + for _, controller := range controllers { + switch controller { + case "cpu", "memory": + if err := readKVStatsFile(c.path, controller+".stat", out); err != nil { + if os.IsNotExist(err) { + continue + } + return nil, err + } + } + } + for _, name := range singleValueFiles { + if err := readSingleFile(c.path, name, out); err != nil { + if os.IsNotExist(err) { + continue + } + return nil, err + } + } + memoryEvents := make(map[string]interface{}) + if err := readKVStatsFile(c.path, "memory.events", memoryEvents); err != nil { + if !os.IsNotExist(err) { + return nil, err + } + } + var metrics stats.Metrics + + metrics.Pids = &stats.PidsStat{ + Current: getPidValue("pids.current", out), + Limit: getPidValue("pids.max", out), + } + metrics.CPU = &stats.CPUStat{ + UsageUsec: getUint64Value("usage_usec", out), + UserUsec: getUint64Value("user_usec", out), + SystemUsec: getUint64Value("system_usec", out), + NrPeriods: getUint64Value("nr_periods", out), + NrThrottled: getUint64Value("nr_throttled", out), + ThrottledUsec: getUint64Value("throttled_usec", out), + } + metrics.Memory = &stats.MemoryStat{ + Anon: getUint64Value("anon", out), + File: getUint64Value("file", out), + KernelStack: getUint64Value("kernel_stack", out), + Slab: getUint64Value("slab", out), + Sock: getUint64Value("sock", out), + Shmem: getUint64Value("shmem", out), + FileMapped: getUint64Value("file_mapped", out), + FileDirty: getUint64Value("file_dirty", out), + FileWriteback: getUint64Value("file_writeback", out), + AnonThp: getUint64Value("anon_thp", out), + InactiveAnon: getUint64Value("inactive_anon", out), + ActiveAnon: getUint64Value("active_anon", out), + InactiveFile: getUint64Value("inactive_file", out), + ActiveFile: getUint64Value("active_file", out), + Unevictable: getUint64Value("unevictable", out), + SlabReclaimable: getUint64Value("slab_reclaimable", out), + SlabUnreclaimable: getUint64Value("slab_unreclaimable", out), + Pgfault: getUint64Value("pgfault", out), + Pgmajfault: getUint64Value("pgmajfault", out), + WorkingsetRefault: getUint64Value("workingset_refault", out), + WorkingsetActivate: getUint64Value("workingset_activate", out), + WorkingsetNodereclaim: getUint64Value("workingset_nodereclaim", out), + Pgrefill: getUint64Value("pgrefill", out), + Pgscan: getUint64Value("pgscan", out), + Pgsteal: getUint64Value("pgsteal", out), + Pgactivate: getUint64Value("pgactivate", out), + Pgdeactivate: getUint64Value("pgdeactivate", out), + Pglazyfree: getUint64Value("pglazyfree", out), + Pglazyfreed: getUint64Value("pglazyfreed", out), + ThpFaultAlloc: getUint64Value("thp_fault_alloc", out), + ThpCollapseAlloc: getUint64Value("thp_collapse_alloc", out), + Usage: getStatFileContentUint64(filepath.Join(c.path, "memory.current")), + UsageLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.max")), + SwapUsage: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.current")), + SwapLimit: 
getStatFileContentUint64(filepath.Join(c.path, "memory.swap.max")), + } + if len(memoryEvents) > 0 { + metrics.MemoryEvents = &stats.MemoryEvents{ + Low: getUint64Value("low", memoryEvents), + High: getUint64Value("high", memoryEvents), + Max: getUint64Value("max", memoryEvents), + Oom: getUint64Value("oom", memoryEvents), + OomKill: getUint64Value("oom_kill", memoryEvents), + } + } + metrics.Io = &stats.IOStat{Usage: readIoStats(c.path)} + metrics.Rdma = &stats.RdmaStat{ + Current: rdmaStats(filepath.Join(c.path, "rdma.current")), + Limit: rdmaStats(filepath.Join(c.path, "rdma.max")), + } + metrics.Hugetlb = readHugeTlbStats(c.path) + + return &metrics, nil +} + +func getUint64Value(key string, out map[string]interface{}) uint64 { + v, ok := out[key] + if !ok { + return 0 + } + switch t := v.(type) { + case uint64: + return t + } + return 0 +} + +func getPidValue(key string, out map[string]interface{}) uint64 { + v, ok := out[key] + if !ok { + return 0 + } + switch t := v.(type) { + case uint64: + return t + case string: + if t == "max" { + return math.MaxUint64 + } + } + return 0 +} + +func readSingleFile(path string, file string, out map[string]interface{}) error { + f, err := os.Open(filepath.Join(path, file)) + if err != nil { + return err + } + defer f.Close() + data, err := ioutil.ReadAll(f) + if err != nil { + return err + } + s := strings.TrimSpace(string(data)) + v, err := parseUint(s, 10, 64) + if err != nil { + // if we cannot parse as a uint, parse as a string + out[file] = s + return nil + } + out[file] = v + return nil +} + +func readKVStatsFile(path string, file string, out map[string]interface{}) error { + f, err := os.Open(filepath.Join(path, file)) + if err != nil { + return err + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + name, value, err := parseKV(s.Text()) + if err != nil { + return fmt.Errorf("error while parsing %s (line=%q): %w", filepath.Join(path, file), s.Text(), err) + } + out[name] = value + } + return s.Err() +} + +func (c *Manager) Freeze() error { + return c.freeze(c.path, Frozen) +} + +func (c *Manager) Thaw() error { + return c.freeze(c.path, Thawed) +} + +func (c *Manager) freeze(path string, state State) error { + values := state.Values() + for { + if err := writeValues(path, values); err != nil { + return err + } + current, err := fetchState(path) + if err != nil { + return err + } + if current == state { + return nil + } + time.Sleep(1 * time.Millisecond) + } +} + +func (c *Manager) isCgroupEmpty() bool { + // In case of any error we return true so that we exit and don't leak resources + out := make(map[string]interface{}) + if err := readKVStatsFile(c.path, "cgroup.events", out); err != nil { + return true + } + if v, ok := out["populated"]; ok { + populated, ok := v.(uint64) + if !ok { + return true + } + return populated == 0 + } + return true +} + +// MemoryEventFD returns inotify file descriptor and 'memory.events' inotify watch descriptor +func (c *Manager) MemoryEventFD() (int, uint32, error) { + fpath := filepath.Join(c.path, "memory.events") + fd, err := syscall.InotifyInit() + if err != nil { + return 0, 0, errors.New("failed to create inotify fd") + } + wd, err := syscall.InotifyAddWatch(fd, fpath, unix.IN_MODIFY) + if err != nil { + syscall.Close(fd) + return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", fpath, err) + } + // monitor to detect process exit/cgroup deletion + evpath := filepath.Join(c.path, "cgroup.events") + if _, err = syscall.InotifyAddWatch(fd, evpath, unix.IN_MODIFY); err != nil { + 
syscall.Close(fd) + return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", evpath, err) + } + + return fd, uint32(wd), nil +} + +func (c *Manager) EventChan() (<-chan Event, <-chan error) { + ec := make(chan Event) + errCh := make(chan error, 1) + go c.waitForEvents(ec, errCh) + + return ec, errCh +} + +func parseMemoryEvents(out map[string]interface{}) (Event, error) { + e := Event{} + if v, ok := out["high"]; ok { + e.High, ok = v.(uint64) + if !ok { + return Event{}, fmt.Errorf("cannot convert high to uint64: %+v", v) + } + } + if v, ok := out["low"]; ok { + e.Low, ok = v.(uint64) + if !ok { + return Event{}, fmt.Errorf("cannot convert low to uint64: %+v", v) + } + } + if v, ok := out["max"]; ok { + e.Max, ok = v.(uint64) + if !ok { + return Event{}, fmt.Errorf("cannot convert max to uint64: %+v", v) + } + } + if v, ok := out["oom"]; ok { + e.OOM, ok = v.(uint64) + if !ok { + return Event{}, fmt.Errorf("cannot convert oom to uint64: %+v", v) + } + } + if v, ok := out["oom_kill"]; ok { + e.OOMKill, ok = v.(uint64) + if !ok { + return Event{}, fmt.Errorf("cannot convert oom_kill to uint64: %+v", v) + } + } + return e, nil +} + +func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) { + defer close(errCh) + + fd, _, err := c.MemoryEventFD() + if err != nil { + errCh <- err + return + } + defer syscall.Close(fd) + + for { + buffer := make([]byte, syscall.SizeofInotifyEvent*10) + bytesRead, err := syscall.Read(fd, buffer) + if err != nil { + errCh <- err + return + } + if bytesRead >= syscall.SizeofInotifyEvent { + out := make(map[string]interface{}) + if err := readKVStatsFile(c.path, "memory.events", out); err != nil { + // When cgroup is deleted read may return -ENODEV instead of -ENOENT from open. + if _, statErr := os.Lstat(filepath.Join(c.path, "memory.events")); !os.IsNotExist(statErr) { + errCh <- err + } + return + } + e, err := parseMemoryEvents(out) + if err != nil { + errCh <- err + return + } + ec <- e + if c.isCgroupEmpty() { + return + } + } + } +} + +func setDevices(path string, devices []specs.LinuxDeviceCgroup) error { + if len(devices) == 0 { + return nil + } + insts, license, err := DeviceFilter(devices) + if err != nil { + return err + } + dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0600) + if err != nil { + return fmt.Errorf("cannot get dir FD for %s", path) + } + defer unix.Close(dirFD) + if _, err := LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil { + if !canSkipEBPFError(devices) { + return err + } + } + return nil +} + +// getSystemdFullPath returns the full systemd path when creating a systemd slice group. +// the reason this is necessary is because the "-" character has a special meaning in +// systemd slice. For example, when creating a slice called "my-group-112233.slice", +// systemd will create a hierarchy like this: +// /sys/fs/cgroup/my.slice/my-group.slice/my-group-112233.slice +func getSystemdFullPath(slice, group string) string { + return filepath.Join(defaultCgroup2Path, dashesToPath(slice), dashesToPath(group)) +} + +// dashesToPath converts a slice name with dashes to it's corresponding systemd filesystem path. 
+func dashesToPath(in string) string { + path := "" + if strings.HasSuffix(in, ".slice") && strings.Contains(in, "-") { + parts := strings.Split(in, "-") + for i := range parts { + s := strings.Join(parts[0:i+1], "-") + if !strings.HasSuffix(s, ".slice") { + s += ".slice" + } + path = filepath.Join(path, s) + } + } else { + path = filepath.Join(path, in) + } + return path +} + +func NewSystemd(slice, group string, pid int, resources *Resources) (*Manager, error) { + if slice == "" { + slice = defaultSlice + } + ctx := context.TODO() + path := getSystemdFullPath(slice, group) + conn, err := systemdDbus.NewWithContext(ctx) + if err != nil { + return &Manager{}, err + } + defer conn.Close() + + properties := []systemdDbus.Property{ + systemdDbus.PropDescription("cgroup " + group), + newSystemdProperty("DefaultDependencies", false), + newSystemdProperty("MemoryAccounting", true), + newSystemdProperty("CPUAccounting", true), + newSystemdProperty("IOAccounting", true), + } + + // if we create a slice, the parent is defined via a Wants= + if strings.HasSuffix(group, ".slice") { + properties = append(properties, systemdDbus.PropWants(defaultSlice)) + } else { + // otherwise, we use Slice= + properties = append(properties, systemdDbus.PropSlice(defaultSlice)) + } + + // only add pid if its valid, -1 is used w/ general slice creation. + if pid != -1 { + properties = append(properties, newSystemdProperty("PIDs", []uint32{uint32(pid)})) + } + + if resources.Memory != nil && resources.Memory.Min != nil && *resources.Memory.Min != 0 { + properties = append(properties, + newSystemdProperty("MemoryMin", uint64(*resources.Memory.Min))) + } + + if resources.Memory != nil && resources.Memory.Max != nil && *resources.Memory.Max != 0 { + properties = append(properties, + newSystemdProperty("MemoryMax", uint64(*resources.Memory.Max))) + } + + if resources.CPU != nil && resources.CPU.Weight != nil && *resources.CPU.Weight != 0 { + properties = append(properties, + newSystemdProperty("CPUWeight", *resources.CPU.Weight)) + } + + if resources.CPU != nil && resources.CPU.Max != "" { + quota, period := resources.CPU.Max.extractQuotaAndPeriod() + // cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd. + // corresponds to USEC_INFINITY in systemd + // if USEC_INFINITY is provided, CPUQuota is left unbound by systemd + // always setting a property value ensures we can apply a quota and remove it later + cpuQuotaPerSecUSec := uint64(math.MaxUint64) + if quota > 0 { + // systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota + // (integer percentage of CPU) internally. This means that if a fractional percent of + // CPU is indicated by Resources.CpuQuota, we need to round up to the nearest + // 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect. 
+ cpuQuotaPerSecUSec = uint64(quota*1000000) / period + if cpuQuotaPerSecUSec%10000 != 0 { + cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000 + } + } + properties = append(properties, + newSystemdProperty("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec)) + } + + // If we can delegate, we add the property back in + if canDelegate { + properties = append(properties, newSystemdProperty("Delegate", true)) + } + + if resources.Pids != nil && resources.Pids.Max > 0 { + properties = append(properties, + newSystemdProperty("TasksAccounting", true), + newSystemdProperty("TasksMax", uint64(resources.Pids.Max))) + } + + statusChan := make(chan string, 1) + if _, err := conn.StartTransientUnitContext(ctx, group, "replace", properties, statusChan); err == nil { + select { + case <-statusChan: + case <-time.After(time.Second): + logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", group) + } + } else if !isUnitExists(err) { + return &Manager{}, err + } + + return &Manager{ + path: path, + }, nil +} + +func LoadSystemd(slice, group string) (*Manager, error) { + if slice == "" { + slice = defaultSlice + } + path := getSystemdFullPath(slice, group) + return &Manager{ + path: path, + }, nil +} + +func (c *Manager) DeleteSystemd() error { + ctx := context.TODO() + conn, err := systemdDbus.NewWithContext(ctx) + if err != nil { + return err + } + defer conn.Close() + group := systemdUnitFromPath(c.path) + ch := make(chan string) + _, err = conn.StopUnitContext(ctx, group, "replace", ch) + if err != nil { + return err + } + <-ch + return nil +} + +func newSystemdProperty(name string, units interface{}) systemdDbus.Property { + return systemdDbus.Property{ + Name: name, + Value: dbus.MakeVariant(units), + } +} diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/memory.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/memory.go new file mode 100644 index 000000000000..6f4733be60ff --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/memory.go @@ -0,0 +1,59 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package v2 + +type Memory struct { + Swap *int64 + Min *int64 + Max *int64 + Low *int64 + High *int64 +} + +func (r *Memory) Values() (o []Value) { + if r.Swap != nil { + o = append(o, Value{ + filename: "memory.swap.max", + value: *r.Swap, + }) + } + if r.Min != nil { + o = append(o, Value{ + filename: "memory.min", + value: *r.Min, + }) + } + if r.Max != nil { + o = append(o, Value{ + filename: "memory.max", + value: *r.Max, + }) + } + if r.Low != nil { + o = append(o, Value{ + filename: "memory.low", + value: *r.Low, + }) + } + if r.High != nil { + o = append(o, Value{ + filename: "memory.high", + value: *r.High, + }) + } + return o +} diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/paths.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/paths.go new file mode 100644 index 000000000000..c4778c14244b --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/paths.go @@ -0,0 +1,60 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "fmt" + "path/filepath" + "strings" +) + +// NestedGroupPath will nest the cgroups based on the calling processes cgroup +// placing its child processes inside its own path +func NestedGroupPath(suffix string) (string, error) { + path, err := parseCgroupFile("/proc/self/cgroup") + if err != nil { + return "", err + } + return filepath.Join(path, suffix), nil +} + +// PidGroupPath will return the correct cgroup paths for an existing process running inside a cgroup +// This is commonly used for the Load function to restore an existing container +func PidGroupPath(pid int) (string, error) { + p := fmt.Sprintf("/proc/%d/cgroup", pid) + return parseCgroupFile(p) +} + +// VerifyGroupPath verifies the format of group path string g. +// The format is same as the third field in /proc/PID/cgroup. +// e.g. "/user.slice/user-1001.slice/session-1.scope" +// +// g must be a "clean" absolute path starts with "/", and must not contain "/sys/fs/cgroup" prefix. +// +// VerifyGroupPath doesn't verify whether g actually exists on the system. +func VerifyGroupPath(g string) error { + if !strings.HasPrefix(g, "/") { + return ErrInvalidGroupPath + } + if filepath.Clean(g) != g { + return ErrInvalidGroupPath + } + if strings.HasPrefix(g, "/sys/fs/cgroup") { + return ErrInvalidGroupPath + } + return nil +} diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/pids.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/pids.go new file mode 100644 index 000000000000..0b5aa0c3bf78 --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/pids.go @@ -0,0 +1,37 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import "strconv" + +type Pids struct { + Max int64 +} + +func (r *Pids) Values() (o []Value) { + if r.Max != 0 { + limit := "max" + if r.Max > 0 { + limit = strconv.FormatInt(r.Max, 10) + } + o = append(o, Value{ + filename: "pids.max", + value: limit, + }) + } + return o +} diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/rdma.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/rdma.go new file mode 100644 index 000000000000..44caa4f57a3c --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/rdma.go @@ -0,0 +1,46 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "fmt" +) + +type RDMA struct { + Limit []RDMAEntry +} + +type RDMAEntry struct { + Device string + HcaHandles uint32 + HcaObjects uint32 +} + +func (r RDMAEntry) String() string { + return fmt.Sprintf("%s hca_handle=%d hca_object=%d", r.Device, r.HcaHandles, r.HcaObjects) +} + +func (r *RDMA) Values() (o []Value) { + for _, e := range r.Limit { + o = append(o, Value{ + filename: "rdma.max", + value: e.String(), + }) + } + + return o +} diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/state.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/state.go new file mode 100644 index 000000000000..09b75b6c3dd3 --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/state.go @@ -0,0 +1,65 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package v2 + +import ( + "io/ioutil" + "path/filepath" + "strings" +) + +// State is a type that represents the state of the current cgroup +type State string + +const ( + Unknown State = "" + Thawed State = "thawed" + Frozen State = "frozen" + Deleted State = "deleted" + + cgroupFreeze = "cgroup.freeze" +) + +func (s State) Values() []Value { + v := Value{ + filename: cgroupFreeze, + } + switch s { + case Frozen: + v.value = "1" + case Thawed: + v.value = "0" + } + return []Value{ + v, + } +} + +func fetchState(path string) (State, error) { + current, err := ioutil.ReadFile(filepath.Join(path, cgroupFreeze)) + if err != nil { + return Unknown, err + } + switch strings.TrimSpace(string(current)) { + case "1": + return Frozen, nil + case "0": + return Thawed, nil + default: + return Unknown, nil + } +} diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/stats/doc.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/stats/doc.go new file mode 100644 index 000000000000..e51e12f80040 --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/stats/doc.go @@ -0,0 +1,17 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package stats diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.pb.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.pb.go new file mode 100644 index 000000000000..0bd493998f71 --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.pb.go @@ -0,0 +1,3992 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: github.com/containerd/cgroups/v2/stats/metrics.proto + +package stats + +import ( + fmt "fmt" + _ "github.com/gogo/protobuf/gogoproto" + proto "github.com/gogo/protobuf/proto" + io "io" + math "math" + math_bits "math/bits" + reflect "reflect" + strings "strings" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. 
+const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package + +type Metrics struct { + Pids *PidsStat `protobuf:"bytes,1,opt,name=pids,proto3" json:"pids,omitempty"` + CPU *CPUStat `protobuf:"bytes,2,opt,name=cpu,proto3" json:"cpu,omitempty"` + Memory *MemoryStat `protobuf:"bytes,4,opt,name=memory,proto3" json:"memory,omitempty"` + Rdma *RdmaStat `protobuf:"bytes,5,opt,name=rdma,proto3" json:"rdma,omitempty"` + Io *IOStat `protobuf:"bytes,6,opt,name=io,proto3" json:"io,omitempty"` + Hugetlb []*HugeTlbStat `protobuf:"bytes,7,rep,name=hugetlb,proto3" json:"hugetlb,omitempty"` + MemoryEvents *MemoryEvents `protobuf:"bytes,8,opt,name=memory_events,json=memoryEvents,proto3" json:"memory_events,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *Metrics) Reset() { *m = Metrics{} } +func (*Metrics) ProtoMessage() {} +func (*Metrics) Descriptor() ([]byte, []int) { + return fileDescriptor_2fc6005842049e6b, []int{0} +} +func (m *Metrics) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *Metrics) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_Metrics.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *Metrics) XXX_Merge(src proto.Message) { + xxx_messageInfo_Metrics.Merge(m, src) +} +func (m *Metrics) XXX_Size() int { + return m.Size() +} +func (m *Metrics) XXX_DiscardUnknown() { + xxx_messageInfo_Metrics.DiscardUnknown(m) +} + +var xxx_messageInfo_Metrics proto.InternalMessageInfo + +type PidsStat struct { + Current uint64 `protobuf:"varint,1,opt,name=current,proto3" json:"current,omitempty"` + Limit uint64 `protobuf:"varint,2,opt,name=limit,proto3" json:"limit,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *PidsStat) Reset() { *m = PidsStat{} } +func (*PidsStat) ProtoMessage() {} +func (*PidsStat) Descriptor() ([]byte, []int) { + return fileDescriptor_2fc6005842049e6b, []int{1} +} +func (m *PidsStat) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *PidsStat) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_PidsStat.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *PidsStat) XXX_Merge(src proto.Message) { + xxx_messageInfo_PidsStat.Merge(m, src) +} +func (m *PidsStat) XXX_Size() int { + return m.Size() +} +func (m *PidsStat) XXX_DiscardUnknown() { + xxx_messageInfo_PidsStat.DiscardUnknown(m) +} + +var xxx_messageInfo_PidsStat proto.InternalMessageInfo + +type CPUStat struct { + UsageUsec uint64 `protobuf:"varint,1,opt,name=usage_usec,json=usageUsec,proto3" json:"usage_usec,omitempty"` + UserUsec uint64 `protobuf:"varint,2,opt,name=user_usec,json=userUsec,proto3" json:"user_usec,omitempty"` + SystemUsec uint64 `protobuf:"varint,3,opt,name=system_usec,json=systemUsec,proto3" json:"system_usec,omitempty"` + NrPeriods uint64 `protobuf:"varint,4,opt,name=nr_periods,json=nrPeriods,proto3" json:"nr_periods,omitempty"` + NrThrottled uint64 `protobuf:"varint,5,opt,name=nr_throttled,json=nrThrottled,proto3" json:"nr_throttled,omitempty"` + ThrottledUsec uint64 
`protobuf:"varint,6,opt,name=throttled_usec,json=throttledUsec,proto3" json:"throttled_usec,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *CPUStat) Reset() { *m = CPUStat{} } +func (*CPUStat) ProtoMessage() {} +func (*CPUStat) Descriptor() ([]byte, []int) { + return fileDescriptor_2fc6005842049e6b, []int{2} +} +func (m *CPUStat) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *CPUStat) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_CPUStat.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *CPUStat) XXX_Merge(src proto.Message) { + xxx_messageInfo_CPUStat.Merge(m, src) +} +func (m *CPUStat) XXX_Size() int { + return m.Size() +} +func (m *CPUStat) XXX_DiscardUnknown() { + xxx_messageInfo_CPUStat.DiscardUnknown(m) +} + +var xxx_messageInfo_CPUStat proto.InternalMessageInfo + +type MemoryStat struct { + Anon uint64 `protobuf:"varint,1,opt,name=anon,proto3" json:"anon,omitempty"` + File uint64 `protobuf:"varint,2,opt,name=file,proto3" json:"file,omitempty"` + KernelStack uint64 `protobuf:"varint,3,opt,name=kernel_stack,json=kernelStack,proto3" json:"kernel_stack,omitempty"` + Slab uint64 `protobuf:"varint,4,opt,name=slab,proto3" json:"slab,omitempty"` + Sock uint64 `protobuf:"varint,5,opt,name=sock,proto3" json:"sock,omitempty"` + Shmem uint64 `protobuf:"varint,6,opt,name=shmem,proto3" json:"shmem,omitempty"` + FileMapped uint64 `protobuf:"varint,7,opt,name=file_mapped,json=fileMapped,proto3" json:"file_mapped,omitempty"` + FileDirty uint64 `protobuf:"varint,8,opt,name=file_dirty,json=fileDirty,proto3" json:"file_dirty,omitempty"` + FileWriteback uint64 `protobuf:"varint,9,opt,name=file_writeback,json=fileWriteback,proto3" json:"file_writeback,omitempty"` + AnonThp uint64 `protobuf:"varint,10,opt,name=anon_thp,json=anonThp,proto3" json:"anon_thp,omitempty"` + InactiveAnon uint64 `protobuf:"varint,11,opt,name=inactive_anon,json=inactiveAnon,proto3" json:"inactive_anon,omitempty"` + ActiveAnon uint64 `protobuf:"varint,12,opt,name=active_anon,json=activeAnon,proto3" json:"active_anon,omitempty"` + InactiveFile uint64 `protobuf:"varint,13,opt,name=inactive_file,json=inactiveFile,proto3" json:"inactive_file,omitempty"` + ActiveFile uint64 `protobuf:"varint,14,opt,name=active_file,json=activeFile,proto3" json:"active_file,omitempty"` + Unevictable uint64 `protobuf:"varint,15,opt,name=unevictable,proto3" json:"unevictable,omitempty"` + SlabReclaimable uint64 `protobuf:"varint,16,opt,name=slab_reclaimable,json=slabReclaimable,proto3" json:"slab_reclaimable,omitempty"` + SlabUnreclaimable uint64 `protobuf:"varint,17,opt,name=slab_unreclaimable,json=slabUnreclaimable,proto3" json:"slab_unreclaimable,omitempty"` + Pgfault uint64 `protobuf:"varint,18,opt,name=pgfault,proto3" json:"pgfault,omitempty"` + Pgmajfault uint64 `protobuf:"varint,19,opt,name=pgmajfault,proto3" json:"pgmajfault,omitempty"` + WorkingsetRefault uint64 `protobuf:"varint,20,opt,name=workingset_refault,json=workingsetRefault,proto3" json:"workingset_refault,omitempty"` + WorkingsetActivate uint64 `protobuf:"varint,21,opt,name=workingset_activate,json=workingsetActivate,proto3" json:"workingset_activate,omitempty"` + WorkingsetNodereclaim uint64 `protobuf:"varint,22,opt,name=workingset_nodereclaim,json=workingsetNodereclaim,proto3" 
json:"workingset_nodereclaim,omitempty"` + Pgrefill uint64 `protobuf:"varint,23,opt,name=pgrefill,proto3" json:"pgrefill,omitempty"` + Pgscan uint64 `protobuf:"varint,24,opt,name=pgscan,proto3" json:"pgscan,omitempty"` + Pgsteal uint64 `protobuf:"varint,25,opt,name=pgsteal,proto3" json:"pgsteal,omitempty"` + Pgactivate uint64 `protobuf:"varint,26,opt,name=pgactivate,proto3" json:"pgactivate,omitempty"` + Pgdeactivate uint64 `protobuf:"varint,27,opt,name=pgdeactivate,proto3" json:"pgdeactivate,omitempty"` + Pglazyfree uint64 `protobuf:"varint,28,opt,name=pglazyfree,proto3" json:"pglazyfree,omitempty"` + Pglazyfreed uint64 `protobuf:"varint,29,opt,name=pglazyfreed,proto3" json:"pglazyfreed,omitempty"` + ThpFaultAlloc uint64 `protobuf:"varint,30,opt,name=thp_fault_alloc,json=thpFaultAlloc,proto3" json:"thp_fault_alloc,omitempty"` + ThpCollapseAlloc uint64 `protobuf:"varint,31,opt,name=thp_collapse_alloc,json=thpCollapseAlloc,proto3" json:"thp_collapse_alloc,omitempty"` + Usage uint64 `protobuf:"varint,32,opt,name=usage,proto3" json:"usage,omitempty"` + UsageLimit uint64 `protobuf:"varint,33,opt,name=usage_limit,json=usageLimit,proto3" json:"usage_limit,omitempty"` + SwapUsage uint64 `protobuf:"varint,34,opt,name=swap_usage,json=swapUsage,proto3" json:"swap_usage,omitempty"` + SwapLimit uint64 `protobuf:"varint,35,opt,name=swap_limit,json=swapLimit,proto3" json:"swap_limit,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *MemoryStat) Reset() { *m = MemoryStat{} } +func (*MemoryStat) ProtoMessage() {} +func (*MemoryStat) Descriptor() ([]byte, []int) { + return fileDescriptor_2fc6005842049e6b, []int{3} +} +func (m *MemoryStat) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *MemoryStat) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_MemoryStat.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *MemoryStat) XXX_Merge(src proto.Message) { + xxx_messageInfo_MemoryStat.Merge(m, src) +} +func (m *MemoryStat) XXX_Size() int { + return m.Size() +} +func (m *MemoryStat) XXX_DiscardUnknown() { + xxx_messageInfo_MemoryStat.DiscardUnknown(m) +} + +var xxx_messageInfo_MemoryStat proto.InternalMessageInfo + +type MemoryEvents struct { + Low uint64 `protobuf:"varint,1,opt,name=low,proto3" json:"low,omitempty"` + High uint64 `protobuf:"varint,2,opt,name=high,proto3" json:"high,omitempty"` + Max uint64 `protobuf:"varint,3,opt,name=max,proto3" json:"max,omitempty"` + Oom uint64 `protobuf:"varint,4,opt,name=oom,proto3" json:"oom,omitempty"` + OomKill uint64 `protobuf:"varint,5,opt,name=oom_kill,json=oomKill,proto3" json:"oom_kill,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *MemoryEvents) Reset() { *m = MemoryEvents{} } +func (*MemoryEvents) ProtoMessage() {} +func (*MemoryEvents) Descriptor() ([]byte, []int) { + return fileDescriptor_2fc6005842049e6b, []int{4} +} +func (m *MemoryEvents) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *MemoryEvents) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_MemoryEvents.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil 
+ } +} +func (m *MemoryEvents) XXX_Merge(src proto.Message) { + xxx_messageInfo_MemoryEvents.Merge(m, src) +} +func (m *MemoryEvents) XXX_Size() int { + return m.Size() +} +func (m *MemoryEvents) XXX_DiscardUnknown() { + xxx_messageInfo_MemoryEvents.DiscardUnknown(m) +} + +var xxx_messageInfo_MemoryEvents proto.InternalMessageInfo + +type RdmaStat struct { + Current []*RdmaEntry `protobuf:"bytes,1,rep,name=current,proto3" json:"current,omitempty"` + Limit []*RdmaEntry `protobuf:"bytes,2,rep,name=limit,proto3" json:"limit,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *RdmaStat) Reset() { *m = RdmaStat{} } +func (*RdmaStat) ProtoMessage() {} +func (*RdmaStat) Descriptor() ([]byte, []int) { + return fileDescriptor_2fc6005842049e6b, []int{5} +} +func (m *RdmaStat) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *RdmaStat) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_RdmaStat.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *RdmaStat) XXX_Merge(src proto.Message) { + xxx_messageInfo_RdmaStat.Merge(m, src) +} +func (m *RdmaStat) XXX_Size() int { + return m.Size() +} +func (m *RdmaStat) XXX_DiscardUnknown() { + xxx_messageInfo_RdmaStat.DiscardUnknown(m) +} + +var xxx_messageInfo_RdmaStat proto.InternalMessageInfo + +type RdmaEntry struct { + Device string `protobuf:"bytes,1,opt,name=device,proto3" json:"device,omitempty"` + HcaHandles uint32 `protobuf:"varint,2,opt,name=hca_handles,json=hcaHandles,proto3" json:"hca_handles,omitempty"` + HcaObjects uint32 `protobuf:"varint,3,opt,name=hca_objects,json=hcaObjects,proto3" json:"hca_objects,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *RdmaEntry) Reset() { *m = RdmaEntry{} } +func (*RdmaEntry) ProtoMessage() {} +func (*RdmaEntry) Descriptor() ([]byte, []int) { + return fileDescriptor_2fc6005842049e6b, []int{6} +} +func (m *RdmaEntry) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *RdmaEntry) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_RdmaEntry.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *RdmaEntry) XXX_Merge(src proto.Message) { + xxx_messageInfo_RdmaEntry.Merge(m, src) +} +func (m *RdmaEntry) XXX_Size() int { + return m.Size() +} +func (m *RdmaEntry) XXX_DiscardUnknown() { + xxx_messageInfo_RdmaEntry.DiscardUnknown(m) +} + +var xxx_messageInfo_RdmaEntry proto.InternalMessageInfo + +type IOStat struct { + Usage []*IOEntry `protobuf:"bytes,1,rep,name=usage,proto3" json:"usage,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *IOStat) Reset() { *m = IOStat{} } +func (*IOStat) ProtoMessage() {} +func (*IOStat) Descriptor() ([]byte, []int) { + return fileDescriptor_2fc6005842049e6b, []int{7} +} +func (m *IOStat) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *IOStat) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_IOStat.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := 
m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *IOStat) XXX_Merge(src proto.Message) { + xxx_messageInfo_IOStat.Merge(m, src) +} +func (m *IOStat) XXX_Size() int { + return m.Size() +} +func (m *IOStat) XXX_DiscardUnknown() { + xxx_messageInfo_IOStat.DiscardUnknown(m) +} + +var xxx_messageInfo_IOStat proto.InternalMessageInfo + +type IOEntry struct { + Major uint64 `protobuf:"varint,1,opt,name=major,proto3" json:"major,omitempty"` + Minor uint64 `protobuf:"varint,2,opt,name=minor,proto3" json:"minor,omitempty"` + Rbytes uint64 `protobuf:"varint,3,opt,name=rbytes,proto3" json:"rbytes,omitempty"` + Wbytes uint64 `protobuf:"varint,4,opt,name=wbytes,proto3" json:"wbytes,omitempty"` + Rios uint64 `protobuf:"varint,5,opt,name=rios,proto3" json:"rios,omitempty"` + Wios uint64 `protobuf:"varint,6,opt,name=wios,proto3" json:"wios,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *IOEntry) Reset() { *m = IOEntry{} } +func (*IOEntry) ProtoMessage() {} +func (*IOEntry) Descriptor() ([]byte, []int) { + return fileDescriptor_2fc6005842049e6b, []int{8} +} +func (m *IOEntry) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *IOEntry) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_IOEntry.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *IOEntry) XXX_Merge(src proto.Message) { + xxx_messageInfo_IOEntry.Merge(m, src) +} +func (m *IOEntry) XXX_Size() int { + return m.Size() +} +func (m *IOEntry) XXX_DiscardUnknown() { + xxx_messageInfo_IOEntry.DiscardUnknown(m) +} + +var xxx_messageInfo_IOEntry proto.InternalMessageInfo + +type HugeTlbStat struct { + Current uint64 `protobuf:"varint,1,opt,name=current,proto3" json:"current,omitempty"` + Max uint64 `protobuf:"varint,2,opt,name=max,proto3" json:"max,omitempty"` + Pagesize string `protobuf:"bytes,3,opt,name=pagesize,proto3" json:"pagesize,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *HugeTlbStat) Reset() { *m = HugeTlbStat{} } +func (*HugeTlbStat) ProtoMessage() {} +func (*HugeTlbStat) Descriptor() ([]byte, []int) { + return fileDescriptor_2fc6005842049e6b, []int{9} +} +func (m *HugeTlbStat) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *HugeTlbStat) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_HugeTlbStat.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *HugeTlbStat) XXX_Merge(src proto.Message) { + xxx_messageInfo_HugeTlbStat.Merge(m, src) +} +func (m *HugeTlbStat) XXX_Size() int { + return m.Size() +} +func (m *HugeTlbStat) XXX_DiscardUnknown() { + xxx_messageInfo_HugeTlbStat.DiscardUnknown(m) +} + +var xxx_messageInfo_HugeTlbStat proto.InternalMessageInfo + +func init() { + proto.RegisterType((*Metrics)(nil), "io.containerd.cgroups.v2.Metrics") + proto.RegisterType((*PidsStat)(nil), "io.containerd.cgroups.v2.PidsStat") + proto.RegisterType((*CPUStat)(nil), "io.containerd.cgroups.v2.CPUStat") + proto.RegisterType((*MemoryStat)(nil), "io.containerd.cgroups.v2.MemoryStat") + proto.RegisterType((*MemoryEvents)(nil), 
"io.containerd.cgroups.v2.MemoryEvents") + proto.RegisterType((*RdmaStat)(nil), "io.containerd.cgroups.v2.RdmaStat") + proto.RegisterType((*RdmaEntry)(nil), "io.containerd.cgroups.v2.RdmaEntry") + proto.RegisterType((*IOStat)(nil), "io.containerd.cgroups.v2.IOStat") + proto.RegisterType((*IOEntry)(nil), "io.containerd.cgroups.v2.IOEntry") + proto.RegisterType((*HugeTlbStat)(nil), "io.containerd.cgroups.v2.HugeTlbStat") +} + +func init() { + proto.RegisterFile("github.com/containerd/cgroups/v2/stats/metrics.proto", fileDescriptor_2fc6005842049e6b) +} + +var fileDescriptor_2fc6005842049e6b = []byte{ + // 1198 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x56, 0x4d, 0x73, 0xd4, 0x46, + 0x13, 0x66, 0xed, 0xc5, 0xeb, 0xed, 0xb5, 0xc1, 0x0c, 0x86, 0x57, 0xc0, 0xcb, 0xda, 0x5e, 0x02, + 0x45, 0xaa, 0x92, 0xdd, 0x94, 0xf3, 0x55, 0x49, 0x91, 0x4a, 0x19, 0x02, 0x45, 0x8a, 0x10, 0x5c, + 0x02, 0x57, 0x8e, 0xaa, 0x59, 0x69, 0x2c, 0x0d, 0x96, 0x34, 0xaa, 0x99, 0x91, 0x1d, 0x73, 0xca, + 0x21, 0xd7, 0x54, 0x7e, 0x4d, 0xfe, 0x03, 0xb7, 0xe4, 0x98, 0x53, 0x2a, 0xf8, 0x97, 0xa4, 0xba, + 0x67, 0x64, 0x29, 0x07, 0x43, 0x6e, 0xd3, 0x4f, 0x3f, 0xdd, 0xea, 0x8f, 0x99, 0x6e, 0xc1, 0x27, + 0xa9, 0xb4, 0x59, 0x3d, 0x9f, 0xc6, 0xaa, 0x98, 0xc5, 0xaa, 0xb4, 0x5c, 0x96, 0x42, 0x27, 0xb3, + 0x38, 0xd5, 0xaa, 0xae, 0xcc, 0xec, 0x70, 0x7b, 0x66, 0x2c, 0xb7, 0x66, 0x56, 0x08, 0xab, 0x65, + 0x6c, 0xa6, 0x95, 0x56, 0x56, 0xb1, 0x40, 0xaa, 0x69, 0xcb, 0x9e, 0x7a, 0xf6, 0xf4, 0x70, 0xfb, + 0xfa, 0x7a, 0xaa, 0x52, 0x45, 0xa4, 0x19, 0x9e, 0x1c, 0x7f, 0xf2, 0xdb, 0x22, 0x0c, 0x9e, 0x3a, + 0x0f, 0xec, 0x33, 0xe8, 0x57, 0x32, 0x31, 0x41, 0x6f, 0xb3, 0x77, 0x77, 0xb4, 0x3d, 0x99, 0x9e, + 0xe5, 0x6a, 0xba, 0x2b, 0x13, 0xf3, 0xdc, 0x72, 0x1b, 0x12, 0x9f, 0xdd, 0x83, 0xc5, 0xb8, 0xaa, + 0x83, 0x05, 0x32, 0xdb, 0x3a, 0xdb, 0xec, 0xc1, 0xee, 0x1e, 0x5a, 0xdd, 0x1f, 0x9c, 0xfc, 0xb5, + 0xb1, 0xf8, 0x60, 0x77, 0x2f, 0x44, 0x33, 0x76, 0x0f, 0x96, 0x0a, 0x51, 0x28, 0x7d, 0x1c, 0xf4, + 0xc9, 0xc1, 0x7b, 0x67, 0x3b, 0x78, 0x4a, 0x3c, 0xfa, 0xb2, 0xb7, 0xc1, 0x98, 0x75, 0x52, 0xf0, + 0xe0, 0xfc, 0xbb, 0x62, 0x0e, 0x93, 0x82, 0xbb, 0x98, 0x91, 0xcf, 0x3e, 0x82, 0x05, 0xa9, 0x82, + 0x25, 0xb2, 0xda, 0x3c, 0xdb, 0xea, 0xdb, 0x67, 0x64, 0xb3, 0x20, 0x15, 0xfb, 0x1a, 0x06, 0x59, + 0x9d, 0x0a, 0x9b, 0xcf, 0x83, 0xc1, 0xe6, 0xe2, 0xdd, 0xd1, 0xf6, 0xed, 0xb3, 0xcd, 0x1e, 0xd7, + 0xa9, 0x78, 0x91, 0xcf, 0xc9, 0xb6, 0xb1, 0x62, 0x4f, 0x60, 0xd5, 0x05, 0x1d, 0x89, 0x43, 0x51, + 0x5a, 0x13, 0x2c, 0xd3, 0xd7, 0xef, 0xbc, 0x2b, 0xdf, 0x87, 0xc4, 0x0e, 0x57, 0x8a, 0x8e, 0x34, + 0xf9, 0x12, 0x96, 0x9b, 0x2e, 0xb0, 0x00, 0x06, 0x71, 0xad, 0xb5, 0x28, 0x2d, 0xb5, 0xae, 0x1f, + 0x36, 0x22, 0x5b, 0x87, 0xf3, 0xb9, 0x2c, 0xa4, 0xa5, 0xde, 0xf4, 0x43, 0x27, 0x4c, 0x7e, 0xef, + 0xc1, 0xc0, 0xf7, 0x82, 0xdd, 0x04, 0xa8, 0x0d, 0x4f, 0x45, 0x54, 0x1b, 0x11, 0x7b, 0xf3, 0x21, + 0x21, 0x7b, 0x46, 0xc4, 0xec, 0x06, 0x0c, 0x6b, 0x23, 0xb4, 0xd3, 0x3a, 0x27, 0xcb, 0x08, 0x90, + 0x72, 0x03, 0x46, 0xe6, 0xd8, 0x58, 0x51, 0x38, 0xf5, 0x22, 0xa9, 0xc1, 0x41, 0x44, 0xb8, 0x09, + 0x50, 0xea, 0xa8, 0x12, 0x5a, 0xaa, 0xc4, 0x50, 0x7b, 0xfb, 0xe1, 0xb0, 0xd4, 0xbb, 0x0e, 0x60, + 0x5b, 0xb0, 0x52, 0xea, 0xc8, 0x66, 0x5a, 0x59, 0x9b, 0x8b, 0x84, 0x7a, 0xd8, 0x0f, 0x47, 0xa5, + 0x7e, 0xd1, 0x40, 0xec, 0x36, 0x5c, 0x38, 0xd5, 0xbb, 0xaf, 0x2c, 0x11, 0x69, 0xf5, 0x14, 0xc5, + 0x0f, 0x4d, 0x7e, 0x1d, 0x02, 0xb4, 0x97, 0x83, 0x31, 0xe8, 0xf3, 0x52, 0x95, 0x3e, 0x1d, 0x3a, + 0x23, 0xb6, 0x2f, 0x73, 0xe1, 0x93, 0xa0, 0x33, 0x06, 
0x70, 0x20, 0x74, 0x29, 0xf2, 0xc8, 0x58, + 0x1e, 0x1f, 0xf8, 0x0c, 0x46, 0x0e, 0x7b, 0x8e, 0x10, 0x9a, 0x99, 0x9c, 0xcf, 0x7d, 0xf0, 0x74, + 0x26, 0x4c, 0xc5, 0x07, 0x3e, 0x5e, 0x3a, 0x63, 0xa5, 0x4d, 0x56, 0x88, 0xc2, 0xc7, 0xe7, 0x04, + 0xac, 0x10, 0x7e, 0x28, 0x2a, 0x78, 0x55, 0x89, 0x24, 0x18, 0xb8, 0x0a, 0x21, 0xf4, 0x94, 0x10, + 0xac, 0x10, 0x11, 0x12, 0xa9, 0xed, 0x31, 0x5d, 0x88, 0x7e, 0x38, 0x44, 0xe4, 0x1b, 0x04, 0x30, + 0x7d, 0x52, 0x1f, 0x69, 0x69, 0xc5, 0x1c, 0x43, 0x1c, 0xba, 0xf4, 0x11, 0xfd, 0xa1, 0x01, 0xd9, + 0x35, 0x58, 0xc6, 0x1c, 0x23, 0x9b, 0x55, 0x01, 0xb8, 0x1b, 0x80, 0xf2, 0x8b, 0xac, 0x62, 0xb7, + 0x60, 0x55, 0x96, 0x3c, 0xb6, 0xf2, 0x50, 0x44, 0x54, 0x93, 0x11, 0xe9, 0x57, 0x1a, 0x70, 0x07, + 0x6b, 0xb3, 0x01, 0xa3, 0x2e, 0x65, 0xc5, 0x85, 0xd9, 0x21, 0x74, 0xbd, 0x50, 0x15, 0x57, 0xff, + 0xed, 0xe5, 0x11, 0x56, 0xb3, 0xf5, 0x42, 0x94, 0x0b, 0x5d, 0x2f, 0x44, 0xd8, 0x84, 0x51, 0x5d, + 0x8a, 0x43, 0x19, 0x5b, 0x3e, 0xcf, 0x45, 0x70, 0xd1, 0x55, 0xbb, 0x03, 0xb1, 0xf7, 0x61, 0x0d, + 0x2b, 0x1c, 0x69, 0x11, 0xe7, 0x5c, 0x16, 0x44, 0x5b, 0x23, 0xda, 0x45, 0xc4, 0xc3, 0x16, 0x66, + 0x1f, 0x02, 0x23, 0x6a, 0x5d, 0x76, 0xc9, 0x97, 0x88, 0x7c, 0x09, 0x35, 0x7b, 0x5d, 0x05, 0xbe, + 0x91, 0x2a, 0xdd, 0xe7, 0x75, 0x6e, 0x03, 0xe6, 0x2a, 0xe4, 0x45, 0x36, 0x06, 0xa8, 0xd2, 0x82, + 0xbf, 0x74, 0xca, 0xcb, 0x2e, 0xea, 0x16, 0xc1, 0x0f, 0x1d, 0x29, 0x7d, 0x20, 0xcb, 0xd4, 0x08, + 0x1b, 0x69, 0xe1, 0x78, 0xeb, 0xee, 0x43, 0xad, 0x26, 0x74, 0x0a, 0x36, 0x83, 0xcb, 0x1d, 0x3a, + 0x65, 0xcf, 0xad, 0x08, 0xae, 0x10, 0xbf, 0xe3, 0x69, 0xc7, 0x6b, 0xd8, 0xa7, 0x70, 0xb5, 0x63, + 0x50, 0xaa, 0x44, 0xf8, 0xb8, 0x83, 0xab, 0x64, 0x73, 0xa5, 0xd5, 0x7e, 0xdf, 0x2a, 0xd9, 0x75, + 0x58, 0xae, 0x52, 0x2d, 0xf6, 0x65, 0x9e, 0x07, 0xff, 0x73, 0x0f, 0xb3, 0x91, 0xd9, 0x55, 0x58, + 0xaa, 0x52, 0x13, 0xf3, 0x32, 0x08, 0x48, 0xe3, 0x25, 0x57, 0x04, 0x63, 0x05, 0xcf, 0x83, 0x6b, + 0x4d, 0x11, 0x48, 0x74, 0x45, 0x38, 0x0d, 0xf6, 0x7a, 0x53, 0x84, 0x06, 0x61, 0x13, 0x58, 0xa9, + 0xd2, 0x44, 0x9c, 0x32, 0x6e, 0xb8, 0xfe, 0x77, 0x31, 0xe7, 0x23, 0xe7, 0xaf, 0x8e, 0xf7, 0xb5, + 0x10, 0xc1, 0xff, 0x1b, 0x1f, 0x0d, 0x82, 0xed, 0x6f, 0xa5, 0x24, 0xb8, 0xe9, 0xda, 0xdf, 0x81, + 0xd8, 0x1d, 0xb8, 0x68, 0xb3, 0x2a, 0xa2, 0x42, 0x46, 0x3c, 0xcf, 0x55, 0x1c, 0x8c, 0x9b, 0xe7, + 0x5e, 0x3d, 0x42, 0x74, 0x07, 0x41, 0xf6, 0x01, 0x30, 0xe4, 0xc5, 0x2a, 0xcf, 0x79, 0x65, 0x84, + 0xa7, 0x6e, 0x10, 0x75, 0xcd, 0x66, 0xd5, 0x03, 0xaf, 0x70, 0xec, 0x75, 0x38, 0x4f, 0x03, 0x2d, + 0xd8, 0x74, 0x4f, 0x93, 0x04, 0xbc, 0xad, 0x6e, 0xf0, 0xb9, 0x01, 0xb9, 0xe5, 0xc2, 0x25, 0xe8, + 0x3b, 0x44, 0xf0, 0x69, 0x9a, 0x23, 0x5e, 0x45, 0xce, 0x76, 0xe2, 0x9e, 0x26, 0x22, 0x7b, 0x64, + 0xdf, 0xa8, 0x9d, 0xf9, 0xad, 0x56, 0x4d, 0xd6, 0x13, 0x03, 0x2b, 0xdd, 0xe9, 0xcd, 0xd6, 0x60, + 0x31, 0x57, 0x47, 0x7e, 0x22, 0xe1, 0x11, 0xa7, 0x48, 0x26, 0xd3, 0xac, 0x19, 0x48, 0x78, 0x46, + 0x56, 0xc1, 0x7f, 0xf4, 0x73, 0x08, 0x8f, 0x88, 0x28, 0x55, 0xf8, 0xf1, 0x83, 0x47, 0x7c, 0xec, + 0x4a, 0x15, 0xd1, 0x01, 0x36, 0xde, 0x4d, 0xa0, 0x81, 0x52, 0xc5, 0x13, 0x99, 0xe7, 0x93, 0x9f, + 0x7b, 0xb0, 0xdc, 0xec, 0x39, 0xf6, 0x55, 0x77, 0x2b, 0xe0, 0xbe, 0xba, 0xf5, 0xf6, 0xe5, 0xf8, + 0xb0, 0xb4, 0xfa, 0xb8, 0x5d, 0x1d, 0x5f, 0xb4, 0xab, 0xe3, 0x3f, 0x1b, 0xfb, 0xfd, 0x22, 0x60, + 0x78, 0x8a, 0xe1, 0x5d, 0x4c, 0xf0, 0x81, 0x0b, 0xca, 0x7d, 0x18, 0x7a, 0x09, 0xeb, 0x9f, 0xc5, + 0x3c, 0xca, 0x78, 0x99, 0xe4, 0xc2, 0x50, 0x15, 0x56, 0x43, 0xc8, 0x62, 0xfe, 0xd8, 0x21, 0x0d, + 0x41, 0xcd, 0x5f, 0x8a, 0xd8, 0x1a, 0xaa, 0x89, 0x23, 0x3c, 0x73, 0xc8, 0x64, 
0x07, 0x96, 0xdc, + 0x7a, 0x66, 0x9f, 0x37, 0x1d, 0x76, 0x89, 0x6e, 0xbd, 0x6d, 0x9f, 0xfb, 0x48, 0x89, 0x3f, 0xf9, + 0xa5, 0x07, 0x03, 0x0f, 0xe1, 0x35, 0x29, 0xf8, 0x4b, 0xa5, 0x7d, 0x8f, 0x9c, 0x40, 0xa8, 0x2c, + 0x95, 0x6e, 0x36, 0x28, 0x09, 0x98, 0x94, 0x9e, 0x1f, 0x5b, 0x61, 0x7c, 0xab, 0xbc, 0x84, 0xf8, + 0x91, 0xc3, 0x5d, 0xc3, 0xbc, 0x84, 0xbd, 0xd6, 0x52, 0x99, 0x66, 0x63, 0xe0, 0x19, 0xb1, 0x23, + 0xc4, 0xdc, 0xc2, 0xa0, 0xf3, 0x64, 0x0f, 0x46, 0x9d, 0x5f, 0x87, 0xb7, 0x2c, 0x76, 0x7f, 0x51, + 0x16, 0xda, 0x8b, 0x82, 0xf3, 0x80, 0xa7, 0xc2, 0xc8, 0x57, 0x82, 0x82, 0x1a, 0x86, 0xa7, 0xf2, + 0xfd, 0xe0, 0xf5, 0x9b, 0xf1, 0xb9, 0x3f, 0xdf, 0x8c, 0xcf, 0xfd, 0x74, 0x32, 0xee, 0xbd, 0x3e, + 0x19, 0xf7, 0xfe, 0x38, 0x19, 0xf7, 0xfe, 0x3e, 0x19, 0xf7, 0xe6, 0x4b, 0xf4, 0x17, 0xf8, 0xf1, + 0x3f, 0x01, 0x00, 0x00, 0xff, 0xff, 0x4f, 0x2b, 0x30, 0xd6, 0x6d, 0x0a, 0x00, 0x00, +} + +func (m *Metrics) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Metrics) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *Metrics) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.XXX_unrecognized != nil { + i -= len(m.XXX_unrecognized) + copy(dAtA[i:], m.XXX_unrecognized) + } + if m.MemoryEvents != nil { + { + size, err := m.MemoryEvents.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintMetrics(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x42 + } + if len(m.Hugetlb) > 0 { + for iNdEx := len(m.Hugetlb) - 1; iNdEx >= 0; iNdEx-- { + { + size, err := m.Hugetlb[iNdEx].MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintMetrics(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x3a + } + } + if m.Io != nil { + { + size, err := m.Io.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintMetrics(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x32 + } + if m.Rdma != nil { + { + size, err := m.Rdma.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintMetrics(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x2a + } + if m.Memory != nil { + { + size, err := m.Memory.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintMetrics(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x22 + } + if m.CPU != nil { + { + size, err := m.CPU.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintMetrics(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x12 + } + if m.Pids != nil { + { + size, err := m.Pids.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintMetrics(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *PidsStat) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *PidsStat) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *PidsStat) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.XXX_unrecognized != nil { + i -= 
len(m.XXX_unrecognized) + copy(dAtA[i:], m.XXX_unrecognized) + } + if m.Limit != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Limit)) + i-- + dAtA[i] = 0x10 + } + if m.Current != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Current)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + +func (m *CPUStat) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *CPUStat) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *CPUStat) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.XXX_unrecognized != nil { + i -= len(m.XXX_unrecognized) + copy(dAtA[i:], m.XXX_unrecognized) + } + if m.ThrottledUsec != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.ThrottledUsec)) + i-- + dAtA[i] = 0x30 + } + if m.NrThrottled != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.NrThrottled)) + i-- + dAtA[i] = 0x28 + } + if m.NrPeriods != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.NrPeriods)) + i-- + dAtA[i] = 0x20 + } + if m.SystemUsec != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.SystemUsec)) + i-- + dAtA[i] = 0x18 + } + if m.UserUsec != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.UserUsec)) + i-- + dAtA[i] = 0x10 + } + if m.UsageUsec != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.UsageUsec)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + +func (m *MemoryStat) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *MemoryStat) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *MemoryStat) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.XXX_unrecognized != nil { + i -= len(m.XXX_unrecognized) + copy(dAtA[i:], m.XXX_unrecognized) + } + if m.SwapLimit != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.SwapLimit)) + i-- + dAtA[i] = 0x2 + i-- + dAtA[i] = 0x98 + } + if m.SwapUsage != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.SwapUsage)) + i-- + dAtA[i] = 0x2 + i-- + dAtA[i] = 0x90 + } + if m.UsageLimit != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.UsageLimit)) + i-- + dAtA[i] = 0x2 + i-- + dAtA[i] = 0x88 + } + if m.Usage != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Usage)) + i-- + dAtA[i] = 0x2 + i-- + dAtA[i] = 0x80 + } + if m.ThpCollapseAlloc != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.ThpCollapseAlloc)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0xf8 + } + if m.ThpFaultAlloc != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.ThpFaultAlloc)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0xf0 + } + if m.Pglazyfreed != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Pglazyfreed)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0xe8 + } + if m.Pglazyfree != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Pglazyfree)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0xe0 + } + if m.Pgdeactivate != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Pgdeactivate)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0xd8 + } + if m.Pgactivate != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Pgactivate)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0xd0 + } + if m.Pgsteal != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Pgsteal)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] 
= 0xc8 + } + if m.Pgscan != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Pgscan)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0xc0 + } + if m.Pgrefill != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Pgrefill)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0xb8 + } + if m.WorkingsetNodereclaim != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.WorkingsetNodereclaim)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0xb0 + } + if m.WorkingsetActivate != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.WorkingsetActivate)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0xa8 + } + if m.WorkingsetRefault != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.WorkingsetRefault)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0xa0 + } + if m.Pgmajfault != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Pgmajfault)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0x98 + } + if m.Pgfault != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Pgfault)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0x90 + } + if m.SlabUnreclaimable != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.SlabUnreclaimable)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0x88 + } + if m.SlabReclaimable != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.SlabReclaimable)) + i-- + dAtA[i] = 0x1 + i-- + dAtA[i] = 0x80 + } + if m.Unevictable != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Unevictable)) + i-- + dAtA[i] = 0x78 + } + if m.ActiveFile != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.ActiveFile)) + i-- + dAtA[i] = 0x70 + } + if m.InactiveFile != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.InactiveFile)) + i-- + dAtA[i] = 0x68 + } + if m.ActiveAnon != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.ActiveAnon)) + i-- + dAtA[i] = 0x60 + } + if m.InactiveAnon != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.InactiveAnon)) + i-- + dAtA[i] = 0x58 + } + if m.AnonThp != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.AnonThp)) + i-- + dAtA[i] = 0x50 + } + if m.FileWriteback != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.FileWriteback)) + i-- + dAtA[i] = 0x48 + } + if m.FileDirty != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.FileDirty)) + i-- + dAtA[i] = 0x40 + } + if m.FileMapped != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.FileMapped)) + i-- + dAtA[i] = 0x38 + } + if m.Shmem != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Shmem)) + i-- + dAtA[i] = 0x30 + } + if m.Sock != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Sock)) + i-- + dAtA[i] = 0x28 + } + if m.Slab != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Slab)) + i-- + dAtA[i] = 0x20 + } + if m.KernelStack != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.KernelStack)) + i-- + dAtA[i] = 0x18 + } + if m.File != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.File)) + i-- + dAtA[i] = 0x10 + } + if m.Anon != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Anon)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + +func (m *MemoryEvents) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *MemoryEvents) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *MemoryEvents) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.XXX_unrecognized != nil { + i -= len(m.XXX_unrecognized) + copy(dAtA[i:], m.XXX_unrecognized) + } + if m.OomKill != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.OomKill)) + i-- + dAtA[i] = 0x28 + } + if 
m.Oom != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Oom)) + i-- + dAtA[i] = 0x20 + } + if m.Max != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Max)) + i-- + dAtA[i] = 0x18 + } + if m.High != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.High)) + i-- + dAtA[i] = 0x10 + } + if m.Low != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Low)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + +func (m *RdmaStat) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *RdmaStat) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *RdmaStat) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.XXX_unrecognized != nil { + i -= len(m.XXX_unrecognized) + copy(dAtA[i:], m.XXX_unrecognized) + } + if len(m.Limit) > 0 { + for iNdEx := len(m.Limit) - 1; iNdEx >= 0; iNdEx-- { + { + size, err := m.Limit[iNdEx].MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintMetrics(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x12 + } + } + if len(m.Current) > 0 { + for iNdEx := len(m.Current) - 1; iNdEx >= 0; iNdEx-- { + { + size, err := m.Current[iNdEx].MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintMetrics(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0xa + } + } + return len(dAtA) - i, nil +} + +func (m *RdmaEntry) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *RdmaEntry) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *RdmaEntry) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.XXX_unrecognized != nil { + i -= len(m.XXX_unrecognized) + copy(dAtA[i:], m.XXX_unrecognized) + } + if m.HcaObjects != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.HcaObjects)) + i-- + dAtA[i] = 0x18 + } + if m.HcaHandles != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.HcaHandles)) + i-- + dAtA[i] = 0x10 + } + if len(m.Device) > 0 { + i -= len(m.Device) + copy(dAtA[i:], m.Device) + i = encodeVarintMetrics(dAtA, i, uint64(len(m.Device))) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *IOStat) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *IOStat) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *IOStat) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.XXX_unrecognized != nil { + i -= len(m.XXX_unrecognized) + copy(dAtA[i:], m.XXX_unrecognized) + } + if len(m.Usage) > 0 { + for iNdEx := len(m.Usage) - 1; iNdEx >= 0; iNdEx-- { + { + size, err := m.Usage[iNdEx].MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintMetrics(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0xa + } + } + return len(dAtA) - i, nil +} + +func (m *IOEntry) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := 
m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *IOEntry) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *IOEntry) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.XXX_unrecognized != nil { + i -= len(m.XXX_unrecognized) + copy(dAtA[i:], m.XXX_unrecognized) + } + if m.Wios != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Wios)) + i-- + dAtA[i] = 0x30 + } + if m.Rios != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Rios)) + i-- + dAtA[i] = 0x28 + } + if m.Wbytes != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Wbytes)) + i-- + dAtA[i] = 0x20 + } + if m.Rbytes != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Rbytes)) + i-- + dAtA[i] = 0x18 + } + if m.Minor != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Minor)) + i-- + dAtA[i] = 0x10 + } + if m.Major != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Major)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + +func (m *HugeTlbStat) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *HugeTlbStat) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *HugeTlbStat) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.XXX_unrecognized != nil { + i -= len(m.XXX_unrecognized) + copy(dAtA[i:], m.XXX_unrecognized) + } + if len(m.Pagesize) > 0 { + i -= len(m.Pagesize) + copy(dAtA[i:], m.Pagesize) + i = encodeVarintMetrics(dAtA, i, uint64(len(m.Pagesize))) + i-- + dAtA[i] = 0x1a + } + if m.Max != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Max)) + i-- + dAtA[i] = 0x10 + } + if m.Current != 0 { + i = encodeVarintMetrics(dAtA, i, uint64(m.Current)) + i-- + dAtA[i] = 0x8 + } + return len(dAtA) - i, nil +} + +func encodeVarintMetrics(dAtA []byte, offset int, v uint64) int { + offset -= sovMetrics(v) + base := offset + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return base +} +func (m *Metrics) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Pids != nil { + l = m.Pids.Size() + n += 1 + l + sovMetrics(uint64(l)) + } + if m.CPU != nil { + l = m.CPU.Size() + n += 1 + l + sovMetrics(uint64(l)) + } + if m.Memory != nil { + l = m.Memory.Size() + n += 1 + l + sovMetrics(uint64(l)) + } + if m.Rdma != nil { + l = m.Rdma.Size() + n += 1 + l + sovMetrics(uint64(l)) + } + if m.Io != nil { + l = m.Io.Size() + n += 1 + l + sovMetrics(uint64(l)) + } + if len(m.Hugetlb) > 0 { + for _, e := range m.Hugetlb { + l = e.Size() + n += 1 + l + sovMetrics(uint64(l)) + } + } + if m.MemoryEvents != nil { + l = m.MemoryEvents.Size() + n += 1 + l + sovMetrics(uint64(l)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *PidsStat) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Current != 0 { + n += 1 + sovMetrics(uint64(m.Current)) + } + if m.Limit != 0 { + n += 1 + sovMetrics(uint64(m.Limit)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *CPUStat) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.UsageUsec != 0 { + n += 1 + sovMetrics(uint64(m.UsageUsec)) + } + if m.UserUsec != 0 
{ + n += 1 + sovMetrics(uint64(m.UserUsec)) + } + if m.SystemUsec != 0 { + n += 1 + sovMetrics(uint64(m.SystemUsec)) + } + if m.NrPeriods != 0 { + n += 1 + sovMetrics(uint64(m.NrPeriods)) + } + if m.NrThrottled != 0 { + n += 1 + sovMetrics(uint64(m.NrThrottled)) + } + if m.ThrottledUsec != 0 { + n += 1 + sovMetrics(uint64(m.ThrottledUsec)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *MemoryStat) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Anon != 0 { + n += 1 + sovMetrics(uint64(m.Anon)) + } + if m.File != 0 { + n += 1 + sovMetrics(uint64(m.File)) + } + if m.KernelStack != 0 { + n += 1 + sovMetrics(uint64(m.KernelStack)) + } + if m.Slab != 0 { + n += 1 + sovMetrics(uint64(m.Slab)) + } + if m.Sock != 0 { + n += 1 + sovMetrics(uint64(m.Sock)) + } + if m.Shmem != 0 { + n += 1 + sovMetrics(uint64(m.Shmem)) + } + if m.FileMapped != 0 { + n += 1 + sovMetrics(uint64(m.FileMapped)) + } + if m.FileDirty != 0 { + n += 1 + sovMetrics(uint64(m.FileDirty)) + } + if m.FileWriteback != 0 { + n += 1 + sovMetrics(uint64(m.FileWriteback)) + } + if m.AnonThp != 0 { + n += 1 + sovMetrics(uint64(m.AnonThp)) + } + if m.InactiveAnon != 0 { + n += 1 + sovMetrics(uint64(m.InactiveAnon)) + } + if m.ActiveAnon != 0 { + n += 1 + sovMetrics(uint64(m.ActiveAnon)) + } + if m.InactiveFile != 0 { + n += 1 + sovMetrics(uint64(m.InactiveFile)) + } + if m.ActiveFile != 0 { + n += 1 + sovMetrics(uint64(m.ActiveFile)) + } + if m.Unevictable != 0 { + n += 1 + sovMetrics(uint64(m.Unevictable)) + } + if m.SlabReclaimable != 0 { + n += 2 + sovMetrics(uint64(m.SlabReclaimable)) + } + if m.SlabUnreclaimable != 0 { + n += 2 + sovMetrics(uint64(m.SlabUnreclaimable)) + } + if m.Pgfault != 0 { + n += 2 + sovMetrics(uint64(m.Pgfault)) + } + if m.Pgmajfault != 0 { + n += 2 + sovMetrics(uint64(m.Pgmajfault)) + } + if m.WorkingsetRefault != 0 { + n += 2 + sovMetrics(uint64(m.WorkingsetRefault)) + } + if m.WorkingsetActivate != 0 { + n += 2 + sovMetrics(uint64(m.WorkingsetActivate)) + } + if m.WorkingsetNodereclaim != 0 { + n += 2 + sovMetrics(uint64(m.WorkingsetNodereclaim)) + } + if m.Pgrefill != 0 { + n += 2 + sovMetrics(uint64(m.Pgrefill)) + } + if m.Pgscan != 0 { + n += 2 + sovMetrics(uint64(m.Pgscan)) + } + if m.Pgsteal != 0 { + n += 2 + sovMetrics(uint64(m.Pgsteal)) + } + if m.Pgactivate != 0 { + n += 2 + sovMetrics(uint64(m.Pgactivate)) + } + if m.Pgdeactivate != 0 { + n += 2 + sovMetrics(uint64(m.Pgdeactivate)) + } + if m.Pglazyfree != 0 { + n += 2 + sovMetrics(uint64(m.Pglazyfree)) + } + if m.Pglazyfreed != 0 { + n += 2 + sovMetrics(uint64(m.Pglazyfreed)) + } + if m.ThpFaultAlloc != 0 { + n += 2 + sovMetrics(uint64(m.ThpFaultAlloc)) + } + if m.ThpCollapseAlloc != 0 { + n += 2 + sovMetrics(uint64(m.ThpCollapseAlloc)) + } + if m.Usage != 0 { + n += 2 + sovMetrics(uint64(m.Usage)) + } + if m.UsageLimit != 0 { + n += 2 + sovMetrics(uint64(m.UsageLimit)) + } + if m.SwapUsage != 0 { + n += 2 + sovMetrics(uint64(m.SwapUsage)) + } + if m.SwapLimit != 0 { + n += 2 + sovMetrics(uint64(m.SwapLimit)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *MemoryEvents) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Low != 0 { + n += 1 + sovMetrics(uint64(m.Low)) + } + if m.High != 0 { + n += 1 + sovMetrics(uint64(m.High)) + } + if m.Max != 0 { + n += 1 + sovMetrics(uint64(m.Max)) + } + if m.Oom != 0 { + n += 1 + sovMetrics(uint64(m.Oom)) + } + if m.OomKill != 0 { + n += 1 + 
sovMetrics(uint64(m.OomKill)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *RdmaStat) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if len(m.Current) > 0 { + for _, e := range m.Current { + l = e.Size() + n += 1 + l + sovMetrics(uint64(l)) + } + } + if len(m.Limit) > 0 { + for _, e := range m.Limit { + l = e.Size() + n += 1 + l + sovMetrics(uint64(l)) + } + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *RdmaEntry) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.Device) + if l > 0 { + n += 1 + l + sovMetrics(uint64(l)) + } + if m.HcaHandles != 0 { + n += 1 + sovMetrics(uint64(m.HcaHandles)) + } + if m.HcaObjects != 0 { + n += 1 + sovMetrics(uint64(m.HcaObjects)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *IOStat) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if len(m.Usage) > 0 { + for _, e := range m.Usage { + l = e.Size() + n += 1 + l + sovMetrics(uint64(l)) + } + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *IOEntry) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Major != 0 { + n += 1 + sovMetrics(uint64(m.Major)) + } + if m.Minor != 0 { + n += 1 + sovMetrics(uint64(m.Minor)) + } + if m.Rbytes != 0 { + n += 1 + sovMetrics(uint64(m.Rbytes)) + } + if m.Wbytes != 0 { + n += 1 + sovMetrics(uint64(m.Wbytes)) + } + if m.Rios != 0 { + n += 1 + sovMetrics(uint64(m.Rios)) + } + if m.Wios != 0 { + n += 1 + sovMetrics(uint64(m.Wios)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *HugeTlbStat) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if m.Current != 0 { + n += 1 + sovMetrics(uint64(m.Current)) + } + if m.Max != 0 { + n += 1 + sovMetrics(uint64(m.Max)) + } + l = len(m.Pagesize) + if l > 0 { + n += 1 + l + sovMetrics(uint64(l)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func sovMetrics(x uint64) (n int) { + return (math_bits.Len64(x|1) + 6) / 7 +} +func sozMetrics(x uint64) (n int) { + return sovMetrics(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (this *Metrics) String() string { + if this == nil { + return "nil" + } + repeatedStringForHugetlb := "[]*HugeTlbStat{" + for _, f := range this.Hugetlb { + repeatedStringForHugetlb += strings.Replace(f.String(), "HugeTlbStat", "HugeTlbStat", 1) + "," + } + repeatedStringForHugetlb += "}" + s := strings.Join([]string{`&Metrics{`, + `Pids:` + strings.Replace(this.Pids.String(), "PidsStat", "PidsStat", 1) + `,`, + `CPU:` + strings.Replace(this.CPU.String(), "CPUStat", "CPUStat", 1) + `,`, + `Memory:` + strings.Replace(this.Memory.String(), "MemoryStat", "MemoryStat", 1) + `,`, + `Rdma:` + strings.Replace(this.Rdma.String(), "RdmaStat", "RdmaStat", 1) + `,`, + `Io:` + strings.Replace(this.Io.String(), "IOStat", "IOStat", 1) + `,`, + `Hugetlb:` + repeatedStringForHugetlb + `,`, + `MemoryEvents:` + strings.Replace(this.MemoryEvents.String(), "MemoryEvents", "MemoryEvents", 1) + `,`, + `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *PidsStat) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&PidsStat{`, + `Current:` + fmt.Sprintf("%v", this.Current) + `,`, + `Limit:` + fmt.Sprintf("%v", this.Limit) + `,`, + 
`XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *CPUStat) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&CPUStat{`, + `UsageUsec:` + fmt.Sprintf("%v", this.UsageUsec) + `,`, + `UserUsec:` + fmt.Sprintf("%v", this.UserUsec) + `,`, + `SystemUsec:` + fmt.Sprintf("%v", this.SystemUsec) + `,`, + `NrPeriods:` + fmt.Sprintf("%v", this.NrPeriods) + `,`, + `NrThrottled:` + fmt.Sprintf("%v", this.NrThrottled) + `,`, + `ThrottledUsec:` + fmt.Sprintf("%v", this.ThrottledUsec) + `,`, + `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *MemoryStat) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&MemoryStat{`, + `Anon:` + fmt.Sprintf("%v", this.Anon) + `,`, + `File:` + fmt.Sprintf("%v", this.File) + `,`, + `KernelStack:` + fmt.Sprintf("%v", this.KernelStack) + `,`, + `Slab:` + fmt.Sprintf("%v", this.Slab) + `,`, + `Sock:` + fmt.Sprintf("%v", this.Sock) + `,`, + `Shmem:` + fmt.Sprintf("%v", this.Shmem) + `,`, + `FileMapped:` + fmt.Sprintf("%v", this.FileMapped) + `,`, + `FileDirty:` + fmt.Sprintf("%v", this.FileDirty) + `,`, + `FileWriteback:` + fmt.Sprintf("%v", this.FileWriteback) + `,`, + `AnonThp:` + fmt.Sprintf("%v", this.AnonThp) + `,`, + `InactiveAnon:` + fmt.Sprintf("%v", this.InactiveAnon) + `,`, + `ActiveAnon:` + fmt.Sprintf("%v", this.ActiveAnon) + `,`, + `InactiveFile:` + fmt.Sprintf("%v", this.InactiveFile) + `,`, + `ActiveFile:` + fmt.Sprintf("%v", this.ActiveFile) + `,`, + `Unevictable:` + fmt.Sprintf("%v", this.Unevictable) + `,`, + `SlabReclaimable:` + fmt.Sprintf("%v", this.SlabReclaimable) + `,`, + `SlabUnreclaimable:` + fmt.Sprintf("%v", this.SlabUnreclaimable) + `,`, + `Pgfault:` + fmt.Sprintf("%v", this.Pgfault) + `,`, + `Pgmajfault:` + fmt.Sprintf("%v", this.Pgmajfault) + `,`, + `WorkingsetRefault:` + fmt.Sprintf("%v", this.WorkingsetRefault) + `,`, + `WorkingsetActivate:` + fmt.Sprintf("%v", this.WorkingsetActivate) + `,`, + `WorkingsetNodereclaim:` + fmt.Sprintf("%v", this.WorkingsetNodereclaim) + `,`, + `Pgrefill:` + fmt.Sprintf("%v", this.Pgrefill) + `,`, + `Pgscan:` + fmt.Sprintf("%v", this.Pgscan) + `,`, + `Pgsteal:` + fmt.Sprintf("%v", this.Pgsteal) + `,`, + `Pgactivate:` + fmt.Sprintf("%v", this.Pgactivate) + `,`, + `Pgdeactivate:` + fmt.Sprintf("%v", this.Pgdeactivate) + `,`, + `Pglazyfree:` + fmt.Sprintf("%v", this.Pglazyfree) + `,`, + `Pglazyfreed:` + fmt.Sprintf("%v", this.Pglazyfreed) + `,`, + `ThpFaultAlloc:` + fmt.Sprintf("%v", this.ThpFaultAlloc) + `,`, + `ThpCollapseAlloc:` + fmt.Sprintf("%v", this.ThpCollapseAlloc) + `,`, + `Usage:` + fmt.Sprintf("%v", this.Usage) + `,`, + `UsageLimit:` + fmt.Sprintf("%v", this.UsageLimit) + `,`, + `SwapUsage:` + fmt.Sprintf("%v", this.SwapUsage) + `,`, + `SwapLimit:` + fmt.Sprintf("%v", this.SwapLimit) + `,`, + `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *MemoryEvents) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&MemoryEvents{`, + `Low:` + fmt.Sprintf("%v", this.Low) + `,`, + `High:` + fmt.Sprintf("%v", this.High) + `,`, + `Max:` + fmt.Sprintf("%v", this.Max) + `,`, + `Oom:` + fmt.Sprintf("%v", this.Oom) + `,`, + `OomKill:` + fmt.Sprintf("%v", this.OomKill) + `,`, + `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *RdmaStat) String() string { + if this == nil 
{ + return "nil" + } + repeatedStringForCurrent := "[]*RdmaEntry{" + for _, f := range this.Current { + repeatedStringForCurrent += strings.Replace(f.String(), "RdmaEntry", "RdmaEntry", 1) + "," + } + repeatedStringForCurrent += "}" + repeatedStringForLimit := "[]*RdmaEntry{" + for _, f := range this.Limit { + repeatedStringForLimit += strings.Replace(f.String(), "RdmaEntry", "RdmaEntry", 1) + "," + } + repeatedStringForLimit += "}" + s := strings.Join([]string{`&RdmaStat{`, + `Current:` + repeatedStringForCurrent + `,`, + `Limit:` + repeatedStringForLimit + `,`, + `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *RdmaEntry) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&RdmaEntry{`, + `Device:` + fmt.Sprintf("%v", this.Device) + `,`, + `HcaHandles:` + fmt.Sprintf("%v", this.HcaHandles) + `,`, + `HcaObjects:` + fmt.Sprintf("%v", this.HcaObjects) + `,`, + `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *IOStat) String() string { + if this == nil { + return "nil" + } + repeatedStringForUsage := "[]*IOEntry{" + for _, f := range this.Usage { + repeatedStringForUsage += strings.Replace(f.String(), "IOEntry", "IOEntry", 1) + "," + } + repeatedStringForUsage += "}" + s := strings.Join([]string{`&IOStat{`, + `Usage:` + repeatedStringForUsage + `,`, + `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *IOEntry) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&IOEntry{`, + `Major:` + fmt.Sprintf("%v", this.Major) + `,`, + `Minor:` + fmt.Sprintf("%v", this.Minor) + `,`, + `Rbytes:` + fmt.Sprintf("%v", this.Rbytes) + `,`, + `Wbytes:` + fmt.Sprintf("%v", this.Wbytes) + `,`, + `Rios:` + fmt.Sprintf("%v", this.Rios) + `,`, + `Wios:` + fmt.Sprintf("%v", this.Wios) + `,`, + `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *HugeTlbStat) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&HugeTlbStat{`, + `Current:` + fmt.Sprintf("%v", this.Current) + `,`, + `Max:` + fmt.Sprintf("%v", this.Max) + `,`, + `Pagesize:` + fmt.Sprintf("%v", this.Pagesize) + `,`, + `XXX_unrecognized:` + fmt.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func valueToStringMetrics(v interface{}) string { + rv := reflect.ValueOf(v) + if rv.IsNil() { + return "nil" + } + pv := reflect.Indirect(rv).Interface() + return fmt.Sprintf("*%v", pv) +} +func (m *Metrics) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Metrics: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Metrics: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Pids", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return 
io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Pids == nil { + m.Pids = &PidsStat{} + } + if err := m.Pids.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field CPU", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.CPU == nil { + m.CPU = &CPUStat{} + } + if err := m.CPU.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Memory", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Memory == nil { + m.Memory = &MemoryStat{} + } + if err := m.Memory.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 5: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Rdma", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Rdma == nil { + m.Rdma = &RdmaStat{} + } + if err := m.Rdma.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 6: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Io", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Io == nil { + m.Io = &IOStat{} + } + if err := m.Io.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 7: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Hugetlb", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { 
+ return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Hugetlb = append(m.Hugetlb, &HugeTlbStat{}) + if err := m.Hugetlb[len(m.Hugetlb)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 8: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field MemoryEvents", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.MemoryEvents == nil { + m.MemoryEvents = &MemoryEvents{} + } + if err := m.MemoryEvents.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *PidsStat) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: PidsStat: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: PidsStat: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Current", wireType) + } + m.Current = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Current |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Limit", wireType) + } + m.Limit = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Limit |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *CPUStat) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: CPUStat: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: CPUStat: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field UsageUsec", wireType) + } + m.UsageUsec = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.UsageUsec |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field UserUsec", wireType) + } + m.UserUsec = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.UserUsec |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field SystemUsec", wireType) + } + m.SystemUsec = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.SystemUsec |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field NrPeriods", wireType) + } + m.NrPeriods = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.NrPeriods |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field NrThrottled", wireType) + } + m.NrThrottled = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.NrThrottled |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 6: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ThrottledUsec", wireType) + } + m.ThrottledUsec = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.ThrottledUsec |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *MemoryStat) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: MemoryStat: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: MemoryStat: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Anon", wireType) + } + m.Anon = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Anon |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field File", wireType) + } + m.File = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.File |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field KernelStack", wireType) + } + m.KernelStack = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.KernelStack |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Slab", wireType) + } + m.Slab = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Slab |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Sock", wireType) + } + m.Sock = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Sock |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 6: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Shmem", wireType) + } + m.Shmem = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Shmem |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 7: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field FileMapped", wireType) + } + m.FileMapped = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.FileMapped |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 8: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field FileDirty", wireType) + } + m.FileDirty = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + 
return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.FileDirty |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 9: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field FileWriteback", wireType) + } + m.FileWriteback = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.FileWriteback |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 10: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field AnonThp", wireType) + } + m.AnonThp = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.AnonThp |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 11: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field InactiveAnon", wireType) + } + m.InactiveAnon = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.InactiveAnon |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 12: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ActiveAnon", wireType) + } + m.ActiveAnon = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.ActiveAnon |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 13: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field InactiveFile", wireType) + } + m.InactiveFile = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.InactiveFile |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 14: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ActiveFile", wireType) + } + m.ActiveFile = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.ActiveFile |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 15: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Unevictable", wireType) + } + m.Unevictable = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Unevictable |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 16: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field SlabReclaimable", wireType) + } + m.SlabReclaimable = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.SlabReclaimable |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 17: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field SlabUnreclaimable", wireType) + } + m.SlabUnreclaimable = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return 
io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.SlabUnreclaimable |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 18: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pgfault", wireType) + } + m.Pgfault = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Pgfault |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 19: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pgmajfault", wireType) + } + m.Pgmajfault = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Pgmajfault |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 20: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field WorkingsetRefault", wireType) + } + m.WorkingsetRefault = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.WorkingsetRefault |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 21: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field WorkingsetActivate", wireType) + } + m.WorkingsetActivate = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.WorkingsetActivate |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 22: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field WorkingsetNodereclaim", wireType) + } + m.WorkingsetNodereclaim = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.WorkingsetNodereclaim |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 23: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pgrefill", wireType) + } + m.Pgrefill = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Pgrefill |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 24: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pgscan", wireType) + } + m.Pgscan = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Pgscan |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 25: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pgsteal", wireType) + } + m.Pgsteal = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Pgsteal |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 26: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pgactivate", wireType) + } + m.Pgactivate = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + 
m.Pgactivate |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 27: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pgdeactivate", wireType) + } + m.Pgdeactivate = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Pgdeactivate |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 28: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pglazyfree", wireType) + } + m.Pglazyfree = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Pglazyfree |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 29: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pglazyfreed", wireType) + } + m.Pglazyfreed = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Pglazyfreed |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 30: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ThpFaultAlloc", wireType) + } + m.ThpFaultAlloc = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.ThpFaultAlloc |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 31: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ThpCollapseAlloc", wireType) + } + m.ThpCollapseAlloc = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.ThpCollapseAlloc |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 32: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Usage", wireType) + } + m.Usage = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Usage |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 33: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field UsageLimit", wireType) + } + m.UsageLimit = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.UsageLimit |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 34: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field SwapUsage", wireType) + } + m.SwapUsage = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.SwapUsage |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 35: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field SwapLimit", wireType) + } + m.SwapLimit = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.SwapLimit |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = 
preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *MemoryEvents) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: MemoryEvents: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: MemoryEvents: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Low", wireType) + } + m.Low = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Low |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field High", wireType) + } + m.High = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.High |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Max", wireType) + } + m.Max = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Max |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Oom", wireType) + } + m.Oom = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Oom |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field OomKill", wireType) + } + m.OomKill = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.OomKill |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *RdmaStat) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: RdmaStat: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: RdmaStat: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Current", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Current = append(m.Current, &RdmaEntry{}) + if err := m.Current[len(m.Current)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Limit", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Limit = append(m.Limit, &RdmaEntry{}) + if err := m.Limit[len(m.Limit)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *RdmaEntry) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: RdmaEntry: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: RdmaEntry: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Device", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Device = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field HcaHandles", wireType) + } + m.HcaHandles = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.HcaHandles |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field HcaObjects", wireType) + } + m.HcaObjects = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.HcaObjects |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *IOStat) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: IOStat: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: IOStat: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Usage", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Usage = append(m.Usage, &IOEntry{}) + if err := m.Usage[len(m.Usage)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *IOEntry) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: IOEntry: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: IOEntry: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Major", wireType) + } + m.Major = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Major |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Minor", wireType) + } + m.Minor = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Minor |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Rbytes", wireType) + } + m.Rbytes = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Rbytes |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Wbytes", wireType) + } + m.Wbytes = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Wbytes |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Rios", wireType) + } + m.Rios = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Rios |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 6: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Wios", wireType) + } + m.Wios = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Wios |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *HugeTlbStat) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: HugeTlbStat: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: HugeTlbStat: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Current", wireType) + } + m.Current = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Current |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Max", wireType) + } + m.Max = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Max |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Pagesize", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthMetrics + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Pagesize = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) 
+ iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipMetrics(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + depth := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + case 1: + iNdEx += 8 + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if length < 0 { + return 0, ErrInvalidLengthMetrics + } + iNdEx += length + case 3: + depth++ + case 4: + if depth == 0 { + return 0, ErrUnexpectedEndOfGroupMetrics + } + depth-- + case 5: + iNdEx += 4 + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + if iNdEx < 0 { + return 0, ErrInvalidLengthMetrics + } + if depth == 0 { + return iNdEx, nil + } + } + return 0, io.ErrUnexpectedEOF +} + +var ( + ErrInvalidLengthMetrics = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowMetrics = fmt.Errorf("proto: integer overflow") + ErrUnexpectedEndOfGroupMetrics = fmt.Errorf("proto: unexpected end of group") +) diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.pb.txt b/src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.pb.txt new file mode 100644 index 000000000000..59fe27cbffb7 --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.pb.txt @@ -0,0 +1,539 @@ +file { + name: "github.com/containerd/cgroups/v2/stats/metrics.proto" + package: "io.containerd.cgroups.v2" + dependency: "gogoproto/gogo.proto" + message_type { + name: "Metrics" + field { + name: "pids" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".io.containerd.cgroups.v2.PidsStat" + json_name: "pids" + } + field { + name: "cpu" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".io.containerd.cgroups.v2.CPUStat" + options { + 65004: "CPU" + } + json_name: "cpu" + } + field { + name: "memory" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".io.containerd.cgroups.v2.MemoryStat" + json_name: "memory" + } + field { + name: "rdma" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".io.containerd.cgroups.v2.RdmaStat" + json_name: "rdma" + } + field { + name: "io" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".io.containerd.cgroups.v2.IOStat" + json_name: "io" + } + field { + name: "hugetlb" + number: 7 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".io.containerd.cgroups.v2.HugeTlbStat" + json_name: "hugetlb" + } + field { + name: "memory_events" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".io.containerd.cgroups.v2.MemoryEvents" + json_name: "memoryEvents" + } + } + message_type { + name: "PidsStat" + field { + name: "current" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "current" + } + field { + name: "limit" + number: 2 + 
label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "limit" + } + } + message_type { + name: "CPUStat" + field { + name: "usage_usec" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "usageUsec" + } + field { + name: "user_usec" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "userUsec" + } + field { + name: "system_usec" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "systemUsec" + } + field { + name: "nr_periods" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "nrPeriods" + } + field { + name: "nr_throttled" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "nrThrottled" + } + field { + name: "throttled_usec" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "throttledUsec" + } + } + message_type { + name: "MemoryStat" + field { + name: "anon" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "anon" + } + field { + name: "file" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "file" + } + field { + name: "kernel_stack" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "kernelStack" + } + field { + name: "slab" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "slab" + } + field { + name: "sock" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "sock" + } + field { + name: "shmem" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "shmem" + } + field { + name: "file_mapped" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "fileMapped" + } + field { + name: "file_dirty" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "fileDirty" + } + field { + name: "file_writeback" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "fileWriteback" + } + field { + name: "anon_thp" + number: 10 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "anonThp" + } + field { + name: "inactive_anon" + number: 11 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "inactiveAnon" + } + field { + name: "active_anon" + number: 12 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "activeAnon" + } + field { + name: "inactive_file" + number: 13 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "inactiveFile" + } + field { + name: "active_file" + number: 14 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "activeFile" + } + field { + name: "unevictable" + number: 15 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "unevictable" + } + field { + name: "slab_reclaimable" + number: 16 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "slabReclaimable" + } + field { + name: "slab_unreclaimable" + number: 17 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "slabUnreclaimable" + } + field { + name: "pgfault" + number: 18 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "pgfault" + } + field { + name: "pgmajfault" + number: 19 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "pgmajfault" + } + field { + name: "workingset_refault" + number: 20 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "workingsetRefault" + } + field { + name: "workingset_activate" + number: 21 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "workingsetActivate" + } + field { + name: "workingset_nodereclaim" + number: 22 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "workingsetNodereclaim" + } + field { + name: "pgrefill" + number: 23 + label: LABEL_OPTIONAL + type: 
TYPE_UINT64 + json_name: "pgrefill" + } + field { + name: "pgscan" + number: 24 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "pgscan" + } + field { + name: "pgsteal" + number: 25 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "pgsteal" + } + field { + name: "pgactivate" + number: 26 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "pgactivate" + } + field { + name: "pgdeactivate" + number: 27 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "pgdeactivate" + } + field { + name: "pglazyfree" + number: 28 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "pglazyfree" + } + field { + name: "pglazyfreed" + number: 29 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "pglazyfreed" + } + field { + name: "thp_fault_alloc" + number: 30 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "thpFaultAlloc" + } + field { + name: "thp_collapse_alloc" + number: 31 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "thpCollapseAlloc" + } + field { + name: "usage" + number: 32 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "usage" + } + field { + name: "usage_limit" + number: 33 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "usageLimit" + } + field { + name: "swap_usage" + number: 34 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "swapUsage" + } + field { + name: "swap_limit" + number: 35 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "swapLimit" + } + } + message_type { + name: "MemoryEvents" + field { + name: "low" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "low" + } + field { + name: "high" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "high" + } + field { + name: "max" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "max" + } + field { + name: "oom" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "oom" + } + field { + name: "oom_kill" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "oomKill" + } + } + message_type { + name: "RdmaStat" + field { + name: "current" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".io.containerd.cgroups.v2.RdmaEntry" + json_name: "current" + } + field { + name: "limit" + number: 2 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".io.containerd.cgroups.v2.RdmaEntry" + json_name: "limit" + } + } + message_type { + name: "RdmaEntry" + field { + name: "device" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + json_name: "device" + } + field { + name: "hca_handles" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_UINT32 + json_name: "hcaHandles" + } + field { + name: "hca_objects" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_UINT32 + json_name: "hcaObjects" + } + } + message_type { + name: "IOStat" + field { + name: "usage" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".io.containerd.cgroups.v2.IOEntry" + json_name: "usage" + } + } + message_type { + name: "IOEntry" + field { + name: "major" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "major" + } + field { + name: "minor" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "minor" + } + field { + name: "rbytes" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "rbytes" + } + field { + name: "wbytes" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "wbytes" + } + field { + name: "rios" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "rios" + } + 
field { + name: "wios" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "wios" + } + } + message_type { + name: "HugeTlbStat" + field { + name: "current" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "current" + } + field { + name: "max" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_UINT64 + json_name: "max" + } + field { + name: "pagesize" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_STRING + json_name: "pagesize" + } + } + syntax: "proto3" +} diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.proto b/src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.proto new file mode 100644 index 000000000000..8ac472e46459 --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.proto @@ -0,0 +1,105 @@ +syntax = "proto3"; + +package io.containerd.cgroups.v2; + + import "gogoproto/gogo.proto"; + +message Metrics { + PidsStat pids = 1; + CPUStat cpu = 2 [(gogoproto.customname) = "CPU"]; + MemoryStat memory = 4; + RdmaStat rdma = 5; + IOStat io = 6; + repeated HugeTlbStat hugetlb = 7; + MemoryEvents memory_events = 8; +} + +message PidsStat { + uint64 current = 1; + uint64 limit = 2; +} + +message CPUStat { + uint64 usage_usec = 1; + uint64 user_usec = 2; + uint64 system_usec = 3; + uint64 nr_periods = 4; + uint64 nr_throttled = 5; + uint64 throttled_usec = 6; +} + +message MemoryStat { + uint64 anon = 1; + uint64 file = 2; + uint64 kernel_stack = 3; + uint64 slab = 4; + uint64 sock = 5; + uint64 shmem = 6; + uint64 file_mapped = 7; + uint64 file_dirty = 8; + uint64 file_writeback = 9; + uint64 anon_thp = 10; + uint64 inactive_anon = 11; + uint64 active_anon = 12; + uint64 inactive_file = 13; + uint64 active_file = 14; + uint64 unevictable = 15; + uint64 slab_reclaimable = 16; + uint64 slab_unreclaimable = 17; + uint64 pgfault = 18; + uint64 pgmajfault = 19; + uint64 workingset_refault = 20; + uint64 workingset_activate = 21; + uint64 workingset_nodereclaim = 22; + uint64 pgrefill = 23; + uint64 pgscan = 24; + uint64 pgsteal = 25; + uint64 pgactivate = 26; + uint64 pgdeactivate = 27; + uint64 pglazyfree = 28; + uint64 pglazyfreed = 29; + uint64 thp_fault_alloc = 30; + uint64 thp_collapse_alloc = 31; + uint64 usage = 32; + uint64 usage_limit = 33; + uint64 swap_usage = 34; + uint64 swap_limit = 35; +} + +message MemoryEvents { + uint64 low = 1; + uint64 high = 2; + uint64 max = 3; + uint64 oom = 4; + uint64 oom_kill = 5; +} + +message RdmaStat { + repeated RdmaEntry current = 1; + repeated RdmaEntry limit = 2; +} + +message RdmaEntry { + string device = 1; + uint32 hca_handles = 2; + uint32 hca_objects = 3; +} + +message IOStat { + repeated IOEntry usage = 1; +} + +message IOEntry { + uint64 major = 1; + uint64 minor = 2; + uint64 rbytes = 3; + uint64 wbytes = 4; + uint64 rios = 5; + uint64 wios = 6; +} + +message HugeTlbStat { + uint64 current = 1; + uint64 max = 2; + string pagesize = 3; +} diff --git a/src/runtime/vendor/github.com/containerd/cgroups/v2/utils.go b/src/runtime/vendor/github.com/containerd/cgroups/v2/utils.go new file mode 100644 index 000000000000..240c9267798a --- /dev/null +++ b/src/runtime/vendor/github.com/containerd/cgroups/v2/utils.go @@ -0,0 +1,436 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package v2 + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "math" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/containerd/cgroups/v2/stats" + + "github.com/godbus/dbus/v5" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" +) + +const ( + cgroupProcs = "cgroup.procs" + defaultDirPerm = 0755 +) + +// defaultFilePerm is a var so that the test framework can change the filemode +// of all files created when the tests are running. The difference between the +// tests and real world use is that files like "cgroup.procs" will exist when writing +// to a read cgroup filesystem and do not exist prior when running in the tests. +// this is set to a non 0 value in the test code +var defaultFilePerm = os.FileMode(0) + +// remove will remove a cgroup path handling EAGAIN and EBUSY errors and +// retrying the remove after a exp timeout +func remove(path string) error { + var err error + delay := 10 * time.Millisecond + for i := 0; i < 5; i++ { + if i != 0 { + time.Sleep(delay) + delay *= 2 + } + if err = os.RemoveAll(path); err == nil { + return nil + } + } + return fmt.Errorf("cgroups: unable to remove path %q: %w", path, err) +} + +// parseCgroupProcsFile parses /sys/fs/cgroup/$GROUPPATH/cgroup.procs +func parseCgroupProcsFile(path string) ([]uint64, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + var ( + out []uint64 + s = bufio.NewScanner(f) + ) + for s.Scan() { + if t := s.Text(); t != "" { + pid, err := strconv.ParseUint(t, 10, 0) + if err != nil { + return nil, err + } + out = append(out, pid) + } + } + if err := s.Err(); err != nil { + return nil, err + } + return out, nil +} + +func parseKV(raw string) (string, interface{}, error) { + parts := strings.Fields(raw) + switch len(parts) { + case 2: + v, err := parseUint(parts[1], 10, 64) + if err != nil { + // if we cannot parse as a uint, parse as a string + return parts[0], parts[1], nil + } + return parts[0], v, nil + default: + return "", 0, ErrInvalidFormat + } +} + +func parseUint(s string, base, bitSize int) (uint64, error) { + v, err := strconv.ParseUint(s, base, bitSize) + if err != nil { + intValue, intErr := strconv.ParseInt(s, base, bitSize) + // 1. Handle negative values greater than MinInt64 (and) + // 2. 
Handle negative values lesser than MinInt64 + if intErr == nil && intValue < 0 { + return 0, nil + } else if intErr != nil && + intErr.(*strconv.NumError).Err == strconv.ErrRange && + intValue < 0 { + return 0, nil + } + return 0, err + } + return v, nil +} + +// parseCgroupFile parses /proc/PID/cgroup file and return string +func parseCgroupFile(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", err + } + defer f.Close() + return parseCgroupFromReader(f) +} + +func parseCgroupFromReader(r io.Reader) (string, error) { + var ( + s = bufio.NewScanner(r) + ) + for s.Scan() { + var ( + text = s.Text() + parts = strings.SplitN(text, ":", 3) + ) + if len(parts) < 3 { + return "", fmt.Errorf("invalid cgroup entry: %q", text) + } + // text is like "0::/user.slice/user-1001.slice/session-1.scope" + if parts[0] == "0" && parts[1] == "" { + return parts[2], nil + } + } + if err := s.Err(); err != nil { + return "", err + } + return "", fmt.Errorf("cgroup path not found") +} + +// ToResources converts the oci LinuxResources struct into a +// v2 Resources type for use with this package. +// +// converting cgroups configuration from v1 to v2 +// ref: https://github.com/containers/crun/blob/master/crun.1.md#cgroup-v2 +func ToResources(spec *specs.LinuxResources) *Resources { + var resources Resources + if cpu := spec.CPU; cpu != nil { + resources.CPU = &CPU{ + Cpus: cpu.Cpus, + Mems: cpu.Mems, + } + if shares := cpu.Shares; shares != nil { + convertedWeight := 1 + ((*shares-2)*9999)/262142 + resources.CPU.Weight = &convertedWeight + } + if period := cpu.Period; period != nil { + resources.CPU.Max = NewCPUMax(cpu.Quota, period) + } + } + if mem := spec.Memory; mem != nil { + resources.Memory = &Memory{} + if swap := mem.Swap; swap != nil { + resources.Memory.Swap = swap + } + if l := mem.Limit; l != nil { + resources.Memory.Max = l + } + if l := mem.Reservation; l != nil { + resources.Memory.Low = l + } + } + if hugetlbs := spec.HugepageLimits; hugetlbs != nil { + hugeTlbUsage := HugeTlb{} + for _, hugetlb := range hugetlbs { + hugeTlbUsage = append(hugeTlbUsage, HugeTlbEntry{ + HugePageSize: hugetlb.Pagesize, + Limit: hugetlb.Limit, + }) + } + resources.HugeTlb = &hugeTlbUsage + } + if pids := spec.Pids; pids != nil { + resources.Pids = &Pids{ + Max: pids.Limit, + } + } + if i := spec.BlockIO; i != nil { + resources.IO = &IO{} + if i.Weight != nil { + resources.IO.BFQ.Weight = 1 + (*i.Weight-10)*9999/990 + } + for t, devices := range map[IOType][]specs.LinuxThrottleDevice{ + ReadBPS: i.ThrottleReadBpsDevice, + WriteBPS: i.ThrottleWriteBpsDevice, + ReadIOPS: i.ThrottleReadIOPSDevice, + WriteIOPS: i.ThrottleWriteIOPSDevice, + } { + for _, d := range devices { + resources.IO.Max = append(resources.IO.Max, Entry{ + Type: t, + Major: d.Major, + Minor: d.Minor, + Rate: d.Rate, + }) + } + } + } + if i := spec.Rdma; i != nil { + resources.RDMA = &RDMA{} + for device, value := range spec.Rdma { + if device != "" && (value.HcaHandles != nil && value.HcaObjects != nil) { + resources.RDMA.Limit = append(resources.RDMA.Limit, RDMAEntry{ + Device: device, + HcaHandles: *value.HcaHandles, + HcaObjects: *value.HcaObjects, + }) + } + } + } + + return &resources +} + +// Gets uint64 parsed content of single value cgroup stat file +func getStatFileContentUint64(filePath string) uint64 { + contents, err := ioutil.ReadFile(filePath) + if err != nil { + return 0 + } + trimmed := strings.TrimSpace(string(contents)) + if trimmed == "max" { + return math.MaxUint64 + } + + res, err := 
parseUint(trimmed, 10, 64) + if err != nil { + logrus.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), filePath) + return res + } + + return res +} + +func readIoStats(path string) []*stats.IOEntry { + // more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt + var usage []*stats.IOEntry + fpath := filepath.Join(path, "io.stat") + currentData, err := ioutil.ReadFile(fpath) + if err != nil { + return usage + } + entries := strings.Split(string(currentData), "\n") + + for _, entry := range entries { + parts := strings.Split(entry, " ") + if len(parts) < 2 { + continue + } + majmin := strings.Split(parts[0], ":") + if len(majmin) != 2 { + continue + } + major, err := strconv.ParseUint(majmin[0], 10, 0) + if err != nil { + return usage + } + minor, err := strconv.ParseUint(majmin[1], 10, 0) + if err != nil { + return usage + } + parts = parts[1:] + ioEntry := stats.IOEntry{ + Major: major, + Minor: minor, + } + for _, s := range parts { + keyPairValue := strings.Split(s, "=") + if len(keyPairValue) != 2 { + continue + } + v, err := strconv.ParseUint(keyPairValue[1], 10, 0) + if err != nil { + continue + } + switch keyPairValue[0] { + case "rbytes": + ioEntry.Rbytes = v + case "wbytes": + ioEntry.Wbytes = v + case "rios": + ioEntry.Rios = v + case "wios": + ioEntry.Wios = v + } + } + usage = append(usage, &ioEntry) + } + return usage +} + +func rdmaStats(filepath string) []*stats.RdmaEntry { + currentData, err := ioutil.ReadFile(filepath) + if err != nil { + return []*stats.RdmaEntry{} + } + return toRdmaEntry(strings.Split(string(currentData), "\n")) +} + +func parseRdmaKV(raw string, entry *stats.RdmaEntry) { + var value uint64 + var err error + + parts := strings.Split(raw, "=") + switch len(parts) { + case 2: + if parts[1] == "max" { + value = math.MaxUint32 + } else { + value, err = parseUint(parts[1], 10, 32) + if err != nil { + return + } + } + if parts[0] == "hca_handle" { + entry.HcaHandles = uint32(value) + } else if parts[0] == "hca_object" { + entry.HcaObjects = uint32(value) + } + } +} + +func toRdmaEntry(strEntries []string) []*stats.RdmaEntry { + var rdmaEntries []*stats.RdmaEntry + for i := range strEntries { + parts := strings.Fields(strEntries[i]) + switch len(parts) { + case 3: + entry := new(stats.RdmaEntry) + entry.Device = parts[0] + parseRdmaKV(parts[1], entry) + parseRdmaKV(parts[2], entry) + + rdmaEntries = append(rdmaEntries, entry) + default: + continue + } + } + return rdmaEntries +} + +// isUnitExists returns true if the error is that a systemd unit already exists. 
+func isUnitExists(err error) bool { + if err != nil { + if dbusError, ok := err.(dbus.Error); ok { + return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists") + } + } + return false +} + +func systemdUnitFromPath(path string) string { + _, unit := filepath.Split(path) + return unit +} + +func readHugeTlbStats(path string) []*stats.HugeTlbStat { + var usage = []*stats.HugeTlbStat{} + var keyUsage = make(map[string]*stats.HugeTlbStat) + f, err := os.Open(path) + if err != nil { + return usage + } + files, err := f.Readdir(-1) + f.Close() + if err != nil { + return usage + } + + for _, file := range files { + if strings.Contains(file.Name(), "hugetlb") && + (strings.HasSuffix(file.Name(), "max") || strings.HasSuffix(file.Name(), "current")) { + var hugeTlb *stats.HugeTlbStat + var ok bool + fileName := strings.Split(file.Name(), ".") + pageSize := fileName[1] + if hugeTlb, ok = keyUsage[pageSize]; !ok { + hugeTlb = &stats.HugeTlbStat{} + } + hugeTlb.Pagesize = pageSize + out, err := ioutil.ReadFile(filepath.Join(path, file.Name())) + if err != nil { + continue + } + var value uint64 + stringVal := strings.TrimSpace(string(out)) + if stringVal == "max" { + value = math.MaxUint64 + } else { + value, err = strconv.ParseUint(stringVal, 10, 64) + } + if err != nil { + continue + } + switch fileName[2] { + case "max": + hugeTlb.Max = value + case "current": + hugeTlb.Current = value + } + keyUsage[pageSize] = hugeTlb + } + } + for _, entry := range keyUsage { + usage = append(usage, entry) + } + return usage +} diff --git a/src/runtime/vendor/github.com/containerd/console/go.mod b/src/runtime/vendor/github.com/containerd/console/go.mod deleted file mode 100644 index 1fe5b7fecb4a..000000000000 --- a/src/runtime/vendor/github.com/containerd/console/go.mod +++ /dev/null @@ -1,5 +0,0 @@ -module github.com/containerd/console - -go 1.13 - -require golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c diff --git a/src/runtime/vendor/github.com/containerd/console/go.sum b/src/runtime/vendor/github.com/containerd/console/go.sum deleted file mode 100644 index 1225630b7b60..000000000000 --- a/src/runtime/vendor/github.com/containerd/console/go.sum +++ /dev/null @@ -1,2 +0,0 @@ -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c h1:VwygUrnw9jn88c4u8GD3rZQbqrP/tgas88tPUbBxQrk= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/src/runtime/vendor/github.com/containerd/containerd/version/version.go b/src/runtime/vendor/github.com/containerd/containerd/version/version.go index cef635bb9bf3..a4156cb7a6c2 100644 --- a/src/runtime/vendor/github.com/containerd/containerd/version/version.go +++ b/src/runtime/vendor/github.com/containerd/containerd/version/version.go @@ -23,7 +23,7 @@ var ( Package = "github.com/containerd/containerd" // Version holds the complete version number. Filled in at linking time. - Version = "1.6.6+unknown" + Version = "1.6.8+unknown" // Revision is filled with the VCS (e.g. git) revision being used to build // the program at linking time. 
diff --git a/src/runtime/vendor/github.com/containerd/cri-containerd/pkg/api/runtimeoptions/v1/api.pb.go b/src/runtime/vendor/github.com/containerd/cri-containerd/pkg/api/runtimeoptions/v1/api.pb.go index 71341ec78da7..bf0cf3d41be0 100644 --- a/src/runtime/vendor/github.com/containerd/cri-containerd/pkg/api/runtimeoptions/v1/api.pb.go +++ b/src/runtime/vendor/github.com/containerd/cri-containerd/pkg/api/runtimeoptions/v1/api.pb.go @@ -1,17 +1,17 @@ /* -Copyright 2019 The containerd Authors. + Copyright The containerd Authors. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ // Code generated by protoc-gen-gogo. DO NOT EDIT. // source: api.proto diff --git a/src/runtime/vendor/github.com/containerd/fifo/go.mod b/src/runtime/vendor/github.com/containerd/fifo/go.mod deleted file mode 100644 index 0c1c48fab782..000000000000 --- a/src/runtime/vendor/github.com/containerd/fifo/go.mod +++ /dev/null @@ -1,9 +0,0 @@ -module github.com/containerd/fifo - -go 1.13 - -require ( - github.com/pkg/errors v0.9.1 - github.com/stretchr/testify v1.6.1 - golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c -) diff --git a/src/runtime/vendor/github.com/containerd/fifo/go.sum b/src/runtime/vendor/github.com/containerd/fifo/go.sum deleted file mode 100644 index 48477d38527c..000000000000 --- a/src/runtime/vendor/github.com/containerd/fifo/go.sum +++ /dev/null @@ -1,15 +0,0 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c h1:VwygUrnw9jn88c4u8GD3rZQbqrP/tgas88tPUbBxQrk= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/src/runtime/vendor/github.com/containerd/go-runc/go.mod b/src/runtime/vendor/github.com/containerd/go-runc/go.mod deleted file mode 100644 index f69c26fd68fb..000000000000 --- a/src/runtime/vendor/github.com/containerd/go-runc/go.mod +++ /dev/null @@ -1,11 +0,0 @@ -module github.com/containerd/go-runc - -go 1.13 - -require ( - github.com/containerd/console v1.0.1 - github.com/opencontainers/runtime-spec v1.0.2 - github.com/pkg/errors v0.9.1 - github.com/sirupsen/logrus v1.7.0 - golang.org/x/sys v0.0.0-20200916030750-2334cc1a136f -) diff --git a/src/runtime/vendor/github.com/containerd/go-runc/go.sum b/src/runtime/vendor/github.com/containerd/go-runc/go.sum deleted file mode 100644 index afc9198e09ae..000000000000 --- a/src/runtime/vendor/github.com/containerd/go-runc/go.sum +++ /dev/null @@ -1,17 +0,0 @@ -github.com/containerd/console v1.0.1 h1:u7SFAJyRqWcG6ogaMAx3KjSTy1e3hT9QxqX7Jco7dRc= -github.com/containerd/console v1.0.1/go.mod h1:XUsP6YE/mKtz6bxc+I8UiKKTP04qjQL4qcS3XoQ5xkw= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0= -github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM= -github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200916030750-2334cc1a136f h1:6Sc1XOXTulBN6imkqo6XoAXDEzoQ4/ro6xy7Vn8+rOM= -golang.org/x/sys v0.0.0-20200916030750-2334cc1a136f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/src/runtime/vendor/github.com/containerd/ttrpc/go.mod b/src/runtime/vendor/github.com/containerd/ttrpc/go.mod deleted file mode 100644 index efc00860c6ae..000000000000 --- a/src/runtime/vendor/github.com/containerd/ttrpc/go.mod +++ /dev/null @@ -1,13 +0,0 @@ -module github.com/containerd/ttrpc - -go 1.13 - -require ( - github.com/gogo/protobuf v1.3.2 - github.com/prometheus/procfs v0.6.0 - github.com/sirupsen/logrus v1.8.1 - golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c - google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63 - google.golang.org/grpc v1.27.1 - google.golang.org/protobuf v1.27.1 -) diff --git a/src/runtime/vendor/github.com/containerd/ttrpc/go.sum b/src/runtime/vendor/github.com/containerd/ttrpc/go.sum deleted file mode 100644 index cb91432e33d1..000000000000 --- a/src/runtime/vendor/github.com/containerd/ttrpc/go.sum +++ /dev/null @@ -1,99 +0,0 @@ 
-cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/procfs v0.6.0 h1:mxy4L2jP6qMonqmq+aTtOx1ifVWUgG/TAmntgbh3xv4= -github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= -github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= -github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod 
h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201021035429-f5854403a974 h1:IX6qOQeG5uLjB/hjjwjedwfjND0hgjPMMyO1RoIXQNI= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c h1:VwygUrnw9jn88c4u8GD3rZQbqrP/tgas88tPUbBxQrk= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63 h1:YzfoEYWbODU5Fbt37+h7X16BWQbad7Q4S6gclTKFXM8= -google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.27.1 h1:zvIju4sqAGvwKspUQOhwnpcqSbzi7/H6QomNNjTL4sk= -google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ= -google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/src/runtime/vendor/github.com/containerd/typeurl/go.mod b/src/runtime/vendor/github.com/containerd/typeurl/go.mod deleted file mode 100644 index 77e171e57bcb..000000000000 --- a/src/runtime/vendor/github.com/containerd/typeurl/go.mod +++ /dev/null @@ -1,8 +0,0 @@ -module github.com/containerd/typeurl - -go 1.13 - -require ( - github.com/gogo/protobuf v1.3.2 - github.com/pkg/errors v0.9.1 -) diff --git a/src/runtime/vendor/github.com/containerd/typeurl/go.sum b/src/runtime/vendor/github.com/containerd/typeurl/go.sum deleted file mode 100644 index cf5608882da9..000000000000 --- a/src/runtime/vendor/github.com/containerd/typeurl/go.sum +++ /dev/null @@ -1,33 +0,0 @@ -github.com/gogo/protobuf v1.3.2 
h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/src/runtime/vendor/github.com/containers/podman/v4/LICENSE b/src/runtime/vendor/github.com/containers/podman/v4/LICENSE new file mode 100644 index 000000000000..9b259bdfcf90 --- /dev/null +++ b/src/runtime/vendor/github.com/containers/podman/v4/LICENSE @@ 
-0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/runtime/vendor/github.com/containers/podman/v4/pkg/annotations/annotations.go b/src/runtime/vendor/github.com/containers/podman/v4/pkg/annotations/annotations.go new file mode 100644 index 000000000000..a22222f10473 --- /dev/null +++ b/src/runtime/vendor/github.com/containers/podman/v4/pkg/annotations/annotations.go @@ -0,0 +1,122 @@ +package annotations + +const ( + // Annotations carries the received Kubelet annotations. + Annotations = "io.kubernetes.cri-o.Annotations" + + // ContainerID is the container ID annotation. + ContainerID = "io.kubernetes.cri-o.ContainerID" + + // ContainerName is the container name annotation. + ContainerName = "io.kubernetes.cri-o.ContainerName" + + // ContainerType is the container type (sandbox or container) annotation. + ContainerType = "io.kubernetes.cri-o.ContainerType" + + // Created is the container creation time annotation. + Created = "io.kubernetes.cri-o.Created" + + // HostName is the container host name annotation. + HostName = "io.kubernetes.cri-o.HostName" + + // CgroupParent is the sandbox cgroup parent. + CgroupParent = "io.kubernetes.cri-o.CgroupParent" + + // IP is the container ipv4 or ipv6 address. + IP = "io.kubernetes.cri-o.IP" + + // NamespaceOptions store the options for namespaces. + NamespaceOptions = "io.kubernetes.cri-o.NamespaceOptions" + + // SeccompProfilePath is the node seccomp profile path. + SeccompProfilePath = "io.kubernetes.cri-o.SeccompProfilePath" + + // Image is the container image ID annotation. + Image = "io.kubernetes.cri-o.Image" + + // ImageName is the container image name annotation. + ImageName = "io.kubernetes.cri-o.ImageName" + + // ImageRef is the container image ref annotation. + ImageRef = "io.kubernetes.cri-o.ImageRef" + + // KubeName is the kubernetes name annotation. + KubeName = "io.kubernetes.cri-o.KubeName" + + // PortMappings holds the port mappings for the sandbox. + PortMappings = "io.kubernetes.cri-o.PortMappings" + + // Labels are the kubernetes labels annotation. + Labels = "io.kubernetes.cri-o.Labels" + + // LogPath is the container logging path annotation. + LogPath = "io.kubernetes.cri-o.LogPath" + + // Metadata is the container metadata annotation. + Metadata = "io.kubernetes.cri-o.Metadata" + + // Name is the pod name annotation. + Name = "io.kubernetes.cri-o.Name" + + // Namespace is the pod namespace annotation. + Namespace = "io.kubernetes.cri-o.Namespace" + + // PrivilegedRuntime is the annotation for the privileged runtime path. + PrivilegedRuntime = "io.kubernetes.cri-o.PrivilegedRuntime" + + // ResolvPath is the resolver configuration path annotation. + ResolvPath = "io.kubernetes.cri-o.ResolvPath" + + // HostnamePath is the path to /etc/hostname to bind mount annotation. + HostnamePath = "io.kubernetes.cri-o.HostnamePath" + + // SandboxID is the sandbox ID annotation. + SandboxID = "io.kubernetes.cri-o.SandboxID" + + // SandboxName is the sandbox name annotation. + SandboxName = "io.kubernetes.cri-o.SandboxName" + + // ShmPath is the shared memory path annotation. 
+ ShmPath = "io.kubernetes.cri-o.ShmPath" + + // MountPoint is the mount point of the container rootfs. + MountPoint = "io.kubernetes.cri-o.MountPoint" + + // RuntimeHandler is the annotation for runtime handler. + RuntimeHandler = "io.kubernetes.cri-o.RuntimeHandler" + + // TTY is the terminal path annotation. + TTY = "io.kubernetes.cri-o.TTY" + + // Stdin is the stdin annotation. + Stdin = "io.kubernetes.cri-o.Stdin" + + // StdinOnce is the stdin_once annotation. + StdinOnce = "io.kubernetes.cri-o.StdinOnce" + + // Volumes is the volumes annotation. + Volumes = "io.kubernetes.cri-o.Volumes" + + // HostNetwork indicates whether the host network namespace is used or not. + HostNetwork = "io.kubernetes.cri-o.HostNetwork" + + // CNIResult is the JSON string representation of the Result from CNI. + CNIResult = "io.kubernetes.cri-o.CNIResult" + + // ContainerManager is the annotation key for indicating the creator and + // manager of the container. + ContainerManager = "io.container.manager" +) + +// ContainerType values +const ( + // ContainerTypeSandbox represents a pod sandbox container. + ContainerTypeSandbox = "sandbox" + + // ContainerTypeContainer represents a container running within a pod. + ContainerTypeContainer = "container" +) + +// ContainerManagerLibpod indicates that libpod created and manages the +// container. +const ContainerManagerLibpod = "libpod" diff --git a/src/runtime/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go b/src/runtime/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go index 0668a66cf707..be2b3436062d 100644 --- a/src/runtime/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go +++ b/src/runtime/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go @@ -15,7 +15,7 @@ type roffRenderer struct { extensions blackfriday.Extensions listCounters []int firstHeader bool - defineTerm bool + firstDD bool listDepth int } @@ -42,7 +42,8 @@ const ( quoteCloseTag = "\n.RE\n" listTag = "\n.RS\n" listCloseTag = "\n.RE\n" - arglistTag = "\n.TP\n" + dtTag = "\n.TP\n" + dd2Tag = "\n" tableStart = "\n.TS\nallbox;\n" tableEnd = ".TE\n" tableCellStart = "T{\n" @@ -90,7 +91,7 @@ func (r *roffRenderer) RenderNode(w io.Writer, node *blackfriday.Node, entering switch node.Type { case blackfriday.Text: - r.handleText(w, node, entering) + escapeSpecialChars(w, node.Literal) case blackfriday.Softbreak: out(w, crTag) case blackfriday.Hardbreak: @@ -150,40 +151,21 @@ func (r *roffRenderer) RenderNode(w io.Writer, node *blackfriday.Node, entering out(w, codeCloseTag) case blackfriday.Table: r.handleTable(w, node, entering) - case blackfriday.TableCell: - r.handleTableCell(w, node, entering) case blackfriday.TableHead: case blackfriday.TableBody: case blackfriday.TableRow: // no action as cell entries do all the nroff formatting return blackfriday.GoToNext + case blackfriday.TableCell: + r.handleTableCell(w, node, entering) + case blackfriday.HTMLSpan: + // ignore other HTML tags default: fmt.Fprintln(os.Stderr, "WARNING: go-md2man does not handle node type "+node.Type.String()) } return walkAction } -func (r *roffRenderer) handleText(w io.Writer, node *blackfriday.Node, entering bool) { - var ( - start, end string - ) - // handle special roff table cell text encapsulation - if node.Parent.Type == blackfriday.TableCell { - if len(node.Literal) > 30 { - start = tableCellStart - end = tableCellEnd - } else { - // end rows that aren't terminated by "tableCellEnd" with a cr if end of row - if node.Parent.Next == nil && !node.Parent.IsHeader { - end = crTag - } - } - } - out(w, 
start) - escapeSpecialChars(w, node.Literal) - out(w, end) -} - func (r *roffRenderer) handleHeading(w io.Writer, node *blackfriday.Node, entering bool) { if entering { switch node.Level { @@ -230,15 +212,20 @@ func (r *roffRenderer) handleItem(w io.Writer, node *blackfriday.Node, entering if node.ListFlags&blackfriday.ListTypeOrdered != 0 { out(w, fmt.Sprintf(".IP \"%3d.\" 5\n", r.listCounters[len(r.listCounters)-1])) r.listCounters[len(r.listCounters)-1]++ + } else if node.ListFlags&blackfriday.ListTypeTerm != 0 { + // DT (definition term): line just before DD (see below). + out(w, dtTag) + r.firstDD = true } else if node.ListFlags&blackfriday.ListTypeDefinition != 0 { - // state machine for handling terms and following definitions - // since blackfriday does not distinguish them properly, nor - // does it seperate them into separate lists as it should - if !r.defineTerm { - out(w, arglistTag) - r.defineTerm = true + // DD (definition description): line that starts with ": ". + // + // We have to distinguish between the first DD and the + // subsequent ones, as there should be no vertical + // whitespace between the DT and the first DD. + if r.firstDD { + r.firstDD = false } else { - r.defineTerm = false + out(w, dd2Tag) } } else { out(w, ".IP \\(bu 2\n") @@ -251,7 +238,7 @@ func (r *roffRenderer) handleItem(w io.Writer, node *blackfriday.Node, entering func (r *roffRenderer) handleTable(w io.Writer, node *blackfriday.Node, entering bool) { if entering { out(w, tableStart) - //call walker to count cells (and rows?) so format section can be produced + // call walker to count cells (and rows?) so format section can be produced columns := countColumns(node) out(w, strings.Repeat("l ", columns)+"\n") out(w, strings.Repeat("l ", columns)+".\n") @@ -261,28 +248,41 @@ func (r *roffRenderer) handleTable(w io.Writer, node *blackfriday.Node, entering } func (r *roffRenderer) handleTableCell(w io.Writer, node *blackfriday.Node, entering bool) { - var ( - start, end string - ) - if node.IsHeader { - start = codespanTag - end = codespanCloseTag - } if entering { + var start string if node.Prev != nil && node.Prev.Type == blackfriday.TableCell { - out(w, "\t"+start) - } else { - out(w, start) + start = "\t" + } + if node.IsHeader { + start += codespanTag + } else if nodeLiteralSize(node) > 30 { + start += tableCellStart } + out(w, start) } else { - // need to carriage return if we are at the end of the header row - if node.IsHeader && node.Next == nil { - end = end + crTag + var end string + if node.IsHeader { + end = codespanCloseTag + } else if nodeLiteralSize(node) > 30 { + end = tableCellEnd + } + if node.Next == nil && end != tableCellEnd { + // Last cell: need to carriage return if we are at the end of the + // header row and content isn't wrapped in a "tablecell" + end += crTag } out(w, end) } } +func nodeLiteralSize(node *blackfriday.Node) int { + total := 0 + for n := node.FirstChild; n != nil; n = n.FirstChild { + total += len(n.Literal) + } + return total +} + // because roff format requires knowing the column count before outputting any table // data we need to walk a table tree and count the columns func countColumns(node *blackfriday.Node) int { @@ -309,15 +309,6 @@ func out(w io.Writer, output string) { io.WriteString(w, output) // nolint: errcheck } -func needsBackslash(c byte) bool { - for _, r := range []byte("-_&\\~") { - if c == r { - return true - } - } - return false -} - func escapeSpecialChars(w io.Writer, text []byte) { for i := 0; i < len(text); i++ { // escape initial 
apostrophe or period @@ -328,7 +319,7 @@ func escapeSpecialChars(w io.Writer, text []byte) { // directly copy normal characters org := i - for i < len(text) && !needsBackslash(text[i]) { + for i < len(text) && text[i] != '\\' { i++ } if i > org { diff --git a/src/runtime/vendor/github.com/cri-o/cri-o/LICENSE b/src/runtime/vendor/github.com/cri-o/cri-o/LICENSE deleted file mode 100644 index 8dada3edaf50..000000000000 --- a/src/runtime/vendor/github.com/cri-o/cri-o/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/src/runtime/vendor/github.com/cri-o/cri-o/pkg/annotations/annotations.go b/src/runtime/vendor/github.com/cri-o/cri-o/pkg/annotations/annotations.go deleted file mode 100644 index 151d93904a49..000000000000 --- a/src/runtime/vendor/github.com/cri-o/cri-o/pkg/annotations/annotations.go +++ /dev/null @@ -1,93 +0,0 @@ -package annotations - -const ( - // Annotations carries the received Kubelet annotations - Annotations = "io.kubernetes.cri-o.Annotations" - - // ContainerID is the container ID annotation - ContainerID = "io.kubernetes.cri-o.ContainerID" - - // ContainerName is the container name annotation - ContainerName = "io.kubernetes.cri-o.ContainerName" - - // ContainerType is the container type (sandbox or container) annotation - ContainerType = "io.kubernetes.cri-o.ContainerType" - - // Created is the container creation time annotation - Created = "io.kubernetes.cri-o.Created" - - // HostName is the container host name annotation - HostName = "io.kubernetes.cri-o.HostName" - - // IP is the container ipv4 or ipv6 address - IP = "io.kubernetes.cri-o.IP" - - // Image is the container image ID annotation - Image = "io.kubernetes.cri-o.Image" - - // ImageName is the container image name annotation - ImageName = "io.kubernetes.cri-o.ImageName" - - // ImageRef is the container image ref annotation - ImageRef = "io.kubernetes.cri-o.ImageRef" - - // KubeName is the kubernetes name annotation - KubeName = "io.kubernetes.cri-o.KubeName" - - // Labels are the kubernetes labels annotation - Labels = "io.kubernetes.cri-o.Labels" - - // LogPath is the container logging path annotation - LogPath = "io.kubernetes.cri-o.LogPath" - - // Metadata is the container metadata annotation - Metadata = "io.kubernetes.cri-o.Metadata" - - // Name is the pod name annotation - Name = "io.kubernetes.cri-o.Name" - - // PrivilegedRuntime is the annotation for the privileged runtime path - PrivilegedRuntime = "io.kubernetes.cri-o.PrivilegedRuntime" - - // ResolvPath is the resolver configuration path annotation - ResolvPath = "io.kubernetes.cri-o.ResolvPath" - - // HostnamePath is the path to /etc/hostname to bind mount annotation - HostnamePath = "io.kubernetes.cri-o.HostnamePath" - - // SandboxID is the sandbox ID annotation - SandboxID = "io.kubernetes.cri-o.SandboxID" - - // SandboxName is the sandbox name annotation - SandboxName = 
"io.kubernetes.cri-o.SandboxName" - - // ShmPath is the shared memory path annotation - ShmPath = "io.kubernetes.cri-o.ShmPath" - - // MountPoint is the mount point of the container rootfs - MountPoint = "io.kubernetes.cri-o.MountPoint" - - // TrustedSandbox is the annotation for trusted sandboxes - TrustedSandbox = "io.kubernetes.cri-o.TrustedSandbox" - - // TTY is the terminal path annotation - TTY = "io.kubernetes.cri-o.TTY" - - // Stdin is the stdin annotation - Stdin = "io.kubernetes.cri-o.Stdin" - - // StdinOnce is the stdin_once annotation - StdinOnce = "io.kubernetes.cri-o.StdinOnce" - - // Volumes is the volumes annotatoin - Volumes = "io.kubernetes.cri-o.Volumes" -) - -// ContainerType values -const ( - // ContainerTypeSandbox represents a pod sandbox container - ContainerTypeSandbox = "sandbox" - - // ContainerTypeContainer represents a container running within a pod - ContainerTypeContainer = "container" -) diff --git a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/.travis.yml b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/.travis.yml index 3938f3834944..b94ff8cf92a9 100644 --- a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/.travis.yml +++ b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/.travis.yml @@ -4,10 +4,12 @@ language: go go: - - 1.7.x - - 1.8.x + - 1.13.x + - 1.16.x - tip - +arch: + - AMD64 + - ppc64le os: - linux - osx diff --git a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/README.md b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/README.md index 49b2baa9f35c..3624617c89b0 100644 --- a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/README.md +++ b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/README.md @@ -7,6 +7,19 @@ standard library][go#20126]. The purpose of this function is to be a "secure" alternative to `filepath.Join`, and in particular it provides certain guarantees that are not provided by `filepath.Join`. +> **NOTE**: This code is *only* safe if you are not at risk of other processes +> modifying path components after you've used `SecureJoin`. If it is possible +> for a malicious process to modify path components of the resolved path, then +> you will be vulnerable to some fairly trivial TOCTOU race conditions. [There +> are some Linux kernel patches I'm working on which might allow for a better +> solution.][lwn-obeneath] +> +> In addition, with a slightly modified API it might be possible to use +> `O_PATH` and verify that the opened path is actually the resolved one -- but +> I have not done that yet. I might add it in the future as a helper function +> to help users verify the path (we can't just return `/proc/self/fd/` +> because that doesn't always work transparently for all users). + This is the function prototype: ```go @@ -16,8 +29,8 @@ func SecureJoin(root, unsafePath string) (string, error) This library **guarantees** the following: * If no error is set, the resulting string **must** be a child path of - `SecureJoin` and will not contain any symlink path components (they will all - be expanded). + `root` and will not contain any symlink path components (they will all be + expanded). * When expanding symlinks, all symlink path components **must** be resolved relative to the provided root. In particular, this can be considered a @@ -25,7 +38,7 @@ This library **guarantees** the following: these symlinks will **not** be expanded lexically (`filepath.Clean` is not called on the input before processing). 
-* Non-existant path components are unaffected by `SecureJoin` (similar to +* Non-existent path components are unaffected by `SecureJoin` (similar to `filepath.EvalSymlinks`'s semantics). * The returned path will always be `filepath.Clean`ed and thus not contain any @@ -57,6 +70,7 @@ func SecureJoin(root, unsafePath string) (string, error) { } ``` +[lwn-obeneath]: https://lwn.net/Articles/767547/ [go#20126]: https://github.com/golang/go/issues/20126 ### License ### diff --git a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/VERSION b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/VERSION index ee1372d33a29..7179039691ce 100644 --- a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/VERSION +++ b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/VERSION @@ -1 +1 @@ -0.2.2 +0.2.3 diff --git a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/join.go b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/join.go index c4ca3d713005..7dd08dbbdf7b 100644 --- a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/join.go +++ b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/join.go @@ -12,39 +12,20 @@ package securejoin import ( "bytes" + "errors" "os" "path/filepath" "strings" "syscall" - - "github.com/pkg/errors" ) -// ErrSymlinkLoop is returned by SecureJoinVFS when too many symlinks have been -// evaluated in attempting to securely join the two given paths. -var ErrSymlinkLoop = errors.Wrap(syscall.ELOOP, "secure join") - // IsNotExist tells you if err is an error that implies that either the path // accessed does not exist (or path components don't exist). This is // effectively a more broad version of os.IsNotExist. func IsNotExist(err error) bool { - // If it's a bone-fide ENOENT just bail. - if os.IsNotExist(errors.Cause(err)) { - return true - } - // Check that it's not actually an ENOTDIR, which in some cases is a more // convoluted case of ENOENT (usually involving weird paths). - var errno error - switch err := errors.Cause(err).(type) { - case *os.PathError: - errno = err.Err - case *os.LinkError: - errno = err.Err - case *os.SyscallError: - errno = err.Err - } - return errno == syscall.ENOTDIR || errno == syscall.ENOENT + return errors.Is(err, os.ErrNotExist) || errors.Is(err, syscall.ENOTDIR) || errors.Is(err, syscall.ENOENT) } // SecureJoinVFS joins the two given path components (similar to Join) except @@ -68,7 +49,7 @@ func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { n := 0 for unsafePath != "" { if n > 255 { - return "", ErrSymlinkLoop + return "", &os.PathError{Op: "SecureJoin", Path: root + "/" + unsafePath, Err: syscall.ELOOP} } // Next path component, p. 
diff --git a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/vendor.conf b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/vendor.conf deleted file mode 100644 index 66bb574b955b..000000000000 --- a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/vendor.conf +++ /dev/null @@ -1 +0,0 @@ -github.com/pkg/errors v0.8.0 diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/.mailmap b/src/runtime/vendor/github.com/fsnotify/fsnotify/.mailmap new file mode 100644 index 000000000000..a04f2907fed3 --- /dev/null +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/.mailmap @@ -0,0 +1,2 @@ +Chris Howey +Nathan Youngman <4566+nathany@users.noreply.github.com> diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/.travis.yml b/src/runtime/vendor/github.com/fsnotify/fsnotify/.travis.yml deleted file mode 100644 index a9c30165cddf..000000000000 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/.travis.yml +++ /dev/null @@ -1,36 +0,0 @@ -sudo: false -language: go - -go: - - "stable" - - "1.11.x" - - "1.10.x" - - "1.9.x" - -matrix: - include: - - go: "stable" - env: GOLINT=true - allow_failures: - - go: tip - fast_finish: true - - -before_install: - - if [ ! -z "${GOLINT}" ]; then go get -u golang.org/x/lint/golint; fi - -script: - - go test --race ./... - -after_script: - - test -z "$(gofmt -s -l -w . | tee /dev/stderr)" - - if [ ! -z "${GOLINT}" ]; then echo running golint; golint --set_exit_status ./...; else echo skipping golint; fi - - go vet ./... - -os: - - linux - - osx - - windows - -notifications: - email: false diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/AUTHORS b/src/runtime/vendor/github.com/fsnotify/fsnotify/AUTHORS index 5ab5d41c5472..6cbabe5ef50b 100644 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/AUTHORS +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/AUTHORS @@ -4,35 +4,44 @@ # You can update this list using the following command: # -# $ git shortlog -se | awk '{print $2 " " $3 " " $4}' +# $ (head -n10 AUTHORS && git shortlog -se | sed -E 's/^\s+[0-9]+\t//') | tee AUTHORS # Please keep the list sorted. Aaron L Adrien Bustany +Alexey Kazakov Amit Krishnan Anmol Sethi Bjørn Erik Pedersen +Brian Goff Bruno Bigras Caleb Spare Case Nelson -Chris Howey +Chris Howey Christoffer Buchholz Daniel Wagner-Hall Dave Cheney +Eric Lin Evan Phoenix Francisco Souza +Gautam Dey Hari haran -John C Barstow +Ichinose Shogo +Johannes Ebke +John C Barstow Kelvin Fo Ken-ichirou MATSUZAWA Matt Layher +Matthias Stone Nathan Youngman Nickolai Zeldovich +Oliver Bristow Patrick Paul Hammond Pawel Knap Pieter Droogendijk +Pratik Shinde Pursuit92 Riku Voipio Rob Figueiredo @@ -41,6 +50,7 @@ Slawek Ligus Soge Zhang Tiffany Jernigan Tilak Sharma +Tobias Klauser Tom Payne Travis Cline Tudor Golubenco diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/CHANGELOG.md b/src/runtime/vendor/github.com/fsnotify/fsnotify/CHANGELOG.md index be4d7ea2c145..cc01c08f56d5 100644 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/CHANGELOG.md +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/CHANGELOG.md @@ -1,6 +1,46 @@ # Changelog -## v1.4.7 / 2018-01-09 +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [Unreleased] + +## [1.5.4] - 2022-04-25 + +* Windows: add missing defer to `Watcher.WatchList` [#447](https://github.com/fsnotify/fsnotify/pull/447) +* go.mod: use latest x/sys [#444](https://github.com/fsnotify/fsnotify/pull/444) +* Fix compilation for OpenBSD [#443](https://github.com/fsnotify/fsnotify/pull/443) + +## [1.5.3] - 2022-04-22 + +* This version is retracted. An incorrect branch is published accidentally [#445](https://github.com/fsnotify/fsnotify/issues/445) + +## [1.5.2] - 2022-04-21 + +* Add a feature to return the directories and files that are being monitored [#374](https://github.com/fsnotify/fsnotify/pull/374) +* Fix potential crash on windows if `raw.FileNameLength` exceeds `syscall.MAX_PATH` [#361](https://github.com/fsnotify/fsnotify/pull/361) +* Allow build on unsupported GOOS [#424](https://github.com/fsnotify/fsnotify/pull/424) +* Don't set `poller.fd` twice in `newFdPoller` [#406](https://github.com/fsnotify/fsnotify/pull/406) +* fix go vet warnings: call to `(*T).Fatalf` from a non-test goroutine [#416](https://github.com/fsnotify/fsnotify/pull/416) + +## [1.5.1] - 2021-08-24 + +* Revert Add AddRaw to not follow symlinks [#394](https://github.com/fsnotify/fsnotify/pull/394) + +## [1.5.0] - 2021-08-20 + +* Go: Increase minimum required version to Go 1.12 [#381](https://github.com/fsnotify/fsnotify/pull/381) +* Feature: Add AddRaw method which does not follow symlinks when adding a watch [#289](https://github.com/fsnotify/fsnotify/pull/298) +* Windows: Follow symlinks by default like on all other systems [#289](https://github.com/fsnotify/fsnotify/pull/289) +* CI: Use GitHub Actions for CI and cover go 1.12-1.17 + [#378](https://github.com/fsnotify/fsnotify/pull/378) + [#381](https://github.com/fsnotify/fsnotify/pull/381) + [#385](https://github.com/fsnotify/fsnotify/pull/385) +* Go 1.14+: Fix unsafe pointer conversion [#325](https://github.com/fsnotify/fsnotify/pull/325) + +## [1.4.7] - 2018-01-09 * BSD/macOS: Fix possible deadlock on closing the watcher on kqueue (thanks @nhooyr and @glycerine) * Tests: Fix missing verb on format string (thanks @rchiossi) @@ -10,62 +50,62 @@ * Linux: Properly handle inotify's IN_Q_OVERFLOW event (thanks @zeldovich) * Docs: replace references to OS X with macOS -## v1.4.2 / 2016-10-10 +## [1.4.2] - 2016-10-10 * Linux: use InotifyInit1 with IN_CLOEXEC to stop leaking a file descriptor to a child process when using fork/exec [#178](https://github.com/fsnotify/fsnotify/pull/178) (thanks @pattyshack) -## v1.4.1 / 2016-10-04 +## [1.4.1] - 2016-10-04 * Fix flaky inotify stress test on Linux [#177](https://github.com/fsnotify/fsnotify/pull/177) (thanks @pattyshack) -## v1.4.0 / 2016-10-01 +## [1.4.0] - 2016-10-01 * add a String() method to Event.Op [#165](https://github.com/fsnotify/fsnotify/pull/165) (thanks @oozie) -## v1.3.1 / 2016-06-28 +## [1.3.1] - 2016-06-28 * Windows: fix for double backslash when watching the root of a drive [#151](https://github.com/fsnotify/fsnotify/issues/151) (thanks @brunoqc) -## v1.3.0 / 2016-04-19 +## [1.3.0] - 2016-04-19 * Support linux/arm64 by [patching](https://go-review.googlesource.com/#/c/21971/) x/sys/unix and switching to to it from syscall (thanks @suihkulokki) [#135](https://github.com/fsnotify/fsnotify/pull/135) -## v1.2.10 / 2016-03-02 +## [1.2.10] - 2016-03-02 * Fix golint errors in windows.go [#121](https://github.com/fsnotify/fsnotify/pull/121) (thanks @tiffanyfj) -## v1.2.9 / 2016-01-13 +## [1.2.9] - 2016-01-13 kqueue: Fix logic for CREATE after REMOVE 
[#111](https://github.com/fsnotify/fsnotify/pull/111) (thanks @bep) -## v1.2.8 / 2015-12-17 +## [1.2.8] - 2015-12-17 * kqueue: fix race condition in Close [#105](https://github.com/fsnotify/fsnotify/pull/105) (thanks @djui for reporting the issue and @ppknap for writing a failing test) * inotify: fix race in test * enable race detection for continuous integration (Linux, Mac, Windows) -## v1.2.5 / 2015-10-17 +## [1.2.5] - 2015-10-17 * inotify: use epoll_create1 for arm64 support (requires Linux 2.6.27 or later) [#100](https://github.com/fsnotify/fsnotify/pull/100) (thanks @suihkulokki) * inotify: fix path leaks [#73](https://github.com/fsnotify/fsnotify/pull/73) (thanks @chamaken) * kqueue: watch for rename events on subdirectories [#83](https://github.com/fsnotify/fsnotify/pull/83) (thanks @guotie) * kqueue: avoid infinite loops from symlinks cycles [#101](https://github.com/fsnotify/fsnotify/pull/101) (thanks @illicitonion) -## v1.2.1 / 2015-10-14 +## [1.2.1] - 2015-10-14 * kqueue: don't watch named pipes [#98](https://github.com/fsnotify/fsnotify/pull/98) (thanks @evanphx) -## v1.2.0 / 2015-02-08 +## [1.2.0] - 2015-02-08 * inotify: use epoll to wake up readEvents [#66](https://github.com/fsnotify/fsnotify/pull/66) (thanks @PieterD) * inotify: closing watcher should now always shut down goroutine [#63](https://github.com/fsnotify/fsnotify/pull/63) (thanks @PieterD) * kqueue: close kqueue after removing watches, fixes [#59](https://github.com/fsnotify/fsnotify/issues/59) -## v1.1.1 / 2015-02-05 +## [1.1.1] - 2015-02-05 * inotify: Retry read on EINTR [#61](https://github.com/fsnotify/fsnotify/issues/61) (thanks @PieterD) -## v1.1.0 / 2014-12-12 +## [1.1.0] - 2014-12-12 * kqueue: rework internals [#43](https://github.com/fsnotify/fsnotify/pull/43) * add low-level functions @@ -77,22 +117,22 @@ kqueue: Fix logic for CREATE after REMOVE [#111](https://github.com/fsnotify/fsn * kqueue: fix regression in rework causing subdirectories to be watched [#48](https://github.com/fsnotify/fsnotify/issues/48) * kqueue: cleanup internal watch before sending remove event [#51](https://github.com/fsnotify/fsnotify/issues/51) -## v1.0.4 / 2014-09-07 +## [1.0.4] - 2014-09-07 * kqueue: add dragonfly to the build tags. * Rename source code files, rearrange code so exported APIs are at the top. * Add done channel to example code. [#37](https://github.com/fsnotify/fsnotify/pull/37) (thanks @chenyukang) -## v1.0.3 / 2014-08-19 +## [1.0.3] - 2014-08-19 * [Fix] Windows MOVED_TO now translates to Create like on BSD and Linux. [#36](https://github.com/fsnotify/fsnotify/issues/36) -## v1.0.2 / 2014-08-17 +## [1.0.2] - 2014-08-17 * [Fix] Missing create events on macOS. [#14](https://github.com/fsnotify/fsnotify/issues/14) (thanks @zhsso) * [Fix] Make ./path and path equivalent. (thanks @zhsso) -## v1.0.0 / 2014-08-15 +## [1.0.0] - 2014-08-15 * [API] Remove AddWatch on Windows, use Add. * Improve documentation for exported identifiers. [#30](https://github.com/fsnotify/fsnotify/issues/30) @@ -146,51 +186,51 @@ kqueue: Fix logic for CREATE after REMOVE [#111](https://github.com/fsnotify/fsn * no tests for the current implementation * not fully implemented on Windows [#93](https://github.com/howeyc/fsnotify/issues/93#issuecomment-39285195) -## v0.9.3 / 2014-12-31 +## [0.9.3] - 2014-12-31 * kqueue: cleanup internal watch before sending remove event [#51](https://github.com/fsnotify/fsnotify/issues/51) -## v0.9.2 / 2014-08-17 +## [0.9.2] - 2014-08-17 * [Backport] Fix missing create events on macOS. 
[#14](https://github.com/fsnotify/fsnotify/issues/14) (thanks @zhsso) -## v0.9.1 / 2014-06-12 +## [0.9.1] - 2014-06-12 * Fix data race on kevent buffer (thanks @tilaks) [#98](https://github.com/howeyc/fsnotify/pull/98) -## v0.9.0 / 2014-01-17 +## [0.9.0] - 2014-01-17 * IsAttrib() for events that only concern a file's metadata [#79][] (thanks @abustany) * [Fix] kqueue: fix deadlock [#77][] (thanks @cespare) * [NOTICE] Development has moved to `code.google.com/p/go.exp/fsnotify` in preparation for inclusion in the Go standard library. -## v0.8.12 / 2013-11-13 +## [0.8.12] - 2013-11-13 * [API] Remove FD_SET and friends from Linux adapter -## v0.8.11 / 2013-11-02 +## [0.8.11] - 2013-11-02 * [Doc] Add Changelog [#72][] (thanks @nathany) * [Doc] Spotlight and double modify events on macOS [#62][] (reported by @paulhammond) -## v0.8.10 / 2013-10-19 +## [0.8.10] - 2013-10-19 * [Fix] kqueue: remove file watches when parent directory is removed [#71][] (reported by @mdwhatcott) * [Fix] kqueue: race between Close and readEvents [#70][] (reported by @bernerdschaefer) * [Doc] specify OS-specific limits in README (thanks @debrando) -## v0.8.9 / 2013-09-08 +## [0.8.9] - 2013-09-08 * [Doc] Contributing (thanks @nathany) * [Doc] update package path in example code [#63][] (thanks @paulhammond) * [Doc] GoCI badge in README (Linux only) [#60][] * [Doc] Cross-platform testing with Vagrant [#59][] (thanks @nathany) -## v0.8.8 / 2013-06-17 +## [0.8.8] - 2013-06-17 * [Fix] Windows: handle `ERROR_MORE_DATA` on Windows [#49][] (thanks @jbowtie) -## v0.8.7 / 2013-06-03 +## [0.8.7] - 2013-06-03 * [API] Make syscall flags internal * [Fix] inotify: ignore event changes @@ -198,74 +238,74 @@ kqueue: Fix logic for CREATE after REMOVE [#111](https://github.com/fsnotify/fsn * [Fix] tests on Windows * lower case error messages -## v0.8.6 / 2013-05-23 +## [0.8.6] - 2013-05-23 * kqueue: Use EVT_ONLY flag on Darwin * [Doc] Update README with full example -## v0.8.5 / 2013-05-09 +## [0.8.5] - 2013-05-09 * [Fix] inotify: allow monitoring of "broken" symlinks (thanks @tsg) -## v0.8.4 / 2013-04-07 +## [0.8.4] - 2013-04-07 * [Fix] kqueue: watch all file events [#40][] (thanks @ChrisBuchholz) -## v0.8.3 / 2013-03-13 +## [0.8.3] - 2013-03-13 * [Fix] inoitfy/kqueue memory leak [#36][] (reported by @nbkolchin) * [Fix] kqueue: use fsnFlags for watching a directory [#33][] (reported by @nbkolchin) -## v0.8.2 / 2013-02-07 +## [0.8.2] - 2013-02-07 * [Doc] add Authors * [Fix] fix data races for map access [#29][] (thanks @fsouza) -## v0.8.1 / 2013-01-09 +## [0.8.1] - 2013-01-09 * [Fix] Windows path separators * [Doc] BSD License -## v0.8.0 / 2012-11-09 +## [0.8.0] - 2012-11-09 * kqueue: directory watching improvements (thanks @vmirage) * inotify: add `IN_MOVED_TO` [#25][] (requested by @cpisto) * [Fix] kqueue: deleting watched directory [#24][] (reported by @jakerr) -## v0.7.4 / 2012-10-09 +## [0.7.4] - 2012-10-09 * [Fix] inotify: fixes from https://codereview.appspot.com/5418045/ (ugorji) * [Fix] kqueue: preserve watch flags when watching for delete [#21][] (reported by @robfig) * [Fix] kqueue: watch the directory even if it isn't a new watch (thanks @robfig) * [Fix] kqueue: modify after recreation of file -## v0.7.3 / 2012-09-27 +## [0.7.3] - 2012-09-27 * [Fix] kqueue: watch with an existing folder inside the watched folder (thanks @vmirage) * [Fix] kqueue: no longer get duplicate CREATE events -## v0.7.2 / 2012-09-01 +## [0.7.2] - 2012-09-01 * kqueue: events for created directories -## v0.7.1 / 2012-07-14 +## [0.7.1] - 2012-07-14 * 
[Fix] for renaming files -## v0.7.0 / 2012-07-02 +## [0.7.0] - 2012-07-02 * [Feature] FSNotify flags * [Fix] inotify: Added file name back to event path -## v0.6.0 / 2012-06-06 +## [0.6.0] - 2012-06-06 * kqueue: watch files after directory created (thanks @tmc) -## v0.5.1 / 2012-05-22 +## [0.5.1] - 2012-05-22 * [Fix] inotify: remove all watches before Close() -## v0.5.0 / 2012-05-03 +## [0.5.0] - 2012-05-03 * [API] kqueue: return errors during watch instead of sending over channel * kqueue: match symlink behavior on Linux @@ -273,22 +313,22 @@ kqueue: Fix logic for CREATE after REMOVE [#111](https://github.com/fsnotify/fsn * [Fix] kqueue: handle EINTR (reported by @robfig) * [Doc] Godoc example [#1][] (thanks @davecheney) -## v0.4.0 / 2012-03-30 +## [0.4.0] - 2012-03-30 * Go 1 released: build with go tool * [Feature] Windows support using winfsnotify * Windows does not have attribute change notifications * Roll attribute notifications into IsModify -## v0.3.0 / 2012-02-19 +## [0.3.0] - 2012-02-19 * kqueue: add files when watch directory -## v0.2.0 / 2011-12-30 +## [0.2.0] - 2011-12-30 * update to latest Go weekly code -## v0.1.0 / 2011-10-19 +## [0.1.0] - 2011-10-19 * kqueue: add watch on file creation to match inotify * kqueue: create file event diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/CONTRIBUTING.md b/src/runtime/vendor/github.com/fsnotify/fsnotify/CONTRIBUTING.md index 828a60b24ba2..8a642563d718 100644 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/CONTRIBUTING.md +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/CONTRIBUTING.md @@ -48,18 +48,6 @@ fsnotify uses build tags to compile different code on Linux, BSD, macOS, and Win Before doing a pull request, please do your best to test your changes on multiple platforms, and list which platforms you were able/unable to test on. -To aid in cross-platform testing there is a Vagrantfile for Linux and BSD. - -* Install [Vagrant](http://www.vagrantup.com/) and [VirtualBox](https://www.virtualbox.org/) -* Setup [Vagrant Gopher](https://github.com/nathany/vagrant-gopher) in your `src` folder. -* Run `vagrant up` from the project folder. You can also setup just one box with `vagrant up linux` or `vagrant up bsd` (note: the BSD box doesn't support Windows hosts at this time, and NFS may prompt for your host OS password) -* Once setup, you can run the test suite on a given OS with a single command `vagrant ssh linux -c 'cd fsnotify/fsnotify; go test'`. -* When you're done, you will want to halt or destroy the Vagrant boxes. - -Notice: fsnotify file system events won't trigger in shared folders. The tests get around this limitation by using the /tmp directory. - -Right now there is no equivalent solution for Windows and macOS, but there are Windows VMs [freely available from Microsoft](http://www.modern.ie/en-us/virtualization-tools#downloads). - ### Maintainers Help maintaining fsnotify is welcome. To be a maintainer: @@ -67,11 +55,6 @@ Help maintaining fsnotify is welcome. To be a maintainer: * Submit a pull request and sign the CLA as above. * You must be able to run the test suite on Mac, Windows, Linux and BSD. -To keep master clean, the fsnotify project uses the "apply mail" workflow outlined in Nathaniel Talbott's post ["Merge pull request" Considered Harmful][am]. This requires installing [hub][]. - All code changes should be internal pull requests. Releases are tagged using [Semantic Versioning](http://semver.org/). 
- -[hub]: https://github.com/github/hub -[am]: http://blog.spreedly.com/2014/06/24/merge-pull-request-considered-harmful/#.VGa5yZPF_Zs diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/README.md b/src/runtime/vendor/github.com/fsnotify/fsnotify/README.md index b2629e5229ca..0731c5ef8adc 100644 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/README.md +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/README.md @@ -1,37 +1,31 @@ # File system notifications for Go -[![GoDoc](https://godoc.org/github.com/fsnotify/fsnotify?status.svg)](https://godoc.org/github.com/fsnotify/fsnotify) [![Go Report Card](https://goreportcard.com/badge/github.com/fsnotify/fsnotify)](https://goreportcard.com/report/github.com/fsnotify/fsnotify) +[![Go Reference](https://pkg.go.dev/badge/github.com/fsnotify/fsnotify.svg)](https://pkg.go.dev/github.com/fsnotify/fsnotify) [![Go Report Card](https://goreportcard.com/badge/github.com/fsnotify/fsnotify)](https://goreportcard.com/report/github.com/fsnotify/fsnotify) [![Maintainers Wanted](https://img.shields.io/badge/maintainers-wanted-red.svg)](https://github.com/fsnotify/fsnotify/issues/413) -fsnotify utilizes [golang.org/x/sys](https://godoc.org/golang.org/x/sys) rather than `syscall` from the standard library. Ensure you have the latest version installed by running: - -```console -go get -u golang.org/x/sys/... -``` +fsnotify utilizes [`golang.org/x/sys`](https://pkg.go.dev/golang.org/x/sys) rather than [`syscall`](https://pkg.go.dev/syscall) from the standard library. Cross platform: Windows, Linux, BSD and macOS. | Adapter | OS | Status | | --------------------- | -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | -| inotify | Linux 2.6.27 or later, Android\* | Supported [![Build Status](https://travis-ci.org/fsnotify/fsnotify.svg?branch=master)](https://travis-ci.org/fsnotify/fsnotify) | -| kqueue | BSD, macOS, iOS\* | Supported [![Build Status](https://travis-ci.org/fsnotify/fsnotify.svg?branch=master)](https://travis-ci.org/fsnotify/fsnotify) | -| ReadDirectoryChangesW | Windows | Supported [![Build Status](https://travis-ci.org/fsnotify/fsnotify.svg?branch=master)](https://travis-ci.org/fsnotify/fsnotify) | +| inotify | Linux 2.6.27 or later, Android\* | Supported | +| kqueue | BSD, macOS, iOS\* | Supported | +| ReadDirectoryChangesW | Windows | Supported | | FSEvents | macOS | [Planned](https://github.com/fsnotify/fsnotify/issues/11) | -| FEN | Solaris 11 | [In Progress](https://github.com/fsnotify/fsnotify/issues/12) | -| fanotify | Linux 2.6.37+ | [Planned](https://github.com/fsnotify/fsnotify/issues/114) | +| FEN | Solaris 11 | [In Progress](https://github.com/fsnotify/fsnotify/pull/371) | +| fanotify | Linux 2.6.37+ | [Maybe](https://github.com/fsnotify/fsnotify/issues/114) | | USN Journals | Windows | [Maybe](https://github.com/fsnotify/fsnotify/issues/53) | | Polling | *All* | [Maybe](https://github.com/fsnotify/fsnotify/issues/9) | \* Android and iOS are untested. -Please see [the documentation](https://godoc.org/github.com/fsnotify/fsnotify) and consult the [FAQ](#faq) for usage information. +Please see [the documentation](https://pkg.go.dev/github.com/fsnotify/fsnotify) and consult the [FAQ](#faq) for usage information. ## API stability -fsnotify is a fork of [howeyc/fsnotify](https://godoc.org/github.com/howeyc/fsnotify) with a new API as of v1.0. The API is based on [this design document](http://goo.gl/MrYxyA). 
- -All [releases](https://github.com/fsnotify/fsnotify/releases) are tagged based on [Semantic Versioning](http://semver.org/). Further API changes are [planned](https://github.com/fsnotify/fsnotify/milestones), and will be tagged with a new major revision number. +fsnotify is a fork of [howeyc/fsnotify](https://github.com/howeyc/fsnotify) with a new API as of v1.0. The API is based on [this design document](http://goo.gl/MrYxyA). -Go 1.6 supports dependencies located in the `vendor/` folder. Unless you are creating a library, it is recommended that you copy fsnotify into `vendor/github.com/fsnotify/fsnotify` within your project, and likewise for `golang.org/x/sys`. +All [releases](https://github.com/fsnotify/fsnotify/releases) are tagged based on [Semantic Versioning](http://semver.org/). ## Usage @@ -84,10 +78,6 @@ func main() { Please refer to [CONTRIBUTING][] before opening an issue or pull request. -## Example - -See [example_test.go](https://github.com/fsnotify/fsnotify/blob/master/example_test.go). - ## FAQ **When a file is moved to another directory is it still being watched?** diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/fen.go b/src/runtime/vendor/github.com/fsnotify/fsnotify/fen.go index ced39cb881e6..b3ac3d8f55fa 100644 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/fen.go +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/fen.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build solaris // +build solaris package fsnotify diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/fsnotify.go b/src/runtime/vendor/github.com/fsnotify/fsnotify/fsnotify.go index 89cab046d124..0f4ee52e8aa2 100644 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/fsnotify.go +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/fsnotify.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build !plan9 // +build !plan9 // Package fsnotify provides a platform-independent interface for file system notifications. diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/fsnotify_unsupported.go b/src/runtime/vendor/github.com/fsnotify/fsnotify/fsnotify_unsupported.go new file mode 100644 index 000000000000..59688559836f --- /dev/null +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/fsnotify_unsupported.go @@ -0,0 +1,36 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !darwin && !dragonfly && !freebsd && !openbsd && !linux && !netbsd && !solaris && !windows +// +build !darwin,!dragonfly,!freebsd,!openbsd,!linux,!netbsd,!solaris,!windows + +package fsnotify + +import ( + "fmt" + "runtime" +) + +// Watcher watches a set of files, delivering events to a channel. +type Watcher struct{} + +// NewWatcher establishes a new watcher with the underlying OS and begins waiting for events. +func NewWatcher() (*Watcher, error) { + return nil, fmt.Errorf("fsnotify not supported on %s", runtime.GOOS) +} + +// Close removes all watches and closes the events channel. +func (w *Watcher) Close() error { + return nil +} + +// Add starts watching the named file or directory (non-recursively). +func (w *Watcher) Add(name string) error { + return nil +} + +// Remove stops watching the the named file or directory (non-recursively). 
+func (w *Watcher) Remove(name string) error { + return nil +} diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/go.mod b/src/runtime/vendor/github.com/fsnotify/fsnotify/go.mod deleted file mode 100644 index ff11e13f2240..000000000000 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/go.mod +++ /dev/null @@ -1,5 +0,0 @@ -module github.com/fsnotify/fsnotify - -go 1.13 - -require golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9 diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/go.sum b/src/runtime/vendor/github.com/fsnotify/fsnotify/go.sum deleted file mode 100644 index f60af9855da7..000000000000 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/go.sum +++ /dev/null @@ -1,2 +0,0 @@ -golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9 h1:L2auWcuQIvxz9xSEqzESnV/QN/gNRXNApHi3fYwl2w0= -golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/inotify.go b/src/runtime/vendor/github.com/fsnotify/fsnotify/inotify.go index d9fd1b88a05f..a6d0e0ec8c10 100644 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/inotify.go +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/inotify.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build linux // +build linux package fsnotify @@ -162,6 +163,19 @@ func (w *Watcher) Remove(name string) error { return nil } +// WatchList returns the directories and files that are being monitered. +func (w *Watcher) WatchList() []string { + w.mu.Lock() + defer w.mu.Unlock() + + entries := make([]string, 0, len(w.watches)) + for pathname := range w.watches { + entries = append(entries, pathname) + } + + return entries +} + type watch struct { wd uint32 // Watch descriptor (as returned by the inotify_add_watch() syscall) flags uint32 // inotify flags of this watch (see inotify(7) for the list of valid flags) @@ -272,7 +286,7 @@ func (w *Watcher) readEvents() { if nameLen > 0 { // Point "bytes" at the first byte of the filename - bytes := (*[unix.PathMax]byte)(unsafe.Pointer(&buf[offset+unix.SizeofInotifyEvent])) + bytes := (*[unix.PathMax]byte)(unsafe.Pointer(&buf[offset+unix.SizeofInotifyEvent]))[:nameLen:nameLen] // The filename is padded with NULL bytes. TrimRight() gets rid of those. name += "/" + strings.TrimRight(string(bytes[0:nameLen]), "\000") } diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/inotify_poller.go b/src/runtime/vendor/github.com/fsnotify/fsnotify/inotify_poller.go index b33f2b4d4b79..b572a37c3f1a 100644 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/inotify_poller.go +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/inotify_poller.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build linux // +build linux package fsnotify @@ -37,7 +38,6 @@ func newFdPoller(fd int) (*fdPoller, error) { poller.close() } }() - poller.fd = fd // Create epoll fd poller.epfd, errno = unix.EpollCreate1(unix.EPOLL_CLOEXEC) diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/kqueue.go b/src/runtime/vendor/github.com/fsnotify/fsnotify/kqueue.go index 86e76a3d6768..6fb8d8532e7f 100644 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/kqueue.go +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/kqueue.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+//go:build freebsd || openbsd || netbsd || dragonfly || darwin // +build freebsd openbsd netbsd dragonfly darwin package fsnotify @@ -147,6 +148,19 @@ func (w *Watcher) Remove(name string) error { return nil } +// WatchList returns the directories and files that are being monitered. +func (w *Watcher) WatchList() []string { + w.mu.Lock() + defer w.mu.Unlock() + + entries := make([]string, 0, len(w.watches)) + for pathname := range w.watches { + entries = append(entries, pathname) + } + + return entries +} + // Watch all events (except NOTE_EXTEND, NOTE_LINK, NOTE_REVOKE) const noteAllEvents = unix.NOTE_DELETE | unix.NOTE_WRITE | unix.NOTE_ATTRIB | unix.NOTE_RENAME diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/open_mode_bsd.go b/src/runtime/vendor/github.com/fsnotify/fsnotify/open_mode_bsd.go index 2306c4620bf6..36cc3845b6e7 100644 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/open_mode_bsd.go +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/open_mode_bsd.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build freebsd || openbsd || netbsd || dragonfly // +build freebsd openbsd netbsd dragonfly package fsnotify diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/open_mode_darwin.go b/src/runtime/vendor/github.com/fsnotify/fsnotify/open_mode_darwin.go index 870c4d6d1845..98cd8476ffb8 100644 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/open_mode_darwin.go +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/open_mode_darwin.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build darwin // +build darwin package fsnotify diff --git a/src/runtime/vendor/github.com/fsnotify/fsnotify/windows.go b/src/runtime/vendor/github.com/fsnotify/fsnotify/windows.go index 09436f31d821..02ce7deb0bbf 100644 --- a/src/runtime/vendor/github.com/fsnotify/fsnotify/windows.go +++ b/src/runtime/vendor/github.com/fsnotify/fsnotify/windows.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build windows // +build windows package fsnotify @@ -11,6 +12,7 @@ import ( "fmt" "os" "path/filepath" + "reflect" "runtime" "sync" "syscall" @@ -95,6 +97,21 @@ func (w *Watcher) Remove(name string) error { return <-in.reply } +// WatchList returns the directories and files that are being monitered. 
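
The WatchList method added to the inotify and kqueue backends above (and to the windows backend immediately below) returns the currently watched paths. A minimal usage sketch, assuming an existing directory such as /tmp is available to watch; the path is only an example.

```go
// Sketch only: enumerate the paths a watcher is currently monitoring.
package main

import (
	"fmt"
	"log"

	"github.com/fsnotify/fsnotify"
)

func main() {
	w, err := fsnotify.NewWatcher()
	if err != nil {
		log.Fatal(err)
	}
	defer w.Close()

	if err := w.Add("/tmp"); err != nil { // "/tmp" is just an example path
		log.Fatal(err)
	}
	fmt.Println(w.WatchList()) // e.g. [/tmp]
}
```
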
+func (w *Watcher) WatchList() []string { + w.mu.Lock() + defer w.mu.Unlock() + + entries := make([]string, 0, len(w.watches)) + for _, entry := range w.watches { + for _, watchEntry := range entry { + entries = append(entries, watchEntry.path) + } + } + + return entries +} + const ( // Options for AddWatch sysFSONESHOT = 0x80000000 @@ -451,8 +468,16 @@ func (w *Watcher) readEvents() { // Point "raw" to the event in the buffer raw := (*syscall.FileNotifyInformation)(unsafe.Pointer(&watch.buf[offset])) - buf := (*[syscall.MAX_PATH]uint16)(unsafe.Pointer(&raw.FileName)) - name := syscall.UTF16ToString(buf[:raw.FileNameLength/2]) + // TODO: Consider using unsafe.Slice that is available from go1.17 + // https://stackoverflow.com/questions/51187973/how-to-create-an-array-or-a-slice-from-an-array-unsafe-pointer-in-golang + // instead of using a fixed syscall.MAX_PATH buf, we create a buf that is the size of the path name + size := int(raw.FileNameLength / 2) + var buf []uint16 + sh := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + sh.Data = uintptr(unsafe.Pointer(&raw.FileName)) + sh.Len = size + sh.Cap = size + name := syscall.UTF16ToString(buf) fullname := filepath.Join(watch.path, name) var mask uint64 diff --git a/src/runtime/vendor/github.com/globalsign/mgo/LICENSE b/src/runtime/vendor/github.com/globalsign/mgo/LICENSE deleted file mode 100644 index 770c7672b45d..000000000000 --- a/src/runtime/vendor/github.com/globalsign/mgo/LICENSE +++ /dev/null @@ -1,25 +0,0 @@ -mgo - MongoDB driver for Go - -Copyright (c) 2010-2013 - Gustavo Niemeyer - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/runtime/vendor/github.com/globalsign/mgo/bson/LICENSE b/src/runtime/vendor/github.com/globalsign/mgo/bson/LICENSE deleted file mode 100644 index 890326017b85..000000000000 --- a/src/runtime/vendor/github.com/globalsign/mgo/bson/LICENSE +++ /dev/null @@ -1,25 +0,0 @@ -BSON library for Go - -Copyright (c) 2010-2012 - Gustavo Niemeyer - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. -2. 
Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/runtime/vendor/github.com/globalsign/mgo/bson/README.md b/src/runtime/vendor/github.com/globalsign/mgo/bson/README.md deleted file mode 100644 index 5c5819e612b7..000000000000 --- a/src/runtime/vendor/github.com/globalsign/mgo/bson/README.md +++ /dev/null @@ -1,12 +0,0 @@ -[![GoDoc](https://godoc.org/github.com/globalsign/mgo/bson?status.svg)](https://godoc.org/github.com/globalsign/mgo/bson) - -An Implementation of BSON for Go --------------------------------- - -Package bson is an implementation of the [BSON specification](http://bsonspec.org) for Go. - -While the BSON package implements the BSON spec as faithfully as possible, there -is some MongoDB specific behaviour (such as map keys `$in`, `$all`, etc) in the -`bson` package. The priority is for backwards compatibility for the `mgo` -driver, though fixes for obviously buggy behaviour is welcome (and features, etc -behind feature flags). diff --git a/src/runtime/vendor/github.com/globalsign/mgo/bson/bson.go b/src/runtime/vendor/github.com/globalsign/mgo/bson/bson.go deleted file mode 100644 index eb87ef6208a2..000000000000 --- a/src/runtime/vendor/github.com/globalsign/mgo/bson/bson.go +++ /dev/null @@ -1,836 +0,0 @@ -// BSON library for Go -// -// Copyright (c) 2010-2012 - Gustavo Niemeyer -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. 
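
The windows.go readEvents hunk above replaces the fixed syscall.MAX_PATH buffer with a slice sized by FileNameLength, built through reflect.SliceHeader, and its TODO notes that Go 1.17+ offers unsafe.Slice for the same purpose. A hedged sketch of that alternative, reusing the raw *syscall.FileNotifyInformation from the hunk (FileNameLength is a byte count, so the UTF-16 name has FileNameLength/2 elements):

```go
// Go 1.17+ alternative to the reflect.SliceHeader construction (sketch only):
// build a []uint16 view over the in-buffer file name and convert it.
size := int(raw.FileNameLength / 2)
buf := unsafe.Slice(&raw.FileName, size) // []uint16 backed by the event buffer
name := syscall.UTF16ToString(buf)
```
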
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Package bson is an implementation of the BSON specification for Go: -// -// http://bsonspec.org -// -// It was created as part of the mgo MongoDB driver for Go, but is standalone -// and may be used on its own without the driver. -package bson - -import ( - "bytes" - "crypto/md5" - "crypto/rand" - "encoding/binary" - "encoding/hex" - "encoding/json" - "errors" - "fmt" - "io" - "math" - "os" - "reflect" - "runtime" - "strings" - "sync" - "sync/atomic" - "time" -) - -//go:generate go run bson_corpus_spec_test_generator.go - -// -------------------------------------------------------------------------- -// The public API. - -// Element types constants from BSON specification. -const ( - ElementFloat64 byte = 0x01 - ElementString byte = 0x02 - ElementDocument byte = 0x03 - ElementArray byte = 0x04 - ElementBinary byte = 0x05 - Element06 byte = 0x06 - ElementObjectId byte = 0x07 - ElementBool byte = 0x08 - ElementDatetime byte = 0x09 - ElementNil byte = 0x0A - ElementRegEx byte = 0x0B - ElementDBPointer byte = 0x0C - ElementJavaScriptWithoutScope byte = 0x0D - ElementSymbol byte = 0x0E - ElementJavaScriptWithScope byte = 0x0F - ElementInt32 byte = 0x10 - ElementTimestamp byte = 0x11 - ElementInt64 byte = 0x12 - ElementDecimal128 byte = 0x13 - ElementMinKey byte = 0xFF - ElementMaxKey byte = 0x7F - - BinaryGeneric byte = 0x00 - BinaryFunction byte = 0x01 - BinaryBinaryOld byte = 0x02 - BinaryUUIDOld byte = 0x03 - BinaryUUID byte = 0x04 - BinaryMD5 byte = 0x05 - BinaryUserDefined byte = 0x80 -) - -// Getter interface: a value implementing the bson.Getter interface will have its GetBSON -// method called when the given value has to be marshalled, and the result -// of this method will be marshaled in place of the actual object. -// -// If GetBSON returns return a non-nil error, the marshalling procedure -// will stop and error out with the provided value. -type Getter interface { - GetBSON() (interface{}, error) -} - -// Setter interface: a value implementing the bson.Setter interface will receive the BSON -// value via the SetBSON method during unmarshaling, and the object -// itself will not be changed as usual. -// -// If setting the value works, the method should return nil or alternatively -// bson.ErrSetZero to set the respective field to its zero value (nil for -// pointer types). If SetBSON returns a value of type bson.TypeError, the -// BSON value will be omitted from a map or slice being decoded and the -// unmarshalling will continue. If it returns any other non-nil error, the -// unmarshalling procedure will stop and error out with the provided value. -// -// This interface is generally useful in pointer receivers, since the method -// will want to change the receiver. A type field that implements the Setter -// interface doesn't have to be a pointer, though. -// -// Unlike the usual behavior, unmarshalling onto a value that implements a -// Setter interface will NOT reset the value to its zero state. 
This allows -// the value to decide by itself how to be unmarshalled. -// -// For example: -// -// type MyString string -// -// func (s *MyString) SetBSON(raw bson.Raw) error { -// return raw.Unmarshal(s) -// } -// -type Setter interface { - SetBSON(raw Raw) error -} - -// ErrSetZero may be returned from a SetBSON method to have the value set to -// its respective zero value. When used in pointer values, this will set the -// field to nil rather than to the pre-allocated value. -var ErrSetZero = errors.New("set to zero") - -// M is a convenient alias for a map[string]interface{} map, useful for -// dealing with BSON in a native way. For instance: -// -// bson.M{"a": 1, "b": true} -// -// There's no special handling for this type in addition to what's done anyway -// for an equivalent map type. Elements in the map will be dumped in an -// undefined ordered. See also the bson.D type for an ordered alternative. -type M map[string]interface{} - -// D represents a BSON document containing ordered elements. For example: -// -// bson.D{{"a", 1}, {"b", true}} -// -// In some situations, such as when creating indexes for MongoDB, the order in -// which the elements are defined is important. If the order is not important, -// using a map is generally more comfortable. See bson.M and bson.RawD. -type D []DocElem - -// DocElem is an element of the bson.D document representation. -type DocElem struct { - Name string - Value interface{} -} - -// Map returns a map out of the ordered element name/value pairs in d. -func (d D) Map() (m M) { - m = make(M, len(d)) - for _, item := range d { - m[item.Name] = item.Value - } - return m -} - -// The Raw type represents raw unprocessed BSON documents and elements. -// Kind is the kind of element as defined per the BSON specification, and -// Data is the raw unprocessed data for the respective element. -// Using this type it is possible to unmarshal or marshal values partially. -// -// Relevant documentation: -// -// http://bsonspec.org/#/specification -// -type Raw struct { - Kind byte - Data []byte -} - -// RawD represents a BSON document containing raw unprocessed elements. -// This low-level representation may be useful when lazily processing -// documents of uncertain content, or when manipulating the raw content -// documents in general. -type RawD []RawDocElem - -// RawDocElem elements of RawD type. -type RawDocElem struct { - Name string - Value Raw -} - -// ObjectId is a unique ID identifying a BSON value. It must be exactly 12 bytes -// long. MongoDB objects by default have such a property set in their "_id" -// property. -// -// http://www.mongodb.org/display/DOCS/Object+Ids -type ObjectId string - -// ObjectIdHex returns an ObjectId from the provided hex representation. -// Calling this function with an invalid hex representation will -// cause a runtime panic. See the IsObjectIdHex function. -func ObjectIdHex(s string) ObjectId { - d, err := hex.DecodeString(s) - if err != nil || len(d) != 12 { - panic(fmt.Sprintf("invalid input to ObjectIdHex: %q", s)) - } - return ObjectId(d) -} - -// IsObjectIdHex returns whether s is a valid hex representation of -// an ObjectId. See the ObjectIdHex function. -func IsObjectIdHex(s string) bool { - if len(s) != 24 { - return false - } - _, err := hex.DecodeString(s) - return err == nil -} - -// objectIdCounter is atomically incremented when generating a new ObjectId -// using NewObjectId() function. It's used as a counter part of an id. 
-var objectIdCounter = readRandomUint32() - -// readRandomUint32 returns a random objectIdCounter. -func readRandomUint32() uint32 { - var b [4]byte - _, err := io.ReadFull(rand.Reader, b[:]) - if err != nil { - panic(fmt.Errorf("cannot read random object id: %v", err)) - } - return uint32((uint32(b[0]) << 0) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)) -} - -// machineId stores machine id generated once and used in subsequent calls -// to NewObjectId function. -var machineId = readMachineId() -var processId = os.Getpid() - -// readMachineId generates and returns a machine id. -// If this function fails to get the hostname it will cause a runtime error. -func readMachineId() []byte { - var sum [3]byte - id := sum[:] - hostname, err1 := os.Hostname() - if err1 != nil { - _, err2 := io.ReadFull(rand.Reader, id) - if err2 != nil { - panic(fmt.Errorf("cannot get hostname: %v; %v", err1, err2)) - } - return id - } - hw := md5.New() - hw.Write([]byte(hostname)) - copy(id, hw.Sum(nil)) - return id -} - -// NewObjectId returns a new unique ObjectId. -func NewObjectId() ObjectId { - var b [12]byte - // Timestamp, 4 bytes, big endian - binary.BigEndian.PutUint32(b[:], uint32(time.Now().Unix())) - // Machine, first 3 bytes of md5(hostname) - b[4] = machineId[0] - b[5] = machineId[1] - b[6] = machineId[2] - // Pid, 2 bytes, specs don't specify endianness, but we use big endian. - b[7] = byte(processId >> 8) - b[8] = byte(processId) - // Increment, 3 bytes, big endian - i := atomic.AddUint32(&objectIdCounter, 1) - b[9] = byte(i >> 16) - b[10] = byte(i >> 8) - b[11] = byte(i) - return ObjectId(b[:]) -} - -// NewObjectIdWithTime returns a dummy ObjectId with the timestamp part filled -// with the provided number of seconds from epoch UTC, and all other parts -// filled with zeroes. It's not safe to insert a document with an id generated -// by this method, it is useful only for queries to find documents with ids -// generated before or after the specified timestamp. -func NewObjectIdWithTime(t time.Time) ObjectId { - var b [12]byte - binary.BigEndian.PutUint32(b[:4], uint32(t.Unix())) - return ObjectId(string(b[:])) -} - -// String returns a hex string representation of the id. -// Example: ObjectIdHex("4d88e15b60f486e428412dc9"). -func (id ObjectId) String() string { - return fmt.Sprintf(`ObjectIdHex("%x")`, string(id)) -} - -// Hex returns a hex representation of the ObjectId. -func (id ObjectId) Hex() string { - return hex.EncodeToString([]byte(id)) -} - -// MarshalJSON turns a bson.ObjectId into a json.Marshaller. -func (id ObjectId) MarshalJSON() ([]byte, error) { - return []byte(fmt.Sprintf(`"%x"`, string(id))), nil -} - -var nullBytes = []byte("null") - -// UnmarshalJSON turns *bson.ObjectId into a json.Unmarshaller. 
-func (id *ObjectId) UnmarshalJSON(data []byte) error { - if len(data) > 0 && (data[0] == '{' || data[0] == 'O') { - var v struct { - Id json.RawMessage `json:"$oid"` - Func struct { - Id json.RawMessage - } `json:"$oidFunc"` - } - err := jdec(data, &v) - if err == nil { - if len(v.Id) > 0 { - data = []byte(v.Id) - } else { - data = []byte(v.Func.Id) - } - } - } - if len(data) == 2 && data[0] == '"' && data[1] == '"' || bytes.Equal(data, nullBytes) { - *id = "" - return nil - } - if len(data) != 26 || data[0] != '"' || data[25] != '"' { - return fmt.Errorf("invalid ObjectId in JSON: %s", string(data)) - } - var buf [12]byte - _, err := hex.Decode(buf[:], data[1:25]) - if err != nil { - return fmt.Errorf("invalid ObjectId in JSON: %s (%s)", string(data), err) - } - *id = ObjectId(string(buf[:])) - return nil -} - -// MarshalText turns bson.ObjectId into an encoding.TextMarshaler. -func (id ObjectId) MarshalText() ([]byte, error) { - return []byte(fmt.Sprintf("%x", string(id))), nil -} - -// UnmarshalText turns *bson.ObjectId into an encoding.TextUnmarshaler. -func (id *ObjectId) UnmarshalText(data []byte) error { - if len(data) == 1 && data[0] == ' ' || len(data) == 0 { - *id = "" - return nil - } - if len(data) != 24 { - return fmt.Errorf("invalid ObjectId: %s", data) - } - var buf [12]byte - _, err := hex.Decode(buf[:], data[:]) - if err != nil { - return fmt.Errorf("invalid ObjectId: %s (%s)", data, err) - } - *id = ObjectId(string(buf[:])) - return nil -} - -// Valid returns true if id is valid. A valid id must contain exactly 12 bytes. -func (id ObjectId) Valid() bool { - return len(id) == 12 -} - -// byteSlice returns byte slice of id from start to end. -// Calling this function with an invalid id will cause a runtime panic. -func (id ObjectId) byteSlice(start, end int) []byte { - if len(id) != 12 { - panic(fmt.Sprintf("invalid ObjectId: %q", string(id))) - } - return []byte(string(id)[start:end]) -} - -// Time returns the timestamp part of the id. -// It's a runtime error to call this method with an invalid id. -func (id ObjectId) Time() time.Time { - // First 4 bytes of ObjectId is 32-bit big-endian seconds from epoch. - secs := int64(binary.BigEndian.Uint32(id.byteSlice(0, 4))) - return time.Unix(secs, 0) -} - -// Machine returns the 3-byte machine id part of the id. -// It's a runtime error to call this method with an invalid id. -func (id ObjectId) Machine() []byte { - return id.byteSlice(4, 7) -} - -// Pid returns the process id part of the id. -// It's a runtime error to call this method with an invalid id. -func (id ObjectId) Pid() uint16 { - return binary.BigEndian.Uint16(id.byteSlice(7, 9)) -} - -// Counter returns the incrementing value part of the id. -// It's a runtime error to call this method with an invalid id. -func (id ObjectId) Counter() int32 { - b := id.byteSlice(9, 12) - // Counter is stored as big-endian 3-byte value - return int32(uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2])) -} - -// The Symbol type is similar to a string and is used in languages with a -// distinct symbol type. -type Symbol string - -// Now returns the current time with millisecond precision. MongoDB stores -// timestamps with the same precision, so a Time returned from this method -// will not change after a roundtrip to the database. That's the only reason -// why this function exists. Using the time.Now function also works fine -// otherwise. 
-func Now() time.Time { - return time.Unix(0, time.Now().UnixNano()/1e6*1e6) -} - -// MongoTimestamp is a special internal type used by MongoDB that for some -// strange reason has its own datatype defined in BSON. -type MongoTimestamp int64 - -// Time returns the time part of ts which is stored with second precision. -func (ts MongoTimestamp) Time() time.Time { - return time.Unix(int64(uint64(ts)>>32), 0) -} - -// Counter returns the counter part of ts. -func (ts MongoTimestamp) Counter() uint32 { - return uint32(ts) -} - -// NewMongoTimestamp creates a timestamp using the given -// date `t` (with second precision) and counter `c` (unique for `t`). -// -// Returns an error if time `t` is not between 1970-01-01T00:00:00Z -// and 2106-02-07T06:28:15Z (inclusive). -// -// Note that two MongoTimestamps should never have the same (time, counter) combination: -// the caller must ensure the counter `c` is increased if creating multiple MongoTimestamp -// values for the same time `t` (ignoring fractions of seconds). -func NewMongoTimestamp(t time.Time, c uint32) (MongoTimestamp, error) { - u := t.Unix() - if u < 0 || u > math.MaxUint32 { - return -1, errors.New("invalid value for time") - } - - i := int64(u<<32 | int64(c)) - - return MongoTimestamp(i), nil -} - -type orderKey int64 - -// MaxKey is a special value that compares higher than all other possible BSON -// values in a MongoDB database. -var MaxKey = orderKey(1<<63 - 1) - -// MinKey is a special value that compares lower than all other possible BSON -// values in a MongoDB database. -var MinKey = orderKey(-1 << 63) - -type undefined struct{} - -// Undefined represents the undefined BSON value. -var Undefined undefined - -// Binary is a representation for non-standard binary values. Any kind should -// work, but the following are known as of this writing: -// -// 0x00 - Generic. This is decoded as []byte(data), not Binary{0x00, data}. -// 0x01 - Function (!?) -// 0x02 - Obsolete generic. -// 0x03 - UUID -// 0x05 - MD5 -// 0x80 - User defined. -// -type Binary struct { - Kind byte - Data []byte -} - -// RegEx represents a regular expression. The Options field may contain -// individual characters defining the way in which the pattern should be -// applied, and must be sorted. Valid options as of this writing are 'i' for -// case insensitive matching, 'm' for multi-line matching, 'x' for verbose -// mode, 'l' to make \w, \W, and similar be locale-dependent, 's' for dot-all -// mode (a '.' matches everything), and 'u' to make \w, \W, and similar match -// unicode. The value of the Options parameter is not verified before being -// marshaled into the BSON format. -type RegEx struct { - Pattern string - Options string -} - -// JavaScript is a type that holds JavaScript code. If Scope is non-nil, it -// will be marshaled as a mapping from identifiers to values that may be -// used when evaluating the provided Code. -type JavaScript struct { - Code string - Scope interface{} -} - -// DBPointer refers to a document id in a namespace. -// -// This type is deprecated in the BSON specification and should not be used -// except for backwards compatibility with ancient applications. 
-type DBPointer struct { - Namespace string - Id ObjectId -} - -const initialBufferSize = 64 - -func handleErr(err *error) { - if r := recover(); r != nil { - if _, ok := r.(runtime.Error); ok { - panic(r) - } else if _, ok := r.(externalPanic); ok { - panic(r) - } else if s, ok := r.(string); ok { - *err = errors.New(s) - } else if e, ok := r.(error); ok { - *err = e - } else { - panic(r) - } - } -} - -// Marshal serializes the in value, which may be a map or a struct value. -// In the case of struct values, only exported fields will be serialized, -// and the order of serialized fields will match that of the struct itself. -// The lowercased field name is used as the key for each exported field, -// but this behavior may be changed using the respective field tag. -// The tag may also contain flags to tweak the marshalling behavior for -// the field. The tag formats accepted are: -// -// "[][,[,]]" -// -// `(...) bson:"[][,[,]]" (...)` -// -// The following flags are currently supported: -// -// omitempty Only include the field if it's not set to the zero -// value for the type or to empty slices or maps. -// -// minsize Marshal an int64 value as an int32, if that's feasible -// while preserving the numeric value. -// -// inline Inline the field, which must be a struct or a map, -// causing all of its fields or keys to be processed as if -// they were part of the outer struct. For maps, keys must -// not conflict with the bson keys of other struct fields. -// -// Some examples: -// -// type T struct { -// A bool -// B int "myb" -// C string "myc,omitempty" -// D string `bson:",omitempty" json:"jsonkey"` -// E int64 ",minsize" -// F int64 "myf,omitempty,minsize" -// } -// -func Marshal(in interface{}) (out []byte, err error) { - return MarshalBuffer(in, make([]byte, 0, initialBufferSize)) -} - -// MarshalBuffer behaves the same way as Marshal, except that instead of -// allocating a new byte slice it tries to use the received byte slice and -// only allocates more memory if necessary to fit the marshaled value. -func MarshalBuffer(in interface{}, buf []byte) (out []byte, err error) { - defer handleErr(&err) - e := &encoder{buf} - e.addDoc(reflect.ValueOf(in)) - return e.out, nil -} - -// Unmarshal deserializes data from in into the out value. The out value -// must be a map, a pointer to a struct, or a pointer to a bson.D value. -// In the case of struct values, only exported fields will be deserialized. -// The lowercased field name is used as the key for each exported field, -// but this behavior may be changed using the respective field tag. -// The tag may also contain flags to tweak the marshalling behavior for -// the field. The tag formats accepted are: -// -// "[][,[,]]" -// -// `(...) bson:"[][,[,]]" (...)` -// -// The following flags are currently supported during unmarshal (see the -// Marshal method for other flags): -// -// inline Inline the field, which must be a struct or a map. -// Inlined structs are handled as if its fields were part -// of the outer struct. An inlined map causes keys that do -// not match any other struct field to be inserted in the -// map rather than being discarded as usual. -// -// The target field or element types of out may not necessarily match -// the BSON values of the provided data. 
The following conversions are -// made automatically: -// -// - Numeric types are converted if at least the integer part of the -// value would be preserved correctly -// - Bools are converted to numeric types as 1 or 0 -// - Numeric types are converted to bools as true if not 0 or false otherwise -// - Binary and string BSON data is converted to a string, array or byte slice -// -// If the value would not fit the type and cannot be converted, it's -// silently skipped. -// -// Pointer values are initialized when necessary. -func Unmarshal(in []byte, out interface{}) (err error) { - if raw, ok := out.(*Raw); ok { - raw.Kind = 3 - raw.Data = in - return nil - } - defer handleErr(&err) - v := reflect.ValueOf(out) - switch v.Kind() { - case reflect.Ptr: - fallthrough - case reflect.Map: - d := newDecoder(in) - d.readDocTo(v) - if d.i < len(d.in) { - return errors.New("document is corrupted") - } - case reflect.Struct: - return errors.New("unmarshal can't deal with struct values. Use a pointer") - default: - return errors.New("unmarshal needs a map or a pointer to a struct") - } - return nil -} - -// Unmarshal deserializes raw into the out value. If the out value type -// is not compatible with raw, a *bson.TypeError is returned. -// -// See the Unmarshal function documentation for more details on the -// unmarshalling process. -func (raw Raw) Unmarshal(out interface{}) (err error) { - defer handleErr(&err) - v := reflect.ValueOf(out) - switch v.Kind() { - case reflect.Ptr: - v = v.Elem() - fallthrough - case reflect.Map: - d := newDecoder(raw.Data) - good := d.readElemTo(v, raw.Kind) - if !good { - return &TypeError{v.Type(), raw.Kind} - } - case reflect.Struct: - return errors.New("raw Unmarshal can't deal with struct values. Use a pointer") - default: - return errors.New("raw Unmarshal needs a map or a valid pointer") - } - return nil -} - -// TypeError store details for type error occuring -// during unmarshaling -type TypeError struct { - Type reflect.Type - Kind byte -} - -func (e *TypeError) Error() string { - return fmt.Sprintf("BSON kind 0x%02x isn't compatible with type %s", e.Kind, e.Type.String()) -} - -// -------------------------------------------------------------------------- -// Maintain a mapping of keys to structure field indexes - -type structInfo struct { - FieldsMap map[string]fieldInfo - FieldsList []fieldInfo - InlineMap int - Zero reflect.Value -} - -type fieldInfo struct { - Key string - Num int - OmitEmpty bool - MinSize bool - Inline []int -} - -var structMap = make(map[reflect.Type]*structInfo) -var structMapMutex sync.RWMutex - -type externalPanic string - -func (e externalPanic) String() string { - return string(e) -} - -func getStructInfo(st reflect.Type) (*structInfo, error) { - structMapMutex.RLock() - sinfo, found := structMap[st] - structMapMutex.RUnlock() - if found { - return sinfo, nil - } - n := st.NumField() - fieldsMap := make(map[string]fieldInfo) - fieldsList := make([]fieldInfo, 0, n) - inlineMap := -1 - for i := 0; i != n; i++ { - field := st.Field(i) - if field.PkgPath != "" && !field.Anonymous { - continue // Private field - } - - info := fieldInfo{Num: i} - - tag := field.Tag.Get("bson") - - // Fall-back to JSON struct tag, if feature flag is set. - if tag == "" && useJSONTagFallback { - tag = field.Tag.Get("json") - } - - // If there's no bson/json tag available. - if tag == "" { - // If there's no tag, and also no tag: value splits (i.e. 
no colon) - // then assume the entire tag is the value - if strings.Index(string(field.Tag), ":") < 0 { - tag = string(field.Tag) - } - } - - if tag == "-" { - continue - } - - inline := false - fields := strings.Split(tag, ",") - if len(fields) > 1 { - for _, flag := range fields[1:] { - switch flag { - case "omitempty": - info.OmitEmpty = true - case "minsize": - info.MinSize = true - case "inline": - inline = true - default: - msg := fmt.Sprintf("Unsupported flag %q in tag %q of type %s", flag, tag, st) - panic(externalPanic(msg)) - } - } - tag = fields[0] - } - - if inline { - switch field.Type.Kind() { - case reflect.Map: - if inlineMap >= 0 { - return nil, errors.New("Multiple ,inline maps in struct " + st.String()) - } - if field.Type.Key() != reflect.TypeOf("") { - return nil, errors.New("Option ,inline needs a map with string keys in struct " + st.String()) - } - inlineMap = info.Num - case reflect.Ptr: - // allow only pointer to struct - if kind := field.Type.Elem().Kind(); kind != reflect.Struct { - return nil, errors.New("Option ,inline allows a pointer only to a struct, was given pointer to " + kind.String()) - } - - field.Type = field.Type.Elem() - fallthrough - case reflect.Struct: - sinfo, err := getStructInfo(field.Type) - if err != nil { - return nil, err - } - for _, finfo := range sinfo.FieldsList { - if _, found := fieldsMap[finfo.Key]; found { - msg := "Duplicated key '" + finfo.Key + "' in struct " + st.String() - return nil, errors.New(msg) - } - if finfo.Inline == nil { - finfo.Inline = []int{i, finfo.Num} - } else { - finfo.Inline = append([]int{i}, finfo.Inline...) - } - fieldsMap[finfo.Key] = finfo - fieldsList = append(fieldsList, finfo) - } - default: - panic("Option ,inline needs a struct value or a pointer to a struct or map field") - } - continue - } - - if tag != "" { - info.Key = tag - } else { - info.Key = strings.ToLower(field.Name) - } - - if _, found = fieldsMap[info.Key]; found { - msg := "Duplicated key '" + info.Key + "' in struct " + st.String() - return nil, errors.New(msg) - } - - fieldsList = append(fieldsList, info) - fieldsMap[info.Key] = info - } - sinfo = &structInfo{ - fieldsMap, - fieldsList, - inlineMap, - reflect.New(st).Elem(), - } - structMapMutex.Lock() - structMap[st] = sinfo - structMapMutex.Unlock() - return sinfo, nil -} diff --git a/src/runtime/vendor/github.com/globalsign/mgo/bson/compatibility.go b/src/runtime/vendor/github.com/globalsign/mgo/bson/compatibility.go deleted file mode 100644 index 66efd465facc..000000000000 --- a/src/runtime/vendor/github.com/globalsign/mgo/bson/compatibility.go +++ /dev/null @@ -1,29 +0,0 @@ -package bson - -// Current state of the JSON tag fallback option. -var useJSONTagFallback = false -var useRespectNilValues = false - -// SetJSONTagFallback enables or disables the JSON-tag fallback for structure tagging. When this is enabled, structures -// without BSON tags on a field will fall-back to using the JSON tag (if present). -func SetJSONTagFallback(state bool) { - useJSONTagFallback = state -} - -// JSONTagFallbackState returns the current status of the JSON tag fallback compatability option. See SetJSONTagFallback -// for more information. -func JSONTagFallbackState() bool { - return useJSONTagFallback -} - -// SetRespectNilValues enables or disables serializing nil slices or maps to `null` values. -// In other words it enables `encoding/json` compatible behaviour. 
-func SetRespectNilValues(state bool) { - useRespectNilValues = state -} - -// RespectNilValuesState returns the current status of the JSON nil slices and maps fallback compatibility option. -// See SetRespectNilValues for more information. -func RespectNilValuesState() bool { - return useRespectNilValues -} diff --git a/src/runtime/vendor/github.com/globalsign/mgo/bson/decimal.go b/src/runtime/vendor/github.com/globalsign/mgo/bson/decimal.go deleted file mode 100644 index 672ba1825940..000000000000 --- a/src/runtime/vendor/github.com/globalsign/mgo/bson/decimal.go +++ /dev/null @@ -1,312 +0,0 @@ -// BSON library for Go -// -// Copyright (c) 2010-2012 - Gustavo Niemeyer -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -package bson - -import ( - "fmt" - "strconv" - "strings" -) - -// Decimal128 holds decimal128 BSON values. -type Decimal128 struct { - h, l uint64 -} - -func (d Decimal128) String() string { - var pos int // positive sign - var e int // exponent - var h, l uint64 // significand high/low - - if d.h>>63&1 == 0 { - pos = 1 - } - - switch d.h >> 58 & (1<<5 - 1) { - case 0x1F: - return "NaN" - case 0x1E: - return "-Inf"[pos:] - } - - l = d.l - if d.h>>61&3 == 3 { - // Bits: 1*sign 2*ignored 14*exponent 111*significand. - // Implicit 0b100 prefix in significand. - e = int(d.h>>47&(1<<14-1)) - 6176 - //h = 4<<47 | d.h&(1<<47-1) - // Spec says all of these values are out of range. - h, l = 0, 0 - } else { - // Bits: 1*sign 14*exponent 113*significand - e = int(d.h>>49&(1<<14-1)) - 6176 - h = d.h & (1<<49 - 1) - } - - // Would be handled by the logic below, but that's trivial and common. - if h == 0 && l == 0 && e == 0 { - return "-0"[pos:] - } - - var repr [48]byte // Loop 5 times over 9 digits plus dot, negative sign, and leading zero. - var last = len(repr) - var i = len(repr) - var dot = len(repr) + e - var rem uint32 -Loop: - for d9 := 0; d9 < 5; d9++ { - h, l, rem = divmod(h, l, 1e9) - for d1 := 0; d1 < 9; d1++ { - // Handle "-0.0", "0.00123400", "-1.00E-6", "1.050E+3", etc. - if i < len(repr) && (dot == i || l == 0 && h == 0 && rem > 0 && rem < 10 && (dot < i-6 || e > 0)) { - e += len(repr) - i - i-- - repr[i] = '.' - last = i - 1 - dot = len(repr) // Unmark. 
- } - c := '0' + byte(rem%10) - rem /= 10 - i-- - repr[i] = c - // Handle "0E+3", "1E+3", etc. - if l == 0 && h == 0 && rem == 0 && i == len(repr)-1 && (dot < i-5 || e > 0) { - last = i - break Loop - } - if c != '0' { - last = i - } - // Break early. Works without it, but why. - if dot > i && l == 0 && h == 0 && rem == 0 { - break Loop - } - } - } - repr[last-1] = '-' - last-- - - if e > 0 { - return string(repr[last+pos:]) + "E+" + strconv.Itoa(e) - } - if e < 0 { - return string(repr[last+pos:]) + "E" + strconv.Itoa(e) - } - return string(repr[last+pos:]) -} - -func divmod(h, l uint64, div uint32) (qh, ql uint64, rem uint32) { - div64 := uint64(div) - a := h >> 32 - aq := a / div64 - ar := a % div64 - b := ar<<32 + h&(1<<32-1) - bq := b / div64 - br := b % div64 - c := br<<32 + l>>32 - cq := c / div64 - cr := c % div64 - d := cr<<32 + l&(1<<32-1) - dq := d / div64 - dr := d % div64 - return (aq<<32 | bq), (cq<<32 | dq), uint32(dr) -} - -var dNaN = Decimal128{0x1F << 58, 0} -var dPosInf = Decimal128{0x1E << 58, 0} -var dNegInf = Decimal128{0x3E << 58, 0} - -func dErr(s string) (Decimal128, error) { - return dNaN, fmt.Errorf("cannot parse %q as a decimal128", s) -} - -// ParseDecimal128 parse a string and return the corresponding value as -// a decimal128 -func ParseDecimal128(s string) (Decimal128, error) { - orig := s - if s == "" { - return dErr(orig) - } - neg := s[0] == '-' - if neg || s[0] == '+' { - s = s[1:] - } - - if (len(s) == 3 || len(s) == 8) && (s[0] == 'N' || s[0] == 'n' || s[0] == 'I' || s[0] == 'i') { - if s == "NaN" || s == "nan" || strings.EqualFold(s, "nan") { - return dNaN, nil - } - if s == "Inf" || s == "inf" || strings.EqualFold(s, "inf") || strings.EqualFold(s, "infinity") { - if neg { - return dNegInf, nil - } - return dPosInf, nil - } - return dErr(orig) - } - - var h, l uint64 - var e int - - var add, ovr uint32 - var mul uint32 = 1 - var dot = -1 - var digits = 0 - var i = 0 - for i < len(s) { - c := s[i] - if mul == 1e9 { - h, l, ovr = muladd(h, l, mul, add) - mul, add = 1, 0 - if ovr > 0 || h&((1<<15-1)<<49) > 0 { - return dErr(orig) - } - } - if c >= '0' && c <= '9' { - i++ - if c > '0' || digits > 0 { - digits++ - } - if digits > 34 { - if c == '0' { - // Exact rounding. - e++ - continue - } - return dErr(orig) - } - mul *= 10 - add *= 10 - add += uint32(c - '0') - continue - } - if c == '.' { - i++ - if dot >= 0 || i == 1 && len(s) == 1 { - return dErr(orig) - } - if i == len(s) { - break - } - if s[i] < '0' || s[i] > '9' || e > 0 { - return dErr(orig) - } - dot = i - continue - } - break - } - if i == 0 { - return dErr(orig) - } - if mul > 1 { - h, l, ovr = muladd(h, l, mul, add) - if ovr > 0 || h&((1<<15-1)<<49) > 0 { - return dErr(orig) - } - } - if dot >= 0 { - e += dot - i - } - if i+1 < len(s) && (s[i] == 'E' || s[i] == 'e') { - i++ - eneg := s[i] == '-' - if eneg || s[i] == '+' { - i++ - if i == len(s) { - return dErr(orig) - } - } - n := 0 - for i < len(s) && n < 1e4 { - c := s[i] - i++ - if c < '0' || c > '9' { - return dErr(orig) - } - n *= 10 - n += int(c - '0') - } - if eneg { - n = -n - } - e += n - for e < -6176 { - // Subnormal. - var div uint32 = 1 - for div < 1e9 && e < -6176 { - div *= 10 - e++ - } - var rem uint32 - h, l, rem = divmod(h, l, div) - if rem > 0 { - return dErr(orig) - } - } - for e > 6111 { - // Clamped. 
- var mul uint32 = 1 - for mul < 1e9 && e > 6111 { - mul *= 10 - e-- - } - h, l, ovr = muladd(h, l, mul, 0) - if ovr > 0 || h&((1<<15-1)<<49) > 0 { - return dErr(orig) - } - } - if e < -6176 || e > 6111 { - return dErr(orig) - } - } - - if i < len(s) { - return dErr(orig) - } - - h |= uint64(e+6176) & uint64(1<<14-1) << 49 - if neg { - h |= 1 << 63 - } - return Decimal128{h, l}, nil -} - -func muladd(h, l uint64, mul uint32, add uint32) (resh, resl uint64, overflow uint32) { - mul64 := uint64(mul) - a := mul64 * (l & (1<<32 - 1)) - b := a>>32 + mul64*(l>>32) - c := b>>32 + mul64*(h&(1<<32-1)) - d := c>>32 + mul64*(h>>32) - - a = a&(1<<32-1) + uint64(add) - b = b&(1<<32-1) + a>>32 - c = c&(1<<32-1) + b>>32 - d = d&(1<<32-1) + c>>32 - - return (d<<32 | c&(1<<32-1)), (b<<32 | a&(1<<32-1)), uint32(d >> 32) -} diff --git a/src/runtime/vendor/github.com/globalsign/mgo/bson/decode.go b/src/runtime/vendor/github.com/globalsign/mgo/bson/decode.go deleted file mode 100644 index 658856add04b..000000000000 --- a/src/runtime/vendor/github.com/globalsign/mgo/bson/decode.go +++ /dev/null @@ -1,1055 +0,0 @@ -// BSON library for Go -// -// Copyright (c) 2010-2012 - Gustavo Niemeyer -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// gobson - BSON library for Go. - -package bson - -import ( - "errors" - "fmt" - "io" - "math" - "net/url" - "reflect" - "strconv" - "sync" - "time" -) - -type decoder struct { - in []byte - i int - docType reflect.Type -} - -var typeM = reflect.TypeOf(M{}) - -func newDecoder(in []byte) *decoder { - return &decoder{in, 0, typeM} -} - -// -------------------------------------------------------------------------- -// Some helper functions. - -func corrupted() { - panic("Document is corrupted") -} - -// -------------------------------------------------------------------------- -// Unmarshaling of documents. 
- -const ( - setterUnknown = iota - setterNone - setterType - setterAddr -) - -var setterStyles map[reflect.Type]int -var setterIface reflect.Type -var setterMutex sync.RWMutex - -func init() { - var iface Setter - setterIface = reflect.TypeOf(&iface).Elem() - setterStyles = make(map[reflect.Type]int) -} - -func setterStyle(outt reflect.Type) int { - setterMutex.RLock() - style := setterStyles[outt] - setterMutex.RUnlock() - if style != setterUnknown { - return style - } - - setterMutex.Lock() - defer setterMutex.Unlock() - if outt.Implements(setterIface) { - style = setterType - } else if reflect.PtrTo(outt).Implements(setterIface) { - style = setterAddr - } else { - style = setterNone - } - setterStyles[outt] = style - return style -} - -func getSetter(outt reflect.Type, out reflect.Value) Setter { - style := setterStyle(outt) - if style == setterNone { - return nil - } - if style == setterAddr { - if !out.CanAddr() { - return nil - } - out = out.Addr() - } else if outt.Kind() == reflect.Ptr && out.IsNil() { - out.Set(reflect.New(outt.Elem())) - } - return out.Interface().(Setter) -} - -func clearMap(m reflect.Value) { - var none reflect.Value - for _, k := range m.MapKeys() { - m.SetMapIndex(k, none) - } -} - -func (d *decoder) readDocTo(out reflect.Value) { - var elemType reflect.Type - outt := out.Type() - outk := outt.Kind() - - for { - if outk == reflect.Ptr && out.IsNil() { - out.Set(reflect.New(outt.Elem())) - } - if setter := getSetter(outt, out); setter != nil { - raw := d.readRaw(ElementDocument) - err := setter.SetBSON(raw) - if _, ok := err.(*TypeError); err != nil && !ok { - panic(err) - } - return - } - if outk == reflect.Ptr { - out = out.Elem() - outt = out.Type() - outk = out.Kind() - continue - } - break - } - - var fieldsMap map[string]fieldInfo - var inlineMap reflect.Value - if outt == typeRaw { - out.Set(reflect.ValueOf(d.readRaw(ElementDocument))) - return - } - - origout := out - if outk == reflect.Interface { - if d.docType.Kind() == reflect.Map { - mv := reflect.MakeMap(d.docType) - out.Set(mv) - out = mv - } else { - dv := reflect.New(d.docType).Elem() - out.Set(dv) - out = dv - } - outt = out.Type() - outk = outt.Kind() - } - - docType := d.docType - keyType := typeString - convertKey := false - switch outk { - case reflect.Map: - keyType = outt.Key() - if keyType != typeString { - convertKey = true - } - elemType = outt.Elem() - if elemType == typeIface { - d.docType = outt - } - if out.IsNil() { - out.Set(reflect.MakeMap(out.Type())) - } else if out.Len() > 0 { - clearMap(out) - } - case reflect.Struct: - sinfo, err := getStructInfo(out.Type()) - if err != nil { - panic(err) - } - fieldsMap = sinfo.FieldsMap - out.Set(sinfo.Zero) - if sinfo.InlineMap != -1 { - inlineMap = out.Field(sinfo.InlineMap) - if !inlineMap.IsNil() && inlineMap.Len() > 0 { - clearMap(inlineMap) - } - elemType = inlineMap.Type().Elem() - if elemType == typeIface { - d.docType = inlineMap.Type() - } - } - case reflect.Slice: - switch outt.Elem() { - case typeDocElem: - origout.Set(d.readDocElems(outt)) - return - case typeRawDocElem: - origout.Set(d.readRawDocElems(outt)) - return - } - fallthrough - default: - panic("Unsupported document type for unmarshalling: " + out.Type().String()) - } - - end := int(d.readInt32()) - end += d.i - 4 - if end <= d.i || end > len(d.in) || d.in[end-1] != '\x00' { - corrupted() - } - for d.in[d.i] != '\x00' { - kind := d.readByte() - name := d.readCStr() - if d.i >= end { - corrupted() - } - - switch outk { - case reflect.Map: - e := 
reflect.New(elemType).Elem() - if d.readElemTo(e, kind) { - k := reflect.ValueOf(name) - if convertKey { - mapKeyType := out.Type().Key() - mapKeyKind := mapKeyType.Kind() - - switch mapKeyKind { - case reflect.Int: - fallthrough - case reflect.Int8: - fallthrough - case reflect.Int16: - fallthrough - case reflect.Int32: - fallthrough - case reflect.Int64: - fallthrough - case reflect.Uint: - fallthrough - case reflect.Uint8: - fallthrough - case reflect.Uint16: - fallthrough - case reflect.Uint32: - fallthrough - case reflect.Uint64: - fallthrough - case reflect.Float32: - fallthrough - case reflect.Float64: - parsed := d.parseMapKeyAsFloat(k, mapKeyKind) - k = reflect.ValueOf(parsed) - case reflect.String: - mapKeyType = keyType - default: - panic("BSON map must have string or decimal keys. Got: " + outt.String()) - } - - k = k.Convert(mapKeyType) - } - out.SetMapIndex(k, e) - } - case reflect.Struct: - if info, ok := fieldsMap[name]; ok { - if info.Inline == nil { - d.readElemTo(out.Field(info.Num), kind) - } else { - d.readElemTo(out.FieldByIndex(info.Inline), kind) - } - } else if inlineMap.IsValid() { - if inlineMap.IsNil() { - inlineMap.Set(reflect.MakeMap(inlineMap.Type())) - } - e := reflect.New(elemType).Elem() - if d.readElemTo(e, kind) { - inlineMap.SetMapIndex(reflect.ValueOf(name), e) - } - } else { - d.dropElem(kind) - } - case reflect.Slice: - } - - if d.i >= end { - corrupted() - } - } - d.i++ // '\x00' - if d.i != end { - corrupted() - } - d.docType = docType -} - -func (decoder) parseMapKeyAsFloat(k reflect.Value, mapKeyKind reflect.Kind) float64 { - parsed, err := strconv.ParseFloat(k.String(), 64) - if err != nil { - panic("Map key is defined to be a decimal type (" + mapKeyKind.String() + ") but got error " + - err.Error()) - } - - return parsed -} - -func (d *decoder) readArrayDocTo(out reflect.Value) { - end := int(d.readInt32()) - end += d.i - 4 - if end <= d.i || end > len(d.in) || d.in[end-1] != '\x00' { - corrupted() - } - i := 0 - l := out.Len() - for d.in[d.i] != '\x00' { - if i >= l { - panic("Length mismatch on array field") - } - kind := d.readByte() - for d.i < end && d.in[d.i] != '\x00' { - d.i++ - } - if d.i >= end { - corrupted() - } - d.i++ - d.readElemTo(out.Index(i), kind) - if d.i >= end { - corrupted() - } - i++ - } - if i != l { - panic("Length mismatch on array field") - } - d.i++ // '\x00' - if d.i != end { - corrupted() - } -} - -func (d *decoder) readSliceDoc(t reflect.Type) interface{} { - tmp := make([]reflect.Value, 0, 8) - elemType := t.Elem() - if elemType == typeRawDocElem { - d.dropElem(ElementArray) - return reflect.Zero(t).Interface() - } - if elemType == typeRaw { - return d.readSliceOfRaw() - } - - end := int(d.readInt32()) - end += d.i - 4 - if end <= d.i || end > len(d.in) || d.in[end-1] != '\x00' { - corrupted() - } - for d.in[d.i] != '\x00' { - kind := d.readByte() - for d.i < end && d.in[d.i] != '\x00' { - d.i++ - } - if d.i >= end { - corrupted() - } - d.i++ - e := reflect.New(elemType).Elem() - if d.readElemTo(e, kind) { - tmp = append(tmp, e) - } - if d.i >= end { - corrupted() - } - } - d.i++ // '\x00' - if d.i != end { - corrupted() - } - - n := len(tmp) - slice := reflect.MakeSlice(t, n, n) - for i := 0; i != n; i++ { - slice.Index(i).Set(tmp[i]) - } - return slice.Interface() -} - -func BSONElementSize(kind byte, offset int, buffer []byte) (int, error) { - switch kind { - case ElementFloat64: // Float64 - return 8, nil - case ElementJavaScriptWithoutScope: // JavaScript without scope - fallthrough - case ElementSymbol: 
// Symbol - fallthrough - case ElementString: // UTF-8 string - size, err := getSize(offset, buffer) - if err != nil { - return 0, err - } - if size < 1 { - return 0, errors.New("String size can't be less then one byte") - } - size += 4 - if offset+size > len(buffer) { - return 0, io.ErrUnexpectedEOF - } - if buffer[offset+size-1] != 0 { - return 0, errors.New("Invalid string: non zero-terminated") - } - return size, nil - case ElementArray: // Array - fallthrough - case ElementDocument: // Document - size, err := getSize(offset, buffer) - if err != nil { - return 0, err - } - if size < 5 { - return 0, errors.New("Declared document size is too small") - } - return size, nil - case ElementBinary: // Binary - size, err := getSize(offset, buffer) - if err != nil { - return 0, err - } - if size < 0 { - return 0, errors.New("Binary data size can't be negative") - } - return size + 5, nil - case Element06: // Undefined (obsolete, but still seen in the wild) - return 0, nil - case ElementObjectId: // ObjectId - return 12, nil - case ElementBool: // Bool - return 1, nil - case ElementDatetime: // Timestamp - return 8, nil - case ElementNil: // Nil - return 0, nil - case ElementRegEx: // RegEx - end := offset - for i := 0; i < 2; i++ { - for end < len(buffer) && buffer[end] != '\x00' { - end++ - } - end++ - } - if end > len(buffer) { - return 0, io.ErrUnexpectedEOF - } - return end - offset, nil - case ElementDBPointer: // DBPointer - size, err := getSize(offset, buffer) - if err != nil { - return 0, err - } - if size < 1 { - return 0, errors.New("String size can't be less then one byte") - } - return size + 12 + 4, nil - case ElementJavaScriptWithScope: // JavaScript with scope - size, err := getSize(offset, buffer) - if err != nil { - return 0, err - } - if size < 4+5+5 { - return 0, errors.New("Declared document element is too small") - } - return size, nil - case ElementInt32: // Int32 - return 4, nil - case ElementTimestamp: // Mongo-specific timestamp - return 8, nil - case ElementInt64: // Int64 - return 8, nil - case ElementDecimal128: // Decimal128 - return 16, nil - case ElementMaxKey: // Max key - return 0, nil - case ElementMinKey: // Min key - return 0, nil - default: - return 0, errors.New(fmt.Sprintf("Unknown element kind (0x%02X)", kind)) - } -} - -func (d *decoder) readRaw(kind byte) Raw { - size, err := BSONElementSize(kind, d.i, d.in) - if err != nil { - corrupted() - } - if d.i+size > len(d.in) { - corrupted() - } - d.i += size - return Raw{ - Kind: kind, - Data: d.in[d.i-size : d.i], - } -} - -func (d *decoder) readSliceOfRaw() interface{} { - tmp := make([]Raw, 0, 8) - end := int(d.readInt32()) - end += d.i - 4 - if end <= d.i || end > len(d.in) || d.in[end-1] != '\x00' { - corrupted() - } - for d.in[d.i] != '\x00' { - kind := d.readByte() - for d.i < end && d.in[d.i] != '\x00' { - d.i++ - } - if d.i >= end { - corrupted() - } - d.i++ - e := d.readRaw(kind) - tmp = append(tmp, e) - if d.i >= end { - corrupted() - } - } - d.i++ // '\x00' - if d.i != end { - corrupted() - } - return tmp -} - -var typeSlice = reflect.TypeOf([]interface{}{}) -var typeIface = typeSlice.Elem() - -func (d *decoder) readDocElems(typ reflect.Type) reflect.Value { - docType := d.docType - d.docType = typ - slice := make([]DocElem, 0, 8) - d.readDocWith(func(kind byte, name string) { - e := DocElem{Name: name} - v := reflect.ValueOf(&e.Value) - if d.readElemTo(v.Elem(), kind) { - slice = append(slice, e) - } - }) - slicev := reflect.New(typ).Elem() - slicev.Set(reflect.ValueOf(slice)) - d.docType = 
docType - return slicev -} - -func (d *decoder) readRawDocElems(typ reflect.Type) reflect.Value { - docType := d.docType - d.docType = typ - slice := make([]RawDocElem, 0, 8) - d.readDocWith(func(kind byte, name string) { - e := RawDocElem{Name: name, Value: d.readRaw(kind)} - slice = append(slice, e) - }) - slicev := reflect.New(typ).Elem() - slicev.Set(reflect.ValueOf(slice)) - d.docType = docType - return slicev -} - -func (d *decoder) readDocWith(f func(kind byte, name string)) { - end := int(d.readInt32()) - end += d.i - 4 - if end <= d.i || end > len(d.in) || d.in[end-1] != '\x00' { - corrupted() - } - for d.in[d.i] != '\x00' { - kind := d.readByte() - name := d.readCStr() - if d.i >= end { - corrupted() - } - f(kind, name) - if d.i >= end { - corrupted() - } - } - d.i++ // '\x00' - if d.i != end { - corrupted() - } -} - -// -------------------------------------------------------------------------- -// Unmarshaling of individual elements within a document. -func (d *decoder) dropElem(kind byte) { - size, err := BSONElementSize(kind, d.i, d.in) - if err != nil { - corrupted() - } - if d.i+size > len(d.in) { - corrupted() - } - d.i += size -} - -// Attempt to decode an element from the document and put it into out. -// If the types are not compatible, the returned ok value will be -// false and out will be unchanged. -func (d *decoder) readElemTo(out reflect.Value, kind byte) (good bool) { - outt := out.Type() - - if outt == typeRaw { - out.Set(reflect.ValueOf(d.readRaw(kind))) - return true - } - - if outt == typeRawPtr { - raw := d.readRaw(kind) - out.Set(reflect.ValueOf(&raw)) - return true - } - - if kind == ElementDocument { - // Delegate unmarshaling of documents. - outt := out.Type() - outk := out.Kind() - switch outk { - case reflect.Interface, reflect.Ptr, reflect.Struct, reflect.Map: - d.readDocTo(out) - return true - } - if setterStyle(outt) != setterNone { - d.readDocTo(out) - return true - } - if outk == reflect.Slice { - switch outt.Elem() { - case typeDocElem: - out.Set(d.readDocElems(outt)) - case typeRawDocElem: - out.Set(d.readRawDocElems(outt)) - default: - d.dropElem(kind) - } - return true - } - d.dropElem(kind) - return true - } - - if setter := getSetter(outt, out); setter != nil { - err := setter.SetBSON(d.readRaw(kind)) - if err == ErrSetZero { - out.Set(reflect.Zero(outt)) - return true - } - if err == nil { - return true - } - if _, ok := err.(*TypeError); !ok { - panic(err) - } - return false - } - - var in interface{} - - switch kind { - case ElementFloat64: - in = d.readFloat64() - case ElementString: - in = d.readStr() - case ElementDocument: - panic("Can't happen. Handled above.") - case ElementArray: - outt := out.Type() - if setterStyle(outt) != setterNone { - // Skip the value so its data is handed to the setter below. - d.dropElem(kind) - break - } - for outt.Kind() == reflect.Ptr { - outt = outt.Elem() - } - switch outt.Kind() { - case reflect.Array: - d.readArrayDocTo(out) - return true - case reflect.Slice: - in = d.readSliceDoc(outt) - default: - in = d.readSliceDoc(typeSlice) - } - case ElementBinary: - b := d.readBinary() - if b.Kind == BinaryGeneric || b.Kind == BinaryBinaryOld { - in = b.Data - } else { - in = b - } - case Element06: // Undefined (obsolete, but still seen in the wild) - in = Undefined - case ElementObjectId: - in = ObjectId(d.readBytes(12)) - case ElementBool: - in = d.readBool() - case ElementDatetime: // Timestamp - // MongoDB handles timestamps as milliseconds. 
- i := d.readInt64() - if i == -62135596800000 { - in = time.Time{} // In UTC for convenience. - } else { - in = time.Unix(i/1e3, i%1e3*1e6).UTC() - } - case ElementNil: - in = nil - case ElementRegEx: - in = d.readRegEx() - case ElementDBPointer: - in = DBPointer{Namespace: d.readStr(), Id: ObjectId(d.readBytes(12))} - case ElementJavaScriptWithoutScope: - in = JavaScript{Code: d.readStr()} - case ElementSymbol: - in = Symbol(d.readStr()) - case ElementJavaScriptWithScope: - start := d.i - l := int(d.readInt32()) - js := JavaScript{d.readStr(), make(M)} - d.readDocTo(reflect.ValueOf(js.Scope)) - if d.i != start+l { - corrupted() - } - in = js - case ElementInt32: - in = int(d.readInt32()) - case ElementTimestamp: // Mongo-specific timestamp - in = MongoTimestamp(d.readInt64()) - case ElementInt64: - switch out.Type() { - case typeTimeDuration: - in = time.Duration(time.Duration(d.readInt64()) * time.Millisecond) - default: - in = d.readInt64() - } - case ElementDecimal128: - in = Decimal128{ - l: uint64(d.readInt64()), - h: uint64(d.readInt64()), - } - case ElementMaxKey: - in = MaxKey - case ElementMinKey: - in = MinKey - default: - panic(fmt.Sprintf("Unknown element kind (0x%02X)", kind)) - } - - if in == nil { - out.Set(reflect.Zero(outt)) - return true - } - - outk := outt.Kind() - - // Dereference and initialize pointer if necessary. - first := true - for outk == reflect.Ptr { - if !out.IsNil() { - out = out.Elem() - } else { - elem := reflect.New(outt.Elem()) - if first { - // Only set if value is compatible. - first = false - defer func(out, elem reflect.Value) { - if good { - out.Set(elem) - } - }(out, elem) - } else { - out.Set(elem) - } - out = elem - } - outt = out.Type() - outk = outt.Kind() - } - - inv := reflect.ValueOf(in) - if outt == inv.Type() { - out.Set(inv) - return true - } - - switch outk { - case reflect.Interface: - out.Set(inv) - return true - case reflect.String: - switch inv.Kind() { - case reflect.String: - out.SetString(inv.String()) - return true - case reflect.Slice: - if b, ok := in.([]byte); ok { - out.SetString(string(b)) - return true - } - case reflect.Int, reflect.Int64: - if outt == typeJSONNumber { - out.SetString(strconv.FormatInt(inv.Int(), 10)) - return true - } - case reflect.Float64: - if outt == typeJSONNumber { - out.SetString(strconv.FormatFloat(inv.Float(), 'f', -1, 64)) - return true - } - } - case reflect.Slice, reflect.Array: - // Remember, array (0x04) slices are built with the correct - // element type. If we are here, must be a cross BSON kind - // conversion (e.g. 0x05 unmarshalling on string). 
- if outt.Elem().Kind() != reflect.Uint8 { - break - } - switch inv.Kind() { - case reflect.String: - slice := []byte(inv.String()) - out.Set(reflect.ValueOf(slice)) - return true - case reflect.Slice: - switch outt.Kind() { - case reflect.Array: - reflect.Copy(out, inv) - case reflect.Slice: - out.SetBytes(inv.Bytes()) - } - return true - } - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - switch inv.Kind() { - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - out.SetInt(inv.Int()) - return true - case reflect.Float32, reflect.Float64: - out.SetInt(int64(inv.Float())) - return true - case reflect.Bool: - if inv.Bool() { - out.SetInt(1) - } else { - out.SetInt(0) - } - return true - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - panic("can't happen: no uint types in BSON (!?)") - } - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - switch inv.Kind() { - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - out.SetUint(uint64(inv.Int())) - return true - case reflect.Float32, reflect.Float64: - out.SetUint(uint64(inv.Float())) - return true - case reflect.Bool: - if inv.Bool() { - out.SetUint(1) - } else { - out.SetUint(0) - } - return true - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - panic("Can't happen. No uint types in BSON.") - } - case reflect.Float32, reflect.Float64: - switch inv.Kind() { - case reflect.Float32, reflect.Float64: - out.SetFloat(inv.Float()) - return true - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - out.SetFloat(float64(inv.Int())) - return true - case reflect.Bool: - if inv.Bool() { - out.SetFloat(1) - } else { - out.SetFloat(0) - } - return true - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - panic("Can't happen. No uint types in BSON?") - } - case reflect.Bool: - switch inv.Kind() { - case reflect.Bool: - out.SetBool(inv.Bool()) - return true - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - out.SetBool(inv.Int() != 0) - return true - case reflect.Float32, reflect.Float64: - out.SetBool(inv.Float() != 0) - return true - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - panic("Can't happen. No uint types in BSON?") - } - case reflect.Struct: - if outt == typeURL && inv.Kind() == reflect.String { - u, err := url.Parse(inv.String()) - if err != nil { - panic(err) - } - out.Set(reflect.ValueOf(u).Elem()) - return true - } - if outt == typeBinary { - if b, ok := in.([]byte); ok { - out.Set(reflect.ValueOf(Binary{Data: b})) - return true - } - } - } - - return false -} - -// -------------------------------------------------------------------------- -// Parsers of basic types. - -func (d *decoder) readRegEx() RegEx { - re := RegEx{} - re.Pattern = d.readCStr() - re.Options = d.readCStr() - return re -} - -func (d *decoder) readBinary() Binary { - l := d.readInt32() - b := Binary{} - b.Kind = d.readByte() - if b.Kind == BinaryBinaryOld && l > 4 { - // Weird obsolete format with redundant length. 
- rl := d.readInt32() - if rl != l-4 { - corrupted() - } - l = rl - } - b.Data = d.readBytes(l) - return b -} - -func (d *decoder) readStr() string { - l := d.readInt32() - b := d.readBytes(l - 1) - if d.readByte() != '\x00' { - corrupted() - } - return string(b) -} - -func (d *decoder) readCStr() string { - start := d.i - end := start - l := len(d.in) - for ; end != l; end++ { - if d.in[end] == '\x00' { - break - } - } - d.i = end + 1 - if d.i > l { - corrupted() - } - return string(d.in[start:end]) -} - -func (d *decoder) readBool() bool { - b := d.readByte() - if b == 0 { - return false - } - if b == 1 { - return true - } - panic(fmt.Sprintf("encoded boolean must be 1 or 0, found %d", b)) -} - -func (d *decoder) readFloat64() float64 { - return math.Float64frombits(uint64(d.readInt64())) -} - -func (d *decoder) readInt32() int32 { - b := d.readBytes(4) - return int32((uint32(b[0]) << 0) | - (uint32(b[1]) << 8) | - (uint32(b[2]) << 16) | - (uint32(b[3]) << 24)) -} - -func getSize(offset int, b []byte) (int, error) { - if offset+4 > len(b) { - return 0, io.ErrUnexpectedEOF - } - return int((uint32(b[offset]) << 0) | - (uint32(b[offset+1]) << 8) | - (uint32(b[offset+2]) << 16) | - (uint32(b[offset+3]) << 24)), nil -} - -func (d *decoder) readInt64() int64 { - b := d.readBytes(8) - return int64((uint64(b[0]) << 0) | - (uint64(b[1]) << 8) | - (uint64(b[2]) << 16) | - (uint64(b[3]) << 24) | - (uint64(b[4]) << 32) | - (uint64(b[5]) << 40) | - (uint64(b[6]) << 48) | - (uint64(b[7]) << 56)) -} - -func (d *decoder) readByte() byte { - i := d.i - d.i++ - if d.i > len(d.in) { - corrupted() - } - return d.in[i] -} - -func (d *decoder) readBytes(length int32) []byte { - if length < 0 { - corrupted() - } - start := d.i - d.i += int(length) - if d.i < start || d.i > len(d.in) { - corrupted() - } - return d.in[start : start+int(length)] -} diff --git a/src/runtime/vendor/github.com/globalsign/mgo/bson/encode.go b/src/runtime/vendor/github.com/globalsign/mgo/bson/encode.go deleted file mode 100644 index d0c6b2a855f8..000000000000 --- a/src/runtime/vendor/github.com/globalsign/mgo/bson/encode.go +++ /dev/null @@ -1,645 +0,0 @@ -// BSON library for Go -// -// Copyright (c) 2010-2012 - Gustavo Niemeyer -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// 1. Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// gobson - BSON library for Go. - -package bson - -import ( - "encoding/json" - "fmt" - "math" - "net/url" - "reflect" - "sort" - "strconv" - "sync" - "time" -) - -// -------------------------------------------------------------------------- -// Some internal infrastructure. - -var ( - typeBinary = reflect.TypeOf(Binary{}) - typeObjectId = reflect.TypeOf(ObjectId("")) - typeDBPointer = reflect.TypeOf(DBPointer{"", ObjectId("")}) - typeSymbol = reflect.TypeOf(Symbol("")) - typeMongoTimestamp = reflect.TypeOf(MongoTimestamp(0)) - typeOrderKey = reflect.TypeOf(MinKey) - typeDocElem = reflect.TypeOf(DocElem{}) - typeRawDocElem = reflect.TypeOf(RawDocElem{}) - typeRaw = reflect.TypeOf(Raw{}) - typeRawPtr = reflect.PtrTo(reflect.TypeOf(Raw{})) - typeURL = reflect.TypeOf(url.URL{}) - typeTime = reflect.TypeOf(time.Time{}) - typeString = reflect.TypeOf("") - typeJSONNumber = reflect.TypeOf(json.Number("")) - typeTimeDuration = reflect.TypeOf(time.Duration(0)) -) - -var ( - // spec for []uint8 or []byte encoding - arrayOps = map[string]bool{ - "$in": true, - "$nin": true, - "$all": true, - } -) - -const itoaCacheSize = 32 - -const ( - getterUnknown = iota - getterNone - getterTypeVal - getterTypePtr - getterAddr -) - -var itoaCache []string - -var getterStyles map[reflect.Type]int -var getterIface reflect.Type -var getterMutex sync.RWMutex - -func init() { - itoaCache = make([]string, itoaCacheSize) - for i := 0; i != itoaCacheSize; i++ { - itoaCache[i] = strconv.Itoa(i) - } - var iface Getter - getterIface = reflect.TypeOf(&iface).Elem() - getterStyles = make(map[reflect.Type]int) -} - -func itoa(i int) string { - if i < itoaCacheSize { - return itoaCache[i] - } - return strconv.Itoa(i) -} - -func getterStyle(outt reflect.Type) int { - getterMutex.RLock() - style := getterStyles[outt] - getterMutex.RUnlock() - if style != getterUnknown { - return style - } - - getterMutex.Lock() - defer getterMutex.Unlock() - if outt.Implements(getterIface) { - vt := outt - for vt.Kind() == reflect.Ptr { - vt = vt.Elem() - } - if vt.Implements(getterIface) { - style = getterTypeVal - } else { - style = getterTypePtr - } - } else if reflect.PtrTo(outt).Implements(getterIface) { - style = getterAddr - } else { - style = getterNone - } - getterStyles[outt] = style - return style -} - -func getGetter(outt reflect.Type, out reflect.Value) Getter { - style := getterStyle(outt) - if style == getterNone { - return nil - } - if style == getterAddr { - if !out.CanAddr() { - return nil - } - return out.Addr().Interface().(Getter) - } - if style == getterTypeVal && out.Kind() == reflect.Ptr && out.IsNil() { - return nil - } - return out.Interface().(Getter) -} - -// -------------------------------------------------------------------------- -// Marshaling of the document value itself. 
- -type encoder struct { - out []byte -} - -func (e *encoder) addDoc(v reflect.Value) { - for { - if vi, ok := v.Interface().(Getter); ok { - getv, err := vi.GetBSON() - if err != nil { - panic(err) - } - v = reflect.ValueOf(getv) - continue - } - if v.Kind() == reflect.Ptr { - v = v.Elem() - continue - } - break - } - - if v.Type() == typeRaw { - raw := v.Interface().(Raw) - if raw.Kind != 0x03 && raw.Kind != 0x00 { - panic("Attempted to marshal Raw kind " + strconv.Itoa(int(raw.Kind)) + " as a document") - } - if len(raw.Data) == 0 { - panic("Attempted to marshal empty Raw document") - } - e.addBytes(raw.Data...) - return - } - - start := e.reserveInt32() - - switch v.Kind() { - case reflect.Map: - e.addMap(v) - case reflect.Struct: - e.addStruct(v) - case reflect.Array, reflect.Slice: - e.addSlice(v) - default: - panic("Can't marshal " + v.Type().String() + " as a BSON document") - } - - e.addBytes(0) - e.setInt32(start, int32(len(e.out)-start)) -} - -func (e *encoder) addMap(v reflect.Value) { - for _, k := range v.MapKeys() { - e.addElem(fmt.Sprint(k), v.MapIndex(k), false) - } -} - -func (e *encoder) addStruct(v reflect.Value) { - sinfo, err := getStructInfo(v.Type()) - if err != nil { - panic(err) - } - var value reflect.Value - if sinfo.InlineMap >= 0 { - m := v.Field(sinfo.InlineMap) - if m.Len() > 0 { - for _, k := range m.MapKeys() { - ks := k.String() - if _, found := sinfo.FieldsMap[ks]; found { - panic(fmt.Sprintf("Can't have key %q in inlined map; conflicts with struct field", ks)) - } - e.addElem(ks, m.MapIndex(k), false) - } - } - } - for _, info := range sinfo.FieldsList { - if info.Inline == nil { - value = v.Field(info.Num) - } else { - // as pointers to struct are allowed here, - // there is no guarantee that pointer won't be nil. 
- // - // It is expected allowed behaviour - // so info.Inline MAY consist index to a nil pointer - // and that is why we safely call v.FieldByIndex and just continue on panic - field, errField := safeFieldByIndex(v, info.Inline) - if errField != nil { - continue - } - - value = field - } - if info.OmitEmpty && isZero(value) { - continue - } - if useRespectNilValues && - (value.Kind() == reflect.Slice || value.Kind() == reflect.Map) && - value.IsNil() { - e.addElem(info.Key, reflect.ValueOf(nil), info.MinSize) - continue - } - e.addElem(info.Key, value, info.MinSize) - } -} - -func safeFieldByIndex(v reflect.Value, index []int) (result reflect.Value, err error) { - defer func() { - if recovered := recover(); recovered != nil { - switch r := recovered.(type) { - case string: - err = fmt.Errorf("%s", r) - case error: - err = r - } - } - }() - - result = v.FieldByIndex(index) - return -} - -func isZero(v reflect.Value) bool { - switch v.Kind() { - case reflect.String: - return len(v.String()) == 0 - case reflect.Ptr, reflect.Interface: - return v.IsNil() - case reflect.Slice: - return v.Len() == 0 - case reflect.Map: - return v.Len() == 0 - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return v.Int() == 0 - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - return v.Uint() == 0 - case reflect.Float32, reflect.Float64: - return v.Float() == 0 - case reflect.Bool: - return !v.Bool() - case reflect.Struct: - vt := v.Type() - if vt == typeTime { - return v.Interface().(time.Time).IsZero() - } - for i := 0; i < v.NumField(); i++ { - if vt.Field(i).PkgPath != "" && !vt.Field(i).Anonymous { - continue // Private field - } - if !isZero(v.Field(i)) { - return false - } - } - return true - } - return false -} - -func (e *encoder) addSlice(v reflect.Value) { - vi := v.Interface() - if d, ok := vi.(D); ok { - for _, elem := range d { - e.addElem(elem.Name, reflect.ValueOf(elem.Value), false) - } - return - } - if d, ok := vi.(RawD); ok { - for _, elem := range d { - e.addElem(elem.Name, reflect.ValueOf(elem.Value), false) - } - return - } - l := v.Len() - et := v.Type().Elem() - if et == typeDocElem { - for i := 0; i < l; i++ { - elem := v.Index(i).Interface().(DocElem) - e.addElem(elem.Name, reflect.ValueOf(elem.Value), false) - } - return - } - if et == typeRawDocElem { - for i := 0; i < l; i++ { - elem := v.Index(i).Interface().(RawDocElem) - e.addElem(elem.Name, reflect.ValueOf(elem.Value), false) - } - return - } - for i := 0; i < l; i++ { - e.addElem(itoa(i), v.Index(i), false) - } -} - -// -------------------------------------------------------------------------- -// Marshaling of elements in a document. - -func (e *encoder) addElemName(kind byte, name string) { - e.addBytes(kind) - e.addBytes([]byte(name)...) 
- e.addBytes(0) -} - -func (e *encoder) addElem(name string, v reflect.Value, minSize bool) { - - if !v.IsValid() { - e.addElemName(0x0A, name) - return - } - - if getter := getGetter(v.Type(), v); getter != nil { - getv, err := getter.GetBSON() - if err != nil { - panic(err) - } - e.addElem(name, reflect.ValueOf(getv), minSize) - return - } - - switch v.Kind() { - - case reflect.Interface: - e.addElem(name, v.Elem(), minSize) - - case reflect.Ptr: - e.addElem(name, v.Elem(), minSize) - - case reflect.String: - s := v.String() - switch v.Type() { - case typeObjectId: - if len(s) != 12 { - panic("ObjectIDs must be exactly 12 bytes long (got " + - strconv.Itoa(len(s)) + ")") - } - e.addElemName(0x07, name) - e.addBytes([]byte(s)...) - case typeSymbol: - e.addElemName(0x0E, name) - e.addStr(s) - case typeJSONNumber: - n := v.Interface().(json.Number) - if i, err := n.Int64(); err == nil { - e.addElemName(0x12, name) - e.addInt64(i) - } else if f, err := n.Float64(); err == nil { - e.addElemName(0x01, name) - e.addFloat64(f) - } else { - panic("failed to convert json.Number to a number: " + s) - } - default: - e.addElemName(0x02, name) - e.addStr(s) - } - - case reflect.Float32, reflect.Float64: - e.addElemName(0x01, name) - e.addFloat64(v.Float()) - - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - u := v.Uint() - if int64(u) < 0 { - panic("BSON has no uint64 type, and value is too large to fit correctly in an int64") - } else if u <= math.MaxInt32 && (minSize || v.Kind() <= reflect.Uint32) { - e.addElemName(0x10, name) - e.addInt32(int32(u)) - } else { - e.addElemName(0x12, name) - e.addInt64(int64(u)) - } - - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - switch v.Type() { - case typeMongoTimestamp: - e.addElemName(0x11, name) - e.addInt64(v.Int()) - - case typeOrderKey: - if v.Int() == int64(MaxKey) { - e.addElemName(0x7F, name) - } else { - e.addElemName(0xFF, name) - } - case typeTimeDuration: - // Stored as int64 - e.addElemName(0x12, name) - - e.addInt64(int64(v.Int() / 1e6)) - default: - i := v.Int() - if (minSize || v.Type().Kind() != reflect.Int64) && i >= math.MinInt32 && i <= math.MaxInt32 { - // It fits into an int32, encode as such. 
- e.addElemName(0x10, name) - e.addInt32(int32(i)) - } else { - e.addElemName(0x12, name) - e.addInt64(i) - } - } - - case reflect.Bool: - e.addElemName(0x08, name) - if v.Bool() { - e.addBytes(1) - } else { - e.addBytes(0) - } - - case reflect.Map: - e.addElemName(0x03, name) - e.addDoc(v) - - case reflect.Slice: - vt := v.Type() - et := vt.Elem() - if et.Kind() == reflect.Uint8 { - if arrayOps[name] { - e.addElemName(0x04, name) - e.addDoc(v) - } else { - e.addElemName(0x05, name) - e.addBinary(0x00, v.Bytes()) - } - } else if et == typeDocElem || et == typeRawDocElem { - e.addElemName(0x03, name) - e.addDoc(v) - } else { - e.addElemName(0x04, name) - e.addDoc(v) - } - - case reflect.Array: - et := v.Type().Elem() - if et.Kind() == reflect.Uint8 { - if arrayOps[name] { - e.addElemName(0x04, name) - e.addDoc(v) - } else { - e.addElemName(0x05, name) - if v.CanAddr() { - e.addBinary(0x00, v.Slice(0, v.Len()).Interface().([]byte)) - } else { - n := v.Len() - e.addInt32(int32(n)) - e.addBytes(0x00) - for i := 0; i < n; i++ { - el := v.Index(i) - e.addBytes(byte(el.Uint())) - } - } - } - } else { - e.addElemName(0x04, name) - e.addDoc(v) - } - - case reflect.Struct: - switch s := v.Interface().(type) { - - case Raw: - kind := s.Kind - if kind == 0x00 { - kind = 0x03 - } - if len(s.Data) == 0 && kind != 0x06 && kind != 0x0A && kind != 0xFF && kind != 0x7F { - panic("Attempted to marshal empty Raw document") - } - e.addElemName(kind, name) - e.addBytes(s.Data...) - - case Binary: - e.addElemName(0x05, name) - e.addBinary(s.Kind, s.Data) - - case Decimal128: - e.addElemName(0x13, name) - e.addInt64(int64(s.l)) - e.addInt64(int64(s.h)) - - case DBPointer: - e.addElemName(0x0C, name) - e.addStr(s.Namespace) - if len(s.Id) != 12 { - panic("ObjectIDs must be exactly 12 bytes long (got " + - strconv.Itoa(len(s.Id)) + ")") - } - e.addBytes([]byte(s.Id)...) - - case RegEx: - e.addElemName(0x0B, name) - e.addCStr(s.Pattern) - options := runes(s.Options) - sort.Sort(options) - e.addCStr(string(options)) - - case JavaScript: - if s.Scope == nil { - e.addElemName(0x0D, name) - e.addStr(s.Code) - } else { - e.addElemName(0x0F, name) - start := e.reserveInt32() - e.addStr(s.Code) - e.addDoc(reflect.ValueOf(s.Scope)) - e.setInt32(start, int32(len(e.out)-start)) - } - - case time.Time: - // MongoDB handles timestamps as milliseconds. - e.addElemName(0x09, name) - e.addInt64(s.Unix()*1000 + int64(s.Nanosecond()/1e6)) - - case url.URL: - e.addElemName(0x02, name) - e.addStr(s.String()) - - case undefined: - e.addElemName(0x06, name) - - default: - e.addElemName(0x03, name) - e.addDoc(v) - } - - default: - panic("Can't marshal " + v.Type().String() + " in a BSON document") - } -} - -// ------------- -// Helper method for sorting regex options -type runes []rune - -func (a runes) Len() int { return len(a) } -func (a runes) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a runes) Less(i, j int) bool { return a[i] < a[j] } - -// -------------------------------------------------------------------------- -// Marshaling of base types. - -func (e *encoder) addBinary(subtype byte, v []byte) { - if subtype == 0x02 { - // Wonder how that brilliant idea came to life. Obsolete, luckily. - e.addInt32(int32(len(v) + 4)) - e.addBytes(subtype) - e.addInt32(int32(len(v))) - } else { - e.addInt32(int32(len(v))) - e.addBytes(subtype) - } - e.addBytes(v...) -} - -func (e *encoder) addStr(v string) { - e.addInt32(int32(len(v) + 1)) - e.addCStr(v) -} - -func (e *encoder) addCStr(v string) { - e.addBytes([]byte(v)...) 
- e.addBytes(0) -} - -func (e *encoder) reserveInt32() (pos int) { - pos = len(e.out) - e.addBytes(0, 0, 0, 0) - return pos -} - -func (e *encoder) setInt32(pos int, v int32) { - e.out[pos+0] = byte(v) - e.out[pos+1] = byte(v >> 8) - e.out[pos+2] = byte(v >> 16) - e.out[pos+3] = byte(v >> 24) -} - -func (e *encoder) addInt32(v int32) { - u := uint32(v) - e.addBytes(byte(u), byte(u>>8), byte(u>>16), byte(u>>24)) -} - -func (e *encoder) addInt64(v int64) { - u := uint64(v) - e.addBytes(byte(u), byte(u>>8), byte(u>>16), byte(u>>24), - byte(u>>32), byte(u>>40), byte(u>>48), byte(u>>56)) -} - -func (e *encoder) addFloat64(v float64) { - e.addInt64(int64(math.Float64bits(v))) -} - -func (e *encoder) addBytes(v ...byte) { - e.out = append(e.out, v...) -} diff --git a/src/runtime/vendor/github.com/globalsign/mgo/bson/json.go b/src/runtime/vendor/github.com/globalsign/mgo/bson/json.go deleted file mode 100644 index 045c713012b9..000000000000 --- a/src/runtime/vendor/github.com/globalsign/mgo/bson/json.go +++ /dev/null @@ -1,384 +0,0 @@ -package bson - -import ( - "bytes" - "encoding/base64" - "fmt" - "strconv" - "strings" - "time" - - "github.com/globalsign/mgo/internal/json" -) - -// UnmarshalJSON unmarshals a JSON value that may hold non-standard -// syntax as defined in BSON's extended JSON specification. -func UnmarshalJSON(data []byte, value interface{}) error { - d := json.NewDecoder(bytes.NewBuffer(data)) - d.Extend(&jsonExt) - return d.Decode(value) -} - -// MarshalJSON marshals a JSON value that may hold non-standard -// syntax as defined in BSON's extended JSON specification. -func MarshalJSON(value interface{}) ([]byte, error) { - var buf bytes.Buffer - e := json.NewEncoder(&buf) - e.Extend(&jsonExt) - err := e.Encode(value) - if err != nil { - return nil, err - } - return buf.Bytes(), nil -} - -// jdec is used internally by the JSON decoding functions -// so they may unmarshal functions without getting into endless -// recursion due to keyed objects. 
-func jdec(data []byte, value interface{}) error { - d := json.NewDecoder(bytes.NewBuffer(data)) - d.Extend(&funcExt) - return d.Decode(value) -} - -var jsonExt json.Extension -var funcExt json.Extension - -// TODO -// - Shell regular expressions ("/regexp/opts") - -func init() { - jsonExt.DecodeUnquotedKeys(true) - jsonExt.DecodeTrailingCommas(true) - - funcExt.DecodeFunc("BinData", "$binaryFunc", "$type", "$binary") - jsonExt.DecodeKeyed("$binary", jdecBinary) - jsonExt.DecodeKeyed("$binaryFunc", jdecBinary) - jsonExt.EncodeType([]byte(nil), jencBinarySlice) - jsonExt.EncodeType(Binary{}, jencBinaryType) - - funcExt.DecodeFunc("ISODate", "$dateFunc", "S") - funcExt.DecodeFunc("new Date", "$dateFunc", "S") - jsonExt.DecodeKeyed("$date", jdecDate) - jsonExt.DecodeKeyed("$dateFunc", jdecDate) - jsonExt.EncodeType(time.Time{}, jencDate) - - funcExt.DecodeFunc("Timestamp", "$timestamp", "t", "i") - jsonExt.DecodeKeyed("$timestamp", jdecTimestamp) - jsonExt.EncodeType(MongoTimestamp(0), jencTimestamp) - - funcExt.DecodeConst("undefined", Undefined) - - jsonExt.DecodeKeyed("$regex", jdecRegEx) - jsonExt.EncodeType(RegEx{}, jencRegEx) - - funcExt.DecodeFunc("ObjectId", "$oidFunc", "Id") - jsonExt.DecodeKeyed("$oid", jdecObjectId) - jsonExt.DecodeKeyed("$oidFunc", jdecObjectId) - jsonExt.EncodeType(ObjectId(""), jencObjectId) - - funcExt.DecodeFunc("DBRef", "$dbrefFunc", "$ref", "$id") - jsonExt.DecodeKeyed("$dbrefFunc", jdecDBRef) - - funcExt.DecodeFunc("NumberLong", "$numberLongFunc", "N") - jsonExt.DecodeKeyed("$numberLong", jdecNumberLong) - jsonExt.DecodeKeyed("$numberLongFunc", jdecNumberLong) - jsonExt.EncodeType(int64(0), jencNumberLong) - jsonExt.EncodeType(int(0), jencInt) - - funcExt.DecodeConst("MinKey", MinKey) - funcExt.DecodeConst("MaxKey", MaxKey) - jsonExt.DecodeKeyed("$minKey", jdecMinKey) - jsonExt.DecodeKeyed("$maxKey", jdecMaxKey) - jsonExt.EncodeType(orderKey(0), jencMinMaxKey) - - jsonExt.DecodeKeyed("$undefined", jdecUndefined) - jsonExt.EncodeType(Undefined, jencUndefined) - - jsonExt.Extend(&funcExt) -} - -func fbytes(format string, args ...interface{}) []byte { - var buf bytes.Buffer - fmt.Fprintf(&buf, format, args...) 
- return buf.Bytes() -} - -func jdecBinary(data []byte) (interface{}, error) { - var v struct { - Binary []byte `json:"$binary"` - Type string `json:"$type"` - Func struct { - Binary []byte `json:"$binary"` - Type int64 `json:"$type"` - } `json:"$binaryFunc"` - } - err := jdec(data, &v) - if err != nil { - return nil, err - } - - var binData []byte - var binKind int64 - if v.Type == "" && v.Binary == nil { - binData = v.Func.Binary - binKind = v.Func.Type - } else if v.Type == "" { - return v.Binary, nil - } else { - binData = v.Binary - binKind, err = strconv.ParseInt(v.Type, 0, 64) - if err != nil { - binKind = -1 - } - } - - if binKind == 0 { - return binData, nil - } - if binKind < 0 || binKind > 255 { - return nil, fmt.Errorf("invalid type in binary object: %s", data) - } - - return Binary{Kind: byte(binKind), Data: binData}, nil -} - -func jencBinarySlice(v interface{}) ([]byte, error) { - in := v.([]byte) - out := make([]byte, base64.StdEncoding.EncodedLen(len(in))) - base64.StdEncoding.Encode(out, in) - return fbytes(`{"$binary":"%s","$type":"0x0"}`, out), nil -} - -func jencBinaryType(v interface{}) ([]byte, error) { - in := v.(Binary) - out := make([]byte, base64.StdEncoding.EncodedLen(len(in.Data))) - base64.StdEncoding.Encode(out, in.Data) - return fbytes(`{"$binary":"%s","$type":"0x%x"}`, out, in.Kind), nil -} - -const jdateFormat = "2006-01-02T15:04:05.999Z07:00" - -func jdecDate(data []byte) (interface{}, error) { - var v struct { - S string `json:"$date"` - Func struct { - S string - } `json:"$dateFunc"` - } - _ = jdec(data, &v) - if v.S == "" { - v.S = v.Func.S - } - if v.S != "" { - var errs []string - for _, format := range []string{jdateFormat, "2006-01-02"} { - t, err := time.Parse(format, v.S) - if err == nil { - return t, nil - } - errs = append(errs, err.Error()) - } - return nil, fmt.Errorf("cannot parse date: %q [%s]", v.S, strings.Join(errs, ", ")) - } - - var vn struct { - Date struct { - N int64 `json:"$numberLong,string"` - } `json:"$date"` - Func struct { - S int64 - } `json:"$dateFunc"` - } - err := jdec(data, &vn) - if err != nil { - return nil, fmt.Errorf("cannot parse date: %q", data) - } - n := vn.Date.N - if n == 0 { - n = vn.Func.S - } - return time.Unix(n/1000, n%1000*1e6).UTC(), nil -} - -func jencDate(v interface{}) ([]byte, error) { - t := v.(time.Time) - return fbytes(`{"$date":%q}`, t.Format(jdateFormat)), nil -} - -func jdecTimestamp(data []byte) (interface{}, error) { - var v struct { - Func struct { - T int32 `json:"t"` - I int32 `json:"i"` - } `json:"$timestamp"` - } - err := jdec(data, &v) - if err != nil { - return nil, err - } - return MongoTimestamp(uint64(v.Func.T)<<32 | uint64(uint32(v.Func.I))), nil -} - -func jencTimestamp(v interface{}) ([]byte, error) { - ts := uint64(v.(MongoTimestamp)) - return fbytes(`{"$timestamp":{"t":%d,"i":%d}}`, ts>>32, uint32(ts)), nil -} - -func jdecRegEx(data []byte) (interface{}, error) { - var v struct { - Regex string `json:"$regex"` - Options string `json:"$options"` - } - err := jdec(data, &v) - if err != nil { - return nil, err - } - return RegEx{v.Regex, v.Options}, nil -} - -func jencRegEx(v interface{}) ([]byte, error) { - re := v.(RegEx) - type regex struct { - Regex string `json:"$regex"` - Options string `json:"$options"` - } - return json.Marshal(regex{re.Pattern, re.Options}) -} - -func jdecObjectId(data []byte) (interface{}, error) { - var v struct { - Id string `json:"$oid"` - Func struct { - Id string - } `json:"$oidFunc"` - } - err := jdec(data, &v) - if err != nil { - return nil, err - 
} - if v.Id == "" { - v.Id = v.Func.Id - } - return ObjectIdHex(v.Id), nil -} - -func jencObjectId(v interface{}) ([]byte, error) { - return fbytes(`{"$oid":"%s"}`, v.(ObjectId).Hex()), nil -} - -func jdecDBRef(data []byte) (interface{}, error) { - // TODO Support unmarshaling $ref and $id into the input value. - var v struct { - Obj map[string]interface{} `json:"$dbrefFunc"` - } - // TODO Fix this. Must not be required. - v.Obj = make(map[string]interface{}) - err := jdec(data, &v) - if err != nil { - return nil, err - } - return v.Obj, nil -} - -func jdecNumberLong(data []byte) (interface{}, error) { - var v struct { - N int64 `json:"$numberLong,string"` - Func struct { - N int64 `json:",string"` - } `json:"$numberLongFunc"` - } - var vn struct { - N int64 `json:"$numberLong"` - Func struct { - N int64 - } `json:"$numberLongFunc"` - } - err := jdec(data, &v) - if err != nil { - err = jdec(data, &vn) - v.N = vn.N - v.Func.N = vn.Func.N - } - if err != nil { - return nil, err - } - if v.N != 0 { - return v.N, nil - } - return v.Func.N, nil -} - -func jencNumberLong(v interface{}) ([]byte, error) { - n := v.(int64) - f := `{"$numberLong":"%d"}` - if n <= 1<<53 { - f = `{"$numberLong":%d}` - } - return fbytes(f, n), nil -} - -func jencInt(v interface{}) ([]byte, error) { - n := v.(int) - f := `{"$numberLong":"%d"}` - if int64(n) <= 1<<53 { - f = `%d` - } - return fbytes(f, n), nil -} - -func jdecMinKey(data []byte) (interface{}, error) { - var v struct { - N int64 `json:"$minKey"` - } - err := jdec(data, &v) - if err != nil { - return nil, err - } - if v.N != 1 { - return nil, fmt.Errorf("invalid $minKey object: %s", data) - } - return MinKey, nil -} - -func jdecMaxKey(data []byte) (interface{}, error) { - var v struct { - N int64 `json:"$maxKey"` - } - err := jdec(data, &v) - if err != nil { - return nil, err - } - if v.N != 1 { - return nil, fmt.Errorf("invalid $maxKey object: %s", data) - } - return MaxKey, nil -} - -func jencMinMaxKey(v interface{}) ([]byte, error) { - switch v.(orderKey) { - case MinKey: - return []byte(`{"$minKey":1}`), nil - case MaxKey: - return []byte(`{"$maxKey":1}`), nil - } - panic(fmt.Sprintf("invalid $minKey/$maxKey value: %d", v)) -} - -func jdecUndefined(data []byte) (interface{}, error) { - var v struct { - B bool `json:"$undefined"` - } - err := jdec(data, &v) - if err != nil { - return nil, err - } - if !v.B { - return nil, fmt.Errorf("invalid $undefined object: %s", data) - } - return Undefined, nil -} - -func jencUndefined(v interface{}) ([]byte, error) { - return []byte(`{"$undefined":true}`), nil -} diff --git a/src/runtime/vendor/github.com/globalsign/mgo/bson/stream.go b/src/runtime/vendor/github.com/globalsign/mgo/bson/stream.go deleted file mode 100644 index 466528457b5f..000000000000 --- a/src/runtime/vendor/github.com/globalsign/mgo/bson/stream.go +++ /dev/null @@ -1,90 +0,0 @@ -package bson - -import ( - "bytes" - "encoding/binary" - "fmt" - "io" -) - -const ( - // MinDocumentSize is the size of the smallest possible valid BSON document: - // an int32 size header + 0x00 (end of document). - MinDocumentSize = 5 - - // MaxDocumentSize is the largest possible size for a BSON document allowed by MongoDB, - // that is, 16 MiB (see https://docs.mongodb.com/manual/reference/limits/). - MaxDocumentSize = 16777216 -) - -// ErrInvalidDocumentSize is an error returned when a BSON document's header -// contains a size smaller than MinDocumentSize or greater than MaxDocumentSize. 
-type ErrInvalidDocumentSize struct { - DocumentSize int32 -} - -func (e ErrInvalidDocumentSize) Error() string { - return fmt.Sprintf("invalid document size %d", e.DocumentSize) -} - -// A Decoder reads and decodes BSON values from an input stream. -type Decoder struct { - source io.Reader -} - -// NewDecoder returns a new Decoder that reads from source. -// It does not add any extra buffering, and may not read data from source beyond the BSON values requested. -func NewDecoder(source io.Reader) *Decoder { - return &Decoder{source: source} -} - -// Decode reads the next BSON-encoded value from its input and stores it in the value pointed to by v. -// See the documentation for Unmarshal for details about the conversion of BSON into a Go value. -func (dec *Decoder) Decode(v interface{}) (err error) { - // BSON documents start with their size as a *signed* int32. - var docSize int32 - if err = binary.Read(dec.source, binary.LittleEndian, &docSize); err != nil { - return - } - - if docSize < MinDocumentSize || docSize > MaxDocumentSize { - return ErrInvalidDocumentSize{DocumentSize: docSize} - } - - docBuffer := bytes.NewBuffer(make([]byte, 0, docSize)) - if err = binary.Write(docBuffer, binary.LittleEndian, docSize); err != nil { - return - } - - // docSize is the *full* document's size (including the 4-byte size header, - // which has already been read). - if _, err = io.CopyN(docBuffer, dec.source, int64(docSize-4)); err != nil { - return - } - - // Let Unmarshal handle the rest. - defer handleErr(&err) - return Unmarshal(docBuffer.Bytes(), v) -} - -// An Encoder encodes and writes BSON values to an output stream. -type Encoder struct { - target io.Writer -} - -// NewEncoder returns a new Encoder that writes to target. -func NewEncoder(target io.Writer) *Encoder { - return &Encoder{target: target} -} - -// Encode encodes v to BSON, and if successful writes it to the Encoder's output stream. -// See the documentation for Marshal for details about the conversion of Go values to BSON. -func (enc *Encoder) Encode(v interface{}) error { - data, err := Marshal(v) - if err != nil { - return err - } - - _, err = enc.target.Write(data) - return err -} diff --git a/src/runtime/vendor/github.com/globalsign/mgo/internal/json/LICENSE b/src/runtime/vendor/github.com/globalsign/mgo/internal/json/LICENSE deleted file mode 100644 index 74487567632c..000000000000 --- a/src/runtime/vendor/github.com/globalsign/mgo/internal/json/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2012 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/runtime/vendor/github.com/globalsign/mgo/internal/json/decode.go b/src/runtime/vendor/github.com/globalsign/mgo/internal/json/decode.go deleted file mode 100644 index d5ca1f9a851c..000000000000 --- a/src/runtime/vendor/github.com/globalsign/mgo/internal/json/decode.go +++ /dev/null @@ -1,1685 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Represents JSON data structure using native Go types: booleans, floats, -// strings, arrays, and maps. - -package json - -import ( - "bytes" - "encoding" - "encoding/base64" - "errors" - "fmt" - "reflect" - "runtime" - "strconv" - "unicode" - "unicode/utf16" - "unicode/utf8" -) - -// Unmarshal parses the JSON-encoded data and stores the result -// in the value pointed to by v. -// -// Unmarshal uses the inverse of the encodings that -// Marshal uses, allocating maps, slices, and pointers as necessary, -// with the following additional rules: -// -// To unmarshal JSON into a pointer, Unmarshal first handles the case of -// the JSON being the JSON literal null. In that case, Unmarshal sets -// the pointer to nil. Otherwise, Unmarshal unmarshals the JSON into -// the value pointed at by the pointer. If the pointer is nil, Unmarshal -// allocates a new value for it to point to. -// -// To unmarshal JSON into a struct, Unmarshal matches incoming object -// keys to the keys used by Marshal (either the struct field name or its tag), -// preferring an exact match but also accepting a case-insensitive match. -// Unmarshal will only set exported fields of the struct. -// -// To unmarshal JSON into an interface value, -// Unmarshal stores one of these in the interface value: -// -// bool, for JSON booleans -// float64, for JSON numbers -// string, for JSON strings -// []interface{}, for JSON arrays -// map[string]interface{}, for JSON objects -// nil for JSON null -// -// To unmarshal a JSON array into a slice, Unmarshal resets the slice length -// to zero and then appends each element to the slice. -// As a special case, to unmarshal an empty JSON array into a slice, -// Unmarshal replaces the slice with a new empty slice. -// -// To unmarshal a JSON array into a Go array, Unmarshal decodes -// JSON array elements into corresponding Go array elements. -// If the Go array is smaller than the JSON array, -// the additional JSON array elements are discarded. -// If the JSON array is smaller than the Go array, -// the additional Go array elements are set to zero values. -// -// To unmarshal a JSON object into a map, Unmarshal first establishes a map to -// use, If the map is nil, Unmarshal allocates a new map. Otherwise Unmarshal -// reuses the existing map, keeping existing entries. Unmarshal then stores key- -// value pairs from the JSON object into the map. The map's key type must -// either be a string or implement encoding.TextUnmarshaler. 
-// -// If a JSON value is not appropriate for a given target type, -// or if a JSON number overflows the target type, Unmarshal -// skips that field and completes the unmarshaling as best it can. -// If no more serious errors are encountered, Unmarshal returns -// an UnmarshalTypeError describing the earliest such error. -// -// The JSON null value unmarshals into an interface, map, pointer, or slice -// by setting that Go value to nil. Because null is often used in JSON to mean -// ``not present,'' unmarshaling a JSON null into any other Go type has no effect -// on the value and produces no error. -// -// When unmarshaling quoted strings, invalid UTF-8 or -// invalid UTF-16 surrogate pairs are not treated as an error. -// Instead, they are replaced by the Unicode replacement -// character U+FFFD. -// -func Unmarshal(data []byte, v interface{}) error { - // Check for well-formedness. - // Avoids filling out half a data structure - // before discovering a JSON syntax error. - var d decodeState - err := checkValid(data, &d.scan) - if err != nil { - return err - } - - d.init(data) - return d.unmarshal(v) -} - -// Unmarshaler is the interface implemented by types -// that can unmarshal a JSON description of themselves. -// The input can be assumed to be a valid encoding of -// a JSON value. UnmarshalJSON must copy the JSON data -// if it wishes to retain the data after returning. -type Unmarshaler interface { - UnmarshalJSON([]byte) error -} - -// An UnmarshalTypeError describes a JSON value that was -// not appropriate for a value of a specific Go type. -type UnmarshalTypeError struct { - Value string // description of JSON value - "bool", "array", "number -5" - Type reflect.Type // type of Go value it could not be assigned to - Offset int64 // error occurred after reading Offset bytes -} - -func (e *UnmarshalTypeError) Error() string { - return "json: cannot unmarshal " + e.Value + " into Go value of type " + e.Type.String() -} - -// An UnmarshalFieldError describes a JSON object key that -// led to an unexported (and therefore unwritable) struct field. -// (No longer used; kept for compatibility.) -type UnmarshalFieldError struct { - Key string - Type reflect.Type - Field reflect.StructField -} - -func (e *UnmarshalFieldError) Error() string { - return "json: cannot unmarshal object key " + strconv.Quote(e.Key) + " into unexported field " + e.Field.Name + " of type " + e.Type.String() -} - -// An InvalidUnmarshalError describes an invalid argument passed to Unmarshal. -// (The argument to Unmarshal must be a non-nil pointer.) -type InvalidUnmarshalError struct { - Type reflect.Type -} - -func (e *InvalidUnmarshalError) Error() string { - if e.Type == nil { - return "json: Unmarshal(nil)" - } - - if e.Type.Kind() != reflect.Ptr { - return "json: Unmarshal(non-pointer " + e.Type.String() + ")" - } - return "json: Unmarshal(nil " + e.Type.String() + ")" -} - -func (d *decodeState) unmarshal(v interface{}) (err error) { - defer func() { - if r := recover(); r != nil { - if _, ok := r.(runtime.Error); ok { - panic(r) - } - err = r.(error) - } - }() - - rv := reflect.ValueOf(v) - if rv.Kind() != reflect.Ptr || rv.IsNil() { - return &InvalidUnmarshalError{reflect.TypeOf(v)} - } - - d.scan.reset() - // We decode rv not rv.Elem because the Unmarshaler interface - // test must be applied at the top level of the value. - d.value(rv) - return d.savedError -} - -// A Number represents a JSON number literal. -type Number string - -// String returns the literal text of the number. 
-func (n Number) String() string { return string(n) } - -// Float64 returns the number as a float64. -func (n Number) Float64() (float64, error) { - return strconv.ParseFloat(string(n), 64) -} - -// Int64 returns the number as an int64. -func (n Number) Int64() (int64, error) { - return strconv.ParseInt(string(n), 10, 64) -} - -// isValidNumber reports whether s is a valid JSON number literal. -func isValidNumber(s string) bool { - // This function implements the JSON numbers grammar. - // See https://tools.ietf.org/html/rfc7159#section-6 - // and http://json.org/number.gif - - if s == "" { - return false - } - - // Optional - - if s[0] == '-' { - s = s[1:] - if s == "" { - return false - } - } - - // Digits - switch { - default: - return false - - case s[0] == '0': - s = s[1:] - - case '1' <= s[0] && s[0] <= '9': - s = s[1:] - for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { - s = s[1:] - } - } - - // . followed by 1 or more digits. - if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' { - s = s[2:] - for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { - s = s[1:] - } - } - - // e or E followed by an optional - or + and - // 1 or more digits. - if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') { - s = s[1:] - if s[0] == '+' || s[0] == '-' { - s = s[1:] - if s == "" { - return false - } - } - for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { - s = s[1:] - } - } - - // Make sure we are at the end. - return s == "" -} - -// decodeState represents the state while decoding a JSON value. -type decodeState struct { - data []byte - off int // read offset in data - scan scanner - nextscan scanner // for calls to nextValue - savedError error - useNumber bool - ext Extension -} - -// errPhase is used for errors that should not happen unless -// there is a bug in the JSON decoder or something is editing -// the data slice while the decoder executes. -var errPhase = errors.New("JSON decoder out of sync - data changing underfoot?") - -func (d *decodeState) init(data []byte) *decodeState { - d.data = data - d.off = 0 - d.savedError = nil - return d -} - -// error aborts the decoding by panicking with err. -func (d *decodeState) error(err error) { - panic(err) -} - -// saveError saves the first err it is called with, -// for reporting at the end of the unmarshal. -func (d *decodeState) saveError(err error) { - if d.savedError == nil { - d.savedError = err - } -} - -// next cuts off and returns the next full JSON value in d.data[d.off:]. -// The next value is known to be an object or array, not a literal. -func (d *decodeState) next() []byte { - c := d.data[d.off] - item, rest, err := nextValue(d.data[d.off:], &d.nextscan) - if err != nil { - d.error(err) - } - d.off = len(d.data) - len(rest) - - // Our scanner has seen the opening brace/bracket - // and thinks we're still in the middle of the object. - // invent a closing brace/bracket to get it out. - if c == '{' { - d.scan.step(&d.scan, '}') - } else if c == '[' { - d.scan.step(&d.scan, ']') - } else { - // Was inside a function name. Get out of it. - d.scan.step(&d.scan, '(') - d.scan.step(&d.scan, ')') - } - - return item -} - -// scanWhile processes bytes in d.data[d.off:] until it -// receives a scan code not equal to op. -// It updates d.off and returns the new scan code. 
-func (d *decodeState) scanWhile(op int) int { - var newOp int - for { - if d.off >= len(d.data) { - newOp = d.scan.eof() - d.off = len(d.data) + 1 // mark processed EOF with len+1 - } else { - c := d.data[d.off] - d.off++ - newOp = d.scan.step(&d.scan, c) - } - if newOp != op { - break - } - } - return newOp -} - -// value decodes a JSON value from d.data[d.off:] into the value. -// it updates d.off to point past the decoded value. -func (d *decodeState) value(v reflect.Value) { - if !v.IsValid() { - _, rest, err := nextValue(d.data[d.off:], &d.nextscan) - if err != nil { - d.error(err) - } - d.off = len(d.data) - len(rest) - - // d.scan thinks we're still at the beginning of the item. - // Feed in an empty string - the shortest, simplest value - - // so that it knows we got to the end of the value. - if d.scan.redo { - // rewind. - d.scan.redo = false - d.scan.step = stateBeginValue - } - d.scan.step(&d.scan, '"') - d.scan.step(&d.scan, '"') - - n := len(d.scan.parseState) - if n > 0 && d.scan.parseState[n-1] == parseObjectKey { - // d.scan thinks we just read an object key; finish the object - d.scan.step(&d.scan, ':') - d.scan.step(&d.scan, '"') - d.scan.step(&d.scan, '"') - d.scan.step(&d.scan, '}') - } - - return - } - - switch op := d.scanWhile(scanSkipSpace); op { - default: - d.error(errPhase) - - case scanBeginArray: - d.array(v) - - case scanBeginObject: - d.object(v) - - case scanBeginLiteral: - d.literal(v) - - case scanBeginName: - d.name(v) - } -} - -type unquotedValue struct{} - -// valueQuoted is like value but decodes a -// quoted string literal or literal null into an interface value. -// If it finds anything other than a quoted string literal or null, -// valueQuoted returns unquotedValue{}. -func (d *decodeState) valueQuoted() interface{} { - switch op := d.scanWhile(scanSkipSpace); op { - default: - d.error(errPhase) - - case scanBeginArray: - d.array(reflect.Value{}) - - case scanBeginObject: - d.object(reflect.Value{}) - - case scanBeginName: - switch v := d.nameInterface().(type) { - case nil, string: - return v - } - - case scanBeginLiteral: - switch v := d.literalInterface().(type) { - case nil, string: - return v - } - } - return unquotedValue{} -} - -// indirect walks down v allocating pointers as needed, -// until it gets to a non-pointer. -// if it encounters an Unmarshaler, indirect stops and returns that. -// if decodingNull is true, indirect stops at the last pointer so it can be set to nil. -func (d *decodeState) indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) { - // If v is a named type and is addressable, - // start with its address, so that if the type has pointer methods, - // we find them. - if v.Kind() != reflect.Ptr && v.Type().Name() != "" && v.CanAddr() { - v = v.Addr() - } - for { - // Load value from interface, but only if the result will be - // usefully addressable. 
- if v.Kind() == reflect.Interface && !v.IsNil() { - e := v.Elem() - if e.Kind() == reflect.Ptr && !e.IsNil() && (!decodingNull || e.Elem().Kind() == reflect.Ptr) { - v = e - continue - } - } - - if v.Kind() != reflect.Ptr { - break - } - - if v.Elem().Kind() != reflect.Ptr && decodingNull && v.CanSet() { - break - } - if v.IsNil() { - v.Set(reflect.New(v.Type().Elem())) - } - if v.Type().NumMethod() > 0 { - if u, ok := v.Interface().(Unmarshaler); ok { - return u, nil, v - } - if u, ok := v.Interface().(encoding.TextUnmarshaler); ok { - return nil, u, v - } - } - v = v.Elem() - } - return nil, nil, v -} - -// array consumes an array from d.data[d.off-1:], decoding into the value v. -// the first byte of the array ('[') has been read already. -func (d *decodeState) array(v reflect.Value) { - // Check for unmarshaler. - u, ut, pv := d.indirect(v, false) - if u != nil { - d.off-- - err := u.UnmarshalJSON(d.next()) - if err != nil { - d.error(err) - } - return - } - if ut != nil { - d.saveError(&UnmarshalTypeError{"array", v.Type(), int64(d.off)}) - d.off-- - d.next() - return - } - - v = pv - - // Check type of target. - switch v.Kind() { - case reflect.Interface: - if v.NumMethod() == 0 { - // Decoding into nil interface? Switch to non-reflect code. - v.Set(reflect.ValueOf(d.arrayInterface())) - return - } - // Otherwise it's invalid. - fallthrough - default: - d.saveError(&UnmarshalTypeError{"array", v.Type(), int64(d.off)}) - d.off-- - d.next() - return - case reflect.Array: - case reflect.Slice: - break - } - - i := 0 - for { - // Look ahead for ] - can only happen on first iteration. - op := d.scanWhile(scanSkipSpace) - if op == scanEndArray { - break - } - - // Back up so d.value can have the byte we just read. - d.off-- - d.scan.undo(op) - - // Get element of array, growing if necessary. - if v.Kind() == reflect.Slice { - // Grow slice if necessary - if i >= v.Cap() { - newcap := v.Cap() + v.Cap()/2 - if newcap < 4 { - newcap = 4 - } - newv := reflect.MakeSlice(v.Type(), v.Len(), newcap) - reflect.Copy(newv, v) - v.Set(newv) - } - if i >= v.Len() { - v.SetLen(i + 1) - } - } - - if i < v.Len() { - // Decode into element. - d.value(v.Index(i)) - } else { - // Ran out of fixed array: skip. - d.value(reflect.Value{}) - } - i++ - - // Next token must be , or ]. - op = d.scanWhile(scanSkipSpace) - if op == scanEndArray { - break - } - if op != scanArrayValue { - d.error(errPhase) - } - } - - if i < v.Len() { - if v.Kind() == reflect.Array { - // Array. Zero the rest. - z := reflect.Zero(v.Type().Elem()) - for ; i < v.Len(); i++ { - v.Index(i).Set(z) - } - } else { - v.SetLen(i) - } - } - if i == 0 && v.Kind() == reflect.Slice { - v.Set(reflect.MakeSlice(v.Type(), 0, 0)) - } -} - -var nullLiteral = []byte("null") -var textUnmarshalerType = reflect.TypeOf(new(encoding.TextUnmarshaler)).Elem() - -// object consumes an object from d.data[d.off-1:], decoding into the value v. -// the first byte ('{') of the object has been read already. -func (d *decodeState) object(v reflect.Value) { - // Check for unmarshaler. - u, ut, pv := d.indirect(v, false) - if d.storeKeyed(pv) { - return - } - if u != nil { - d.off-- - err := u.UnmarshalJSON(d.next()) - if err != nil { - d.error(err) - } - return - } - if ut != nil { - d.saveError(&UnmarshalTypeError{"object", v.Type(), int64(d.off)}) - d.off-- - d.next() // skip over { } in input - return - } - v = pv - - // Decoding into nil interface? Switch to non-reflect code. 
- if v.Kind() == reflect.Interface && v.NumMethod() == 0 { - v.Set(reflect.ValueOf(d.objectInterface())) - return - } - - // Check type of target: - // struct or - // map[string]T or map[encoding.TextUnmarshaler]T - switch v.Kind() { - case reflect.Map: - // Map key must either have string kind or be an encoding.TextUnmarshaler. - t := v.Type() - if t.Key().Kind() != reflect.String && - !reflect.PtrTo(t.Key()).Implements(textUnmarshalerType) { - d.saveError(&UnmarshalTypeError{"object", v.Type(), int64(d.off)}) - d.off-- - d.next() // skip over { } in input - return - } - if v.IsNil() { - v.Set(reflect.MakeMap(t)) - } - case reflect.Struct: - - default: - d.saveError(&UnmarshalTypeError{"object", v.Type(), int64(d.off)}) - d.off-- - d.next() // skip over { } in input - return - } - - var mapElem reflect.Value - - empty := true - for { - // Read opening " of string key or closing }. - op := d.scanWhile(scanSkipSpace) - if op == scanEndObject { - if !empty && !d.ext.trailingCommas { - d.syntaxError("beginning of object key string") - } - break - } - empty = false - if op == scanBeginName { - if !d.ext.unquotedKeys { - d.syntaxError("beginning of object key string") - } - } else if op != scanBeginLiteral { - d.error(errPhase) - } - unquotedKey := op == scanBeginName - - // Read key. - start := d.off - 1 - op = d.scanWhile(scanContinue) - item := d.data[start : d.off-1] - var key []byte - if unquotedKey { - key = item - // TODO Fix code below to quote item when necessary. - } else { - var ok bool - key, ok = unquoteBytes(item) - if !ok { - d.error(errPhase) - } - } - - // Figure out field corresponding to key. - var subv reflect.Value - destring := false // whether the value is wrapped in a string to be decoded first - - if v.Kind() == reflect.Map { - elemType := v.Type().Elem() - if !mapElem.IsValid() { - mapElem = reflect.New(elemType).Elem() - } else { - mapElem.Set(reflect.Zero(elemType)) - } - subv = mapElem - } else { - var f *field - fields := cachedTypeFields(v.Type()) - for i := range fields { - ff := &fields[i] - if bytes.Equal(ff.nameBytes, key) { - f = ff - break - } - if f == nil && ff.equalFold(ff.nameBytes, key) { - f = ff - } - } - if f != nil { - subv = v - destring = f.quoted - for _, i := range f.index { - if subv.Kind() == reflect.Ptr { - if subv.IsNil() { - subv.Set(reflect.New(subv.Type().Elem())) - } - subv = subv.Elem() - } - subv = subv.Field(i) - } - } - } - - // Read : before value. - if op == scanSkipSpace { - op = d.scanWhile(scanSkipSpace) - } - if op != scanObjectKey { - d.error(errPhase) - } - - // Read value. - if destring { - switch qv := d.valueQuoted().(type) { - case nil: - d.literalStore(nullLiteral, subv, false) - case string: - d.literalStore([]byte(qv), subv, true) - default: - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into %v", subv.Type())) - } - } else { - d.value(subv) - } - - // Write value back to map; - // if using struct, subv points into struct already. - if v.Kind() == reflect.Map { - kt := v.Type().Key() - var kv reflect.Value - switch { - case kt.Kind() == reflect.String: - kv = reflect.ValueOf(key).Convert(v.Type().Key()) - case reflect.PtrTo(kt).Implements(textUnmarshalerType): - kv = reflect.New(v.Type().Key()) - d.literalStore(item, kv, true) - kv = kv.Elem() - default: - panic("json: Unexpected key type") // should never occur - } - v.SetMapIndex(kv, subv) - } - - // Next token must be , or }. 
- op = d.scanWhile(scanSkipSpace) - if op == scanEndObject { - break - } - if op != scanObjectValue { - d.error(errPhase) - } - } -} - -// isNull returns whether there's a null literal at the provided offset. -func (d *decodeState) isNull(off int) bool { - if off+4 >= len(d.data) || d.data[off] != 'n' || d.data[off+1] != 'u' || d.data[off+2] != 'l' || d.data[off+3] != 'l' { - return false - } - d.nextscan.reset() - for i, c := range d.data[off:] { - if i > 4 { - return false - } - switch d.nextscan.step(&d.nextscan, c) { - case scanContinue, scanBeginName: - continue - } - break - } - return true -} - -// name consumes a const or function from d.data[d.off-1:], decoding into the value v. -// the first byte of the function name has been read already. -func (d *decodeState) name(v reflect.Value) { - if d.isNull(d.off - 1) { - d.literal(v) - return - } - - // Check for unmarshaler. - u, ut, pv := d.indirect(v, false) - if d.storeKeyed(pv) { - return - } - if u != nil { - d.off-- - err := u.UnmarshalJSON(d.next()) - if err != nil { - d.error(err) - } - return - } - if ut != nil { - d.saveError(&UnmarshalTypeError{"object", v.Type(), int64(d.off)}) - d.off-- - d.next() // skip over function in input - return - } - v = pv - - // Decoding into nil interface? Switch to non-reflect code. - if v.Kind() == reflect.Interface && v.NumMethod() == 0 { - out := d.nameInterface() - if out == nil { - v.Set(reflect.Zero(v.Type())) - } else { - v.Set(reflect.ValueOf(out)) - } - return - } - - nameStart := d.off - 1 - - op := d.scanWhile(scanContinue) - - name := d.data[nameStart : d.off-1] - if op != scanParam { - // Back up so the byte just read is consumed next. - d.off-- - d.scan.undo(op) - if l, ok := d.convertLiteral(name); ok { - d.storeValue(v, l) - return - } - d.error(&SyntaxError{fmt.Sprintf("json: unknown constant %q", name), int64(d.off)}) - } - - funcName := string(name) - funcData := d.ext.funcs[funcName] - if funcData.key == "" { - d.error(fmt.Errorf("json: unknown function %q", funcName)) - } - - // Check type of target: - // struct or - // map[string]T or map[encoding.TextUnmarshaler]T - switch v.Kind() { - case reflect.Map: - // Map key must either have string kind or be an encoding.TextUnmarshaler. - t := v.Type() - if t.Key().Kind() != reflect.String && - !reflect.PtrTo(t.Key()).Implements(textUnmarshalerType) { - d.saveError(&UnmarshalTypeError{"object", v.Type(), int64(d.off)}) - d.off-- - d.next() // skip over { } in input - return - } - if v.IsNil() { - v.Set(reflect.MakeMap(t)) - } - case reflect.Struct: - - default: - d.saveError(&UnmarshalTypeError{"object", v.Type(), int64(d.off)}) - d.off-- - d.next() // skip over { } in input - return - } - - // TODO Fix case of func field as map. - //topv := v - - // Figure out field corresponding to function. - key := []byte(funcData.key) - if v.Kind() == reflect.Map { - elemType := v.Type().Elem() - v = reflect.New(elemType).Elem() - } else { - var f *field - fields := cachedTypeFields(v.Type()) - for i := range fields { - ff := &fields[i] - if bytes.Equal(ff.nameBytes, key) { - f = ff - break - } - if f == nil && ff.equalFold(ff.nameBytes, key) { - f = ff - } - } - if f != nil { - for _, i := range f.index { - if v.Kind() == reflect.Ptr { - if v.IsNil() { - v.Set(reflect.New(v.Type().Elem())) - } - v = v.Elem() - } - v = v.Field(i) - } - if v.Kind() == reflect.Ptr { - if v.IsNil() { - v.Set(reflect.New(v.Type().Elem())) - } - v = v.Elem() - } - } - } - - // Check for unmarshaler on func field itself. 
- u, _, _ = d.indirect(v, false) - if u != nil { - d.off = nameStart - err := u.UnmarshalJSON(d.next()) - if err != nil { - d.error(err) - } - return - } - - var mapElem reflect.Value - - // Parse function arguments. - for i := 0; ; i++ { - // closing ) - can only happen on first iteration. - op := d.scanWhile(scanSkipSpace) - if op == scanEndParams { - break - } - - // Back up so d.value can have the byte we just read. - d.off-- - d.scan.undo(op) - - if i >= len(funcData.args) { - d.error(fmt.Errorf("json: too many arguments for function %s", funcName)) - } - key := []byte(funcData.args[i]) - - // Figure out field corresponding to key. - var subv reflect.Value - destring := false // whether the value is wrapped in a string to be decoded first - - if v.Kind() == reflect.Map { - elemType := v.Type().Elem() - if !mapElem.IsValid() { - mapElem = reflect.New(elemType).Elem() - } else { - mapElem.Set(reflect.Zero(elemType)) - } - subv = mapElem - } else { - var f *field - fields := cachedTypeFields(v.Type()) - for i := range fields { - ff := &fields[i] - if bytes.Equal(ff.nameBytes, key) { - f = ff - break - } - if f == nil && ff.equalFold(ff.nameBytes, key) { - f = ff - } - } - if f != nil { - subv = v - destring = f.quoted - for _, i := range f.index { - if subv.Kind() == reflect.Ptr { - if subv.IsNil() { - subv.Set(reflect.New(subv.Type().Elem())) - } - subv = subv.Elem() - } - subv = subv.Field(i) - } - } - } - - // Read value. - if destring { - switch qv := d.valueQuoted().(type) { - case nil: - d.literalStore(nullLiteral, subv, false) - case string: - d.literalStore([]byte(qv), subv, true) - default: - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into %v", subv.Type())) - } - } else { - d.value(subv) - } - - // Write value back to map; - // if using struct, subv points into struct already. - if v.Kind() == reflect.Map { - kt := v.Type().Key() - var kv reflect.Value - switch { - case kt.Kind() == reflect.String: - kv = reflect.ValueOf(key).Convert(v.Type().Key()) - case reflect.PtrTo(kt).Implements(textUnmarshalerType): - kv = reflect.New(v.Type().Key()) - d.literalStore(key, kv, true) - kv = kv.Elem() - default: - panic("json: Unexpected key type") // should never occur - } - v.SetMapIndex(kv, subv) - } - - // Next token must be , or ). - op = d.scanWhile(scanSkipSpace) - if op == scanEndParams { - break - } - if op != scanParam { - d.error(errPhase) - } - } -} - -// keyed attempts to decode an object or function using a keyed doc extension, -// and returns the value and true on success, or nil and false otherwise. -func (d *decodeState) keyed() (interface{}, bool) { - if len(d.ext.keyed) == 0 { - return nil, false - } - - unquote := false - - // Look-ahead first key to check for a keyed document extension. 
- d.nextscan.reset() - var start, end int - for i, c := range d.data[d.off-1:] { - switch op := d.nextscan.step(&d.nextscan, c); op { - case scanSkipSpace, scanContinue, scanBeginObject: - continue - case scanBeginLiteral, scanBeginName: - unquote = op == scanBeginLiteral - start = i - continue - } - end = i - break - } - - name := bytes.Trim(d.data[d.off-1+start:d.off-1+end], " \n\t") - - var key []byte - var ok bool - if unquote { - key, ok = unquoteBytes(name) - if !ok { - d.error(errPhase) - } - } else { - funcData, ok := d.ext.funcs[string(name)] - if !ok { - return nil, false - } - key = []byte(funcData.key) - } - - decode, ok := d.ext.keyed[string(key)] - if !ok { - return nil, false - } - - d.off-- - out, err := decode(d.next()) - if err != nil { - d.error(err) - } - return out, true -} - -func (d *decodeState) storeKeyed(v reflect.Value) bool { - keyed, ok := d.keyed() - if !ok { - return false - } - d.storeValue(v, keyed) - return true -} - -var ( - trueBytes = []byte("true") - falseBytes = []byte("false") - nullBytes = []byte("null") -) - -func (d *decodeState) storeValue(v reflect.Value, from interface{}) { - switch from { - case nil: - d.literalStore(nullBytes, v, false) - return - case true: - d.literalStore(trueBytes, v, false) - return - case false: - d.literalStore(falseBytes, v, false) - return - } - fromv := reflect.ValueOf(from) - for fromv.Kind() == reflect.Ptr && !fromv.IsNil() { - fromv = fromv.Elem() - } - fromt := fromv.Type() - for v.Kind() == reflect.Ptr && !v.IsNil() { - v = v.Elem() - } - vt := v.Type() - if fromt.AssignableTo(vt) { - v.Set(fromv) - } else if fromt.ConvertibleTo(vt) { - v.Set(fromv.Convert(vt)) - } else { - d.saveError(&UnmarshalTypeError{"object", v.Type(), int64(d.off)}) - } -} - -func (d *decodeState) convertLiteral(name []byte) (interface{}, bool) { - if len(name) == 0 { - return nil, false - } - switch name[0] { - case 't': - if bytes.Equal(name, trueBytes) { - return true, true - } - case 'f': - if bytes.Equal(name, falseBytes) { - return false, true - } - case 'n': - if bytes.Equal(name, nullBytes) { - return nil, true - } - } - if l, ok := d.ext.consts[string(name)]; ok { - return l, true - } - return nil, false -} - -// literal consumes a literal from d.data[d.off-1:], decoding into the value v. -// The first byte of the literal has been read already -// (that's how the caller knows it's a literal). -func (d *decodeState) literal(v reflect.Value) { - // All bytes inside literal return scanContinue op code. - start := d.off - 1 - op := d.scanWhile(scanContinue) - - // Scan read one byte too far; back up. - d.off-- - d.scan.undo(op) - - d.literalStore(d.data[start:d.off], v, false) -} - -// convertNumber converts the number literal s to a float64 or a Number -// depending on the setting of d.useNumber. -func (d *decodeState) convertNumber(s string) (interface{}, error) { - if d.useNumber { - return Number(s), nil - } - f, err := strconv.ParseFloat(s, 64) - if err != nil { - return nil, &UnmarshalTypeError{"number " + s, reflect.TypeOf(0.0), int64(d.off)} - } - return f, nil -} - -var numberType = reflect.TypeOf(Number("")) - -// literalStore decodes a literal stored in item into v. -// -// fromQuoted indicates whether this literal came from unwrapping a -// string from the ",string" struct tag option. this is used only to -// produce more helpful error messages. -func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool) { - // Check for unmarshaler. 
- if len(item) == 0 { - //Empty string given - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - return - } - wantptr := item[0] == 'n' // null - u, ut, pv := d.indirect(v, wantptr) - if u != nil { - err := u.UnmarshalJSON(item) - if err != nil { - d.error(err) - } - return - } - if ut != nil { - if item[0] != '"' { - if fromQuoted { - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - } else { - d.saveError(&UnmarshalTypeError{"string", v.Type(), int64(d.off)}) - } - return - } - s, ok := unquoteBytes(item) - if !ok { - if fromQuoted { - d.error(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - } else { - d.error(errPhase) - } - } - err := ut.UnmarshalText(s) - if err != nil { - d.error(err) - } - return - } - - v = pv - - switch c := item[0]; c { - case 'n': // null - switch v.Kind() { - case reflect.Interface, reflect.Ptr, reflect.Map, reflect.Slice: - v.Set(reflect.Zero(v.Type())) - // otherwise, ignore null for primitives/string - } - case 't', 'f': // true, false - value := c == 't' - switch v.Kind() { - default: - if fromQuoted { - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - } else { - d.saveError(&UnmarshalTypeError{"bool", v.Type(), int64(d.off)}) - } - case reflect.Bool: - v.SetBool(value) - case reflect.Interface: - if v.NumMethod() == 0 { - v.Set(reflect.ValueOf(value)) - } else { - d.saveError(&UnmarshalTypeError{"bool", v.Type(), int64(d.off)}) - } - } - - case '"': // string - s, ok := unquoteBytes(item) - if !ok { - if fromQuoted { - d.error(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - } else { - d.error(errPhase) - } - } - switch v.Kind() { - default: - d.saveError(&UnmarshalTypeError{"string", v.Type(), int64(d.off)}) - case reflect.Slice: - if v.Type().Elem().Kind() != reflect.Uint8 { - d.saveError(&UnmarshalTypeError{"string", v.Type(), int64(d.off)}) - break - } - b := make([]byte, base64.StdEncoding.DecodedLen(len(s))) - n, err := base64.StdEncoding.Decode(b, s) - if err != nil { - d.saveError(err) - break - } - v.SetBytes(b[:n]) - case reflect.String: - v.SetString(string(s)) - case reflect.Interface: - if v.NumMethod() == 0 { - v.Set(reflect.ValueOf(string(s))) - } else { - d.saveError(&UnmarshalTypeError{"string", v.Type(), int64(d.off)}) - } - } - - default: // number - if c != '-' && (c < '0' || c > '9') { - if fromQuoted { - d.error(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - } else { - d.error(errPhase) - } - } - s := string(item) - switch v.Kind() { - default: - if v.Kind() == reflect.String && v.Type() == numberType { - v.SetString(s) - if !isValidNumber(s) { - d.error(fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", item)) - } - break - } - if fromQuoted { - d.error(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - } else { - d.error(&UnmarshalTypeError{"number", v.Type(), int64(d.off)}) - } - case reflect.Interface: - n, err := d.convertNumber(s) - if err != nil { - d.saveError(err) - break - } - if v.NumMethod() != 0 { - d.saveError(&UnmarshalTypeError{"number", v.Type(), int64(d.off)}) - break - } - v.Set(reflect.ValueOf(n)) - - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, 
reflect.Int64: - n, err := strconv.ParseInt(s, 10, 64) - if err != nil || v.OverflowInt(n) { - d.saveError(&UnmarshalTypeError{"number " + s, v.Type(), int64(d.off)}) - break - } - v.SetInt(n) - - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - n, err := strconv.ParseUint(s, 10, 64) - if err != nil || v.OverflowUint(n) { - d.saveError(&UnmarshalTypeError{"number " + s, v.Type(), int64(d.off)}) - break - } - v.SetUint(n) - - case reflect.Float32, reflect.Float64: - n, err := strconv.ParseFloat(s, v.Type().Bits()) - if err != nil || v.OverflowFloat(n) { - d.saveError(&UnmarshalTypeError{"number " + s, v.Type(), int64(d.off)}) - break - } - v.SetFloat(n) - } - } -} - -// The xxxInterface routines build up a value to be stored -// in an empty interface. They are not strictly necessary, -// but they avoid the weight of reflection in this common case. - -// valueInterface is like value but returns interface{} -func (d *decodeState) valueInterface() interface{} { - switch d.scanWhile(scanSkipSpace) { - default: - d.error(errPhase) - panic("unreachable") - case scanBeginArray: - return d.arrayInterface() - case scanBeginObject: - return d.objectInterface() - case scanBeginLiteral: - return d.literalInterface() - case scanBeginName: - return d.nameInterface() - } -} - -func (d *decodeState) syntaxError(expected string) { - msg := fmt.Sprintf("invalid character '%c' looking for %s", d.data[d.off-1], expected) - d.error(&SyntaxError{msg, int64(d.off)}) -} - -// arrayInterface is like array but returns []interface{}. -func (d *decodeState) arrayInterface() []interface{} { - var v = make([]interface{}, 0) - for { - // Look ahead for ] - can only happen on first iteration. - op := d.scanWhile(scanSkipSpace) - if op == scanEndArray { - if len(v) > 0 && !d.ext.trailingCommas { - d.syntaxError("beginning of value") - } - break - } - - // Back up so d.value can have the byte we just read. - d.off-- - d.scan.undo(op) - - v = append(v, d.valueInterface()) - - // Next token must be , or ]. - op = d.scanWhile(scanSkipSpace) - if op == scanEndArray { - break - } - if op != scanArrayValue { - d.error(errPhase) - } - } - return v -} - -// objectInterface is like object but returns map[string]interface{}. -func (d *decodeState) objectInterface() interface{} { - v, ok := d.keyed() - if ok { - return v - } - - m := make(map[string]interface{}) - for { - // Read opening " of string key or closing }. - op := d.scanWhile(scanSkipSpace) - if op == scanEndObject { - if len(m) > 0 && !d.ext.trailingCommas { - d.syntaxError("beginning of object key string") - } - break - } - if op == scanBeginName { - if !d.ext.unquotedKeys { - d.syntaxError("beginning of object key string") - } - } else if op != scanBeginLiteral { - d.error(errPhase) - } - unquotedKey := op == scanBeginName - - // Read string key. - start := d.off - 1 - op = d.scanWhile(scanContinue) - item := d.data[start : d.off-1] - var key string - if unquotedKey { - key = string(item) - } else { - var ok bool - key, ok = unquote(item) - if !ok { - d.error(errPhase) - } - } - - // Read : before value. - if op == scanSkipSpace { - op = d.scanWhile(scanSkipSpace) - } - if op != scanObjectKey { - d.error(errPhase) - } - - // Read value. - m[key] = d.valueInterface() - - // Next token must be , or }. - op = d.scanWhile(scanSkipSpace) - if op == scanEndObject { - break - } - if op != scanObjectValue { - d.error(errPhase) - } - } - return m -} - -// literalInterface is like literal but returns an interface value. 
-func (d *decodeState) literalInterface() interface{} { - // All bytes inside literal return scanContinue op code. - start := d.off - 1 - op := d.scanWhile(scanContinue) - - // Scan read one byte too far; back up. - d.off-- - d.scan.undo(op) - item := d.data[start:d.off] - - switch c := item[0]; c { - case 'n': // null - return nil - - case 't', 'f': // true, false - return c == 't' - - case '"': // string - s, ok := unquote(item) - if !ok { - d.error(errPhase) - } - return s - - default: // number - if c != '-' && (c < '0' || c > '9') { - d.error(errPhase) - } - n, err := d.convertNumber(string(item)) - if err != nil { - d.saveError(err) - } - return n - } -} - -// nameInterface is like function but returns map[string]interface{}. -func (d *decodeState) nameInterface() interface{} { - v, ok := d.keyed() - if ok { - return v - } - - nameStart := d.off - 1 - - op := d.scanWhile(scanContinue) - - name := d.data[nameStart : d.off-1] - if op != scanParam { - // Back up so the byte just read is consumed next. - d.off-- - d.scan.undo(op) - if l, ok := d.convertLiteral(name); ok { - return l - } - d.error(&SyntaxError{fmt.Sprintf("json: unknown constant %q", name), int64(d.off)}) - } - - funcName := string(name) - funcData := d.ext.funcs[funcName] - if funcData.key == "" { - d.error(fmt.Errorf("json: unknown function %q", funcName)) - } - - m := make(map[string]interface{}) - for i := 0; ; i++ { - // Look ahead for ) - can only happen on first iteration. - op := d.scanWhile(scanSkipSpace) - if op == scanEndParams { - break - } - - // Back up so d.value can have the byte we just read. - d.off-- - d.scan.undo(op) - - if i >= len(funcData.args) { - d.error(fmt.Errorf("json: too many arguments for function %s", funcName)) - } - m[funcData.args[i]] = d.valueInterface() - - // Next token must be , or ). - op = d.scanWhile(scanSkipSpace) - if op == scanEndParams { - break - } - if op != scanParam { - d.error(errPhase) - } - } - return map[string]interface{}{funcData.key: m} -} - -// getu4 decodes \uXXXX from the beginning of s, returning the hex value, -// or it returns -1. -func getu4(s []byte) rune { - if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { - return -1 - } - r, err := strconv.ParseUint(string(s[2:6]), 16, 64) - if err != nil { - return -1 - } - return rune(r) -} - -// unquote converts a quoted JSON string literal s into an actual string t. -// The rules are different than for Go, so cannot use strconv.Unquote. -func unquote(s []byte) (t string, ok bool) { - s, ok = unquoteBytes(s) - t = string(s) - return -} - -func unquoteBytes(s []byte) (t []byte, ok bool) { - if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { - return - } - s = s[1 : len(s)-1] - - // Check for unusual characters. If there are none, - // then no unquoting is needed, so return a slice of the - // original bytes. - r := 0 - for r < len(s) { - c := s[r] - if c == '\\' || c == '"' || c < ' ' { - break - } - if c < utf8.RuneSelf { - r++ - continue - } - rr, size := utf8.DecodeRune(s[r:]) - if rr == utf8.RuneError && size == 1 { - break - } - r += size - } - if r == len(s) { - return s, true - } - - b := make([]byte, len(s)+2*utf8.UTFMax) - w := copy(b, s[0:r]) - for r < len(s) { - // Out of room? Can only happen if s is full of - // malformed UTF-8 and we're replacing each - // byte with RuneError. 
- if w >= len(b)-2*utf8.UTFMax { - nb := make([]byte, (len(b)+utf8.UTFMax)*2) - copy(nb, b[0:w]) - b = nb - } - switch c := s[r]; { - case c == '\\': - r++ - if r >= len(s) { - return - } - switch s[r] { - default: - return - case '"', '\\', '/', '\'': - b[w] = s[r] - r++ - w++ - case 'b': - b[w] = '\b' - r++ - w++ - case 'f': - b[w] = '\f' - r++ - w++ - case 'n': - b[w] = '\n' - r++ - w++ - case 'r': - b[w] = '\r' - r++ - w++ - case 't': - b[w] = '\t' - r++ - w++ - case 'u': - r-- - rr := getu4(s[r:]) - if rr < 0 { - return - } - r += 6 - if utf16.IsSurrogate(rr) { - rr1 := getu4(s[r:]) - if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { - // A valid pair; consume. - r += 6 - w += utf8.EncodeRune(b[w:], dec) - break - } - // Invalid surrogate; fall back to replacement rune. - rr = unicode.ReplacementChar - } - w += utf8.EncodeRune(b[w:], rr) - } - - // Quote, control characters are invalid. - case c == '"', c < ' ': - return - - // ASCII - case c < utf8.RuneSelf: - b[w] = c - r++ - w++ - - // Coerce to well-formed UTF-8. - default: - rr, size := utf8.DecodeRune(s[r:]) - r += size - w += utf8.EncodeRune(b[w:], rr) - } - } - return b[0:w], true -} diff --git a/src/runtime/vendor/github.com/globalsign/mgo/internal/json/encode.go b/src/runtime/vendor/github.com/globalsign/mgo/internal/json/encode.go deleted file mode 100644 index e4b8f86487cd..000000000000 --- a/src/runtime/vendor/github.com/globalsign/mgo/internal/json/encode.go +++ /dev/null @@ -1,1260 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package json implements encoding and decoding of JSON as defined in -// RFC 4627. The mapping between JSON and Go values is described -// in the documentation for the Marshal and Unmarshal functions. -// -// See "JSON and Go" for an introduction to this package: -// https://golang.org/doc/articles/json_and_go.html -package json - -import ( - "bytes" - "encoding" - "encoding/base64" - "fmt" - "math" - "reflect" - "runtime" - "sort" - "strconv" - "strings" - "sync" - "unicode" - "unicode/utf8" -) - -// Marshal returns the JSON encoding of v. -// -// Marshal traverses the value v recursively. -// If an encountered value implements the Marshaler interface -// and is not a nil pointer, Marshal calls its MarshalJSON method -// to produce JSON. If no MarshalJSON method is present but the -// value implements encoding.TextMarshaler instead, Marshal calls -// its MarshalText method. -// The nil pointer exception is not strictly necessary -// but mimics a similar, necessary exception in the behavior of -// UnmarshalJSON. -// -// Otherwise, Marshal uses the following type-dependent default encodings: -// -// Boolean values encode as JSON booleans. -// -// Floating point, integer, and Number values encode as JSON numbers. -// -// String values encode as JSON strings coerced to valid UTF-8, -// replacing invalid bytes with the Unicode replacement rune. -// The angle brackets "<" and ">" are escaped to "\u003c" and "\u003e" -// to keep some browsers from misinterpreting JSON output as HTML. -// Ampersand "&" is also escaped to "\u0026" for the same reason. -// This escaping can be disabled using an Encoder with DisableHTMLEscaping. -// -// Array and slice values encode as JSON arrays, except that -// []byte encodes as a base64-encoded string, and a nil slice -// encodes as the null JSON value. -// -// Struct values encode as JSON objects. 
Each exported struct field -// becomes a member of the object unless -// - the field's tag is "-", or -// - the field is empty and its tag specifies the "omitempty" option. -// The empty values are false, 0, any -// nil pointer or interface value, and any array, slice, map, or string of -// length zero. The object's default key string is the struct field name -// but can be specified in the struct field's tag value. The "json" key in -// the struct field's tag value is the key name, followed by an optional comma -// and options. Examples: -// -// // Field is ignored by this package. -// Field int `json:"-"` -// -// // Field appears in JSON as key "myName". -// Field int `json:"myName"` -// -// // Field appears in JSON as key "myName" and -// // the field is omitted from the object if its value is empty, -// // as defined above. -// Field int `json:"myName,omitempty"` -// -// // Field appears in JSON as key "Field" (the default), but -// // the field is skipped if empty. -// // Note the leading comma. -// Field int `json:",omitempty"` -// -// The "string" option signals that a field is stored as JSON inside a -// JSON-encoded string. It applies only to fields of string, floating point, -// integer, or boolean types. This extra level of encoding is sometimes used -// when communicating with JavaScript programs: -// -// Int64String int64 `json:",string"` -// -// The key name will be used if it's a non-empty string consisting of -// only Unicode letters, digits, dollar signs, percent signs, hyphens, -// underscores and slashes. -// -// Anonymous struct fields are usually marshaled as if their inner exported fields -// were fields in the outer struct, subject to the usual Go visibility rules amended -// as described in the next paragraph. -// An anonymous struct field with a name given in its JSON tag is treated as -// having that name, rather than being anonymous. -// An anonymous struct field of interface type is treated the same as having -// that type as its name, rather than being anonymous. -// -// The Go visibility rules for struct fields are amended for JSON when -// deciding which field to marshal or unmarshal. If there are -// multiple fields at the same level, and that level is the least -// nested (and would therefore be the nesting level selected by the -// usual Go rules), the following extra rules apply: -// -// 1) Of those fields, if any are JSON-tagged, only tagged fields are considered, -// even if there are multiple untagged fields that would otherwise conflict. -// 2) If there is exactly one field (tagged or not according to the first rule), that is selected. -// 3) Otherwise there are multiple fields, and all are ignored; no error occurs. -// -// Handling of anonymous struct fields is new in Go 1.1. -// Prior to Go 1.1, anonymous struct fields were ignored. To force ignoring of -// an anonymous struct field in both current and earlier versions, give the field -// a JSON tag of "-". -// -// Map values encode as JSON objects. The map's key type must either be a string -// or implement encoding.TextMarshaler. The map keys are used as JSON object -// keys, subject to the UTF-8 coercion described for string values above. -// -// Pointer values encode as the value pointed to. -// A nil pointer encodes as the null JSON value. -// -// Interface values encode as the value contained in the interface. -// A nil interface value encodes as the null JSON value. -// -// Channel, complex, and function values cannot be encoded in JSON. 
-// Attempting to encode such a value causes Marshal to return -// an UnsupportedTypeError. -// -// JSON cannot represent cyclic data structures and Marshal does not -// handle them. Passing cyclic structures to Marshal will result in -// an infinite recursion. -// -func Marshal(v interface{}) ([]byte, error) { - e := &encodeState{} - err := e.marshal(v, encOpts{escapeHTML: true}) - if err != nil { - return nil, err - } - return e.Bytes(), nil -} - -// MarshalIndent is like Marshal but applies Indent to format the output. -func MarshalIndent(v interface{}, prefix, indent string) ([]byte, error) { - b, err := Marshal(v) - if err != nil { - return nil, err - } - var buf bytes.Buffer - err = Indent(&buf, b, prefix, indent) - if err != nil { - return nil, err - } - return buf.Bytes(), nil -} - -// HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029 -// characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029 -// so that the JSON will be safe to embed inside HTML
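The Marshal documentation deleted above describes the struct-tag options (`-`, `omitempty`, `,string`) that the decoder code earlier in this diff (`literalStore`, the `destring` path in `object`) consumes on the way back in. As an editorial aside, not part of the diff: here is a minimal sketch of those tag semantics using the standard `encoding/json` package, whose behavior this vendored fork mirrors. The `Event` type and field names are hypothetical, chosen only for illustration.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Event illustrates the tag options documented in the deleted encode.go:
// "-" skips a field, "omitempty" drops empty values, and ",string"
// wraps a numeric field in a JSON-encoded string.
type Event struct {
	Name    string `json:"name"`
	Secret  string `json:"-"`              // never marshaled
	Note    string `json:"note,omitempty"` // omitted when empty
	Retries int64  `json:",string"`        // encoded as "3", not 3
}

func main() {
	out, err := json.Marshal(Event{Name: "sync", Secret: "hidden", Retries: 3})
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // {"name":"sync","Retries":"3"}

	// On decode, the ",string" option makes Unmarshal unwrap the quoted
	// number before storing it, matching the destring/literalStore path
	// in the deleted decode.go.
	var in Event
	if err := json.Unmarshal([]byte(`{"name":"sync","Retries":"5"}`), &in); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", in) // {Name:sync Secret: Note: Retries:5}
}
```

This is only a sketch of the documented behavior; the vendored mgo fork adds extensions (unquoted keys, trailing commas, function literals) on top of these semantics, as the deleted decoder code above shows.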