mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-02-22 14:54:23 +00:00
Compare commits
141 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c980b6e191 | ||
|
|
30aff429df | ||
|
|
014ab2fce6 | ||
|
|
dd1752ac1c | ||
|
|
29ab8df881 | ||
|
|
0ac8f1f70e | ||
|
|
a0ae1b6608 | ||
|
|
412a384aad | ||
|
|
0daafecef2 | ||
|
|
f0db4032f2 | ||
|
|
208cec429a | ||
|
|
1f978ecc31 | ||
|
|
b23d094928 | ||
|
|
0f19465b3a | ||
|
|
e05197e81c | ||
|
|
683d673f4f | ||
|
|
38242d3a61 | ||
|
|
283fd45045 | ||
|
|
730b0f1769 | ||
|
|
585d0be342 | ||
|
|
b748688e69 | ||
|
|
c4af9be411 | ||
|
|
bce8efca67 | ||
|
|
e20f6b2f9d | ||
|
|
3503bcdb50 | ||
|
|
a03dc3129d | ||
|
|
93ec470928 | ||
|
|
903e608c23 | ||
|
|
c92bb1aa88 | ||
|
|
28bd0cf405 | ||
|
|
3a4e1917d2 | ||
|
|
3a5e2060aa | ||
|
|
55ee8abf0b | ||
|
|
0fa7d5b293 | ||
|
|
dcb62a7f91 | ||
|
|
8be41a4e80 | ||
|
|
65a9fe0063 | ||
|
|
43cdde4c5d | ||
|
|
9891b111d1 | ||
|
|
d147e2491b | ||
|
|
479cce8406 | ||
|
|
ea74024b93 | ||
|
|
aadad0c9b6 | ||
|
|
cfd0ebe85f | ||
|
|
c7f4c9a3bb | ||
|
|
2f50c85b12 | ||
|
|
5635410dd3 | ||
|
|
1a6f1fc3ac | ||
|
|
9379a18c8a | ||
|
|
c7c811071a | ||
|
|
f3a669ee2d | ||
|
|
407252a863 | ||
|
|
196d7d674d | ||
|
|
be148c7f72 | ||
|
|
dcbdf56281 | ||
|
|
1d2f2d6350 | ||
|
|
aaf8de3dbf | ||
|
|
9816ffdac7 | ||
|
|
1aa65167d7 | ||
|
|
b50777a174 | ||
|
|
beea0c34c5 | ||
|
|
f9e16431c1 | ||
|
|
f9a6359674 | ||
|
|
6d96875d04 | ||
|
|
69f21692ed | ||
|
|
00bfa3fa02 | ||
|
|
e2156721fd | ||
|
|
1f95d9401b | ||
|
|
cdc0eab8e4 | ||
|
|
ec480dc438 | ||
|
|
37685c41c7 | ||
|
|
163f04a918 | ||
|
|
e3b4d87b6d | ||
|
|
3eb0641431 | ||
|
|
1b1b3af9ab | ||
|
|
af01434226 | ||
|
|
ede773db17 | ||
|
|
05eca5ca25 | ||
|
|
c47bff6d6a | ||
|
|
82f141a02e | ||
|
|
7198c8789e | ||
|
|
9585e608e5 | ||
|
|
8422411d91 | ||
|
|
3fd354b991 | ||
|
|
9e38fd2562 | ||
|
|
f7a36df290 | ||
|
|
d077ed4c1e | ||
|
|
8d30b84abd | ||
|
|
20bef41347 | ||
|
|
96f1d95de5 | ||
|
|
fbb0e7f2f2 | ||
|
|
30778594d0 | ||
|
|
8768e08258 | ||
|
|
254dbd9b45 | ||
|
|
568b13400a | ||
|
|
6188b7f79f | ||
|
|
9a829107ba | ||
|
|
7669f1fbd1 | ||
|
|
97d7575d41 | ||
|
|
00e0db99a3 | ||
|
|
5cccbb9f41 | ||
|
|
1aaaef2134 | ||
|
|
c11c972465 | ||
|
|
30bfa2dfcc | ||
|
|
94995d7102 | ||
|
|
f6016f4f36 | ||
|
|
077c59dd1f | ||
|
|
74fba9c736 | ||
|
|
2a3c8b04df | ||
|
|
3f46347dc5 | ||
|
|
e5d5768c75 | ||
|
|
4ca6c2d917 | ||
|
|
3ec10b3721 | ||
|
|
14e9d2c815 | ||
|
|
6f6d64604f | ||
|
|
860779c4d9 | ||
|
|
639273366a | ||
|
|
2e81ac463a | ||
|
|
5f7da1ccaa | ||
|
|
225e6fffbc | ||
|
|
0502b05718 | ||
|
|
60e3679eb7 | ||
|
|
613dba6f1f | ||
|
|
6aa3517393 | ||
|
|
c762a3dd4f | ||
|
|
fdbe549368 | ||
|
|
635272f3e8 | ||
|
|
79f29bc523 | ||
|
|
475baf95ad | ||
|
|
b40d65bc1b | ||
|
|
e683a7fd37 | ||
|
|
4521cae0c0 | ||
|
|
b4d276bc2b | ||
|
|
fbd84fd3f4 | ||
|
|
57645c0786 | ||
|
|
40e6aacc34 | ||
|
|
125383e53c | ||
|
|
ef9d960763 | ||
|
|
58925714d2 | ||
|
|
a12ae58431 | ||
|
|
3ab6a8462d |
1
.github/actionlint.yaml
vendored
1
.github/actionlint.yaml
vendored
@@ -23,3 +23,4 @@ self-hosted-runner:
|
||||
- s390x
|
||||
- s390x-large
|
||||
- tdx
|
||||
- amd64-nvidia-a100
|
||||
|
||||
15
.github/workflows/basic-ci-amd64.yaml
vendored
15
.github/workflows/basic-ci-amd64.yaml
vendored
@@ -49,6 +49,8 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
@@ -89,6 +91,8 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/stability/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
@@ -132,6 +136,8 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/nydus/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
@@ -209,6 +215,8 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/functional/tracing/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
@@ -253,6 +261,8 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/functional/vfio/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
@@ -294,6 +304,8 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/docker/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
@@ -339,6 +351,7 @@ jobs:
|
||||
- name: Install dependencies
|
||||
env:
|
||||
GITHUB_API_TOKEN: ${{ github.token }}
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
run: bash tests/integration/nerdctl/gha-run.sh install-dependencies
|
||||
|
||||
- name: get-kata-tarball
|
||||
@@ -383,6 +396,8 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/functional/kata-agent-apis/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
|
||||
4
.github/workflows/basic-ci-s390x.yaml
vendored
4
.github/workflows/basic-ci-s390x.yaml
vendored
@@ -48,7 +48,9 @@ jobs:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies
|
||||
run: bash tests/integration/cri-containerd/gha-run.sh
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
|
||||
4
.github/workflows/build-checks.yaml
vendored
4
.github/workflows/build-checks.yaml
vendored
@@ -42,6 +42,10 @@ jobs:
|
||||
path: src/runtime-rs
|
||||
needs:
|
||||
- rust
|
||||
- name: libs
|
||||
path: src/libs
|
||||
needs:
|
||||
- rust
|
||||
- name: agent-ctl
|
||||
path: src/tools/agent-ctl
|
||||
needs:
|
||||
|
||||
@@ -23,6 +23,8 @@ on:
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD:
|
||||
required: false
|
||||
KBUILD_SIGN_PIN:
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -95,6 +97,7 @@ jobs:
|
||||
- name: Build ${{ matrix.asset }}
|
||||
id: build
|
||||
run: |
|
||||
[[ "${KATA_ASSET}" == *"nvidia"* ]] && echo "KBUILD_SIGN_PIN=${{ secrets.KBUILD_SIGN_PIN }}" >> "${GITHUB_ENV}"
|
||||
make "${KATA_ASSET}-tarball"
|
||||
build_dir=$(readlink -f build)
|
||||
# store-artifact does not work with symlink
|
||||
@@ -201,6 +204,7 @@ jobs:
|
||||
- name: Build ${{ matrix.asset }}
|
||||
id: build
|
||||
run: |
|
||||
[[ "${KATA_ASSET}" == *"nvidia"* ]] && echo "KBUILD_SIGN_PIN=${{ secrets.KBUILD_SIGN_PIN }}" >> "${GITHUB_ENV}"
|
||||
./tests/gha-adjust-to-use-prebuilt-components.sh kata-artifacts "${KATA_ASSET}"
|
||||
make "${KATA_ASSET}-tarball"
|
||||
build_dir=$(readlink -f build)
|
||||
|
||||
1
.github/workflows/ci-coco-stability.yaml
vendored
1
.github/workflows/ci-coco-stability.yaml
vendored
@@ -31,3 +31,4 @@ jobs:
|
||||
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
|
||||
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
2
.github/workflows/ci-devel.yaml
vendored
2
.github/workflows/ci-devel.yaml
vendored
@@ -27,6 +27,8 @@ jobs:
|
||||
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
|
||||
ITA_KEY: ${{ secrets.ITA_KEY }}
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
build-checks:
|
||||
uses: ./.github/workflows/build-checks.yaml
|
||||
|
||||
2
.github/workflows/ci-nightly.yaml
vendored
2
.github/workflows/ci-nightly.yaml
vendored
@@ -31,3 +31,5 @@ jobs:
|
||||
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
|
||||
ITA_KEY: ${{ secrets.ITA_KEY }}
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
3
.github/workflows/ci-on-push.yaml
vendored
3
.github/workflows/ci-on-push.yaml
vendored
@@ -3,7 +3,6 @@ on:
|
||||
pull_request_target:
|
||||
branches:
|
||||
- 'main'
|
||||
- 'stable-*'
|
||||
types:
|
||||
# Adding 'labeled' to the list of activity types that trigger this event
|
||||
# (default: opened, synchronize, reopened) so that we can run this
|
||||
@@ -52,3 +51,5 @@ jobs:
|
||||
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
|
||||
ITA_KEY: ${{ secrets.ITA_KEY }}
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
4
.github/workflows/ci-weekly.yaml
vendored
4
.github/workflows/ci-weekly.yaml
vendored
@@ -27,6 +27,8 @@ on:
|
||||
required: true
|
||||
QUAY_DEPLOYER_PASSWORD:
|
||||
required: true
|
||||
KBUILD_SIGN_PIN:
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -43,6 +45,8 @@ jobs:
|
||||
tarball-suffix: -${{ inputs.tag }}
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
secrets:
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
publish-kata-deploy-payload-amd64:
|
||||
needs: build-kata-static-tarball-amd64
|
||||
|
||||
35
.github/workflows/ci.yaml
vendored
35
.github/workflows/ci.yaml
vendored
@@ -35,6 +35,10 @@ on:
|
||||
required: true
|
||||
QUAY_DEPLOYER_PASSWORD:
|
||||
required: true
|
||||
NGC_API_KEY:
|
||||
required: true
|
||||
KBUILD_SIGN_PIN:
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -52,6 +56,8 @@ jobs:
|
||||
tarball-suffix: -${{ inputs.tag }}
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
secrets:
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
publish-kata-deploy-payload-amd64:
|
||||
needs: build-kata-static-tarball-amd64
|
||||
@@ -323,6 +329,21 @@ jobs:
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
|
||||
run-k8s-tests-on-nvidia-gpu:
|
||||
if: ${{ inputs.skip-test != 'yes' }}
|
||||
needs: publish-kata-deploy-payload-amd64
|
||||
uses: ./.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
|
||||
with:
|
||||
registry: ghcr.io
|
||||
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||
tag: ${{ inputs.tag }}-amd64
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
secrets:
|
||||
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
|
||||
|
||||
|
||||
run-kata-coco-tests:
|
||||
if: ${{ inputs.skip-test != 'yes' }}
|
||||
needs:
|
||||
@@ -383,20 +404,6 @@ jobs:
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
|
||||
run-metrics-tests:
|
||||
# Skip metrics tests whilst runner is broken
|
||||
if: false
|
||||
# if: ${{ inputs.skip-test != 'yes' }}
|
||||
needs: build-kata-static-tarball-amd64
|
||||
uses: ./.github/workflows/run-metrics.yaml
|
||||
with:
|
||||
registry: ghcr.io
|
||||
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||
tag: ${{ inputs.tag }}-amd64
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
|
||||
run-basic-amd64-tests:
|
||||
if: ${{ inputs.skip-test != 'yes' }}
|
||||
needs: build-kata-static-tarball-amd64
|
||||
|
||||
1
.github/workflows/payload-after-push.yaml
vendored
1
.github/workflows/payload-after-push.yaml
vendored
@@ -25,6 +25,7 @@ jobs:
|
||||
target-branch: ${{ github.ref_name }}
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
build-assets-arm64:
|
||||
permissions:
|
||||
|
||||
3
.github/workflows/release-amd64.yaml
vendored
3
.github/workflows/release-amd64.yaml
vendored
@@ -8,6 +8,8 @@ on:
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD:
|
||||
required: true
|
||||
KBUILD_SIGN_PIN:
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -20,6 +22,7 @@ jobs:
|
||||
stage: release
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
1
.github/workflows/release.yaml
vendored
1
.github/workflows/release.yaml
vendored
@@ -35,6 +35,7 @@ jobs:
|
||||
target-arch: amd64
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
build-and-push-assets-arm64:
|
||||
needs: release
|
||||
|
||||
@@ -59,6 +59,8 @@ jobs:
|
||||
- name: Install dependencies
|
||||
timeout-minutes: 15
|
||||
run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball for ${{ inputs.arch }}
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
|
||||
89
.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
vendored
Normal file
89
.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
vendored
Normal file
@@ -0,0 +1,89 @@
|
||||
name: CI | Run NVIDIA GPU kubernetes tests on arm64
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
registry:
|
||||
required: true
|
||||
type: string
|
||||
repo:
|
||||
required: true
|
||||
type: string
|
||||
tag:
|
||||
required: true
|
||||
type: string
|
||||
pr-number:
|
||||
required: true
|
||||
type: string
|
||||
commit-hash:
|
||||
required: false
|
||||
type: string
|
||||
target-branch:
|
||||
required: false
|
||||
type: string
|
||||
default: ""
|
||||
secrets:
|
||||
NGC_API_KEY:
|
||||
required: true
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
run-nvidia-gpu-tests-on-amd64:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
vmm:
|
||||
- qemu-nvidia-gpu
|
||||
k8s:
|
||||
- kubeadm
|
||||
runs-on: amd64-nvidia-a100
|
||||
env:
|
||||
DOCKER_REGISTRY: ${{ inputs.registry }}
|
||||
DOCKER_REPO: ${{ inputs.repo }}
|
||||
DOCKER_TAG: ${{ inputs.tag }}
|
||||
GH_PR_NUMBER: ${{ inputs.pr-number }}
|
||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||
KUBERNETES: ${{ matrix.k8s }}
|
||||
USING_NFD: "false"
|
||||
K8S_TEST_HOST_TYPE: all
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Deploy Kata
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
|
||||
|
||||
- name: Install `bats`
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-bats
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 30
|
||||
run: bash tests/integration/kubernetes/gha-run.sh run-nv-tests
|
||||
env:
|
||||
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
|
||||
- name: Collect artifacts ${{ matrix.vmm }}
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts
|
||||
continue-on-error: true
|
||||
|
||||
- name: Archive artifacts ${{ matrix.vmm }}
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: k8s-tests-${{ matrix.vmm }}-${{ matrix.k8s }}-${{ inputs.tag }}
|
||||
path: /tmp/artifacts
|
||||
retention-days: 1
|
||||
|
||||
- name: Delete kata-deploy
|
||||
if: always()
|
||||
timeout-minutes: 5
|
||||
run: bash tests/integration/kubernetes/gha-run.sh cleanup
|
||||
@@ -91,9 +91,6 @@ jobs:
|
||||
- name: Install kata
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
|
||||
|
||||
- name: Download Azure CLI
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-azure-cli
|
||||
|
||||
- name: Log into the Azure account
|
||||
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
|
||||
with:
|
||||
|
||||
3
.github/workflows/run-kata-coco-tests.yaml
vendored
3
.github/workflows/run-kata-coco-tests.yaml
vendored
@@ -268,9 +268,6 @@ jobs:
|
||||
- name: Install kata
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-artifacts
|
||||
|
||||
- name: Download Azure CLI
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-azure-cli
|
||||
|
||||
- name: Log into the Azure account
|
||||
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
|
||||
with:
|
||||
|
||||
@@ -72,9 +72,6 @@ jobs:
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Download Azure CLI
|
||||
run: bash tests/functional/kata-deploy/gha-run.sh install-azure-cli
|
||||
|
||||
- name: Log into the Azure account
|
||||
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
|
||||
with:
|
||||
|
||||
@@ -54,6 +54,8 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/functional/kata-monitor/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
|
||||
2
.github/workflows/run-runk-tests.yaml
vendored
2
.github/workflows/run-runk-tests.yaml
vendored
@@ -38,6 +38,8 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/runk/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
|
||||
33
.github/workflows/static-checks.yaml
vendored
33
.github/workflows/static-checks.yaml
vendored
@@ -150,3 +150,36 @@ jobs:
|
||||
needs: skipper
|
||||
if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
|
||||
uses: ./.github/workflows/govulncheck.yaml
|
||||
|
||||
codegen:
|
||||
runs-on: ubuntu-22.04
|
||||
needs: skipper
|
||||
if: ${{ needs.skipper.outputs.skip_static != 'yes' }}
|
||||
permissions:
|
||||
contents: read # for checkout
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
- name: generate
|
||||
run: make -C src/agent generate-protocols
|
||||
- name: check for diff
|
||||
run: |
|
||||
diff=$(git diff)
|
||||
if [[ -z "${diff}" ]]; then
|
||||
echo "No diff detected."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
cat << EOF >> "${GITHUB_STEP_SUMMARY}"
|
||||
Run \`make -C src/agent generate-protocols\` to update protobuf bindings.
|
||||
|
||||
\`\`\`diff
|
||||
${diff}
|
||||
\`\`\`
|
||||
EOF
|
||||
|
||||
echo "::error::Golang protobuf bindings need to be regenerated (see Github step summary for diff)."
|
||||
exit 1
|
||||
|
||||
2
Makefile
2
Makefile
@@ -42,7 +42,7 @@ generate-protocols:
|
||||
|
||||
# Some static checks rely on generated source files of components.
|
||||
static-checks: static-checks-build
|
||||
bash tests/static-checks.sh github.com/kata-containers/kata-containers
|
||||
bash tests/static-checks.sh
|
||||
|
||||
docs-url-alive-check:
|
||||
bash ci/docs-url-alive-check.sh
|
||||
|
||||
114
src/agent/Cargo.lock
generated
114
src/agent/Cargo.lock
generated
@@ -508,6 +508,15 @@ dependencies = [
|
||||
"wyz",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.10.4"
|
||||
@@ -889,6 +898,16 @@ dependencies = [
|
||||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-mac"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1d1a86f49236c215f271d40892d5fc950490551400b02ef360692c29815c714"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling"
|
||||
version = "0.14.4"
|
||||
@@ -1033,13 +1052,22 @@ dependencies = [
|
||||
"syn 2.0.101",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"block-buffer 0.10.4",
|
||||
"crypto-common",
|
||||
]
|
||||
|
||||
@@ -1543,6 +1571,16 @@ version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
|
||||
|
||||
[[package]]
|
||||
name = "hmac"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2a2a2320eb7ec0ebe8da8f744d7812d9fc4cb4d09344ac01898dbcb6a20ae69b"
|
||||
dependencies = [
|
||||
"crypto-mac",
|
||||
"digest 0.9.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "home"
|
||||
version = "0.5.9"
|
||||
@@ -2049,7 +2087,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serial_test",
|
||||
"sha2",
|
||||
"sha2 0.10.9",
|
||||
"slog",
|
||||
"slog-scope",
|
||||
"slog-stdlog",
|
||||
@@ -2133,7 +2171,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde-enum-str",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
"sha2 0.10.9",
|
||||
"slog",
|
||||
"slog-scope",
|
||||
"sysinfo",
|
||||
@@ -2210,6 +2248,23 @@ version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a7cbbd4ad467251987c6e5b47d53b11a5a05add08f2447a9e2d70aef1e0d138"
|
||||
|
||||
[[package]]
|
||||
name = "libsystemd"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6f4f0b5b062ba67aa075e331de778082c09e66b5ef32970ea5a1e9c37c9555d1"
|
||||
dependencies = [
|
||||
"hmac",
|
||||
"libc",
|
||||
"log",
|
||||
"nix 0.23.2",
|
||||
"once_cell",
|
||||
"serde",
|
||||
"sha2 0.9.9",
|
||||
"thiserror 1.0.69",
|
||||
"uuid 0.8.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libz-sys"
|
||||
version = "1.1.22"
|
||||
@@ -2273,6 +2328,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"slog",
|
||||
"slog-async",
|
||||
"slog-journald",
|
||||
"slog-json",
|
||||
"slog-scope",
|
||||
"slog-term",
|
||||
@@ -2734,6 +2790,12 @@ version = "1.21.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
||||
|
||||
[[package]]
|
||||
name = "opaque-debug"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381"
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry"
|
||||
version = "0.14.0"
|
||||
@@ -3498,7 +3560,7 @@ dependencies = [
|
||||
"rkyv_derive",
|
||||
"seahash",
|
||||
"tinyvec",
|
||||
"uuid",
|
||||
"uuid 1.16.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3911,7 +3973,20 @@ checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
"digest 0.10.7",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.9.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800"
|
||||
dependencies = [
|
||||
"block-buffer 0.9.0",
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest 0.9.0",
|
||||
"opaque-debug",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3922,7 +3997,7 @@ checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
"digest 0.10.7",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3994,6 +4069,16 @@ dependencies = [
|
||||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "slog-journald"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83e14eb8c2f5d0c8fc9fbac40e6391095e4dc5cb334f7dce99c75cb1919eb39c"
|
||||
dependencies = [
|
||||
"libsystemd",
|
||||
"slog",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "slog-json"
|
||||
version = "2.6.1"
|
||||
@@ -4133,6 +4218,12 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "subtle"
|
||||
version = "2.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
@@ -4694,6 +4785,15 @@ version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "1.16.0"
|
||||
@@ -4707,7 +4807,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23b082222b4f6619906941c17eb2297fff4c2fb96cb60164170522942a200bd8"
|
||||
dependencies = [
|
||||
"outref",
|
||||
"uuid",
|
||||
"uuid 1.16.0",
|
||||
"vsimd",
|
||||
]
|
||||
|
||||
|
||||
@@ -217,4 +217,11 @@ codecov-html: check_tarpaulin
|
||||
|
||||
##TARGET generate-protocols: generate/update grpc agent protocols
|
||||
generate-protocols:
|
||||
image=$$(docker build -q \
|
||||
--build-arg GO_VERSION=$$(yq '.languages.golang.version' $(CURDIR)/../../versions.yaml) \
|
||||
--build-arg PROTOC_VERSION=$$(yq '.externals.protoc.version' $(CURDIR)/../../versions.yaml | grep -oE "[0-9.]+") \
|
||||
--build-arg PROTOC_GEN_GO_VERSION=$$(yq '.externals.protoc-gen-go.version' $(CURDIR)/../../versions.yaml) \
|
||||
--build-arg TTRPC_VERSION=$$(yq '.externals.ttrpc.version' $(CURDIR)/../../versions.yaml) \
|
||||
$(CURDIR)/../../tools/packaging/static-build/codegen) && \
|
||||
docker run --rm --workdir /kata/src/agent -v $(CURDIR)/../..:/kata --user $(shell id -u) $$image \
|
||||
../libs/protocols/hack/update-generated-proto.sh all
|
||||
|
||||
@@ -22,6 +22,8 @@ use protocols::{
|
||||
};
|
||||
use safe_path::scoped_join;
|
||||
use std::fs;
|
||||
use std::fs::File;
|
||||
use std::io::{self, Read};
|
||||
use std::path::Path;
|
||||
use std::{os::unix::fs::symlink, path::PathBuf};
|
||||
use tokio::sync::OnceCell;
|
||||
@@ -235,8 +237,8 @@ pub async fn unseal_file(path: &str) -> Result<()> {
|
||||
}
|
||||
|
||||
let secret_name = entry.file_name();
|
||||
let contents = fs::read_to_string(&target_path)?;
|
||||
if contents.starts_with(SEALED_SECRET_PREFIX) {
|
||||
if content_starts_with_prefix(&target_path, SEALED_SECRET_PREFIX).await? {
|
||||
let contents = fs::read_to_string(&target_path)?;
|
||||
// Get the directory name of the sealed secret file
|
||||
let dir_name = target_path
|
||||
.parent()
|
||||
@@ -262,6 +264,17 @@ pub async fn unseal_file(path: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn content_starts_with_prefix(path: &Path, prefix: &str) -> io::Result<bool> {
|
||||
let mut file = File::open(path)?;
|
||||
let mut buffer = vec![0u8; prefix.len()];
|
||||
|
||||
match file.read_exact(&mut buffer) {
|
||||
Ok(()) => Ok(buffer == prefix.as_bytes()),
|
||||
Err(ref e) if e.kind() == io::ErrorKind::UnexpectedEof => Ok(false),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn secure_mount(
|
||||
volume_type: &str,
|
||||
options: &std::collections::HashMap<String, String>,
|
||||
@@ -294,7 +307,7 @@ mod tests {
|
||||
use std::fs::File;
|
||||
use std::io::{Read, Write};
|
||||
use std::sync::Arc;
|
||||
use tempfile::tempdir;
|
||||
use tempfile::{tempdir, NamedTempFile};
|
||||
use test_utils::skip_if_not_root;
|
||||
use tokio::signal::unix::{signal, SignalKind};
|
||||
struct TestService;
|
||||
@@ -416,4 +429,34 @@ mod tests {
|
||||
rt.shutdown_background();
|
||||
std::thread::sleep(std::time::Duration::from_secs(2));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_content_starts_with_prefix() {
|
||||
// Normal case: content matches the prefix
|
||||
let mut f = NamedTempFile::new().unwrap();
|
||||
write!(f, "sealed.hello_world").unwrap();
|
||||
assert!(content_starts_with_prefix(f.path(), "sealed.")
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
// Does not match the prefix
|
||||
let mut f2 = NamedTempFile::new().unwrap();
|
||||
write!(f2, "notsealed.hello_world").unwrap();
|
||||
assert!(!content_starts_with_prefix(f2.path(), "sealed.")
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
// File length < prefix.len()
|
||||
let mut f3 = NamedTempFile::new().unwrap();
|
||||
write!(f3, "seal").unwrap();
|
||||
assert!(!content_starts_with_prefix(f3.path(), "sealed.")
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
// Empty file
|
||||
let f4 = NamedTempFile::new().unwrap();
|
||||
assert!(!content_starts_with_prefix(f4.path(), "sealed.")
|
||||
.await
|
||||
.unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
8
src/dragonball/Cargo.lock
generated
8
src/dragonball/Cargo.lock
generated
@@ -1461,9 +1461,9 @@ checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
|
||||
[[package]]
|
||||
name = "openssl"
|
||||
version = "0.10.72"
|
||||
version = "0.10.73"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da"
|
||||
checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8"
|
||||
dependencies = [
|
||||
"bitflags 2.4.0",
|
||||
"cfg-if",
|
||||
@@ -1502,9 +1502,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "openssl-sys"
|
||||
version = "0.9.108"
|
||||
version = "0.9.109"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e145e1651e858e820e4860f7b9c5e169bc1d8ce1c86043be79fa7b7634821847"
|
||||
checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
|
||||
@@ -146,7 +146,6 @@ mod tests {
|
||||
assert!(MacAddr::from_bytes(&src3[..]).is_err());
|
||||
}
|
||||
|
||||
#[cfg(feature = "with-serde")]
|
||||
#[test]
|
||||
fn test_mac_addr_serialization_and_deserialization() {
|
||||
let mac: MacAddr =
|
||||
|
||||
@@ -313,8 +313,8 @@ mod tests {
|
||||
pub struct TestContext {
|
||||
pub cid: u64,
|
||||
pub mem: GuestMemoryMmap,
|
||||
pub mem_size: usize,
|
||||
pub epoll_manager: EpollManager,
|
||||
pub _mem_size: usize,
|
||||
pub _epoll_manager: EpollManager,
|
||||
pub device: Vsock<Arc<GuestMemoryMmap>, TestMuxer>,
|
||||
}
|
||||
|
||||
@@ -327,8 +327,8 @@ mod tests {
|
||||
Self {
|
||||
cid: CID,
|
||||
mem,
|
||||
mem_size: MEM_SIZE,
|
||||
epoll_manager: epoll_manager.clone(),
|
||||
_mem_size: MEM_SIZE,
|
||||
_epoll_manager: epoll_manager.clone(),
|
||||
device: Vsock::new_with_muxer(
|
||||
CID,
|
||||
Arc::new(defs::QUEUE_SIZES.to_vec()),
|
||||
@@ -394,7 +394,7 @@ mod tests {
|
||||
EventHandlerContext {
|
||||
guest_rxvq,
|
||||
guest_txvq,
|
||||
guest_evvq,
|
||||
_guest_evvq: guest_evvq,
|
||||
queues,
|
||||
epoll_handler: None,
|
||||
device: Vsock::new_with_muxer(
|
||||
@@ -422,7 +422,7 @@ mod tests {
|
||||
pub queues: Vec<VirtioQueueConfig<QueueSync>>,
|
||||
pub guest_rxvq: GuestQ<'a>,
|
||||
pub guest_txvq: GuestQ<'a>,
|
||||
pub guest_evvq: GuestQ<'a>,
|
||||
pub _guest_evvq: GuestQ<'a>,
|
||||
pub mem: Arc<GuestMemoryMmap>,
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@ use tracing::instrument;
|
||||
use crate::error::{Result, StartMicroVmError, StopMicrovmError};
|
||||
use crate::event_manager::EventManager;
|
||||
use crate::tracer::{DragonballTracer, TraceError, TraceInfo};
|
||||
use crate::vcpu::VcpuManagerError;
|
||||
use crate::vm::{CpuTopology, KernelConfigInfo, VmConfigInfo};
|
||||
use crate::vmm::Vmm;
|
||||
|
||||
@@ -55,6 +54,8 @@ pub use crate::device_manager::virtio_net_dev_mgr::{
|
||||
};
|
||||
#[cfg(feature = "virtio-vsock")]
|
||||
pub use crate::device_manager::vsock_dev_mgr::{VsockDeviceConfigInfo, VsockDeviceError};
|
||||
#[cfg(feature = "host-device")]
|
||||
use crate::vcpu::VcpuManagerError;
|
||||
#[cfg(feature = "hotplug")]
|
||||
pub use crate::vcpu::{VcpuResizeError, VcpuResizeInfo};
|
||||
|
||||
|
||||
@@ -879,7 +879,7 @@ impl DeviceManager {
|
||||
/// Start all registered devices when booting the associated virtual machine.
|
||||
pub fn start_devices(
|
||||
&mut self,
|
||||
vm_as: &GuestAddressSpaceImpl,
|
||||
#[allow(unused)] vm_as: &GuestAddressSpaceImpl,
|
||||
) -> std::result::Result<(), StartMicroVmError> {
|
||||
// It is safe because we don't expect poison lock.
|
||||
#[cfg(feature = "host-device")]
|
||||
@@ -899,6 +899,7 @@ impl DeviceManager {
|
||||
address_space: Option<&AddressSpace>,
|
||||
) -> Result<()> {
|
||||
// create context for removing devices
|
||||
#[allow(unused)]
|
||||
let mut ctx = DeviceOpContext::new(
|
||||
Some(epoll_mgr),
|
||||
self,
|
||||
@@ -1275,7 +1276,9 @@ mod tests {
|
||||
use dbs_address_space::{AddressSpaceLayout, AddressSpaceRegion, AddressSpaceRegionType};
|
||||
use kvm_ioctls::Kvm;
|
||||
use test_utils::skip_if_not_root;
|
||||
use vm_memory::{GuestAddress, GuestUsize, MmapRegion};
|
||||
#[cfg(feature = "virtio-fs")]
|
||||
use vm_memory::MmapRegion;
|
||||
use vm_memory::{GuestAddress, GuestUsize};
|
||||
|
||||
use super::*;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
|
||||
@@ -34,8 +34,7 @@ pub fn is_ephemeral_volume(mount: &Mount) -> bool {
|
||||
mount.destination(),
|
||||
|
||||
),
|
||||
(Some("bind"), Some(source), dest) if get_linux_mount_info(source)
|
||||
.map_or(false, |info| info.fs_type == "tmpfs") &&
|
||||
(Some("bind"), Some(source), dest) if get_linux_mount_info(source).is_ok_and(|info| info.fs_type == "tmpfs") &&
|
||||
(is_empty_dir(source) || dest.as_path() == Path::new("/dev/shm"))
|
||||
)
|
||||
}
|
||||
|
||||
@@ -823,11 +823,11 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_get_linux_mount_info() {
|
||||
let info = get_linux_mount_info("/sys/fs/cgroup").unwrap();
|
||||
let info = get_linux_mount_info("/dev/shm").unwrap();
|
||||
|
||||
assert_eq!(&info.device, "tmpfs");
|
||||
assert_eq!(&info.fs_type, "tmpfs");
|
||||
assert_eq!(&info.path, "/sys/fs/cgroup");
|
||||
assert_eq!(&info.path, "/dev/shm");
|
||||
|
||||
assert!(matches!(
|
||||
get_linux_mount_info(""),
|
||||
|
||||
@@ -168,7 +168,7 @@ pub fn is_valid_numa_cpu(cpus: &[u32]) -> Result<bool> {
|
||||
let numa_nodes = get_numa_nodes()?;
|
||||
|
||||
for cpu in cpus {
|
||||
if numa_nodes.get(cpu).is_none() {
|
||||
if !numa_nodes.contains_key(cpu) {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,7 +66,7 @@ impl PCIDevices for NvidiaPCIDevice {
|
||||
}
|
||||
}
|
||||
|
||||
return nvidia_devices;
|
||||
nvidia_devices
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::io;
|
||||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use mockall::automock;
|
||||
use pci_ids::{Classes, Vendors};
|
||||
@@ -61,24 +61,22 @@ pub(crate) trait MemoryResourceTrait {
|
||||
|
||||
impl MemoryResourceTrait for MemoryResources {
|
||||
fn get_total_addressable_memory(&self, round_up: bool) -> (u64, u64) {
|
||||
let mut num_bar = 0;
|
||||
let mut mem_size_32bit = 0u64;
|
||||
let mut mem_size_64bit = 0u64;
|
||||
|
||||
let mut keys: Vec<_> = self.keys().cloned().collect();
|
||||
keys.sort();
|
||||
|
||||
for key in keys {
|
||||
if key as usize >= PCI_IOV_NUM_BAR || num_bar == PCI_IOV_NUM_BAR {
|
||||
for (num_bar, key) in keys.into_iter().enumerate() {
|
||||
if key >= PCI_IOV_NUM_BAR || num_bar == PCI_IOV_NUM_BAR {
|
||||
break;
|
||||
}
|
||||
num_bar += 1;
|
||||
|
||||
if let Some(region) = self.get(&key) {
|
||||
let flags = region.flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK;
|
||||
let mem_type_32bit = flags == PCI_BASE_ADDRESS_MEM_TYPE32;
|
||||
let mem_type_64bit = flags == PCI_BASE_ADDRESS_MEM_TYPE64;
|
||||
let mem_size = (region.end - region.start + 1) as u64;
|
||||
let mem_size = region.end - region.start + 1;
|
||||
|
||||
if mem_type_32bit {
|
||||
mem_size_32bit += mem_size;
|
||||
@@ -138,10 +136,10 @@ impl PCIDeviceManager {
|
||||
for entry in device_dirs {
|
||||
let device_dir = entry?;
|
||||
let device_address = device_dir.file_name().to_string_lossy().to_string();
|
||||
if let Ok(device) = self.get_device_by_pci_bus_id(&device_address, vendor, &mut cache) {
|
||||
if let Some(dev) = device {
|
||||
pci_devices.push(dev);
|
||||
}
|
||||
if let Ok(Some(dev)) =
|
||||
self.get_device_by_pci_bus_id(&device_address, vendor, &mut cache)
|
||||
{
|
||||
pci_devices.push(dev);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -238,7 +236,7 @@ impl PCIDeviceManager {
|
||||
Ok(Some(pci_device))
|
||||
}
|
||||
|
||||
fn parse_resources(&self, device_path: &PathBuf) -> io::Result<MemoryResources> {
|
||||
fn parse_resources(&self, device_path: &Path) -> io::Result<MemoryResources> {
|
||||
let content = fs::read_to_string(device_path.join("resource"))?;
|
||||
let mut resources: MemoryResources = MemoryResources::new();
|
||||
for (i, line) in content.lines().enumerate() {
|
||||
@@ -405,6 +403,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_parse_resources() {
|
||||
setup_mock_device_files();
|
||||
|
||||
let manager = PCIDeviceManager::new(MOCK_PCI_DEVICES_ROOT);
|
||||
let device_path = PathBuf::from(MOCK_PCI_DEVICES_ROOT).join("0000:ff:1f.0");
|
||||
|
||||
@@ -418,6 +418,8 @@ mod tests {
|
||||
assert_eq!(resource.start, 0x00000000);
|
||||
assert_eq!(resource.end, 0x0000ffff);
|
||||
assert_eq!(resource.flags, 0x00000404);
|
||||
|
||||
cleanup_mock_device_files();
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -435,10 +437,7 @@ mod tests {
|
||||
file.write_all(&vec![0; 512]).unwrap();
|
||||
|
||||
// It should be true
|
||||
assert!(is_pcie_device(
|
||||
&format!("ff:00.0"),
|
||||
MOCK_SYS_BUS_PCI_DEVICES
|
||||
));
|
||||
assert!(is_pcie_device("ff:00.0", MOCK_SYS_BUS_PCI_DEVICES));
|
||||
|
||||
// Clean up
|
||||
let _ = fs::remove_file(config_path);
|
||||
|
||||
@@ -142,14 +142,11 @@ pub fn arch_guest_protection(
|
||||
#[allow(dead_code)]
|
||||
pub fn available_guest_protection() -> Result<GuestProtection, ProtectionError> {
|
||||
if !Uid::effective().is_root() {
|
||||
return Err(ProtectionError::NoPerms)?;
|
||||
Err(ProtectionError::NoPerms)?;
|
||||
}
|
||||
|
||||
let facilities = crate::cpu::retrieve_cpu_facilities().map_err(|err| {
|
||||
ProtectionError::CheckFailed(format!(
|
||||
"Error retrieving cpu facilities file : {}",
|
||||
err.to_string()
|
||||
))
|
||||
ProtectionError::CheckFailed(format!("Error retrieving cpu facilities file : {}", err))
|
||||
})?;
|
||||
|
||||
// Secure Execution
|
||||
|
||||
@@ -8,7 +8,6 @@ use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader, Result};
|
||||
use std::result::{self};
|
||||
use std::u32;
|
||||
|
||||
use serde::Deserialize;
|
||||
|
||||
@@ -463,12 +462,12 @@ impl Annotation {
|
||||
/// update config info by annotation
|
||||
pub fn update_config_by_annotation(&self, config: &mut TomlConfig) -> Result<()> {
|
||||
if let Some(hv) = self.annotations.get(KATA_ANNO_CFG_RUNTIME_HYPERVISOR) {
|
||||
if config.hypervisor.get(hv).is_some() {
|
||||
if config.hypervisor.contains_key(hv) {
|
||||
config.runtime.hypervisor_name = hv.to_string();
|
||||
}
|
||||
}
|
||||
if let Some(ag) = self.annotations.get(KATA_ANNO_CFG_RUNTIME_AGENT) {
|
||||
if config.agent.get(ag).is_some() {
|
||||
if config.agent.contains_key(ag) {
|
||||
config.runtime.agent_name = ag.to_string();
|
||||
}
|
||||
}
|
||||
@@ -635,13 +634,13 @@ impl Annotation {
|
||||
KATA_ANNO_CFG_HYPERVISOR_CPU_FEATURES => {
|
||||
hv.cpu_info.cpu_features = value.to_string();
|
||||
}
|
||||
KATA_ANNO_CFG_HYPERVISOR_DEFAULT_VCPUS => match self.get_value::<i32>(key) {
|
||||
KATA_ANNO_CFG_HYPERVISOR_DEFAULT_VCPUS => match self.get_value::<f32>(key) {
|
||||
Ok(num_cpus) => {
|
||||
let num_cpus = num_cpus.unwrap_or_default();
|
||||
if num_cpus
|
||||
> get_hypervisor_plugin(hypervisor_name)
|
||||
.unwrap()
|
||||
.get_max_cpus() as i32
|
||||
.get_max_cpus() as f32
|
||||
{
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
@@ -944,8 +943,7 @@ impl Annotation {
|
||||
}
|
||||
}
|
||||
KATA_ANNO_CFG_HYPERVISOR_VIRTIO_FS_EXTRA_ARGS => {
|
||||
let args: Vec<String> =
|
||||
value.to_string().split(',').map(str::to_string).collect();
|
||||
let args: Vec<String> = value.split(',').map(str::to_string).collect();
|
||||
for arg in args {
|
||||
hv.shared_fs.virtio_fs_extra_args.push(arg.to_string());
|
||||
}
|
||||
@@ -971,7 +969,7 @@ impl Annotation {
|
||||
// update agent config
|
||||
KATA_ANNO_CFG_KERNEL_MODULES => {
|
||||
let kernel_mod: Vec<String> =
|
||||
value.to_string().split(';').map(str::to_string).collect();
|
||||
value.split(';').map(str::to_string).collect();
|
||||
for modules in kernel_mod {
|
||||
ag.kernel_modules.push(modules.to_string());
|
||||
}
|
||||
@@ -992,14 +990,16 @@ impl Annotation {
|
||||
return Err(u32_err);
|
||||
}
|
||||
},
|
||||
KATA_ANNO_CFG_RUNTIME_CREATE_CONTAINTER_TIMEOUT => match self.get_value::<u32>(key) {
|
||||
Ok(v) => {
|
||||
ag.request_timeout_ms = v.unwrap_or_default() * 1000;
|
||||
KATA_ANNO_CFG_RUNTIME_CREATE_CONTAINTER_TIMEOUT => {
|
||||
match self.get_value::<u32>(key) {
|
||||
Ok(v) => {
|
||||
ag.request_timeout_ms = v.unwrap_or_default() * 1000;
|
||||
}
|
||||
Err(_e) => {
|
||||
return Err(u32_err);
|
||||
}
|
||||
}
|
||||
Err(_e) => {
|
||||
return Err(u32_err);
|
||||
}
|
||||
},
|
||||
}
|
||||
// update runtime config
|
||||
KATA_ANNO_CFG_RUNTIME_NAME => {
|
||||
let runtime = vec!["virt-container", "linux-container", "wasm-container"];
|
||||
@@ -1032,8 +1032,7 @@ impl Annotation {
|
||||
}
|
||||
},
|
||||
KATA_ANNO_CFG_EXPERIMENTAL => {
|
||||
let args: Vec<String> =
|
||||
value.to_string().split(',').map(str::to_string).collect();
|
||||
let args: Vec<String> = value.split(',').map(str::to_string).collect();
|
||||
for arg in args {
|
||||
config.runtime.experimental.push(arg.to_string());
|
||||
}
|
||||
@@ -1079,6 +1078,9 @@ impl Annotation {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
config.adjust_config()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -115,7 +115,10 @@ pub struct Agent {
|
||||
/// This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest.
|
||||
/// It's also used to ensure that workloads, especially those involving large image pulls within the guest,
|
||||
/// have sufficient time to complete.
|
||||
#[serde(default = "default_request_timeout", rename = "create_container_timeout")]
|
||||
#[serde(
|
||||
default = "default_request_timeout",
|
||||
rename = "create_container_timeout"
|
||||
)]
|
||||
pub request_timeout_ms: u32,
|
||||
|
||||
/// Agent health check request timeout value in millisecond
|
||||
@@ -127,12 +130,12 @@ pub struct Agent {
|
||||
/// These modules will be loaded in the guest kernel using modprobe(8).
|
||||
/// The following example can be used to load two kernel modules with parameters:
|
||||
/// - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"]
|
||||
/// The first word is considered as the module name and the rest as its parameters.
|
||||
/// Container will not be started when:
|
||||
/// The first word is considered as the module name and the rest as its parameters.
|
||||
/// Container will not be started when:
|
||||
/// - A kernel module is specified and the modprobe command is not installed in the guest
|
||||
/// or it fails loading the module.
|
||||
/// - The module is not available in the guest or it doesn't met the guest kernel
|
||||
/// requirements, like architecture and version.
|
||||
/// requirements, like architecture and version.
|
||||
#[serde(default)]
|
||||
pub kernel_modules: Vec<String>,
|
||||
|
||||
|
||||
@@ -37,6 +37,9 @@ pub const DEFAULT_INTERNETWORKING_MODEL: &str = "tcfilter";
|
||||
pub const DEFAULT_BLOCK_DEVICE_TYPE: &str = "virtio-blk-pci";
|
||||
pub const DEFAULT_VHOST_USER_STORE_PATH: &str = "/var/run/vhost-user";
|
||||
pub const DEFAULT_BLOCK_NVDIMM_MEM_OFFSET: u64 = 0;
|
||||
pub const DEFAULT_BLOCK_DEVICE_AIO_THREADS: &str = "threads";
|
||||
pub const DEFAULT_BLOCK_DEVICE_AIO_NATIVE: &str = "native";
|
||||
pub const DEFAULT_BLOCK_DEVICE_AIO: &str = "io_uring";
|
||||
|
||||
pub const DEFAULT_SHARED_FS_TYPE: &str = "virtio-fs";
|
||||
pub const DEFAULT_VIRTIO_FS_CACHE_MODE: &str = "never";
|
||||
|
||||
@@ -369,7 +369,7 @@ mod drop_in_directory_handling {
|
||||
config.hypervisor["qemu"].path,
|
||||
"/usr/bin/qemu-kvm".to_string()
|
||||
);
|
||||
assert_eq!(config.hypervisor["qemu"].cpu_info.default_vcpus, 2);
|
||||
assert_eq!(config.hypervisor["qemu"].cpu_info.default_vcpus, 2.0);
|
||||
assert_eq!(config.hypervisor["qemu"].device_info.default_bridges, 4);
|
||||
assert_eq!(
|
||||
config.hypervisor["qemu"].shared_fs.shared_fs.as_deref(),
|
||||
|
||||
@@ -109,7 +109,7 @@ impl ConfigPlugin for CloudHypervisorConfig {
|
||||
return Err(eother!("Both guest boot image and initrd for CH are empty"));
|
||||
}
|
||||
|
||||
if (ch.cpu_info.default_vcpus > 0
|
||||
if (ch.cpu_info.default_vcpus > 0.0
|
||||
&& ch.cpu_info.default_vcpus as u32 > default::MAX_CH_VCPUS)
|
||||
|| ch.cpu_info.default_maxvcpus > default::MAX_CH_VCPUS
|
||||
{
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
use std::io::Result;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::u32;
|
||||
|
||||
use super::{default, register_hypervisor_plugin};
|
||||
use crate::config::default::MAX_DRAGONBALL_VCPUS;
|
||||
@@ -66,7 +65,7 @@ impl ConfigPlugin for DragonballConfig {
|
||||
}
|
||||
|
||||
if db.cpu_info.default_vcpus as u32 > db.cpu_info.default_maxvcpus {
|
||||
db.cpu_info.default_vcpus = db.cpu_info.default_maxvcpus as i32;
|
||||
db.cpu_info.default_vcpus = db.cpu_info.default_maxvcpus as f32;
|
||||
}
|
||||
|
||||
if db.machine_info.entropy_source.is_empty() {
|
||||
@@ -135,7 +134,7 @@ impl ConfigPlugin for DragonballConfig {
|
||||
));
|
||||
}
|
||||
|
||||
if (db.cpu_info.default_vcpus > 0
|
||||
if (db.cpu_info.default_vcpus > 0.0
|
||||
&& db.cpu_info.default_vcpus as u32 > default::MAX_DRAGONBALL_VCPUS)
|
||||
|| db.cpu_info.default_maxvcpus > default::MAX_DRAGONBALL_VCPUS
|
||||
{
|
||||
|
||||
@@ -93,7 +93,7 @@ impl ConfigPlugin for FirecrackerConfig {
|
||||
));
|
||||
}
|
||||
|
||||
if (firecracker.cpu_info.default_vcpus > 0
|
||||
if (firecracker.cpu_info.default_vcpus > 0.0
|
||||
&& firecracker.cpu_info.default_vcpus as u32 > default::MAX_FIRECRACKER_VCPUS)
|
||||
|| firecracker.cpu_info.default_maxvcpus > default::MAX_FIRECRACKER_VCPUS
|
||||
{
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -128,7 +128,7 @@ impl ConfigPlugin for QemuConfig {
|
||||
}
|
||||
}
|
||||
|
||||
if (qemu.cpu_info.default_vcpus > 0
|
||||
if (qemu.cpu_info.default_vcpus > 0.0
|
||||
&& qemu.cpu_info.default_vcpus as u32 > default::MAX_QEMU_VCPUS)
|
||||
|| qemu.cpu_info.default_maxvcpus > default::MAX_QEMU_VCPUS
|
||||
{
|
||||
|
||||
@@ -9,7 +9,6 @@ use std::fs;
|
||||
use std::io::{self, Result};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::u32;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
@@ -131,9 +130,7 @@ impl TomlConfig {
|
||||
pub fn load_from_file<P: AsRef<Path>>(config_file: P) -> Result<(TomlConfig, PathBuf)> {
|
||||
let mut result = Self::load_raw_from_file(config_file);
|
||||
if let Ok((ref mut config, _)) = result {
|
||||
Hypervisor::adjust_config(config)?;
|
||||
Runtime::adjust_config(config)?;
|
||||
Agent::adjust_config(config)?;
|
||||
config.adjust_config()?;
|
||||
info!(sl!(), "get kata config: {:?}", config);
|
||||
}
|
||||
|
||||
@@ -175,13 +172,20 @@ impl TomlConfig {
|
||||
/// drop-in config file fragments in config.d/.
|
||||
pub fn load(content: &str) -> Result<TomlConfig> {
|
||||
let mut config: TomlConfig = toml::from_str(content)?;
|
||||
Hypervisor::adjust_config(&mut config)?;
|
||||
Runtime::adjust_config(&mut config)?;
|
||||
Agent::adjust_config(&mut config)?;
|
||||
config.adjust_config()?;
|
||||
info!(sl!(), "get kata config: {:?}", config);
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
/// Adjust Kata configuration information.
|
||||
pub fn adjust_config(&mut self) -> Result<()> {
|
||||
Hypervisor::adjust_config(self)?;
|
||||
Runtime::adjust_config(self)?;
|
||||
Agent::adjust_config(self)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate Kata configuration information.
|
||||
pub fn validate(&self) -> Result<()> {
|
||||
Hypervisor::validate(self)?;
|
||||
|
||||
@@ -8,6 +8,7 @@ use flate2::read::GzDecoder;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sha2::{Digest, Sha256, Sha384, Sha512};
|
||||
use std::{collections::HashMap, io::Read};
|
||||
use crate::sl;
|
||||
|
||||
/// Currently, initdata only supports version 0.1.0.
|
||||
const INITDATA_VERSION: &str = "0.1.0";
|
||||
@@ -129,20 +130,20 @@ fn calculate_digest(algorithm: &str, data: &str) -> Result<Vec<u8>> {
|
||||
let digest = match algorithm {
|
||||
"sha256" => {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(&data);
|
||||
hasher.update(data);
|
||||
hasher.finalize().to_vec()
|
||||
}
|
||||
"sha384" => {
|
||||
let mut hasher = Sha384::new();
|
||||
hasher.update(&data);
|
||||
hasher.update(data);
|
||||
hasher.finalize().to_vec()
|
||||
}
|
||||
"sha512" => {
|
||||
let mut hasher = Sha512::new();
|
||||
hasher.update(&data);
|
||||
hasher.update(data);
|
||||
hasher.finalize().to_vec()
|
||||
}
|
||||
_ => return Err(anyhow!("unsupported Hash algorithm: {}", algorithm).into()),
|
||||
_ => return Err(anyhow!("unsupported Hash algorithm: {}", algorithm)),
|
||||
};
|
||||
|
||||
Ok(digest)
|
||||
@@ -172,7 +173,7 @@ fn adjust_digest(digest: &[u8], platform: ProtectedPlatform) -> Vec<u8> {
|
||||
|
||||
/// Parse initdata
|
||||
fn parse_initdata(initdata_str: &str) -> Result<InitData> {
|
||||
let initdata: InitData = toml::from_str(&initdata_str)?;
|
||||
let initdata: InitData = toml::from_str(initdata_str)?;
|
||||
initdata.validate()?;
|
||||
|
||||
Ok(initdata)
|
||||
@@ -192,7 +193,7 @@ pub fn calculate_initdata_digest(
|
||||
let algorithm: &str = &initdata.algorithm;
|
||||
|
||||
// 2. Calculate Digest
|
||||
let digest = calculate_digest(algorithm, &initdata_toml).context("calculate digest")?;
|
||||
let digest = calculate_digest(algorithm, initdata_toml).context("calculate digest")?;
|
||||
|
||||
// 3. Adjust Digest with Platform
|
||||
let digest_platform = adjust_digest(&digest, platform);
|
||||
@@ -203,12 +204,18 @@ pub fn calculate_initdata_digest(
|
||||
Ok(b64encoded_digest)
|
||||
}
|
||||
|
||||
/// The argument `initda_annotation` is a Standard base64 encoded string containing a TOML formatted content.
|
||||
/// The argument `initdata_annotation` is a Standard base64 encoded string containing a TOML formatted content.
|
||||
/// This function decodes the base64 string, parses the TOML content into an InitData structure.
|
||||
pub fn add_hypervisor_initdata_overrides(initda_annotation: &str) -> Result<String> {
|
||||
pub fn add_hypervisor_initdata_overrides(initdata_annotation: &str) -> Result<String> {
|
||||
// If the initdata is empty, return an empty string
|
||||
if initdata_annotation.is_empty() {
|
||||
info!(sl!(), "initdata_annotation is empty");
|
||||
return Ok("".to_string());
|
||||
}
|
||||
|
||||
// Base64 decode the annotation value
|
||||
let b64_decoded =
|
||||
base64::decode_config(initda_annotation, base64::STANDARD).context("base64 decode")?;
|
||||
base64::decode_config(initdata_annotation, base64::STANDARD).context("base64 decode")?;
|
||||
|
||||
// Gzip decompress the decoded data
|
||||
let mut gz_decoder = GzDecoder::new(&b64_decoded[..]);
|
||||
@@ -231,6 +238,139 @@ mod tests {
|
||||
use flate2::Compression;
|
||||
use std::io::Write;
|
||||
|
||||
// create gzipped and base64 encoded string
|
||||
fn create_encoded_input(content: &str) -> String {
|
||||
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
|
||||
encoder.write_all(content.as_bytes()).unwrap();
|
||||
let compressed = encoder.finish().unwrap();
|
||||
base64::encode_config(&compressed, base64::STANDARD)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_annotation() {
|
||||
// Test with empty string input
|
||||
let result = add_hypervisor_initdata_overrides("");
|
||||
assert!(result.is_ok());
|
||||
assert_eq!(result.unwrap(), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_data_section() {
|
||||
// Test with empty data section
|
||||
let toml_content = r#"
|
||||
algorithm = "sha384"
|
||||
version = "0.1.0"
|
||||
|
||||
[data]
|
||||
"#;
|
||||
let encoded = create_encoded_input(toml_content);
|
||||
|
||||
let result = add_hypervisor_initdata_overrides(&encoded);
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_valid_complete_initdata() {
|
||||
// Test with complete InitData structure
|
||||
let toml_content = r#"
|
||||
algorithm = "sha384"
|
||||
version = "0.1.0"
|
||||
|
||||
[data]
|
||||
"aa.toml" = '''
|
||||
[token_configs]
|
||||
[token_configs.coco_as]
|
||||
url = 'http://kbs-service.xxx.cluster.local:8080'
|
||||
|
||||
[token_configs.kbs]
|
||||
url = 'http://kbs-service.xxx.cluster.local:8080'
|
||||
'''
|
||||
|
||||
"cdh.toml" = '''
|
||||
socket = 'unix:///run/guest-services/cdh.sock'
|
||||
credentials = []
|
||||
|
||||
[kbc]
|
||||
name = 'cc_kbc'
|
||||
url = 'http://kbs-service.xxx.cluster.local:8080'
|
||||
'''
|
||||
"#;
|
||||
let encoded = create_encoded_input(toml_content);
|
||||
|
||||
let result = add_hypervisor_initdata_overrides(&encoded);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let output = result.unwrap();
|
||||
assert!(!output.is_empty());
|
||||
assert!(output.contains("algorithm"));
|
||||
assert!(output.contains("version"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_base64() {
|
||||
// Test with invalid base64 string
|
||||
let invalid_base64 = "This is not valid base64!";
|
||||
|
||||
let result = add_hypervisor_initdata_overrides(invalid_base64);
|
||||
assert!(result.is_err());
|
||||
|
||||
let error = result.unwrap_err();
|
||||
assert!(error.to_string().contains("base64 decode"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_valid_base64_invalid_gzip() {
|
||||
// Test with valid base64 but invalid gzip content
|
||||
let not_gzipped = "This is not gzipped content";
|
||||
let encoded = base64::encode_config(not_gzipped.as_bytes(), base64::STANDARD);
|
||||
|
||||
let result = add_hypervisor_initdata_overrides(&encoded);
|
||||
assert!(result.is_err());
|
||||
|
||||
let error = result.unwrap_err();
|
||||
assert!(error.to_string().contains("gz decoder failed"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_missing_algorithm() {
|
||||
// Test with missing algorithm field
|
||||
let toml_content = r#"
|
||||
version = "0.1.0"
|
||||
|
||||
[data]
|
||||
"test.toml" = '''
|
||||
key = "value"
|
||||
'''
|
||||
"#;
|
||||
let encoded = create_encoded_input(toml_content);
|
||||
|
||||
let result = add_hypervisor_initdata_overrides(&encoded);
|
||||
// This might fail depending on whether algorithm is required
|
||||
if result.is_err() {
|
||||
assert!(result.unwrap_err().to_string().contains("parse initdata"));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_missing_version() {
|
||||
// Test with missing version field
|
||||
let toml_content = r#"
|
||||
algorithm = "sha384"
|
||||
|
||||
[data]
|
||||
"test.toml" = '''
|
||||
key = "value"
|
||||
'''
|
||||
"#;
|
||||
let encoded = create_encoded_input(toml_content);
|
||||
|
||||
let result = add_hypervisor_initdata_overrides(&encoded);
|
||||
// This might fail depending on whether version is required
|
||||
if result.is_err() {
|
||||
assert!(result.unwrap_err().to_string().contains("parse initdata"));
|
||||
}
|
||||
}
|
||||
|
||||
/// Test InitData creation and serialization
|
||||
#[test]
|
||||
fn test_init_data() {
|
||||
|
||||
@@ -205,47 +205,48 @@ pub struct NydusImageVolume {
|
||||
pub snapshot_dir: String,
|
||||
}
|
||||
|
||||
/// Kata virtual volume to encapsulate information for extra mount options and direct volumes.
|
||||
/// Represents a Kata virtual volume, encapsulating information for extra mount options and direct volumes.
|
||||
///
|
||||
/// It's very expensive to build direct communication channels to pass information:
|
||||
/// - between snapshotters and kata-runtime/kata-agent/image-rs
|
||||
/// - between CSI drivers and kata-runtime/kata-agent
|
||||
/// Direct communication channels between components like snapshotters, `kata-runtime`, `kata-agent`,
|
||||
/// `image-rs`, and CSI drivers are often expensive to build and maintain.
|
||||
///
|
||||
/// So `KataVirtualVolume` is introduced to encapsulate extra mount options and direct volume
|
||||
/// information, so we can build a common infrastructure to handle them.
|
||||
/// `KataVirtualVolume` is a superset of `NydusExtraOptions` and `DirectVolumeMountInfo`.
|
||||
/// Therefore, `KataVirtualVolume` is introduced as a common infrastructure to encapsulate
|
||||
/// additional mount options and direct volume information. It serves as a superset of
|
||||
/// `NydusExtraOptions` and `DirectVolumeMountInfo`.
|
||||
///
|
||||
/// Value of `volume_type` determines how to interpret other fields in the structure.
|
||||
/// The interpretation of other fields within this structure is determined by the `volume_type` field.
|
||||
///
|
||||
/// - `KATA_VIRTUAL_VOLUME_IGNORE`
|
||||
/// -- all other fields should be ignored/unused.
|
||||
/// # Volume Types:
|
||||
///
|
||||
/// - `KATA_VIRTUAL_VOLUME_DIRECT_BLOCK`
|
||||
/// -- `source`: the directly assigned block device
|
||||
/// -- `fs_type`: filesystem type
|
||||
/// -- `options`: mount options
|
||||
/// -- `direct_volume`: additional metadata to pass to the agent regarding this volume.
|
||||
/// - `KATA_VIRTUAL_VOLUME_IGNORE`:
|
||||
/// All other fields should be ignored/unused.
|
||||
///
|
||||
/// - `KATA_VIRTUAL_VOLUME_IMAGE_RAW_BLOCK` or `KATA_VIRTUAL_VOLUME_LAYER_RAW_BLOCK`
|
||||
/// -- `source`: path to the raw block image for the container image or layer.
|
||||
/// -- `fs_type`: filesystem type
|
||||
/// -- `options`: mount options
|
||||
/// -- `dm_verity`: disk dm-verity information
|
||||
/// - `KATA_VIRTUAL_VOLUME_DIRECT_BLOCK`:
|
||||
/// - `source`: The directly assigned block device path.
|
||||
/// - `fs_type`: Filesystem type.
|
||||
/// - `options`: Mount options.
|
||||
/// - `direct_volume`: Additional metadata to pass to the agent regarding this volume.
|
||||
///
|
||||
/// - `KATA_VIRTUAL_VOLUME_IMAGE_NYDUS_BLOCK` or `KATA_VIRTUAL_VOLUME_LAYER_NYDUS_BLOCK`
|
||||
/// -- `source`: path to nydus meta blob
|
||||
/// -- `fs_type`: filesystem type
|
||||
/// -- `nydus_image`: configuration information for nydus image.
|
||||
/// -- `dm_verity`: disk dm-verity information
|
||||
/// - `KATA_VIRTUAL_VOLUME_IMAGE_RAW_BLOCK` or `KATA_VIRTUAL_VOLUME_LAYER_RAW_BLOCK`:
|
||||
/// - `source`: Path to the raw block image for the container image or layer.
|
||||
/// - `fs_type`: Filesystem type.
|
||||
/// - `options`: Mount options.
|
||||
/// - `dm_verity`: Disk `dm-verity` information.
|
||||
///
|
||||
/// - `KATA_VIRTUAL_VOLUME_IMAGE_NYDUS_FS` or `KATA_VIRTUAL_VOLUME_LAYER_NYDUS_FS`
|
||||
/// -- `source`: path to nydus meta blob
|
||||
/// -- `fs_type`: filesystem type
|
||||
/// -- `nydus_image`: configuration information for nydus image.
|
||||
/// - `KATA_VIRTUAL_VOLUME_IMAGE_NYDUS_BLOCK` or `KATA_VIRTUAL_VOLUME_LAYER_NYDUS_BLOCK`:
|
||||
/// - `source`: Path to nydus meta blob.
|
||||
/// - `fs_type`: Filesystem type.
|
||||
/// - `nydus_image`: Configuration information for nydus image.
|
||||
/// - `dm_verity`: Disk `dm-verity` information.
|
||||
///
|
||||
/// - `KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL`
|
||||
/// -- `source`: image reference
|
||||
/// -- `image_pull`: metadata for image pulling
|
||||
/// - `KATA_VIRTUAL_VOLUME_IMAGE_NYDUS_FS` or `KATA_VIRTUAL_VOLUME_LAYER_NYDUS_FS`:
|
||||
/// - `source`: Path to nydus meta blob.
|
||||
/// - `fs_type`: Filesystem type.
|
||||
/// - `nydus_image`: Configuration information for nydus image.
|
||||
///
|
||||
/// - `KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL`:
|
||||
/// - `source`: Image reference.
|
||||
/// - `image_pull`: Metadata for image pulling.
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Default, Serialize, Deserialize)]
|
||||
pub struct KataVirtualVolume {
|
||||
/// Type of virtual volume.
|
||||
@@ -275,7 +276,7 @@ pub struct KataVirtualVolume {
|
||||
}
|
||||
|
||||
impl KataVirtualVolume {
|
||||
/// Create a new instance of `KataVirtualVolume` with specified type.
|
||||
/// Creates a new instance of `KataVirtualVolume` with the specified type.
|
||||
pub fn new(volume_type: String) -> Self {
|
||||
Self {
|
||||
volume_type,
|
||||
@@ -283,7 +284,7 @@ impl KataVirtualVolume {
|
||||
}
|
||||
}
|
||||
|
||||
/// Validate virtual volume object.
|
||||
/// Validates the virtual volume object.
|
||||
pub fn validate(&self) -> Result<()> {
|
||||
match self.volume_type.as_str() {
|
||||
KATA_VIRTUAL_VOLUME_DIRECT_BLOCK => {
|
||||
@@ -365,25 +366,25 @@ impl KataVirtualVolume {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Serialize the virtual volume object to json.
|
||||
/// Serializes the virtual volume object to a JSON string.
|
||||
pub fn to_json(&self) -> Result<String> {
|
||||
Ok(serde_json::to_string(self)?)
|
||||
}
|
||||
|
||||
/// Deserialize a virtual volume object from json string.
|
||||
/// Deserializes a virtual volume object from a JSON string.
|
||||
pub fn from_json(value: &str) -> Result<Self> {
|
||||
let volume: KataVirtualVolume = serde_json::from_str(value)?;
|
||||
volume.validate()?;
|
||||
Ok(volume)
|
||||
}
|
||||
|
||||
/// Serialize the virtual volume object to json and encode the string with base64.
|
||||
/// Serializes the virtual volume object to a JSON string and encodes the string with base64.
|
||||
pub fn to_base64(&self) -> Result<String> {
|
||||
let json = self.to_json()?;
|
||||
Ok(base64::encode(json))
|
||||
}
|
||||
|
||||
/// Decode and deserialize a virtual volume object from base64 encoded json string.
|
||||
/// Decodes and deserializes a virtual volume object from a base64 encoded JSON string.
|
||||
pub fn from_base64(value: &str) -> Result<Self> {
|
||||
let json = base64::decode(value)?;
|
||||
let volume: KataVirtualVolume = serde_json::from_slice(&json)?;
|
||||
@@ -453,18 +454,18 @@ impl TryFrom<&NydusExtraOptions> for KataVirtualVolume {
|
||||
}
|
||||
}
|
||||
|
||||
/// Trait object for storage device.
|
||||
/// Trait object for a storage device.
|
||||
pub trait StorageDevice: Send + Sync {
|
||||
/// Path
|
||||
/// Returns the path of the storage device, if available.
|
||||
fn path(&self) -> Option<&str>;
|
||||
|
||||
/// Clean up resources related to the storage device.
|
||||
/// Cleans up resources related to the storage device.
|
||||
fn cleanup(&self) -> Result<()>;
|
||||
}
|
||||
|
||||
/// Join user provided volume path with kata direct-volume root path.
|
||||
/// Joins a user-provided volume path with the Kata direct-volume root path.
|
||||
///
|
||||
/// The `volume_path` is base64-url-encoded and then safely joined to the `prefix`
|
||||
/// The `volume_path` is base64-url-encoded and then safely joined to the `prefix`.
|
||||
pub fn join_path(prefix: &str, volume_path: &str) -> Result<PathBuf> {
|
||||
if volume_path.is_empty() {
|
||||
return Err(anyhow!(std::io::ErrorKind::NotFound));
|
||||
@@ -474,7 +475,7 @@ pub fn join_path(prefix: &str, volume_path: &str) -> Result<PathBuf> {
|
||||
Ok(safe_path::scoped_join(prefix, b64_url_encoded_path)?)
|
||||
}
|
||||
|
||||
/// get DirectVolume mountInfo from mountinfo.json.
|
||||
/// Gets `DirectVolumeMountInfo` from `mountinfo.json`.
|
||||
pub fn get_volume_mount_info(volume_path: &str) -> Result<DirectVolumeMountInfo> {
|
||||
let volume_path = join_path(KATA_DIRECT_VOLUME_ROOT_PATH, volume_path)?;
|
||||
let mount_info_file_path = volume_path.join(KATA_MOUNT_INFO_FILE_NAME);
|
||||
@@ -484,28 +485,30 @@ pub fn get_volume_mount_info(volume_path: &str) -> Result<DirectVolumeMountInfo>
|
||||
Ok(mount_info)
|
||||
}
|
||||
|
||||
/// Check whether a mount type is a marker for Kata specific volume.
|
||||
/// Checks whether a mount type is a marker for a Kata specific volume.
|
||||
pub fn is_kata_special_volume(ty: &str) -> bool {
|
||||
ty.len() > KATA_VOLUME_TYPE_PREFIX.len() && ty.starts_with(KATA_VOLUME_TYPE_PREFIX)
|
||||
}
|
||||
|
||||
/// Check whether a mount type is a marker for Kata guest mount volume.
|
||||
/// Checks whether a mount type is a marker for a Kata guest mount volume.
|
||||
pub fn is_kata_guest_mount_volume(ty: &str) -> bool {
|
||||
ty.len() > KATA_GUEST_MOUNT_PREFIX.len() && ty.starts_with(KATA_GUEST_MOUNT_PREFIX)
|
||||
}
|
||||
|
||||
/// Check whether a mount type is a marker for Kata ephemeral volume.
|
||||
/// Checks whether a mount type is a marker for a Kata ephemeral volume.
|
||||
pub fn is_kata_ephemeral_volume(ty: &str) -> bool {
|
||||
ty == KATA_EPHEMERAL_VOLUME_TYPE
|
||||
}
|
||||
|
||||
/// Check whether a mount type is a marker for Kata hostdir volume.
|
||||
/// Checks whether a mount type is a marker for a Kata hostdir volume.
|
||||
pub fn is_kata_host_dir_volume(ty: &str) -> bool {
|
||||
ty == KATA_HOST_DIR_VOLUME_TYPE
|
||||
}
|
||||
|
||||
/// sandbox bindmount format: /path/to/dir, or /path/to/dir:ro[:rw]
|
||||
/// the real path is without suffix ":ro" or ":rw".
|
||||
/// Splits a sandbox bindmount string into its real path and mode.
|
||||
///
|
||||
/// The `bindmount` format is typically `/path/to/dir` or `/path/to/dir:ro[:rw]`.
|
||||
/// This function extracts the real path (without the suffix ":ro" or ":rw") and the mode.
|
||||
pub fn split_bind_mounts(bindmount: &str) -> (&str, &str) {
|
||||
let (real_path, mode) = if bindmount.ends_with(SANDBOX_BIND_MOUNTS_RO) {
|
||||
(
|
||||
@@ -525,12 +528,14 @@ pub fn split_bind_mounts(bindmount: &str) -> (&str, &str) {
|
||||
(real_path, mode)
|
||||
}
|
||||
|
||||
/// This function, adjust_rootfs_mounts, manages the root filesystem mounts based on guest-pull mechanism.
|
||||
/// - the function disregards any provided rootfs_mounts.
|
||||
/// Instead, it forcefully creates a single, default KataVirtualVolume specifically for guest-pull operations.
|
||||
/// This volume's representation is then base64-encoded and added as the only option to a new, singular Mount entry,
|
||||
/// which becomes the sole item in the returned Vec<Mount>.
|
||||
/// This ensures that when guest pull is active, the root filesystem is exclusively configured via this virtual volume.
|
||||
/// Adjusts the root filesystem mounts based on the guest-pull mechanism.
|
||||
///
|
||||
/// This function disregards any provided `rootfs_mounts`. Instead, it forcefully creates
|
||||
/// a single, default `KataVirtualVolume` specifically for guest-pull operations.
|
||||
/// This volume's representation is then base64-encoded and added as the only option
|
||||
/// to a new, singular `Mount` entry, which becomes the sole item in the returned `Vec<Mount>`.
|
||||
/// This ensures that when guest pull is active, the root filesystem is exclusively
|
||||
/// configured via this virtual volume.
|
||||
pub fn adjust_rootfs_mounts() -> Result<Vec<Mount>> {
|
||||
// We enforce a single, default KataVirtualVolume as the exclusive rootfs mount.
|
||||
let volume = KataVirtualVolume::new(KATA_VIRTUAL_VOLUME_IMAGE_GUEST_PULL.to_string());
|
||||
|
||||
@@ -186,7 +186,7 @@ mod tests {
|
||||
"./test_hypervisor_hook_path"
|
||||
);
|
||||
assert!(!hv.memory_info.enable_mem_prealloc);
|
||||
assert_eq!(hv.cpu_info.default_vcpus, 12);
|
||||
assert_eq!(hv.cpu_info.default_vcpus, 12.0);
|
||||
assert!(!hv.memory_info.enable_guest_swap);
|
||||
assert_eq!(hv.memory_info.default_memory, 100);
|
||||
assert!(!hv.enable_iothreads);
|
||||
|
||||
@@ -22,6 +22,7 @@ slog-json = "2.4.0"
|
||||
slog-term = "2.9.1"
|
||||
slog-async = "2.7.0"
|
||||
slog-scope = "4.4.0"
|
||||
slog-journald = "2.2.0"
|
||||
lazy_static = "1.3.0"
|
||||
arc-swap = "1.5.0"
|
||||
|
||||
|
||||
@@ -81,6 +81,11 @@ pub fn create_term_logger(level: slog::Level) -> (slog::Logger, slog_async::Asyn
|
||||
(logger, guard)
|
||||
}
|
||||
|
||||
pub enum LogDestination {
|
||||
File(Box<dyn Write + Send + Sync>),
|
||||
Journal,
|
||||
}
|
||||
|
||||
// Creates a logger which prints output as JSON
|
||||
// XXX: 'writer' param used to make testing possible.
|
||||
pub fn create_logger<W>(
|
||||
@@ -92,13 +97,43 @@ pub fn create_logger<W>(
|
||||
where
|
||||
W: Write + Send + Sync + 'static,
|
||||
{
|
||||
let json_drain = slog_json::Json::new(writer)
|
||||
.add_default_keys()
|
||||
.build()
|
||||
.fuse();
|
||||
create_logger_with_destination(name, source, level, LogDestination::File(Box::new(writer)))
|
||||
}
|
||||
|
||||
// Creates a logger which prints output as JSON or to systemd journal
|
||||
pub fn create_logger_with_destination(
|
||||
name: &str,
|
||||
source: &str,
|
||||
level: slog::Level,
|
||||
destination: LogDestination,
|
||||
) -> (slog::Logger, slog_async::AsyncGuard) {
|
||||
// Check the destination type before consuming it.
|
||||
// The `matches` macro performs a non-consuming check (it borrows).
|
||||
let is_journal_destination = matches!(destination, LogDestination::Journal);
|
||||
|
||||
// The target type for boxed drain. Note that Err = slog::Never.
|
||||
// Both `.fuse()` and `.ignore_res()` convert potential errors into a non-returning path
|
||||
// (panic or ignore), so they never return an Err.
|
||||
let drain: Box<dyn Drain<Ok = (), Err = slog::Never> + Send> = match destination {
|
||||
LogDestination::File(writer) => {
|
||||
// `destination` is `File`.
|
||||
let json_drain = slog_json::Json::new(writer)
|
||||
.add_default_keys()
|
||||
.build()
|
||||
.fuse();
|
||||
|
||||
Box::new(json_drain)
|
||||
}
|
||||
LogDestination::Journal => {
|
||||
// `destination` is `Journal`.
|
||||
let journal_drain = slog_journald::JournaldDrain.ignore_res();
|
||||
|
||||
Box::new(journal_drain)
|
||||
}
|
||||
};
|
||||
|
||||
// Ensure only a unique set of key/value fields is logged
|
||||
let unique_drain = UniqueDrain::new(json_drain).fuse();
|
||||
let unique_drain = UniqueDrain::new(drain).fuse();
|
||||
|
||||
// Adjust the level which will be applied to the log-system
|
||||
// Info is the default level, but if Debug flag is set, the overall log level will be changed to Debug here
|
||||
@@ -119,16 +154,28 @@ where
|
||||
.thread_name("slog-async-logger".into())
|
||||
.build_with_guard();
|
||||
|
||||
// Add some "standard" fields
|
||||
let logger = slog::Logger::root(
|
||||
// Create a base logger with common fields.
|
||||
let base_logger = slog::Logger::root(
|
||||
async_drain.fuse(),
|
||||
o!("version" => env!("CARGO_PKG_VERSION"),
|
||||
o!(
|
||||
"version" => env!("CARGO_PKG_VERSION"),
|
||||
"subsystem" => DEFAULT_SUBSYSTEM,
|
||||
"pid" => process::id().to_string(),
|
||||
"name" => name.to_string(),
|
||||
"source" => source.to_string()),
|
||||
"source" => source.to_string()
|
||||
),
|
||||
);
|
||||
|
||||
// If not journal destination, the logger remains the base_logger.
|
||||
let logger = if is_journal_destination {
|
||||
// Use the .new() method to build a child logger which inherits all existing
|
||||
// key-value pairs from its parent and supplements them with additional ones.
|
||||
// This is the idiomatic way.
|
||||
base_logger.new(o!("SYSLOG_IDENTIFIER" => "kata"))
|
||||
} else {
|
||||
base_logger
|
||||
};
|
||||
|
||||
(logger, guard)
|
||||
}
|
||||
|
||||
@@ -502,7 +549,12 @@ mod tests {
|
||||
let record_key = "record-key-1";
|
||||
let record_value = "record-key-2";
|
||||
|
||||
let (logger, guard) = create_logger(name, source, level, writer);
|
||||
let (logger, guard) = create_logger_with_destination(
|
||||
name,
|
||||
source,
|
||||
level,
|
||||
LogDestination::File(Box::new(writer)),
|
||||
);
|
||||
|
||||
let msg = "foo, bar, baz";
|
||||
|
||||
@@ -661,7 +713,12 @@ mod tests {
|
||||
.reopen()
|
||||
.unwrap_or_else(|_| panic!("{:?}: failed to clone tempfile", msg));
|
||||
|
||||
let (logger, logger_guard) = create_logger(name, source, d.slog_level, writer);
|
||||
let (logger, logger_guard) = create_logger_with_destination(
|
||||
name,
|
||||
source,
|
||||
d.slog_level,
|
||||
LogDestination::File(Box::new(writer)),
|
||||
);
|
||||
|
||||
// Call the logger (which calls the drain)
|
||||
(d.closure)(&logger, d.msg.to_owned());
|
||||
|
||||
@@ -115,7 +115,7 @@ impl From<oci::PosixRlimit> for grpc::POSIXRlimit {
|
||||
impl From<oci::Process> for grpc::Process {
|
||||
fn from(from: oci::Process) -> Self {
|
||||
grpc::Process {
|
||||
Terminal: from.terminal().map_or(false, |t| t),
|
||||
Terminal: from.terminal().is_some_and(|t| t),
|
||||
ConsoleSize: from_option(from.console_size()),
|
||||
User: from_option(Some(from.user().clone())),
|
||||
Args: option_vec_to_vec(from.args()),
|
||||
@@ -161,7 +161,7 @@ impl From<oci::LinuxMemory> for grpc::LinuxMemory {
|
||||
Kernel: from.kernel().map_or(0, |t| t),
|
||||
KernelTCP: from.kernel_tcp().map_or(0, |t| t),
|
||||
Swappiness: from.swappiness().map_or(0, |t| t),
|
||||
DisableOOMKiller: from.disable_oom_killer().map_or(false, |t| t),
|
||||
DisableOOMKiller: from.disable_oom_killer().is_some_and(|t| t),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -355,6 +355,7 @@ mod tests {
|
||||
.read(false)
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.mode(0o200)
|
||||
.open(&path)
|
||||
.unwrap();
|
||||
@@ -376,6 +377,7 @@ mod tests {
|
||||
.read(false)
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.mode(0o200)
|
||||
.open(&path)
|
||||
.unwrap();
|
||||
|
||||
@@ -90,7 +90,7 @@ pub fn mgmt_socket_addr(sid: &str) -> Result<String> {
|
||||
));
|
||||
}
|
||||
|
||||
get_uds_with_sid(sid, &sb_storage_path()?)
|
||||
get_uds_with_sid(sid, sb_storage_path()?)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
1
src/mem-agent/.gitignore
vendored
Normal file
1
src/mem-agent/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
Cargo.lock
|
||||
608
src/runtime-rs/Cargo.lock
generated
608
src/runtime-rs/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -147,6 +147,7 @@ DEFMAXMEMSZ := 0
|
||||
##VAR DEFBRIDGES=<number> Default number of bridges
|
||||
DEFBRIDGES := 0
|
||||
DEFENABLEANNOTATIONS := [\"kernel_params\"]
|
||||
DEFENABLEANNOTATIONS_COCO := [\"kernel_params\",\"cc_init_data\"]
|
||||
DEFDISABLEGUESTSECCOMP := true
|
||||
DEFDISABLEGUESTEMPTYDIR := false
|
||||
##VAR DEFAULTEXPFEATURES=[features] Default experimental features enabled
|
||||
@@ -482,6 +483,7 @@ USER_VARS += DEFVIRTIOFSCACHE
|
||||
USER_VARS += DEFVIRTIOFSQUEUESIZE
|
||||
USER_VARS += DEFVIRTIOFSEXTRAARGS
|
||||
USER_VARS += DEFENABLEANNOTATIONS
|
||||
USER_VARS += DEFENABLEANNOTATIONS_COCO
|
||||
USER_VARS += DEFENABLEIOTHREADS
|
||||
USER_VARS += DEFSECCOMPSANDBOXPARAM
|
||||
USER_VARS += DEFGUESTSELINUXLABEL
|
||||
|
||||
@@ -195,6 +195,9 @@ block_device_driver = "virtio-blk-pci"
|
||||
# result in memory pre allocation
|
||||
#enable_hugepages = true
|
||||
|
||||
# Disable the 'seccomp' feature from Cloud Hypervisor or firecracker, default false
|
||||
# disable_seccomp = true
|
||||
|
||||
# This option changes the default hypervisor and kernel parameters
|
||||
# to enable debug output where available.
|
||||
#
|
||||
|
||||
@@ -45,7 +45,7 @@ confidential_guest = true
|
||||
# List of valid annotation names for the hypervisor
|
||||
# Each member of the list is a regular expression, which is the base name
|
||||
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
|
||||
enable_annotations = @DEFENABLEANNOTATIONS@
|
||||
enable_annotations = @DEFENABLEANNOTATIONS_COCO@
|
||||
|
||||
# List of valid annotations values for the hypervisor
|
||||
# Each member of the list is a path pattern as described by glob(3).
|
||||
|
||||
@@ -145,6 +145,9 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
|
||||
# result in memory pre allocation
|
||||
#enable_hugepages = true
|
||||
|
||||
# Disable the 'seccomp' feature from Cloud Hypervisor or firecracker, default false
|
||||
# disable_seccomp = true
|
||||
|
||||
# Enable vIOMMU, default false
|
||||
# Enabling this will result in the VM having a vIOMMU device
|
||||
# This will also add the following options to the kernel's
|
||||
|
||||
@@ -319,12 +319,12 @@ impl TryFrom<(CpuInfo, GuestProtection)> for CpusConfig {
|
||||
let guest_protection_to_use = args.1;
|
||||
|
||||
// This can only happen if runtime-rs fails to set default values.
|
||||
if cpu.default_vcpus <= 0 {
|
||||
if cpu.default_vcpus <= 0.0 {
|
||||
return Err(CpusConfigError::BootVCPUsTooSmall);
|
||||
}
|
||||
|
||||
let default_vcpus =
|
||||
u8::try_from(cpu.default_vcpus).map_err(CpusConfigError::BootVCPUsTooBig)?;
|
||||
let default_vcpus = u8::try_from(cpu.default_vcpus.ceil() as u32)
|
||||
.map_err(CpusConfigError::BootVCPUsTooBig)?;
|
||||
|
||||
// This can only happen if runtime-rs fails to set default values.
|
||||
if cpu.default_maxvcpus == 0 {
|
||||
@@ -611,7 +611,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let cpu_info = CpuInfo {
|
||||
default_vcpus: cpu_default as i32,
|
||||
default_vcpus: cpu_default as f32,
|
||||
default_maxvcpus,
|
||||
|
||||
..Default::default()
|
||||
@@ -1159,7 +1159,7 @@ mod tests {
|
||||
},
|
||||
TestData {
|
||||
cpu_info: CpuInfo {
|
||||
default_vcpus: -1,
|
||||
default_vcpus: -1.0,
|
||||
|
||||
..Default::default()
|
||||
},
|
||||
@@ -1168,7 +1168,7 @@ mod tests {
|
||||
},
|
||||
TestData {
|
||||
cpu_info: CpuInfo {
|
||||
default_vcpus: 1,
|
||||
default_vcpus: 1.0,
|
||||
default_maxvcpus: 0,
|
||||
|
||||
..Default::default()
|
||||
@@ -1178,7 +1178,7 @@ mod tests {
|
||||
},
|
||||
TestData {
|
||||
cpu_info: CpuInfo {
|
||||
default_vcpus: 9,
|
||||
default_vcpus: 9.0,
|
||||
default_maxvcpus: 7,
|
||||
|
||||
..Default::default()
|
||||
@@ -1188,7 +1188,7 @@ mod tests {
|
||||
},
|
||||
TestData {
|
||||
cpu_info: CpuInfo {
|
||||
default_vcpus: 1,
|
||||
default_vcpus: 1.0,
|
||||
default_maxvcpus: 1,
|
||||
..Default::default()
|
||||
},
|
||||
@@ -1208,7 +1208,7 @@ mod tests {
|
||||
},
|
||||
TestData {
|
||||
cpu_info: CpuInfo {
|
||||
default_vcpus: 1,
|
||||
default_vcpus: 1.0,
|
||||
default_maxvcpus: 3,
|
||||
..Default::default()
|
||||
},
|
||||
@@ -1228,7 +1228,7 @@ mod tests {
|
||||
},
|
||||
TestData {
|
||||
cpu_info: CpuInfo {
|
||||
default_vcpus: 1,
|
||||
default_vcpus: 1.0,
|
||||
default_maxvcpus: 13,
|
||||
..Default::default()
|
||||
},
|
||||
@@ -1823,7 +1823,7 @@ mod tests {
|
||||
|
||||
cfg: HypervisorConfig {
|
||||
cpu_info: CpuInfo {
|
||||
default_vcpus: 0,
|
||||
default_vcpus: 0.0,
|
||||
|
||||
..cpu_info.clone()
|
||||
},
|
||||
@@ -1939,7 +1939,7 @@ mod tests {
|
||||
vsock_socket_path: "vsock_socket_path".into(),
|
||||
cfg: HypervisorConfig {
|
||||
cpu_info: CpuInfo {
|
||||
default_vcpus: 1,
|
||||
default_vcpus: 1.0,
|
||||
default_maxvcpus: 1,
|
||||
|
||||
..Default::default()
|
||||
@@ -1963,7 +1963,7 @@ mod tests {
|
||||
..Default::default()
|
||||
},
|
||||
cpu_info: CpuInfo {
|
||||
default_vcpus: 1,
|
||||
default_vcpus: 1.0,
|
||||
default_maxvcpus: 1,
|
||||
|
||||
..Default::default()
|
||||
|
||||
@@ -8,14 +8,15 @@ use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use kata_sys_util::rand::RandomBytes;
|
||||
use kata_types::config::hypervisor::TopologyConfigInfo;
|
||||
use kata_types::config::hypervisor::{TopologyConfigInfo, VIRTIO_SCSI};
|
||||
use tokio::sync::{Mutex, RwLock};
|
||||
|
||||
use crate::{
|
||||
vhost_user_blk::VhostUserBlkDevice, BlockConfig, BlockDevice, HybridVsockDevice, Hypervisor,
|
||||
NetworkDevice, PCIePortDevice, ProtectionDevice, ShareFsDevice, VfioDevice, VhostUserConfig,
|
||||
VhostUserNetDevice, VsockDevice, KATA_BLK_DEV_TYPE, KATA_CCW_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE,
|
||||
KATA_NVDIMM_DEV_TYPE, VIRTIO_BLOCK_CCW, VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI, VIRTIO_PMEM,
|
||||
KATA_NVDIMM_DEV_TYPE, KATA_SCSI_DEV_TYPE, VIRTIO_BLOCK_CCW, VIRTIO_BLOCK_MMIO,
|
||||
VIRTIO_BLOCK_PCI, VIRTIO_PMEM,
|
||||
};
|
||||
|
||||
use super::{
|
||||
@@ -471,6 +472,9 @@ impl DeviceManager {
|
||||
block_config.driver_option = KATA_NVDIMM_DEV_TYPE.to_string();
|
||||
is_pmem = true;
|
||||
}
|
||||
VIRTIO_SCSI => {
|
||||
block_config.driver_option = KATA_SCSI_DEV_TYPE.to_string();
|
||||
}
|
||||
_ => {
|
||||
return Err(anyhow!(
|
||||
"unsupported driver type {}",
|
||||
|
||||
@@ -25,7 +25,8 @@ pub use vhost_user::{VhostUserConfig, VhostUserDevice, VhostUserType};
|
||||
pub use vhost_user_net::VhostUserNetDevice;
|
||||
pub use virtio_blk::{
|
||||
BlockConfig, BlockDevice, KATA_BLK_DEV_TYPE, KATA_CCW_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE,
|
||||
KATA_NVDIMM_DEV_TYPE, VIRTIO_BLOCK_CCW, VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI, VIRTIO_PMEM,
|
||||
KATA_NVDIMM_DEV_TYPE, KATA_SCSI_DEV_TYPE, VIRTIO_BLOCK_CCW, VIRTIO_BLOCK_MMIO,
|
||||
VIRTIO_BLOCK_PCI, VIRTIO_PMEM,
|
||||
};
|
||||
pub use virtio_fs::{
|
||||
ShareFsConfig, ShareFsDevice, ShareFsMountConfig, ShareFsMountOperation, ShareFsMountType,
|
||||
|
||||
@@ -23,6 +23,41 @@ pub const KATA_MMIO_BLK_DEV_TYPE: &str = "mmioblk";
|
||||
pub const KATA_BLK_DEV_TYPE: &str = "blk";
|
||||
pub const KATA_CCW_DEV_TYPE: &str = "ccw";
|
||||
pub const KATA_NVDIMM_DEV_TYPE: &str = "nvdimm";
|
||||
pub const KATA_SCSI_DEV_TYPE: &str = "scsi";
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
pub enum BlockDeviceAio {
|
||||
// IoUring is the Linux io_uring I/O implementation.
|
||||
#[default]
|
||||
IoUring,
|
||||
|
||||
// Native is the native Linux AIO implementation.
|
||||
Native,
|
||||
|
||||
// Threads is the pthread asynchronous I/O implementation.
|
||||
Threads,
|
||||
}
|
||||
|
||||
impl BlockDeviceAio {
|
||||
pub fn new(aio: &str) -> Self {
|
||||
match aio {
|
||||
"native" => BlockDeviceAio::Native,
|
||||
"threads" => BlockDeviceAio::Threads,
|
||||
_ => BlockDeviceAio::IoUring,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for BlockDeviceAio {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let to_string = match *self {
|
||||
BlockDeviceAio::Native => "native".to_string(),
|
||||
BlockDeviceAio::Threads => "threads".to_string(),
|
||||
_ => "iouring".to_string(),
|
||||
};
|
||||
write!(f, "{}", to_string)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct BlockConfig {
|
||||
@@ -44,6 +79,9 @@ pub struct BlockConfig {
|
||||
/// device index
|
||||
pub index: u64,
|
||||
|
||||
/// blkdev_aio defines the type of asynchronous I/O the block device should use.
|
||||
pub blkdev_aio: BlockDeviceAio,
|
||||
|
||||
/// driver type for block device
|
||||
pub driver_option: String,
|
||||
|
||||
@@ -53,6 +91,10 @@ pub struct BlockConfig {
|
||||
/// pci path is the slot at which the drive is attached
|
||||
pub pci_path: Option<PciPath>,
|
||||
|
||||
/// scsi_addr of the block device, in case the device is attached using SCSI driver
|
||||
/// scsi_addr is of the format SCSI-Id:LUN
|
||||
pub scsi_addr: Option<String>,
|
||||
|
||||
/// device attach count
|
||||
pub attach_count: u64,
|
||||
|
||||
|
||||
@@ -103,6 +103,9 @@ impl FcInner {
|
||||
cmd.args(["--api-sock", &self.asock_path]);
|
||||
}
|
||||
}
|
||||
if self.config.security_info.disable_seccomp {
|
||||
cmd.arg("--no-seccomp");
|
||||
}
|
||||
debug!(sl(), "Exec: {:?}", cmd);
|
||||
|
||||
// Make sure we're in the correct Network Namespace
|
||||
|
||||
@@ -2182,6 +2182,14 @@ impl<'a> QemuCmdLine<'a> {
|
||||
qemu_cmd_line.add_virtio_balloon();
|
||||
}
|
||||
|
||||
if let Some(seccomp_sandbox) = &config
|
||||
.security_info
|
||||
.seccomp_sandbox
|
||||
.as_ref()
|
||||
.filter(|s| !s.is_empty())
|
||||
{
|
||||
qemu_cmd_line.add_seccomp_sandbox(seccomp_sandbox);
|
||||
}
|
||||
Ok(qemu_cmd_line)
|
||||
}
|
||||
|
||||
@@ -2620,6 +2628,11 @@ impl<'a> QemuCmdLine<'a> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_seccomp_sandbox(&mut self, param: &str) {
|
||||
let seccomp_sandbox = SeccompSandbox::new(param);
|
||||
self.devices.push(Box::new(seccomp_sandbox));
|
||||
}
|
||||
|
||||
pub async fn build(&self) -> Result<Vec<String>> {
|
||||
let mut result = Vec::new();
|
||||
|
||||
@@ -2706,3 +2719,23 @@ impl ToQemuParams for DeviceVirtioBalloon {
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct SeccompSandbox {
|
||||
param: String,
|
||||
}
|
||||
|
||||
impl SeccompSandbox {
|
||||
fn new(param: &str) -> Self {
|
||||
SeccompSandbox {
|
||||
param: param.to_owned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToQemuParams for SeccompSandbox {
|
||||
async fn qemu_params(&self) -> Result<Vec<String>> {
|
||||
Ok(vec!["-sandbox".to_owned(), self.param.clone()])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,7 +22,6 @@ use kata_types::{
|
||||
};
|
||||
use persist::sandbox_persist::Persist;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryInto;
|
||||
use std::path::Path;
|
||||
use std::process::Stdio;
|
||||
@@ -288,13 +287,14 @@ impl QemuInner {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) async fn get_thread_ids(&self) -> Result<VcpuThreadIds> {
|
||||
pub(crate) async fn get_thread_ids(&mut self) -> Result<VcpuThreadIds> {
|
||||
info!(sl!(), "QemuInner::get_thread_ids()");
|
||||
//todo!()
|
||||
let vcpu_thread_ids: VcpuThreadIds = VcpuThreadIds {
|
||||
vcpus: HashMap::new(),
|
||||
};
|
||||
Ok(vcpu_thread_ids)
|
||||
|
||||
Ok(self
|
||||
.qmp
|
||||
.as_mut()
|
||||
.and_then(|qmp| qmp.get_vcpu_thread_ids().ok())
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
|
||||
@@ -632,17 +632,25 @@ impl QemuInner {
|
||||
qmp.hotplug_network_device(&netdev, &virtio_net_device)?
|
||||
}
|
||||
DeviceType::Block(mut block_device) => {
|
||||
block_device.config.pci_path = qmp
|
||||
let (pci_path, scsi_addr) = qmp
|
||||
.hotplug_block_device(
|
||||
&self.config.blockdev_info.block_device_driver,
|
||||
&block_device.device_id,
|
||||
block_device.config.index,
|
||||
&block_device.config.path_on_host,
|
||||
&block_device.config.blkdev_aio.to_string(),
|
||||
block_device.config.is_direct,
|
||||
block_device.config.is_readonly,
|
||||
block_device.config.no_drop,
|
||||
)
|
||||
.context("hotplug block device")?;
|
||||
|
||||
if pci_path.is_some() {
|
||||
block_device.config.pci_path = pci_path;
|
||||
}
|
||||
if scsi_addr.is_some() {
|
||||
block_device.config.scsi_addr = scsi_addr;
|
||||
}
|
||||
|
||||
return Ok(DeviceType::Block(block_device));
|
||||
}
|
||||
DeviceType::Vfio(mut vfiodev) => {
|
||||
|
||||
@@ -135,7 +135,7 @@ impl Hypervisor for Qemu {
|
||||
}
|
||||
|
||||
async fn get_thread_ids(&self) -> Result<VcpuThreadIds> {
|
||||
let inner = self.inner.read().await;
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.get_thread_ids().await
|
||||
}
|
||||
|
||||
|
||||
@@ -5,18 +5,24 @@
|
||||
|
||||
use crate::device::pci_path::PciPath;
|
||||
use crate::qemu::cmdline_generator::{DeviceVirtioNet, Netdev};
|
||||
use crate::VcpuThreadIds;
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use kata_types::config::hypervisor::VIRTIO_SCSI;
|
||||
use nix::sys::socket::{sendmsg, ControlMessage, MsgFlags};
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryFrom;
|
||||
use std::fmt::{Debug, Error, Formatter};
|
||||
use std::io::BufReader;
|
||||
use std::os::fd::{AsRawFd, RawFd};
|
||||
use std::os::unix::net::UnixStream;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use qapi::qmp;
|
||||
use qapi_qmp::{self, PciDeviceInfo};
|
||||
use qapi_qmp::{
|
||||
self as qmp, BlockdevAioOptions, BlockdevOptions, BlockdevOptionsBase,
|
||||
BlockdevOptionsGenericFormat, BlockdevOptionsRaw, BlockdevRef, PciDeviceInfo,
|
||||
};
|
||||
use qapi_spec::Dictionary;
|
||||
|
||||
/// default qmp connection read timeout
|
||||
@@ -493,7 +499,7 @@ impl Qmp {
|
||||
Err(anyhow!("no target device found"))
|
||||
}
|
||||
|
||||
/// hotplug block device:
|
||||
/// Hotplug block device:
|
||||
/// {
|
||||
/// "execute": "blockdev-add",
|
||||
/// "arguments": {
|
||||
@@ -514,90 +520,161 @@ impl Qmp {
|
||||
/// "bus": "pcie.1"
|
||||
/// }
|
||||
/// }
|
||||
/// Hotplug SCSI block device
|
||||
/// # virtio-scsi0
|
||||
/// {"execute":"device_add","arguments":{"driver":"virtio-scsi-pci","id":"virtio-scsi0","bus":"bus1"}}
|
||||
/// {"return": {}}
|
||||
///
|
||||
/// {"execute":"blockdev_add", "arguments": {"file":"/path/to/block.image","format":"qcow2","id":"virtio-scsi0"}}
|
||||
/// {"return": {}}
|
||||
/// {"execute":"device_add","arguments":{"driver":"scsi-hd","drive":"virtio-scsi0","id":"scsi_device_0","bus":"virtio-scsi1.0"}}
|
||||
/// {"return": {}}
|
||||
///
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn hotplug_block_device(
|
||||
&mut self,
|
||||
block_driver: &str,
|
||||
device_id: &str,
|
||||
index: u64,
|
||||
path_on_host: &str,
|
||||
blkdev_aio: &str,
|
||||
is_direct: Option<bool>,
|
||||
is_readonly: bool,
|
||||
no_drop: bool,
|
||||
) -> Result<Option<PciPath>> {
|
||||
let (bus, slot) = self.find_free_slot()?;
|
||||
|
||||
) -> Result<(Option<PciPath>, Option<String>)> {
|
||||
// `blockdev-add`
|
||||
let node_name = format!("drive-{}", device_id);
|
||||
let node_name = format!("drive-{index}");
|
||||
|
||||
let create_base_options = || qapi_qmp::BlockdevOptionsBase {
|
||||
auto_read_only: None,
|
||||
cache: if is_direct.is_none() {
|
||||
None
|
||||
} else {
|
||||
Some(qapi_qmp::BlockdevCacheOptions {
|
||||
direct: is_direct,
|
||||
no_flush: None,
|
||||
})
|
||||
},
|
||||
detect_zeroes: None,
|
||||
discard: None,
|
||||
force_share: None,
|
||||
node_name: None,
|
||||
read_only: Some(is_readonly),
|
||||
};
|
||||
|
||||
let create_backend_options = || qapi_qmp::BlockdevOptionsFile {
|
||||
aio: Some(
|
||||
BlockdevAioOptions::from_str(blkdev_aio).unwrap_or(BlockdevAioOptions::io_uring),
|
||||
),
|
||||
aio_max_batch: None,
|
||||
drop_cache: if !no_drop { None } else { Some(no_drop) },
|
||||
locking: None,
|
||||
pr_manager: None,
|
||||
x_check_cache_dropped: None,
|
||||
filename: path_on_host.to_owned(),
|
||||
};
|
||||
|
||||
// Add block device backend and check if the file is a regular file or device
|
||||
let blockdev_file = if std::fs::metadata(path_on_host)?.is_file() {
|
||||
// Regular file
|
||||
qmp::BlockdevOptions::file {
|
||||
base: create_base_options(),
|
||||
file: create_backend_options(),
|
||||
}
|
||||
} else {
|
||||
// Host device (e.g., /dev/sdx, /dev/loopX)
|
||||
qmp::BlockdevOptions::host_device {
|
||||
base: create_base_options(),
|
||||
host_device: create_backend_options(),
|
||||
}
|
||||
};
|
||||
|
||||
let blockdev_options_raw = BlockdevOptions::raw {
|
||||
base: BlockdevOptionsBase {
|
||||
detect_zeroes: None,
|
||||
cache: None,
|
||||
discard: None,
|
||||
force_share: None,
|
||||
auto_read_only: None,
|
||||
node_name: Some(node_name.clone()),
|
||||
read_only: None,
|
||||
},
|
||||
raw: BlockdevOptionsRaw {
|
||||
base: BlockdevOptionsGenericFormat {
|
||||
file: BlockdevRef::definition(Box::new(blockdev_file)),
|
||||
},
|
||||
offset: None,
|
||||
size: None,
|
||||
},
|
||||
};
|
||||
|
||||
self.qmp
|
||||
.execute(&qmp::blockdev_add(qmp::BlockdevOptions::raw {
|
||||
base: qmp::BlockdevOptionsBase {
|
||||
detect_zeroes: None,
|
||||
cache: None,
|
||||
discard: None,
|
||||
force_share: None,
|
||||
auto_read_only: None,
|
||||
node_name: Some(node_name.clone()),
|
||||
read_only: None,
|
||||
},
|
||||
raw: qmp::BlockdevOptionsRaw {
|
||||
base: qmp::BlockdevOptionsGenericFormat {
|
||||
file: qmp::BlockdevRef::definition(Box::new(qmp::BlockdevOptions::file {
|
||||
base: qapi_qmp::BlockdevOptionsBase {
|
||||
auto_read_only: None,
|
||||
cache: if is_direct.is_none() {
|
||||
None
|
||||
} else {
|
||||
Some(qapi_qmp::BlockdevCacheOptions {
|
||||
direct: is_direct,
|
||||
no_flush: None,
|
||||
})
|
||||
},
|
||||
detect_zeroes: None,
|
||||
discard: None,
|
||||
force_share: None,
|
||||
node_name: None,
|
||||
read_only: Some(is_readonly),
|
||||
},
|
||||
file: qapi_qmp::BlockdevOptionsFile {
|
||||
aio: None,
|
||||
aio_max_batch: None,
|
||||
drop_cache: if !no_drop { None } else { Some(no_drop) },
|
||||
locking: None,
|
||||
pr_manager: None,
|
||||
x_check_cache_dropped: None,
|
||||
filename: path_on_host.to_owned(),
|
||||
},
|
||||
})),
|
||||
},
|
||||
offset: None,
|
||||
size: None,
|
||||
},
|
||||
}))
|
||||
.map_err(|e| anyhow!("blockdev_add {:?}", e))
|
||||
.execute(&qapi_qmp::blockdev_add(blockdev_options_raw))
|
||||
.map_err(|e| anyhow!("blockdev-add backend {:?}", e))
|
||||
.map(|_| ())?;
|
||||
|
||||
// block device
|
||||
// `device_add`
|
||||
let mut blkdev_add_args = Dictionary::new();
|
||||
blkdev_add_args.insert("addr".to_owned(), format!("{:02}", slot).into());
|
||||
blkdev_add_args.insert("drive".to_owned(), node_name.clone().into());
|
||||
self.qmp
|
||||
.execute(&qmp::device_add {
|
||||
bus: Some(bus),
|
||||
id: Some(node_name.clone()),
|
||||
driver: block_driver.to_string(),
|
||||
arguments: blkdev_add_args,
|
||||
})
|
||||
.map_err(|e| anyhow!("device_add {:?}", e))
|
||||
.map(|_| ())?;
|
||||
|
||||
let pci_path = self
|
||||
.get_device_by_qdev_id(&node_name)
|
||||
.context("get device by qdev_id failed")?;
|
||||
info!(
|
||||
sl!(),
|
||||
"hotplug_block_device return pci path: {:?}", &pci_path
|
||||
);
|
||||
if block_driver == VIRTIO_SCSI {
|
||||
// Helper closure to decode a flattened u16 SCSI index into an (ID, LUN) pair.
|
||||
let get_scsi_id_lun = |index_u16: u16| -> Result<(u8, u8)> {
|
||||
// Uses bitwise operations for efficient and clear conversion.
|
||||
let scsi_id = (index_u16 >> 8) as u8; // Equivalent to index_u16 / 256
|
||||
let lun = (index_u16 & 0xFF) as u8; // Equivalent to index_u16 % 256
|
||||
|
||||
Ok(Some(pci_path))
|
||||
Ok((scsi_id, lun))
|
||||
};
|
||||
|
||||
// Safely convert the u64 index to u16, ensuring it does not exceed `u16::MAX` (65535).
|
||||
let (scsi_id, lun) = get_scsi_id_lun(u16::try_from(index)?)?;
|
||||
let scsi_addr = format!("{}:{}", scsi_id, lun);
|
||||
|
||||
// add SCSI frontend device
|
||||
blkdev_add_args.insert("scsi-id".to_string(), scsi_id.into());
|
||||
blkdev_add_args.insert("lun".to_string(), lun.into());
|
||||
|
||||
self.qmp
|
||||
.execute(&qmp::device_add {
|
||||
bus: Some("scsi0.0".to_string()),
|
||||
id: Some(node_name.clone()),
|
||||
driver: "scsi-hd".to_string(),
|
||||
arguments: blkdev_add_args,
|
||||
})
|
||||
.map_err(|e| anyhow!("device_add {:?}", e))
|
||||
.map(|_| ())?;
|
||||
|
||||
info!(
|
||||
sl!(),
|
||||
"hotplug scsi block device return scsi address: {:?}", &scsi_addr
|
||||
);
|
||||
|
||||
Ok((None, Some(scsi_addr)))
|
||||
} else {
|
||||
let (bus, slot) = self.find_free_slot()?;
|
||||
blkdev_add_args.insert("addr".to_owned(), format!("{:02}", slot).into());
|
||||
|
||||
self.qmp
|
||||
.execute(&qmp::device_add {
|
||||
bus: Some(bus),
|
||||
id: Some(node_name.clone()),
|
||||
driver: block_driver.to_string(),
|
||||
arguments: blkdev_add_args,
|
||||
})
|
||||
.map_err(|e| anyhow!("device_add {:?}", e))
|
||||
.map(|_| ())?;
|
||||
|
||||
let pci_path = self
|
||||
.get_device_by_qdev_id(&node_name)
|
||||
.context("get device by qdev_id failed")?;
|
||||
info!(
|
||||
sl!(),
|
||||
"hotplug block device return pci path: {:?}", &pci_path
|
||||
);
|
||||
|
||||
Ok((Some(pci_path), None))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn hotplug_vfio_device(
|
||||
@@ -659,6 +736,61 @@ impl Qmp {
|
||||
|
||||
Ok(Some(pci_path))
|
||||
}
|
||||
|
||||
/// Get vCPU thread IDs through QMP query_cpus_fast.
|
||||
pub fn get_vcpu_thread_ids(&mut self) -> Result<VcpuThreadIds> {
|
||||
let vcpu_info = self
|
||||
.qmp
|
||||
.execute(&qmp::query_cpus_fast {})
|
||||
.map_err(|e| anyhow!("query_cpus_fast failed: {:?}", e))?;
|
||||
|
||||
let vcpus: HashMap<u32, u32> = vcpu_info
|
||||
.iter()
|
||||
.map(|info| match info {
|
||||
qmp::CpuInfoFast::aarch64(cpu_info)
|
||||
| qmp::CpuInfoFast::alpha(cpu_info)
|
||||
| qmp::CpuInfoFast::arm(cpu_info)
|
||||
| qmp::CpuInfoFast::avr(cpu_info)
|
||||
| qmp::CpuInfoFast::cris(cpu_info)
|
||||
| qmp::CpuInfoFast::hppa(cpu_info)
|
||||
| qmp::CpuInfoFast::i386(cpu_info)
|
||||
| qmp::CpuInfoFast::loongarch64(cpu_info)
|
||||
| qmp::CpuInfoFast::m68k(cpu_info)
|
||||
| qmp::CpuInfoFast::microblaze(cpu_info)
|
||||
| qmp::CpuInfoFast::microblazeel(cpu_info)
|
||||
| qmp::CpuInfoFast::mips(cpu_info)
|
||||
| qmp::CpuInfoFast::mips64(cpu_info)
|
||||
| qmp::CpuInfoFast::mips64el(cpu_info)
|
||||
| qmp::CpuInfoFast::mipsel(cpu_info)
|
||||
| qmp::CpuInfoFast::nios2(cpu_info)
|
||||
| qmp::CpuInfoFast::or1k(cpu_info)
|
||||
| qmp::CpuInfoFast::ppc(cpu_info)
|
||||
| qmp::CpuInfoFast::ppc64(cpu_info)
|
||||
| qmp::CpuInfoFast::riscv32(cpu_info)
|
||||
| qmp::CpuInfoFast::riscv64(cpu_info)
|
||||
| qmp::CpuInfoFast::rx(cpu_info)
|
||||
| qmp::CpuInfoFast::sh4(cpu_info)
|
||||
| qmp::CpuInfoFast::sh4eb(cpu_info)
|
||||
| qmp::CpuInfoFast::sparc(cpu_info)
|
||||
| qmp::CpuInfoFast::sparc64(cpu_info)
|
||||
| qmp::CpuInfoFast::tricore(cpu_info)
|
||||
| qmp::CpuInfoFast::x86_64(cpu_info)
|
||||
| qmp::CpuInfoFast::xtensa(cpu_info)
|
||||
| qmp::CpuInfoFast::xtensaeb(cpu_info) => {
|
||||
let vcpu_id = cpu_info.cpu_index as u32;
|
||||
let thread_id = cpu_info.thread_id as u32;
|
||||
(vcpu_id, thread_id)
|
||||
}
|
||||
qmp::CpuInfoFast::s390x { base, .. } => {
|
||||
let vcpu_id = base.cpu_index as u32;
|
||||
let thread_id = base.thread_id as u32;
|
||||
(vcpu_id, thread_id)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(VcpuThreadIds { vcpus })
|
||||
}
|
||||
}
|
||||
|
||||
fn vcpu_id_from_core_id(core_id: i64) -> String {
|
||||
|
||||
@@ -17,7 +17,7 @@ anyhow = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
bitflags = "2.9.0"
|
||||
byte-unit = "5.1.6"
|
||||
cgroups-rs = { git = "https://github.com/kata-containers/cgroups-rs", rev = "v0.3.5" }
|
||||
cgroups-rs = { version = "0.4.0", features = ["oci"] }
|
||||
futures = "0.3.11"
|
||||
lazy_static = { workspace = true }
|
||||
libc = { workspace = true }
|
||||
|
||||
@@ -5,30 +5,19 @@
|
||||
//
|
||||
|
||||
pub mod cgroup_persist;
|
||||
mod resource;
|
||||
pub use resource::CgroupsResource;
|
||||
mod resource_inner;
|
||||
mod utils;
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use cgroup_persist::CgroupState;
|
||||
use cgroups_rs::{cgroup_builder::CgroupBuilder, Cgroup, CgroupPid, CpuResources, Resources};
|
||||
use hypervisor::Hypervisor;
|
||||
use anyhow::{anyhow, Result};
|
||||
use cgroups_rs::manager::is_systemd_cgroup;
|
||||
use hypervisor::HYPERVISOR_DRAGONBALL;
|
||||
use kata_sys_util::spec::load_oci_spec;
|
||||
use kata_types::config::TomlConfig;
|
||||
use oci::LinuxResources;
|
||||
use oci_spec::runtime as oci;
|
||||
use persist::sandbox_persist::Persist;
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
error::Error,
|
||||
io,
|
||||
iter::FromIterator,
|
||||
sync::Arc,
|
||||
};
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::ResourceUpdateOp;
|
||||
use crate::cgroups::cgroup_persist::CgroupState;
|
||||
|
||||
const OS_ERROR_NO_SUCH_PROCESS: i32 = 3;
|
||||
const SANDBOXED_CGROUP_PATH: &str = "kata_sandboxed_pod";
|
||||
|
||||
pub struct CgroupArgs {
|
||||
@@ -44,7 +33,6 @@ pub struct CgroupConfig {
|
||||
|
||||
impl CgroupConfig {
|
||||
fn new(sid: &str, toml_config: &TomlConfig) -> Result<Self> {
|
||||
let overhead_path = utils::gen_overhead_path(sid);
|
||||
let path = if let Ok(spec) = load_oci_spec() {
|
||||
spec.linux()
|
||||
.clone()
|
||||
@@ -60,260 +48,38 @@ impl CgroupConfig {
|
||||
} else {
|
||||
format!("{}/{}", SANDBOXED_CGROUP_PATH, sid)
|
||||
};
|
||||
|
||||
let overhead_path = utils::gen_overhead_path(is_systemd_cgroup(&path), sid);
|
||||
|
||||
// Dragonball and runtime are the same process, so that the
|
||||
// sandbox_cgroup_only is overwriten to true.
|
||||
let sandbox_cgroup_only = if toml_config.runtime.hypervisor_name == HYPERVISOR_DRAGONBALL {
|
||||
true
|
||||
} else {
|
||||
toml_config.runtime.sandbox_cgroup_only
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
path,
|
||||
overhead_path,
|
||||
sandbox_cgroup_only: toml_config.runtime.sandbox_cgroup_only,
|
||||
sandbox_cgroup_only,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CgroupsResource {
|
||||
resources: Arc<RwLock<HashMap<String, Resources>>>,
|
||||
cgroup_manager: Cgroup,
|
||||
overhead_cgroup_manager: Option<Cgroup>,
|
||||
cgroup_config: CgroupConfig,
|
||||
}
|
||||
|
||||
impl CgroupsResource {
|
||||
pub fn new(sid: &str, toml_config: &TomlConfig) -> Result<Self> {
|
||||
let config = CgroupConfig::new(sid, toml_config)?;
|
||||
|
||||
// Create the sandbox cgroups manager (cgroups on Linux).
|
||||
// Depending on the sandbox_cgroup_only value, this cgroup
|
||||
// will either hold all the pod threads (sandbox_cgroup_only is true)
|
||||
// or only the virtual CPU ones (sandbox_cgroup_only is false).
|
||||
let hier = cgroups_rs::hierarchies::auto();
|
||||
let cgroup_manager = CgroupBuilder::new(&config.path).build(hier)?;
|
||||
|
||||
// The shim configuration is requesting that we do not put all threads
|
||||
// into the sandbox resource controller.
|
||||
// We're creating an overhead controller, with no constraints. Everything but
|
||||
// the vCPU threads will eventually make it there.
|
||||
let overhead_cgroup_manager = if !config.sandbox_cgroup_only {
|
||||
let hier = cgroups_rs::hierarchies::auto();
|
||||
Some(CgroupBuilder::new(&config.overhead_path).build(hier)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Add the runtime to the VMM sandbox resource controller
|
||||
|
||||
// By adding the runtime process to either the sandbox or overhead controller, we are making
|
||||
// sure that any child process of the runtime (i.e. *all* processes serving a Kata pod)
|
||||
// will initially live in this controller. Depending on the sandbox_cgroup_only settings, we will
|
||||
// then move the vCPU threads between resource controllers.
|
||||
let pid = CgroupPid { pid: 0 };
|
||||
if let Some(manager) = overhead_cgroup_manager.as_ref() {
|
||||
manager.add_task_by_tgid(pid).context("add task by tgid")?;
|
||||
} else {
|
||||
cgroup_manager
|
||||
.add_task_by_tgid(pid)
|
||||
.context("add task by tgid with sandbox only")?;
|
||||
}
|
||||
fn restore(state: &CgroupState) -> Result<Self> {
|
||||
let path = state
|
||||
.path
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("cgroup path is missing in state"))?;
|
||||
let overhead_path = state
|
||||
.overhead_path
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("overhead path is missing in state"))?;
|
||||
|
||||
Ok(Self {
|
||||
cgroup_manager,
|
||||
resources: Arc::new(RwLock::new(HashMap::new())),
|
||||
overhead_cgroup_manager,
|
||||
cgroup_config: config,
|
||||
})
|
||||
}
|
||||
|
||||
/// delete will move the running processes in the cgroup_manager and
|
||||
/// overhead_cgroup_manager to the parent and then delete the cgroups.
|
||||
pub async fn delete(&self) -> Result<()> {
|
||||
for cg_pid in self.cgroup_manager.procs() {
|
||||
// For now, we can't guarantee that the process in cgroup_manager does still
|
||||
// exist. Once it exit, we should ignore that error returned by remove_task_by_tgid
|
||||
// to let it go.
|
||||
if let Err(error) = self.cgroup_manager.remove_task_by_tgid(cg_pid) {
|
||||
match error.source() {
|
||||
Some(err) => match err.downcast_ref::<io::Error>() {
|
||||
Some(e) => {
|
||||
if e.raw_os_error() != Some(OS_ERROR_NO_SUCH_PROCESS) {
|
||||
return Err(error.into());
|
||||
}
|
||||
}
|
||||
None => return Err(error.into()),
|
||||
},
|
||||
None => return Err(error.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.cgroup_manager
|
||||
.delete()
|
||||
.context("delete cgroup manager")?;
|
||||
|
||||
if let Some(overhead) = self.overhead_cgroup_manager.as_ref() {
|
||||
for cg_pid in overhead.tasks() {
|
||||
overhead.remove_task(cg_pid)?;
|
||||
}
|
||||
overhead.delete().context("delete overhead")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn update_cgroups(
|
||||
&self,
|
||||
cid: &str,
|
||||
linux_resources: Option<&LinuxResources>,
|
||||
op: ResourceUpdateOp,
|
||||
h: &dyn Hypervisor,
|
||||
) -> Result<()> {
|
||||
let new_resources = self.calc_resource(linux_resources);
|
||||
let old_resources = self.update_resources(cid, new_resources.clone(), op).await;
|
||||
|
||||
if let Some(old_resource) = old_resources.clone() {
|
||||
if old_resource == new_resources {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
match self.do_update_cgroups(h).await {
|
||||
Err(e) => {
|
||||
// if update failed, we should roll back the records in resources
|
||||
let mut resources = self.resources.write().await;
|
||||
match op {
|
||||
ResourceUpdateOp::Add => {
|
||||
resources.remove(cid);
|
||||
}
|
||||
ResourceUpdateOp::Update | ResourceUpdateOp::Del => {
|
||||
if let Some(old_resource) = old_resources {
|
||||
resources.insert(cid.to_owned(), old_resource);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e)
|
||||
}
|
||||
Ok(()) => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn update_resources(
|
||||
&self,
|
||||
cid: &str,
|
||||
new_resource: Resources,
|
||||
op: ResourceUpdateOp,
|
||||
) -> Option<Resources> {
|
||||
let mut resources = self.resources.write().await;
|
||||
match op {
|
||||
ResourceUpdateOp::Add | ResourceUpdateOp::Update => {
|
||||
resources.insert(cid.to_owned(), new_resource.clone())
|
||||
}
|
||||
ResourceUpdateOp::Del => resources.remove(cid),
|
||||
}
|
||||
}
|
||||
|
||||
async fn do_update_cgroups(&self, h: &dyn Hypervisor) -> Result<()> {
|
||||
let merged_resources = self.merge_resources().await;
|
||||
self.cgroup_manager
|
||||
.apply(&merged_resources)
|
||||
.map_err(|e| anyhow!(e))?;
|
||||
|
||||
if self.overhead_cgroup_manager.is_some() {
|
||||
// If we have an overhead controller, new vCPU threads would start there,
|
||||
// as being children of the VMM PID.
|
||||
// We need to constrain them by moving them into the sandbox controller.
|
||||
self.constrain_hypervisor(h).await?
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// constrain_hypervisor will place the VMM and vCPU threads into resource controllers (cgroups on Linux).
|
||||
async fn constrain_hypervisor(&self, h: &dyn Hypervisor) -> Result<()> {
|
||||
let tids = h.get_thread_ids().await?;
|
||||
let tids = tids.vcpus.values();
|
||||
|
||||
// All vCPU threads move to the sandbox controller.
|
||||
for tid in tids {
|
||||
self.cgroup_manager
|
||||
.add_task(CgroupPid { pid: *tid as u64 })?
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn merge_resources(&self) -> Resources {
|
||||
let resources = self.resources.read().await;
|
||||
|
||||
let mut cpu_list: HashSet<String> = HashSet::new();
|
||||
let mut mem_list: HashSet<String> = HashSet::new();
|
||||
|
||||
resources.values().for_each(|r| {
|
||||
if let Some(cpus) = &r.cpu.cpus {
|
||||
cpu_list.insert(cpus.clone());
|
||||
}
|
||||
if let Some(mems) = &r.cpu.mems {
|
||||
mem_list.insert(mems.clone());
|
||||
}
|
||||
});
|
||||
|
||||
let cpu_resource = CpuResources {
|
||||
cpus: Some(Vec::from_iter(cpu_list.into_iter()).join(",")),
|
||||
mems: Some(Vec::from_iter(mem_list.into_iter()).join(",")),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
Resources {
|
||||
cpu: cpu_resource,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn calc_cpu_resources(&self, linux_resources: Option<&LinuxResources>) -> CpuResources {
|
||||
let cpus = linux_resources
|
||||
.and_then(|res| res.cpu().clone())
|
||||
.and_then(|cpu| cpu.cpus().clone());
|
||||
|
||||
let mems = linux_resources
|
||||
.and_then(|res| res.cpu().clone())
|
||||
.and_then(|cpu| cpu.mems().clone());
|
||||
|
||||
CpuResources {
|
||||
cpus,
|
||||
mems,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn calc_resource(&self, linux_resources: Option<&LinuxResources>) -> Resources {
|
||||
Resources {
|
||||
cpu: self.calc_cpu_resources(linux_resources),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Persist for CgroupsResource {
|
||||
type State = CgroupState;
|
||||
type ConstructorArgs = CgroupArgs;
|
||||
/// Save a state of the component.
|
||||
async fn save(&self) -> Result<Self::State> {
|
||||
Ok(CgroupState {
|
||||
path: Some(self.cgroup_config.path.clone()),
|
||||
overhead_path: Some(self.cgroup_config.overhead_path.clone()),
|
||||
sandbox_cgroup_only: self.cgroup_config.sandbox_cgroup_only,
|
||||
})
|
||||
}
|
||||
/// Restore a component from a specified state.
|
||||
async fn restore(
|
||||
cgroup_args: Self::ConstructorArgs,
|
||||
cgroup_state: Self::State,
|
||||
) -> Result<Self> {
|
||||
let hier = cgroups_rs::hierarchies::auto();
|
||||
let config = CgroupConfig::new(&cgroup_args.sid, &cgroup_args.config)?;
|
||||
let path = cgroup_state.path.unwrap_or_default();
|
||||
let cgroup_manager = Cgroup::load(hier, path.as_str());
|
||||
Ok(Self {
|
||||
cgroup_manager,
|
||||
resources: Arc::new(RwLock::new(HashMap::new())),
|
||||
overhead_cgroup_manager: None,
|
||||
cgroup_config: config,
|
||||
path: path.clone(),
|
||||
overhead_path: overhead_path.clone(),
|
||||
sandbox_cgroup_only: state.sandbox_cgroup_only,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
99
src/runtime-rs/crates/resource/src/cgroups/resource.rs
Normal file
99
src/runtime-rs/crates/resource/src/cgroups/resource.rs
Normal file
@@ -0,0 +1,99 @@
|
||||
// Copyright (c) 2019-2022 Alibaba Cloud
|
||||
// Copyright (c) 2019-2025 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use hypervisor::Hypervisor;
|
||||
use kata_types::config::TomlConfig;
|
||||
use oci_spec::runtime::LinuxResources;
|
||||
use persist::sandbox_persist::Persist;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::cgroups::cgroup_persist::CgroupState;
|
||||
use crate::cgroups::resource_inner::CgroupsResourceInner;
|
||||
use crate::cgroups::{CgroupArgs, CgroupConfig};
|
||||
use crate::ResourceUpdateOp;
|
||||
|
||||
/// CgroupsResource manages sandbox cgroup and overhead cgroup.
|
||||
///
|
||||
/// Putting the processes under the cgroup from OCI spec (a.k.a sandbox
|
||||
/// cgroup) by default. The container runtime (e.g. containerd) imposes
|
||||
/// limits on the parent of that cgroup. In case of disabling
|
||||
/// `sandbox_cgroup_only`, the runtime and other components except for VMM
|
||||
/// (e.g. virtiofsd) are put under the overhead cgroup, which no resource
|
||||
/// limits are imposed on it.
|
||||
pub struct CgroupsResource {
|
||||
cgroup_config: CgroupConfig,
|
||||
inner: Arc<RwLock<CgroupsResourceInner>>,
|
||||
}
|
||||
|
||||
impl CgroupsResource {
|
||||
pub fn new(sid: &str, toml_config: &TomlConfig) -> Result<Self> {
|
||||
let cgroup_config = CgroupConfig::new(sid, toml_config)?;
|
||||
let inner = CgroupsResourceInner::new(&cgroup_config)?;
|
||||
let inner = Arc::new(RwLock::new(inner));
|
||||
|
||||
Ok(Self {
|
||||
cgroup_config,
|
||||
inner,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl CgroupsResource {
|
||||
pub async fn delete(&self) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.delete().await
|
||||
}
|
||||
|
||||
pub async fn update(
|
||||
&self,
|
||||
cid: &str,
|
||||
resources: Option<&LinuxResources>,
|
||||
op: ResourceUpdateOp,
|
||||
hypervisor: &dyn Hypervisor,
|
||||
) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.update(cid, resources, op, hypervisor).await
|
||||
}
|
||||
|
||||
pub async fn setup_after_start_vm(&self, hypervisor: &dyn Hypervisor) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.setup_after_start_vm(hypervisor).await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Persist for CgroupsResource {
|
||||
type State = CgroupState;
|
||||
type ConstructorArgs = CgroupArgs;
|
||||
/// Save a state of the component.
|
||||
async fn save(&self) -> Result<Self::State> {
|
||||
Ok(CgroupState {
|
||||
path: Some(self.cgroup_config.path.clone()),
|
||||
overhead_path: Some(self.cgroup_config.overhead_path.clone()),
|
||||
sandbox_cgroup_only: self.cgroup_config.sandbox_cgroup_only,
|
||||
})
|
||||
}
|
||||
|
||||
/// Restore a component from a specified state.
|
||||
async fn restore(
|
||||
_cgroup_args: Self::ConstructorArgs,
|
||||
cgroup_state: Self::State,
|
||||
) -> Result<Self> {
|
||||
let cgroup_config = CgroupConfig::restore(&cgroup_state)?;
|
||||
let inner = CgroupsResourceInner::restore(&cgroup_config)
|
||||
.context("restore cgroups resource inner")?;
|
||||
let inner = Arc::new(RwLock::new(inner));
|
||||
|
||||
Ok(Self {
|
||||
cgroup_config,
|
||||
inner,
|
||||
})
|
||||
}
|
||||
}
|
||||
298
src/runtime-rs/crates/resource/src/cgroups/resource_inner.rs
Normal file
298
src/runtime-rs/crates/resource/src/cgroups/resource_inner.rs
Normal file
@@ -0,0 +1,298 @@
|
||||
// Copyright (c) 2019-2022 Alibaba Cloud
|
||||
// Copyright (c) 2019-2025 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::process;
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use cgroups_rs::manager::is_systemd_cgroup;
|
||||
use cgroups_rs::{CgroupPid, FsManager, Manager, SystemdManager};
|
||||
use hypervisor::Hypervisor;
|
||||
use oci_spec::runtime::{LinuxCpu, LinuxCpuBuilder, LinuxResources, LinuxResourcesBuilder};
|
||||
|
||||
use crate::cgroups::utils::get_tgid_from_pid;
|
||||
use crate::cgroups::CgroupConfig;
|
||||
use crate::ResourceUpdateOp;
|
||||
|
||||
pub type CgroupManager = Box<dyn Manager>;
|
||||
|
||||
pub(crate) struct CgroupsResourceInner {
|
||||
/// Container resources, key is container id, and value is resources.
|
||||
resources: HashMap<String, LinuxResources>,
|
||||
sandbox_cgroup: CgroupManager,
|
||||
overhead_cgroup: Option<CgroupManager>,
|
||||
}
|
||||
|
||||
impl CgroupsResourceInner {
|
||||
/// Create cgroup managers according to the cgroup configuration.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// - `Ok((CgroupManager, Option<CgroupManager>))`: A tuple containing
|
||||
/// the sandbox cgroup manager and an optional overhead cgroup
|
||||
/// manager.
|
||||
fn new_cgroup_managers(
|
||||
config: &CgroupConfig,
|
||||
) -> Result<(CgroupManager, Option<CgroupManager>)> {
|
||||
let use_systemd = is_systemd_cgroup(&config.path);
|
||||
let sandbox_cgroup = if use_systemd {
|
||||
let mut manager = SystemdManager::new(&config.path).context("new systemd manager")?;
|
||||
// Set SIGTERM timeout to 5mins, so that the runtime has up to
|
||||
// 5mins to do graceful shutdown. Exceeding this timeout, the
|
||||
// systemd will forcibly kill the runtime by sending SIGKILL.
|
||||
manager.set_term_timeout(300).context("set term timeout")?;
|
||||
Box::new(manager) as Box<dyn Manager>
|
||||
} else {
|
||||
let manager = FsManager::new(&config.path).context("new fs manager")?;
|
||||
Box::new(manager) as Box<dyn Manager>
|
||||
};
|
||||
|
||||
let overhead_cgroup = if config.sandbox_cgroup_only {
|
||||
None
|
||||
} else if use_systemd {
|
||||
let mut manager = SystemdManager::new(&config.overhead_path)
|
||||
.context("new systemd manager for overhead")?;
|
||||
manager
|
||||
.set_term_timeout(300)
|
||||
.context("set term timeout for overhead")?;
|
||||
Some(Box::new(manager) as Box<dyn Manager>)
|
||||
} else {
|
||||
let manager =
|
||||
FsManager::new(&config.overhead_path).context("new fs manager for overhead")?;
|
||||
Some(Box::new(manager) as Box<dyn Manager>)
|
||||
};
|
||||
|
||||
Ok((sandbox_cgroup, overhead_cgroup))
|
||||
}
|
||||
|
||||
/// Create a new `CgroupsResourceInner` instance.
|
||||
pub(crate) fn new(config: &CgroupConfig) -> Result<Self> {
|
||||
let (mut sandbox_cgroup, mut overhead_cgroup) =
|
||||
Self::new_cgroup_managers(config).context("create new cgroups")?;
|
||||
|
||||
// The runtime is prioritized to be added to the overhead cgroup.
|
||||
let pid = CgroupPid::from(process::id() as u64);
|
||||
if let Some(overhead_cgroup) = overhead_cgroup.as_mut() {
|
||||
overhead_cgroup
|
||||
.add_proc(pid)
|
||||
.context("add runtime to overhead cgroup")?;
|
||||
} else {
|
||||
sandbox_cgroup
|
||||
.add_proc(pid)
|
||||
.context("add runtime to sandbox cgroup")?;
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
resources: HashMap::new(),
|
||||
sandbox_cgroup,
|
||||
overhead_cgroup,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn restore(config: &CgroupConfig) -> Result<Self> {
|
||||
let (sandbox_cgroup, overhead_cgroup) =
|
||||
Self::new_cgroup_managers(config).context("restore cgroups")?;
|
||||
|
||||
Ok(Self {
|
||||
resources: HashMap::new(),
|
||||
sandbox_cgroup,
|
||||
overhead_cgroup,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl CgroupsResourceInner {
|
||||
/// Add cpuset resources of all containers to the sandbox cgroup.
|
||||
fn collect_resources(&self) -> Result<LinuxResources> {
|
||||
let mut cpu_cpus = HashSet::new();
|
||||
let mut cpu_mems = HashSet::new();
|
||||
|
||||
for res in self.resources.values() {
|
||||
if let Some(cpu) = res.cpu() {
|
||||
if let Some(cpus) = cpu.cpus() {
|
||||
cpu_cpus.insert(cpus.to_string());
|
||||
}
|
||||
if let Some(mems) = cpu.mems() {
|
||||
cpu_mems.insert(mems.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut resources_builder = LinuxResourcesBuilder::default();
|
||||
|
||||
let mut cpu_builder = LinuxCpuBuilder::default();
|
||||
if !cpu_cpus.is_empty() {
|
||||
cpu_builder = cpu_builder.cpus(cpu_cpus.into_iter().collect::<Vec<_>>().join(","));
|
||||
}
|
||||
if !cpu_mems.is_empty() {
|
||||
cpu_builder = cpu_builder.mems(cpu_mems.into_iter().collect::<Vec<_>>().join(","));
|
||||
}
|
||||
let cpu = cpu_builder.build().context("build linux cpu")?;
|
||||
if cpu != LinuxCpu::default() {
|
||||
resources_builder = resources_builder.cpu(cpu);
|
||||
}
|
||||
|
||||
let resources = resources_builder.build().context("build linux resources")?;
|
||||
|
||||
Ok(resources)
|
||||
}
|
||||
|
||||
async fn move_vcpus_to_sandbox_cgroup(&mut self, hypervisor: &dyn Hypervisor) -> Result<usize> {
|
||||
let hv_pids = hypervisor.get_thread_ids().await?;
|
||||
let mut pids = hv_pids.vcpus.values();
|
||||
|
||||
// Use threaded mode only in cgroup v1 + cgroupfs
|
||||
if !self.sandbox_cgroup.systemd() && !self.sandbox_cgroup.v2() {
|
||||
for pid in pids {
|
||||
let pid = CgroupPid::from(*pid as u64);
|
||||
self.sandbox_cgroup
|
||||
.add_thread(pid)
|
||||
.with_context(|| format!("add vcpu pid {}", pid.pid))?
|
||||
}
|
||||
} else {
|
||||
// No vCPU, exits early
|
||||
let vcpu = match pids.next() {
|
||||
Some(pid) => *pid,
|
||||
None => return Ok(0),
|
||||
};
|
||||
|
||||
let tgid = get_tgid_from_pid(vcpu as i32).context("get tgid from vCPU thread")? as u64;
|
||||
self.sandbox_cgroup
|
||||
.add_proc(CgroupPid::from(tgid))
|
||||
.with_context(|| format!("add vcpu tgid {}", tgid))?;
|
||||
}
|
||||
|
||||
Ok(hv_pids.vcpus.len())
|
||||
}
|
||||
|
||||
async fn update_sandbox_cgroups(&mut self, hypervisor: &dyn Hypervisor) -> Result<bool> {
|
||||
// The runtime is under overhead cgroup if available. The
|
||||
// hypervisor as a child of the runtime is under the overhead
|
||||
// cgroup by default. We should move VMM process/vCPU threads to
|
||||
// the sandbox cgroup to prevent them from consuming excessive
|
||||
// resources.
|
||||
if self.overhead_cgroup.is_some() {
|
||||
let vcpu_num = self
|
||||
.move_vcpus_to_sandbox_cgroup(hypervisor)
|
||||
.await
|
||||
.context("move vcpus to sandbox cgroup")?;
|
||||
// The cgroup managers will not create cgroups if no processes
|
||||
// are added to it. `vcpu_num == 0` reflects that the
|
||||
// hypervisor hasn't been started yet. We skip resource
|
||||
// setting, as the sandbox cgroup might not be created yet.
|
||||
if vcpu_num == 0 {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
|
||||
let sandbox_resources = self.collect_resources().context("collect resources")?;
|
||||
self.sandbox_cgroup.set(&sandbox_resources).context("set")?;
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
|
||||
impl CgroupsResourceInner {
|
||||
pub(crate) async fn delete(&mut self) -> Result<()> {
|
||||
self.sandbox_cgroup
|
||||
.destroy()
|
||||
.context("destroy sandbox cgroup")?;
|
||||
|
||||
if let Some(overhead_cgroup) = self.overhead_cgroup.as_mut() {
|
||||
overhead_cgroup
|
||||
.destroy()
|
||||
.context("destroy overhead cgroup")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn update(
|
||||
&mut self,
|
||||
cid: &str,
|
||||
resources: Option<&LinuxResources>,
|
||||
op: ResourceUpdateOp,
|
||||
hypervisor: &dyn Hypervisor,
|
||||
) -> Result<()> {
|
||||
let old = match op {
|
||||
ResourceUpdateOp::Add | ResourceUpdateOp::Update => {
|
||||
let resources = resources.ok_or_else(|| {
|
||||
anyhow::anyhow!("resources should not be empty for Add or Update operation")
|
||||
})?;
|
||||
let new = new_cpuset_resources(resources).context("new cpuset resources")?;
|
||||
let old = self.resources.insert(cid.to_string(), new.clone());
|
||||
// If the new resources are the same as the old ones, we
|
||||
// can skip the update.
|
||||
if let Some(old) = old.as_ref() {
|
||||
if old == &new {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
old
|
||||
}
|
||||
ResourceUpdateOp::Del => self.resources.remove(cid),
|
||||
};
|
||||
|
||||
let ret = self
|
||||
.update_sandbox_cgroups(hypervisor)
|
||||
.await
|
||||
.context("update sandbox cgroups");
|
||||
|
||||
// Rollback if the update fails
|
||||
if ret.is_err() {
|
||||
match op {
|
||||
ResourceUpdateOp::Add => {
|
||||
self.resources.remove(cid);
|
||||
}
|
||||
ResourceUpdateOp::Update | ResourceUpdateOp::Del => {
|
||||
if let Some(old) = old {
|
||||
self.resources.insert(cid.to_string(), old);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ret.map(|_| ())
|
||||
}
|
||||
|
||||
pub(crate) async fn setup_after_start_vm(&mut self, hypervisor: &dyn Hypervisor) -> Result<()> {
|
||||
let updated = self
|
||||
.update_sandbox_cgroups(hypervisor)
|
||||
.await
|
||||
.context("update sandbox cgroups after start vm")?;
|
||||
|
||||
// There is an overhead cgroup and we are falling to move the vCPUs
|
||||
// to the sandbox cgroup, it results in those threads being under
|
||||
// the overhead cgroup, and allowing them to consume more resources
|
||||
// than we have allocated for the sandbox.
|
||||
if self.overhead_cgroup.is_some() && !updated {
|
||||
return Err(anyhow!("hypervisor cannot be moved to sandbox cgroup"));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Copy cpu.cpus and cpu.mems from the given resources to new resources.
|
||||
fn new_cpuset_resources(resources: &LinuxResources) -> Result<LinuxResources> {
|
||||
let cpu = resources.cpu();
|
||||
let cpus = cpu.as_ref().and_then(|c| c.cpus().clone());
|
||||
let mems = cpu.as_ref().and_then(|c| c.mems().clone());
|
||||
|
||||
let mut builder = LinuxCpuBuilder::default();
|
||||
if let Some(cpus) = cpus {
|
||||
builder = builder.cpus(cpus);
|
||||
}
|
||||
if let Some(mems) = mems {
|
||||
builder = builder.mems(mems);
|
||||
}
|
||||
let linux_cpu = builder.build().context("build linux cpu")?;
|
||||
|
||||
let builder = LinuxResourcesBuilder::default().cpu(linux_cpu);
|
||||
let resources = builder.build().context("build linux resources")?;
|
||||
|
||||
Ok(resources)
|
||||
}
|
||||
@@ -1,9 +1,11 @@
|
||||
// Copyright (c) 2019-2022 Alibaba Cloud
|
||||
// Copyright (c) 2019-2022 Ant Group
|
||||
// Copyright (c) 2019-2025 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
|
||||
// When the Kata overhead threads (I/O, VMM, etc) are not
|
||||
// placed in the sandbox resource controller (A cgroup on Linux),
|
||||
// they are moved to a specific, unconstrained resource controller.
|
||||
@@ -11,6 +13,55 @@
|
||||
// on a cgroup v1 system, the Kata overhead memory cgroup will be at
|
||||
// /sys/fs/cgroup/memory/kata_overhead/$CGPATH where $CGPATH is
|
||||
// defined by the orchestrator.
|
||||
pub(crate) fn gen_overhead_path(path: &str) -> String {
|
||||
format!("kata_overhead/{}", path.trim_start_matches('/'))
|
||||
pub(crate) fn gen_overhead_path(systemd: bool, path: &str) -> String {
|
||||
if systemd {
|
||||
format!("kata-overhead.slice:runtime-rs:{}", path)
|
||||
} else {
|
||||
format!("kata_overhead/{}", path.trim_start_matches('/'))
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the thread group ID (TGID) from `/proc/{pid}/status`.
|
||||
pub(crate) fn get_tgid_from_pid(pid: i32) -> Result<i32> {
|
||||
let status = std::fs::read_to_string(format!("/proc/{}/status", pid))
|
||||
.map_err(|e| anyhow!("failed to read /proc/{}/status: {}", pid, e))?;
|
||||
status
|
||||
.lines()
|
||||
.find_map(|line| {
|
||||
if line.starts_with("Tgid") {
|
||||
let part = line.split(":").nth(1)?;
|
||||
part.trim().parse::<i32>().ok()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.ok_or(anyhow!("tgid not found"))
|
||||
.with_context(|| anyhow!("failed to parse tgid"))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::cgroups::utils::*;

    #[test]
    fn test_gen_overhead_path() {
        let path = "kata_sandboxed_pod";

        // systemd cgroup driver: slice/prefix/name notation.
        assert_eq!(
            gen_overhead_path(true, path),
            "kata-overhead.slice:runtime-rs:kata_sandboxed_pod"
        );

        // cgroupfs driver: plain path under kata_overhead/.
        assert_eq!(
            gen_overhead_path(false, path),
            "kata_overhead/kata_sandboxed_pod"
        );
    }

    #[test]
    fn test_get_tgid_from_pid() {
        // A thread's TGID is the PID of the process it belongs to.
        let tid = unsafe { libc::gettid() };
        let pid = unsafe { libc::getpid() };
        assert_eq!(get_tgid_from_pid(tid).unwrap(), pid);
    }
}
|
||||
|
||||
@@ -25,7 +25,7 @@ pub struct CpuResource {
|
||||
pub(crate) current_vcpu: Arc<RwLock<u32>>,
|
||||
|
||||
/// Default number of vCPUs
|
||||
pub(crate) default_vcpu: u32,
|
||||
pub(crate) default_vcpu: f32,
|
||||
|
||||
/// CpuResource of each container
|
||||
pub(crate) container_cpu_resources: Arc<RwLock<HashMap<String, LinuxContainerCpuResources>>>,
|
||||
@@ -40,7 +40,7 @@ impl CpuResource {
|
||||
.context(format!("failed to get hypervisor {}", hypervisor_name))?;
|
||||
Ok(Self {
|
||||
current_vcpu: Arc::new(RwLock::new(hypervisor_config.cpu_info.default_vcpus as u32)),
|
||||
default_vcpu: hypervisor_config.cpu_info.default_vcpus as u32,
|
||||
default_vcpu: hypervisor_config.cpu_info.default_vcpus,
|
||||
container_cpu_resources: Arc::new(RwLock::new(HashMap::new())),
|
||||
})
|
||||
}
|
||||
@@ -117,27 +117,66 @@ impl CpuResource {
|
||||
|
||||
// calculates the total required vcpus by adding each container's requirements within the pod
|
||||
async fn calc_cpu_resources(&self) -> Result<u32> {
|
||||
let mut total_vcpu = 0;
|
||||
let mut cpuset_vcpu: HashSet<u32> = HashSet::new();
|
||||
|
||||
let resources = self.container_cpu_resources.read().await;
|
||||
if resources.is_empty() {
|
||||
return Ok(self.default_vcpu.ceil() as u32);
|
||||
}
|
||||
|
||||
// If requests of individual containers are expresses with different
|
||||
// periods we'll need to rewrite them with a common denominator
|
||||
// (period) before we can add the numerators (quotas). We choose
|
||||
// to use the largest period as the common denominator since it
|
||||
// shifts precision out of the fractional part and into the
|
||||
// integral part in case a rewritten quota ends up non-integral.
|
||||
let max_period = resources
|
||||
.iter()
|
||||
.map(|(_, cpu_resource)| cpu_resource.period())
|
||||
.max()
|
||||
// It's ok to unwrap() here as we have checked that 'resources' is
|
||||
// not empty.
|
||||
.unwrap() as f64;
|
||||
|
||||
let mut cpuset_vcpu: HashSet<u32> = HashSet::new();
|
||||
// Even though summing up quotas is fixed-point conceptually we
|
||||
// represent the sum as floating-point because
|
||||
// - we might be rewriting the quota/period fractions if periods
|
||||
// vary,and a rewritten quota can end up non-integral. We want
|
||||
// to preserve the fractional parts until the final rounding
|
||||
// not to lose precision inadvertenty.
|
||||
// - also to avoid some tedious casting doing maths with quotas.
|
||||
// Using a 64-bit float to represent what are conceptually integral
|
||||
// numbers should be safe here - f64 starts losing precision for
|
||||
// integers only past 2^53 and a sums of quotas are extremely unlikely
|
||||
// to reach that magnitude.
|
||||
let mut total_quota: f64 = 0.0;
|
||||
|
||||
for (_, cpu_resource) in resources.iter() {
|
||||
let vcpu = cpu_resource.get_vcpus().unwrap_or(0) as u32;
|
||||
cpuset_vcpu.extend(cpu_resource.cpuset().iter());
|
||||
total_vcpu += vcpu;
|
||||
|
||||
let quota = cpu_resource.quota() as f64;
|
||||
let period = cpu_resource.period() as f64;
|
||||
if quota >= 0.0 && period != 0.0 {
|
||||
total_quota += quota * (max_period / period);
|
||||
}
|
||||
}
|
||||
|
||||
// contrained only by cpuset
|
||||
if total_vcpu == 0 && !cpuset_vcpu.is_empty() {
|
||||
if total_quota == 0.0 && !cpuset_vcpu.is_empty() {
|
||||
info!(sl!(), "(from cpuset)get vcpus # {:?}", cpuset_vcpu);
|
||||
return Ok(cpuset_vcpu.len() as u32);
|
||||
}
|
||||
|
||||
let total_vcpu = if total_quota > 0.0 && max_period != 0.0 {
|
||||
self.default_vcpu as f64 + total_quota / max_period
|
||||
} else {
|
||||
self.default_vcpu as f64
|
||||
};
|
||||
|
||||
info!(
|
||||
sl!(),
|
||||
"(from cfs_quota&cfs_period)get vcpus count {}", total_vcpu
|
||||
);
|
||||
Ok(total_vcpu)
|
||||
Ok(total_vcpu.ceil() as u32)
|
||||
}
|
||||
|
||||
// do hotplug and hot-unplug the vcpu
|
||||
@@ -159,7 +198,7 @@ impl CpuResource {
|
||||
|
||||
// do not reduce computing power
|
||||
// the number of vcpus would not be lower than the default size
|
||||
let new_vcpus = cmp::max(new_vcpus, self.default_vcpu);
|
||||
let new_vcpus = cmp::max(new_vcpus, self.default_vcpu.ceil() as u32);
|
||||
|
||||
let (_, new) = hypervisor
|
||||
.resize_vcpu(old_vcpus, new_vcpus)
|
||||
@@ -169,3 +208,238 @@ impl CpuResource {
|
||||
Ok(new)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use kata_types::config::{Hypervisor, TomlConfig};
    use oci::LinuxCpu;

    /// Build a CpuResource backed by a default "qemu" hypervisor config
    /// whose `default_vcpus` is set to the given value.
    fn get_cpu_resource_with_default_vcpus(default_vcpus: f32) -> CpuResource {
        let mut config = TomlConfig::default();
        let mut hv = Hypervisor::default();
        hv.cpu_info.default_vcpus = default_vcpus;
        config.hypervisor.insert("qemu".to_owned(), hv);
        config.runtime.hypervisor_name = "qemu".to_owned();
        CpuResource::new(Arc::new(config)).unwrap()
    }

    /// Register one container CPU resource per (quota, period) pair.
    async fn add_linux_container_cpu_resources(cpu_res: &mut CpuResource, res: Vec<(i64, u64)>) {
        let mut map = cpu_res.container_cpu_resources.write().await;
        for (idx, (quota, period)) in res.into_iter().enumerate() {
            let mut linux_cpu = LinuxCpu::default();
            linux_cpu.set_quota(Some(quota));
            linux_cpu.set_period(Some(period));
            map.insert(
                idx.to_string(),
                LinuxContainerCpuResources::try_from(&linux_cpu).unwrap(),
            );
        }
    }

    // A lot of the following tests document why a fixed-point-style
    // calc_cpu_resources() implementation is better than a f32-based one.
    #[tokio::test]
    async fn test_rounding() {
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(0.0);

        // A f32-based calc_cpu_resources() implementation would fail this
        // test (adding 0.1 ten times gives roughly 1.0000001).
        // An f64-based implementation would pass this one (with the summation
        // result of 0.99999999999999989) but it still doesn't guarantee the
        // correct result in general. For instance, adding 0.1 twenty times
        // in 64 bits results in 2.0000000000000004.
        add_linux_container_cpu_resources(&mut cpu_resource, vec![(100_000, 1_000_000); 10]).await;

        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 1);
    }

    #[tokio::test]
    async fn test_big_allocation_1() {
        let default_vcpus = 10.0;
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);

        // Three exactly-integral requests: 32 + 32 + 64 = 128 vCPUs.
        add_linux_container_cpu_resources(
            &mut cpu_resource,
            vec![
                (32_000_000, 1_000_000),
                (32_000_000, 1_000_000),
                (64_000_000, 1_000_000),
            ],
        )
        .await;

        assert_eq!(
            cpu_resource.calc_cpu_resources().await.unwrap(),
            128 + default_vcpus as u32
        );
    }

    #[tokio::test]
    async fn test_big_allocation_2() {
        let default_vcpus = 10.0;
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);

        // The trailing 11 us of quota pushes the sum past an integer,
        // so it must round up by one.
        add_linux_container_cpu_resources(
            &mut cpu_resource,
            vec![
                (33_000_000, 1_000_000),
                (31_000_000, 1_000_000),
                (77_000_011, 1_000_000),
            ],
        )
        .await;

        assert_eq!(
            cpu_resource.calc_cpu_resources().await.unwrap(),
            (33 + 31 + 77 + 1) + default_vcpus as u32
        );
    }

    #[tokio::test]
    async fn test_big_allocation_3() {
        let default_vcpus = 10.0;
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);

        // A single slightly-over-integral request rounds up to 142.
        add_linux_container_cpu_resources(&mut cpu_resource, vec![(141_000_008, 1_000_000)]).await;

        assert_eq!(
            cpu_resource.calc_cpu_resources().await.unwrap(),
            142 + default_vcpus as u32
        );
    }

    #[tokio::test]
    async fn test_big_allocation_4() {
        let default_vcpus = 10.0;
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);

        // Four requests of 17.000001 vCPUs each: the fractional parts
        // accumulate and force one extra vCPU.
        add_linux_container_cpu_resources(&mut cpu_resource, vec![(17_000_001, 1_000_000); 4])
            .await;

        assert_eq!(
            cpu_resource.calc_cpu_resources().await.unwrap(),
            (4 * 17 + 1) + default_vcpus as u32
        );
    }

    #[tokio::test]
    async fn test_divisible_periods() {
        let default_vcpus = 3.0;

        // 1.0 + 2.0 vCPUs, periods divide evenly.
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
        add_linux_container_cpu_resources(
            &mut cpu_resource,
            vec![(1_000_000, 1_000_000), (1_000_000, 500_000)],
        )
        .await;

        assert_eq!(
            cpu_resource.calc_cpu_resources().await.unwrap(),
            3 + default_vcpus as u32
        );

        // 2.0 + 2.0 vCPUs with a 1.5 ms common period.
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
        add_linux_container_cpu_resources(
            &mut cpu_resource,
            vec![(3_000_000, 1_500_000), (1_000_000, 500_000)],
        )
        .await;

        assert_eq!(
            cpu_resource.calc_cpu_resources().await.unwrap(),
            4 + default_vcpus as u32
        );
    }

    #[tokio::test]
    async fn test_indivisible_periods() {
        let default_vcpus = 1.0;

        // 1.0 + 3.0 vCPUs: exact sum, no extra rounding.
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
        add_linux_container_cpu_resources(
            &mut cpu_resource,
            vec![(1_000_000, 1_000_000), (900_000, 300_000)],
        )
        .await;

        assert_eq!(
            cpu_resource.calc_cpu_resources().await.unwrap(),
            4 + default_vcpus as u32
        );

        // 900_000/299_999 is just over 3.0, so the total rounds up to 5.
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
        add_linux_container_cpu_resources(
            &mut cpu_resource,
            vec![(1_000_000, 1_000_000), (900_000, 299_999)],
        )
        .await;

        assert_eq!(
            cpu_resource.calc_cpu_resources().await.unwrap(),
            5 + default_vcpus as u32
        );
    }

    #[tokio::test]
    async fn test_fractional_default_vcpus() {
        let default_vcpus = 0.5;

        // 0.5 + 0.25 -> rounds up to 1.
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
        add_linux_container_cpu_resources(&mut cpu_resource, vec![(250_000, 1_000_000)]).await;

        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 1);

        // 0.5 + 0.5 -> exactly 1.
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
        add_linux_container_cpu_resources(&mut cpu_resource, vec![(500_000, 1_000_000)]).await;

        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 1);

        // 0.5 + 0.500001 -> just over 1, rounds up to 2.
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
        add_linux_container_cpu_resources(&mut cpu_resource, vec![(500_001, 1_000_000)]).await;

        assert_eq!(cpu_resource.calc_cpu_resources().await.unwrap(), 2);

        // This test doesn't pass because 0.1 is periodic in binary and thus
        // not exactly representable by a float of any width for fundamental
        // reasons. Its actual representation is slightly over 0.1
        // (e.g. 0.100000001 in f32), which after adding the 900_000/1_000_000
        // container request pushes the sum over 1.
        // I don't think this problem is solvable without expressing
        // 'default_vcpus' in configuration.toml in a fixed point manner (e.g.
        // as an integral percentage of a vCPU).
        /*
        let default_vcpus = 0.1;
        let mut cpu_resource = get_cpu_resource_with_default_vcpus(default_vcpus);
        add_linux_container_cpu_resources(
            &mut cpu_resource,
            vec![(900_000, 1_000_000)],
        )
        .await;

        assert_eq!(
            cpu_resource.calc_cpu_resources().await.unwrap(),
            1
        );
        */
    }
}
|
||||
|
||||
@@ -141,7 +141,7 @@ impl InitialSizeManager {
|
||||
.context("failed to get hypervisor config")?;
|
||||
|
||||
if self.resource.vcpu > 0 {
|
||||
hv.cpu_info.default_vcpus = self.resource.vcpu as i32
|
||||
hv.cpu_info.default_vcpus = self.resource.vcpu as f32
|
||||
}
|
||||
self.resource.orig_toml_default_mem = hv.memory_info.default_memory;
|
||||
if self.resource.mem_mb > 0 {
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
use std::{collections::HashMap, sync::Arc, thread};
|
||||
|
||||
use agent::{types::Device, Agent, OnlineCPUMemRequest, Storage};
|
||||
use agent::{types::Device, ARPNeighbor, Agent, OnlineCPUMemRequest, Storage};
|
||||
use anyhow::{anyhow, Context, Ok, Result};
|
||||
use async_trait::async_trait;
|
||||
use hypervisor::{
|
||||
@@ -22,6 +22,7 @@ use kata_types::{
|
||||
config::{hypervisor::TopologyConfigInfo, TomlConfig},
|
||||
mount::{adjust_rootfs_mounts, KATA_IMAGE_FORCE_GUEST_PULL},
|
||||
};
|
||||
use libc::NUD_PERMANENT;
|
||||
use oci::{Linux, LinuxCpu, LinuxResources};
|
||||
use oci_spec::runtime::{self as oci, LinuxDeviceType};
|
||||
use persist::sandbox_persist::Persist;
|
||||
@@ -260,7 +261,14 @@ impl ResourceManagerInner {
|
||||
}
|
||||
|
||||
async fn handle_neighbours(&self, network: &dyn Network) -> Result<()> {
|
||||
let neighbors = network.neighs().await.context("neighs")?;
|
||||
let all_neighbors = network.neighs().await.context("neighs")?;
|
||||
|
||||
// We add only static ARP entries
|
||||
let neighbors: Vec<ARPNeighbor> = all_neighbors
|
||||
.iter()
|
||||
.filter(|n| n.state == NUD_PERMANENT as i32)
|
||||
.cloned()
|
||||
.collect();
|
||||
if !neighbors.is_empty() {
|
||||
info!(sl!(), "update neighbors {:?}", neighbors);
|
||||
self.agent
|
||||
@@ -288,6 +296,11 @@ impl ResourceManagerInner {
|
||||
}
|
||||
|
||||
pub async fn setup_after_start_vm(&mut self) -> Result<()> {
|
||||
self.cgroups_resource
|
||||
.setup_after_start_vm(self.hypervisor.as_ref())
|
||||
.await
|
||||
.context("setup cgroups after start vm")?;
|
||||
|
||||
if let Some(share_fs) = self.share_fs.as_ref() {
|
||||
share_fs
|
||||
.setup_device_after_start_vm(self.hypervisor.as_ref(), &self.device_manager)
|
||||
@@ -563,7 +576,7 @@ impl ResourceManagerInner {
|
||||
|
||||
// we should firstly update the vcpus and mems, and then update the host cgroups
|
||||
self.cgroups_resource
|
||||
.update_cgroups(cid, linux_resources, op, self.hypervisor.as_ref())
|
||||
.update(cid, linux_resources, op, self.hypervisor.as_ref())
|
||||
.await?;
|
||||
|
||||
if let Some(swap) = self.swap_resource.as_ref() {
|
||||
|
||||
@@ -341,11 +341,8 @@ impl ShareFsVolume {
|
||||
oci_mount.set_source(Some(PathBuf::from(&dest)));
|
||||
oci_mount.set_options(m.options().clone());
|
||||
volume.mounts.push(oci_mount);
|
||||
} else if is_allowlisted_copy_volume(&src) {
|
||||
// For security reasons, we have restricted directory copying. Currently, only directories under
|
||||
// the path `/var/lib/kubelet/pods/<uid>/volumes/{kubernetes.io~configmap, kubernetes.io~secret, kubernetes.io~downward-api, kubernetes.io~projected}`
|
||||
// are allowed to be copied into the guest. Copying of other directories will be prohibited.
|
||||
|
||||
} else if src.is_dir() {
|
||||
// We allow directory copying wildly
|
||||
// source_path: "/var/lib/kubelet/pods/6dad7281-57ff-49e4-b844-c588ceabec16/volumes/kubernetes.io~projected/kube-api-access-8s2nl"
|
||||
info!(sl!(), "copying directory {:?} to guest", &source_path);
|
||||
|
||||
@@ -411,11 +408,13 @@ impl ShareFsVolume {
|
||||
volume.mounts.push(oci_mount);
|
||||
|
||||
// start monitoring
|
||||
let watcher = FsWatcher::new(Path::new(&source_path)).await?;
|
||||
let monitor_task = watcher
|
||||
.start_monitor(agent.clone(), src.clone(), dest_dir.into())
|
||||
.await;
|
||||
volume.monitor_task = Some(monitor_task);
|
||||
if is_watchable_volume(&src) {
|
||||
let watcher = FsWatcher::new(Path::new(&source_path)).await?;
|
||||
let monitor_task = watcher
|
||||
.start_monitor(agent.clone(), src.clone(), dest_dir.into())
|
||||
.await;
|
||||
volume.monitor_task = Some(monitor_task);
|
||||
}
|
||||
} else {
|
||||
// If not, we can ignore it. Let's issue a warning so that the user knows.
|
||||
warn!(
|
||||
@@ -770,14 +769,14 @@ pub fn generate_mount_path(id: &str, file_name: &str) -> String {
|
||||
format!("{}-{}-{}", nid, uid, file_name)
|
||||
}
|
||||
|
||||
/// This function is used to check whether a given volume is in the allowed copy allowlist.
|
||||
/// More specifically, it determines whether the volume's path is located under a predefined
|
||||
/// list of allowed copy directories.
|
||||
pub(crate) fn is_allowlisted_copy_volume(source_path: &PathBuf) -> bool {
|
||||
/// This function is used to check whether a given volume is a watchable volume.
|
||||
/// More specifically, it determines whether the volume's path is located under
|
||||
/// a predefined list of allowed copy directories.
|
||||
pub(crate) fn is_watchable_volume(source_path: &PathBuf) -> bool {
|
||||
if !source_path.is_dir() {
|
||||
return false;
|
||||
}
|
||||
// allowlist: { kubernetes.io~projected, kubernetes.io~configmap, kubernetes.io~secret, kubernetes.io~downward-api }
|
||||
// watchable list: { kubernetes.io~projected, kubernetes.io~configmap, kubernetes.io~secret, kubernetes.io~downward-api }
|
||||
is_projected(source_path)
|
||||
|| is_downward_api(source_path)
|
||||
|| is_secret(source_path)
|
||||
@@ -804,7 +803,7 @@ mod test {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_allowlisted_copy_volume() {
|
||||
fn test_is_watchable_volume() {
|
||||
// The configmap is /var/lib/kubelet/pods/<uid>/volumes/kubernetes.io~configmap/kube-configmap-0s2no/{..data, key1, key2,...}
|
||||
// The secret is /var/lib/kubelet/pods/<uid>/volumes/kubernetes.io~secret/kube-secret-2s2np/{..data, key1, key2,...}
|
||||
// The projected is /var/lib/kubelet/pods/<uid>/volumes/kubernetes.io~projected/kube-api-access-8s2nl/{..data, key1, key2,...}
|
||||
@@ -827,9 +826,9 @@ mod test {
|
||||
let downward_api_path = temp_dir.path().join(downward_api);
|
||||
std::fs::create_dir_all(&downward_api_path).unwrap();
|
||||
|
||||
assert!(is_allowlisted_copy_volume(&cm_path));
|
||||
assert!(is_allowlisted_copy_volume(&secret_path));
|
||||
assert!(is_allowlisted_copy_volume(&projected_path));
|
||||
assert!(is_allowlisted_copy_volume(&downward_api_path));
|
||||
assert!(is_watchable_volume(&cm_path));
|
||||
assert!(is_watchable_volume(&secret_path));
|
||||
assert!(is_watchable_volume(&projected_path));
|
||||
assert!(is_watchable_volume(&downward_api_path));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@ pub const DEFAULT_VOLUME_FS_TYPE: &str = "ext4";
|
||||
pub const KATA_MOUNT_BIND_TYPE: &str = "bind";
|
||||
|
||||
pub const KATA_BLK_DEV_TYPE: &str = "blk";
|
||||
pub const KATA_SCSI_DEV_TYPE: &str = "scsi";
|
||||
|
||||
pub fn get_file_name<P: AsRef<Path>>(src: P) -> Result<String> {
|
||||
let file_name = src
|
||||
@@ -99,6 +100,13 @@ pub async fn handle_block_volume(
|
||||
return Err(anyhow!("block driver is blk but no pci path exists"));
|
||||
}
|
||||
}
|
||||
KATA_SCSI_DEV_TYPE => {
|
||||
if let Some(scsi_addr) = device.config.scsi_addr {
|
||||
scsi_addr.to_string()
|
||||
} else {
|
||||
return Err(anyhow!("block driver is scsi but no scsi address exists"));
|
||||
}
|
||||
}
|
||||
_ => device.config.virt_path,
|
||||
};
|
||||
device_id = device.device_id;
|
||||
|
||||
@@ -13,6 +13,7 @@ use anyhow::{anyhow, Context, Result};
|
||||
use nix::{
|
||||
mount::{mount, MsFlags},
|
||||
sched::{self, CloneFlags},
|
||||
sys::signal::{signal, SigHandler, Signal},
|
||||
};
|
||||
use shim::{config, Args, Error, ShimExecutor};
|
||||
|
||||
@@ -159,6 +160,17 @@ fn real_main() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
fn main() {
|
||||
// When enabling systemd cgroup driver and sandbox cgroup only, the
|
||||
// shim is under a systemd unit. When the unit is stopping, systemd
|
||||
// sends SIGTERM to the shim. The shim can't exit immediately, as there
|
||||
// are some cleanups to do. Therefore, ignoring SIGTERM is required
|
||||
// here. The shim should complete the work within a period (Kata sets
|
||||
// it to 300s by default). Once a timeout occurs, systemd will send
|
||||
// SIGKILL.
|
||||
unsafe {
|
||||
signal(Signal::SIGTERM, SigHandler::SigIgn).unwrap();
|
||||
}
|
||||
|
||||
if let Err(err) = real_main() {
|
||||
show_version(Some(err));
|
||||
}
|
||||
|
||||
@@ -4,31 +4,22 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::os::unix::fs::OpenOptionsExt;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
|
||||
use crate::Error;
|
||||
|
||||
pub(crate) fn set_logger(path: &str, sid: &str, is_debug: bool) -> Result<slog_async::AsyncGuard> {
|
||||
//it's better to open the log pipe file with read & write option,
|
||||
//otherwise, once the containerd reboot and closed the read endpoint,
|
||||
//kata shim would write the log pipe with broken pipe error.
|
||||
let fifo = std::fs::OpenOptions::new()
|
||||
.custom_flags(libc::O_NONBLOCK)
|
||||
.create(true)
|
||||
.read(true)
|
||||
.write(true)
|
||||
.open(path)
|
||||
.context(Error::FileOpen(path.to_string()))?;
|
||||
|
||||
pub(crate) fn set_logger(_path: &str, sid: &str, is_debug: bool) -> Result<slog_async::AsyncGuard> {
|
||||
let level = if is_debug {
|
||||
slog::Level::Debug
|
||||
} else {
|
||||
slog::Level::Info
|
||||
};
|
||||
|
||||
let (logger, async_guard) = logging::create_logger("kata-runtime", sid, level, fifo);
|
||||
// Use journal logger to send logs to systemd journal with "kata" identifier
|
||||
let (logger, async_guard) = logging::create_logger_with_destination(
|
||||
"kata-runtime",
|
||||
sid,
|
||||
level,
|
||||
logging::LogDestination::Journal,
|
||||
);
|
||||
|
||||
// not reset global logger when drop
|
||||
slog_scope::set_global_logger(logger).cancel_reset();
|
||||
|
||||
@@ -16,9 +16,11 @@ ifeq ($(ARCH),)
|
||||
endif
|
||||
ifeq ($(ARCH),x86_64)
|
||||
override ARCH = amd64
|
||||
override EDK2_NAME = ovmf
|
||||
endif
|
||||
ifeq ($(ARCH),aarch64)
|
||||
override ARCH = arm64
|
||||
override EDK2_NAME = aavmf
|
||||
endif
|
||||
ifeq ($(ARCH),riscv64gc)
|
||||
override ARCH = riscv64
|
||||
@@ -215,7 +217,7 @@ DEFMAXMEMSZ := 0
|
||||
#Default number of bridges
|
||||
DEFBRIDGES := 1
|
||||
DEFENABLEANNOTATIONS := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\"]
|
||||
DEFENABLEANNOTATIONSTEE := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"default_vcpus\", \"default_memory\"]
|
||||
DEFENABLEANNOTATIONS_COCO := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"default_vcpus\", \"default_memory\", \"cc_init_data\"]
|
||||
DEFDISABLEGUESTSECCOMP := true
|
||||
DEFDISABLEGUESTEMPTYDIR := false
|
||||
#Default experimental features enabled
|
||||
@@ -460,10 +462,10 @@ ifneq (,$(QEMUCMD))
|
||||
FIRMWAREPATH_NV := $(PREFIXDEPS)/share/ovmf/OVMF.fd
|
||||
|
||||
ifneq (,$(QEMUFW))
|
||||
FIRMWAREPATH := $(PREFIXDEPS)/share/ovmf/$(QEMUFW)
|
||||
FIRMWAREPATH := $(PREFIXDEPS)/share/$(EDK2_NAME)/$(QEMUFW)
|
||||
endif
|
||||
ifneq (,$(QEMUFWVOL))
|
||||
FIRMWAREVOLUMEPATH := $(PREFIXDEPS)/share/ovmf/$(QEMUFWVOL)
|
||||
FIRMWAREVOLUMEPATH := $(PREFIXDEPS)/share/$(EDK2_NAME)/$(QEMUFWVOL)
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -729,7 +731,7 @@ USER_VARS += DEFVIRTIOFSCACHE
|
||||
USER_VARS += DEFVIRTIOFSQUEUESIZE
|
||||
USER_VARS += DEFVIRTIOFSEXTRAARGS
|
||||
USER_VARS += DEFENABLEANNOTATIONS
|
||||
USER_VARS += DEFENABLEANNOTATIONSTEE
|
||||
USER_VARS += DEFENABLEANNOTATIONS_COCO
|
||||
USER_VARS += DEFENABLEIOTHREADS
|
||||
USER_VARS += DEFSECCOMPSANDBOXPARAM
|
||||
USER_VARS += DEFENABLEVHOSTUSERSTORE
|
||||
|
||||
@@ -163,6 +163,10 @@ virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
|
||||
# Metadata, data, and pathname lookup are not cached in guest. They are
|
||||
# always fetched from host and any changes are immediately pushed to host.
|
||||
#
|
||||
# - metadata
|
||||
# Metadata and pathname lookup are cached in guest and never expire.
|
||||
# Data is never cached in guest.
|
||||
#
|
||||
# - auto
|
||||
# Metadata and pathname lookup cache expires after a configured amount of
|
||||
# time (default is 1 second). Data is cached while the file is open (close
|
||||
|
||||
@@ -55,7 +55,7 @@ rootfs_type=@DEFROOTFSTYPE@
|
||||
# List of valid annotation names for the hypervisor
|
||||
# Each member of the list is a regular expression, which is the base name
|
||||
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
|
||||
enable_annotations = @DEFENABLEANNOTATIONSTEE@
|
||||
enable_annotations = @DEFENABLEANNOTATIONS_COCO@
|
||||
|
||||
# List of valid annotations values for the hypervisor
|
||||
# Each member of the list is a path pattern as described by glob(3).
|
||||
@@ -212,6 +212,10 @@ virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
|
||||
# Metadata, data, and pathname lookup are not cached in guest. They are
|
||||
# always fetched from host and any changes are immediately pushed to host.
|
||||
#
|
||||
# - metadata
|
||||
# Metadata and pathname lookup are cached in guest and never expire.
|
||||
# Data is never cached in guest.
|
||||
#
|
||||
# - auto
|
||||
# Metadata and pathname lookup cache expires after a configured amount of
|
||||
# time (default is 1 second). Data is cached while the file is open (close
|
||||
|
||||
@@ -62,8 +62,9 @@ valid_hypervisor_paths = @QEMUSNPVALIDHYPERVISORPATHS@
|
||||
|
||||
# SNP 'ID Block' and 'ID Authentication Information Structure'.
|
||||
# If one of snp_id_block or snp_id_auth is specified, the other must be specified, too.
|
||||
# Notice that the default SNP policy of QEMU (0x30000) is used by Kata, and the IDBlock
|
||||
# must be generated with exactly this policy.
|
||||
# Notice that the default SNP policy of QEMU (0x30000) is used by Kata, if not explicitly
|
||||
# set via 'snp_guest_policy' option. The IDBlock contains the guest policy as field, and
|
||||
# it must match the value from 'snp_guest_policy' or, if unset, the QEMU default policy.
|
||||
#
|
||||
# 96-byte, base64-encoded blob to provide the ‘ID Block’ structure for the
|
||||
# SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (QEMU default: all-zero)
|
||||
@@ -72,6 +73,13 @@ valid_hypervisor_paths = @QEMUSNPVALIDHYPERVISORPATHS@
|
||||
# for the SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (QEMU default: all-zero)
|
||||
#snp_id_auth = ""
|
||||
|
||||
# SNP Guest Policy, the ‘POLICY’ parameter to the SNP_LAUNCH_START command.
|
||||
# If unset, the QEMU default policy (0x30000) will be used.
|
||||
# Notice that the guest policy is enforced at VM launch, and your pod VMs
|
||||
# won't start at all if the policy denies it. This will be indicated by a
|
||||
# 'SNP_LAUNCH_START' error.
|
||||
#snp_guest_policy = 196608
|
||||
|
||||
# Optional space-separated list of options to pass to the guest kernel.
|
||||
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
|
||||
# trouble running pre-2.15 glibc.
|
||||
@@ -221,6 +229,10 @@ virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
|
||||
# Metadata, data, and pathname lookup are not cached in guest. They are
|
||||
# always fetched from host and any changes are immediately pushed to host.
|
||||
#
|
||||
# - metadata
|
||||
# Metadata and pathname lookup are cached in guest and never expire.
|
||||
# Data is never cached in guest.
|
||||
#
|
||||
# - auto
|
||||
# Metadata and pathname lookup cache expires after a configured amount of
|
||||
# time (default is 1 second). Data is cached while the file is open (close
|
||||
|
||||
@@ -205,6 +205,10 @@ virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
|
||||
# Metadata, data, and pathname lookup are not cached in guest. They are
|
||||
# always fetched from host and any changes are immediately pushed to host.
|
||||
#
|
||||
# - metadata
|
||||
# Metadata and pathname lookup are cached in guest and never expire.
|
||||
# Data is never cached in guest.
|
||||
#
|
||||
# - auto
|
||||
# Metadata and pathname lookup cache expires after a configured amount of
|
||||
# time (default is 1 second). Data is cached while the file is open (close
|
||||
|
||||
@@ -210,6 +210,10 @@ virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
|
||||
# Metadata, data, and pathname lookup are not cached in guest. They are
|
||||
# always fetched from host and any changes are immediately pushed to host.
|
||||
#
|
||||
# - metadata
|
||||
# Metadata and pathname lookup are cached in guest and never expire.
|
||||
# Data is never cached in guest.
|
||||
#
|
||||
# - auto
|
||||
# Metadata and pathname lookup cache expires after a configured amount of
|
||||
# time (default is 1 second). Data is cached while the file is open (close
|
||||
|
||||
@@ -40,7 +40,7 @@ confidential_guest = true
|
||||
# List of valid annotation names for the hypervisor
|
||||
# Each member of the list is a regular expression, which is the base name
|
||||
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
|
||||
enable_annotations = @DEFENABLEANNOTATIONS@
|
||||
enable_annotations = @DEFENABLEANNOTATIONS_COCO@
|
||||
|
||||
# List of valid annotations values for the hypervisor
|
||||
# Each member of the list is a path pattern as described by glob(3).
|
||||
@@ -196,6 +196,10 @@ virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
|
||||
# Metadata, data, and pathname lookup are not cached in guest. They are
|
||||
# always fetched from host and any changes are immediately pushed to host.
|
||||
#
|
||||
# - metadata
|
||||
# Metadata and pathname lookup are cached in guest and never expire.
|
||||
# Data is never cached in guest.
|
||||
#
|
||||
# - auto
|
||||
# Metadata and pathname lookup cache expires after a configured amount of
|
||||
# time (default is 1 second). Data is cached while the file is open (close
|
||||
|
||||
@@ -62,8 +62,9 @@ valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@
|
||||
|
||||
# SNP 'ID Block' and 'ID Authentication Information Structure'.
|
||||
# If one of snp_id_block or snp_id_auth is specified, the other must be specified, too.
|
||||
# Notice that the default SNP policy of QEMU (0x30000) is used by Kata, and the IDBlock
|
||||
# must be generated with exactly this policy.
|
||||
# Notice that the default SNP policy of QEMU (0x30000) is used by Kata, if not explicitly
|
||||
# set via 'snp_guest_policy' option. The IDBlock contains the guest policy as field, and
|
||||
# it must match the value from 'snp_guest_policy' or, if unset, the QEMU default policy.
|
||||
#
|
||||
# 96-byte, base64-encoded blob to provide the ‘ID Block’ structure for the
|
||||
# SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (QEMU default: all-zero)
|
||||
@@ -72,6 +73,13 @@ valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@
|
||||
# for the SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (QEMU default: all-zero)
|
||||
#snp_id_auth = ""
|
||||
|
||||
# SNP Guest Policy, the ‘POLICY’ parameter to the SNP_LAUNCH_START command.
|
||||
# If unset, the QEMU default policy (0x30000) will be used.
|
||||
# Notice that the guest policy is enforced at VM launch, and your pod VMs
|
||||
# won't start at all if the policy denys it. This will be indicated by a
|
||||
# 'SNP_LAUNCH_START' error.
|
||||
#snp_guest_policy = 196608
|
||||
|
||||
# Optional space-separated list of options to pass to the guest kernel.
|
||||
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
|
||||
# trouble running pre-2.15 glibc.
|
||||
@@ -221,6 +229,10 @@ virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
|
||||
# Metadata, data, and pathname lookup are not cached in guest. They are
|
||||
# always fetched from host and any changes are immediately pushed to host.
|
||||
#
|
||||
# - metadata
|
||||
# Metadata and pathname lookup are cached in guest and never expire.
|
||||
# Data is never cached in guest.
|
||||
#
|
||||
# - auto
|
||||
# Metadata and pathname lookup cache expires after a configured amount of
|
||||
# time (default is 1 second). Data is cached while the file is open (close
|
||||
|
||||
@@ -49,7 +49,7 @@ confidential_guest = true
|
||||
# List of valid annotation names for the hypervisor
|
||||
# Each member of the list is a regular expression, which is the base name
|
||||
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
|
||||
enable_annotations = @DEFENABLEANNOTATIONSTEE@
|
||||
enable_annotations = @DEFENABLEANNOTATIONS_COCO@
|
||||
|
||||
# List of valid annotations values for the hypervisor
|
||||
# Each member of the list is a path pattern as described by glob(3).
|
||||
@@ -206,6 +206,10 @@ virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
|
||||
# Metadata, data, and pathname lookup are not cached in guest. They are
|
||||
# always fetched from host and any changes are immediately pushed to host.
|
||||
#
|
||||
# - metadata
|
||||
# Metadata and pathname lookup are cached in guest and never expire.
|
||||
# Data is never cached in guest.
|
||||
#
|
||||
# - auto
|
||||
# Metadata and pathname lookup cache expires after a configured amount of
|
||||
# time (default is 1 second). Data is cached while the file is open (close
|
||||
|
||||
@@ -211,6 +211,10 @@ virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
|
||||
# Metadata, data, and pathname lookup are not cached in guest. They are
|
||||
# always fetched from host and any changes are immediately pushed to host.
|
||||
#
|
||||
# - metadata
|
||||
# Metadata and pathname lookup are cached in guest and never expire.
|
||||
# Data is never cached in guest.
|
||||
#
|
||||
# - auto
|
||||
# Metadata and pathname lookup cache expires after a configured amount of
|
||||
# time (default is 1 second). Data is cached while the file is open (close
|
||||
|
||||
@@ -144,6 +144,10 @@ virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
|
||||
# Metadata, data, and pathname lookup are not cached in guest. They are
|
||||
# always fetched from host and any changes are immediately pushed to host.
|
||||
#
|
||||
# - metadata
|
||||
# Metadata and pathname lookup are cached in guest and never expire.
|
||||
# Data is never cached in guest.
|
||||
#
|
||||
# - auto
|
||||
# Metadata and pathname lookup cache expires after a configured amount of
|
||||
# time (default is 1 second). Data is cached while the file is open (close
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
module github.com/kata-containers/kata-containers/src/runtime
|
||||
|
||||
// Keep in sync with version in versions.yaml
|
||||
go 1.23.0
|
||||
go 1.23.12
|
||||
|
||||
// WARNING: Do NOT use `replace` directives as those break dependabot:
|
||||
// https://github.com/kata-containers/kata-containers/issues/11020
|
||||
|
||||
@@ -330,6 +330,9 @@ type Object struct {
|
||||
// for the SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (default: all-zero)
|
||||
SnpIdAuth string
|
||||
|
||||
// SnpGuestPolicy is the integer representation of the SEV-SNP guest policy.
|
||||
SnpGuestPolicy *uint64
|
||||
|
||||
// Raw byte slice of initdata digest
|
||||
InitdataDigest []byte
|
||||
}
|
||||
@@ -415,6 +418,9 @@ func (object Object) QemuParams(config *Config) []string {
|
||||
if object.SnpIdAuth != "" {
|
||||
objectParams = append(objectParams, fmt.Sprintf("id-auth=%s", object.SnpIdAuth))
|
||||
}
|
||||
if object.SnpGuestPolicy != nil {
|
||||
objectParams = append(objectParams, fmt.Sprintf("policy=%d", *object.SnpGuestPolicy))
|
||||
}
|
||||
if len(object.InitdataDigest) > 0 {
|
||||
// due to https://github.com/confidential-containers/qemu/blob/amd-snp-202402240000/qapi/qom.json#L926-L929
|
||||
// hostdata in SEV-SNP should be exactly 32 bytes
|
||||
|
||||
@@ -1257,25 +1257,23 @@ func (q *QMP) isDieIDSupported(driver string) bool {
|
||||
// node/board the CPU belongs to, coreID is the core number within socket the CPU belongs to, threadID is the
|
||||
// thread number within core the CPU belongs to. Note that socketID and threadID are not a requirement for
|
||||
// architecures like ppc64le.
|
||||
func (q *QMP) ExecuteCPUDeviceAdd(ctx context.Context, driver, cpuID, socketID, dieID, coreID, threadID, romfile string) error {
|
||||
func (q *QMP) ExecuteCPUDeviceAdd(ctx context.Context, driver, cpuID string, socketID, dieID, coreID, threadID int, romfile string) error {
|
||||
args := map[string]interface{}{
|
||||
"driver": driver,
|
||||
"id": cpuID,
|
||||
"core-id": coreID,
|
||||
}
|
||||
|
||||
if socketID != "" && isSocketIDSupported(driver) {
|
||||
if socketID >= 0 && isSocketIDSupported(driver) {
|
||||
args["socket-id"] = socketID
|
||||
}
|
||||
|
||||
if threadID != "" && isThreadIDSupported(driver) {
|
||||
if threadID >= 0 && isThreadIDSupported(driver) {
|
||||
args["thread-id"] = threadID
|
||||
}
|
||||
|
||||
if q.isDieIDSupported(driver) {
|
||||
if dieID != "" {
|
||||
args["die-id"] = dieID
|
||||
}
|
||||
if dieID >= 0 && q.isDieIDSupported(driver) {
|
||||
args["die-id"] = dieID
|
||||
}
|
||||
|
||||
return q.executeCommand(ctx, "device_add", args, nil)
|
||||
|
||||
@@ -1140,10 +1140,10 @@ func TestQMPAPVFIOMediatedDeviceAdd(t *testing.T) {
|
||||
func TestQMPCPUDeviceAdd(t *testing.T) {
|
||||
drivers := []string{"host-x86_64-cpu", "host-s390x-cpu", "host-powerpc64-cpu"}
|
||||
cpuID := "cpu-0"
|
||||
socketID := "0"
|
||||
dieID := "0"
|
||||
coreID := "1"
|
||||
threadID := "0"
|
||||
socketID := 0
|
||||
dieID := 0
|
||||
coreID := 1
|
||||
threadID := 0
|
||||
for _, d := range drivers {
|
||||
connectedCh := make(chan *QMPVersion)
|
||||
disconnectedCh := make(chan struct{})
|
||||
|
||||
@@ -109,6 +109,7 @@ type hypervisor struct {
|
||||
RemoteHypervisorSocket string `toml:"remote_hypervisor_socket"`
|
||||
SnpIdBlock string `toml:"snp_id_block"`
|
||||
SnpIdAuth string `toml:"snp_id_auth"`
|
||||
SnpGuestPolicy *uint64 `toml:"snp_guest_policy"`
|
||||
HypervisorPathList []string `toml:"valid_hypervisor_paths"`
|
||||
JailerPathList []string `toml:"valid_jailer_paths"`
|
||||
VirtioFSDaemonList []string `toml:"valid_virtio_fs_daemon_paths"`
|
||||
@@ -992,6 +993,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
ExtraMonitorSocket: extraMonitorSocket,
|
||||
SnpIdBlock: h.SnpIdBlock,
|
||||
SnpIdAuth: h.SnpIdAuth,
|
||||
SnpGuestPolicy: h.SnpGuestPolicy,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -885,6 +885,9 @@ func TestAddRuntimeAnnotations(t *testing.T) {
|
||||
|
||||
runtimeConfig := RuntimeConfig{
|
||||
HypervisorType: vc.QemuHypervisor,
|
||||
HypervisorConfig: vc.HypervisorConfig{
|
||||
EnableAnnotations: []string{"cc_init_data"},
|
||||
},
|
||||
}
|
||||
|
||||
ocispec.Annotations[vcAnnotations.DisableGuestSeccomp] = "true"
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user