Compare commits

..

1 Commits

Author SHA1 Message Date
Aurélien Bombo
1bacc72540 Add 'foo' to README.md 2025-10-17 14:23:59 -05:00
175 changed files with 1398 additions and 3533 deletions

View File

@@ -8,27 +8,25 @@ self-hosted-runner:
# Labels of self-hosted runner that linter should ignore
labels:
- amd64-nvidia-a100
- amd64-nvidia-h100-snp
- arm64-k8s
- containerd-v1.7-overlayfs
- containerd-v2.0-overlayfs
- containerd-v2.1-overlayfs
- containerd-v1.7
- containerd-v2.0
- containerd-v2.1
- containerd-v2.2
- containerd-v2.2-overlayfs
- garm-ubuntu-2004
- garm-ubuntu-2004-smaller
- garm-ubuntu-2204
- garm-ubuntu-2304
- garm-ubuntu-2304-smaller
- garm-ubuntu-2204-smaller
- ppc64le
- ppc64le-k8s
- ppc64le-small
- k8s-ppc64le
- ubuntu-24.04-ppc64le
- metrics
- ppc64le
- riscv-builder
- sev-snp
- s390x
- s390x-large
- tdx
- ubuntu-22.04-arm
- ubuntu-24.04-s390x

View File

@@ -71,7 +71,7 @@ jobs:
fail-fast: false
matrix:
containerd_version: ['lts', 'active']
vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu']
vmm: ['clh', 'cloud-hypervisor', 'dragonball', 'qemu', 'stratovirt']
runs-on: ubuntu-22.04
env:
CONTAINERD_VERSION: ${{ matrix.containerd_version }}
@@ -117,7 +117,7 @@ jobs:
fail-fast: false
matrix:
containerd_version: ['lts', 'active']
vmm: ['clh', 'qemu', 'dragonball']
vmm: ['clh', 'qemu', 'dragonball', 'stratovirt']
runs-on: ubuntu-22.04
env:
CONTAINERD_VERSION: ${{ matrix.containerd_version }}

View File

@@ -12,7 +12,7 @@ name: Build checks
jobs:
check:
name: check
runs-on: ${{ matrix.component.name == 'runtime' && inputs.instance == 'ubuntu-24.04-s390x' && 's390x' || matrix.component.name == 'runtime' && inputs.instance == 'ubuntu-24.04-ppc64le' && 'ppc64le' || inputs.instance }}
runs-on: ${{ inputs.instance }}
strategy:
fail-fast: false
matrix:

View File

@@ -63,6 +63,7 @@ jobs:
- qemu
- qemu-snp-experimental
- qemu-tdx-experimental
- stratovirt
- trace-forwarder
- virtiofsd
stage:

View File

@@ -23,8 +23,6 @@ on:
secrets:
QUAY_DEPLOYER_PASSWORD:
required: false
KBUILD_SIGN_PIN:
required: true
permissions: {}
@@ -51,6 +49,7 @@ jobs:
- nydus
- ovmf
- qemu
- stratovirt
- virtiofsd
env:
PERFORM_ATTESTATION: ${{ matrix.asset == 'agent' && inputs.push-to-registry == 'yes' && 'yes' || 'no' }}
@@ -90,7 +89,6 @@ jobs:
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
TARGET_BRANCH: ${{ inputs.target-branch }}
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
- name: Parse OCI image name and digest
id: parse-oci-segments
@@ -196,7 +194,6 @@ jobs:
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
TARGET_BRANCH: ${{ inputs.target-branch }}
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
KBUILD_SIGN_PIN: ${{ contains(matrix.asset, 'nvidia') && secrets.KBUILD_SIGN_PIN || '' }}
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2

View File

@@ -32,7 +32,7 @@ jobs:
permissions:
contents: read
packages: write
runs-on: ppc64le-small
runs-on: ubuntu-24.04-ppc64le
strategy:
matrix:
asset:
@@ -89,7 +89,7 @@ jobs:
build-asset-rootfs:
name: build-asset-rootfs
runs-on: ppc64le-small
runs-on: ubuntu-24.04-ppc64le
needs: build-asset
permissions:
contents: read
@@ -170,7 +170,7 @@ jobs:
build-asset-shim-v2:
name: build-asset-shim-v2
runs-on: ppc64le-small
runs-on: ubuntu-24.04-ppc64le
needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts]
permissions:
contents: read
@@ -230,7 +230,7 @@ jobs:
create-kata-tarball:
name: create-kata-tarball
runs-on: ppc64le-small
runs-on: ubuntu-24.04-ppc64le
needs: [build-asset, build-asset-rootfs, build-asset-shim-v2]
permissions:
contents: read

View File

@@ -32,7 +32,7 @@ permissions: {}
jobs:
build-asset:
name: build-asset
runs-on: s390x
runs-on: ubuntu-24.04-s390x
permissions:
contents: read
packages: write
@@ -257,7 +257,7 @@ jobs:
build-asset-shim-v2:
name: build-asset-shim-v2
runs-on: s390x
runs-on: ubuntu-24.04-s390x
needs: [build-asset, build-asset-rootfs, remove-rootfs-binary-artifacts]
permissions:
contents: read
@@ -319,7 +319,7 @@ jobs:
create-kata-tarball:
name: create-kata-tarball
runs-on: s390x
runs-on: ubuntu-24.04-s390x
needs:
- build-asset
- build-asset-rootfs

View File

@@ -86,8 +86,6 @@ jobs:
tarball-suffix: -${{ inputs.tag }}
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
secrets:
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
publish-kata-deploy-payload-arm64:
needs: build-kata-static-tarball-arm64
@@ -161,7 +159,7 @@ jobs:
tag: ${{ inputs.tag }}-s390x
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
runner: s390x
runner: ubuntu-24.04-s390x
arch: s390x
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
@@ -179,7 +177,7 @@ jobs:
tag: ${{ inputs.tag }}-ppc64le
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
runner: ppc64le-small
runner: ppc64le
arch: ppc64le
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
@@ -311,6 +309,18 @@ jobs:
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
run-k8s-tests-on-amd64:
if: ${{ inputs.skip-test != 'yes' }}
needs: publish-kata-deploy-payload-amd64
uses: ./.github/workflows/run-k8s-tests-on-amd64.yaml
with:
registry: ghcr.io
repo: ${{ github.repository_owner }}/kata-deploy-ci
tag: ${{ inputs.tag }}-amd64
commit-hash: ${{ inputs.commit-hash }}
pr-number: ${{ inputs.pr-number }}
target-branch: ${{ inputs.target-branch }}
run-k8s-tests-on-arm64:
if: ${{ inputs.skip-test != 'yes' }}
needs: publish-kata-deploy-payload-arm64
@@ -429,11 +439,13 @@ jobs:
{ containerd_version: lts, vmm: clh },
{ containerd_version: lts, vmm: dragonball },
{ containerd_version: lts, vmm: qemu },
{ containerd_version: lts, vmm: stratovirt },
{ containerd_version: lts, vmm: cloud-hypervisor },
{ containerd_version: lts, vmm: qemu-runtime-rs },
{ containerd_version: active, vmm: clh },
{ containerd_version: active, vmm: dragonball },
{ containerd_version: active, vmm: qemu },
{ containerd_version: active, vmm: stratovirt },
{ containerd_version: active, vmm: cloud-hypervisor },
{ containerd_version: active, vmm: qemu-runtime-rs },
]
@@ -481,7 +493,7 @@ jobs:
tarball-suffix: -${{ inputs.tag }}
commit-hash: ${{ inputs.commit-hash }}
target-branch: ${{ inputs.target-branch }}
runner: ppc64le-small
runner: ppc64le
arch: ppc64le
containerd_version: ${{ matrix.params.containerd_version }}
vmm: ${{ matrix.params.vmm }}

View File

@@ -39,7 +39,6 @@ jobs:
target-branch: ${{ github.ref_name }}
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
build-assets-s390x:
permissions:
@@ -131,7 +130,7 @@ jobs:
repo: kata-containers/kata-deploy-ci
tag: kata-containers-latest-ppc64le
target-branch: ${{ github.ref_name }}
runner: ppc64le-small
runner: ppc64le
arch: ppc64le
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
@@ -162,42 +161,3 @@ jobs:
env:
KATA_DEPLOY_IMAGE_TAGS: "kata-containers-latest"
KATA_DEPLOY_REGISTRIES: "quay.io/kata-containers/kata-deploy-ci"
upload-helm-chart-tarball:
name: upload-helm-chart-tarball
needs: publish-manifest
runs-on: ubuntu-22.04
permissions:
packages: write # needed to push the helm chart to ghcr.io
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false
- name: Install helm
uses: azure/setup-helm@fe7b79cd5ee1e45176fcad797de68ecaf3ca4814 # v4.2.0
id: install
- name: Login to the OCI registries
env:
QUAY_DEPLOYER_USERNAME: ${{ vars.QUAY_DEPLOYER_USERNAME }}
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
GITHUB_TOKEN: ${{ github.token }}
run: |
echo "${QUAY_DEPLOYER_PASSWORD}" | helm registry login quay.io --username "${QUAY_DEPLOYER_USERNAME}" --password-stdin
echo "${GITHUB_TOKEN}" | helm registry login ghcr.io --username "${GITHUB_ACTOR}" --password-stdin
- name: Push helm chart to the OCI registries
run: |
echo "Adjusting the Chart.yaml and values.yaml"
yq eval '.version = "0.0.0-dev" | .appVersion = "0.0.0-dev"' -i tools/packaging/kata-deploy/helm-chart/kata-deploy/Chart.yaml
yq eval '.image.reference = "quay.io/kata-containers/kata-deploy-ci" | .image.tag = "kata-containers-latest"' -i tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml
echo "Generating the chart package"
helm dependencies update tools/packaging/kata-deploy/helm-chart/kata-deploy
helm package tools/packaging/kata-deploy/helm-chart/kata-deploy
echo "Pushing the chart to the OCI registries"
helm push "kata-deploy-0.0.0-dev.tgz" oci://quay.io/kata-containers/kata-deploy-charts
helm push "kata-deploy-0.0.0-dev.tgz" oci://ghcr.io/kata-containers/kata-deploy-charts

View File

@@ -8,8 +8,6 @@ on:
secrets:
QUAY_DEPLOYER_PASSWORD:
required: true
KBUILD_SIGN_PIN:
required: true
permissions: {}
@@ -21,7 +19,6 @@ jobs:
stage: release
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
permissions:
contents: read
packages: write

View File

@@ -31,7 +31,7 @@ jobs:
permissions:
contents: read
packages: write
runs-on: ppc64le-small
runs-on: ppc64le
steps:
- name: Login to Kata Containers ghcr.io
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0

View File

@@ -49,7 +49,6 @@ jobs:
target-arch: arm64
secrets:
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
build-and-push-assets-s390x:
needs: release

View File

@@ -1,4 +1,4 @@
name: CI | Run containerd guest pull stability tests
name: CI | Run containerd multi-snapshotter stability test
on:
schedule:
- cron: "0 */1 * * *" #run every hour
@@ -7,22 +7,20 @@ permissions: {}
# This job relies on k8s pre-installed using kubeadm
jobs:
run-containerd-guest-pull-stability-tests:
name: run-containerd-guest-pull-stability-tests-${{ matrix.environment.test-type }}-${{ matrix.environment.containerd }}
run-containerd-multi-snapshotter-stability-tests:
name: run-containerd-multi-snapshotter-stability-tests
strategy:
fail-fast: false
matrix:
environment: [
{ test-type: multi-snapshotter, containerd: v2.2 },
{ test-type: force-guest-pull, containerd: v1.7 },
{ test-type: force-guest-pull, containerd: v2.0 },
{ test-type: force-guest-pull, containerd: v2.1 },
{ test-type: force-guest-pull, containerd: v2.2 },
]
containerd:
- v1.7
- v2.0
- v2.1
- v2.2
env:
# I don't want those to be inside double quotes, so I'm deliberately ignoring the double quotes here.
IMAGES_LIST: quay.io/mongodb/mongodb-community-server@sha256:8b73733842da21b6bbb6df4d7b2449229bb3135d2ec8c6880314d88205772a11 ghcr.io/edgelesssys/redis@sha256:ecb0a964c259a166a1eb62f0eb19621d42bd1cce0bc9bb0c71c828911d4ba93d
runs-on: containerd-${{ matrix.environment.test-type }}-${{ matrix.environment.containerd }}
runs-on: containerd-${{ matrix.containerd }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -39,9 +37,8 @@ jobs:
env:
KATA_HYPERVISOR: qemu-coco-dev
KUBERNETES: vanilla
SNAPSHOTTER: ${{ matrix.environment.test-type == 'multi-snapshotter' && 'nydus' || '' }}
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: ${{ matrix.environment.test-type == 'multi-snapshotter' }}
EXPERIMENTAL_FORCE_GUEST_PULL: ${{ matrix.environment.test-type == 'force-guest-pull' && 'qemu-coco-dev' || '' }}
SNAPSHOTTER: nydus
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: true
# This is needed as we may hit the createContainerTimeout
- name: Adjust Kata Containers' create_container_timeout

View File

@@ -49,6 +49,7 @@ jobs:
- dragonball
- qemu
- qemu-runtime-rs
- stratovirt
- cloud-hypervisor
instance-type:
- small
@@ -58,13 +59,16 @@ jobs:
vmm: clh
instance-type: small
genpolicy-pull-method: oci-distribution
auto-generate-policy: yes
- host_os: cbl-mariner
vmm: clh
instance-type: small
genpolicy-pull-method: containerd
auto-generate-policy: yes
- host_os: cbl-mariner
vmm: clh
instance-type: normal
auto-generate-policy: yes
runs-on: ubuntu-22.04
permissions:
contents: read
@@ -78,8 +82,10 @@ jobs:
KATA_HOST_OS: ${{ matrix.host_os }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: "vanilla"
USING_NFD: "false"
K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }}
GENPOLICY_PULL_METHOD: ${{ matrix.genpolicy-pull-method }}
AUTO_GENERATE_POLICY: ${{ matrix.auto-generate-policy }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -135,7 +141,7 @@ jobs:
run: bash tests/integration/kubernetes/gha-run.sh get-cluster-credentials
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-aks
- name: Run tests
@@ -143,7 +149,6 @@ jobs:
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Refresh OIDC token in case access token expired
if: always()
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
with:
client-id: ${{ secrets.AZ_APPID }}

View File

@@ -0,0 +1,130 @@
name: CI | Run kubernetes tests on amd64
on:
workflow_call:
inputs:
registry:
required: true
type: string
repo:
required: true
type: string
tag:
required: true
type: string
pr-number:
required: true
type: string
commit-hash:
required: false
type: string
target-branch:
required: false
type: string
default: ""
permissions: {}
jobs:
run-k8s-tests-amd64:
name: run-k8s-tests-amd64
strategy:
fail-fast: false
matrix:
vmm:
- qemu
container_runtime:
- containerd
snapshotter:
- devmapper
k8s:
- k3s
include:
- vmm: qemu
container_runtime: crio
snapshotter: ""
k8s: k0s
runs-on: ubuntu-22.04
env:
DOCKER_REGISTRY: ${{ inputs.registry }}
DOCKER_REPO: ${{ inputs.repo }}
DOCKER_TAG: ${{ inputs.tag }}
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: ${{ matrix.k8s }}
KUBERNETES_EXTRA_PARAMS: ${{ matrix.container_runtime != 'crio' && '' || '--cri-socket remote:unix:///var/run/crio/crio.sock --kubelet-extra-args --cgroup-driver="systemd"' }}
SNAPSHOTTER: ${{ matrix.snapshotter }}
USING_NFD: "false"
K8S_TEST_HOST_TYPE: all
CONTAINER_RUNTIME: ${{ matrix.container_runtime }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.commit-hash }}
fetch-depth: 0
persist-credentials: false
- name: Rebase atop of the latest target branch
run: |
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
env:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Remove unnecessary directories to free up space
run: |
sudo rm -rf /usr/local/.ghcup
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf /usr/lib/jvm
sudo rm -rf /usr/share/swift
sudo rm -rf /usr/local/share/powershell
sudo rm -rf /usr/local/julia*
sudo rm -rf /opt/az
sudo rm -rf /usr/local/share/chromium
sudo rm -rf /opt/microsoft
sudo rm -rf /opt/google
sudo rm -rf /usr/lib/firefox
- name: Configure CRI-O
if: matrix.container_runtime == 'crio'
run: bash tests/integration/kubernetes/gha-run.sh setup-crio
- name: Deploy ${{ matrix.k8s }}
run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s
env:
CONTAINER_RUNTIME: ${{ matrix.container_runtime }}
- name: Configure the ${{ matrix.snapshotter }} snapshotter
if: matrix.snapshotter != ''
run: bash tests/integration/kubernetes/gha-run.sh configure-snapshotter
- name: Deploy Kata
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
- name: Install `bats`
run: bash tests/integration/kubernetes/gha-run.sh install-bats
- name: Run tests
timeout-minutes: 30
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Collect artifacts ${{ matrix.vmm }}
if: always()
run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts
continue-on-error: true
- name: Archive artifacts ${{ matrix.vmm }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: k8s-tests-${{ matrix.vmm }}-${{ matrix.snapshotter }}-${{ matrix.k8s }}-${{ inputs.tag }}
path: /tmp/artifacts
retention-days: 1
- name: Delete kata-deploy
if: always()
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh cleanup

View File

@@ -42,6 +42,7 @@ jobs:
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: ${{ matrix.k8s }}
USING_NFD: "false"
K8S_TEST_HOST_TYPE: all
TARGET_ARCH: "aarch64"
steps:
@@ -58,7 +59,7 @@ jobs:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
- name: Install `bats`
@@ -82,5 +83,5 @@ jobs:
- name: Delete kata-deploy
if: always()
timeout-minutes: 15
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh cleanup

View File

@@ -29,22 +29,23 @@ permissions: {}
jobs:
run-nvidia-gpu-tests-on-amd64:
name: run-${{ matrix.environment.name }}-tests-on-amd64
name: run-nvidia-gpu-tests-on-amd64
strategy:
fail-fast: false
matrix:
environment: [
{ name: nvidia-gpu, vmm: qemu-nvidia-gpu, runner: amd64-nvidia-a100 },
{ name: nvidia-gpu-snp, vmm: qemu-nvidia-gpu-snp, runner: amd64-nvidia-h100-snp },
]
runs-on: ${{ matrix.environment.runner }}
vmm:
- qemu-nvidia-gpu
k8s:
- kubeadm
runs-on: amd64-nvidia-a100
env:
DOCKER_REGISTRY: ${{ inputs.registry }}
DOCKER_REPO: ${{ inputs.repo }}
DOCKER_TAG: ${{ inputs.tag }}
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.environment.vmm }}
KUBERNETES: kubeadm
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: ${{ matrix.k8s }}
USING_NFD: "false"
K8S_TEST_HOST_TYPE: all
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -60,30 +61,30 @@ jobs:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
- name: Install `bats`
run: bash tests/integration/kubernetes/gha-run.sh install-bats
- name: Run tests ${{ matrix.environment.vmm }}
- name: Run tests
timeout-minutes: 30
run: bash tests/integration/kubernetes/gha-run.sh run-nv-tests
env:
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
- name: Collect artifacts ${{ matrix.environment.vmm }}
- name: Collect artifacts ${{ matrix.vmm }}
if: always()
run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts
continue-on-error: true
- name: Archive artifacts ${{ matrix.environment.vmm }}
- name: Archive artifacts ${{ matrix.vmm }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: k8s-tests-${{ matrix.environment.vmm }}-kubeadm-${{ inputs.tag }}
name: k8s-tests-${{ matrix.vmm }}-${{ matrix.k8s }}-${{ inputs.tag }}
path: /tmp/artifacts
retention-days: 1
- name: Delete kata-deploy
if: always()
timeout-minutes: 15
timeout-minutes: 5
run: bash tests/integration/kubernetes/gha-run.sh cleanup

View File

@@ -34,7 +34,7 @@ jobs:
- qemu
k8s:
- kubeadm
runs-on: ppc64le-k8s
runs-on: k8s-ppc64le
env:
DOCKER_REGISTRY: ${{ inputs.registry }}
DOCKER_REPO: ${{ inputs.repo }}
@@ -43,6 +43,7 @@ jobs:
GOPATH: ${{ github.workspace }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: ${{ matrix.k8s }}
USING_NFD: "false"
TARGET_ARCH: "ppc64le"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -62,16 +63,19 @@ jobs:
./tests/install_go.sh -f -p
echo "/usr/local/go/bin" >> "$GITHUB_PATH"
- name: Prepare the runner for k8s test suite
run: bash "${HOME}/scripts/k8s_cluster_prepare.sh"
- name: Prepare the runner for k8s cluster creation
run: bash "${HOME}/scripts/k8s_cluster_cleanup.sh"
- name: Check if cluster is healthy to run the tests
run: bash "${HOME}/scripts/k8s_cluster_check.sh"
- name: Create k8s cluster using kubeadm
run: bash "${HOME}/scripts/k8s_cluster_create.sh"
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-kubeadm
- name: Run tests
timeout-minutes: 30
run: bash tests/integration/kubernetes/gha-run.sh run-tests
- name: Delete cluster and post cleanup actions
run: bash "${HOME}/scripts/k8s_cluster_cleanup.sh"

View File

@@ -46,9 +46,11 @@ jobs:
include:
- snapshotter: devmapper
pull-type: default
using-nfd: true
deploy-cmd: configure-snapshotter
- snapshotter: nydus
pull-type: guest-pull
using-nfd: false
deploy-cmd: deploy-snapshotter
exclude:
- snapshotter: overlayfs
@@ -74,6 +76,7 @@ jobs:
KUBERNETES: ${{ matrix.k8s }}
PULL_TYPE: ${{ matrix.pull-type }}
SNAPSHOTTER: ${{ matrix.snapshotter }}
USING_NFD: ${{ matrix.using-nfd }}
TARGET_ARCH: "s390x"
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
@@ -109,7 +112,7 @@ jobs:
if: ${{ matrix.snapshotter != 'overlayfs' }}
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-zvsi
- name: Uninstall previous `kbs-client`

View File

@@ -70,6 +70,7 @@ jobs:
AUTHENTICATED_IMAGE_USER: ${{ vars.AUTHENTICATED_IMAGE_USER }}
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
SNAPSHOTTER: ${{ matrix.snapshotter }}
USING_NFD: "false"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -140,7 +141,6 @@ jobs:
run: bash tests/stability/gha-stability-run.sh run-tests
- name: Refresh OIDC token in case access token expired
if: always()
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
with:
client-id: ${{ secrets.AZ_APPID }}

View File

@@ -57,6 +57,7 @@ jobs:
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: "vanilla"
USING_NFD: "false"
KBS: "true"
K8S_TEST_HOST_TYPE: "baremetal"
KBS_INGRESS: "nodeport"
@@ -80,7 +81,7 @@ jobs:
TARGET_BRANCH: ${{ inputs.target-branch }}
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
- name: Uninstall previous `kbs-client`
@@ -162,6 +163,7 @@ jobs:
# host type chose it will result on the creation of a cluster with
# insufficient resources.
K8S_TEST_HOST_TYPE: "all"
USING_NFD: "false"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -212,7 +214,7 @@ jobs:
run: bash tests/integration/kubernetes/gha-run.sh get-cluster-credentials
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-aks
env:
EXPERIMENTAL_FORCE_GUEST_PULL: ${{ env.PULL_TYPE == 'experimental-force-guest-pull' && env.KATA_HYPERVISOR || '' }}
@@ -240,7 +242,6 @@ jobs:
run: bash tests/integration/kubernetes/gha-run.sh report-tests
- name: Refresh OIDC token in case access token expired
if: always()
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
with:
client-id: ${{ secrets.AZ_APPID }}
@@ -282,6 +283,7 @@ jobs:
SNAPSHOTTER: ${{ matrix.snapshotter }}
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: "true"
K8S_TEST_HOST_TYPE: "all"
USING_NFD: "false"
# We are skipping the auto generated policy tests for now,
# but those should be enabled as soon as we work on that.
AUTO_GENERATE_POLICY: "no"
@@ -327,7 +329,7 @@ jobs:
run: bash tests/integration/kubernetes/gha-run.sh install-bats
- name: Deploy Kata
timeout-minutes: 20
timeout-minutes: 10
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata
- name: Deploy CSI driver

View File

@@ -59,6 +59,7 @@ jobs:
KATA_HOST_OS: ${{ matrix.host_os }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: "vanilla"
USING_NFD: "false"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
@@ -103,7 +104,6 @@ jobs:
run: bash tests/functional/kata-deploy/gha-run.sh run-tests
- name: Refresh OIDC token in case access token expired
if: always()
uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2.3.0
with:
client-id: ${{ secrets.AZ_APPID }}

View File

@@ -45,6 +45,7 @@ jobs:
GH_PR_NUMBER: ${{ inputs.pr-number }}
KATA_HYPERVISOR: ${{ matrix.vmm }}
KUBERNETES: ${{ matrix.k8s }}
USING_NFD: "false"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:

View File

@@ -44,6 +44,7 @@ jobs:
DOCKER_TAG: ${{ inputs.tag }}
GH_PR_NUMBER: ${{ inputs.pr-number }}
K8S_TEST_HOST_TYPE: "baremetal"
USING_NFD: "false"
KUBERNETES: kubeadm
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

View File

@@ -29,7 +29,7 @@ jobs:
matrix:
instance:
- "ubuntu-22.04-arm"
- "s390x"
- "ubuntu-24.04-s390x"
- "ubuntu-24.04-ppc64le"
uses: ./.github/workflows/build-checks.yaml
with:

View File

@@ -1,3 +1,5 @@
foo
<img src="https://object-storage-ca-ymq-1.vexxhost.net/swift/v1/6e4619c416ff4bd19e1c087f27a43eea/www-images-prod/openstack-logo/kata/SVG/kata-1.svg" width="900">
[![CI | Publish Kata Containers payload](https://github.com/kata-containers/kata-containers/actions/workflows/payload-after-push.yaml/badge.svg)](https://github.com/kata-containers/kata-containers/actions/workflows/payload-after-push.yaml) [![Kata Containers Nightly CI](https://github.com/kata-containers/kata-containers/actions/workflows/ci-nightly.yaml/badge.svg)](https://github.com/kata-containers/kata-containers/actions/workflows/ci-nightly.yaml)

View File

@@ -1 +1 @@
3.22.0
3.21.0

View File

@@ -44,9 +44,8 @@ WORKAROUND_9206_CRIO=${WORKAROUND_9206_CRIO:-no}
#
apply_kata_deploy() {
if ! command -v helm &>/dev/null; then
echo "Helm not installed, installing in current location..."
PATH=".:${PATH}"
curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | HELM_INSTALL_DIR='.' bash -s -- --no-sudo
echo "Helm not installed, installing..."
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
fi
oc label --overwrite ns kube-system pod-security.kubernetes.io/enforce=privileged pod-security.kubernetes.io/warn=baseline pod-security.kubernetes.io/audit=baseline
@@ -54,9 +53,6 @@ apply_kata_deploy() {
version=$(curl -sSL https://api.github.com/repos/kata-containers/kata-containers/releases/latest | jq .tag_name | tr -d '"')
chart="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy"
# Ensure any potential leftover is cleaned up ... and this secret usually is not in case of previous failures
oc delete secret sh.helm.release.v1.kata-deploy.v1 -n kube-system || true
echo "Installing kata using helm ${chart} ${version}"
helm install kata-deploy --wait --namespace kube-system --set "image.reference=${KATA_DEPLOY_IMAGE%%:*},image.tag=${KATA_DEPLOY_IMAGE##*:}" "${chart}" --version "${version}"
}

View File

@@ -168,55 +168,16 @@ See [this issue](https://github.com/kata-containers/runtime/issues/2812) for mor
### Kubernetes [hostPath][k8s-hostpath] volumes
In Kata, Kubernetes hostPath volumes can mount host directories and
regular files into the guest VM via filesystem sharing, if it is enabled
through the `shared_fs` [configuration][runtime-config] flag.
When the source path of a hostPath volume is under `/dev`, and the path
either corresponds to a host device or is not accessible by the Kata
shim, the Kata agent bind mounts the source path directly from the
*guest* filesystem into the container.
By default:
- Non-TEE environment: Filesystem sharing is used to mount host files.
- TEE environment: Filesystem sharing is disabled. Instead, host files
are copied into the guest VM when the container starts, and file
changes are *not* synchronized between the host and the guest.
In some cases, the behavior of hostPath volumes in Kata is further
different compared to `runc` containers:
**Mounting host block devices**: When a hostPath volume is of type
[`BlockDevice`][k8s-blockdevice], Kata hotplugs the host block device
into the guest and exposes it directly to the container.
**Mounting guest devices**: When the source path of a hostPath volume is
under `/dev`, and the path either corresponds to a host device or is not
accessible by the Kata shim, the Kata agent bind mounts the source path
directly from the *guest* filesystem into the container.
[runtime-config]: /src/runtime/README.md#configuration
[k8s-hostpath]: https://kubernetes.io/docs/concepts/storage/volumes/#hostpath
[k8s-blockdevice]: https://kubernetes.io/docs/concepts/storage/volumes/#hostpath-volume-types
### Mounting `procfs` and `sysfs`
## Host resource sharing
For security reasons, the following mounts are disallowed:
| Type | Source | Destination | Rationale |
|-------------------|-----------|----------------------------------|----------------|
| `bind` | `!= proc` | `/proc` | CVE-2019-16884 |
| `bind` | `*` | `/proc/*` (see exceptions below) | CVE-2019-16884 |
| `proc \|\| sysfs` | `*` | not a directory (e.g. symlink) | CVE-2019-19921 |
For bind mounts under /proc, these destinations are allowed:
* `/proc/cpuinfo`
* `/proc/diskstats`
* `/proc/meminfo`
* `/proc/stat`
* `/proc/swaps`
* `/proc/uptime`
* `/proc/loadavg`
* `/proc/net/dev`
## Privileged containers
### Privileged containers
Privileged support in Kata is essentially different from `runc` containers.
The container runs with elevated capabilities within the guest.

View File

@@ -31,7 +31,6 @@
- [Setting Sysctls with Kata](how-to-use-sysctls-with-kata.md)
- [What Is VMCache and How To Enable It](what-is-vm-cache-and-how-do-I-use-it.md)
- [What Is VM Templating and How To Enable It](what-is-vm-templating-and-how-do-I-use-it.md)
- [How to Use Template in runtime-rs](how-to-use-template-in-runtime-rs.md)
- [Privileged Kata Containers](privileged.md)
- [How to load kernel modules in Kata Containers](how-to-load-kernel-modules-with-kata.md)
- [How to use Kata Containers with `virtio-mem`](how-to-use-virtio-mem-with-kata.md)

View File

@@ -104,20 +104,12 @@ LOW_WATER_MARK=32768
sudo dmsetup create "${POOL_NAME}" \
--table "0 ${LENGTH_IN_SECTORS} thin-pool ${META_DEV} ${DATA_DEV} ${DATA_BLOCK_SIZE} ${LOW_WATER_MARK}"
# Determine plugin name based on containerd config version
CONFIG_VERSION=$(containerd config dump | awk '/^version/ {print $3}')
if [ "$CONFIG_VERSION" -ge 2 ]; then
PLUGIN="io.containerd.snapshotter.v1.devmapper"
else
PLUGIN="devmapper"
fi
cat << EOF
#
# Add this to your config.toml configuration file and restart containerd daemon
#
[plugins]
[plugins."${PLUGIN}"]
[plugins.devmapper]
pool_name = "${POOL_NAME}"
root_path = "${DATA_DIR}"
base_image_size = "10GB"

View File

@@ -1,119 +0,0 @@
# How to Use Template in runtime-rs
## What is VM Templating
VM templating is a Kata Containers feature that enables new VM creation using a cloning technique. When enabled, new VMs are created by cloning from a pre-created template VM, and they will share the same initramfs, kernel and agent memory in readonly mode. It is very much like a process fork done by the kernel but here we *fork* VMs.
For more details on VM templating, refer to the [What is VM templating and how do I use it](./what-is-vm-templating-and-how-do-I-use-it.md) article.
## How to Enable VM Templating
VM templating can be enabled by changing your Kata Containers config file (`/opt/kata/share/defaults/kata-containers/runtime-rs/configuration.toml`, overridden by `/etc/kata-containers/configuration.toml` if provided) such that:
- `qemu` version `v4.1.0` or above is specified in `hypervisor.qemu`->`path` section
- `enable_template = true`
- `template_path = "/run/vc/vm/template"` (default value, can be customized as needed)
- `initrd =` is set
- `image =` option is commented out or removed
- `shared_fs =` option is commented out or removed
- `default_memory =` should be set to more than 256MB
Then you can create a VM template for later usage by calling:
### Initialize and create the VM template
The `factory init` command creates a VM template by launching a new VM, initializing the Kata Agent, then pausing and saving its state (memory and device snapshots) to the template directory. This saved template is used to rapidly clone new VMs using QEMU's memory sharing capabilities.
```bash
sudo kata-ctl factory init
```
### Check the status of the VM template
The `factory status` command checks whether a VM template currently exists by verifying the presence of template files (memory snapshot and device state). It will output "VM factory is on" if the template exists, or "VM factory is off" otherwise.
```bash
sudo kata-ctl factory status
```
### Destroy and clean up the VM template
The `factory destroy` command removes the VM template by removing the `tmpfs` filesystem and deleting the template directory along with all its contents.
```bash
sudo kata-ctl factory destroy
```
## How to Create a New VM from VM Template
In the Go version of Kata Containers, the VM templating mechanism is implemented using virtio-9p (9pfs). However, 9pfs is not supported in runtime-rs due to its poor performance, limited cache coherence, and security risks. Instead, runtime-rs adopts `VirtioFS` as the default mechanism to provide rootfs for containers and VMs.
Yet, when enabling the VM template mechanism, `VirtioFS` introduces conflicts in memory sharing because its DAX-based shared memory mapping overlaps with the template's page-sharing design. To resolve these conflicts and ensure strict isolation between cloned VMs, runtime-rs replaces `VirtioFS` with the snapshotter approach — specifically, the `blockfile` snapshotter.
The `blockfile` snapshotter is used in runtime-rs because it provides each VM with an independent block-based root filesystem, ensuring strong isolation and full compatibility with the VM templating mechanism.
### Configure Snapshotter
#### Check if `Blockfile` Snapshotter is Available
```bash
ctr plugins ls | grep blockfile
```
If not available, continue with the following steps:
#### Create Scratch File
```bash
dd if=/dev/zero of=/opt/containerd/blockfile bs=1M count=500
sudo mkfs.ext4 /opt/containerd/blockfile
```
#### Configure containerd
Edit the containerd configuration file:
```bash
sudo vim /etc/containerd/config.toml
```
Add or modify the following configuration for the `blockfile` snapshotter:
```toml
[plugins."io.containerd.snapshotter.v1.blockfile"]
scratch_file = "/opt/containerd/blockfile"
root_path = ""
fs_type = "ext4"
mount_options = []
recreate_scratch = true
```
#### Restart containerd
After modifying the configuration, restart containerd to apply changes:
```bash
sudo systemctl restart containerd
```
### Run Container with `blockfile` Snapshotter
After the VM template is created, you can pull an image and run a container using the `blockfile` snapshotter:
```bash
ctr run --rm -t --snapshotter blockfile docker.io/library/busybox:latest template sh
```
We can verify whether a VM was launched from a template or started normally by checking the launch parameters — if the parameters contain `incoming`, it indicates that the VM was started from a template rather than created directly.
## Performance Test
The comparative experiment between **template-based VM** creation and **direct VM** creation showed that the template-based approach achieved a ≈ **73.2%** reduction in startup latency (average launch time of **0.6s** vs. **0.82s**) and a ≈ **79.8%** reduction in memory usage (average memory usage of **178.2 MiB** vs. **223.2 MiB**), demonstrating significant improvements in VM startup efficiency and resource utilization.
The test script is as follows:
```bash
# Clear the page cache, dentries, and inodes to free up memory
echo 3 | sudo tee /proc/sys/vm/drop_caches
# Display the current memory usage
free -h
# Create 100 normal VMs and template-based VMs, and track the time
time for I in $(seq 100); do
echo -n " ${I}th" # Display the iteration number
ctr run -d --runtime io.containerd.kata.v2 --snapshotter blockfile docker.io/library/busybox:latest normal/template${I}
done
# Display the memory usage again after running the test
free -h

View File

@@ -8,11 +8,50 @@ Kata Containers requires nested virtualization or bare metal. Check
[hardware requirements](./../../README.md#hardware-requirements) to see if your system is capable of running Kata
Containers.
The Kata Deploy Helm chart is the preferred way to install all of the binaries and
## Packaged installation methods
The packaged installation method uses your distribution's native package format (such as RPM or DEB).
> **Note:**
>
> We encourage you to select an installation method that provides
> automatic updates, to ensure you get the latest security updates and
> bug fixes.
| Installation method | Description | Automatic updates | Use case |
|------------------------------------------------------|----------------------------------------------------------------------------------------------|-------------------|-----------------------------------------------------------------------------------------------|
| [Using official distro packages](#official-packages) | Kata packages provided by Linux distributions official repositories | yes | Recommended for most users. |
| [Automatic](#automatic-installation) | Run a single command to install a full system | **No!** | For those wanting the latest release quickly. |
| [Using kata-deploy Helm chart](#kata-deploy-helm-chart) | The preferred way to deploy the Kata Containers distributed binaries on a Kubernetes cluster | **No!** | Best way to give it a try on kata-containers on an already up and running Kubernetes cluster. |
### Kata Deploy Helm Chart
The Kata Deploy Helm chart is a convenient way to install all of the binaries and
artifacts required to run Kata Containers on Kubernetes.
[Use Kata Deploy Helm Chart](/tools/packaging/kata-deploy/helm-chart/README.md) to install Kata Containers on a Kubernetes Cluster.
### Official packages
Kata packages are provided by official distribution repositories for:
| Distribution (link to installation guide) | Minimum versions |
|----------------------------------------------------------|--------------------------------------------------------------------------------|
| [CentOS](centos-installation-guide.md) | 8 |
| [Fedora](fedora-installation-guide.md) | 34 |
### Automatic Installation
[Use `kata-manager`](/utils/README.md) to automatically install a working Kata Containers system.
## Installing on a Cloud Service Platform
* [Amazon Web Services (AWS)](aws-installation-guide.md)
* [Google Compute Engine (GCE)](gce-installation-guide.md)
* [Microsoft Azure](azure-installation-guide.md)
* [Minikube](minikube-installation-guide.md)
* [VEXXHOST OpenStack Cloud](vexxhost-installation-guide.md)
## Further information
* [upgrading document](../Upgrading.md)

View File

@@ -0,0 +1,135 @@
# Install Kata Containers on Amazon Web Services
Kata Containers on Amazon Web Services (AWS) makes use of [i3.metal](https://aws.amazon.com/ec2/instance-types/i3/) instances. Most of the installation procedure is identical to that for Kata on your preferred distribution, except that you have to run it on bare metal instances since AWS doesn't support nested virtualization yet. This guide walks you through creating an i3.metal instance.
## Install and Configure AWS CLI
### Requirements
* Python:
* Python 2 version 2.6.5+
* Python 3 version 3.3+
### Install
Install with this command:
```bash
$ pip install awscli --upgrade --user
```
### Configure
First, verify it:
```bash
$ aws --version
```
Then configure it:
```bash
$ aws configure
```
Specify the required parameters:
```
AWS Access Key ID []: <your-key-id-from-iam>
AWS Secret Access Key []: <your-secret-access-key-from-iam>
Default region name []: <your-aws-region-for-your-i3-metal-instance>
Default output format [None]: <yaml-or-json-or-empty>
```
Alternatively, you can create the files: `~/.aws/credentials` and `~/.aws/config`:
```bash
$ cat <<EOF > ~/.aws/credentials
[default]
aws_access_key_id = <your-key-id-from-iam>
aws_secret_access_key = <your-secret-access-key-from-iam>
EOF
$ cat <<EOF > ~/.aws/config
[default]
region = <your-aws-region-for-your-i3-metal-instance>
EOF
```
For more information on how to get AWS credentials please refer to [this guide](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html). Alternatively, you can ask the administrator of your AWS account to issue one with the AWS CLI:
```sh
$ aws_username="myusername"
$ aws iam create-access-key --user-name="$aws_username"
```
More general AWS CLI guidelines can be found [here](https://docs.aws.amazon.com/cli/latest/userguide/installing.html).
## Create or Import an EC2 SSH key pair
You will need this to access your instance.
To create:
```bash
$ aws ec2 create-key-pair --key-name MyKeyPair | grep KeyMaterial | cut -d: -f2- | tr -d ' \n\"\,' > MyKeyPair.pem
$ chmod 400 MyKeyPair.pem
```
Alternatively to import using your public SSH key:
```bash
$ aws ec2 import-key-pair --key-name "MyKeyPair" --public-key-material file://MyKeyPair.pub
```
## Launch i3.metal instance
Get the latest Bionic Ubuntu AMI (Amazon Image) or the latest AMI for the Linux distribution you would like to use. For example:
```bash
$ aws ec2 describe-images --owners 099720109477 --filters "Name=name,Values=ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server*" --query 'sort_by(Images, &CreationDate)[].ImageId '
```
This command will produce output similar to the following:
```
[
...
"ami-063aa838bd7631e0b",
"ami-03d5270fcb641f79b"
]
```
Launch the EC2 instance and pick up the `INSTANCEID`:
```bash
$ aws ec2 run-instances --image-id ami-03d5270fcb641f79b --count 1 --instance-type i3.metal --key-name MyKeyPair --associate-public-ip-address > /tmp/aws.json
$ export INSTANCEID=$(grep InstanceId /tmp/aws.json | cut -d: -f2- | tr -d ' \n\"\,')
```
Wait for the instance to come up, the output of the following command should be `running`:
```bash
$ aws ec2 describe-instances --instance-id=${INSTANCEID} | grep running | cut -d: -f2- | tr -d ' \"\,'
```
Get the public IP address for the instances:
```bash
$ export IP=$(aws ec2 describe-instances --instance-id=${INSTANCEID} | grep PublicIpAddress | cut -d: -f2- | tr -d ' \n\"\,')
```
Refer to [this guide](https://docs.aws.amazon.com/cli/latest/userguide/cli-ec2-launch.html) for more details on how to launch instances with the AWS CLI.
SSH into the machine
```bash
$ ssh -i MyKeyPair.pem ubuntu@${IP}
```
Go onto the next step.
## Install Kata
The process for installing Kata itself on bare metal is identical to that of a virtualization-enabled VM.
For detailed information to install Kata on your distribution of choice, see the [Kata Containers installation user guides](../install/README.md).

View File

@@ -0,0 +1,18 @@
# Install Kata Containers on Microsoft Azure
Kata Containers on Azure use nested virtualization to provide an identical installation
experience to Kata on your preferred Linux distribution.
This guide assumes you have an Azure account set up and tools to remotely login to your virtual
machine (SSH). Instructions will use the Azure Portal to avoid
local dependencies and setup.
## Create a new virtual machine with nesting support
Create a new virtual machine with:
* Nesting support (v3 series)
* your distro of choice
## Set up with distribution specific quick start
Follow distribution specific [install guides](../install/README.md#packaged-installation-methods).

View File

@@ -0,0 +1,21 @@
# Install Kata Containers on CentOS
1. Install the Kata Containers components with the following commands:
```bash
$ sudo -E dnf install -y centos-release-advanced-virtualization
$ sudo -E dnf module disable -y virt:rhel
$ source /etc/os-release
$ cat <<EOF | sudo -E tee /etc/yum.repos.d/kata-containers.repo
[kata-containers]
name=Kata Containers
baseurl=http://mirror.centos.org/\$contentdir/\$releasever/virt/\$basearch/kata-containers
enabled=1
gpgcheck=1
skip_if_unavailable=1
EOF
$ sudo -E dnf install -y kata-containers
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -0,0 +1,10 @@
# Install Kata Containers on Fedora
1. Install the Kata Containers components with the following commands:
```bash
$ sudo -E dnf -y install kata-containers
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@@ -0,0 +1,127 @@
# Install Kata Containers on Google Compute Engine
Kata Containers on Google Compute Engine (GCE) makes use of [nested virtualization](https://cloud.google.com/compute/docs/instances/enable-nested-virtualization-vm-instances). Most of the installation procedure is identical to that for Kata on your preferred distribution, but enabling nested virtualization currently requires extra steps on GCE. This guide walks you through creating an image and instance with nested virtualization enabled. Note that `kata-runtime check` checks for nested virtualization, but does not fail if support is not found.
As a pre-requisite this guide assumes an installed and configured instance of the [Google Cloud SDK](https://cloud.google.com/sdk/downloads). For a zero-configuration option, all of the commands below have been tested under [Google Cloud Shell](https://cloud.google.com/shell/) (as of Jun 2018). Verify your `gcloud` installation and configuration:
```bash
$ gcloud info || { echo "ERROR: no Google Cloud SDK"; exit 1; }
```
## Create an Image with Nested Virtualization Enabled
VM images on GCE are grouped into families under projects. Officially supported images are automatically discoverable with `gcloud compute images list`. That command produces a list similar to the following (likely with different image names):
```bash
$ gcloud compute images list
NAME PROJECT FAMILY DEPRECATED STATUS
centos-7-v20180523 centos-cloud centos-7 READY
coreos-stable-1745-5-0-v20180531 coreos-cloud coreos-stable READY
cos-beta-67-10575-45-0 cos-cloud cos-beta READY
cos-stable-66-10452-89-0 cos-cloud cos-stable READY
debian-9-stretch-v20180510 debian-cloud debian-9 READY
rhel-7-v20180522 rhel-cloud rhel-7 READY
sles-11-sp4-v20180523 suse-cloud sles-11 READY
ubuntu-1604-xenial-v20180522 ubuntu-os-cloud ubuntu-1604-lts READY
ubuntu-1804-bionic-v20180522 ubuntu-os-cloud ubuntu-1804-lts READY
```
Each distribution has its own project, and each project can host images for multiple versions of the distribution, typically grouped into families. We recommend you select images by project and family, rather than by name. This ensures any scripts or other automation always works with a non-deprecated image, including security updates, updates to GCE-specific scripts, etc.
### Create the Image
The following example (substitute your preferred distribution project and image family) produces an image with nested virtualization enabled in your currently active GCE project:
```bash
$ SOURCE_IMAGE_PROJECT=ubuntu-os-cloud
$ SOURCE_IMAGE_FAMILY=ubuntu-1804-lts
$ IMAGE_NAME=${SOURCE_IMAGE_FAMILY}-nested
$ gcloud compute images create \
--source-image-project $SOURCE_IMAGE_PROJECT \
--source-image-family $SOURCE_IMAGE_FAMILY \
--licenses=https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx \
$IMAGE_NAME
```
If successful, `gcloud` reports that the image was created. Verify that the image has the nested virtualization license with `gcloud compute images describe $IMAGE_NAME`. This produces output like the following (some fields have been removed for clarity and to redact personal info):
```yaml
diskSizeGb: '10'
kind: compute#image
licenseCodes:
- '1002001'
- '5926592092274602096'
licenses:
- https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx
- https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/licenses/ubuntu-1804-lts
name: ubuntu-1804-lts-nested
sourceImage: https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20180522
sourceImageId: '3280575157699667619'
sourceType: RAW
status: READY
```
The primary criterion of interest here is the presence of the `enable-vmx` license. Without that license Kata does not work. The presence of that license instructs the Google Compute Engine hypervisor to enable Intel's VT-x instructions in virtual machines created from the image. Note that nested virtualization is only available in VMs running on Intel Haswell or later CPU micro-architectures.
### Verify VMX is Available
Assuming you created a nested-enabled image using the previous instructions, verify that VMs created from this image are VMX-enabled with the following:
1. Create a VM from the image created previously:
```bash
$ gcloud compute instances create \
--image $IMAGE_NAME \
--machine-type n1-standard-2 \
--min-cpu-platform "Intel Broadwell" \
kata-testing
```
> **NOTE**: In most zones the `--min-cpu-platform` argument can be omitted. It is only necessary in GCE Zones that include hosts based on Intel's Ivybridge platform.
2. Verify that the VMX CPUID flag is set:
```bash
$ gcloud compute ssh kata-testing
# While ssh'd into the VM:
$ [ -z "$(lscpu|grep GenuineIntel)" ] && { echo "ERROR: Need an Intel CPU"; exit 1; }
```
If this fails, ensure you created your instance from the correct image and that the previously listed `enable-vmx` license is included.
## Install Kata
The process for installing Kata itself on a virtualization-enabled VM is identical to that for bare metal.
For detailed information to install Kata on your distribution of choice, see the [Kata Containers installation user guides](../install/README.md).
## Create a Kata-enabled Image
Optionally, after installing Kata, create an image to preserve the fruits of your labor:
```bash
$ gcloud compute instances stop kata-testing
$ gcloud compute images create \
--source-disk kata-testing \
kata-base
```
The result is an image that includes any changes made to the `kata-testing` instance as well as the `enable-vmx` flag. Verify this with `gcloud compute images describe kata-base`. The result, which omits some fields for clarity, should be similar to the following:
```yaml
diskSizeGb: '10'
kind: compute#image
licenseCodes:
- '1002001'
- '5926592092274602096'
licenses:
- https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx
- https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/licenses/ubuntu-1804-lts
name: kata-base
selfLink: https://www.googleapis.com/compute/v1/projects/my-kata-project/global/images/kata-base
sourceDisk: https://www.googleapis.com/compute/v1/projects/my-kata-project/zones/us-west1-a/disks/kata-testing
sourceType: RAW
status: READY
```

View File

@@ -0,0 +1,16 @@
# Install Kata Containers on VEXXHOST
Kata Containers on VEXXHOST use nested virtualization to provide an identical
installation experience to Kata on your preferred Linux distribution.
This guide assumes you have an OpenStack public cloud account set up and tools
to remotely connect to your virtual machine (SSH).
## Create a new virtual machine with nesting support
All regions support nested virtualization using the V2 flavors (those prefixed
with v2). The recommended machine type for container workloads is the `v2-highcpu` range.
## Set up with distribution specific quick start
Follow distribution specific [install guides](../install/README.md#packaged-installation-methods).

4
src/agent/Cargo.lock generated
View File

@@ -2065,7 +2065,7 @@ dependencies = [
"libc",
"log",
"logging",
"mem-agent",
"mem-agent-lib",
"netlink-packet-core",
"netlink-packet-route",
"netlink-sys 0.7.0",
@@ -2351,7 +2351,7 @@ dependencies = [
]
[[package]]
name = "mem-agent"
name = "mem-agent-lib"
version = "0.2.0"
dependencies = [
"anyhow",

View File

@@ -85,7 +85,7 @@ kata-agent-policy = { path = "policy" }
rustjail = { path = "rustjail" }
vsock-exporter = { path = "vsock-exporter" }
mem-agent = { path = "../libs/mem-agent" }
mem-agent = { path = "../mem-agent", package = "mem-agent-lib" }
kata-sys-util = { path = "../libs/kata-sys-util" }
kata-types = { path = "../libs/kata-types", features = ["safe-path"] }

View File

@@ -336,17 +336,11 @@ mod tests {
let plain = slog_term::PlainSyncDecorator::new(std::io::stdout());
let logger = Logger::root(slog_term::FullFormat::new(plain).build().fuse(), o!());
// Detect actual filesystem types mounted in this environment
// Z runners mount /dev as tmpfs, while normal systems use devtmpfs
let dev_fs_type = get_mount_fs_type("/dev").unwrap_or_else(|_| String::from("devtmpfs"));
let proc_fs_type = get_mount_fs_type("/proc").unwrap_or_else(|_| String::from("proc"));
let sys_fs_type = get_mount_fs_type("/sys").unwrap_or_else(|_| String::from("sysfs"));
let test_cases = [
("dev", "/dev", dev_fs_type.as_str()),
("udev", "/dev", dev_fs_type.as_str()),
("proc", "/proc", proc_fs_type.as_str()),
("sysfs", "/sys", sys_fs_type.as_str()),
("dev", "/dev", "devtmpfs"),
("udev", "/dev", "devtmpfs"),
("proc", "/proc", "proc"),
("sysfs", "/sys", "sysfs"),
];
for &(source, destination, fs_type) in &test_cases {
@@ -387,22 +381,6 @@ mod tests {
let drain = slog::Discard;
let logger = slog::Logger::root(drain, o!());
// Detect filesystem type of root directory
let tmp_fs_type = get_mount_fs_type("/").unwrap_or_else(|_| String::from("unknown"));
// Error messages that vary based on filesystem type
const DEFAULT_ERROR_EPERM: &str = "Operation not permitted";
const BTRFS_ERROR_ENODEV: &str = "No such device";
// Helper to select error message based on filesystem type (e.g. btrfs for s390x runners)
let get_error_msg = |default: &'static str, btrfs_specific: &'static str| -> &'static str {
if tmp_fs_type == "btrfs" && !btrfs_specific.is_empty() {
btrfs_specific
} else {
default
}
};
let tests = &[
TestData {
test_user: TestUserType::Any,
@@ -438,7 +416,7 @@ mod tests {
fs_type: "bind",
flags: MsFlags::empty(),
options: "bind",
error_contains: get_error_msg(DEFAULT_ERROR_EPERM, BTRFS_ERROR_ENODEV),
error_contains: "Operation not permitted",
},
TestData {
test_user: TestUserType::NonRootOnly,
@@ -518,14 +496,7 @@ mod tests {
let err = result.unwrap_err();
let error_msg = format!("{}", err);
assert!(
error_msg.contains(d.error_contains),
"{}: expected error containing '{}', got '{}'",
msg,
d.error_contains,
error_msg
);
assert!(error_msg.contains(d.error_contains), "{}", msg);
}
}

View File

@@ -922,18 +922,6 @@ mod tests {
const TEST_DUMMY_INTERFACE: &str = "dummy_for_arp";
const TEST_ARP_IP: &str = "192.0.2.127";
/// Helper function to check if the result is a netlink EACCES error
fn is_netlink_permission_error<T>(result: &Result<T>) -> bool {
if let Err(e) = result {
let error_string = format!("{:?}", e);
if error_string.contains("code: Some(-13)") {
println!("INFO: skipping test - netlink operations are restricted in this environment (EACCES)");
return true;
}
}
false
}
#[tokio::test]
async fn find_link_by_name() {
let message = Handle::new()
@@ -1057,14 +1045,10 @@ mod tests {
let lo = handle.find_link(LinkFilter::Name("lo")).await.unwrap();
for network in list {
let result = handle.add_addresses(lo.index(), iter::once(network)).await;
// Skip test if netlink operations are restricted (EACCES = -13)
if is_netlink_permission_error(&result) {
return;
}
result.expect("Failed to add IP");
handle
.add_addresses(lo.index(), iter::once(network))
.await
.expect("Failed to add IP");
// Make sure the address is there
let result = handle
@@ -1079,14 +1063,10 @@ mod tests {
assert!(result.is_some());
// Update it
let result = handle.add_addresses(lo.index(), iter::once(network)).await;
// Skip test if netlink operations are restricted (EACCES = -13)
if is_netlink_permission_error(&result) {
return;
}
result.expect("Failed to delete address");
handle
.add_addresses(lo.index(), iter::once(network))
.await
.expect("Failed to delete address");
}
}

View File

@@ -59,26 +59,10 @@ pub fn reseed_rng(data: &[u8]) -> Result<()> {
#[cfg(test)]
mod tests {
use super::*;
use nix::errno::Errno;
use std::fs::File;
use std::io::prelude::*;
use test_utils::skip_if_not_root;
/// Helper function to check if the result is an EPERM error
fn is_permission_error(result: &Result<()>) -> bool {
if let Err(e) = result {
if let Some(errno) = e.downcast_ref::<Errno>() {
if *errno == Errno::EPERM {
println!(
"EPERM: skipping test - reseeding RNG is not permitted in this environment"
);
return true;
}
}
}
false
}
#[test]
fn test_reseed_rng() {
skip_if_not_root!();
@@ -89,9 +73,6 @@ mod tests {
// Ensure the buffer was filled.
assert!(n == POOL_SIZE);
let ret = reseed_rng(&seed);
if is_permission_error(&ret) {
return;
}
assert!(ret.is_ok());
}
@@ -104,9 +85,6 @@ mod tests {
// Ensure the buffer was filled.
assert!(n == POOL_SIZE);
let ret = reseed_rng(&seed);
if is_permission_error(&ret) {
return;
}
if nix::unistd::Uid::effective().is_root() {
assert!(ret.is_ok());
} else {

View File

@@ -2481,26 +2481,6 @@ mod tests {
// normally this module should exist...
m.name = "bridge".to_string();
let result = load_kernel_module(&m);
// Skip test if loading kernel modules is not permitted
// or kernel module is not found
if let Err(e) = &result {
let error_string = format!("{:?}", e);
// Let's print out the error message first
println!("DEBUG: error: {}", error_string);
if error_string.contains("Operation not permitted")
|| error_string.contains("EPERM")
|| error_string.contains("Permission denied")
{
println!("INFO: skipping test - loading kernel modules is not permitted in this environment");
return;
}
if error_string.contains("not found") {
println!("INFO: skipping test - kernel module is not found in this environment");
return;
}
}
assert!(result.is_ok(), "load module should success");
}

View File

@@ -1121,9 +1121,9 @@ dependencies = [
[[package]]
name = "kvm-ioctls"
version = "0.12.1"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d592c9b0da14bacab1fe89c78e7ed873b20cf7f502d0fc26f628d733215b1e5"
checksum = "c3a321cabd827642499c77e27314f388dd83a717a5ca716b86476fb947f73ae4"
dependencies = [
"kvm-bindings",
"libc",

View File

@@ -31,7 +31,7 @@ resolver = "2"
# Rust-VMM crates
event-manager = "0.2.1"
kvm-bindings = "0.6.0"
kvm-ioctls = "=0.12.1"
kvm-ioctls = "0.12.0"
linux-loader = "0.8.0"
seccompiler = "0.5.0"
vfio-bindings = "0.3.0"

View File

@@ -3,9 +3,8 @@ members = [
"kata-sys-util",
"kata-types",
"logging",
"mem-agent",
"protocols",
"runtime-spec",
"protocols",
"safe-path",
"shim-interface",
"test-utils",

View File

@@ -9,7 +9,6 @@
use crate::config::agent::AGENT_NAME_KATA;
use crate::config::hypervisor::HYPERVISOR_NAME_DRAGONBALL;
use crate::config::runtime::RUNTIME_NAME_VIRTCONTAINER;
use crate::machine_type::MACHINE_TYPE_Q35_TYPE;
use lazy_static::lazy_static;
lazy_static! {
@@ -66,7 +65,7 @@ pub const MIN_DRAGONBALL_MEMORY_SIZE_MB: u32 = 64;
pub const DEFAULT_QEMU_BINARY_PATH: &str = "/usr/bin/qemu-system-x86_64";
pub const DEFAULT_QEMU_ROOTFS_TYPE: &str = "ext4";
pub const DEFAULT_QEMU_CONTROL_PATH: &str = "";
pub const DEFAULT_QEMU_MACHINE_TYPE: &str = MACHINE_TYPE_Q35_TYPE;
pub const DEFAULT_QEMU_MACHINE_TYPE: &str = "q35";
pub const DEFAULT_QEMU_ENTROPY_SOURCE: &str = "/dev/urandom";
pub const DEFAULT_QEMU_GUEST_KERNEL_IMAGE: &str = "vmlinuz";
pub const DEFAULT_QEMU_GUEST_KERNEL_PARAMS: &str = "";

View File

@@ -49,7 +49,7 @@ mod remote;
pub use self::remote::{RemoteConfig, HYPERVISOR_NAME_REMOTE};
mod rate_limiter;
pub use self::rate_limiter::{RateLimiterConfig, DEFAULT_RATE_LIMITER_REFILL_TIME};
pub use self::rate_limiter::RateLimiterConfig;
/// Virtual PCI block device driver.
pub const VIRTIO_BLK_PCI: &str = "virtio-blk-pci";
@@ -1224,52 +1224,6 @@ pub struct RemoteInfo {
pub default_gpu_model: String,
}
/// Configuration information for vm template.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct VmTemplateInfo {
/// Indicate whether the VM is being created as a template VM.
#[serde(default)]
pub boot_to_be_template: bool,
/// Indicate whether the VM should be created from an existing template VM.
#[serde(default)]
pub boot_from_template: bool,
/// memory_path is the memory file path of VM memory.
#[serde(default)]
pub memory_path: String,
/// device_state_path is the VM device state file path.
#[serde(default)]
pub device_state_path: String,
}
impl VmTemplateInfo {
/// Adjust the configuration information after loading from configuration file.
pub fn adjust_config(&mut self) -> Result<()> {
Ok(())
}
/// Validate the configuration information.
pub fn validate(&self) -> Result<()> {
Ok(())
}
}
/// Configuration information for VM factory (templating, caches, etc.).
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct Factory {
/// Enable VM templating support.
/// When enabled, new VMs may be created from a template to speed up creation.
#[serde(default, rename = "enable_template")]
pub enable_template: bool,
/// Specifies the path of template.
/// Example: "/run/vc/vm/template"
#[serde(default)]
pub template_path: String,
}
/// Common configuration information for hypervisors.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct Hypervisor {
@@ -1360,14 +1314,6 @@ pub struct Hypervisor {
#[serde(default, flatten)]
pub remote_info: RemoteInfo,
/// vm template configuration information.
#[serde(default, flatten)]
pub vm_template: VmTemplateInfo,
/// VM factory configuration information.
#[serde(default)]
pub factory: Factory,
/// A sandbox annotation used to specify the host path to the `prefetch_files.list`
/// for the container image being used. The runtime will pass this path to the
/// Hypervisor to search for the corresponding prefetch list file.
@@ -1441,7 +1387,6 @@ impl ConfigOps for Hypervisor {
hv.network_info.adjust_config()?;
hv.security_info.adjust_config()?;
hv.shared_fs.adjust_config()?;
hv.vm_template.adjust_config()?;
resolve_path!(
hv.prefetch_list_path,
"prefetch_list_path `{}` is invalid: {}"
@@ -1479,7 +1424,6 @@ impl ConfigOps for Hypervisor {
hv.network_info.validate()?;
hv.security_info.validate()?;
hv.shared_fs.validate()?;
hv.vm_template.validate()?;
validate_path!(hv.path, "Hypervisor binary path `{}` is invalid: {}")?;
validate_path!(
hv.ctlpath,

View File

@@ -6,10 +6,10 @@
use serde::{Deserialize, Serialize};
/// The DEFAULT_RATE_LIMITER_REFILL_TIME is used for calculating the rate at
/// which a TokenBucket is replinished, in cases where a RateLimiter is
/// applied to either network or disk I/O.
pub const DEFAULT_RATE_LIMITER_REFILL_TIME: u64 = 1000;
// The DEFAULT_RATE_LIMITER_REFILL_TIME is used for calculating the rate at
// which a TokenBucket is replenished, in cases where a RateLimiter is
// applied to either network or disk I/O.
pub(crate) const DEFAULT_RATE_LIMITER_REFILL_TIME: u64 = 1000;
#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct TokenBucketConfig {

View File

@@ -24,9 +24,8 @@ pub mod hypervisor;
pub use self::agent::Agent;
use self::default::DEFAULT_AGENT_DBG_CONSOLE_PORT;
pub use self::hypervisor::{
BootInfo, CloudHypervisorConfig, DragonballConfig, Factory, FirecrackerConfig, Hypervisor,
QemuConfig, RemoteConfig, HYPERVISOR_NAME_DRAGONBALL, HYPERVISOR_NAME_FIRECRACKER,
HYPERVISOR_NAME_QEMU,
BootInfo, CloudHypervisorConfig, DragonballConfig, FirecrackerConfig, Hypervisor, QemuConfig,
RemoteConfig, HYPERVISOR_NAME_DRAGONBALL, HYPERVISOR_NAME_FIRECRACKER, HYPERVISOR_NAME_QEMU,
};
mod runtime;
@@ -178,15 +177,6 @@ impl TomlConfig {
Ok(config)
}
/// Get the `Factory` configuration from the active hypervisor.
pub fn get_factory(&self) -> Factory {
let hypervisor_name = self.runtime.hypervisor_name.as_str();
self.hypervisor
.get(hypervisor_name)
.map(|hv| hv.factory.clone())
.unwrap_or_default()
}
/// Adjust Kata configuration information.
pub fn adjust_config(&mut self) -> Result<()> {
Hypervisor::adjust_config(self)?;

View File

@@ -51,9 +51,6 @@ pub mod initdata;
/// rootless vmm
pub mod rootless;
/// machine type
pub mod machine_type;
use std::path::Path;
use crate::rootless::{is_rootless, rootless_dir};

View File

@@ -1,11 +0,0 @@
// Copyright 2025 Kata Contributors
//
// SPDX-License-Identifier: Apache-2.0
//
/// Default x86-64 machine type
pub const MACHINE_TYPE_Q35_TYPE: &str = "q35";
/// S390x CCW virtio machine type identifier.
/// Used on IBM Z architecture for channel I/O (CCW) virtio devices.
pub const MACHINE_TYPE_S390X_TYPE: &str = "s390-ccw-virtio";

View File

@@ -1,5 +1,5 @@
[package]
name = "mem-agent"
name = "mem-agent-lib"
version = "0.2.0"
edition = "2021"
@@ -20,5 +20,3 @@ slog-term = "2.9.0"
slog-async = "2.7"
once_cell = "1.9.0"
lazy_static = "1.4"
nix = { version = "0.30.1", features = ["user"] }
test-utils = { path = "../test-utils" }

View File

@@ -105,7 +105,7 @@ async fn async_get_remaining_tokio_duration(
fn agent_work(mut memcg: memcg::MemCG, mut comp: compact::Compact) -> Result<Duration> {
let memcg_work_list = memcg.get_timeout_list();
if !memcg_work_list.is_empty() {
if memcg_work_list.len() > 0 {
info!("memcg.work start");
memcg
.work(&memcg_work_list)
@@ -202,8 +202,10 @@ async fn mem_agent_loop(
});
mas.timeout = false;
} else if mas.refresh() {
continue;
} else {
if mas.refresh() {
continue;
}
}
info!("mem_agent_loop wait timeout {:?}", mas.duration);
@@ -344,11 +346,9 @@ impl MemAgent {
#[cfg(test)]
mod tests {
use super::*;
use test_utils::skip_if_not_root;
#[test]
fn test_agent() {
skip_if_not_root!();
let mut memcg_config = memcg::Config::default();
memcg_config.default.disabled = true;
let compact_config = compact::Config {
@@ -381,7 +381,6 @@ mod tests {
#[test]
fn test_agent_memcg_status() {
skip_if_not_root!();
let mut memcg_config = memcg::Config::default();
memcg_config.default.disabled = true;
let compact_config = compact::Config {

View File

@@ -2,10 +2,6 @@
//
// SPDX-License-Identifier: Apache-2.0
// TODO: Enable precedence and identity_op check
#![allow(clippy::precedence)]
#![allow(clippy::identity_op)]
use crate::cgroup::CGROUP_PATH;
use crate::proc;
use crate::psi;
@@ -67,7 +63,7 @@ impl Default for Config {
compact_sec_max: 5 * 60,
compact_order: PAGE_REPORTING_MIN_ORDER,
compact_threshold: 2 << PAGE_REPORTING_MIN_ORDER,
compact_force_times: u64::MAX,
compact_force_times: std::u64::MAX,
}
}
}
@@ -133,7 +129,7 @@ impl CompactCore {
}
fn need_force_compact(&self) -> bool {
if self.config.compact_force_times == u64::MAX {
if self.config.compact_force_times == std::u64::MAX {
return false;
}
@@ -354,12 +350,14 @@ impl Compact {
} else {
debug!("compact killed and keep wait");
}
} else if rest_sec <= 0 {
debug!("compact timeout");
child
.kill()
.map_err(|e| anyhow!("child.kill failed: {}", e))?;
killed = true;
} else {
if rest_sec <= 0 {
debug!("compact timeout");
child
.kill()
.map_err(|e| anyhow!("child.kill failed: {}", e))?;
killed = true;
}
}
let percent = compact_psi

View File

@@ -14,7 +14,7 @@ use page_size;
use std::collections::HashMap;
use std::collections::HashSet;
use std::hash::Hash;
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::RwLock;
use tokio::time::Duration as TokioDuration;
@@ -113,13 +113,24 @@ impl SingleConfig {
}
}
#[derive(Debug, Clone, PartialEq, Default)]
#[derive(Debug, Clone, PartialEq)]
pub struct CgroupConfig {
pub no_subdir: bool,
pub numa_id: Vec<u32>,
pub config: SingleConfig,
}
impl Default for CgroupConfig {
fn default() -> Self {
Self {
no_subdir: false,
// An empty numa_id means this config does not restrict NUMA nodes.
numa_id: vec![],
config: SingleConfig::default(),
}
}
}
impl CgroupConfig {
fn set(&mut self, config: &CgroupOptionConfig) -> bool {
let mut need_reset = false;
@@ -211,7 +222,7 @@ impl Config {
// make sure the empty numa_id CgroupConfig at the end of Cgroup
for vec in self.cgroups.values_mut() {
let (keep, moved) = vec.drain(..).partition(|c| !c.numa_id.is_empty());
let (keep, moved) = vec.drain(..).partition(|c| c.numa_id.len() > 0);
*vec = keep;
vec.extend(moved);
}
@@ -289,7 +300,7 @@ pub struct Numa {
}
impl Numa {
fn new(mglru: &MGenLRU, path: &str, psi_path: &Path) -> Self {
fn new(mglru: &MGenLRU, path: &str, psi_path: &PathBuf) -> Self {
Self {
max_seq: mglru.max_seq,
min_seq: mglru.min_seq,
@@ -351,9 +362,9 @@ impl MemCgroup {
id: &usize,
ino: &usize,
path: &str,
numa: &[u32],
numa: &Vec<u32>,
hmg: &HashMap<usize, MGenLRU>,
psi_path: &Path,
psi_path: &PathBuf,
) -> Self {
let m = Self {
id: *id as u16,
@@ -361,8 +372,11 @@ impl MemCgroup {
numa: numa
.iter()
.filter_map(|numa_id| {
hmg.get(&(*numa_id as usize))
.map(|hmg| (*numa_id, Numa::new(hmg, path, psi_path)))
if let Some(hmg) = hmg.get(&(*numa_id as usize)) {
Some((*numa_id, Numa::new(hmg, path, psi_path)))
} else {
None
}
})
.collect(),
};
@@ -375,7 +389,7 @@ impl MemCgroup {
&mut self,
numa: &Vec<u32>,
path: &str,
psi_path: &Path,
psi_path: &PathBuf,
hmg: &HashMap<usize, MGenLRU>,
) {
for numa_id in numa {
@@ -438,7 +452,7 @@ impl Info {
fn new(path: &str, memcg_id: usize, numa_id: usize, numa: &Numa) -> Self {
Self {
memcg_id,
numa_id,
numa_id: numa_id,
path: path.to_string(),
min_seq: numa.min_seq,
max_seq: numa.max_seq,
@@ -520,9 +534,9 @@ impl MemCgroups {
}
should_keep
});
!path_cgs.is_empty()
path_cgs.len() != 0
});
!period_cgs.cgs.is_empty()
period_cgs.cgs.len() != 0
});
self.cgroups.retain(|path, cgroup| {
@@ -560,7 +574,7 @@ impl MemCgroups {
let need_insert = if update_cgroups {
if let Some(mg) = self.cgroups.get_mut(path) {
// Update current
mg.update_from_hostmemcg(hmg);
mg.update_from_hostmemcg(&hmg);
false
} else {
true
@@ -580,8 +594,8 @@ impl MemCgroups {
loop {
if let Some(secs_config_map) = self.config_map.get_mut(&config.period_secs)
{
if let Some(config_map) = secs_config_map.cgs.get_mut(config) {
if config_map.get_mut(path).is_some() {
if let Some(config_map) = secs_config_map.cgs.get_mut(&config) {
if let Some(_) = config_map.get_mut(path) {
error!(
"update_and_add found an memcg {:?} {} existed",
config, path
@@ -610,7 +624,7 @@ impl MemCgroups {
);
cgroups.add_numa(
numa_id,
&numa_id,
path,
&self.config.psi_path,
hmg,
@@ -627,7 +641,7 @@ impl MemCgroups {
id,
ino,
path,
numa_id,
&numa_id,
hmg,
&self.config.psi_path,
),
@@ -668,7 +682,7 @@ impl MemCgroups {
for (path, numa_map) in path_map {
if let Some(mcg) = self.cgroups.get_mut(path) {
for numa_id in &numa_map.numa {
if let Some(numa) = mcg.numa.get_mut(numa_id) {
if let Some(numa) = mcg.numa.get_mut(&numa_id) {
let pass = match numa
.check_psi(single_config.period_psi_percent_limit as u64)
{
@@ -688,7 +702,7 @@ impl MemCgroups {
}
info_ret.push(Info::new(
path,
&path,
mcg.id as usize,
*numa_id as usize,
numa,
@@ -704,7 +718,7 @@ impl MemCgroups {
}
}
if !info_ret.is_empty() {
if info_ret.len() > 0 {
infos_ret.push((single_config.clone(), info_ret));
}
}
@@ -795,7 +809,7 @@ impl MemCgroups {
pub fn get_remaining_tokio_duration(&self) -> TokioDuration {
let mut ret = TokioDuration::MAX;
for secs_map in self.config_map.values() {
for (_, secs_map) in &self.config_map {
let cur = secs_map.timeout.remaining_tokio_duration();
trace!(
@@ -808,7 +822,7 @@ impl MemCgroups {
// check secs_map, make sure it has enabled config
let mut has_enable_config = false;
for single_config in secs_map.cgs.keys() {
for (single_config, _) in &secs_map.cgs {
if !single_config.disabled {
has_enable_config = true;
break;
@@ -841,7 +855,11 @@ impl MemCgroups {
let cur_path = format_path(&path);
let should_del = if let Some(configs) = self.config.cgroups.get_mut(&cur_path) {
configs.retain(|cfg| cfg.numa_id != numa);
configs.is_empty()
if configs.is_empty() {
true
} else {
false
}
} else {
false
};
@@ -870,10 +888,8 @@ impl MemCgroups {
}
}
let mut numa_cg = CgroupConfig {
numa_id: numa,
..Default::default()
};
let mut numa_cg = CgroupConfig::default();
numa_cg.numa_id = numa;
numa_cg.set(&oc);
numa_cgs.push(numa_cg);
@@ -1008,7 +1024,7 @@ impl MemCG {
let mut mgs = self.memcgs.blocking_write();
if target_paths.is_empty() {
if target_paths.len() == 0 {
mgs.remove_changed(&mg_hash);
}
mgs.update_and_add(&mg_hash, true);
@@ -1016,7 +1032,7 @@ impl MemCG {
Ok(())
}
fn run_aging(&mut self, config_infov: &mut [(SingleConfig, Vec<Info>)]) {
fn run_aging(&mut self, config_infov: &mut Vec<(SingleConfig, Vec<Info>)>) {
for (config, infov) in config_infov.iter_mut() {
debug!("run_aging_single_config {:?}", config);
self.run_aging_single_config(infov, config.swap);
@@ -1078,10 +1094,10 @@ impl MemCG {
c as u8
}
fn run_eviction(&mut self, config_infov: &mut [(SingleConfig, Vec<Info>)]) -> Result<()> {
fn run_eviction(&mut self, config_infov: &mut Vec<(SingleConfig, Vec<Info>)>) -> Result<()> {
for (config, infov) in config_infov.iter_mut() {
debug!("run_eviction_single_config {:?}", config);
self.run_eviction_single_config(infov, config)?;
self.run_eviction_single_config(infov, &config)?;
}
Ok(())
@@ -1103,7 +1119,7 @@ impl MemCG {
}
let psi_path = self.memcgs.blocking_read().config.psi_path.clone();
for info in infov.iter_mut() {
for info in infov.into_iter() {
info.eviction = Some(EvictionInfo {
psi: psi::Period::new(&psi_path.join(info.path.trim_start_matches('/')), false),
last_min_lru_file: 0,
@@ -1119,7 +1135,7 @@ impl MemCG {
let mut ret = Ok(());
'main_loop: while !infov.is_empty() {
'main_loop: while infov.len() != 0 {
// update infov
let path_set: HashSet<String> = infov.iter().map(|info| info.path.clone()).collect();
match self.refresh(&path_set) {
@@ -1196,14 +1212,16 @@ impl MemCG {
);
ei.file_page_count += released;
if !ei.only_swap_mode && ci.min_lru_file == 0 {
info!(
"{} {} run_eviction stop because min_lru_file is 0, release {} {} pages",
ci.path, ci.numa_id, ei.anon_page_count, ei.file_page_count,
);
ei.stop_reason = EvictionStopReason::NoMinLru;
removed_infov.push(infov.remove(i));
continue;
if !ei.only_swap_mode {
if ci.min_lru_file == 0 {
info!(
"{} {} run_eviction stop because min_lru_file is 0, release {} {} pages",
ci.path, ci.numa_id, ei.anon_page_count, ei.file_page_count,
);
ei.stop_reason = EvictionStopReason::NoMinLru;
removed_infov.push(infov.remove(i));
continue;
}
}
let percent = match ei.psi.get_percent() {
@@ -1296,7 +1314,7 @@ impl MemCG {
}
let mut mgs = self.memcgs.blocking_write();
mgs.record_eviction(infov);
mgs.record_eviction(&infov);
mgs.record_eviction(&removed_infov);
ret
@@ -1335,15 +1353,12 @@ impl MemCG {
}
}
#[cfg(test)]
mod tests {
#[allow(unused_imports)]
use super::*;
use test_utils::skip_if_not_root;
#[test]
fn test_memcg_swap_not_available() {
skip_if_not_root!();
let is_cg_v2 = crate::cgroup::is_cgroup_v2().unwrap();
let m = MemCG::new(is_cg_v2, Config::default()).unwrap();
assert!(m.swap_not_available().is_ok());
@@ -1351,7 +1366,6 @@ mod tests {
#[test]
fn test_memcg_get_swappiness() {
skip_if_not_root!();
let is_cg_v2 = crate::cgroup::is_cgroup_v2().unwrap();
let m = MemCG::new(is_cg_v2, Config::default()).unwrap();
assert_eq!(m.get_swappiness(100, 50), 133);
@@ -1359,9 +1373,8 @@ mod tests {
#[test]
fn test_memcg_get_timeout_list() {
skip_if_not_root!();
let is_cg_v2 = crate::cgroup::is_cgroup_v2().unwrap();
let m = MemCG::new(is_cg_v2, Config::default()).unwrap();
assert!(!m.get_timeout_list().is_empty());
assert_eq!(m.get_timeout_list().len() > 0, true);
}
}

View File

@@ -26,8 +26,7 @@ fn lru_gen_head_parse(line: &str) -> Result<(usize, String)> {
return Err(anyhow!("line {} format is not right", line));
}
let id = words[1]
.parse::<usize>()
let id = usize::from_str_radix(words[1], 10)
.map_err(|e| anyhow!("parse line {} failed: {}", line, e))?;
Ok((id, words[2].to_string()))
@@ -78,7 +77,7 @@ impl MGenLRU {
fn lru_gen_lines_parse(reader: &mut BufReader<File>) -> Result<(String, HashMap<usize, MGenLRU>)> {
let mut line = String::new();
let mut ret_hash = HashMap::new();
while !line.is_empty()
while line.len() > 0
|| reader
.read_line(&mut line)
.map_err(|e| anyhow!("read file {} failed: {}", LRU_GEN_PATH, e))?
@@ -87,8 +86,7 @@ fn lru_gen_lines_parse(reader: &mut BufReader<File>) -> Result<(String, HashMap<
let words: Vec<&str> = line.split_whitespace().map(|word| word.trim()).collect();
if words.len() == 2 && words[0] == "node" {
// Got a new node
let node_id = words[1]
.parse::<usize>()
let node_id = usize::from_str_radix(words[1], 10)
.map_err(|e| anyhow!("parse line {} failed: {}", line, e))?;
let (ret_line, node_size) = lru_gen_seq_lines_parse(reader)
.map_err(|e| anyhow!("lru_gen_seq_lines_parse failed: {}", e))?;
@@ -110,7 +108,7 @@ fn str_to_u64(str: &str) -> Result<u64> {
warn!("{} format {} is not right", LRU_GEN_PATH, str);
return Ok(0);
}
Ok(str.parse::<u64>()?)
Ok(u64::from_str_radix(str, 10)?)
}
//result:
@@ -131,8 +129,7 @@ fn lru_gen_seq_lines_parse(reader: &mut BufReader<File>) -> Result<(String, Opti
break;
}
let msecs = words[1]
.parse::<i64>()
let msecs = i64::from_str_radix(words[1], 10)
.map_err(|e| anyhow!("parse line {} failed: {}", line, e))?;
// Use milliseconds because try_milliseconds causes a build error.
#[allow(deprecated)]
@@ -141,12 +138,11 @@ fn lru_gen_seq_lines_parse(reader: &mut BufReader<File>) -> Result<(String, Opti
let mut gen = GenLRU::new();
gen.birth = birth;
gen.seq = words[0]
.parse::<u64>()
gen.seq = u64::from_str_radix(words[0], 10)
.map_err(|e| anyhow!("parse line {} failed: {}", line, e))?;
gen.anon = str_to_u64(words[2 + WORKINGSET_ANON])
gen.anon = str_to_u64(&words[2 + WORKINGSET_ANON])
.map_err(|e| anyhow!("parse line {} failed: {}", line, e))?;
gen.file = str_to_u64(words[2 + WORKINGSET_FILE])
gen.file = str_to_u64(&words[2 + WORKINGSET_FILE])
.map_err(|e| anyhow!("parse line {} failed: {}", line, e))?;
if !got {
@@ -177,15 +173,14 @@ fn lru_gen_seq_lines_parse(reader: &mut BufReader<File>) -> Result<(String, Opti
// HashMap<node_id, MGenLRU> will be empty.
//result:
// HashMap<path, (id, HashMap<node_id, MGenLRU>)>
#[allow(clippy::type_complexity)]
fn lru_gen_file_parse(
reader: &mut BufReader<File>,
mut reader: &mut BufReader<File>,
target_patchs: &HashSet<String>,
parse_line: bool,
) -> Result<HashMap<String, (usize, HashMap<usize, MGenLRU>)>> {
let mut line = String::new();
let mut ret_hash = HashMap::new();
while !line.is_empty()
while line.len() > 0
|| reader
.read_line(&mut line)
.map_err(|e| anyhow!("read file {} failed: {}", LRU_GEN_PATH, e))?
@@ -194,9 +189,9 @@ fn lru_gen_file_parse(
let mut clear_line = true;
// Do not handle the Err from lru_gen_head_parse, because every line of the file will be checked.
if let Ok((id, path)) = lru_gen_head_parse(&line) {
if target_patchs.is_empty() || target_patchs.contains(&path) {
if target_patchs.len() == 0 || target_patchs.contains(&path) {
let seq_data = if parse_line {
let (ret_line, data) = lru_gen_lines_parse(reader).map_err(|e| {
let (ret_line, data) = lru_gen_lines_parse(&mut reader).map_err(|e| {
anyhow!(
"lru_gen_seq_lines_parse file {} failed: {}",
LRU_GEN_PATH,
@@ -227,7 +222,6 @@ fn lru_gen_file_parse(
Ok(ret_hash)
}
#[allow(clippy::type_complexity)]
fn file_parse(
target_patchs: &HashSet<String>,
parse_line: bool,
@@ -242,7 +236,6 @@ fn file_parse(
//result:
// HashMap<path, (id, ino, HashMap<node_id, MGenLRU>)>
#[allow(clippy::type_complexity)]
pub fn host_memcgs_get(
target_patchs: &HashSet<String>,
parse_line: bool,
@@ -283,8 +276,8 @@ pub fn check() -> Result<()> {
let content = fs::read_to_string(LRU_GEN_ENABLED_PATH)
.map_err(|e| anyhow!("open file {} failed: {}", LRU_GEN_ENABLED_PATH, e))?;
let content = content.trim();
let r = if let Some(stripped) = content.strip_prefix("0x") {
u32::from_str_radix(stripped, 16)
let r = if content.starts_with("0x") {
u32::from_str_radix(&content[2..], 16)
} else {
content.parse()
};
@@ -343,7 +336,9 @@ mod tests {
use maplit::hashmap;
use once_cell::sync::OnceCell;
use slog::{Drain, Level, Logger};
use slog_async;
use slog_scope::set_global_logger;
use slog_term;
use std::collections::HashMap;
use std::fs;
use std::fs::File;

View File

@@ -9,35 +9,35 @@ pub fn sl() -> slog::Logger {
#[macro_export]
macro_rules! error {
($($arg:tt)*) => {
slog::error!($crate::misc::sl(), "{}", format_args!($($arg)*))
slog::error!(crate::misc::sl(), "{}", format_args!($($arg)*))
}
}
#[macro_export]
macro_rules! warn {
($($arg:tt)*) => {
slog::warn!($crate::misc::sl(), "{}", format_args!($($arg)*))
slog::warn!(crate::misc::sl(), "{}", format_args!($($arg)*))
}
}
#[macro_export]
macro_rules! info {
($($arg:tt)*) => {
slog::info!($crate::misc::sl(), "{}", format_args!($($arg)*))
slog::info!(crate::misc::sl(), "{}", format_args!($($arg)*))
}
}
#[macro_export]
macro_rules! trace {
($($arg:tt)*) => {
slog::trace!($crate::misc::sl(), "{}", format_args!($($arg)*))
slog::trace!(crate::misc::sl(), "{}", format_args!($($arg)*))
}
}
#[macro_export]
macro_rules! debug {
($($arg:tt)*) => {
slog::debug!($crate::misc::sl(), "{}", format_args!($($arg)*))
slog::debug!(crate::misc::sl(), "{}", format_args!($($arg)*))
}
}

View File

@@ -10,7 +10,7 @@ use std::fs;
use std::fs::File;
use std::fs::OpenOptions;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::path::PathBuf;
const MEM_PSI: &str = "memory.pressure";
const IO_PSI: &str = "io.pressure";
@@ -20,8 +20,10 @@ fn find_psi_subdirs() -> Result<PathBuf> {
for entry in fs::read_dir(CGROUP_PATH)? {
let entry = entry?;
let path = entry.path();
if path.is_dir() && path.join(MEM_PSI).is_file() && path.join(IO_PSI).is_file() {
return Ok(path.clone());
if path.is_dir() {
if path.join(MEM_PSI).is_file() && path.join(IO_PSI).is_file() {
return Ok(path.clone());
}
}
}
@@ -31,15 +33,15 @@ fn find_psi_subdirs() -> Result<PathBuf> {
}
}
pub fn check(psi_path: &Path) -> Result<PathBuf> {
pub fn check(psi_path: &PathBuf) -> Result<PathBuf> {
if crate::misc::is_test_environment() {
return Ok(psi_path.to_path_buf());
return Ok(psi_path.clone());
}
let p = if psi_path.as_os_str().is_empty() {
find_psi_subdirs().map_err(|e| anyhow!("find_psi_subdirs failed: {}", e))?
} else {
psi_path.to_path_buf()
psi_path.clone()
};
let mem_psi_path = p.join(MEM_PSI);
@@ -62,7 +64,7 @@ fn read_pressure_some_total(file_path: PathBuf) -> Result<u64> {
if reader
.read_line(&mut first_line)
.map_err(|e| anyhow!("reader.read_line failed: {}", e))?
== 0
<= 0
{
return Err(anyhow!("File is empty"));
}

View File

@@ -67,22 +67,22 @@ mod tests {
let mut timeout = Timeout::new(1);
// timeout should be timeout at once.
assert!(timeout.is_timeout());
assert_eq!(timeout.is_timeout(), true);
timeout.reset();
assert!(!timeout.is_timeout());
assert_eq!(timeout.is_timeout(), false);
thread::sleep(Duration::from_secs(2));
assert!(timeout.is_timeout());
assert_eq!(timeout.is_timeout(), true);
timeout.set_sleep_duration(2);
timeout.reset();
assert!(!timeout.is_timeout());
assert_eq!(timeout.is_timeout(), false);
thread::sleep(Duration::from_secs(1));
assert!(!timeout.is_timeout());
assert_eq!(timeout.is_timeout(), false);
thread::sleep(Duration::from_secs(1));
assert!(timeout.is_timeout());
assert_eq!(timeout.is_timeout(), true);
}
}

View File

@@ -2309,9 +2309,9 @@ dependencies = [
[[package]]
name = "kvm-ioctls"
version = "0.12.1"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d592c9b0da14bacab1fe89c78e7ed873b20cf7f502d0fc26f628d733215b1e5"
checksum = "c3a321cabd827642499c77e27314f388dd83a717a5ca716b86476fb947f73ae4"
dependencies = [
"kvm-bindings",
"libc",

View File

@@ -264,7 +264,7 @@ ifneq (,$(CLHCMD))
VMROOTFSDRIVER_CLH := virtio-pmem
DEFSANDBOXCGROUPONLY_CLH := true
DEFSTATICRESOURCEMGMT_CLH := false
DEFSTATICRESOURCEMGMT_CLH := true
endif
ifneq (,$(QEMUCMD))
@@ -306,7 +306,7 @@ ifneq (,$(QEMUCMD))
KERNELPATH_QEMU_SE = $(KERNELDIR)/$(KERNEL_NAME_QEMU_SE)
# overriding options
DEFSTATICRESOURCEMGMT_QEMU := false
DEFSTATICRESOURCEMGMT_QEMU := true
# qemu-specific options
DEFSANDBOXCGROUPONLY_QEMU := false

View File

@@ -225,11 +225,6 @@ block_device_driver = "virtio-blk-pci"
# result in memory pre allocation
#enable_hugepages = true
# Enable running clh VMM as a non-root user.
# By default clh VMM run as root. When this is set to true, clh VMM process runs as
# a non-root random user. See documentation for the limitations of this mode.
#rootless = true
# Disable the 'seccomp' feature from Cloud Hypervisor, firecracker or dragonball, default false
# disable_seccomp = true

View File

@@ -509,7 +509,7 @@ disable_selinux=@DEFDISABLESELINUX@
disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
[hypervisor.qemu.factory]
[factory]
# VM templating support. Once enabled, new VMs are created from template
# using vm cloning. They will share the same initial kernel, initramfs and
# agent memory by mapping it readonly. It helps speeding up new container

View File

@@ -49,10 +49,6 @@ impl AgentManager for KataAgent {
async fn agent_config(&self) -> AgentConfig {
self.agent_config().await
}
async fn disconnect(&self) -> Result<()> {
self.disconnect().await.context("disconnect agent")
}
}
// implement for health service

View File

@@ -14,7 +14,6 @@ use std::{
use anyhow::{Context, Result};
use kata_types::config::Agent as AgentConfig;
use nix::unistd;
use protocols::{agent_ttrpc_async as agent_ttrpc, health_ttrpc_async as health_ttrpc};
use tokio::sync::RwLock;
use ttrpc::asynchronous::Client;
@@ -162,21 +161,4 @@ impl KataAgent {
let inner = self.inner.read().await;
inner.config.clone()
}
/// Disconnect from the agent gRPC server and clean up related resources.
pub(crate) async fn disconnect(&self) -> Result<()> {
let mut inner = self.inner.write().await;
inner.log_forwarder.stop();
// If there is a valid client, drop it
inner.client.take();
// If fd is valid (> 0), close it
if inner.client_fd >= 0 {
unistd::close(inner.client_fd).context("failed to close agent client fd")?;
inner.client_fd = -1;
}
Ok(())
}
}

View File

@@ -39,7 +39,6 @@ pub const AGENT_KATA: &str = "kata";
pub trait AgentManager: Send + Sync {
async fn start(&self, address: &str) -> Result<()>;
async fn stop(&self);
async fn disconnect(&self) -> Result<()>;
async fn agent_sock(&self) -> Result<String>;
async fn agent_config(&self) -> AgentConfig;

View File

@@ -6,12 +6,10 @@ use crate::{
DeviceConfig, DiskConfig, FsConfig, NetConfig, VmConfig, VmInfo, VmResize, VsockConfig,
};
use anyhow::{anyhow, Context, Result};
use api_client::{
simple_api_full_command_and_response, simple_api_full_command_with_fds_and_response,
};
use api_client::simple_api_full_command_and_response;
use serde::{Deserialize, Serialize};
use std::os::{fd::RawFd, unix::net::UnixStream};
use std::os::unix::net::UnixStream;
use tokio::task;
pub async fn cloud_hypervisor_vmm_ping(mut socket: UnixStream) -> Result<Option<String>> {
@@ -122,28 +120,6 @@ pub async fn cloud_hypervisor_vm_netdev_add(
.await?
}
pub async fn cloud_hypervisor_vm_netdev_add_with_fds(
mut socket: UnixStream,
net_config: NetConfig,
request_fds: Vec<RawFd>,
) -> Result<Option<String>> {
let serialised = serde_json::to_string(&net_config)?;
task::spawn_blocking(move || -> Result<Option<String>> {
let response = simple_api_full_command_with_fds_and_response(
&mut socket,
"PUT",
"vm.add-net",
Some(&serialised),
request_fds,
)
.map_err(|e| anyhow!(e))?;
Ok(response)
})
.await?
}
pub async fn cloud_hypervisor_vm_device_add(
mut socket: UnixStream,
device_config: DeviceConfig,

View File

@@ -5,11 +5,8 @@
// SPDX-License-Identifier: Apache-2.0
use super::inner::CloudHypervisorInner;
use crate::ch::utils::get_rootless_symlink_sandbox_jailer_root;
use crate::device::pci_path::PciPath;
use crate::device::DeviceType;
use crate::utils::create_dir_all_with_inherit_owner;
use crate::utils::open_named_tuntap;
use crate::HybridVsockDevice;
use crate::NetworkConfig;
use crate::NetworkDevice;
@@ -22,20 +19,15 @@ use anyhow::{anyhow, Context, Result};
use ch_config::ch_api::cloud_hypervisor_vm_device_add;
use ch_config::ch_api::{
cloud_hypervisor_vm_blockdev_add, cloud_hypervisor_vm_device_remove,
cloud_hypervisor_vm_fs_add, cloud_hypervisor_vm_netdev_add_with_fds,
cloud_hypervisor_vm_vsock_add, PciDeviceInfo, VmRemoveDeviceData,
cloud_hypervisor_vm_fs_add, cloud_hypervisor_vm_netdev_add, cloud_hypervisor_vm_vsock_add,
PciDeviceInfo, VmRemoveDeviceData,
};
use ch_config::convert::{DEFAULT_DISK_QUEUES, DEFAULT_DISK_QUEUE_SIZE, DEFAULT_NUM_PCI_SEGMENTS};
use ch_config::DiskConfig;
use ch_config::{net_util::MacAddr, DeviceConfig, FsConfig, NetConfig, VsockConfig};
use kata_sys_util::netns::NetnsGuard;
use kata_types::config::hypervisor::RateLimiterConfig;
use kata_types::rootless::is_rootless;
use safe_path::scoped_join;
use std::convert::TryFrom;
use std::os::fd::AsRawFd;
use std::os::fd::IntoRawFd;
use std::os::unix::fs::symlink;
use std::path::PathBuf;
const VIRTIO_FS: &str = "virtio-fs";
@@ -373,20 +365,11 @@ impl CloudHypervisorInner {
.ok_or("missing socket")
.map_err(|e| anyhow!(e))?;
let mut clh_net_config = NetConfig::try_from(device.config)?;
// When using fds to pass the tap device to cloud-hypervisor, tap and id fields should be None
clh_net_config.tap = None;
clh_net_config.id = None;
let clh_net_config = NetConfig::try_from(device.config)?;
let files = open_named_tuntap(&netdev.config.host_dev_name, netdev.config.queue_num as u32)
.context("open named tuntap")?;
let fds = files.iter().map(|f| f.as_raw_fd()).collect();
let response = cloud_hypervisor_vm_netdev_add_with_fds(
let response = cloud_hypervisor_vm_netdev_add(
socket.try_clone().context("failed to clone socket")?,
clh_net_config,
fds,
)
.await?;
@@ -413,70 +396,12 @@ impl CloudHypervisorInner {
DeviceType::ShareFs(dev) => {
let settings = ShareFsSettings::new(dev.config, self.vm_path.clone());
let fs_cfg = if is_rootless() {
// TODO: Replace this symlink workaround if a better approach for rootless socket paths appears.
// In rootless mode the virtiofsd.sock lives under the rootless directory,
// and its full path can exceed the 108-byte Unix domain socket limit.
// To ensure the cloud-hypervisor VMM can connect to virtiofsd, create a
// short symlink inside the rootless directory and point the VMM at it.
let mut fs_cfg = FsConfig::try_from(settings)?;
let rootless_symlink_sanbox_jailer_root =
get_rootless_symlink_sandbox_jailer_root(self.id.as_str());
create_dir_all_with_inherit_owner(
rootless_symlink_sanbox_jailer_root.as_str(),
0x750,
)
.map_err(|e| {
anyhow!(
"failed to create rootless sharefs symlink jailer root dir: {}",
e
)
})?;
let virtiofsd_name = fs_cfg.socket.file_name().ok_or_else(|| {
anyhow!(
"failed to get virtiofsd socket file name from path: {:?}",
fs_cfg.socket
)
})?;
let virtiofsd_symlink_path =
PathBuf::from(rootless_symlink_sanbox_jailer_root.as_str())
.join(virtiofsd_name);
symlink(&fs_cfg.socket, &virtiofsd_symlink_path).map_err(|e| {
anyhow!(
"failed to create symlink for rootless sharefs socket: {}",
e
)
})?;
fs_cfg.socket = virtiofsd_symlink_path;
fs_cfg
} else {
FsConfig::try_from(settings)?
};
let fs_cfg = FsConfig::try_from(settings)?;
shared_fs_devices.push(fs_cfg);
}
DeviceType::Network(net_device) => {
let mut net_config = NetConfig::try_from(net_device.config.clone())?;
// When using fds to pass the tap device to cloud-hypervisor, tap and id fields should be None
net_config.tap = None;
net_config.id = None;
// we need ensure opening network device happens in netns.
let netns = self.netns.clone().unwrap_or_default();
let _netns_guard = NetnsGuard::new(&netns).context("new netns guard")?;
let fds = open_named_tuntap(
&net_device.config.host_dev_name,
net_device.config.queue_num as u32,
)
.context("open named tuntap")?
.into_iter()
.map(|f| f.into_raw_fd())
.collect();
net_config.fds = Some(fds);
let net_config = NetConfig::try_from(net_device.config)?;
network_devices.push(net_config);
}
DeviceType::Vfio(vfio_device) => {

View File

@@ -5,20 +5,15 @@
use super::inner::CloudHypervisorInner;
use crate::ch::utils::get_api_socket_path;
use crate::ch::utils::get_rootless_symlink_sandbox_path;
use crate::ch::utils::get_vsock_path;
use crate::kernel_param::KernelParams;
use crate::selinux;
use crate::utils::create_dir_all_with_inherit_owner;
use crate::utils::set_groups;
use crate::utils::vm_cleanup;
use crate::utils::{bytes_to_megs, get_jailer_root, get_sandbox_path, megs_to_bytes};
use crate::MemoryConfig;
use crate::VM_ROOTFS_DRIVER_BLK;
use crate::VM_ROOTFS_DRIVER_PMEM;
use crate::{VcpuThreadIds, VmmState};
use anyhow::{anyhow, Context, Result};
use ch_config::ch_api::cloud_hypervisor_vm_netdev_add_with_fds;
use ch_config::{
ch_api::{
cloud_hypervisor_vm_create, cloud_hypervisor_vm_info, cloud_hypervisor_vm_resize,
@@ -32,20 +27,14 @@ use futures::future::join_all;
use kata_sys_util::protection::{available_guest_protection, GuestProtection};
use kata_types::capabilities::{Capabilities, CapabilityBits};
use kata_types::config::default::DEFAULT_CH_ROOTFS_TYPE;
use kata_types::config::hypervisor::RootlessUser;
use kata_types::rootless::is_rootless;
use lazy_static::lazy_static;
use nix::sched::{setns, CloneFlags};
use nix::unistd::setgid;
use nix::unistd::setuid;
use nix::unistd::Gid;
use nix::unistd::Uid;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
use std::convert::TryFrom;
use std::fs;
use std::fs::remove_dir_all;
use std::fs::create_dir_all;
use std::os::unix::io::AsRawFd;
use std::os::unix::net::UnixStream;
use std::path::Path;
@@ -206,8 +195,7 @@ impl CloudHypervisorInner {
let sandbox_path = get_sandbox_path(&self.id);
create_dir_all_with_inherit_owner(sandbox_path.clone(), 0o750)
.context("failed to create sandbox path")?;
std::fs::create_dir_all(sandbox_path.clone()).context("failed to create sandbox path")?;
let vsock_socket_path = get_vsock_path(&self.id)?;
@@ -226,8 +214,8 @@ impl CloudHypervisorInner {
cfg: self.config.clone(),
guest_protection_to_use: self.guest_protection_to_use.clone(),
shared_fs_devices,
network_devices,
host_devices,
..Default::default()
};
let cfg = VmConfig::try_from(named_cfg)?;
@@ -247,28 +235,6 @@ impl CloudHypervisorInner {
debug!(sl!(), "vm boot response: {:?}", detail);
}
if let Some(network_devices) = network_devices {
for net in network_devices {
let vm_fds = net.fds.clone().unwrap_or_default();
let response = cloud_hypervisor_vm_netdev_add_with_fds(
socket.try_clone().context("failed to clone socket")?,
net,
vm_fds.clone(),
)
.await
.context("failed to add vm netdev with fds")?;
if let Some(detail) = response {
debug!(sl!(), "vm netdev add response: {:?}", detail);
}
for fd in vm_fds {
// Explicitly close the fd now that it has been sent to CLH.
nix::unistd::close(fd).context("failed to close netdev fd")?;
}
}
}
let response =
cloud_hypervisor_vm_start(socket.try_clone().context("failed to clone socket")?)
.await?;
@@ -403,20 +369,6 @@ impl CloudHypervisorInner {
);
}
let user: Option<RootlessUser> = if is_rootless() {
Some(
self.config
.security_info
.rootless_user
.clone()
.ok_or_else(|| {
anyhow!("rootless user must be specified for rootless cloud-hypervisor")
})?,
)
} else {
None
};
unsafe {
let selinux_label = self.config.security_info.selinux_label.clone();
let _pre = cmd.pre_exec(move || {
@@ -437,16 +389,6 @@ impl CloudHypervisorInner {
);
}
}
if let Some(user) = &user {
let groups = user.groups.clone();
let gid = Gid::from_raw(user.gid);
let uid = Uid::from_raw(user.uid);
let _ = set_groups(&groups);
let _ = setgid(gid).context("setgid failed");
let _ = setuid(uid).context("setuid failed");
}
Ok(())
});
}
@@ -679,11 +621,11 @@ impl CloudHypervisorInner {
self.run_dir = get_sandbox_path(&self.id);
self.vm_path = self.run_dir.to_string();
create_dir_all_with_inherit_owner(&self.run_dir, 0o750)
create_dir_all(&self.run_dir)
.with_context(|| anyhow!("failed to create sandbox directory {}", self.run_dir))?;
if !self.jailer_root.is_empty() {
create_dir_all_with_inherit_owner(self.jailer_root.as_str(), 0o750)
create_dir_all(self.jailer_root.as_str())
.map_err(|e| anyhow!("Failed to create dir {} err : {:?}", self.jailer_root, e))?;
}
@@ -762,9 +704,7 @@ impl CloudHypervisorInner {
}
pub(crate) async fn cleanup(&self) -> Result<()> {
info!(sl!(), "CloudHypervisor::cleanup()");
remove_dir_all(get_rootless_symlink_sandbox_path(self.id.as_str()))?;
vm_cleanup(&self.config, self.vm_path.as_str())
Ok(())
}
pub(crate) async fn resize_vcpu(
@@ -843,7 +783,7 @@ impl CloudHypervisorInner {
pub(crate) async fn get_jailer_root(&self) -> Result<String> {
let root_path = get_jailer_root(&self.id);
create_dir_all_with_inherit_owner(&root_path, 0o750)?;
std::fs::create_dir_all(&root_path)?;
Ok(root_path)
}

View File

@@ -2,12 +2,9 @@
//
// SPDX-License-Identifier: Apache-2.0
use std::path::Path;
use anyhow::Result;
use anyhow::{Ok, Result};
use kata_types::build_path;
use crate::{utils::get_sandbox_path, JAILER_ROOT};
use crate::utils::get_sandbox_path;
// The socket used to connect to CH. This is used for CH API communications.
const CH_API_SOCKET_NAME: &str = "ch-api.sock";
@@ -37,17 +34,3 @@ pub fn get_vsock_path(id: &str) -> Result<String> {
Ok(path)
}
/// Returns the symlink path of the sandbox for the virtio-fs socket in rootless mode.
pub fn get_rootless_symlink_sandbox_path(id: &str) -> String {
    // build_path() yields the per-sandbox location; round-trip through
    // Path so the result is normalized into an owned String.
    let sandbox_path = build_path(id);
    Path::new(sandbox_path.as_str())
        .to_string_lossy()
        .into_owned()
}
/// Returns the symlink path of the sandbox's jailer root for the virtio-fs socket in rootless mode.
pub fn get_rootless_symlink_sandbox_jailer_root(id: &str) -> String {
    // Same result as joining the two components with a '/' separator.
    format!("{}/{}", get_rootless_symlink_sandbox_path(id), JAILER_ROOT)
}

View File

@@ -23,12 +23,10 @@ use dragonball::api::v1::{
BlockDeviceConfigInfo, FsDeviceConfigInfo, FsMountConfigInfo, NetworkInterfaceConfig,
VsockDeviceConfigInfo,
};
use dragonball::config_manager::{RateLimiterConfigInfo, TokenBucketConfigInfo};
use dragonball::device_manager::{
blk_dev_mgr::BlockDeviceType,
vfio_dev_mgr::{HostDeviceConfig, VfioPciDeviceConfig},
};
use kata_types::config::hypervisor::DEFAULT_RATE_LIMITER_REFILL_TIME;
const MB_TO_B: u32 = 1024 * 1024;
const DEFAULT_VIRTIO_FS_NUM_QUEUES: i32 = 1;
@@ -227,27 +225,6 @@ impl DragonballInner {
let jailed_drive = self.get_resource(path, id).context("get resource")?;
self.cached_block_devices.insert(id.to_string());
let bandwidth = TokenBucketConfigInfo {
size: self.config.blockdev_info.disk_rate_limiter_bw_max_rate,
one_time_burst: self
.config
.blockdev_info
.disk_rate_limiter_bw_one_time_burst
.unwrap_or(0),
refill_time: DEFAULT_RATE_LIMITER_REFILL_TIME,
};
let ops = TokenBucketConfigInfo {
size: self.config.blockdev_info.disk_rate_limiter_ops_max_rate,
one_time_burst: self
.config
.blockdev_info
.disk_rate_limiter_ops_one_time_burst
.unwrap_or(0),
refill_time: DEFAULT_RATE_LIMITER_REFILL_TIME,
};
let block_rate_limit = RateLimiterConfigInfo { bandwidth, ops };
let blk_cfg = BlockDeviceConfigInfo {
drive_id: id.to_string(),
device_type: BlockDeviceType::get_type(path),
@@ -256,7 +233,6 @@ impl DragonballInner {
no_drop,
is_read_only: read_only,
use_pci_bus,
rate_limiter: Some(block_rate_limit),
..Default::default()
};
self.vmm_instance

View File

@@ -219,6 +219,5 @@ pub fn get_process_seccomp_rules() -> Vec<(i64, Vec<seccompiler::SeccompRule>)>
(libc::SYS_chmod, vec![]),
#[cfg(target_arch = "x86_64")]
(libc::SYS_fchmodat2, vec![]),
(libc::SYS_pselect6, vec![]),
]
}

View File

@@ -1476,23 +1476,6 @@ impl ToQemuParams for Rtc {
}
}
// Template represents QEMU template boot configuration.
#[derive(Debug)]
struct Template {}

impl Template {
    /// Builds the field-less marker value used to emit template boot flags.
    fn new() -> Template {
        Self {}
    }
}

#[async_trait]
impl ToQemuParams for Template {
    /// Emits "-incoming defer" so QEMU waits for an explicit incoming
    /// migration instead of booting from scratch.
    async fn qemu_params(&self) -> Result<Vec<String>> {
        Ok(vec!["-incoming".to_string(), "defer".to_string()])
    }
}
#[derive(Debug)]
struct ObjectRngRandom {
// id is the device ID
@@ -2238,10 +2221,6 @@ impl<'a> QemuCmdLine<'a> {
qemu_cmd_line.add_rtc();
if config.vm_template.boot_from_template {
qemu_cmd_line.add_template();
}
if bus_type(config) != VirtioBusType::Ccw {
qemu_cmd_line.add_rng();
}
@@ -2281,11 +2260,6 @@ impl<'a> QemuCmdLine<'a> {
self.devices.push(Box::new(rtc));
}
fn add_template(&mut self) {
let template = Template::new();
self.devices.push(Box::new(template));
}
fn add_rng(&mut self) {
let rng_object = ObjectRngRandom::new();
let rng_device = DeviceRng::new();

View File

@@ -33,9 +33,6 @@ use std::cmp::Ordering;
use std::convert::TryInto;
use std::path::Path;
use std::process::Stdio;
use std::time::Duration;
use tokio::time::sleep;
use tokio::{
io::{AsyncBufReadExt, BufReader},
process::{Child, ChildStderr, Command},
@@ -291,15 +288,7 @@ impl QemuInner {
}
}
// Start the virtual machine by restoring it from a VM template if enabled.
if self.config.vm_template.boot_from_template {
self.boot_from_template()
.await
.context("boot from template")?;
self.resume_vm().context("resume vm")?;
}
// When hypervisor debug is enabled, output the kernel boot messages for debugging.
//When hypervisor debug is enabled, output the kernel boot messages for debugging.
if self.config.debug_info.enable_debug {
let stream = UnixStream::connect(console_socket_path.as_os_str()).await?;
tokio::spawn(log_qemu_console(stream));
@@ -308,42 +297,6 @@ impl QemuInner {
Ok(())
}
/// Restores the guest from a previously saved VM template.
///
/// Sequence: enable the `x-ignore-shared` migration capability (see
/// `set_ignore_shared_memory_capability`), feed the saved device state
/// to QEMU through an `exec:cat` incoming migration, then wait for the
/// migration to settle before the caller resumes the VM.
async fn boot_from_template(&mut self) -> Result<()> {
    let qmp = self
        .qmp
        .as_mut()
        .context("failed to get QMP connection for boot from template")?;

    qmp.set_ignore_shared_memory_capability()
        .context("failed to set ignore shared memory capability")?;

    // Read the device-state snapshot back through a pipe from cat(1).
    let uri = format!("exec:cat {}", self.config.vm_template.device_state_path);
    qmp.execute_migration_incoming(&uri)
        .context("failed to execute migration incoming")?;

    self.wait_for_migration()
        .await
        .context("failed to wait for migration")?;
    info!(sl!(), "migration complete");

    Ok(())
}
/// Waits for an in-flight QMP migration to finish.
///
/// NOTE(review): this is a fixed sleep rather than polling
/// `query-migrate`; see the comment below for the QAPI version
/// mismatch that motivated the workaround.
pub async fn wait_for_migration(&mut self) -> Result<()> {
    if self.qmp.is_none() {
        return Err(anyhow!("QMP is not connected"));
    }

    // The result format returned by QEMU version 9.1.2 does not match
    // the expected format of the existing QAPI version 0.14.
    // However, no issues were found when tested with QAPI version 0.15.
    // Therefore, we will temporarily skip this issue.
    sleep(Duration::from_millis(280)).await;

    Ok(())
}
pub(crate) async fn stop_vm(&mut self) -> Result<()> {
info!(sl!(), "Stopping QEMU VM");
@@ -376,36 +329,18 @@ impl QemuInner {
}
}
pub(crate) fn pause_vm(&mut self) -> Result<()> {
let qmp = self.qmp.as_mut().ok_or(anyhow!("qmp not initialized"))?;
qmp.qmp_stop().context("pause vm")
pub(crate) fn pause_vm(&self) -> Result<()> {
info!(sl!(), "Pausing QEMU VM");
todo!()
}
pub(crate) fn resume_vm(&mut self) -> Result<()> {
let qmp = self.qmp.as_mut().ok_or(anyhow!("qmp not initialized"))?;
qmp.qmp_cont().context("resume vm")
pub(crate) fn resume_vm(&self) -> Result<()> {
info!(sl!(), "Resuming QEMU VM");
todo!()
}
pub(crate) async fn save_vm(&mut self) -> Result<()> {
let qmp = self.qmp.as_mut().ok_or(anyhow!("QMP not initialized"))?;
if self.config.vm_template.boot_to_be_template {
qmp.set_ignore_shared_memory_capability()
.context("failed to set ignore shared memory capability")?;
}
let uri = format!("exec:cat >{}", self.config.vm_template.device_state_path);
qmp.execute_migration(&uri)
.context("failed to execute migration")?;
self.wait_for_migration()
.await
.context("failed to wait for migration")?;
info!(sl!(), "migration finished successfully");
Ok(())
pub(crate) async fn save_vm(&self) -> Result<()> {
todo!()
}
pub(crate) async fn get_agent_socket(&self) -> Result<String> {
@@ -603,15 +538,15 @@ impl QemuInner {
}
};
let coldplugged_mem_mb = self.config.memory_info.default_memory;
let coldplugged_mem = megs_to_bytes(coldplugged_mem_mb);
let coldplugged_mem = megs_to_bytes(self.config.memory_info.default_memory);
let new_total_mem = megs_to_bytes(new_total_mem_mb);
if new_total_mem < coldplugged_mem {
warn!(sl!(), "asked to resize to {} M but that is less than cold-plugged memory size ({}), nothing to do",new_total_mem_mb,
bytes_to_megs(coldplugged_mem));
return Ok((coldplugged_mem_mb, MemoryConfig::default()));
return Err(anyhow!(
"asked to resize to {} M but that is less than cold-plugged memory size ({})",
new_total_mem_mb,
bytes_to_megs(coldplugged_mem)
));
}
let guest_mem_block_size = qmp.guest_memory_block_size();

View File

@@ -91,17 +91,17 @@ impl Hypervisor for Qemu {
}
async fn pause_vm(&self) -> Result<()> {
let mut inner = self.inner.write().await;
let inner = self.inner.read().await;
inner.pause_vm()
}
async fn resume_vm(&self) -> Result<()> {
let mut inner = self.inner.write().await;
let inner = self.inner.read().await;
inner.resume_vm()
}
async fn save_vm(&self) -> Result<()> {
let mut inner = self.inner.write().await;
let inner = self.inner.read().await;
inner.save_vm().await
}

View File

@@ -12,12 +12,6 @@ use anyhow::{anyhow, Context, Result};
use kata_types::config::hypervisor::VIRTIO_SCSI;
use kata_types::rootless::is_rootless;
use nix::sys::socket::{sendmsg, ControlMessage, MsgFlags};
use qapi_qmp::{
self as qmp, BlockdevAioOptions, BlockdevOptions, BlockdevOptionsBase,
BlockdevOptionsGenericFormat, BlockdevOptionsRaw, BlockdevRef, PciDeviceInfo,
};
use qapi_qmp::{migrate, migrate_incoming, migrate_set_capabilities};
use qapi_qmp::{MigrationCapability, MigrationCapabilityStatus};
use std::collections::HashMap;
use std::convert::TryFrom;
use std::fmt::{Debug, Error, Formatter};
@@ -27,7 +21,12 @@ use std::os::unix::net::UnixStream;
use std::str::FromStr;
use std::time::Duration;
use qapi_qmp::{
self as qmp, BlockdevAioOptions, BlockdevOptions, BlockdevOptionsBase,
BlockdevOptionsGenericFormat, BlockdevOptionsRaw, BlockdevRef, PciDeviceInfo,
};
use qapi_spec::Dictionary;
/// default qmp connection read timeout
const DEFAULT_QMP_READ_TIMEOUT: u64 = 250;
@@ -83,40 +82,6 @@ impl Qmp {
Ok(qmp)
}
/// Enables QMP's `x-ignore-shared` migration capability, used by the
/// VM template save/restore path so shared-memory RAM is not copied
/// as part of the migration stream.
pub fn set_ignore_shared_memory_capability(&mut self) -> Result<()> {
    self.qmp
        .execute(&migrate_set_capabilities {
            capabilities: vec![MigrationCapabilityStatus {
                capability: MigrationCapability::x_ignore_shared,
                state: true,
            }],
        })
        // The command's return payload carries no useful data; drop it.
        .map(|_| ())
        .context("set ignore shared memory capability")
}
/// Starts an outgoing migration to `uri` (e.g. "exec:cat >FILE" to
/// dump VM state to a file). The QMP command returns once accepted;
/// completion must be awaited separately (see `wait_for_migration`).
pub fn execute_migration(&mut self, uri: &str) -> Result<()> {
    self.qmp
        .execute(&migrate {
            // Optional/deprecated knobs are deliberately left unset.
            detach: None,
            resume: None,
            blk: None,
            inc: None,
            uri: uri.to_string(),
        })
        .map(|_| ())
        .context("execute migration")
}
/// Starts an incoming migration from `uri` on a VM launched with
/// "-incoming defer" (e.g. "exec:cat FILE" to restore saved state).
pub fn execute_migration_incoming(&mut self, uri: &str) -> Result<()> {
    self.qmp
        .execute(&migrate_incoming {
            uri: uri.to_string(),
        })
        .map(|_| ())
        .context("execute migration incoming")
}
pub fn hotplug_vcpus(&mut self, vcpu_cnt: u32) -> Result<u32> {
let hotpluggable_cpus = self.qmp.execute(&qmp::query_hotpluggable_cpus {})?;
//info!(sl!(), "hotpluggable CPUs: {:#?}", hotpluggable_cpus);
@@ -776,20 +741,6 @@ impl Qmp {
Ok(Some(pci_path))
}
/// Pauses guest vCPU execution via the QMP `stop` command.
pub fn qmp_stop(&mut self) -> Result<()> {
    // The command's reply carries no payload worth keeping.
    let _ = self
        .qmp
        .execute(&qmp::stop {})
        .context("execute qmp stop")?;
    Ok(())
}
/// Resumes guest vCPU execution via the QMP `cont` command.
pub fn qmp_cont(&mut self) -> Result<()> {
    // The command's reply carries no payload worth keeping.
    let _ = self
        .qmp
        .execute(&qmp::cont {})
        .context("execute qmp cont")?;
    Ok(())
}
/// Get vCPU thread IDs through QMP query_cpus_fast.
pub fn get_vcpu_thread_ids(&mut self) -> Result<VcpuThreadIds> {
let vcpu_info = self

View File

@@ -13,7 +13,7 @@ use anyhow::Result;
use async_trait::async_trait;
use std::sync::Arc;
#[derive(Clone, Default)]
#[derive(Clone)]
pub struct SandboxNetworkEnv {
pub netns: Option<String>,
pub network_created: bool,
@@ -31,7 +31,6 @@ impl std::fmt::Debug for SandboxNetworkEnv {
#[async_trait]
pub trait Sandbox: Send + Sync {
async fn start(&self) -> Result<()>;
async fn start_template(&self) -> Result<()>;
async fn stop(&self) -> Result<()>;
async fn cleanup(&self) -> Result<()>;
async fn shutdown(&self) -> Result<()>;

View File

@@ -27,7 +27,6 @@ async-std = "1.12.0"
tracing = { workspace = true }
oci-spec = { workspace = true }
strum = { workspace = true }
uuid = { version = "1", features = ["v4"] }
# Local dependencies
agent = { workspace = true }

View File

@@ -215,13 +215,6 @@ impl Container {
.await?;
if let Some(linux) = &mut spec.linux_mut() {
linux.set_resources(resources);
// In certain scenarios, particularly under CoCo/Agent Policy enforcement, the default initial value of `Linux.Resources.Devices`
// is considered non-compliant, leading to container creation failures. To address this issue and ensure consistency with the behavior
// in `runtime-go`, the default value of `Linux.Resources.Devices` from the OCI Spec should be removed.
if let Some(resource) = linux.resources_mut() {
clean_linux_resources_devices(resource);
}
}
let container_name = k8s::container_name(&spec);
@@ -250,12 +243,6 @@ impl Container {
.await?;
}
info!(
sl!(),
"OCI Spec {:?} within CreateContainerRequest.",
spec.clone()
);
// create container
let r = agent::CreateContainerRequest {
process_id: agent::ContainerProcessID::new(&config.container_id, ""),
@@ -638,6 +625,30 @@ fn amend_spec(
linux.set_seccomp(None);
}
// In certain scenarios, particularly under CoCo/Agent Policy enforcement, the default initial value of `Linux.Resources.Devices`
// is considered non-compliant, leading to container creation failures. To address this issue and ensure consistency with the behavior
// in `runtime-go`, the default value of `Linux.Resources.Devices` from the OCI Spec should be removed.
if let Some(resources) = linux.resources_mut() {
if let Some(devices) = resources.devices_mut().take() {
let cleaned_devices: Vec<LinuxDeviceCgroup> = devices
.into_iter()
.filter(|device| {
!(!device.allow()
&& device.typ().is_none()
&& device.major().is_none()
&& device.minor().is_none()
&& device.access().as_deref() == Some("rwm"))
})
.collect();
resources.set_devices(if cleaned_devices.is_empty() {
None
} else {
Some(cleaned_devices)
});
}
}
// Host pidns path does not make sense in kata. Let's just align it with
// sandbox namespace whenever it is set.
let ns: Vec<oci::LinuxNamespace> = linux
@@ -686,30 +697,6 @@ fn is_pid_namespace_enabled(spec: &oci::Spec) -> bool {
false
}
/// Cleans or filters specific device cgroup rules within the `devices` field of the `LinuxResources`.
/// Specifically, it walks every `LinuxDeviceCgroup` rule in `resources`
/// and drops those that are "default, all-access (rwm), non-specific
/// device" deny rules, leaving `None` when nothing remains.
fn clean_linux_resources_devices(resources: &mut LinuxResources) {
    let devices = match resources.devices_mut().take() {
        Some(d) => d,
        None => return,
    };

    let mut kept: Vec<LinuxDeviceCgroup> = Vec::with_capacity(devices.len());
    for device in devices {
        // The catch-all rule: deny, no type, no major/minor, "rwm" access.
        let is_default_catch_all = !device.allow()
            && device.typ().is_none()
            && device.major().is_none()
            && device.minor().is_none()
            && device.access().as_deref() == Some("rwm");
        if !is_default_catch_all {
            kept.push(device);
        }
    }

    resources.set_devices(if kept.is_empty() { None } else { Some(kept) });
}
#[cfg(test)]
mod tests {
use super::amend_spec;

View File

@@ -192,12 +192,9 @@ impl ContainerManager for VirtContainerManager {
if req.spec_type_url.is_empty() {
return Err(anyhow!("invalid type url"));
}
let mut oci_process: OCIProcess =
let oci_process: OCIProcess =
serde_json::from_slice(&req.spec_value).context("serde from slice")?;
oci_process.set_apparmor_profile(None);
oci_process.set_capabilities(None);
let containers = self.containers.read().await;
let container_id = &req.process.container_id.container_id;
let c = containers

View File

@@ -1,153 +0,0 @@
// Copyright 2025 Kata Contributors
//
// SPDX-License-Identifier: Apache-2.0
//
use std::fs;
use std::path::{Path, PathBuf};
use anyhow::{anyhow, Context, Result};
use kata_sys_util::mount::umount_all;
use kata_types::config::TomlConfig;
use serde::{Deserialize, Serialize};
use slog::{error, info, warn};
use crate::factory::{template::Template, vm::VmConfig};
pub mod template;
pub mod vm;
/// Configuration of the VM template ("factory") feature, extracted from
/// the runtime's TOML configuration.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FactoryConfig {
    /// Path to the directory where VM templates are stored.
    #[serde(default)]
    pub template_path: String,
    /// Full configuration of the virtual machine to be used.
    #[serde(default)]
    pub vm_config: VmConfig,
    /// Whether VM template feature is enabled.
    #[serde(default)]
    pub template: bool,
}

impl FactoryConfig {
    /// Builds a `FactoryConfig` from the loaded TOML runtime configuration.
    pub fn new(toml_config: &TomlConfig) -> Self {
        Self {
            template: toml_config.get_factory().enable_template,
            template_path: toml_config.get_factory().template_path,
            vm_config: VmConfig::new(toml_config),
        }
    }
}
/// Load and validate factory configuration
fn load_and_validate_factory_config() -> Result<(TomlConfig, FactoryConfig)> {
let (toml_config, _) = TomlConfig::load_from_default().context("load toml config")?;
let factory_config = FactoryConfig::new(&toml_config);
if !factory_config.template {
return Err(anyhow!("vm factory is not enabled"));
}
Ok((toml_config, factory_config))
}
/// CLI handler that creates the VM template factory from the default
/// TOML configuration. Fails if the factory feature is disabled.
pub async fn init_factory_command() -> Result<()> {
    let (toml_config, mut factory_config) = load_and_validate_factory_config()?;
    // fetch_only = false: actually boot a template VM and save its state.
    new_factory(&mut factory_config, toml_config, false)
        .await
        .context("new factory")?;
    info!(sl!(), "create vm factory successfully");
    Ok(())
}
/// CLI handler that tears down the VM template factory: validates that
/// a template exists (fetch-only probe), then unmounts and removes its
/// state directory.
pub async fn destroy_factory_command() -> Result<()> {
    let (toml_config, mut factory_config) = load_and_validate_factory_config()?;
    // fetch_only = true: only verify an existing template before destroying.
    new_factory(&mut factory_config, toml_config, true)
        .await
        .context("new factory")?;
    // Fix: the original context string carried a stray leading space
    // (" close VM factory"), producing a malformed error-chain message.
    close_factory(&mut factory_config).context("close VM factory")?;
    info!(sl!(), "vm factory destroyed");
    Ok(())
}
/// CLI handler that reports whether a usable VM template exists.
/// Probes with fetch_only = true so nothing is created as a side effect;
/// any failure is interpreted as "factory is off".
pub async fn status_factory_command() -> Result<()> {
    let (toml_config, mut factory_config) = load_and_validate_factory_config()?;
    if new_factory(&mut factory_config, toml_config, true)
        .await
        .is_ok()
    {
        info!(sl!(), "vm factory is on");
    } else {
        info!(sl!(), "vm factory is off");
    }
    Ok(())
}
/// Creates the VM template factory, or — with `fetch_only` — merely
/// validates that an existing template can be opened.
///
/// `fetch_only == true` only checks the template's state files exist;
/// `false` boots a template VM and saves its state to `template_path`.
pub async fn new_factory(
    config: &mut FactoryConfig,
    toml_config: TomlConfig,
    fetch_only: bool,
) -> Result<()> {
    // Guard clause: the factory is only usable with templating enabled.
    if !config.template {
        anyhow::bail!("template must be enabled");
    }

    VmConfig::validate_hypervisor_config(&mut config.vm_config.hypervisor_config)
        .context("validate hypervisor config")?;

    let template_dir: PathBuf = config.template_path.clone().into();
    if fetch_only {
        Template::fetch(config.vm_config.clone(), template_dir).context("fetch VM template")?;
    } else {
        Template::create(config.vm_config.clone(), toml_config, template_dir)
            .await
            .context("initialize VM template factory")?;
    }

    Ok(())
}
/// Unmounts the template state directory (tmpfs) and removes it,
/// tearing the VM factory down. A missing path is treated as already
/// cleaned up, not as an error.
pub fn close_factory(config: &mut FactoryConfig) -> Result<()> {
    let state_path = Path::new(&config.template_path);

    // Check if the path exists
    if !state_path.exists() {
        warn!(
            sl!(),
            "Template path {:?} does not exist, skipping unmount", state_path
        );
        return Ok(());
    }

    // Use umount_all to unmount all filesystems at the mountpoint
    // First try normal umount (lazy_umount = false)
    if let Err(e) = umount_all(state_path, false) {
        error!(sl!(), "Normal umount failed for {:?}: {}", state_path, e);
        // If normal umount fails, try lazy umount (with MNT_DETACH flag)
        umount_all(state_path, true)
            .with_context(|| format!("Failed to lazy unmount {}", state_path.display()))?;
        info!(sl!(), "Lazy umount succeeded for {:?}", state_path);
    } else {
        info!(sl!(), "Normal umount succeeded for {:?}", state_path);
    }

    // Remove the directory after successful unmount
    fs::remove_dir_all(state_path)
        .with_context(|| format!("failed to remove {}", state_path.display()))?;

    Ok(())
}

View File

@@ -1,173 +0,0 @@
// Copyright 2025 Kata Contributors
//
// SPDX-License-Identifier: Apache-2.0
//
use std::fmt::Debug;
use std::fs::File;
use std::path::PathBuf;
use std::thread::sleep;
use std::time::Duration;
use anyhow::{anyhow, Context, Result};
use kata_types::config::TomlConfig;
use nix::mount::{mount, MsFlags};
use crate::factory::vm::{TemplateVm, VmConfig};
/// Maximum time to wait for the Kata Agent to become ready when initializing a template VM.
const TEMPLATE_WAIT_FOR_AGENT: Duration = Duration::from_secs(2);
/// Preallocated size (in MB) for saving the device state snapshot of the template VM.
const TEMPLATE_DEVICE_STATE_SIZE_MB: u32 = 8;
/// On-disk representation of a saved VM template (guest memory plus
/// device state) that new sandboxes can later be cloned from.
#[derive(Debug)]
pub struct Template {
    // Directory (tmpfs-backed once prepared) holding the template VM's
    // "memory" and "state" files.
    pub state_path: PathBuf,
    // VM configuration the template is created from.
    pub config: VmConfig,
}

impl Template {
    /// Creates a new Template instance with the given configuration and path.
    pub fn new(config: VmConfig, template_path: PathBuf) -> Self {
        Template {
            state_path: template_path,
            config,
        }
    }

    /// Opens an already-created template, failing if its state files are missing.
    pub fn fetch(config: VmConfig, template_path: PathBuf) -> Result<Box<Template>> {
        let t = Template::new(config, template_path);
        // Call template_vm_exists to validate the template's files
        if !t.template_vm_exists() {
            return Err(anyhow!("no template vm found"));
        }
        Ok(Box::new(t))
    }

    /// Creates and saves a new template VM to disk.
    /// This will prepare template files, create a VM, and save its state.
    pub async fn create(
        config: VmConfig,
        toml_config: TomlConfig,
        template_path: PathBuf,
    ) -> Result<Box<Template>> {
        let t = Template::new(config, template_path);
        // Refuse to clobber an existing template.
        if t.template_vm_exists() {
            return Err(anyhow!(
                "There is already a VM template in {:?}",
                t.state_path
            ));
        }
        t.prepare_template_files()
            .context("prepare template files")?;
        t.save_to_template(toml_config)
            .await
            .context("create template files")?;
        Ok(Box::new(t))
    }

    /// Returns true when both the "memory" and "state" files of a
    /// previously saved template are present under `state_path`.
    pub fn template_vm_exists(&self) -> bool {
        let memory_path = self.state_path.join("memory");
        let state_path = self.state_path.join("state");
        memory_path.exists() && state_path.exists()
    }

    /// Prepares the on-disk layout for a new template: creates the state
    /// directory, mounts a tmpfs over it, and creates the "memory" file.
    pub fn prepare_template_files(&self) -> Result<()> {
        // Create state directory
        std::fs::create_dir_all(&self.state_path)
            .context(format!("failed to create directory: {:?}", self.state_path))?;

        // Verify directory was created and is accessible
        if !self.state_path.exists() {
            return Err(anyhow!(
                "state path {:?} does not exist after creation",
                self.state_path
            ));
        }

        // Mount tmpfs to store template VM memory data in memory for:
        // - Accelerating VM cloning by avoiding disk I/O
        // - Enhancing security by keeping sensitive data in memory
        // - Supporting QEMU's shared memory clone model
        //
        // The tmpfs is sized to hold the guest memory plus a small
        // reserve for the device-state snapshot.
        let opts = format!(
            "size={}M",
            self.config.hypervisor_config.memory_info.default_memory
                + TEMPLATE_DEVICE_STATE_SIZE_MB
        );
        mount(
            Some("tmpfs"),
            &self.state_path,
            Some("tmpfs"),
            MsFlags::MS_NOSUID | MsFlags::MS_NODEV,
            Some(opts.as_str()),
        )
        .context(format!("failed to mount tmpfs at {:?}", self.state_path))?;

        // Verify mount successfully by checking if directory is still accessible
        if !self.state_path.is_dir() {
            return Err(anyhow!(
                "state path {:?} is not a directory after mount",
                self.state_path
            ));
        }

        // Create memory file
        let memory_file = self.state_path.join("memory");
        File::create(&memory_file)
            .context(format!("failed to create memory file: {:?}", memory_file))?;

        // Verify memory file was created successfully
        if !memory_file.exists() {
            return Err(anyhow!(
                "memory file {:?} does not exist after creation",
                memory_file
            ));
        }

        Ok(())
    }

    /// Configures the VM configuration for template operations.
    fn prepare_vm_config(&self, boot_to_be_template: bool) -> VmConfig {
        let mut config = self.config.clone();
        // A VM is either the template source or a clone of it; the two
        // flags are mutually exclusive by construction.
        config.hypervisor_config.vm_template.boot_to_be_template = boot_to_be_template;
        config.hypervisor_config.vm_template.boot_from_template = !boot_to_be_template;
        config.hypervisor_config.vm_template.memory_path =
            self.state_path.join("memory").to_string_lossy().to_string();
        config.hypervisor_config.vm_template.device_state_path =
            self.state_path.join("state").to_string_lossy().to_string();
        config
    }

    /// Boots a template source VM, then pauses it and saves its state
    /// under `state_path` so later VMs can be cloned from it.
    pub async fn save_to_template(&self, toml_config: TomlConfig) -> Result<()> {
        let config = self.prepare_vm_config(true);
        let vm = TemplateVm::new_vm(config, toml_config)
            .await
            .context("new template vm")?;
        vm.disconnect().await.context("disconnect template vm")?;

        // Sleep a bit to let the agent grpc server clean up
        // See: src/runtime/virtcontainers/factory/template/template_linux.go#L139-L145
        // When we close connection to the agent, it needs sometime to cleanup
        // and restart listening on the communication( serial or vsock) port.
        // That time can be saved if we sleep a bit to wait for the agent to
        // come around and start listening again. The sleep is only done when
        // creating new vm templates and saves time for every new vm that are
        // created from template, so it worth the invest.
        sleep(TEMPLATE_WAIT_FOR_AGENT);

        vm.pause().await.context("pause template vm")?;
        vm.save().await.context("save template vm")?;
        Ok(())
    }
}

View File

@@ -1,332 +0,0 @@
// Copyright 2025 Kata Contributors
//
// SPDX-License-Identifier: Apache-2.0
//
use std::{collections::HashMap, sync::Arc};
use agent::{kata::KataAgent, Agent, AGENT_KATA};
use anyhow::{anyhow, Context, Result};
use common::{message::Message, types::SandboxConfig, Sandbox, SandboxNetworkEnv};
use hypervisor::device::driver::{VIRTIO_BLOCK_CCW, VIRTIO_BLOCK_PCI};
use hypervisor::{qemu::Qemu, Hypervisor, HYPERVISOR_QEMU};
use kata_types::config::{
default, Agent as AgentConfig, Hypervisor as HypervisorConfig, TomlConfig,
};
use kata_types::machine_type::MACHINE_TYPE_S390X_TYPE;
use resource::{cpu_mem::initial_size::InitialSizeManager, ResourceManager};
use runtime_spec;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc::channel;
use uuid::Uuid;
use crate::sandbox::VirtSandbox;
const MESSAGE_BUFFER_SIZE: usize = 8;
/// TemplateVm is an abstraction of a virtual machine that either serves
/// as, or was cloned from, a VM template.
#[derive(Clone)]
pub struct TemplateVm {
    /// The hypervisor responsible for managing the virtual machine lifecycle.
    pub hypervisor: Arc<dyn Hypervisor>,
    /// The guest agent that communicates with the virtual machine.
    pub agent: Arc<dyn Agent>,
    /// Unique identifier of the virtual machine.
    pub id: String,
    /// Number of vCPUs assigned to the VM.
    pub cpu: f32,
    /// Amount of memory (in MB) assigned to the VM.
    pub memory: u32,
    /// Tracks the difference in vCPU count since last update.
    pub cpu_delta: i32,
}
/// VmConfig holds all configuration information required to start a new VM instance.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VmConfig {
    /// Type of hypervisor to be used (e.g., qemu, cloud-hypervisor).
    #[serde(default)]
    pub hypervisor_name: String,
    /// Name of the guest agent implementation to use (e.g., kata).
    #[serde(default)]
    pub agent_name: String,
    /// Configuration for the guest agent.
    #[serde(default)]
    pub agent_config: AgentConfig,
    /// Configuration for the hypervisor.
    #[serde(default)]
    pub hypervisor_config: HypervisorConfig,
}

impl VmConfig {
    /// Extracts the hypervisor/agent selection and their configurations
    /// from the loaded TOML runtime configuration, falling back to the
    /// default config when the named section is missing.
    pub fn new(toml_config: &TomlConfig) -> Self {
        let hypervisor_name = toml_config.runtime.hypervisor_name.clone();
        let agent_name = toml_config.runtime.agent_name.clone();

        let hypervisor_config = toml_config
            .hypervisor
            .get(&hypervisor_name)
            .cloned()
            .unwrap_or_default();
        let agent_config = toml_config
            .agent
            .get(&agent_name)
            .cloned()
            .unwrap_or_default();

        VmConfig {
            hypervisor_name,
            agent_name,
            hypervisor_config,
            agent_config,
        }
    }

    /// Validates boot configuration based on security mode
    fn validate_boot_configuration(conf: &HypervisorConfig) -> Result<()> {
        // Secure execution: confidential guest on the s390x machine type.
        let is_secure_execution = conf.security_info.confidential_guest
            && conf.machine_info.machine_type == MACHINE_TYPE_S390X_TYPE;
        let has_image = !conf.boot_info.image.is_empty();
        let has_initrd = !conf.boot_info.initrd.is_empty();

        // Secure execution mode does not allow image or initrd
        if is_secure_execution {
            if has_image || has_initrd {
                return Err(anyhow!(
                    "secure execution mode does not allow image or initrd"
                ));
            }
            return Ok(());
        }

        // Standard mode: must have exactly one of image or initrd
        if !has_image && !has_initrd {
            return Err(anyhow!("missing image and initrd path"));
        }
        if has_image && has_initrd {
            return Err(anyhow!("image and initrd path cannot both be set"));
        }

        Ok(())
    }

    /// Checks the hypervisor configuration and fills in QEMU defaults
    /// (vcpus, memory, bridges, block driver, max vcpus) where unset.
    /// Remote hypervisors skip validation entirely.
    pub fn validate_hypervisor_config(conf: &mut HypervisorConfig) -> Result<()> {
        // remote hypervisor_socket
        if !conf.remote_info.hypervisor_socket.is_empty() {
            return Ok(());
        }

        // kernel_path
        if conf.boot_info.kernel.is_empty() {
            return Err(anyhow!("missing kernel path"));
        }

        // Validate boot configuration based on security mode
        Self::validate_boot_configuration(conf)?;

        // vcpus
        if conf.cpu_info.default_vcpus == 0.0 {
            conf.cpu_info.default_vcpus = default::DEFAULT_GUEST_VCPUS as f32;
        }

        // memory_size
        if conf.memory_info.default_memory == 0 {
            conf.memory_info.default_memory = default::DEFAULT_QEMU_MEMORY_SIZE_MB;
        }

        // default_bridges
        if conf.device_info.default_bridges == 0 {
            conf.device_info.default_bridges = default::DEFAULT_QEMU_PCI_BRIDGES;
        }

        // block_device_driver
        if conf.blockdev_info.block_device_driver.is_empty() {
            conf.blockdev_info.block_device_driver = default::DEFAULT_BLOCK_DEVICE_TYPE.to_string();
        } else if conf.blockdev_info.block_device_driver == VIRTIO_BLOCK_PCI
            && conf.machine_info.machine_type == MACHINE_TYPE_S390X_TYPE
        {
            // On s390x, switch virtio-blk from the PCI to the CCW transport.
            conf.blockdev_info.block_device_driver = VIRTIO_BLOCK_CCW.to_string();
        }

        // default_maxvcpus
        if conf.cpu_info.default_maxvcpus == 0
            || conf.cpu_info.default_maxvcpus > default::MAX_QEMU_VCPUS
        {
            conf.cpu_info.default_maxvcpus = default::MAX_QEMU_VCPUS;
        }

        Ok(())
    }
}
impl TemplateVm {
/// Creates a new TemplateVm instance with the provided components and resources.
/// Currently, only QEMU is supported; other hypervisors are not yet implemented.
pub fn new(
id: String,
hypervisor: Arc<dyn Hypervisor>,
agent: Arc<dyn Agent>,
cpu: f32,
memory: u32,
) -> Self {
Self {
id,
hypervisor,
agent,
cpu,
memory,
cpu_delta: 0,
}
}
/// Initializes the QEMU hypervisor for Kata
async fn new_hypervisor(config: &VmConfig) -> Result<Arc<dyn Hypervisor>> {
let hypervisor: Arc<dyn Hypervisor> = match config.hypervisor_name.as_str() {
HYPERVISOR_QEMU => {
let h = Qemu::new();
h.set_hypervisor_config(config.hypervisor_config.clone())
.await;
Arc::new(h)
}
// TODO: Add support for additional hypervisors or proper error handling here.
_ => return Err(anyhow!("Unsupported hypervisor {}", config.hypervisor_name)),
};
Ok(hypervisor)
}
/// Initializes the Kata agent, handling necessary configurations and setup
fn new_agent(config: &VmConfig) -> Result<Arc<KataAgent>> {
let agent_name = &config.agent_name;
let agent_config = config.agent_config.clone();
match agent_name.as_str() {
AGENT_KATA => {
let agent = KataAgent::new(agent_config.clone());
Ok(Arc::new(agent))
}
_ => Err(anyhow!("Unsupported agent {}", &agent_name)),
}
}
/// Create an empty `sandbox_config` structure
fn new_empty_sandbox_config() -> SandboxConfig {
SandboxConfig {
sandbox_id: String::new(),
hostname: String::new(),
dns: Vec::new(),
network_env: SandboxNetworkEnv::default(),
annotations: HashMap::default(),
hooks: None,
state: runtime_spec::State {
version: Default::default(),
id: String::new(),
status: runtime_spec::ContainerState::Creating,
pid: 0,
bundle: String::new(),
annotations: Default::default(),
},
shm_size: 0,
}
}
/// Creates a new VM based on the provided configuration.
///
/// Builds the hypervisor, agent and resource manager, assembles a
/// `VirtSandbox`, boots it as a template VM, and returns a handle
/// describing the running instance.
pub async fn new_vm(config: VmConfig, toml_config: TomlConfig) -> Result<Self> {
    // Every template VM gets a fresh, random sandbox id.
    let sandbox_id = Uuid::new_v4().to_string();
    let (msg_sender, _receiver) = channel::<Message>(MESSAGE_BUFFER_SIZE);

    let hypervisor = Self::new_hypervisor(&config)
        .await
        .context("new hypervisor")?;
    let agent = Self::new_agent(&config).context("new agent")?;
    let sandbox_config = Self::new_empty_sandbox_config();

    let initial_size_manager = InitialSizeManager::new_from(&sandbox_config.annotations)
        .context("failed to construct static resource manager")?;

    // We need to update the `toml_config` with runtime information,
    // but due to ownership issues with the variables, we cannot
    // pass them as parameters. Therefore, for now, we directly set
    // the `slot` and `maxmemory` values in the configuration file
    // to non-zero.
    let factory = toml_config.get_factory();
    let shared_toml_config = Arc::new(toml_config);

    let resource_manager = Arc::new(
        ResourceManager::new(
            &sandbox_id,
            agent.clone(),
            hypervisor.clone(),
            shared_toml_config,
            initial_size_manager,
        )
        .await
        .context("build resource manager")?,
    );

    let sandbox = VirtSandbox::new(
        &sandbox_id,
        msg_sender.clone(),
        agent.clone(),
        hypervisor.clone(),
        resource_manager.clone(),
        sandbox_config,
        factory,
    )
    .await
    .context("build sandbox")?;

    // Boot the sandbox as a template VM.
    sandbox.start_template().await.context("start template")?;
    info!(sl!(), "VM has been started from template");

    let hypervisor_config = sandbox.get_hypervisor().hypervisor_config().await;
    Ok(TemplateVm::new(
        sandbox.get_sid(),
        sandbox.get_hypervisor(),
        sandbox.get_agent(),
        hypervisor_config.cpu_info.default_vcpus,
        hypervisor_config.memory_info.default_memory,
    ))
}
/// Stop a VM
pub async fn stop(&self) -> Result<()> {
self.hypervisor
.stop_vm()
.await
.map_err(|e| anyhow::anyhow!("failed to stop vm: {}", e))
}
/// Disconnect the agent.
pub async fn disconnect(&self) -> Result<()> {
    let result = self.agent.disconnect().await;
    result.context("disconnect vm")
}
/// Pause a VM.
pub async fn pause(&self) -> Result<()> {
    let result = self.hypervisor.pause_vm().await;
    result.context("pause vm")
}
/// Save a VM to persistent disk.
pub async fn save(&self) -> Result<()> {
    let result = self.hypervisor.save_vm().await;
    result.context("save vm")
}
/// Resume a paused VM.
pub async fn resume(&self) -> Result<()> {
    let result = self.hypervisor.resume_vm().await;
    result.context("resume vm")
}
}

View File

@@ -10,15 +10,13 @@ extern crate slog;
logging::logger_with_subsystem!(sl, "virt-container");
mod container_manager;
pub mod factory;
pub mod health_check;
pub mod sandbox;
pub mod sandbox_persist;
use std::path::Path;
use std::sync::Arc;
use agent::{kata::KataAgent, Agent, AGENT_KATA};
use agent::{kata::KataAgent, AGENT_KATA};
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use common::{message::Message, types::SandboxConfig, RuntimeHandler, RuntimeInstance};
@@ -45,7 +43,6 @@ use hypervisor::ch::CloudHypervisor;
))]
use kata_types::config::{hypervisor::HYPERVISOR_NAME_CH, CloudHypervisorConfig};
use crate::factory::vm::VmConfig;
use resource::cpu_mem::initial_size::InitialSizeManager;
use resource::ResourceManager;
use sandbox::VIRTCONTAINER;
@@ -107,18 +104,10 @@ impl RuntimeHandler for VirtContainer {
init_size_manager: InitialSizeManager,
sandbox_config: SandboxConfig,
) -> Result<RuntimeInstance> {
let factory = config.get_factory();
let (hypervisor, agent) = if factory.enable_template {
build_vm_from_template()
.await
.context("build vm from template")?
} else {
(
new_hypervisor(&config).await.context("new hypervisor")?,
new_agent(&config).context("new agent")? as Arc<dyn agent::Agent>,
)
};
let hypervisor = new_hypervisor(&config).await.context("new hypervisor")?;
// get uds from hypervisor and get config from toml_config
let agent = new_agent(&config).context("new agent")?;
let resource_manager = Arc::new(
ResourceManager::new(
sid,
@@ -138,7 +127,6 @@ impl RuntimeHandler for VirtContainer {
hypervisor.clone(),
resource_manager.clone(),
sandbox_config,
factory,
)
.await
.context("new virt sandbox")?;
@@ -161,28 +149,6 @@ impl RuntimeHandler for VirtContainer {
}
}
/// Construct a hypervisor/agent pair configured to boot FROM an existing
/// VM template rather than to create one.
///
/// Loads the default TOML configuration, flips the current hypervisor's
/// template flags (`boot_from_template` on, `boot_to_be_template` off) and
/// points the memory/device-state paths at the factory template directory.
/// Returns an error if the configured hypervisor is not present in the
/// loaded configuration.
async fn build_vm_from_template() -> Result<(Arc<dyn Hypervisor>, Arc<dyn Agent>)> {
let (mut toml_config, _) =
TomlConfig::load_from_default().context("failed to load toml config")?;
let hypervisor_name = toml_config.runtime.hypervisor_name.clone();
if let Some(h) = toml_config.hypervisor.get_mut(&hypervisor_name) {
// Boot from the template instead of creating one.
h.vm_template.boot_to_be_template = false;
h.vm_template.boot_from_template = true;
// The template's saved memory and device state live under the
// factory template path.
let path = Path::new(&h.factory.template_path);
h.vm_template.memory_path = path.join("memory").to_string_lossy().to_string();
h.vm_template.device_state_path = path.join("state").to_string_lossy().to_string();
// NOTE(review): the validation result is deliberately discarded —
// presumably best-effort; confirm whether failures should propagate.
let _ = VmConfig::validate_hypervisor_config(h);
} else {
return Err(anyhow!("hypervisor '{}' not found", hypervisor_name));
}
let hypervisor = new_hypervisor(&toml_config)
.await
.context("new hypervisor")?;
let agent = new_agent(&toml_config).context("new agent")? as Arc<dyn agent::Agent>;
Ok((hypervisor, agent))
}
async fn new_hypervisor(toml_config: &TomlConfig) -> Result<Arc<dyn Hypervisor>> {
let hypervisor_name = &toml_config.runtime.hypervisor_name;
let hypervisor_config = toml_config

View File

@@ -40,7 +40,7 @@ use kata_sys_util::spec::load_oci_spec;
use kata_types::capabilities::CapabilityBits;
use kata_types::config::hypervisor::Hypervisor as HypervisorConfig;
use kata_types::config::hypervisor::HYPERVISOR_NAME_CH;
use kata_types::config::{hypervisor::Factory, TomlConfig};
use kata_types::config::TomlConfig;
use kata_types::initdata::{calculate_initdata_digest, ProtectedPlatform};
use oci_spec::runtime as oci;
use persist::{self, sandbox_persist::Persist};
@@ -97,7 +97,6 @@ pub struct VirtSandbox {
monitor: Arc<HealthCheck>,
sandbox_config: Option<SandboxConfig>,
shm_size: u64,
factory: Option<Factory>,
}
impl std::fmt::Debug for VirtSandbox {
@@ -105,13 +104,6 @@ impl std::fmt::Debug for VirtSandbox {
f.debug_struct("VirtSandbox")
.field("sid", &self.sid)
.field("msg_sender", &self.msg_sender)
.field("inner", &"<SandboxInner>")
.field("resource_manager", &self.resource_manager)
.field("agent", &"<Agent>")
.field("hypervisor", &self.hypervisor)
.field("monitor", &"<HealthCheck>")
.field("sandbox_config", &self.sandbox_config)
.field("factory", &self.factory)
.finish()
}
}
@@ -124,7 +116,6 @@ impl VirtSandbox {
hypervisor: Arc<dyn Hypervisor>,
resource_manager: Arc<ResourceManager>,
sandbox_config: SandboxConfig,
factory: Factory,
) -> Result<Self> {
let config = resource_manager.config().await;
let keep_abnormal = config.runtime.keep_abnormal;
@@ -138,22 +129,9 @@ impl VirtSandbox {
monitor: Arc::new(HealthCheck::new(true, keep_abnormal)),
shm_size: sandbox_config.shm_size,
sandbox_config: Some(sandbox_config),
factory: Some(factory),
})
}
pub fn get_agent(&self) -> Arc<dyn Agent> {
self.agent.clone()
}
pub fn get_sid(&self) -> String {
self.sid.clone()
}
pub fn get_hypervisor(&self) -> Arc<dyn Hypervisor> {
self.hypervisor.clone()
}
#[instrument]
async fn prepare_for_start_sandbox(
&self,
@@ -706,58 +684,6 @@ impl Sandbox for VirtSandbox {
Ok(())
}
/// Core function for starting a VM from a template
///
/// This function is responsible for creating and starting a VM sandbox from a predefined template,
/// serving as the core implementation of the template mechanism.
///
/// Sequence: check the sandbox is still in `Init` state, prepare the VM
/// (optionally with a SELinux label from the OCI spec), set up resources,
/// then boot the VM with a 10s timeout. Returns `Ok(())` without doing
/// anything if the sandbox has already left the `Init` state.
async fn start_template(&self) -> Result<()> {
info!(sl!(), "sandbox::start_template()"; "sandbox:" => format!("{:?}", self));
let id = &self.sid;
// NOTE(review): assumes `sandbox_config` was populated at construction
// time; a `None` here panics.
let sandbox_config = self.sandbox_config.as_ref().unwrap();
// if sandbox is not in SandboxState::Init then return,
// otherwise try to create sandbox
// The write lock is held for the rest of the function, serializing
// template startup against other state transitions.
let inner = self.inner.write().await;
if inner.state != SandboxState::Init {
return Ok(());
}
// Best-effort: pull the SELinux label from the OCI spec's process
// section, if a spec is available at all.
let selinux_label = load_oci_spec().ok().and_then(|spec| {
spec.process()
.as_ref()
.and_then(|process| process.selinux_label().clone())
});
self.hypervisor
.prepare_vm(
id,
sandbox_config.network_env.netns.clone(),
&sandbox_config.annotations,
selinux_label,
)
.await
.context("prepare vm")?;
// generate device and setup before start vm
// should after hypervisor.prepare_vm
let resources = self
.prepare_for_start_sandbox(id, sandbox_config.network_env.clone())
.await
.context("prepare resources before start vm")?;
self.resource_manager
.prepare_before_start_vm(resources)
.await
.context("set up device before start vm")?;
// 10_000 is the startup timeout passed to the hypervisor.
self.hypervisor
.start_vm(10_000)
.await
.context("start template vm")?;
info!(sl!(), "vm started from template");
Ok(())
}
async fn status(&self) -> Result<SandboxStatus> {
info!(sl!(), "get sandbox status");
let inner = self.inner.read().await;
@@ -1011,7 +937,6 @@ impl Persist for VirtSandbox {
monitor: Arc::new(HealthCheck::new(true, keep_abnormal)),
sandbox_config: None,
shm_size: DEFAULT_SHM_SIZE,
factory: None,
})
}
}

View File

@@ -473,8 +473,6 @@ ifneq (,$(QEMUCMD))
KERNELTDXPARAMS_NV += "authorize_allow_devs=pci:ALL"
KERNELSNPPARAMS_NV = $(KERNELPARAMS_NV)
#TODO: temporary until the attestation agent activates the device after successful attestation
KERNELSNPPARAMS_NV += "nvrc.smi.srs=1"
# Setting this to false can lead to cgroup leakages in the host
# Best practice for production is to set this to true

View File

@@ -17,7 +17,6 @@ kernel = "@KERNELPATH_CONFIDENTIAL_NV@"
initrd = "@INITRDPATH_CONFIDENTIAL_NV@"
machine_type = "@MACHINETYPE@"
tdx_quote_generation_service_socket_port = @QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT@
# rootfs filesystem type:
# - ext4 (default)

View File

@@ -1,7 +1,7 @@
module github.com/kata-containers/kata-containers/src/runtime
// Keep in sync with version in versions.yaml
go 1.24.9
go 1.24.6
// WARNING: Do NOT use `replace` directives as those break dependabot:
// https://github.com/kata-containers/kata-containers/issues/11020
@@ -62,7 +62,6 @@ require (
google.golang.org/protobuf v1.36.6
k8s.io/apimachinery v0.33.0
k8s.io/cri-api v0.33.0
tags.cncf.io/container-device-interface v1.0.1
)
require (
@@ -138,6 +137,7 @@ require (
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
tags.cncf.io/container-device-interface v1.0.1 // indirect
tags.cncf.io/container-device-interface/specs-go v1.0.0 // indirect
)

View File

@@ -43,7 +43,6 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
"tags.cncf.io/container-device-interface/pkg/cdi"
)
type startManagementServerFunc func(s *service, ctx context.Context, ociSpec *specs.Spec)
@@ -185,13 +184,6 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
}
}
// config.WithCDI() has used the CDI annotations to inject
// host-side devices. As these annotations reference device kinds
// that don't exist in the guest (e.g., nvidia.com/pgpu), we
// remove them before creating the sandbox and the containers
// within it.
removeCDIAnnotations(ociSpec.Annotations)
// Pass service's context instead of local ctx to CreateSandbox(), since local
// ctx will be canceled after this rpc service call, but the sandbox will live
// across multiple rpc service calls.
@@ -231,12 +223,6 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
}
}()
// CDI annotations have been processed during PodSandbox creation
// and cold-plug. CDI annotations referencing device kinds that
// exist in the guest (e.g., nvidia.com/gpu) will be generated
// during device attachment.
removeCDIAnnotations(ociSpec.Annotations)
_, err = katautils.CreateContainer(ctx, s.sandbox, *ociSpec, rootFs, r.ID, bundlePath, disableOutput, runtimeConfig.DisableGuestEmptyDir)
if err != nil {
return nil, err
@@ -447,16 +433,3 @@ func configureNonRootHypervisor(runtimeConfig *oci.RuntimeConfig, sandboxId stri
}
return fmt.Errorf("failed to get the gid of /dev/kvm")
}
// removeCDIAnnotations deletes every CDI annotation (keys carrying the
// cdi.AnnotationPrefix) from the given annotation map, logging each
// removal. A nil map is a no-op (ranging over nil iterates zero times).
func removeCDIAnnotations(annotations map[string]string) {
	for key, value := range annotations {
		if !strings.HasPrefix(key, cdi.AnnotationPrefix) {
			continue
		}
		shimLog.Debugf("removing CDI annotation: %s=%s", key, value)
		delete(annotations, key)
	}
}

View File

@@ -705,7 +705,14 @@ func WithCDI(annotations map[string]string, cdiSpecDirs []string, spec *specs.Sp
if _, err := registry.InjectDevices(spec, devsFromAnnotations...); err != nil {
return nil, fmt.Errorf("CDI device injection failed: %w", err)
}
// Once we injected the device into the ociSpec we do not need to CDI
// device annotation from the outer runtime. The runtime will create the
// appropriate inner runtime CDI annotation dependent on the device.
for key := range spec.Annotations {
if strings.HasPrefix(key, cdi.AnnotationPrefix) {
delete(spec.Annotations, key)
}
}
// One crucial thing to keep in mind is that CDI device injection
// might add OCI Spec environment variables, hooks, and mounts as
// well. Therefore it is important that none of the corresponding

View File

@@ -1095,13 +1095,7 @@ func (c *Container) annotateContainerWithVFIOMetadata(devices interface{}) {
func (c *Container) siblingAnnotation(devPath string, siblings []DeviceRelation) {
for _, sibling := range siblings {
if sibling.Path == devPath {
// We have here either /dev/vfio/<num> or /dev/vfio/devices/vfio<num>
baseName := filepath.Base(devPath)
vfioNum := baseName
// For IOMMUFD format /dev/vfio/devices/vfio<num>, strip "vfio" prefix
if strings.HasPrefix(baseName, "vfio") {
vfioNum = strings.TrimPrefix(baseName, "vfio")
}
vfioNum := filepath.Base(devPath)
annoKey := fmt.Sprintf("cdi.k8s.io/vfio%s", vfioNum)
annoValue := fmt.Sprintf("nvidia.com/gpu=%d", sibling.Index)
if c.config.CustomSpec.Annotations == nil {

View File

@@ -36,7 +36,6 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils/retry"
ctrAnnotations "github.com/containerd/containerd/pkg/cri/annotations"
crioAnnotations "github.com/cri-o/cri-o/pkg/annotations"
@@ -598,31 +597,7 @@ func (k *kataAgent) updateInterface(ctx context.Context, ifc *pbTypes.Interface)
ifcReq := &grpc.UpdateInterfaceRequest{
Interface: ifc,
}
// Since the network device hotplug is an asynchronous operation,
// it's possible that the hotplug operation had returned, but the network device
// hasn't ready in guest, thus it's better to retry on this operation to
// wait until the device ready in guest.
var resultingInterface interface{}
err := retry.Do(func() error {
if resInterface, nerr := k.sendReq(ctx, ifcReq); nerr != nil {
errMsg := nerr.Error()
if !strings.Contains(errMsg, "Link not found") {
return retry.Unrecoverable(nerr)
}
return nerr
} else {
resultingInterface = resInterface
return nil
}
},
retry.Attempts(20),
retry.LastErrorOnly(true),
retry.Delay(20*time.Millisecond))
resultingInterface, err := k.sendReq(ctx, ifcReq)
if err != nil {
k.Logger().WithFields(logrus.Fields{
"interface-requested": fmt.Sprintf("%+v", ifc),

View File

@@ -191,32 +191,37 @@ func TestQemuArm64AppendProtectionDevice(t *testing.T) {
// PEF protection
arm64.(*qemuArm64).protection = pefProtection
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(""))
assert.Error(err)
assert.Empty(devices)
assert.Empty(bios)
assert.NoError(err)
// Secure Execution protection
arm64.(*qemuArm64).protection = seProtection
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(""))
assert.Error(err)
assert.Empty(devices)
assert.Empty(bios)
assert.NoError(err)
// SEV protection
arm64.(*qemuArm64).protection = sevProtection
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(""))
assert.Error(err)
assert.Empty(devices)
assert.Empty(bios)
assert.NoError(err)
// SNP protection
arm64.(*qemuArm64).protection = snpProtection
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(""))
assert.Error(err)
assert.Empty(devices)
assert.Empty(bios)
assert.NoError(err)
// TDX protection
arm64.(*qemuArm64).protection = tdxProtection
devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(""))
assert.Error(err)
assert.Empty(devices)
assert.Empty(bios)
assert.NoError(err)
// CCA RME protection
arm64.(*qemuArm64).protection = ccaProtection

Some files were not shown because too many files have changed in this diff Show More