mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-03-17 18:22:14 +00:00
Compare commits
103 Commits
3.25.0
...
topic/kata
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
622b912369 | ||
|
|
62fef5a5e4 | ||
|
|
2e9ed9aa4c | ||
|
|
e8a896aaa2 | ||
|
|
e430b2641c | ||
|
|
e257430976 | ||
|
|
dda1b30c34 | ||
|
|
c9061f9e36 | ||
|
|
0fb2c500fd | ||
|
|
fef93f1e08 | ||
|
|
f90c12d4df | ||
|
|
30c7325e75 | ||
|
|
30494abe48 | ||
|
|
8a449d358f | ||
|
|
6bb77a2f13 | ||
|
|
6702b48858 | ||
|
|
0530a3494f | ||
|
|
93dcaee965 | ||
|
|
62ad0814c5 | ||
|
|
870630c421 | ||
|
|
927be7b8ad | ||
|
|
6e98df2bac | ||
|
|
d7ff54769c | ||
|
|
4d860dcaf5 | ||
|
|
dc8d9e056d | ||
|
|
8b0c199f43 | ||
|
|
4d1095e653 | ||
|
|
b85393e70b | ||
|
|
500146bfee | ||
|
|
20ca4d2d79 | ||
|
|
5e60d384a2 | ||
|
|
ea627166b9 | ||
|
|
0d8fbdef07 | ||
|
|
1440dd7468 | ||
|
|
c7bc428e59 | ||
|
|
932920cb86 | ||
|
|
5250d4bacd | ||
|
|
eb3d204ff3 | ||
|
|
971b096a1f | ||
|
|
272ff9c568 | ||
|
|
ca43a8cbb8 | ||
|
|
6c0ae4eb04 | ||
|
|
a59f791bf5 | ||
|
|
d0fe60e784 | ||
|
|
4b2d4e96ae | ||
|
|
26c534d610 | ||
|
|
04f45a379c | ||
|
|
c9e9a682ab | ||
|
|
cfe9bcbaf1 | ||
|
|
d8a3272f85 | ||
|
|
3be57bb501 | ||
|
|
a76cdb5814 | ||
|
|
4c3989c3e4 | ||
|
|
678b560e6d | ||
|
|
609a25e643 | ||
|
|
aa94038355 | ||
|
|
5471fa133c | ||
|
|
68d671af0f | ||
|
|
c7f5ff45a2 | ||
|
|
37a0c81b6a | ||
|
|
e7b8b302ac | ||
|
|
8a0fad4b95 | ||
|
|
6438fe7f2d | ||
|
|
0d35b36652 | ||
|
|
5b82b160e2 | ||
|
|
29e7dd27f1 | ||
|
|
d0bfb27857 | ||
|
|
ac8436e326 | ||
|
|
2cd76796bd | ||
|
|
fb7390ce3c | ||
|
|
bc131a84b9 | ||
|
|
dacb14619d | ||
|
|
89e287c3b2 | ||
|
|
869dd5ac65 | ||
|
|
d4ea02e339 | ||
|
|
69dd9679c2 | ||
|
|
606c12df6d | ||
|
|
ec18dd79ba | ||
|
|
86e0b08b13 | ||
|
|
2369cf585d | ||
|
|
19efeae12e | ||
|
|
70f6543333 | ||
|
|
4eb50d7b59 | ||
|
|
ba47bb6583 | ||
|
|
62847e1efb | ||
|
|
78824e0181 | ||
|
|
d135a186e1 | ||
|
|
949e0c2ca0 | ||
|
|
83b0c44986 | ||
|
|
7a02c54b6c | ||
|
|
bf1539b802 | ||
|
|
0fd9eebf0f | ||
|
|
3f1533ae8a | ||
|
|
cf3441bd2c | ||
|
|
e0158869b1 | ||
|
|
5aff81198f | ||
|
|
b5a986eacf | ||
|
|
c7570427d2 | ||
|
|
c1216598e8 | ||
|
|
96e1fb4ca6 | ||
|
|
f68c25de6a | ||
|
|
d7aa793dde | ||
|
|
2edb161c53 |
1
.github/dependabot.yml
vendored
1
.github/dependabot.yml
vendored
@@ -12,7 +12,6 @@ updates:
|
||||
- "/src/tools/agent-ctl"
|
||||
- "/src/tools/genpolicy"
|
||||
- "/src/tools/kata-ctl"
|
||||
- "/src/tools/runk"
|
||||
- "/src/tools/trace-forwarder"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
|
||||
36
.github/workflows/basic-ci-amd64.yaml
vendored
36
.github/workflows/basic-ci-amd64.yaml
vendored
@@ -163,42 +163,6 @@ jobs:
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/nydus/gha-run.sh run
|
||||
|
||||
run-runk:
|
||||
name: run-runk
|
||||
# Skip runk tests as we have no maintainers. TODO: Decide when to remove altogether
|
||||
if: false
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
CONTAINERD_VERSION: lts
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/runk/gha-run.sh install-dependencies
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata
|
||||
run: bash tests/integration/runk/gha-run.sh install-kata kata-artifacts
|
||||
|
||||
- name: Run runk tests
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/runk/gha-run.sh run
|
||||
|
||||
run-tracing:
|
||||
name: run-tracing
|
||||
strategy:
|
||||
|
||||
75
.github/workflows/build-helm-image.yaml
vendored
Normal file
75
.github/workflows/build-helm-image.yaml
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
name: Build helm multi-arch image
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run every Sunday at 12:00 UTC (12 hours after kubectl image build)
|
||||
- cron: '0 12 * * 0'
|
||||
workflow_dispatch:
|
||||
# Allow manual triggering
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- 'tools/packaging/helm/Dockerfile'
|
||||
- '.github/workflows/build-helm-image.yaml'
|
||||
|
||||
permissions: {}
|
||||
|
||||
env:
|
||||
REGISTRY: quay.io
|
||||
IMAGE_NAME: kata-containers/helm
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
name: Build and push multi-arch image
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
|
||||
|
||||
- name: Login to Quay.io
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ vars.QUAY_DEPLOYER_USERNAME }}
|
||||
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
- name: Get helm version
|
||||
id: helm-version
|
||||
run: |
|
||||
HELM_VERSION=$(curl -s https://api.github.com/repos/helm/helm/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/')
|
||||
echo "version=${HELM_VERSION}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Generate image metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
tags: |
|
||||
type=raw,value=latest
|
||||
type=raw,value={{date 'YYYYMMDD'}}
|
||||
type=raw,value=${{ steps.helm-version.outputs.version }}
|
||||
type=sha,prefix=
|
||||
|
||||
- name: Build and push multi-arch image
|
||||
uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
|
||||
with:
|
||||
context: tools/packaging/helm/
|
||||
file: tools/packaging/helm/Dockerfile
|
||||
platforms: linux/amd64,linux/arm64,linux/s390x,linux/ppc64le
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
36
.github/workflows/ci-nightly-rust.yaml
vendored
36
.github/workflows/ci-nightly-rust.yaml
vendored
@@ -1,36 +0,0 @@
|
||||
name: Kata Containers Nightly CI (Rust)
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 1 * * *' # Run at 1 AM UTC (1 hour after script-based nightly)
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
kata-containers-ci-on-push-rust:
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
id-token: write
|
||||
attestations: write
|
||||
uses: ./.github/workflows/ci.yaml
|
||||
with:
|
||||
commit-hash: ${{ github.sha }}
|
||||
pr-number: "nightly-rust"
|
||||
tag: ${{ github.sha }}-nightly-rust
|
||||
target-branch: ${{ github.ref_name }}
|
||||
build-type: "rust" # Use Rust-based build
|
||||
secrets:
|
||||
AUTHENTICATED_IMAGE_PASSWORD: ${{ secrets.AUTHENTICATED_IMAGE_PASSWORD }}
|
||||
AZ_APPID: ${{ secrets.AZ_APPID }}
|
||||
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
|
||||
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
|
||||
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
|
||||
ITA_KEY: ${{ secrets.ITA_KEY }}
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
23
.github/workflows/ci.yaml
vendored
23
.github/workflows/ci.yaml
vendored
@@ -19,11 +19,6 @@ on:
|
||||
required: false
|
||||
type: string
|
||||
default: no
|
||||
build-type:
|
||||
description: The build type for kata-deploy. Use 'rust' for Rust-based build, empty or omit for script-based (default).
|
||||
required: false
|
||||
type: string
|
||||
default: ""
|
||||
secrets:
|
||||
AUTHENTICATED_IMAGE_PASSWORD:
|
||||
required: true
|
||||
@@ -77,7 +72,6 @@ jobs:
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
runner: ubuntu-22.04
|
||||
arch: amd64
|
||||
build-type: ${{ inputs.build-type }}
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
@@ -110,7 +104,6 @@ jobs:
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
runner: ubuntu-24.04-arm
|
||||
arch: arm64
|
||||
build-type: ${{ inputs.build-type }}
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
@@ -156,7 +149,6 @@ jobs:
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
runner: ubuntu-24.04-s390x
|
||||
arch: s390x
|
||||
build-type: ${{ inputs.build-type }}
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
@@ -175,7 +167,6 @@ jobs:
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
runner: ubuntu-24.04-ppc64le
|
||||
arch: ppc64le
|
||||
build-type: ${{ inputs.build-type }}
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
@@ -297,7 +288,7 @@ jobs:
|
||||
tarball-suffix: -${{ inputs.tag }}
|
||||
registry: ghcr.io
|
||||
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||
tag: ${{ inputs.tag }}-amd64${{ inputs.build-type == 'rust' && '-rust' || '' }}
|
||||
tag: ${{ inputs.tag }}-amd64
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
@@ -313,7 +304,7 @@ jobs:
|
||||
with:
|
||||
registry: ghcr.io
|
||||
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||
tag: ${{ inputs.tag }}-arm64${{ inputs.build-type == 'rust' && '-rust' || '' }}
|
||||
tag: ${{ inputs.tag }}-arm64
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
@@ -326,7 +317,7 @@ jobs:
|
||||
tarball-suffix: -${{ inputs.tag }}
|
||||
registry: ghcr.io
|
||||
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||
tag: ${{ inputs.tag }}-amd64${{ inputs.build-type == 'rust' && '-rust' || '' }}
|
||||
tag: ${{ inputs.tag }}-amd64
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
@@ -348,7 +339,7 @@ jobs:
|
||||
tarball-suffix: -${{ inputs.tag }}
|
||||
registry: ghcr.io
|
||||
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||
tag: ${{ inputs.tag }}-amd64${{ inputs.build-type == 'rust' && '-rust' || '' }}
|
||||
tag: ${{ inputs.tag }}-amd64
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
@@ -366,7 +357,7 @@ jobs:
|
||||
with:
|
||||
registry: ghcr.io
|
||||
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||
tag: ${{ inputs.tag }}-s390x${{ inputs.build-type == 'rust' && '-rust' || '' }}
|
||||
tag: ${{ inputs.tag }}-s390x
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
@@ -380,7 +371,7 @@ jobs:
|
||||
with:
|
||||
registry: ghcr.io
|
||||
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||
tag: ${{ inputs.tag }}-ppc64le${{ inputs.build-type == 'rust' && '-rust' || '' }}
|
||||
tag: ${{ inputs.tag }}-ppc64le
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
@@ -392,7 +383,7 @@ jobs:
|
||||
with:
|
||||
registry: ghcr.io
|
||||
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||
tag: ${{ inputs.tag }}-amd64${{ inputs.build-type == 'rust' && '-rust' || '' }}
|
||||
tag: ${{ inputs.tag }}-amd64
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
|
||||
4
.github/workflows/payload-after-push.yaml
vendored
4
.github/workflows/payload-after-push.yaml
vendored
@@ -82,7 +82,6 @@ jobs:
|
||||
target-branch: ${{ github.ref_name }}
|
||||
runner: ubuntu-22.04
|
||||
arch: amd64
|
||||
build-type: "" # Use script-based build (default)
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
@@ -100,7 +99,6 @@ jobs:
|
||||
target-branch: ${{ github.ref_name }}
|
||||
runner: ubuntu-24.04-arm
|
||||
arch: arm64
|
||||
build-type: "" # Use script-based build (default)
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
@@ -118,7 +116,6 @@ jobs:
|
||||
target-branch: ${{ github.ref_name }}
|
||||
runner: s390x
|
||||
arch: s390x
|
||||
build-type: "" # Use script-based build (default)
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
@@ -136,7 +133,6 @@ jobs:
|
||||
target-branch: ${{ github.ref_name }}
|
||||
runner: ubuntu-24.04-ppc64le
|
||||
arch: ppc64le
|
||||
build-type: "" # Use script-based build (default)
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
|
||||
|
||||
@@ -30,11 +30,6 @@ on:
|
||||
description: The arch of the tarball.
|
||||
required: true
|
||||
type: string
|
||||
build-type:
|
||||
description: The build type for kata-deploy. Use 'rust' for Rust-based build, empty or omit for script-based (default).
|
||||
required: false
|
||||
type: string
|
||||
default: ""
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD:
|
||||
required: true
|
||||
@@ -106,10 +101,8 @@ jobs:
|
||||
REGISTRY: ${{ inputs.registry }}
|
||||
REPO: ${{ inputs.repo }}
|
||||
TAG: ${{ inputs.tag }}
|
||||
BUILD_TYPE: ${{ inputs.build-type }}
|
||||
run: |
|
||||
./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \
|
||||
"$(pwd)/kata-static.tar.zst" \
|
||||
"${REGISTRY}/${REPO}" \
|
||||
"${TAG}" \
|
||||
"${BUILD_TYPE}"
|
||||
"${TAG}"
|
||||
|
||||
6
.github/workflows/release.yaml
vendored
6
.github/workflows/release.yaml
vendored
@@ -284,11 +284,15 @@ jobs:
|
||||
echo "${QUAY_DEPLOYER_PASSWORD}" | helm registry login quay.io --username "${QUAY_DEPLOYER_USERNAME}" --password-stdin
|
||||
echo "${GITHUB_TOKEN}" | helm registry login ghcr.io --username "${GITHUB_ACTOR}" --password-stdin
|
||||
|
||||
- name: Push helm chart to the OCI registries
|
||||
- name: Push helm charts to the OCI registries
|
||||
run: |
|
||||
release_version=$(./tools/packaging/release/release.sh release-version)
|
||||
# Push kata-deploy chart
|
||||
helm push "kata-deploy-${release_version}.tgz" oci://quay.io/kata-containers/kata-deploy-charts
|
||||
helm push "kata-deploy-${release_version}.tgz" oci://ghcr.io/kata-containers/kata-deploy-charts
|
||||
# Push kata-lifecycle-manager chart
|
||||
helm push "kata-lifecycle-manager-${release_version}.tgz" oci://quay.io/kata-containers/kata-deploy-charts
|
||||
helm push "kata-lifecycle-manager-${release_version}.tgz" oci://ghcr.io/kata-containers/kata-deploy-charts
|
||||
|
||||
publish-release:
|
||||
name: publish-release
|
||||
|
||||
@@ -32,6 +32,7 @@ jobs:
|
||||
matrix:
|
||||
vmm:
|
||||
- qemu
|
||||
- qemu-runtime-rs
|
||||
k8s:
|
||||
- kubeadm
|
||||
runs-on: arm64-k8s
|
||||
|
||||
@@ -126,5 +126,6 @@ jobs:
|
||||
|
||||
- name: Delete CoCo KBS
|
||||
if: always() && matrix.environment.name != 'nvidia-gpu'
|
||||
timeout-minutes: 10
|
||||
run: |
|
||||
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
|
||||
|
||||
2
.github/workflows/run-k8s-tests-on-zvsi.yaml
vendored
2
.github/workflows/run-k8s-tests-on-zvsi.yaml
vendored
@@ -137,10 +137,12 @@ jobs:
|
||||
|
||||
- name: Delete kata-deploy
|
||||
if: always()
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh cleanup-zvsi
|
||||
|
||||
- name: Delete CoCo KBS
|
||||
if: always()
|
||||
timeout-minutes: 10
|
||||
run: |
|
||||
if [ "${KBS}" == "true" ]; then
|
||||
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
|
||||
|
||||
2
.github/workflows/run-kata-coco-tests.yaml
vendored
2
.github/workflows/run-kata-coco-tests.yaml
vendored
@@ -120,10 +120,12 @@ jobs:
|
||||
|
||||
- name: Delete kata-deploy
|
||||
if: always()
|
||||
timeout-minutes: 15
|
||||
run: bash tests/integration/kubernetes/gha-run.sh cleanup
|
||||
|
||||
- name: Delete CoCo KBS
|
||||
if: always()
|
||||
timeout-minutes: 10
|
||||
run: |
|
||||
[[ "${KATA_HYPERVISOR}" == "qemu-tdx" ]] && echo "ITA_KEY=${GH_ITA_KEY}" >> "${GITHUB_ENV}"
|
||||
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
|
||||
|
||||
2
.github/workflows/run-kata-deploy-tests.yaml
vendored
2
.github/workflows/run-kata-deploy-tests.yaml
vendored
@@ -87,4 +87,4 @@ jobs:
|
||||
|
||||
- name: Report tests
|
||||
if: always()
|
||||
run: bash tests/integration/kubernetes/gha-run.sh report-tests
|
||||
run: bash tests/functional/kata-deploy/gha-run.sh report-tests
|
||||
|
||||
54
.github/workflows/run-runk-tests.yaml
vendored
54
.github/workflows/run-runk-tests.yaml
vendored
@@ -1,54 +0,0 @@
|
||||
name: CI | Run runk tests
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
tarball-suffix:
|
||||
required: false
|
||||
type: string
|
||||
commit-hash:
|
||||
required: false
|
||||
type: string
|
||||
target-branch:
|
||||
required: false
|
||||
type: string
|
||||
default: ""
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
run-runk:
|
||||
name: run-runk
|
||||
# Skip runk tests as we have no maintainers. TODO: Decide when to remove altogether
|
||||
if: false
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
CONTAINERD_VERSION: lts
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.commit-hash }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Rebase atop of the latest target branch
|
||||
run: |
|
||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||
env:
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash tests/integration/runk/gha-run.sh install-dependencies
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: get-kata-tarball
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
|
||||
path: kata-artifacts
|
||||
|
||||
- name: Install kata
|
||||
run: bash tests/integration/runk/gha-run.sh install-kata kata-artifacts
|
||||
|
||||
- name: Run runk tests
|
||||
run: bash tests/integration/runk/gha-run.sh run
|
||||
9
.github/workflows/stale.yaml
vendored
9
.github/workflows/stale.yaml
vendored
@@ -6,14 +6,21 @@ on:
|
||||
|
||||
permissions: {}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
stale:
|
||||
name: stale
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
actions: write # Needed to manage caches for state persistence across runs
|
||||
pull-requests: write # Needed to add/remove labels, post comments, or close PRs
|
||||
steps:
|
||||
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
|
||||
with:
|
||||
stale-pr-message: 'This PR has been opened without with no activity for 180 days. Comment on the issue otherwise it will be closed in 7 days'
|
||||
stale-pr-message: 'This PR has been opened without activity for 180 days. Please comment on the issue or it will be closed in 7 days.'
|
||||
days-before-pr-stale: 180
|
||||
days-before-pr-close: 7
|
||||
days-before-issue-stale: -1
|
||||
|
||||
2
.github/workflows/zizmor.yaml
vendored
2
.github/workflows/zizmor.yaml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Run zizmor
|
||||
uses: zizmorcore/zizmor-action@e673c3917a1aef3c65c972347ed84ccd013ecda4 # v0.2.0
|
||||
uses: zizmorcore/zizmor-action@135698455da5c3b3e55f73f4419e481ab68cdd95 # v0.4.1
|
||||
with:
|
||||
advanced-security: false
|
||||
annotations: true
|
||||
|
||||
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -4005,6 +4005,7 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"common",
|
||||
"containerd-shim-protos",
|
||||
"go-flag",
|
||||
"logging",
|
||||
"nix 0.26.4",
|
||||
|
||||
1
Makefile
1
Makefile
@@ -18,7 +18,6 @@ TOOLS =
|
||||
TOOLS += agent-ctl
|
||||
TOOLS += kata-ctl
|
||||
TOOLS += log-parser
|
||||
TOOLS += runk
|
||||
TOOLS += trace-forwarder
|
||||
|
||||
STANDARD_TARGETS = build check clean install static-checks-build test vendor
|
||||
|
||||
@@ -139,7 +139,6 @@ The table below lists the remaining parts of the project:
|
||||
| [`agent-ctl`](src/tools/agent-ctl) | utility | Tool that provides low-level access for testing the agent. |
|
||||
| [`kata-ctl`](src/tools/kata-ctl) | utility | Tool that provides advanced commands and debug facilities. |
|
||||
| [`trace-forwarder`](src/tools/trace-forwarder) | utility | Agent tracing helper. |
|
||||
| [`runk`](src/tools/runk) | utility | Standard OCI container runtime based on the agent. |
|
||||
| [`ci`](.github/workflows) | CI | Continuous Integration configuration files and scripts. |
|
||||
| [`ocp-ci`](ci/openshift-ci/README.md) | CI | Continuous Integration configuration for the OpenShift pipelines. |
|
||||
| [`katacontainers.io`](https://github.com/kata-containers/www.katacontainers.io) | Source for the [`katacontainers.io`](https://www.katacontainers.io) site. |
|
||||
|
||||
@@ -46,16 +46,12 @@ fi
|
||||
[[ ${SELINUX_PERMISSIVE} == "yes" ]] && oc delete -f "${deployments_dir}/machineconfig_selinux.yaml.in"
|
||||
|
||||
# Delete kata-containers
|
||||
pushd "${katacontainers_repo_dir}/tools/packaging/kata-deploy" || { echo "Failed to push to ${katacontainers_repo_dir}/tools/packaging/kata-deploy"; exit 125; }
|
||||
oc delete -f kata-deploy/base/kata-deploy.yaml
|
||||
helm uninstall kata-deploy --wait --namespace kube-system
|
||||
oc -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod
|
||||
oc apply -f kata-cleanup/base/kata-cleanup.yaml
|
||||
echo "Wait for all related pods to be gone"
|
||||
( repeats=1; for _ in $(seq 1 600); do
|
||||
oc get pods -l name="kubelet-kata-cleanup" --no-headers=true -n kube-system 2>&1 | grep "No resources found" -q && ((repeats++)) || repeats=1
|
||||
[[ "${repeats}" -gt 5 ]] && echo kata-cleanup finished && break
|
||||
sleep 1
|
||||
done) || { echo "There are still some kata-cleanup related pods after 600 iterations"; oc get all -n kube-system; exit 1; }
|
||||
oc delete -f kata-cleanup/base/kata-cleanup.yaml
|
||||
oc delete -f kata-rbac/base/kata-rbac.yaml
|
||||
oc delete -f runtimeclasses/kata-runtimeClasses.yaml
|
||||
|
||||
@@ -51,13 +51,13 @@ apply_kata_deploy() {
|
||||
|
||||
oc label --overwrite ns kube-system pod-security.kubernetes.io/enforce=privileged pod-security.kubernetes.io/warn=baseline pod-security.kubernetes.io/audit=baseline
|
||||
local version chart
|
||||
version=$(curl -sSL https://api.github.com/repos/kata-containers/kata-containers/releases/latest | jq .tag_name | tr -d '"')
|
||||
version='0.0.0-dev'
|
||||
chart="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy"
|
||||
|
||||
# Ensure any potential leftover is cleaned up; this secret is usually left behind after previous failures
|
||||
oc delete secret sh.helm.release.v1.kata-deploy.v1 -n kube-system || true
|
||||
|
||||
echo "Installing kata using helm ${chart} ${version}"
|
||||
echo "Installing kata using helm ${chart} ${version} (sha printed in helm output)"
|
||||
helm install kata-deploy --wait --namespace kube-system --set "image.reference=${KATA_DEPLOY_IMAGE%%:*},image.tag=${KATA_DEPLOY_IMAGE##*:}" "${chart}" --version "${version}"
|
||||
}
|
||||
|
||||
|
||||
@@ -157,6 +157,16 @@ if [[ -z "${CAA_IMAGE}" ]]; then
|
||||
fi
|
||||
|
||||
# Get latest PP image
|
||||
#
|
||||
# You can list the CI images by:
|
||||
# az sig image-version list-community --location "eastus" --public-gallery-name "cocopodvm-d0e4f35f-5530-4b9c-8596-112487cdea85" --gallery-image-definition "podvm_image0" --output table
|
||||
# or the release images by:
|
||||
# az sig image-version list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --gallery-image-definition "peerpod-podvm-fedora" --output table
|
||||
# or the release debug images by:
|
||||
# az sig image-version list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --gallery-image-definition "peerpod-podvm-fedora-debug" --output table
|
||||
#
|
||||
# Note there are other flavours of the released images, you can list them by:
|
||||
# az sig image-definition list-community --location "eastus" --public-gallery-name "cococommunity-42d8482d-92cd-415b-b332-7648bd978eff" --output table
|
||||
if [[ -z "${PP_IMAGE_ID}" ]]; then
|
||||
SUCCESS_TIME=$(curl -s \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
|
||||
@@ -125,7 +125,7 @@ If you want to enable SELinux in Permissive mode, add `enforcing=0` to the kerne
|
||||
Enable full debug as follows:
|
||||
|
||||
```bash
|
||||
$ sudo sed -i -e 's/^# *\(enable_debug\).*=.*$/\1 = true/g' /etc/kata-containers/configuration.toml
|
||||
$ sudo sed -i -E 's/^(\s*enable_debug\s*=\s*)false/\1true/' /etc/kata-containers/configuration.toml
|
||||
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.log=debug initcall_debug"/g' /etc/kata-containers/configuration.toml
|
||||
```
|
||||
|
||||
|
||||
118
docs/Kata-Containers-Lifecycle-Management.md
Normal file
118
docs/Kata-Containers-Lifecycle-Management.md
Normal file
@@ -0,0 +1,118 @@
|
||||
# Kata Containers Lifecycle Management
|
||||
|
||||
## Overview
|
||||
|
||||
Kata Containers lifecycle management in Kubernetes consists of two operations:
|
||||
|
||||
1. **Installation** - Deploy Kata Containers to cluster nodes
|
||||
2. **Upgrades** - Update Kata Containers to newer versions without disrupting workloads
|
||||
|
||||
The Kata Containers project provides two Helm charts to address these needs:
|
||||
|
||||
| Chart | Purpose |
|
||||
|-------|---------|
|
||||
| `kata-deploy` | Initial installation and configuration |
|
||||
| `kata-lifecycle-manager` | Orchestrated rolling upgrades with verification |
|
||||
|
||||
---
|
||||
|
||||
## Installation with kata-deploy
|
||||
|
||||
The `kata-deploy` Helm chart installs Kata Containers across all (or selected) nodes using a Kubernetes DaemonSet. When deployed, it:
|
||||
|
||||
- Installs Kata runtime binaries on each node
|
||||
- Configures the container runtime (containerd) to use Kata
|
||||
- Registers RuntimeClasses (`kata-qemu-nvidia-gpu-snp`, `kata-qemu-nvidia-gpu-tdx`, `kata-qemu-nvidia-gpu`, etc.)
|
||||
|
||||
After installation, workloads can use Kata isolation by specifying `runtimeClassName: kata-qemu-nvidia-gpu-snp` (or another Kata RuntimeClass) in their pod spec.
|
||||
|
||||
---
|
||||
|
||||
## Upgrades with kata-lifecycle-manager
|
||||
|
||||
### The Problem
|
||||
|
||||
Standard `helm upgrade kata-deploy` updates all nodes simultaneously via the DaemonSet. This approach:
|
||||
|
||||
- Provides no per-node verification
|
||||
- Offers no controlled rollback mechanism
|
||||
- Can leave the cluster in an inconsistent state if something fails
|
||||
|
||||
### The Solution
|
||||
|
||||
The `kata-lifecycle-manager` Helm chart uses Argo Workflows to orchestrate upgrades with the following guarantees:
|
||||
|
||||
| Guarantee | Description |
|
||||
|-----------|-------------|
|
||||
| **Sequential Processing** | Nodes are upgraded one at a time |
|
||||
| **Per-Node Verification** | A user-provided pod validates Kata functionality after each node upgrade |
|
||||
| **Fail-Fast** | If verification fails, the workflow stops immediately |
|
||||
| **Automatic Rollback** | On failure, Helm rollback is executed and the node is restored |
|
||||
|
||||
### Upgrade Flow
|
||||
|
||||
For each node in the cluster:
|
||||
|
||||
1. **Cordon** - Mark node as unschedulable
|
||||
2. **Drain** (optional) - Evict existing workloads
|
||||
3. **Upgrade** - Run `helm upgrade kata-deploy` targeting this node
|
||||
4. **Wait** - Ensure kata-deploy DaemonSet pod is ready
|
||||
5. **Verify** - Run verification pod to confirm Kata works
|
||||
6. **Uncordon** - Mark node as schedulable again
|
||||
|
||||
If verification fails on any node, the workflow:
|
||||
- Rolls back the Helm release
|
||||
- Uncordons the node
|
||||
- Stops processing (remaining nodes are not upgraded)
|
||||
|
||||
### Verification Pod
|
||||
|
||||
Users must provide a verification pod that tests Kata functionality. This pod:
|
||||
|
||||
- Uses a Kata RuntimeClass
|
||||
- Is scheduled on the specific node being verified
|
||||
- Runs whatever validation logic the user requires (smoke tests, attestation checks, etc.)
|
||||
|
||||
**Basic GPU Verification Example:**
|
||||
|
||||
For clusters with NVIDIA GPUs, the CUDA VectorAdd sample can serve as the verification workload:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: ${TEST_POD}
|
||||
spec:
|
||||
runtimeClassName: kata-qemu-nvidia-gpu-snp # or kata-qemu-nvidia-gpu-tdx
|
||||
restartPolicy: Never
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: ${NODE}
|
||||
containers:
|
||||
- name: cuda-vectoradd
|
||||
image: nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0-ubuntu22.04
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/pgpu: "1"
|
||||
memory: 16Gi
|
||||
```
|
||||
|
||||
This verifies that GPU passthrough works correctly with the upgraded Kata runtime.
|
||||
|
||||
The placeholders `${NODE}` and `${TEST_POD}` are substituted at runtime.
|
||||
|
||||
---
|
||||
|
||||
## Demo Recordings
|
||||
|
||||
| Demo | Description | Link |
|
||||
|------|-------------|------|
|
||||
| Sunny Path | Successful upgrade from 3.24.0 to 3.25.0 | [TODO] |
|
||||
| Rainy Path | Failed verification triggers rollback | [TODO] |
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- [kata-deploy Helm Chart](../tools/packaging/kata-deploy/helm-chart/README.md)
|
||||
- [kata-lifecycle-manager Helm Chart](../tools/packaging/kata-deploy/helm-chart/kata-lifecycle-manager/README.md)
|
||||
- [kata-lifecycle-manager Design Document](design/kata-lifecycle-manager-design.md)
|
||||
@@ -28,13 +28,15 @@ Bug fixes are released as part of `MINOR` or `MAJOR` releases only. `PATCH` is a
|
||||
|
||||
## Release Process
|
||||
|
||||
### Bump the `VERSION` and `Chart.yaml` file
|
||||
### Bump the `VERSION` and `Chart.yaml` files
|
||||
|
||||
When the `kata-containers/kata-containers` repository is ready for a new release,
|
||||
first create a PR to set the release in the [`VERSION`](./../VERSION) file and update the
|
||||
`version` and `appVersion` in the
|
||||
[`Chart.yaml`](./../tools/packaging/kata-deploy/helm-chart/kata-deploy/Chart.yaml) file and
|
||||
have it merged.
|
||||
`version` and `appVersion` in the following `Chart.yaml` files:
|
||||
- [`kata-deploy/Chart.yaml`](./../tools/packaging/kata-deploy/helm-chart/kata-deploy/Chart.yaml)
|
||||
- [`kata-lifecycle-manager/Chart.yaml`](./../tools/packaging/kata-deploy/helm-chart/kata-lifecycle-manager/Chart.yaml)
|
||||
|
||||
Have the PR merged before proceeding.
|
||||
|
||||
### Lock the `main` branch
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ Kata Containers design documents:
|
||||
- [Design for direct-assigned volume](direct-blk-device-assignment.md)
|
||||
- [Design for core-scheduling](core-scheduling.md)
|
||||
- [Virtualization Reference Architecture](kata-vra.md)
|
||||
- [Design for kata-lifecycle-manager Helm chart](kata-lifecycle-manager-design.md)
|
||||
---
|
||||
|
||||
- [Design proposals](proposals)
|
||||
|
||||
@@ -51,6 +51,7 @@ containers started after the VM has been launched.
|
||||
Users can check to see if the container uses the `devicemapper` block
|
||||
device as its rootfs by calling `mount(8)` within the container. If
|
||||
the `devicemapper` block device is used, the root filesystem (`/`)
|
||||
will be mounted from `/dev/vda`. Users can disable direct mounting of
|
||||
the underlying block device through the runtime
|
||||
[configuration](README.md#configuration).
|
||||
will be mounted from `/dev/vda`. Users can enable direct mounting of
|
||||
the underlying block device by setting the runtime
|
||||
[configuration](README.md#configuration) flag `disable_block_device_use` to
|
||||
`false`.
|
||||
|
||||
502
docs/design/kata-lifecycle-manager-design.md
Normal file
502
docs/design/kata-lifecycle-manager-design.md
Normal file
@@ -0,0 +1,502 @@
|
||||
# Kata Containers Lifecycle Manager Design
|
||||
|
||||
## Summary
|
||||
|
||||
This document proposes a Helm chart-based orchestration solution for Kata Containers that
|
||||
enables controlled, node-by-node upgrades with verification and rollback capabilities
|
||||
using Argo Workflows.
|
||||
|
||||
## Motivation
|
||||
|
||||
### Problem Statement
|
||||
|
||||
Upgrading Kata Containers in a production Kubernetes cluster presents several challenges:
|
||||
|
||||
1. **Workload Scheduling Control**: New Kata workloads should not be scheduled on a node
|
||||
during upgrade until the new runtime is verified.
|
||||
|
||||
2. **Verification Gap**: There is no standardized way to verify that Kata is working correctly
|
||||
after an upgrade before allowing workloads to return to the node. This solution addresses
|
||||
the gap by running a user-provided verification pod on each upgraded node.
|
||||
|
||||
3. **Rollback Complexity**: If an upgrade fails, administrators must manually coordinate
|
||||
rollback across multiple nodes.
|
||||
|
||||
4. **Controlled Rollout**: Operators need the ability to upgrade nodes incrementally
|
||||
(canary approach) with fail-fast behavior if any node fails verification.
|
||||
|
||||
5. **Multi-Architecture Support**: The upgrade tooling must work across all architectures
|
||||
supported by Kata Containers (amd64, arm64, s390x, ppc64le).
|
||||
|
||||
### Current State
|
||||
|
||||
The `kata-deploy` Helm chart provides installation and configuration of Kata Containers,
|
||||
including a post-install verification job. However, there is no built-in mechanism for
|
||||
orchestrating upgrades across nodes in a controlled manner.
|
||||
|
||||
## Goals
|
||||
|
||||
1. Provide a standardized, automated way to upgrade Kata Containers node-by-node
|
||||
2. Ensure each node is verified before returning to service
|
||||
3. Support user-defined verification logic
|
||||
4. Automatically roll back if verification fails
|
||||
5. Work with the existing `kata-deploy` Helm chart
|
||||
6. Support all Kata-supported architectures
|
||||
|
||||
## Non-Goals
|
||||
|
||||
1. Initial Kata Containers installation (use the kata-deploy Helm chart for that)
|
||||
2. Managing Kubernetes cluster upgrades
|
||||
3. Providing Kata-specific verification logic (this is the user's responsibility)
|
||||
4. Managing Argo Workflows installation
|
||||
|
||||
## Argo Workflows Dependency
|
||||
|
||||
### What Works Without Argo
|
||||
|
||||
The following components work independently of Argo Workflows:
|
||||
|
||||
| Component | Description |
|
||||
|-----------|-------------|
|
||||
| **kata-deploy Helm chart** | Full installation, configuration, `RuntimeClasses` |
|
||||
| **Post-install verification** | Helm hook runs verification pod after install |
|
||||
| **Label-gated deployment** | Progressive rollout via node labels |
|
||||
| **Manual upgrades** | User can script: cordon, helm upgrade, verify, `uncordon` |
|
||||
|
||||
Users who do not want Argo can still:
|
||||
- Install and configure Kata via kata-deploy
|
||||
- Perform upgrades manually or with custom scripts
|
||||
- Use the verification pod pattern in their own automation
|
||||
|
||||
### What Requires Argo
|
||||
|
||||
The kata-lifecycle-manager Helm chart provides orchestration via Argo Workflows:
|
||||
|
||||
| Feature | Description |
|
||||
|---------|-------------|
|
||||
| **Automated node-by-node upgrades** | Sequential processing with fail-fast |
|
||||
| **Taint-based node selection** | Select nodes by taint key/value |
|
||||
| **`WorkflowTemplate`** | Reusable upgrade workflow |
|
||||
| **Rollback entrypoint** | `argo submit --entrypoint rollback-node` |
|
||||
| **Status tracking** | Node annotations updated at each phase |
|
||||
|
||||
### For Users Already Using Argo
|
||||
|
||||
If your cluster already has Argo Workflows installed:
|
||||
|
||||
```bash
|
||||
# Install kata-lifecycle-manager - integrates with your existing Argo installation
|
||||
helm install kata-lifecycle-manager oci://ghcr.io/kata-containers/kata-deploy-charts/kata-lifecycle-manager \
|
||||
--set argoNamespace=argo \
|
||||
--set-file defaults.verificationPod=./verification-pod.yaml
|
||||
|
||||
# Trigger upgrades via argo CLI or integrate with existing workflows
|
||||
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager -p target-version=3.25.0
|
||||
```
|
||||
|
||||
kata-lifecycle-manager can also be triggered by other Argo workflows, CI/CD pipelines, or `GitOps`
|
||||
tools that support Argo.
|
||||
|
||||
### For Users Not Wanting Argo
|
||||
|
||||
If you prefer not to use Argo Workflows:
|
||||
|
||||
1. **Use kata-deploy directly** - handles installation and basic verification
|
||||
2. **Script your own orchestration** - example approach:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Manual upgrade script (no Argo required)
|
||||
set -euo pipefail
|
||||
|
||||
VERSION="3.25.0"
|
||||
|
||||
# Upgrade each node with Kata runtime
|
||||
kubectl get nodes -l katacontainers.io/kata-runtime=true -o name | while read -r node_path; do
|
||||
NODE="${node_path#node/}"
|
||||
echo "Upgrading $NODE..."
|
||||
kubectl cordon "$NODE"
|
||||
|
||||
helm upgrade kata-deploy oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy \
|
||||
--namespace kube-system \
|
||||
--version "$VERSION" \
|
||||
--reuse-values \
|
||||
--wait
|
||||
|
||||
# Wait for DaemonSet pod on this node
|
||||
kubectl rollout status daemonset/kata-deploy -n kube-system
|
||||
|
||||
# Run verification (apply your pod, wait, check exit code)
|
||||
kubectl apply -f verification-pod.yaml
|
||||
kubectl wait pod/kata-verify --for=jsonpath='{.status.phase}'=Succeeded --timeout=180s
|
||||
kubectl delete pod/kata-verify
|
||||
|
||||
kubectl uncordon "$NODE"
|
||||
echo "$NODE upgraded successfully"
|
||||
done
|
||||
```
|
||||
|
||||
This approach requires more manual effort but avoids the Argo dependency.
|
||||
|
||||
## Proposed Design
|
||||
|
||||
### Architecture Overview
|
||||
|
||||
```text
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Argo Workflows Controller │
|
||||
│ (pre-installed) │
|
||||
└────────────────────────────┬────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌──────────────────────────────────────────────────────────────┐
|
||||
│ kata-lifecycle-manager Helm Chart │
|
||||
│ ┌────────────────────────────────────────────────────────┐ │
|
||||
│ │ WorkflowTemplate │ │
|
||||
│ │ - upgrade-all-nodes (entrypoint) │ │
|
||||
│ │ - upgrade-single-node (per-node steps) │ │
|
||||
│ │ - rollback-node (manual recovery) │ │
|
||||
│ └────────────────────────────────────────────────────────┘ │
|
||||
│ ┌────────────────────────────────────────────────────────┐ │
|
||||
│ │ RBAC Resources │ │
|
||||
│ │ - ServiceAccount │ │
|
||||
│ │ - ClusterRole (node, pod, helm operations) │ │
|
||||
│ │ - ClusterRoleBinding │ │
|
||||
│ └────────────────────────────────────────────────────────┘ │
|
||||
└──────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ kata-deploy Helm Chart │
|
||||
│ (existing installation) │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Upgrade Flow
|
||||
|
||||
For each node selected for upgrade (by label, taint, or both):
|
||||
|
||||
```text
|
||||
┌────────────┐ ┌──────────────┐ ┌────────────┐ ┌────────────┐
|
||||
│ Prepare │───▶│ Cordon │───▶│ Upgrade │───▶│Wait Ready │
|
||||
│ (annotate) │ │ (mark │ │ (helm │ │(kata-deploy│
|
||||
│ │ │unschedulable)│ │ upgrade) │ │ DaemonSet) │
|
||||
└────────────┘ └──────────────┘ └────────────┘ └────────────┘
|
||||
│
|
||||
▼
|
||||
┌────────────┐ ┌──────────────┐ ┌────────────┐
|
||||
│ Complete │◀───│ Uncordon │◀───│ Verify │
|
||||
│ (annotate │ │ (mark │ │ (user pod)│
|
||||
│ version) │ │schedulable) │ │ │
|
||||
└────────────┘ └──────────────┘ └────────────┘
|
||||
```
|
||||
|
||||
**Note:** Drain is not required for Kata upgrades. Running Kata VMs continue using
|
||||
the in-memory binaries. Only new workloads use the upgraded binaries. Cordon ensures
|
||||
the verification pod runs before any new workloads are scheduled with the new runtime.
|
||||
|
||||
**Optional Drain:** For users who prefer to evict workloads before any maintenance
|
||||
operation, an optional drain step can be enabled via `drain-enabled=true`. When
|
||||
enabled, an additional drain step runs after cordon and before upgrade.
|
||||
|
||||
### Node Selection Model
|
||||
|
||||
Nodes can be selected for upgrade using **labels**, **taints**, or **both**.
|
||||
|
||||
**Label-based selection:**
|
||||
|
||||
```bash
|
||||
# Select nodes by label
|
||||
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager \
|
||||
-p target-version=3.25.0 \
|
||||
-p node-selector="katacontainers.io/kata-lifecycle-manager-window=true"
|
||||
```
|
||||
|
||||
**Taint-based selection:**
|
||||
|
||||
Some organizations use taints to mark nodes for maintenance. The workflow supports
|
||||
selecting nodes by taint key and optionally taint value:
|
||||
|
||||
```bash
|
||||
# Select nodes with a specific taint
|
||||
kubectl taint nodes worker-1 kata-lifecycle-manager=pending:NoSchedule
|
||||
|
||||
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager \
|
||||
-p target-version=3.25.0 \
|
||||
-p node-taint-key=kata-lifecycle-manager \
|
||||
-p node-taint-value=pending
|
||||
```
|
||||
|
||||
**Combined selection:**
|
||||
|
||||
Labels and taints can be used together for precise targeting:
|
||||
|
||||
```bash
|
||||
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager \
|
||||
-p target-version=3.25.0 \
|
||||
-p node-selector="node-pool=kata-pool" \
|
||||
-p node-taint-key=maintenance
|
||||
```
|
||||
|
||||
This allows operators to:
|
||||
1. Upgrade a single canary node first
|
||||
2. Gradually add nodes to the upgrade window
|
||||
3. Control upgrade timing via `GitOps` or automation
|
||||
4. Integrate with existing taint-based maintenance workflows
|
||||
|
||||
### Node Pool Support
|
||||
|
||||
The node selector and taint selector parameters enable basic node pool targeting:
|
||||
|
||||
```bash
|
||||
# Upgrade only nodes matching a specific node pool label
|
||||
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager \
|
||||
-p target-version=3.25.0 \
|
||||
-p node-selector="node-pool=kata-pool"
|
||||
```
|
||||
|
||||
**Current Capabilities:**
|
||||
|
||||
| Feature | Status | Chart | Notes |
|
||||
|---------|--------|-------|-------|
|
||||
| Label-based selection | Supported | kata-lifecycle-manager | Works with any label combination |
|
||||
| Taint-based selection | Supported | kata-lifecycle-manager | Select by taint key/value |
|
||||
| Sequential upgrades | Supported | kata-lifecycle-manager | One node at a time with fail-fast |
|
||||
| Pool-specific verification pods | Not supported | kata-lifecycle-manager | Same verification for all nodes |
|
||||
| Pool-ordered upgrades | Not supported | kata-lifecycle-manager | Upgrade pool A before pool B |
|
||||
|
||||
See the [Potential Enhancements](#potential-enhancements) section for future work.
|
||||
|
||||
### Verification Model
|
||||
|
||||
**Verification runs on each node that is upgraded.** The node is only `uncordoned` after
|
||||
its verification pod succeeds. If verification fails, automatic rollback is triggered
|
||||
to restore the previous version before `uncordoning` the node.
|
||||
|
||||
**Common failure modes detected by verification:**
|
||||
- Pod stuck in Pending/`ContainerCreating` (runtime can't start VM)
|
||||
- Pod crashes immediately (containerd/CRI-O configuration issues)
|
||||
- Pod times out (resource issues, image pull failures)
|
||||
- Pod exits with non-zero code (verification logic failed)
|
||||
|
||||
All of these trigger automatic rollback. The workflow logs include pod status, events,
|
||||
and logs to help diagnose the issue.
|
||||
|
||||
The user provides a complete Pod YAML that:
|
||||
- Uses the Kata runtime class they want to verify
|
||||
- Contains their verification logic (e.g., attestation checks)
|
||||
- Exits 0 on success, non-zero on failure
|
||||
- Includes tolerations for cordoned nodes (verification runs while node is cordoned)
|
||||
- Includes a `nodeSelector` to ensure it runs on the specific node being upgraded
|
||||
|
||||
When upgrading multiple nodes (via label and/or taint selection), nodes are processed sequentially.
|
||||
For each node, the following placeholders are substituted with that node's specific values,
|
||||
ensuring the verification pod runs on the exact node that was just upgraded:
|
||||
|
||||
- `${NODE}` - The hostname of the node being upgraded/verified
|
||||
- `${TEST_POD}` - A generated unique pod name
|
||||
|
||||
Example verification pod:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: ${TEST_POD}
|
||||
spec:
|
||||
runtimeClassName: kata-qemu
|
||||
restartPolicy: Never
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: ${NODE}
|
||||
tolerations:
|
||||
- operator: Exists # Required: node is cordoned during verification
|
||||
containers:
|
||||
- name: verify
|
||||
image: quay.io/kata-containers/alpine-bash-curl:latest
|
||||
command: ["uname", "-a"]
|
||||
```
|
||||
|
||||
This design keeps verification logic entirely in the user's domain, supporting:
|
||||
- Different runtime classes (`kata-qemu`, `kata-qemu-snp`, `kata-qemu-tdx`, etc.)
|
||||
- TEE-specific attestation verification
|
||||
- GPU/accelerator validation
|
||||
- Custom application smoke tests
|
||||
|
||||
### Sequential Execution with Fail-Fast
|
||||
|
||||
Nodes are upgraded strictly sequentially using recursive Argo templates. This design
|
||||
ensures that if any node fails verification, the workflow stops immediately before
|
||||
touching remaining nodes, preventing a mixed-version fleet.
|
||||
|
||||
Alternative approaches considered:
|
||||
- **`withParam` + semaphore**: Provides cleaner UI but semaphore only controls concurrency,
|
||||
not failure propagation. Other nodes would still proceed after one fails.
|
||||
- **`withParam` + `failFast`**: Would be ideal, but Argo only supports `failFast` for DAG
|
||||
tasks, not for steps with `withParam`.
|
||||
|
||||
The recursive template approach (`upgrade-node-chain`) naturally provides fail-fast
|
||||
behavior because if any step in the chain fails, the recursion stops.
|
||||
|
||||
### Status Tracking
|
||||
|
||||
Node upgrade status is tracked via Kubernetes annotations:
|
||||
|
||||
| Annotation | Values |
|
||||
|------------|--------|
|
||||
| `katacontainers.io/kata-lifecycle-manager-status` | preparing, cordoned, draining, upgrading, verifying, completed, rolling-back, rolled-back |
|
||||
| `katacontainers.io/kata-current-version` | Version string (e.g., "3.25.0") |
|
||||
|
||||
This enables:
|
||||
- Monitoring upgrade progress via `kubectl get nodes`
|
||||
- Integration with external monitoring systems
|
||||
- Recovery from interrupted upgrades
|
||||
|
||||
### Rollback Support
|
||||
|
||||
**Automatic rollback on verification failure:** If the verification pod fails (non-zero exit),
|
||||
kata-lifecycle-manager automatically:
|
||||
1. Runs `helm rollback` to revert to the previous Helm release
|
||||
2. Waits for kata-deploy DaemonSet to be ready with the previous version
|
||||
3. `Uncordons` the node
|
||||
4. Annotates the node with `rolled-back` status
|
||||
|
||||
This ensures nodes are never left in a broken state.
|
||||
|
||||
**Manual rollback:** For cases where you need to roll back a successfully upgraded node:
|
||||
|
||||
```bash
|
||||
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager \
|
||||
--entrypoint rollback-node \
|
||||
-p node-name=worker-1
|
||||
```
|
||||
|
||||
## Components
|
||||
|
||||
### Container Images
|
||||
|
||||
Two multi-architecture container images are built and published:
|
||||
|
||||
| Image | Purpose | Architectures |
|
||||
|-------|---------|---------------|
|
||||
| `quay.io/kata-containers/kubectl:latest` | Kubernetes operations | amd64, arm64, s390x, ppc64le |
|
||||
| `quay.io/kata-containers/helm:latest` | Helm operations | amd64, arm64, s390x, ppc64le |
|
||||
|
||||
Images are rebuilt weekly to pick up security updates and tool version upgrades.
|
||||
|
||||
### Helm Chart Structure
|
||||
|
||||
```text
|
||||
kata-lifecycle-manager/
|
||||
├── Chart.yaml # Chart metadata
|
||||
├── values.yaml # Configurable defaults
|
||||
├── README.md # Usage documentation
|
||||
└── templates/
|
||||
├── _helpers.tpl # Template helpers
|
||||
├── rbac.yaml # ServiceAccount, ClusterRole, ClusterRoleBinding
|
||||
└── workflow-template.yaml # Argo `WorkflowTemplate`
|
||||
```
|
||||
|
||||
### RBAC Requirements
|
||||
|
||||
The workflow requires the following permissions:
|
||||
|
||||
| Resource | Verbs | Purpose |
|
||||
|----------|-------|---------|
|
||||
| nodes | get, list, watch, patch | `cordon`/`uncordon`, annotations |
|
||||
| pods | get, list, watch, create, delete | Verification pods |
|
||||
| pods/log | get | Verification output |
|
||||
| `daemonsets` | get, list, watch | Wait for `kata-deploy` |
|
||||
|
||||
## User Experience
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# Install kata-lifecycle-manager with verification config
|
||||
helm install kata-lifecycle-manager oci://ghcr.io/kata-containers/kata-deploy-charts/kata-lifecycle-manager \
|
||||
--set-file defaults.verificationPod=/path/to/verification-pod.yaml
|
||||
```
|
||||
|
||||
### Triggering an Upgrade
|
||||
|
||||
```bash
|
||||
# Label nodes for upgrade
|
||||
kubectl label node worker-1 katacontainers.io/kata-lifecycle-manager-window=true
|
||||
|
||||
# Submit upgrade workflow
|
||||
argo submit -n argo --from workflowtemplate/kata-lifecycle-manager \
|
||||
-p target-version=3.25.0
|
||||
|
||||
# Watch progress
|
||||
argo watch @latest
|
||||
```
|
||||
|
||||
### Monitoring
|
||||
|
||||
```bash
|
||||
kubectl get nodes \
|
||||
-L katacontainers.io/kata-runtime \
|
||||
-L katacontainers.io/kata-lifecycle-manager-status \
|
||||
-L katacontainers.io/kata-current-version
|
||||
```
|
||||
|
||||
## Security Considerations
|
||||
|
||||
1. **Namespace-Scoped Templates**: The chart creates a `WorkflowTemplate` (namespace-scoped)
|
||||
rather than `ClusterWorkflowTemplate` by default, reducing blast radius.
|
||||
|
||||
2. **Required Verification**: The chart fails to install if `defaults.verificationPod` is
|
||||
not provided, ensuring upgrades are always verified.
|
||||
|
||||
3. **Minimal RBAC**: The `ServiceAccount` has only the permissions required for upgrade
|
||||
operations.
|
||||
|
||||
4. **User-Controlled Verification**: Verification logic is entirely user-defined, avoiding
|
||||
any hardcoded assumptions about what "working" means.
|
||||
|
||||
## Integration with Release Process
|
||||
|
||||
The `kata-lifecycle-manager` chart is:
|
||||
- Packaged alongside `kata-deploy` during releases
|
||||
- Published to the same OCI registries (`quay.io`, `ghcr.io`)
|
||||
- Versioned to match `kata-deploy`
|
||||
|
||||
## Potential Enhancements
|
||||
|
||||
The following enhancements could be considered if needed:
|
||||
|
||||
### kata-lifecycle-manager
|
||||
|
||||
1. **Pool-Specific Verification**: Different verification pods for different node pools
|
||||
(e.g., GPU nodes vs. CPU-only nodes).
|
||||
|
||||
2. **Ordered Pool Upgrades**: Upgrade node pool A completely before starting pool B.
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
### 1. DaemonSet-Based Upgrades
|
||||
|
||||
Using a DaemonSet to coordinate upgrades on each node.
|
||||
|
||||
**Rejected because**: DaemonSets don't provide the node-by-node sequencing and
|
||||
verification workflow needed for controlled upgrades.
|
||||
|
||||
### 2. Operator Pattern
|
||||
|
||||
Building a Kubernetes Operator to manage upgrades.
|
||||
|
||||
**Rejected because**: Adds significant complexity and maintenance burden. Argo Workflows
|
||||
is already widely adopted and provides the orchestration primitives needed.
|
||||
|
||||
### 3. Shell Script Orchestration
|
||||
|
||||
Providing a shell script that loops through nodes.
|
||||
|
||||
**Rejected because**: Less reliable, harder to monitor, no built-in retry/recovery,
|
||||
and doesn't integrate with Kubernetes-native tooling.
|
||||
|
||||
## References
|
||||
|
||||
- [kata-deploy Helm Chart](https://github.com/kata-containers/kata-containers/tree/main/tools/packaging/kata-deploy/helm-chart/kata-deploy)
|
||||
- [Argo Workflows](https://argoproj.github.io/argo-workflows/)
|
||||
- [Helm Documentation](https://helm.sh/docs/)
|
||||
@@ -50,7 +50,7 @@ There are several kinds of Kata configurations and they are listed below.
|
||||
| `io.katacontainers.config.hypervisor.default_max_vcpus` | uint32| the maximum number of vCPUs allocated for the VM by the hypervisor |
|
||||
| `io.katacontainers.config.hypervisor.default_memory` | uint32| the memory assigned for a VM by the hypervisor in `MiB` |
|
||||
| `io.katacontainers.config.hypervisor.default_vcpus` | float32| the default vCPUs assigned for a VM by the hypervisor |
|
||||
| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disallow a block device from being used |
|
||||
| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disable hotplugging host block devices to guest VMs for container rootfs |
|
||||
| `io.katacontainers.config.hypervisor.disable_image_nvdimm` | `boolean` | specify if a `nvdimm` device should be used as rootfs for the guest (QEMU) |
|
||||
| `io.katacontainers.config.hypervisor.disable_vhost_net` | `boolean` | specify if `vhost-net` is not available on the host |
|
||||
| `io.katacontainers.config.hypervisor.enable_hugepages` | `boolean` | if the memory should be `pre-allocated` from huge pages |
|
||||
|
||||
@@ -103,48 +103,8 @@ $ minikube ssh "grep -c -E 'vmx|svm' /proc/cpuinfo"
|
||||
|
||||
## Installing Kata Containers
|
||||
|
||||
You can now install the Kata Containers runtime components. You will need a local copy of some Kata
|
||||
Containers components to help with this, and then use `kubectl` on the host (that Minikube has already
|
||||
configured for you) to deploy them:
|
||||
|
||||
```sh
|
||||
$ git clone https://github.com/kata-containers/kata-containers.git
|
||||
$ cd kata-containers/tools/packaging/kata-deploy
|
||||
$ kubectl apply -f kata-rbac/base/kata-rbac.yaml
|
||||
$ kubectl apply -f kata-deploy/base/kata-deploy.yaml
|
||||
```
|
||||
|
||||
This installs the Kata Containers components into `/opt/kata` inside the Minikube node. It can take
|
||||
a few minutes for the operation to complete. You can check the installation has worked by checking
|
||||
the status of the `kata-deploy` pod, which will be executing
|
||||
[this script](../../tools/packaging/kata-deploy/scripts/kata-deploy.sh),
|
||||
and will be executing a `sleep infinity` once it has successfully completed its work.
|
||||
You can accomplish this by running the following:
|
||||
|
||||
```sh
|
||||
$ podname=$(kubectl -n kube-system get pods -o=name | grep -F kata-deploy | sed 's?pod/??')
|
||||
$ kubectl -n kube-system exec ${podname} -- ps -ef | grep -F infinity
|
||||
```
|
||||
|
||||
> *NOTE:* This check only works for single node clusters, which is the default for Minikube.
|
||||
> For multi-node clusters, the check would need to be adapted to check `kata-deploy` had
|
||||
> completed on all nodes.
|
||||
|
||||
## Enabling Kata Containers
|
||||
|
||||
Now you have installed the Kata Containers components in the Minikube node. Next, you need to configure
|
||||
Kubernetes `RuntimeClass` to know when to use Kata Containers to run a pod.
|
||||
|
||||
### Register the runtime
|
||||
|
||||
Now register the `kata qemu` runtime with that class. This should result in no errors:
|
||||
|
||||
```sh
|
||||
$ cd kata-containers/tools/packaging/kata-deploy/runtimeclasses
|
||||
$ kubectl apply -f kata-runtimeClasses.yaml
|
||||
```
|
||||
|
||||
The Kata Containers installation process should be complete and enabled in the Minikube cluster.
|
||||
You can now install the Kata Containers runtime components
|
||||
[following the official instructions](../../tools/packaging/kata-deploy/helm-chart).
|
||||
|
||||
## Testing Kata Containers
|
||||
|
||||
|
||||
1
src/agent/Cargo.lock
generated
1
src/agent/Cargo.lock
generated
@@ -4305,6 +4305,7 @@ checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
|
||||
name = "test-utils"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"nix 0.26.4",
|
||||
]
|
||||
|
||||
|
||||
@@ -1588,9 +1588,11 @@ async fn join_namespaces(
|
||||
cm.apply(p.pid)?;
|
||||
}
|
||||
|
||||
if p.init && res.is_some() {
|
||||
info!(logger, "set properties to cgroups!");
|
||||
cm.set(res.unwrap(), false)?;
|
||||
if p.init {
|
||||
if let Some(resource) = res {
|
||||
info!(logger, "set properties to cgroups!");
|
||||
cm.set(resource, false)?;
|
||||
}
|
||||
}
|
||||
|
||||
info!(logger, "notify child to continue");
|
||||
|
||||
@@ -752,15 +752,6 @@ fn parse_mount(m: &Mount) -> (MsFlags, MsFlags, String) {
|
||||
(flags, pgflags, data.join(","))
|
||||
}
|
||||
|
||||
// This function constructs a canonicalized path by combining the `rootfs` and `unsafe_path` elements.
|
||||
// The resulting path is guaranteed to be ("below" / "in a directory under") the `rootfs` directory.
|
||||
//
|
||||
// Parameters:
|
||||
//
|
||||
// - `rootfs` is the absolute path to the root of the containers root filesystem directory.
|
||||
// - `unsafe_path` is path inside a container. It is unsafe since it may try to "escape" from the containers
|
||||
// rootfs by using one or more "../" path elements or is its a symlink to path.
|
||||
|
||||
fn mount_from(
|
||||
cfd_log: RawFd,
|
||||
m: &Mount,
|
||||
|
||||
@@ -10,7 +10,7 @@ use std::fs::File;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crossbeam_channel::{Receiver, Sender, TryRecvError};
|
||||
use log::{debug, error, info, warn};
|
||||
use log::{debug, info, warn};
|
||||
use std::sync::mpsc;
|
||||
use tracing::instrument;
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@ use dbs_legacy_devices::ConsoleHandler;
|
||||
use dbs_pci::CAPABILITY_BAR_SIZE;
|
||||
use dbs_utils::epoll_manager::EpollManager;
|
||||
use kvm_ioctls::VmFd;
|
||||
use log::error;
|
||||
use virtio_queue::QueueSync;
|
||||
|
||||
#[cfg(feature = "dbs-virtio-devices")]
|
||||
|
||||
@@ -770,10 +770,11 @@ impl MachineInfo {
|
||||
}
|
||||
|
||||
/// Huge page type for VM RAM backend
|
||||
#[derive(Clone, Debug, Deserialize_enum_str, Serialize_enum_str, PartialEq, Eq)]
|
||||
#[derive(Clone, Debug, Deserialize_enum_str, Serialize_enum_str, PartialEq, Eq, Default)]
|
||||
pub enum HugePageType {
|
||||
/// Memory allocated using hugetlbfs backend
|
||||
#[serde(rename = "hugetlbfs")]
|
||||
#[default]
|
||||
Hugetlbfs,
|
||||
|
||||
/// Memory allocated using transparent huge pages
|
||||
@@ -781,12 +782,6 @@ pub enum HugePageType {
|
||||
THP,
|
||||
}
|
||||
|
||||
impl Default for HugePageType {
|
||||
fn default() -> Self {
|
||||
Self::Hugetlbfs
|
||||
}
|
||||
}
|
||||
|
||||
/// Virtual machine memory configuration information.
|
||||
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
|
||||
pub struct MemoryInfo {
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::fs;
|
||||
use std::io::{self, Result};
|
||||
use std::path::{Path, PathBuf};
|
||||
@@ -206,8 +206,8 @@ impl TomlConfig {
|
||||
}
|
||||
|
||||
/// Get agent-specific kernel parameters for further Hypervisor config revision
|
||||
pub fn get_agent_kernel_params(&self) -> Result<HashMap<String, String>> {
|
||||
let mut kv = HashMap::new();
|
||||
pub fn get_agent_kernel_params(&self) -> Result<BTreeMap<String, String>> {
|
||||
let mut kv = BTreeMap::new();
|
||||
if let Some(cfg) = self.agent.get(&self.runtime.agent_name) {
|
||||
if cfg.debug {
|
||||
kv.insert(LOG_LEVEL_OPTION.to_string(), LOG_LEVEL_DEBUG.to_string());
|
||||
|
||||
@@ -366,8 +366,8 @@ key = "value"
|
||||
|
||||
let result = add_hypervisor_initdata_overrides(&encoded);
|
||||
// This might fail depending on whether algorithm is required
|
||||
if result.is_err() {
|
||||
assert!(result.unwrap_err().to_string().contains("parse initdata"));
|
||||
if let Err(error) = result {
|
||||
assert!(error.to_string().contains("parse initdata"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -386,8 +386,8 @@ key = "value"
|
||||
|
||||
let result = add_hypervisor_initdata_overrides(&encoded);
|
||||
// This might fail depending on whether version is required
|
||||
if result.is_err() {
|
||||
assert!(result.unwrap_err().to_string().contains("parse initdata"));
|
||||
if let Err(error) = result {
|
||||
assert!(error.to_string().contains("parse initdata"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -488,7 +488,7 @@ key = "value"
|
||||
let valid_toml = r#"
|
||||
version = "0.1.0"
|
||||
algorithm = "sha384"
|
||||
|
||||
|
||||
[data]
|
||||
valid_key = "valid_value"
|
||||
"#;
|
||||
@@ -497,7 +497,7 @@ key = "value"
|
||||
// Invalid TOML (missing version)
|
||||
let invalid_toml = r#"
|
||||
algorithm = "sha256"
|
||||
|
||||
|
||||
[data]
|
||||
key = "value"
|
||||
"#;
|
||||
|
||||
@@ -136,8 +136,6 @@ macro_rules! skip_loop_by_user {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{skip_if_kvm_unaccessable, skip_if_not_root, skip_if_root};
|
||||
|
||||
#[test]
|
||||
fn test_skip_if_not_root() {
|
||||
skip_if_not_root!();
|
||||
|
||||
@@ -22,6 +22,7 @@ cloud-hypervisor = ["runtimes/cloud-hypervisor"]
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
containerd-shim-protos = { workspace = true }
|
||||
go-flag = { workspace = true }
|
||||
nix = { workspace = true }
|
||||
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
|
||||
|
||||
@@ -130,8 +130,23 @@ FCJAILERPATH = $(FCBINDIR)/$(FCJAILERCMD)
|
||||
FCVALIDJAILERPATHS = [\"$(FCJAILERPATH)\"]
|
||||
|
||||
PKGLIBEXECDIR := $(LIBEXECDIR)/$(PROJECT_DIR)
|
||||
|
||||
# EDK2 firmware names per architecture
|
||||
ifeq ($(ARCH), aarch64)
|
||||
EDK2_NAME := aavmf
|
||||
endif
|
||||
|
||||
# Set firmware paths from QEMUFW/QEMUFWVOL if defined
|
||||
FIRMWAREPATH :=
|
||||
FIRMWAREVOLUMEPATH :=
|
||||
ifneq (,$(QEMUCMD))
|
||||
ifneq (,$(QEMUFW))
|
||||
FIRMWAREPATH := $(PREFIXDEPS)/share/$(EDK2_NAME)/$(QEMUFW)
|
||||
endif
|
||||
ifneq (,$(QEMUFWVOL))
|
||||
FIRMWAREVOLUMEPATH := $(PREFIXDEPS)/share/$(EDK2_NAME)/$(QEMUFWVOL)
|
||||
endif
|
||||
endif
|
||||
|
||||
ROOTMEASURECONFIG ?= ""
|
||||
KERNELTDXPARAMS += $(ROOTMEASURECONFIG)
|
||||
@@ -374,6 +389,11 @@ ifneq (,$(QEMUCMD))
|
||||
ifeq ($(ARCH), s390x)
|
||||
VMROOTFSDRIVER_QEMU := virtio-blk-ccw
|
||||
DEFBLOCKSTORAGEDRIVER_QEMU := virtio-blk-ccw
|
||||
else ifeq ($(ARCH), aarch64)
|
||||
# NVDIMM/virtio-pmem has issues on arm64 (cache coherency problems with DAX),
|
||||
# so we use virtio-blk-pci instead.
|
||||
VMROOTFSDRIVER_QEMU := virtio-blk-pci
|
||||
DEFBLOCKSTORAGEDRIVER_QEMU := virtio-scsi
|
||||
else
|
||||
VMROOTFSDRIVER_QEMU := virtio-pmem
|
||||
DEFBLOCKSTORAGEDRIVER_QEMU := virtio-scsi
|
||||
|
||||
@@ -4,12 +4,16 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
MACHINETYPE :=
|
||||
# ARM 64 settings
|
||||
|
||||
MACHINETYPE := virt
|
||||
KERNELPARAMS := cgroup_no_v1=all systemd.unified_cgroup_hierarchy=1
|
||||
MACHINEACCELERATORS :=
|
||||
MACHINEACCELERATORS := usb=off,gic-version=host
|
||||
CPUFEATURES := pmu=off
|
||||
|
||||
QEMUCMD := qemu-system-aarch64
|
||||
QEMUFW := AAVMF_CODE.fd
|
||||
QEMUFWVOL := AAVMF_VARS.fd
|
||||
|
||||
# dragonball binary name
|
||||
DBCMD := dragonball
|
||||
|
||||
@@ -19,7 +19,7 @@ image = "@IMAGEPATH@"
|
||||
# - xfs
|
||||
# - erofs
|
||||
rootfs_type = @DEFROOTFSTYPE@
|
||||
|
||||
|
||||
# Block storage driver to be used when the VM rootfs is backed
|
||||
# by a block device.
|
||||
vm_rootfs_driver = "@VMROOTFSDRIVER_CLH@"
|
||||
@@ -41,7 +41,7 @@ valid_hypervisor_paths = @CLHVALIDHYPERVISORPATHS@
|
||||
|
||||
# List of valid annotations values for ctlpath
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends:
|
||||
# Your distribution recommends:
|
||||
valid_ctlpaths = []
|
||||
|
||||
# Optional space-separated list of options to pass to the guest kernel.
|
||||
|
||||
@@ -23,7 +23,7 @@ image = "@IMAGEPATH@"
|
||||
# - erofs
|
||||
rootfs_type = @DEFROOTFSTYPE@
|
||||
|
||||
|
||||
|
||||
# Block storage driver to be used when the VM rootfs is backed
|
||||
# by a block device. This is virtio-blk-pci, virtio-blk-mmio or nvdimm
|
||||
vm_rootfs_driver = "@VMROOTFSDRIVER_DB@"
|
||||
@@ -41,7 +41,7 @@ valid_hypervisor_paths = @DBVALIDHYPERVISORPATHS@
|
||||
|
||||
# List of valid annotations values for ctlpath
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends:
|
||||
# Your distribution recommends:
|
||||
valid_ctlpaths = []
|
||||
|
||||
# Optional space-separated list of options to pass to the guest kernel.
|
||||
|
||||
@@ -373,16 +373,16 @@ disable_image_nvdimm = false
|
||||
# Default false
|
||||
hotplug_vfio_on_root_bus = false
|
||||
|
||||
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port"
|
||||
hot_plug_vfio = "no-port"
|
||||
|
||||
# In a confidential compute environment hot-plugging can compromise
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
cold_plug_vfio = "no-port"
|
||||
|
||||
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
|
||||
|
||||
@@ -767,4 +767,4 @@ dan_conf = "@DEFDANCONF@"
|
||||
# to non-k8s cases)
|
||||
# cold_plug_vfio != no_port AND pod_resource_api_sock != "" => kubelet
|
||||
# based cold plug.
|
||||
pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
|
||||
pod_resource_api_sock = "@DEFPODRESOURCEAPISOCK@"
|
||||
|
||||
@@ -39,7 +39,7 @@ vm_rootfs_driver = "virtio-blk-pci"
|
||||
#
|
||||
# Known limitations:
|
||||
# * Does not work by design:
|
||||
# - CPU Hotplug
|
||||
# - CPU Hotplug
|
||||
# - Memory Hotplug
|
||||
# - NVDIMM devices
|
||||
#
|
||||
|
||||
@@ -304,7 +304,7 @@ debug_console_enabled = false
|
||||
|
||||
# Agent connection dialing timeout value in seconds
|
||||
# (default: 45)
|
||||
dial_timeout = 45
|
||||
dial_timeout = 45
|
||||
|
||||
# Confidential Data Hub API timeout value in seconds
|
||||
# (default: 50)
|
||||
|
||||
@@ -2296,6 +2296,14 @@ impl<'a> QemuCmdLine<'a> {
|
||||
}
|
||||
|
||||
fn add_iommu(&mut self) {
|
||||
// vIOMMU (Intel IOMMU) is not supported on the "virt" machine type (arm64)
|
||||
if self.machine.r#type == "virt" {
|
||||
self.kernel
|
||||
.params
|
||||
.append(&mut KernelParams::from_string("iommu.passthrough=0"));
|
||||
return;
|
||||
}
|
||||
|
||||
let dev_iommu = DeviceIntelIommu::new();
|
||||
self.devices.push(Box::new(dev_iommu));
|
||||
|
||||
|
||||
@@ -28,8 +28,13 @@ use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use qapi_spec::Dictionary;
|
||||
use std::thread;
|
||||
use std::time::Instant;
|
||||
|
||||
/// default qmp connection read timeout
|
||||
const DEFAULT_QMP_READ_TIMEOUT: u64 = 250;
|
||||
const DEFAULT_QMP_CONNECT_DEADLINE_MS: u64 = 5000;
|
||||
const DEFAULT_QMP_RETRY_SLEEP_MS: u64 = 50;
|
||||
|
||||
pub struct Qmp {
|
||||
qmp: qapi::Qmp<qapi::Stream<BufReader<UnixStream>, UnixStream>>,
|
||||
@@ -58,29 +63,43 @@ impl Debug for Qmp {
|
||||
|
||||
impl Qmp {
|
||||
pub fn new(qmp_sock_path: &str) -> Result<Self> {
|
||||
let stream = UnixStream::connect(qmp_sock_path)?;
|
||||
let try_new_once_fn = || -> Result<Qmp> {
|
||||
let stream = UnixStream::connect(qmp_sock_path)?;
|
||||
|
||||
// Set the read timeout to protect runtime-rs from blocking forever
|
||||
// trying to set up QMP connection if qemu fails to launch. The exact
|
||||
// value is a matter of judgement. Setting it too long would risk
|
||||
// being ineffective since container runtime would timeout first anyway
|
||||
// (containerd's task creation timeout is 2 s by default). OTOH
|
||||
// setting it too short would risk interfering with a normal launch,
|
||||
// perhaps just seeing some delay due to a heavily loaded host.
|
||||
stream.set_read_timeout(Some(Duration::from_millis(DEFAULT_QMP_READ_TIMEOUT)))?;
|
||||
stream
|
||||
.set_read_timeout(Some(Duration::from_millis(DEFAULT_QMP_READ_TIMEOUT)))
|
||||
.context("set qmp read timeout")?;
|
||||
|
||||
let mut qmp = Qmp {
|
||||
qmp: qapi::Qmp::new(qapi::Stream::new(
|
||||
BufReader::new(stream.try_clone()?),
|
||||
stream,
|
||||
)),
|
||||
guest_memory_block_size: 0,
|
||||
let mut qmp = Qmp {
|
||||
qmp: qapi::Qmp::new(qapi::Stream::new(
|
||||
BufReader::new(stream.try_clone()?),
|
||||
stream,
|
||||
)),
|
||||
guest_memory_block_size: 0,
|
||||
};
|
||||
|
||||
let info = qmp.qmp.handshake().context("qmp handshake failed")?;
|
||||
info!(sl!(), "QMP initialized: {:#?}", info);
|
||||
|
||||
Ok(qmp)
|
||||
};
|
||||
|
||||
let info = qmp.qmp.handshake()?;
|
||||
info!(sl!(), "QMP initialized: {:#?}", info);
|
||||
let deadline = Instant::now() + Duration::from_millis(DEFAULT_QMP_CONNECT_DEADLINE_MS);
|
||||
let mut last_err: Option<anyhow::Error> = None;
|
||||
|
||||
Ok(qmp)
|
||||
while Instant::now() < deadline {
|
||||
match try_new_once_fn() {
|
||||
Ok(qmp) => return Ok(qmp),
|
||||
Err(e) => {
|
||||
debug!(sl!(), "QMP not ready yet: {}", e);
|
||||
last_err = Some(e);
|
||||
thread::sleep(Duration::from_millis(DEFAULT_QMP_RETRY_SLEEP_MS));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(last_err.unwrap_or_else(|| anyhow!("QMP init timed out")))
|
||||
.with_context(|| format!("timed out waiting for QMP ready: {}", qmp_sock_path))
|
||||
}
|
||||
|
||||
pub fn set_ignore_shared_memory_capability(&mut self) -> Result<()> {
|
||||
|
||||
@@ -6,39 +6,54 @@
|
||||
|
||||
use std::{
|
||||
io,
|
||||
os::{
|
||||
fd::IntoRawFd,
|
||||
unix::{
|
||||
fs::OpenOptionsExt,
|
||||
io::{FromRawFd, RawFd},
|
||||
net::UnixStream as StdUnixStream,
|
||||
prelude::AsRawFd,
|
||||
},
|
||||
os::unix::{
|
||||
fs::{FileTypeExt, OpenOptionsExt},
|
||||
io::RawFd,
|
||||
prelude::AsRawFd,
|
||||
},
|
||||
pin::Pin,
|
||||
task::{Context as TaskContext, Poll},
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use anyhow::{Context, Result};
|
||||
use tokio::{
|
||||
fs::OpenOptions,
|
||||
fs::{File, OpenOptions},
|
||||
io::{AsyncRead, AsyncWrite},
|
||||
net::UnixStream as AsyncUnixStream,
|
||||
};
|
||||
use url::Url;
|
||||
|
||||
fn open_fifo_write(path: &str) -> Result<AsyncUnixStream> {
|
||||
/// Clear O_NONBLOCK for an fd (turn it into blocking mode).
|
||||
fn set_flag_with_blocking(fd: RawFd) {
|
||||
let flag = unsafe { libc::fcntl(fd, libc::F_GETFL) };
|
||||
if flag < 0 {
|
||||
error!(sl!(), "failed to fcntl(F_GETFL) fd {} ret {}", fd, flag);
|
||||
return;
|
||||
}
|
||||
|
||||
let ret = unsafe { libc::fcntl(fd, libc::F_SETFL, flag & !libc::O_NONBLOCK) };
|
||||
if ret < 0 {
|
||||
error!(sl!(), "failed to fcntl(F_SETFL) fd {} ret {}", fd, ret);
|
||||
}
|
||||
}
|
||||
|
||||
fn open_fifo_write(path: &str) -> Result<File> {
|
||||
let std_file = std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
// It's not for non-block opening FIFO but for non-block stream which
|
||||
// will be added into tokio runtime.
|
||||
.custom_flags(libc::O_NONBLOCK)
|
||||
.open(path)
|
||||
.with_context(|| format!("open {path} with write"))?;
|
||||
let fd = std_file.into_raw_fd();
|
||||
let std_stream = unsafe { StdUnixStream::from_raw_fd(fd) };
|
||||
.with_context(|| format!("open fifo for write: {path}"))?;
|
||||
|
||||
AsyncUnixStream::from_std(std_stream).map_err(|e| anyhow!(e))
|
||||
// Debug
|
||||
let meta = std_file.metadata()?;
|
||||
if !meta.file_type().is_fifo() {
|
||||
debug!(sl!(), "[DEBUG]{} is not a fifo (type mismatch)", path);
|
||||
}
|
||||
|
||||
set_flag_with_blocking(std_file.as_raw_fd());
|
||||
|
||||
Ok(File::from_std(std_file))
|
||||
}
|
||||
|
||||
pub struct ShimIo {
|
||||
@@ -58,14 +73,6 @@ impl ShimIo {
|
||||
"new shim io stdin {:?} stdout {:?} stderr {:?}", stdin, stdout, stderr
|
||||
);
|
||||
|
||||
let set_flag_with_blocking = |fd: RawFd| {
|
||||
let flag = unsafe { libc::fcntl(fd, libc::F_GETFL) };
|
||||
let ret = unsafe { libc::fcntl(fd, libc::F_SETFL, flag & !libc::O_NONBLOCK) };
|
||||
if ret < 0 {
|
||||
error!(sl!(), "failed to set fcntl for fd {} error {}", fd, ret);
|
||||
}
|
||||
};
|
||||
|
||||
let stdin_fd: Option<Box<dyn AsyncRead + Send + Unpin>> = if let Some(stdin) = stdin {
|
||||
info!(sl!(), "open stdin {:?}", &stdin);
|
||||
|
||||
@@ -98,9 +105,7 @@ impl ShimIo {
|
||||
None => None,
|
||||
Some(out) => match Url::parse(out.as_str()) {
|
||||
Err(url::ParseError::RelativeUrlWithoutBase) => {
|
||||
let out = "fifo://".to_owned() + out.as_str();
|
||||
let u = Url::parse(out.as_str()).unwrap();
|
||||
Some(u)
|
||||
Url::parse(&format!("fifo://{}", out)).ok()
|
||||
}
|
||||
Err(err) => {
|
||||
warn!(sl!(), "unable to parse stdout uri: {}", err);
|
||||
@@ -111,26 +116,25 @@ impl ShimIo {
|
||||
}
|
||||
};
|
||||
|
||||
let stdout_url = get_url(stdout);
|
||||
let get_fd = |url: &Option<Url>| -> Option<Box<dyn AsyncWrite + Send + Unpin>> {
|
||||
info!(sl!(), "get fd for {:?}", &url);
|
||||
if let Some(url) = url {
|
||||
if url.scheme() == "fifo" {
|
||||
let path = url.path();
|
||||
match open_fifo_write(path) {
|
||||
Ok(s) => {
|
||||
return Some(Box::new(ShimIoWrite::Stream(s)));
|
||||
}
|
||||
Err(err) => {
|
||||
error!(sl!(), "failed to open file {} error {:?}", url.path(), err);
|
||||
}
|
||||
Ok(f) => return Some(Box::new(ShimIoWrite::File(f))),
|
||||
Err(err) => error!(sl!(), "failed to open fifo {} error {:?}", path, err),
|
||||
}
|
||||
} else {
|
||||
warn!(sl!(), "unsupported io scheme {}", url.scheme());
|
||||
}
|
||||
}
|
||||
None
|
||||
};
|
||||
|
||||
let stdout_url = get_url(stdout);
|
||||
let stderr_url = get_url(stderr);
|
||||
|
||||
Ok(Self {
|
||||
stdin: stdin_fd,
|
||||
stdout: get_fd(&stdout_url),
|
||||
@@ -141,7 +145,7 @@ impl ShimIo {
|
||||
|
||||
#[derive(Debug)]
|
||||
enum ShimIoWrite {
|
||||
Stream(AsyncUnixStream),
|
||||
File(File),
|
||||
// TODO: support other type
|
||||
}
|
||||
|
||||
@@ -151,20 +155,20 @@ impl AsyncWrite for ShimIoWrite {
|
||||
cx: &mut TaskContext<'_>,
|
||||
buf: &[u8],
|
||||
) -> Poll<io::Result<usize>> {
|
||||
match *self {
|
||||
ShimIoWrite::Stream(ref mut s) => Pin::new(s).poll_write(cx, buf),
|
||||
match &mut *self {
|
||||
ShimIoWrite::File(f) => Pin::new(f).poll_write(cx, buf),
|
||||
}
|
||||
}
|
||||
|
||||
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut TaskContext<'_>) -> Poll<io::Result<()>> {
|
||||
match *self {
|
||||
ShimIoWrite::Stream(ref mut s) => Pin::new(s).poll_flush(cx),
|
||||
match &mut *self {
|
||||
ShimIoWrite::File(f) => Pin::new(f).poll_flush(cx),
|
||||
}
|
||||
}
|
||||
|
||||
fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut TaskContext<'_>) -> Poll<io::Result<()>> {
|
||||
match *self {
|
||||
ShimIoWrite::Stream(ref mut s) => Pin::new(s).poll_shutdown(cx),
|
||||
match &mut *self {
|
||||
ShimIoWrite::File(f) => Pin::new(f).poll_shutdown(cx),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,10 +6,15 @@
|
||||
|
||||
use std::{
|
||||
ffi::{OsStr, OsString},
|
||||
io::Write,
|
||||
path::PathBuf,
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use containerd_shim_protos::{
|
||||
protobuf::Message,
|
||||
types::introspection::{RuntimeInfo, RuntimeVersion},
|
||||
};
|
||||
use nix::{
|
||||
mount::{mount, MsFlags},
|
||||
sched::{self, CloneFlags},
|
||||
@@ -29,11 +34,13 @@ enum Action {
|
||||
Delete(Args),
|
||||
Help,
|
||||
Version,
|
||||
Info,
|
||||
}
|
||||
|
||||
fn parse_args(args: &[OsString]) -> Result<Action> {
|
||||
let mut help = false;
|
||||
let mut version = false;
|
||||
let mut info = false;
|
||||
let mut shim_args = Args::default();
|
||||
|
||||
// Crate `go_flag` is used to keep compatible with go/flag package.
|
||||
@@ -46,6 +53,7 @@ fn parse_args(args: &[OsString]) -> Result<Action> {
|
||||
flags.add_flag("publish-binary", &mut shim_args.publish_binary);
|
||||
flags.add_flag("help", &mut help);
|
||||
flags.add_flag("version", &mut version);
|
||||
flags.add_flag("info", &mut info);
|
||||
})
|
||||
.context(Error::ParseArgument(format!("{args:?}")))?;
|
||||
|
||||
@@ -53,6 +61,8 @@ fn parse_args(args: &[OsString]) -> Result<Action> {
|
||||
Ok(Action::Help)
|
||||
} else if version {
|
||||
Ok(Action::Version)
|
||||
} else if info {
|
||||
Ok(Action::Info)
|
||||
} else if rest_args.is_empty() {
|
||||
Ok(Action::Run(shim_args))
|
||||
} else if rest_args[0] == "start" {
|
||||
@@ -83,6 +93,8 @@ fn show_help(cmd: &OsStr) {
|
||||
enable debug output in logs
|
||||
-id string
|
||||
id of the task
|
||||
-info
|
||||
output the runtime info as protobuf (for containerd v2.0+)
|
||||
-namespace string
|
||||
namespace that owns the shim
|
||||
-publish-binary string
|
||||
@@ -114,6 +126,25 @@ fn show_version(err: Option<anyhow::Error>) {
|
||||
}
|
||||
}
|
||||
|
||||
fn show_info() -> Result<()> {
|
||||
let mut version = RuntimeVersion::new();
|
||||
version.version = config::RUNTIME_VERSION.to_string();
|
||||
version.revision = config::RUNTIME_GIT_COMMIT.to_string();
|
||||
|
||||
let mut info = RuntimeInfo::new();
|
||||
info.name = config::CONTAINERD_RUNTIME_NAME.to_string();
|
||||
info.version = Some(version).into();
|
||||
|
||||
let data = info
|
||||
.write_to_bytes()
|
||||
.context("failed to marshal RuntimeInfo")?;
|
||||
std::io::stdout()
|
||||
.write_all(&data)
|
||||
.context("failed to write RuntimeInfo to stdout")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_tokio_runtime() -> Result<tokio::runtime::Runtime> {
|
||||
let worker_threads = std::env::var(ENV_TOKIO_RUNTIME_WORKER_THREADS)
|
||||
.unwrap_or_default()
|
||||
@@ -155,6 +186,7 @@ fn real_main() -> Result<()> {
|
||||
}
|
||||
Action::Help => show_help(&args[0]),
|
||||
Action::Version => show_version(None),
|
||||
Action::Info => show_info().context("show info")?,
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -174,10 +174,6 @@ HYPERVISORS := $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVISOR_CLH) $(HYPERVISO
|
||||
QEMUPATH := $(QEMUBINDIR)/$(QEMUCMD)
|
||||
QEMUVALIDHYPERVISORPATHS := [\"$(QEMUPATH)\"]
|
||||
|
||||
#QEMUTDXPATH := $(QEMUBINDIR)/$(QEMUTDXCMD)
|
||||
QEMUTDXPATH := PLACEHOLDER_FOR_DISTRO_QEMU_WITH_TDX_SUPPORT
|
||||
QEMUTDXVALIDHYPERVISORPATHS := [\"$(QEMUTDXPATH)\"]
|
||||
|
||||
QEMUTDXEXPERIMENTALPATH := $(QEMUBINDIR)/$(QEMUTDXEXPERIMENTALCMD)
|
||||
QEMUTDXEXPERIMENTALVALIDHYPERVISORPATHS := [\"$(QEMUTDXEXPERIMENTALPATH)\"]
|
||||
|
||||
@@ -250,7 +246,7 @@ DEFSECCOMPSANDBOXPARAM :=
|
||||
DEFENTROPYSOURCE := /dev/urandom
|
||||
DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"]
|
||||
|
||||
DEFDISABLEBLOCK := false
|
||||
DEFDISABLEBLOCK := true
|
||||
DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs
|
||||
DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs
|
||||
# Please keep DEFSHAREDFS_QEMU_COCO_DEV_VIRTIOFS in sync with TDX/SNP
|
||||
@@ -702,18 +698,15 @@ USER_VARS += PROJECT_TYPE
|
||||
USER_VARS += PROJECT_URL
|
||||
USER_VARS += QEMUBINDIR
|
||||
USER_VARS += QEMUCMD
|
||||
USER_VARS += QEMUTDXCMD
|
||||
USER_VARS += QEMUTDXEXPERIMENTALCMD
|
||||
USER_VARS += QEMUCCAEXPERIMENTALCMD
|
||||
USER_VARS += QEMUSNPCMD
|
||||
USER_VARS += QEMUPATH
|
||||
USER_VARS += QEMUTDXPATH
|
||||
USER_VARS += QEMUTDXEXPERIMENTALPATH
|
||||
USER_VARS += QEMUTDXQUOTEGENERATIONSERVICESOCKETPORT
|
||||
USER_VARS += QEMUSNPPATH
|
||||
USER_VARS += QEMUCCAEXPERIMENTALPATH
|
||||
USER_VARS += QEMUVALIDHYPERVISORPATHS
|
||||
USER_VARS += QEMUTDXVALIDHYPERVISORPATHS
|
||||
USER_VARS += QEMUTDXEXPERIMENTALVALIDHYPERVISORPATHS
|
||||
USER_VARS += QEMUCCAVALIDHYPERVISORPATHS
|
||||
USER_VARS += QEMUCCAEXPERIMENTALVALIDHYPERVISORPATHS
|
||||
|
||||
@@ -9,7 +9,9 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
containerdtypes "github.com/containerd/containerd/api/types"
|
||||
shimapi "github.com/containerd/containerd/runtime/v2/shim"
|
||||
"google.golang.org/protobuf/proto"
|
||||
|
||||
shim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
|
||||
@@ -21,6 +23,25 @@ func shimConfig(config *shimapi.Config) {
|
||||
config.NoSubreaper = true
|
||||
}
|
||||
|
||||
func handleInfoFlag() {
|
||||
info := &containerdtypes.RuntimeInfo{
|
||||
Name: types.DefaultKataRuntimeName,
|
||||
Version: &containerdtypes.RuntimeVersion{
|
||||
Version: katautils.VERSION,
|
||||
Revision: katautils.COMMIT,
|
||||
},
|
||||
}
|
||||
|
||||
data, err := proto.Marshal(info)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "failed to marshal RuntimeInfo: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
os.Stdout.Write(data)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
func main() {
|
||||
|
||||
if len(os.Args) == 2 && os.Args[1] == "--version" {
|
||||
@@ -28,5 +49,9 @@ func main() {
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if len(os.Args) == 2 && os.Args[1] == "-info" {
|
||||
handleInfoFlag()
|
||||
}
|
||||
|
||||
shimapi.Run(types.DefaultKataRuntimeName, shim.New, shimConfig)
|
||||
}
|
||||
|
||||
@@ -109,6 +109,20 @@ memory_slots = @DEFMEMSLOTS@
|
||||
# > amount of physical RAM --> will be set to the actual amount of physical RAM
|
||||
default_maxmemory = @DEFMAXMEMSZ@
|
||||
|
||||
# Disable hotplugging host block devices to guest VMs for container rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
# WARNING:
|
||||
# Don't set this flag to false if you don't understand well the behavior of
|
||||
# your container runtime and image snapshotter. Some snapshotters might use
|
||||
# container image storage devices that are not meant to be hotplugged into a
|
||||
# guest VM - e.g., because they contain files used by the host or by other
|
||||
# guests.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
# - virtio-fs (default)
|
||||
# - virtio-fs-nydus
|
||||
@@ -237,9 +251,9 @@ guest_hook_path = ""
|
||||
# and we strongly advise users to refer the Cloud Hypervisor official
|
||||
# documentation for a better understanding of its internals:
|
||||
# https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/docs/io_throttling.md
|
||||
#
|
||||
#
|
||||
# Bandwidth rate limiter options
|
||||
#
|
||||
#
|
||||
# net_rate_limiter_bw_max_rate controls network I/O bandwidth (size in bits/sec
|
||||
# for SB/VM).
|
||||
# The same value is used for inbound and outbound bandwidth.
|
||||
@@ -273,9 +287,9 @@ net_rate_limiter_ops_one_time_burst = 0
|
||||
# and we strongly advise users to refer the Cloud Hypervisor official
|
||||
# documentation for a better understanding of its internals:
|
||||
# https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/docs/io_throttling.md
|
||||
#
|
||||
#
|
||||
# Bandwidth rate limiter options
|
||||
#
|
||||
#
|
||||
# disk_rate_limiter_bw_max_rate controls disk I/O bandwidth (size in bits/sec
|
||||
# for SB/VM).
|
||||
# The same value is used for inbound and outbound bandwidth.
|
||||
@@ -462,9 +476,9 @@ enable_pprof = false
|
||||
|
||||
# Indicates the CreateContainer request timeout needed for the workload(s)
|
||||
# If using guest_pull this includes the time to pull the image inside the guest
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
|
||||
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
|
||||
|
||||
|
||||
@@ -367,9 +367,9 @@ enable_pprof = false
|
||||
|
||||
# Indicates the CreateContainer request timeout needed for the workload(s)
|
||||
# If using guest_pull this includes the time to pull the image inside the guest
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
|
||||
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
|
||||
|
||||
|
||||
@@ -159,12 +159,18 @@ memory_offset = 0
|
||||
# Default false
|
||||
enable_virtio_mem = false
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# Disable hotplugging host block devices to guest VMs for container rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
# WARNING:
|
||||
# Don't set this flag to false if you don't understand well the behavior of
|
||||
# your container runtime and image snapshotter. Some snapshotters might use
|
||||
# container image storage devices that are not meant to be hotplugged into a
|
||||
# guest VM - e.g., because they contain files used by the host or by other
|
||||
# guests.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
@@ -630,9 +636,9 @@ enable_pprof = false
|
||||
|
||||
# Indicates the CreateContainer request timeout needed for the workload(s)
|
||||
# If using guest_pull this includes the time to pull the image inside the guest
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
|
||||
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
|
||||
|
||||
|
||||
@@ -145,12 +145,18 @@ memory_offset = 0
|
||||
# Default false
|
||||
enable_virtio_mem = false
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# Disable hotplugging host block devices to guest VMs for container rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
# WARNING:
|
||||
# Don't set this flag to false if you don't understand well the behavior of
|
||||
# your container runtime and image snapshotter. Some snapshotters might use
|
||||
# container image storage devices that are not meant to be hotplugged into a
|
||||
# guest VM - e.g., because they contain files used by the host or by other
|
||||
# guests.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
@@ -356,17 +362,17 @@ msize_9p = @DEFMSIZE9P@
|
||||
# nvdimm is not supported when `confidential_guest = true`.
|
||||
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
|
||||
|
||||
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port"
|
||||
hot_plug_vfio = "no-port"
|
||||
hot_plug_vfio = "no-port"
|
||||
|
||||
# In a confidential compute environment hot-plugging can compromise
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
cold_plug_vfio = "no-port"
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
cold_plug_vfio = "no-port"
|
||||
|
||||
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
|
||||
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU
|
||||
@@ -688,9 +694,9 @@ enable_pprof = false
|
||||
|
||||
# Indicates the CreateContainer request timeout needed for the workload(s)
|
||||
# If using guest_pull this includes the time to pull the image inside the guest
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
|
||||
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ rootfs_type = @DEFROOTFSTYPE@
|
||||
#
|
||||
# Known limitations:
|
||||
# * Does not work by design:
|
||||
# - CPU Hotplug
|
||||
# - CPU Hotplug
|
||||
# - Memory Hotplug
|
||||
# - NVDIMM devices
|
||||
#
|
||||
@@ -75,7 +75,7 @@ snp_id_auth = ""
|
||||
|
||||
# SNP Guest Policy, the ‘POLICY’ parameter to the SNP_LAUNCH_START command.
|
||||
# If unset, the QEMU default policy (0x30000) will be used.
|
||||
# Notice that the guest policy is enforced at VM launch, and your pod VMs
|
||||
# Notice that the guest policy is enforced at VM launch, and your pod VMs
|
||||
# won't start at all if the policy denies it. This will be indicated by a
|
||||
# 'SNP_LAUNCH_START' error.
|
||||
snp_guest_policy = 196608
|
||||
@@ -185,12 +185,18 @@ memory_offset = 0
|
||||
# Default false
|
||||
enable_virtio_mem = false
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# Disable hotplugging host block devices to guest VMs for container rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
# WARNING:
|
||||
# Don't set this flag to false if you don't understand well the behavior of
|
||||
# your container runtime and image snapshotter. Some snapshotters might use
|
||||
# container image storage devices that are not meant to be hotplugged into a
|
||||
# guest VM - e.g., because they contain files used by the host or by other
|
||||
# guests.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
@@ -388,10 +394,10 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM_NV@
|
||||
pcie_root_port = 0
|
||||
|
||||
# In a confidential compute environment hot-plugging can compromise
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
cold_plug_vfio = "@DEFAULTVFIOPORT_NV@"
|
||||
|
||||
# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
|
||||
@@ -704,9 +710,9 @@ enable_pprof = false
|
||||
|
||||
# Indicates the CreateContainer request timeout needed for the workload(s)
|
||||
# If using guest_pull, this includes the time to pull the image inside the guest
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
|
||||
create_container_timeout = @DEFAULTTIMEOUT_NV@
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ rootfs_type = @DEFROOTFSTYPE@
|
||||
#
|
||||
# Known limitations:
|
||||
# * Does not work by design:
|
||||
# - CPU Hotplug
|
||||
# - CPU Hotplug
|
||||
# - Memory Hotplug
|
||||
# - NVDIMM devices
|
||||
#
|
||||
@@ -162,12 +162,18 @@ memory_offset = 0
|
||||
# Default false
|
||||
enable_virtio_mem = false
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# Disable hotplugging host block devices to guest VMs for container rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
# WARNING:
|
||||
# Don't set this flag to false if you don't understand well the behavior of
|
||||
# your container runtime and image snapshotter. Some snapshotters might use
|
||||
# container image storage devices that are not meant to be hotplugged into a
|
||||
# guest VM - e.g., because they contain files used by the host or by other
|
||||
# guests.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
@@ -365,10 +371,10 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM_NV@
|
||||
pcie_root_port = 0
|
||||
|
||||
# In a confidential compute environment hot-plugging can compromise
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
cold_plug_vfio = "@DEFAULTVFIOPORT_NV@"
|
||||
|
||||
# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
|
||||
@@ -681,9 +687,9 @@ enable_pprof = false
|
||||
|
||||
# Indicates the CreateContainer request timeout needed for the workload(s)
|
||||
# If using guest_pull, this includes the time to pull the image inside the guest
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
|
||||
create_container_timeout = @DEFAULTTIMEOUT_NV@
|
||||
|
||||
|
||||
@@ -144,12 +144,18 @@ memory_offset = 0
|
||||
# Default false
|
||||
enable_virtio_mem = false
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# Disable hotplugging host block devices to guest VMs for container rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
# WARNING:
|
||||
# Don't set this flag to false if you don't understand well the behavior of
|
||||
# your container runtime and image snapshotter. Some snapshotters might use
|
||||
# container image storage devices that are not meant to be hotplugged into a
|
||||
# guest VM - e.g., because they contain files used by the host or by other
|
||||
# guests.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
@@ -355,16 +361,16 @@ msize_9p = @DEFMSIZE9P@
|
||||
# nvdimm is not supported when `confidential_guest = true`.
|
||||
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM_NV@
|
||||
|
||||
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port"
|
||||
hot_plug_vfio = "no-port"
|
||||
|
||||
# In a confidential compute environment hot-plugging can compromise
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
cold_plug_vfio = "@DEFAULTVFIOPORT_NV@"
|
||||
|
||||
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
|
||||
@@ -683,9 +689,9 @@ enable_pprof = false
|
||||
|
||||
# Indicates the CreateContainer request timeout needed for the workload(s)
|
||||
# If using guest_pull, this includes the time to pull the image inside the guest
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
|
||||
create_container_timeout = @DEFAULTTIMEOUT_NV@
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ machine_type = "@MACHINETYPE@"
|
||||
#
|
||||
# Known limitations:
|
||||
# * Does not work by design:
|
||||
# - CPU Hotplug
|
||||
# - CPU Hotplug
|
||||
# - Memory Hotplug
|
||||
# - NVDIMM devices
|
||||
#
|
||||
@@ -153,12 +153,18 @@ memory_offset = 0
|
||||
# Default false
|
||||
enable_virtio_mem = false
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# Disable hotplugging host block devices to guest VMs for container rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
# WARNING:
|
||||
# Don't set this flag to false if you don't understand well the behavior of
|
||||
# your container runtime and image snapshotter. Some snapshotters might use
|
||||
# container image storage devices that are not meant to be hotplugged into a
|
||||
# guest VM - e.g., because they contain files used by the host or by other
|
||||
# guests.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
@@ -343,7 +349,7 @@ msize_9p = @DEFMSIZE9P@
|
||||
# nvdimm is not supported when `confidential_guest = true`.
|
||||
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
|
||||
|
||||
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port"
|
||||
hot_plug_vfio = "no-port"
|
||||
@@ -671,9 +677,9 @@ enable_pprof = false
|
||||
|
||||
# Indicates the CreateContainer request timeout needed for the workload(s)
|
||||
# If using guest_pull, this includes the time to pull the image inside the guest
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
|
||||
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ rootfs_type = @DEFROOTFSTYPE@
|
||||
#
|
||||
# Known limitations:
|
||||
# * Does not work by design:
|
||||
# - CPU Hotplug
|
||||
# - CPU Hotplug
|
||||
# - Memory Hotplug
|
||||
# - NVDIMM devices
|
||||
#
|
||||
@@ -74,7 +74,7 @@ snp_id_auth = ""
|
||||
|
||||
# SNP Guest Policy, the ‘POLICY’ parameter to the SNP_LAUNCH_START command.
|
||||
# If unset, the QEMU default policy (0x30000) will be used.
|
||||
# Notice that the guest policy is enforced at VM launch, and your pod VMs
|
||||
# Notice that the guest policy is enforced at VM launch, and your pod VMs
|
||||
# won't start at all if the policy denies it. This will be indicated by a
|
||||
# 'SNP_LAUNCH_START' error.
|
||||
snp_guest_policy = 196608
|
||||
@@ -184,12 +184,18 @@ memory_offset = 0
|
||||
# Default false
|
||||
enable_virtio_mem = false
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# Disable hotplugging host block devices to guest VMs for container rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
# WARNING:
|
||||
# Don't set this flag to false if you don't understand well the behavior of
|
||||
# your container runtime and image snapshotter. Some snapshotters might use
|
||||
# container image storage devices that are not meant to be hotplugged into a
|
||||
# guest VM - e.g., because they contain files used by the host or by other
|
||||
# guests.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
@@ -696,9 +702,9 @@ enable_pprof = false
|
||||
|
||||
# Indicates the CreateContainer request timeout needed for the workload(s)
|
||||
# If using guest_pull, this includes the time to pull the image inside the guest
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
|
||||
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
# XXX: Type: @PROJECT_TYPE@
|
||||
|
||||
[hypervisor.qemu]
|
||||
path = "@QEMUTDXPATH@"
|
||||
path = "@QEMUPATH@"
|
||||
kernel = "@KERNELCONFIDENTIALPATH@"
|
||||
image = "@IMAGECONFIDENTIALPATH@"
|
||||
machine_type = "@MACHINETYPE@"
|
||||
@@ -33,7 +33,7 @@ rootfs_type = @DEFROOTFSTYPE@
|
||||
#
|
||||
# Known limitations:
|
||||
# * Does not work by design:
|
||||
# - CPU Hotplug
|
||||
# - CPU Hotplug
|
||||
# - Memory Hotplug
|
||||
# - NVDIMM devices
|
||||
#
|
||||
@@ -54,7 +54,7 @@ enable_annotations = @DEFENABLEANNOTATIONS_COCO@
|
||||
# Each member of the list is a path pattern as described by glob(3).
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@
|
||||
valid_hypervisor_paths = @QEMUTDXVALIDHYPERVISORPATHS@
|
||||
valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@
|
||||
|
||||
# Optional space-separated list of options to pass to the guest kernel.
|
||||
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
|
||||
@@ -161,12 +161,18 @@ memory_offset = 0
|
||||
# Default false
|
||||
enable_virtio_mem = false
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# Disable hotplugging host block devices to guest VMs for container rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
# WARNING:
|
||||
# Don't set this flag to false if you don't understand well the behavior of
|
||||
# your container runtime and image snapshotter. Some snapshotters might use
|
||||
# container image storage devices that are not meant to be hotplugged into a
|
||||
# guest VM - e.g., because they contain files used by the host or by other
|
||||
# guests.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
@@ -673,9 +679,9 @@ enable_pprof = false
|
||||
|
||||
# Indicates the CreateContainer request timeout needed for the workload(s)
|
||||
# If using guest_pull, this includes the time to pull the image inside the guest
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
|
||||
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
|
||||
|
||||
|
||||
@@ -144,12 +144,18 @@ memory_offset = 0
|
||||
# Default false
|
||||
enable_virtio_mem = false
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# Disable hotplugging host block devices to guest VMs for container rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
# WARNING:
|
||||
# Don't set this flag to false if you don't understand well the behavior of
|
||||
# your container runtime and image snapshotter. Some snapshotters might use
|
||||
# container image storage devices that are not meant to be hotplugged into a
|
||||
# guest VM - e.g., because they contain files used by the host or by other
|
||||
# guests.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
@@ -355,17 +361,17 @@ msize_9p = @DEFMSIZE9P@
|
||||
# nvdimm is not supported when `confidential_guest = true`.
|
||||
disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM@
|
||||
|
||||
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port"
|
||||
hot_plug_vfio = "no-port"
|
||||
|
||||
# In a confidential compute environment hot-plugging can compromise
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
cold_plug_vfio = "no-port"
|
||||
# security.
|
||||
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||
# root-port or switch-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
cold_plug_vfio = "no-port"
|
||||
|
||||
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
|
||||
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU
|
||||
@@ -687,9 +693,9 @@ enable_pprof = false
|
||||
|
||||
# Indicates the CreateContainer request timeout needed for the workload(s)
|
||||
# If using guest_pull, this includes the time to pull the image inside the guest
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
|
||||
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
|
||||
|
||||
|
||||
@@ -103,12 +103,18 @@ default_maxmemory = @DEFMAXMEMSZ@
|
||||
# Default 0
|
||||
memory_offset = 0
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# Disable hotplugging host block devices to guest VMs for container rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
# WARNING:
|
||||
# Don't set this flag to false if you don't understand well the behavior of
|
||||
# your container runtime and image snapshotter. Some snapshotters might use
|
||||
# container image storage devices that are not meant to be hotplugged into a
|
||||
# guest VM - e.g., because they contain files used by the host or by other
|
||||
# guests.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
@@ -404,9 +410,9 @@ enable_pprof = false
|
||||
|
||||
# Indicates the CreateContainer request timeout needed for the workload(s)
|
||||
# If using guest_pull, this includes the time to pull the image inside the guest
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
|
||||
# Note: The effective timeout is determined by the lesser of two values: runtime-request-timeout from kubelet config
|
||||
# (https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=runtime%2Drequest%2Dtimeout) and create_container_timeout.
|
||||
# In essence, the timeout used for guest pull=runtime-request-timeout<create_container_timeout?runtime-request-timeout:create_container_timeout.
|
||||
create_container_timeout = @DEFCREATECONTAINERTIMEOUT@
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
module github.com/kata-containers/kata-containers/src/runtime
|
||||
|
||||
// Keep in sync with version in versions.yaml
|
||||
go 1.24.11
|
||||
go 1.24.12
|
||||
|
||||
// WARNING: Do NOT use `replace` directives as those break dependabot:
|
||||
// https://github.com/kata-containers/kata-containers/issues/11020
|
||||
@@ -49,7 +49,7 @@ require (
|
||||
github.com/safchain/ethtool v0.6.2
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
github.com/stretchr/testify v1.11.1
|
||||
github.com/urfave/cli v1.22.15
|
||||
github.com/urfave/cli v1.22.17
|
||||
github.com/vishvananda/netlink v1.3.1
|
||||
github.com/vishvananda/netns v0.0.5
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20220601114329-47893b162965
|
||||
@@ -85,7 +85,7 @@ require (
|
||||
github.com/containerd/log v0.1.0 // indirect
|
||||
github.com/containerd/platforms v0.2.1 // indirect
|
||||
github.com/containernetworking/cni v1.3.0 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
|
||||
github.com/cyphar/filepath-securejoin v0.6.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
||||
github.com/distribution/reference v0.6.0 // indirect
|
||||
|
||||
@@ -8,7 +8,6 @@ github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h
|
||||
github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 h1:59MxjQVfjXsBpLy+dbd2/ELV5ofnUkUZBvWSC85sheA=
|
||||
github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0/go.mod h1:OahwfttHWG6eJ0clwcfBAHoDI6X/LV/15hx/wlMZSrU=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
|
||||
github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg=
|
||||
github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
|
||||
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
|
||||
@@ -70,9 +69,8 @@ github.com/containernetworking/plugins v1.9.0 h1:Mg3SXBdRGkdXyFC4lcwr6u2ZB2SDeL6
|
||||
github.com/containernetworking/plugins v1.9.0/go.mod h1:JG3BxoJifxxHBhG3hFyxyhid7JgRVBu/wtooGEvWf1c=
|
||||
github.com/coreos/go-systemd/v22 v22.6.0 h1:aGVa/v8B7hpb0TKl0MWoAavPDmHvobFe5R5zn0bCJWo=
|
||||
github.com/coreos/go-systemd/v22 v22.6.0/go.mod h1:iG+pp635Fo7ZmV/j14KUcmEyWF+0X7Lua8rrTWzYgWU=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.6 h1:XJtiaUW6dEEqVuZiMTn1ldk455QWwEIsMIJlo5vtkx0=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
|
||||
github.com/cri-o/cri-o v1.34.0 h1:ux2URwAyENy5e5hD9Z95tshdfy98eqatZk0fxx3rhuk=
|
||||
github.com/cri-o/cri-o v1.34.0/go.mod h1:kP40HG+1EW5CDNHjqQBFhb6dehT5dCBKcmtO5RZAm6k=
|
||||
github.com/cyphar/filepath-securejoin v0.6.0 h1:BtGB77njd6SVO6VztOHfPxKitJvd/VPT+OFBFMOi1Is=
|
||||
@@ -289,13 +287,13 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
|
||||
github.com/urfave/cli v1.22.15 h1:nuqt+pdC/KqswQKhETJjo7pvn/k4xMUxgW6liI7XpnM=
|
||||
github.com/urfave/cli v1.22.15/go.mod h1:wSan1hmo5zeyLGBjRJbzRTNk8gwoYa2B9n4q9dmRIc0=
|
||||
github.com/urfave/cli v1.22.17 h1:SYzXoiPfQjHBbkYxbew5prZHS1TOLT3ierW8SYLqtVQ=
|
||||
github.com/urfave/cli v1.22.17/go.mod h1:b0ht0aqgH/6pBYzzxURyrM4xXNgsoT/n2ZzwQiEhNVo=
|
||||
github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0=
|
||||
github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
|
||||
github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
|
||||
|
||||
@@ -19,8 +19,13 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
// containerd CRI annotations
|
||||
nameAnnotation = "io.kubernetes.cri.sandbox-name"
|
||||
namespaceAnnotation = "io.kubernetes.cri.sandbox-namespace"
|
||||
|
||||
// CRI-O annotations
|
||||
crioNameAnnotation = "io.kubernetes.cri-o.KubeName"
|
||||
crioNamespaceAnnotation = "io.kubernetes.cri-o.Namespace"
|
||||
)
|
||||
|
||||
// coldPlugDevices handles cold plug of CDI devices into the sandbox
|
||||
@@ -78,8 +83,7 @@ func coldPlugWithAPI(ctx context.Context, s *service, ociSpec *specs.Spec) error
|
||||
// the Kubelet does not pass the device information via CRI during
|
||||
// Sandbox creation.
|
||||
func getDeviceSpec(ctx context.Context, socket string, ann map[string]string) ([]string, error) {
|
||||
podName := ann[nameAnnotation]
|
||||
podNs := ann[namespaceAnnotation]
|
||||
podName, podNs := getPodIdentifiers(ann)
|
||||
|
||||
// create dialer for unix socket
|
||||
dialer := func(ctx context.Context, target string) (net.Conn, error) {
|
||||
@@ -111,7 +115,7 @@ func getDeviceSpec(ctx context.Context, socket string, ann map[string]string) ([
|
||||
}
|
||||
resp, err := client.Get(ctx, prr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cold plug: GetPodResources failed: %w", err)
|
||||
return nil, fmt.Errorf("cold plug: GetPodResources failed for pod(%s) in namespace(%s): %w", podName, podNs, err)
|
||||
}
|
||||
podRes := resp.PodResources
|
||||
if podRes == nil {
|
||||
@@ -141,6 +145,24 @@ func formatCDIDevIDs(specName string, devIDs []string) []string {
|
||||
return result
|
||||
}
|
||||
|
||||
func debugPodID(ann map[string]string) string {
|
||||
return fmt.Sprintf("%s/%s", ann[namespaceAnnotation], ann[nameAnnotation])
|
||||
// getPodIdentifiers returns the pod name and namespace from annotations.
|
||||
// It first checks containerd CRI annotations, then falls back to CRI-O annotations.
|
||||
func getPodIdentifiers(ann map[string]string) (podName, podNamespace string) {
|
||||
podName = ann[nameAnnotation]
|
||||
podNamespace = ann[namespaceAnnotation]
|
||||
|
||||
// Fall back to CRI-O annotations if containerd annotations are empty
|
||||
if podName == "" {
|
||||
podName = ann[crioNameAnnotation]
|
||||
}
|
||||
if podNamespace == "" {
|
||||
podNamespace = ann[crioNamespaceAnnotation]
|
||||
}
|
||||
|
||||
return podName, podNamespace
|
||||
}
|
||||
|
||||
func debugPodID(ann map[string]string) string {
|
||||
podName, podNamespace := getPodIdentifiers(ann)
|
||||
return fmt.Sprintf("%s/%s", podNamespace, podName)
|
||||
}
|
||||
|
||||
1
src/runtime/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go
generated
vendored
1
src/runtime/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go
generated
vendored
@@ -1,3 +1,4 @@
|
||||
// Package md2man aims in converting markdown into roff (man pages).
|
||||
package md2man
|
||||
|
||||
import (
|
||||
|
||||
15
src/runtime/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go
generated
vendored
15
src/runtime/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go
generated
vendored
@@ -47,13 +47,13 @@ const (
|
||||
tableStart = "\n.TS\nallbox;\n"
|
||||
tableEnd = ".TE\n"
|
||||
tableCellStart = "T{\n"
|
||||
tableCellEnd = "\nT}\n"
|
||||
tableCellEnd = "\nT}"
|
||||
tablePreprocessor = `'\" t`
|
||||
)
|
||||
|
||||
// NewRoffRenderer creates a new blackfriday Renderer for generating roff documents
|
||||
// from markdown
|
||||
func NewRoffRenderer() *roffRenderer { // nolint: golint
|
||||
func NewRoffRenderer() *roffRenderer {
|
||||
return &roffRenderer{}
|
||||
}
|
||||
|
||||
@@ -316,9 +316,8 @@ func (r *roffRenderer) handleTableCell(w io.Writer, node *blackfriday.Node, ente
|
||||
} else if nodeLiteralSize(node) > 30 {
|
||||
end = tableCellEnd
|
||||
}
|
||||
if node.Next == nil && end != tableCellEnd {
|
||||
// Last cell: need to carriage return if we are at the end of the
|
||||
// header row and content isn't wrapped in a "tablecell"
|
||||
if node.Next == nil {
|
||||
// Last cell: need to carriage return if we are at the end of the header row.
|
||||
end += crTag
|
||||
}
|
||||
out(w, end)
|
||||
@@ -356,7 +355,7 @@ func countColumns(node *blackfriday.Node) int {
|
||||
}
|
||||
|
||||
func out(w io.Writer, output string) {
|
||||
io.WriteString(w, output) // nolint: errcheck
|
||||
io.WriteString(w, output) //nolint:errcheck
|
||||
}
|
||||
|
||||
func escapeSpecialChars(w io.Writer, text []byte) {
|
||||
@@ -395,7 +394,7 @@ func escapeSpecialCharsLine(w io.Writer, text []byte) {
|
||||
i++
|
||||
}
|
||||
if i > org {
|
||||
w.Write(text[org:i]) // nolint: errcheck
|
||||
w.Write(text[org:i]) //nolint:errcheck
|
||||
}
|
||||
|
||||
// escape a character
|
||||
@@ -403,7 +402,7 @@ func escapeSpecialCharsLine(w io.Writer, text []byte) {
|
||||
break
|
||||
}
|
||||
|
||||
w.Write([]byte{'\\', text[i]}) // nolint: errcheck
|
||||
w.Write([]byte{'\\', text[i]}) //nolint:errcheck
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
4
src/runtime/vendor/modules.txt
vendored
4
src/runtime/vendor/modules.txt
vendored
@@ -257,7 +257,7 @@ github.com/containernetworking/plugins/pkg/testutils
|
||||
# github.com/coreos/go-systemd/v22 v22.6.0
|
||||
## explicit; go 1.23
|
||||
github.com/coreos/go-systemd/v22/dbus
|
||||
# github.com/cpuguy83/go-md2man/v2 v2.0.6
|
||||
# github.com/cpuguy83/go-md2man/v2 v2.0.7
|
||||
## explicit; go 1.12
|
||||
github.com/cpuguy83/go-md2man/v2/md2man
|
||||
# github.com/cri-o/cri-o v1.34.0
|
||||
@@ -526,7 +526,7 @@ github.com/stretchr/testify/assert/yaml
|
||||
# github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635
|
||||
## explicit
|
||||
github.com/syndtr/gocapability/capability
|
||||
# github.com/urfave/cli v1.22.15
|
||||
# github.com/urfave/cli v1.22.17
|
||||
## explicit; go 1.11
|
||||
github.com/urfave/cli
|
||||
# github.com/vishvananda/netlink v1.3.1
|
||||
|
||||
@@ -861,6 +861,10 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
||||
return fmt.Errorf("Cannot get VFIO device from IOMMUFD with device: %v err: %v", dev, err)
|
||||
}
|
||||
} else {
|
||||
if q.config.ConfidentialGuest {
|
||||
return fmt.Errorf("ConfidentialGuest needs IOMMUFD - cannot use %s", dev.HostPath)
|
||||
}
|
||||
|
||||
vfioDevices, err = drivers.GetAllVFIODevicesFromIOMMUGroup(dev)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
module kata-containers/csi-kata-directvolume
|
||||
|
||||
// Keep in sync with version in versions.yaml
|
||||
go 1.24.11
|
||||
go 1.24.12
|
||||
|
||||
// WARNING: Do NOT use `replace` directives as those break dependabot:
|
||||
// https://github.com/kata-containers/kata-containers/issues/11020
|
||||
|
||||
8
src/tools/kata-ctl/Cargo.lock
generated
8
src/tools/kata-ctl/Cargo.lock
generated
@@ -3024,9 +3024,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "qapi"
|
||||
version = "0.14.0"
|
||||
version = "0.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c6412bdd014ebee03ddbbe79ac03a0b622cce4d80ba45254f6357c847f06fa38"
|
||||
checksum = "7b047adab56acc4948d4b9b58693c1f33fd13efef2d6bb5f0f66a47436ceada8"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"futures",
|
||||
@@ -3061,9 +3061,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "qapi-qmp"
|
||||
version = "0.14.0"
|
||||
version = "0.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8b944db7e544d2fa97595e9a000a6ba5c62c426fa185e7e00aabe4b5640b538"
|
||||
checksum = "45303cac879d89361cad0287ae15f9ae1e7799b904b474152414aeece39b9875"
|
||||
dependencies = [
|
||||
"qapi-codegen",
|
||||
"qapi-spec",
|
||||
|
||||
@@ -81,6 +81,7 @@ pub enum Commands {
|
||||
#[error("Argument is not valid")]
|
||||
pub struct CheckArgument {
|
||||
#[clap(subcommand)]
|
||||
#[allow(unused_assignments)]
|
||||
pub command: CheckSubCommand,
|
||||
}
|
||||
|
||||
|
||||
@@ -486,11 +486,11 @@ mod tests {
|
||||
let releases = get_kata_all_releases_by_url(KATA_GITHUB_RELEASE_URL);
|
||||
// sometime in GitHub action accessing to github.com API may fail
|
||||
// we can skip this test to prevent the whole test fail.
|
||||
if releases.is_err() {
|
||||
if let Err(error) = releases {
|
||||
warn!(
|
||||
sl!(),
|
||||
"get kata version failed({:?}), this maybe a temporary error, just skip the test.",
|
||||
releases.unwrap_err()
|
||||
error
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
module github.com/kata-containers/kata-containers/src/tools/log-parser
|
||||
|
||||
// Keep in sync with version in versions.yaml
|
||||
go 1.24.11
|
||||
go 1.24.12
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v1.1.0
|
||||
|
||||
1
src/tools/runk/.gitignore
vendored
1
src/tools/runk/.gitignore
vendored
@@ -1 +0,0 @@
|
||||
/vendor/
|
||||
3943
src/tools/runk/Cargo.lock
generated
3943
src/tools/runk/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,38 +0,0 @@
|
||||
[package]
|
||||
name = "runk"
|
||||
version = "0.0.1"
|
||||
authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
|
||||
description = "runk: Kata OCI container runtime based on Kata agent"
|
||||
license = "Apache-2.0"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
libcontainer = { path = "./libcontainer" }
|
||||
rustjail = { path = "../../agent/rustjail", features = [
|
||||
"standard-oci-runtime",
|
||||
] }
|
||||
runtime-spec = { path = "../../libs/runtime-spec" }
|
||||
oci-spec = { version = "0.8.1", features = ["runtime"] }
|
||||
logging = { path = "../../libs/logging" }
|
||||
liboci-cli = "0.5.3"
|
||||
clap = { version = "4.5.40", features = ["derive", "cargo"] }
|
||||
libc = "0.2.108"
|
||||
nix = "0.23.0"
|
||||
anyhow = "1.0.52"
|
||||
slog = "2.7.0"
|
||||
chrono = { version = "0.4.19", features = ["serde"] }
|
||||
slog-async = "2.7.0"
|
||||
tokio = { version = "1.44.2", features = ["full"] }
|
||||
serde = { version = "1.0.133", features = ["derive"] }
|
||||
serde_json = "1.0.74"
|
||||
uzers = "0.12.1"
|
||||
tabwriter = "1.2.1"
|
||||
|
||||
[features]
|
||||
seccomp = ["rustjail/seccomp"]
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.19.1"
|
||||
|
||||
[workspace]
|
||||
members = ["libcontainer"]
|
||||
@@ -1,67 +0,0 @@
|
||||
# Copyright 2021-2022 Sony Group Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# LIBC=musl|gnu (default: gnu)
|
||||
LIBC ?= gnu
|
||||
|
||||
include ../../../utils.mk
|
||||
|
||||
TARGET = runk
|
||||
TARGET_PATH = target/$(TRIPLE)/$(BUILD_TYPE)/$(TARGET)
|
||||
AGENT_SOURCE_PATH = ../../agent
|
||||
|
||||
EXTRA_RUSTFEATURES :=
|
||||
|
||||
# Define if runk enables seccomp support (default: yes)
|
||||
SECCOMP := yes
|
||||
|
||||
# BINDIR is a directory for installing executable programs
|
||||
BINDIR := /usr/local/bin
|
||||
|
||||
ifeq ($(SECCOMP),yes)
|
||||
override EXTRA_RUSTFEATURES += seccomp
|
||||
endif
|
||||
|
||||
ifneq ($(EXTRA_RUSTFEATURES),)
|
||||
override EXTRA_RUSTFEATURES := --features "$(EXTRA_RUSTFEATURES)"
|
||||
endif
|
||||
|
||||
.DEFAULT_GOAL := default
|
||||
default: build
|
||||
|
||||
build:
|
||||
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) --$(BUILD_TYPE) $(EXTRA_RUSTFEATURES)
|
||||
|
||||
static-checks-build:
|
||||
@echo "INFO: static-checks-build do nothing.."
|
||||
|
||||
install:
|
||||
install -D $(TARGET_PATH) $(BINDIR)/$(TARGET)
|
||||
|
||||
clean:
|
||||
cargo clean
|
||||
|
||||
vendor:
|
||||
cargo vendor
|
||||
|
||||
test: test-runk test-agent
|
||||
|
||||
test-runk:
|
||||
cargo test --all --target $(TRIPLE) $(EXTRA_RUSTFEATURES) -- --nocapture
|
||||
|
||||
test-agent:
|
||||
make test -C $(AGENT_SOURCE_PATH) STANDARD_OCI_RUNTIME=yes
|
||||
|
||||
check: standard_rust_check
|
||||
|
||||
.PHONY: \
|
||||
build \
|
||||
install \
|
||||
clean \
|
||||
clippy \
|
||||
format \
|
||||
vendor \
|
||||
test \
|
||||
check \
|
||||
@@ -1,352 +0,0 @@
|
||||
# runk
|
||||
|
||||
## Overview
|
||||
|
||||
> **Warnings:**
|
||||
> `runk` is currently an experimental tool.
|
||||
> Only continue if you are using a non-critical system.
|
||||
|
||||
`runk` is a standard OCI container runtime written in Rust based on a modified version of
|
||||
the [Kata Container agent](https://github.com/kata-containers/kata-containers/tree/main/src/agent), `kata-agent`.
|
||||
|
||||
`runk` conforms to the [OCI Container Runtime specifications](https://github.com/opencontainers/runtime-spec).
|
||||
|
||||
Unlike the [Kata Container runtime](https://github.com/kata-containers/kata-containers/tree/main/src/agent#features),
|
||||
`kata-runtime`, `runk` spawns and runs containers on the host machine directly.
|
||||
The user can run `runk` in the same way as the existing container runtimes such as `runc`,
|
||||
the most used implementation of the OCI runtime specs.
|
||||
|
||||
## Why does `runk` exist?
|
||||
|
||||
The `kata-agent` is a process running inside a virtual machine (VM) as a supervisor for managing containers
|
||||
and processes running within those containers.
|
||||
In other words, the `kata-agent` is a kind of "low-level" container runtime inside VM because the agent
|
||||
spawns and runs containers according to the OCI runtime specs.
|
||||
However, the `kata-agent` does not have the OCI Command-Line Interface (CLI) that is defined in the
|
||||
[runtime spec](https://github.com/opencontainers/runtime-spec/blob/main/runtime.md).
|
||||
The `kata-runtime` provides the CLI part of the Kata Containers runtime component,
|
||||
but the `kata-runtime` is a container runtime for creating hardware-virtualized containers running on the host.
|
||||
|
||||
`runk` is a Rust-based standard OCI container runtime that manages normal containers,
|
||||
not hardware-virtualized containers.
|
||||
`runk` aims to become one of the alternatives to existing OCI compliant container runtimes.
|
||||
The `kata-agent` has most of the [features](https://github.com/kata-containers/kata-containers/tree/main/src/agent#features)
|
||||
needed for the container runtime and delivers high performance with a low memory footprint owing to the
|
||||
implementation in the Rust language.
|
||||
Therefore, `runk` leverages the mechanism of the `kata-agent` to avoid reinventing the wheel.
|
||||
|
||||
## Performance
|
||||
|
||||
`runk` is faster than `runc` and has a lower memory footprint.
|
||||
|
||||
This table shows the average of the elapsed time and the memory footprint (maximum resident set size)
|
||||
for sequentially running 100 containers, each of which runs `/bin/true` using the `run` command with
|
||||
[detached mode](https://github.com/opencontainers/runc/blob/main/docs/terminals.md#detached)
|
||||
on 12 CPU cores (`3.8 GHz AMD Ryzen 9 3900X`) and 32 GiB of RAM.
|
||||
Currently, `runk` always runs containers in detached mode.
|
||||
|
||||
Evaluation Results:
|
||||
|
||||
| | `runk` (v0.0.1) | `runc` (v1.0.3) | `crun` (v1.4.2) |
|
||||
|-----------------------|---------------|---------------|---------------|
|
||||
| time [ms] | 39.83 | 50.39 | 38.41 |
|
||||
| memory footprint [MB] | 4.013 | 10.78 | 1.738 |
|
||||
|
||||
## Status of `runk`
|
||||
|
||||
We drafted the initial code here, and any contributions to `runk` and [`kata-agent`](https://github.com/kata-containers/kata-containers/tree/main/src/agent)
|
||||
are welcome.
|
||||
|
||||
Regarding features compared to `runc`, see the `Status of runk` section in the [issue](https://github.com/kata-containers/kata-containers/issues/2784).
|
||||
|
||||
## Building
|
||||
|
||||
In order to enable seccomp support, you need to install the `libseccomp` library on
|
||||
your platform.
|
||||
|
||||
> e.g. `libseccomp-dev` for Ubuntu, or `libseccomp-devel` for CentOS
|
||||
|
||||
You can build `runk`:
|
||||
|
||||
```bash
|
||||
$ cd runk
|
||||
$ make
|
||||
```
|
||||
|
||||
If you want to build a statically linked binary of `runk`, set the environment
|
||||
variables for the [`libseccomp` crate](https://github.com/libseccomp-rs/libseccomp-rs) and
|
||||
set the `LIBC` to `musl`:
|
||||
|
||||
```bash
|
||||
$ export LIBSECCOMP_LINK_TYPE=static
|
||||
$ export LIBSECCOMP_LIB_PATH="the path of the directory containing libseccomp.a"
|
||||
$ export LIBC=musl
|
||||
$ make
|
||||
```
|
||||
|
||||
> **Note**:
|
||||
>
|
||||
> - If the compilation fails when `runk` tries to link the `libseccomp` library statically
|
||||
> against `musl`, you will need to build the `libseccomp` manually with `-U_FORTIFY_SOURCE`.
|
||||
> For the details, see [our script](https://github.com/kata-containers/kata-containers/blob/main/ci/install_libseccomp.sh)
|
||||
> to install the `libseccomp` for the agent.
|
||||
> - On `ppc64le` and `s390x`, `glibc` should be used even if `LIBC=musl` is specified.
|
||||
> - If you do not want to enable seccomp support, run `make SECCOMP=no`.
|
||||
|
||||
To install `runk` into the default directory for executable programs (`/usr/local/bin`):
|
||||
|
||||
```bash
|
||||
$ sudo -E make install
|
||||
```
|
||||
|
||||
## Using `runk` directly
|
||||
|
||||
Please note that `runk` is a low level tool not developed with an end user in mind.
|
||||
It is mostly employed by other higher-level container software like `containerd`.
|
||||
|
||||
If you still want to use `runk` directly, here's how.
|
||||
|
||||
### Prerequisites
|
||||
|
||||
It is necessary to create an OCI bundle to use the tool. The simplest method is:
|
||||
|
||||
``` bash
|
||||
$ bundle_dir="bundle"
|
||||
$ rootfs_dir="$bundle_dir/rootfs"
|
||||
$ image="busybox"
|
||||
$ mkdir -p "$rootfs_dir" && (cd "$bundle_dir" && runk spec)
|
||||
$ sudo docker export $(sudo docker create "$image") | tar -C "$rootfs_dir" -xf -
|
||||
```
|
||||
|
||||
> **Note:**
|
||||
> If you use the unmodified `runk spec` template, this should give a `sh` session inside the container.
|
||||
> However, if you use `runk` directly and run a container with the unmodified template,
|
||||
> `runk` cannot launch the `sh` session because `runk` does not support terminal handling yet.
|
||||
> You need to edit the process field in `config.json` so that it looks like the example below,
|
||||
> with `"terminal": false` and `"args": ["sleep", "10"]`.
|
||||
|
||||
```json
|
||||
"process": {
|
||||
"terminal": false,
|
||||
"user": {
|
||||
"uid": 0,
|
||||
"gid": 0
|
||||
},
|
||||
"args": [
|
||||
"sleep",
|
||||
"10"
|
||||
],
|
||||
"env": [
|
||||
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
||||
"TERM=xterm"
|
||||
],
|
||||
"cwd": "/",
|
||||
[...]
|
||||
}
|
||||
```
|
||||
|
||||
If you want to launch the `sh` session inside the container, you need to run `runk` from `containerd`.
|
||||
|
||||
Please refer to the [Using `runk` from containerd](#using-runk-from-containerd) section.
|
||||
|
||||
### Running a container
|
||||
|
||||
Now you can go through the [lifecycle operations](https://github.com/opencontainers/runtime-spec/blob/main/runtime.md)
|
||||
in your shell.
|
||||
You need to run `runk` as `root` because `runk` does not have the rootless feature which is the ability
|
||||
to run containers without root privileges.
|
||||
|
||||
```bash
|
||||
$ cd $bundle_dir
|
||||
|
||||
# Create a container
|
||||
$ sudo runk create test
|
||||
|
||||
# Verify that the container has been created and is in the "created" state
|
||||
$ sudo runk state test
|
||||
|
||||
# Start the process inside the container
|
||||
$ sudo runk start test
|
||||
|
||||
# After 10 seconds view that the container has exited and is now in the "stopped" state
|
||||
$ sudo runk state test
|
||||
|
||||
# Now delete the container
|
||||
$ sudo runk delete test
|
||||
```
|
||||
|
||||
## Using `runk` from `Docker`
|
||||
|
||||
`runk` can run containers using [`Docker`](https://github.com/docker).
|
||||
|
||||
First, install `Docker` from package by following the
|
||||
[`Docker` installation instructions](https://docs.docker.com/engine/install/).
|
||||
|
||||
### Running a container with `Docker` command line
|
||||
|
||||
Start the docker daemon:
|
||||
|
||||
```bash
|
||||
$ sudo dockerd --experimental --add-runtime="runk=/usr/local/bin/runk"
|
||||
```
|
||||
|
||||
> **Note:**
|
||||
> Before starting the `dockerd`, you need to stop the normal docker daemon
|
||||
> running on your environment (i.e., `systemctl stop docker`).
|
||||
|
||||
Launch a container in a different terminal:
|
||||
|
||||
```bash
|
||||
$ sudo docker run -it --rm --runtime runk busybox sh
|
||||
/ #
|
||||
```
|
||||
|
||||
## Using `runk` from `Podman`
|
||||
|
||||
`runk` can run containers using [`Podman`](https://github.com/containers/podman).
|
||||
|
||||
First, install `Podman` from source code or package by following the
|
||||
[`Podman` installation instructions](https://podman.io/getting-started/installation).
|
||||
|
||||
### Running a container with `Podman` command line
|
||||
|
||||
```bash
|
||||
$ sudo podman --runtime /usr/local/bin/runk run -it --rm busybox sh
|
||||
/ #
|
||||
```
|
||||
|
||||
> **Note:**
|
||||
> `runk` does not yet support any commands other than the
|
||||
> [OCI standard operations](https://github.com/opencontainers/runtime-spec/blob/main/runtime.md#operations),
|
||||
> so those other commands do not work in `Docker/Podman`. Regarding commands currently
|
||||
> implemented in `runk`, see the [Status of `runk`](#status-of-runk) section.
|
||||
|
||||
## Using `runk` from `containerd`
|
||||
|
||||
`runk` can run containers with the containerd runtime handler support on `containerd`.
|
||||
|
||||
### Prerequisites for `runk` with containerd
|
||||
|
||||
* `containerd` v1.2.4 or above
|
||||
* `cri-tools`
|
||||
|
||||
> **Note:**
|
||||
> [`cri-tools`](https://github.com/kubernetes-sigs/cri-tools) is a set of tools for CRI
|
||||
> used for development and testing.
|
||||
|
||||
Install `cri-tools` from source code:
|
||||
|
||||
```bash
|
||||
$ go get github.com/kubernetes-sigs/cri-tools
|
||||
$ pushd $GOPATH/src/github.com/kubernetes-sigs/cri-tools
|
||||
$ make
|
||||
$ sudo -E make install
|
||||
$ popd
|
||||
```
|
||||
|
||||
Write the `crictl` configuration file:
|
||||
|
||||
``` bash
|
||||
$ cat <<EOF | sudo tee /etc/crictl.yaml
|
||||
runtime-endpoint: unix:///run/containerd/containerd.sock
|
||||
EOF
|
||||
```
|
||||
|
||||
### Configure `containerd` to use `runk`
|
||||
|
||||
Update `/etc/containerd/config.toml`:
|
||||
|
||||
```bash
|
||||
$ cat <<EOF | sudo tee /etc/containerd/config.toml
|
||||
version = 2
|
||||
[plugins."io.containerd.runtime.v1.linux"]
|
||||
shim_debug = true
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runk]
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runk.options]
|
||||
BinaryName = "/usr/local/bin/runk"
|
||||
EOF
|
||||
```
|
||||
|
||||
Restart `containerd`:
|
||||
|
||||
```bash
|
||||
$ sudo systemctl restart containerd
|
||||
```
|
||||
|
||||
### Running a container with `crictl` command line
|
||||
|
||||
You can run containers in `runk` via containerd's CRI.
|
||||
|
||||
Pull the `busybox` image:
|
||||
|
||||
``` bash
|
||||
$ sudo crictl pull busybox
|
||||
```
|
||||
|
||||
Create the sandbox configuration:
|
||||
|
||||
``` bash
|
||||
$ cat <<EOF | tee sandbox.json
|
||||
{
|
||||
"metadata": {
|
||||
"name": "busybox-sandbox",
|
||||
"namespace": "default",
|
||||
"attempt": 1,
|
||||
"uid": "hdishd83djaidwnduwk28bcsb"
|
||||
},
|
||||
"log_directory": "/tmp",
|
||||
"linux": {
|
||||
}
|
||||
}
|
||||
EOF
|
||||
```
|
||||
|
||||
Create the container configuration:
|
||||
|
||||
``` bash
|
||||
$ cat <<EOF | tee container.json
|
||||
{
|
||||
"metadata": {
|
||||
"name": "busybox"
|
||||
},
|
||||
"image": {
|
||||
"image": "docker.io/busybox"
|
||||
},
|
||||
"command": [
|
||||
"sh"
|
||||
],
|
||||
"envs": [
|
||||
{
|
||||
"key": "PATH",
|
||||
"value": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
|
||||
},
|
||||
{
|
||||
"key": "TERM",
|
||||
"value": "xterm"
|
||||
}
|
||||
],
|
||||
"log_path": "busybox.0.log",
|
||||
"stdin": true,
|
||||
"stdin_once": true,
|
||||
"tty": true
|
||||
}
|
||||
EOF
|
||||
```
|
||||
|
||||
With the `crictl` command line of `cri-tools`, you can specify runtime class with `-r` or `--runtime` flag.
|
||||
|
||||
Launch a sandbox and container using the `crictl`:
|
||||
|
||||
```bash
|
||||
# Run a container inside a sandbox
|
||||
$ sudo crictl run -r runk container.json sandbox.json
|
||||
f492eee753887ba3dfbba9022028975380739aba1269df431d097b73b23c3871
|
||||
|
||||
# Attach to the running container
|
||||
$ sudo crictl attach --stdin --tty f492eee753887ba3dfbba9022028975380739aba1269df431d097b73b23c3871
|
||||
/ #
|
||||
```
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
[package]
|
||||
name = "libcontainer"
|
||||
version = "0.0.1"
|
||||
authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
|
||||
description = "Library for runk container"
|
||||
license = "Apache-2.0"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
rustjail = { path = "../../../agent/rustjail", features = [
|
||||
"standard-oci-runtime",
|
||||
] }
|
||||
runtime-spec = { path = "../../../libs/runtime-spec" }
|
||||
oci-spec = { version = "0.8.1", features = ["runtime"] }
|
||||
kata-sys-util = { path = "../../../libs/kata-sys-util" }
|
||||
logging = { path = "../../../libs/logging" }
|
||||
derive_builder = "0.10.2"
|
||||
libc = "0.2.108"
|
||||
nix = "0.23.0"
|
||||
anyhow = "1.0.52"
|
||||
slog = "2.7.0"
|
||||
chrono = { version = "0.4.19", features = ["serde"] }
|
||||
serde = { version = "1.0.133", features = ["derive"] }
|
||||
serde_json = "1.0.74"
|
||||
scopeguard = "1.1.0"
|
||||
cgroups = { package = "cgroups-rs", git = "https://github.com/kata-containers/cgroups-rs", rev = "v0.3.5" }
|
||||
procfs = "0.14.0"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.19.1"
|
||||
test-utils = { path = "../../../libs/test-utils" }
|
||||
protocols = { path = "../../../libs/protocols" }
|
||||
@@ -1,336 +0,0 @@
|
||||
// Copyright 2021-2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use crate::container::{load_linux_container, Container, ContainerLauncher};
|
||||
use crate::status::Status;
|
||||
use crate::utils::validate_spec;
|
||||
use anyhow::{anyhow, Result};
|
||||
use derive_builder::Builder;
|
||||
use oci::{Process as OCIProcess, Spec};
|
||||
use oci_spec::runtime as oci;
|
||||
use runtime_spec::ContainerState;
|
||||
use rustjail::container::update_namespaces;
|
||||
use slog::{debug, Logger};
|
||||
use std::fs::File;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Options for the exec command: everything needed to run an additional process that joins
/// an existing container. Instances are created through the derived `ActivatedContainerBuilder`,
/// whose build step runs `ActivatedContainerBuilder::validate` as a validation hook.
|
||||
#[derive(Default, Builder, Debug, Clone)]
|
||||
#[builder(build_fn(validate = "Self::validate"))]
|
||||
pub struct ActivatedContainer {
|
||||
/// ID of the existing container to join; used together with `root` to locate its status.
pub id: String,
|
||||
/// Root directory under which the container's status directory is looked up.
pub root: PathBuf,
|
||||
/// Optional console socket path, handed to spec validation and to the Linux container loader.
pub console_socket: Option<PathBuf>,
|
||||
/// Optional pid file path, handed to the resulting `ContainerLauncher`.
pub pid_file: Option<PathBuf>,
|
||||
/// Value written to the process's `terminal` setting (ignored when `process` is given).
pub tty: bool,
|
||||
/// Working directory for the process; the spec's value is kept when this is `None`
/// (ignored when `process` is given).
pub cwd: Option<PathBuf>,
|
||||
/// `(key, value)` pairs appended to the process environment as `key=value` entries
/// (ignored when `process` is given).
pub env: Vec<(String, String)>,
|
||||
/// Value written to the process's `no_new_privileges` setting (ignored when `process` is given).
pub no_new_privs: bool,
|
||||
/// Arguments that replace the process's args (ignored when `process` is given).
pub args: Vec<String>,
|
||||
/// Optional path to a JSON file holding an OCI process definition. When set, the parsed
/// process replaces the one in the container's spec instead of applying the options above.
pub process: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl ActivatedContainerBuilder {
|
||||
/// pre-validate before building ActivatedContainer
|
||||
fn validate(&self) -> Result<(), String> {
|
||||
// ensure container exists
|
||||
let id = self.id.as_ref().unwrap();
|
||||
let root = self.root.as_ref().unwrap();
|
||||
let status_path = Status::get_dir_path(root, id);
|
||||
if !status_path.exists() {
|
||||
return Err(format!(
|
||||
"container {} does not exist at path {:?}",
|
||||
id, root
|
||||
));
|
||||
}
|
||||
|
||||
// ensure argv will not be empty in process exec phase later
|
||||
let process = self.process.as_ref().unwrap();
|
||||
let args = self.args.as_ref().unwrap();
|
||||
if process.is_none() && args.is_empty() {
|
||||
return Err("process and args cannot be all empty".to_string());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl ActivatedContainer {
|
||||
/// Create ContainerLauncher that can be used to spawn a process in an existing container.
|
||||
/// This reads the spec from status file of an existing container and adapts it with given process file
|
||||
/// or other options like args, env, etc. It also changes the namespace in spec to join the container.
|
||||
pub fn create_launcher(self, logger: &Logger) -> Result<ContainerLauncher> {
|
||||
debug!(
|
||||
logger,
|
||||
"enter ActivatedContainer::create_launcher {:?}", self
|
||||
);
|
||||
let mut container = Container::load(&self.root, &self.id)?;
|
||||
|
||||
// If state is Created or Running, we can execute the process.
|
||||
if container.state != ContainerState::Created && container.state != ContainerState::Running
|
||||
{
|
||||
return Err(anyhow!(
|
||||
"cannot exec in a stopped or paused container, state: {:?}",
|
||||
container.state
|
||||
));
|
||||
}
|
||||
|
||||
let spec = container
|
||||
.status
|
||||
.config
|
||||
.spec
|
||||
.as_mut()
|
||||
.ok_or_else(|| anyhow!("spec config was not present"))?;
|
||||
self.adapt_exec_spec(spec, container.status.pid, logger)?;
|
||||
debug!(logger, "adapted spec: {:?}", spec);
|
||||
validate_spec(spec, &self.console_socket)?;
|
||||
|
||||
debug!(
|
||||
logger,
|
||||
"load LinuxContainer with config: {:?}", &container.status.config
|
||||
);
|
||||
let runner = load_linux_container(&container.status, self.console_socket, logger)?;
|
||||
|
||||
Ok(ContainerLauncher::new(
|
||||
&self.id,
|
||||
&container.status.bundle,
|
||||
&self.root,
|
||||
false,
|
||||
runner,
|
||||
self.pid_file,
|
||||
))
|
||||
}
|
||||
|
||||
/// Adapt spec to execute a new process which will join the container.
|
||||
fn adapt_exec_spec(&self, spec: &mut Spec, pid: i32, logger: &Logger) -> Result<()> {
|
||||
// If with --process, load process from file.
|
||||
// Otherwise, update process with args and other options.
|
||||
if let Some(process_path) = self.process.as_ref() {
|
||||
spec.set_process(Some(Self::get_process(process_path)?));
|
||||
} else if let Some(process) = spec.process_mut().as_mut() {
|
||||
self.update_process(process)?;
|
||||
} else {
|
||||
return Err(anyhow!("process is empty in spec"));
|
||||
};
|
||||
// Exec process will join the container's namespaces
|
||||
update_namespaces(logger, spec, pid)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Update process with args and other options.
|
||||
fn update_process(&self, process: &mut OCIProcess) -> Result<()> {
|
||||
process.set_args(Some(self.args.clone()));
|
||||
process.set_no_new_privileges(Some(self.no_new_privs));
|
||||
process.set_terminal(Some(self.tty));
|
||||
if let Some(cwd) = self.cwd.as_ref() {
|
||||
process.set_cwd(cwd.as_path().to_path_buf());
|
||||
}
|
||||
if let Some(process_env) = process.env_mut() {
|
||||
process_env.extend(self.env.iter().map(|kv| format!("{}={}", kv.0, kv.1)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Read and parse OCI Process from path
|
||||
fn get_process(process_path: &Path) -> Result<OCIProcess> {
|
||||
let f = File::open(process_path)?;
|
||||
Ok(serde_json::from_reader(f)?)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::status::Status;
|
||||
use crate::utils::test_utils::*;
|
||||
use nix::unistd::getpid;
|
||||
use oci_spec::runtime::{LinuxBuilder, LinuxNamespaceBuilder, ProcessBuilder, User};
|
||||
use rustjail::container::TYPETONAME;
|
||||
use scopeguard::defer;
|
||||
use slog::o;
|
||||
use std::{
|
||||
fs::{create_dir_all, File},
|
||||
path::PathBuf,
|
||||
};
|
||||
use tempfile::tempdir;
|
||||
use test_utils::skip_if_not_root;
|
||||
|
||||
fn create_activated_dirs(root: &Path, id: &str, bundle: &Path) {
|
||||
Status::create_dir(root, id).unwrap();
|
||||
create_dir_all(bundle.join(TEST_ROOTFS_PATH)).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_activated_container_validate() {
|
||||
let root = tempdir().unwrap();
|
||||
let id = TEST_CONTAINER_ID.to_string();
|
||||
Status::create_dir(root.path(), &id).unwrap();
|
||||
let result = ActivatedContainerBuilder::default()
|
||||
.id(id)
|
||||
.root(root.into_path())
|
||||
.console_socket(None)
|
||||
.pid_file(None)
|
||||
.tty(false)
|
||||
.cwd(None)
|
||||
.env(Vec::new())
|
||||
.no_new_privs(false)
|
||||
.process(None)
|
||||
.args(vec!["sleep".to_string(), "10".to_string()])
|
||||
.build();
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_activated_container_create() {
|
||||
// create cgroup directory needs root permission
|
||||
skip_if_not_root!();
|
||||
let logger = slog::Logger::root(slog::Discard, o!());
|
||||
let bundle_dir = tempdir().unwrap();
|
||||
let root = tempdir().unwrap();
|
||||
// Since tests are executed concurrently, container_id must be unique in tests with cgroup.
|
||||
// Or the cgroup directory may be removed by other tests in advance.
|
||||
let id = "test_activated_container_create".to_string();
|
||||
create_activated_dirs(root.path(), &id, bundle_dir.path());
|
||||
let pid = getpid().as_raw();
|
||||
|
||||
let mut spec = create_dummy_spec();
|
||||
spec.root_mut()
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.set_path(bundle_dir.path().join(TEST_ROOTFS_PATH));
|
||||
|
||||
let status = create_custom_dummy_status(&id, pid, root.path(), &spec);
|
||||
status.save().unwrap();
|
||||
|
||||
// create empty cgroup directory to avoid is_pause failing
|
||||
let cgroup = create_dummy_cgroup(Path::new(id.as_str()));
|
||||
defer!(cgroup.delete().unwrap());
|
||||
|
||||
let result = ActivatedContainerBuilder::default()
|
||||
.id(id)
|
||||
.root(root.into_path())
|
||||
.console_socket(Some(PathBuf::from(TEST_CONSOLE_SOCKET_PATH)))
|
||||
.pid_file(Some(PathBuf::from(TEST_PID_FILE_PATH)))
|
||||
.tty(true)
|
||||
.cwd(Some(PathBuf::from(TEST_BUNDLE_PATH)))
|
||||
.env(vec![
|
||||
("K1".to_string(), "V1".to_string()),
|
||||
("K2".to_string(), "V2".to_string()),
|
||||
])
|
||||
.no_new_privs(true)
|
||||
.process(None)
|
||||
.args(vec!["sleep".to_string(), "10".to_string()])
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let linux = LinuxBuilder::default()
|
||||
.namespaces(
|
||||
TYPETONAME
|
||||
.iter()
|
||||
.filter(|&(_, &name)| name != "user")
|
||||
.map(|ns| {
|
||||
LinuxNamespaceBuilder::default()
|
||||
.typ(ns.0.clone())
|
||||
.path(PathBuf::from(&format!("/proc/{}/ns/{}", pid, ns.1)))
|
||||
.build()
|
||||
.unwrap()
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
spec.set_linux(Some(linux));
|
||||
let process = ProcessBuilder::default()
|
||||
.terminal(result.tty)
|
||||
.user(User::default())
|
||||
.args(result.args.clone())
|
||||
.cwd(result.cwd.clone().unwrap().to_string_lossy().to_string())
|
||||
.env(vec![
|
||||
"PATH=/bin:/usr/bin".to_string(),
|
||||
"K1=V1".to_string(),
|
||||
"K2=V2".to_string(),
|
||||
])
|
||||
.no_new_privileges(result.no_new_privs)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
spec.set_process(Some(process));
|
||||
let launcher = result.clone().create_launcher(&logger).unwrap();
|
||||
assert!(!launcher.init);
|
||||
assert_eq!(launcher.runner.config.spec.unwrap(), spec);
|
||||
assert_eq!(
|
||||
launcher.runner.console_socket,
|
||||
result.console_socket.unwrap()
|
||||
);
|
||||
assert_eq!(launcher.pid_file, result.pid_file);
|
||||
}
|
||||
|
||||
#[test]
fn test_activated_container_create_with_process() {
    // Creating the cgroup directory below needs root permission.
    skip_if_not_root!();

    // Serialize a process description into the bundle; the builder is pointed at it later.
    let bundle_dir = tempdir().unwrap();
    let process_file = bundle_dir.path().join(TEST_PROCESS_FILE_NAME);
    let mut process_template = OCIProcess::default();
    process_template.set_args(Some(vec!["sleep".to_string(), "10".to_string()]));
    process_template.set_cwd(PathBuf::from("/"));
    let file = File::create(process_file.clone()).unwrap();
    serde_json::to_writer(&file, &process_template).unwrap();

    let logger = slog::Logger::root(slog::Discard, o!());
    let root = tempdir().unwrap();
    // Tests run concurrently, so every test that touches a cgroup uses its own container id;
    // otherwise another test could remove the cgroup directory first.
    let id = "test_activated_container_create_with_process".to_string();
    let pid = getpid().as_raw();

    let mut spec = create_dummy_spec();
    let rootfs = bundle_dir.path().join(TEST_ROOTFS_PATH);
    spec.root_mut().as_mut().unwrap().set_path(rootfs);
    create_activated_dirs(root.path(), &id, bundle_dir.path());

    let status = create_custom_dummy_status(&id, pid, root.path(), &spec);
    status.save().unwrap();

    // An empty cgroup directory keeps the pause-state check from failing.
    let cgroup = create_dummy_cgroup(Path::new(id.as_str()));
    defer!(cgroup.delete().unwrap());

    let activated = ActivatedContainerBuilder::default()
        .id(id)
        .root(root.into_path())
        .console_socket(Some(PathBuf::from(TEST_CONSOLE_SOCKET_PATH)))
        .pid_file(None)
        .tty(true)
        .cwd(Some(PathBuf::from(TEST_BUNDLE_PATH)))
        .env(vec![
            ("K1".to_string(), "V1".to_string()),
            ("K2".to_string(), "V2".to_string()),
        ])
        .no_new_privs(true)
        .process(Some(process_file))
        .args(vec!["sleep".to_string(), "10".to_string()])
        .build()
        .unwrap();
    let launcher = activated.create_launcher(&logger).unwrap();

    assert!(!launcher.init);

    // The process in the launcher's spec must be exactly the one read from the process file.
    let launched_spec = launcher.runner.config.spec.unwrap();
    let launched_process = launched_spec.process().clone().unwrap();
    assert_eq!(launched_process, process_template);
}
|
||||
}
|
||||
@@ -1,77 +0,0 @@
|
||||
// Copyright 2021-2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::anyhow;
|
||||
use anyhow::Result;
|
||||
use cgroups;
|
||||
use cgroups::freezer::{FreezerController, FreezerState};
|
||||
use std::{thread, time};
|
||||
|
||||
// Try to remove the provided cgroups path five times with increasing delay between tries.
|
||||
// If after all there are not removed cgroups, an appropriate error will be returned.
|
||||
pub fn remove_cgroup_dir(cgroup: &cgroups::Cgroup) -> Result<()> {
|
||||
let mut retries = 5;
|
||||
let mut delay = time::Duration::from_millis(10);
|
||||
while retries != 0 {
|
||||
if retries != 5 {
|
||||
delay *= 2;
|
||||
thread::sleep(delay);
|
||||
}
|
||||
|
||||
if cgroup.delete().is_ok() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
retries -= 1;
|
||||
}
|
||||
|
||||
Err(anyhow!("failed to remove cgroups paths"))
|
||||
}
|
||||
|
||||
// Make sure we get a stable freezer state, so retry if the cgroup is still undergoing freezing.
|
||||
pub fn get_freezer_state(freezer: &FreezerController) -> Result<FreezerState> {
|
||||
let mut retries = 10;
|
||||
while retries != 0 {
|
||||
let state = freezer.state()?;
|
||||
match state {
|
||||
FreezerState::Thawed => return Ok(FreezerState::Thawed),
|
||||
FreezerState::Frozen => return Ok(FreezerState::Frozen),
|
||||
FreezerState::Freezing => {
|
||||
// sleep for 10 ms, wait for the cgroup to finish freezing
|
||||
thread::sleep(time::Duration::from_millis(10));
|
||||
retries -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(FreezerState::Freezing)
|
||||
}
|
||||
|
||||
// check whether freezer state is frozen
|
||||
pub fn is_paused(cgroup: &cgroups::Cgroup) -> Result<bool> {
|
||||
let freezer_controller: &FreezerController = cgroup
|
||||
.controller_of()
|
||||
.ok_or_else(|| anyhow!("failed to get freezer controller"))?;
|
||||
let freezer_state = get_freezer_state(freezer_controller)?;
|
||||
match freezer_state {
|
||||
FreezerState::Frozen => Ok(true),
|
||||
_ => Ok(false),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn freeze(cgroup: &cgroups::Cgroup, state: FreezerState) -> Result<()> {
|
||||
let freezer_controller: &FreezerController = cgroup
|
||||
.controller_of()
|
||||
.ok_or_else(|| anyhow!("failed to get freezer controller"))?;
|
||||
match state {
|
||||
FreezerState::Frozen => {
|
||||
freezer_controller.freeze()?;
|
||||
}
|
||||
FreezerState::Thawed => {
|
||||
freezer_controller.thaw()?;
|
||||
}
|
||||
_ => return Err(anyhow!("invalid freezer state")),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,437 +0,0 @@
|
||||
// Copyright 2021-2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use crate::cgroup::{freeze, remove_cgroup_dir};
|
||||
use crate::status::{self, get_current_container_state, Status};
|
||||
use anyhow::{anyhow, Result};
|
||||
use cgroups;
|
||||
use cgroups::freezer::FreezerState;
|
||||
use cgroups::hierarchies::is_cgroup2_unified_mode;
|
||||
use nix::sys::signal::kill;
|
||||
use nix::{
|
||||
sys::signal::Signal,
|
||||
sys::signal::SIGKILL,
|
||||
unistd::{chdir, unlink, Pid},
|
||||
};
|
||||
use procfs;
|
||||
use runtime_spec::{ContainerState, State as OCIState};
|
||||
use rustjail::cgroups::fs::Manager as CgroupManager;
|
||||
use rustjail::{
|
||||
container::{BaseContainer, LinuxContainer, EXEC_FIFO_FILENAME},
|
||||
process::{Process, ProcessOperations},
|
||||
specconv::CreateOpts,
|
||||
};
|
||||
use scopeguard::defer;
|
||||
use slog::{debug, info, Logger};
|
||||
use std::{
|
||||
env::current_dir,
|
||||
fs,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use kata_sys_util::hooks::HookStates;
|
||||
|
||||
pub const CONFIG_FILE_NAME: &str = "config.json";
|
||||
|
||||
/// The operation a `ContainerLauncher` is asked to perform.
///
/// The launcher maps each variant onto a different runner call when it spawns a process.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ContainerAction {
    /// Spawn the process through the runner's `start`. Only accepted for init containers.
    Create,
    /// Let the runner `exec` the previously created process.
    Start,
    /// Spawn the process through the runner's `run`.
    Run,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Container {
|
||||
pub status: Status,
|
||||
pub state: ContainerState,
|
||||
pub cgroup: cgroups::Cgroup,
|
||||
}
|
||||
|
||||
// Container represents a container that is created by the container runtime.
|
||||
impl Container {
|
||||
pub fn load(state_root: &Path, id: &str) -> Result<Self> {
|
||||
let status = Status::load(state_root, id)?;
|
||||
let spec = status
|
||||
.config
|
||||
.spec
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("spec config was not present"))?;
|
||||
let linux = spec
|
||||
.linux()
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("linux config was not present"))?;
|
||||
let cpath = if linux.cgroups_path().is_none() {
|
||||
id.to_string()
|
||||
} else {
|
||||
linux
|
||||
.cgroups_path()
|
||||
.clone()
|
||||
.unwrap_or_default()
|
||||
.display()
|
||||
.to_string()
|
||||
.trim_start_matches('/')
|
||||
.to_string()
|
||||
};
|
||||
let cgroup = cgroups::Cgroup::load(cgroups::hierarchies::auto(), cpath);
|
||||
let state = get_current_container_state(&status, &cgroup)?;
|
||||
Ok(Self {
|
||||
status,
|
||||
state,
|
||||
cgroup,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn processes(&self) -> Result<Vec<Pid>> {
|
||||
let pids = self.cgroup.tasks();
|
||||
let result = pids.iter().map(|x| Pid::from_raw(x.pid as i32)).collect();
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn kill(&self, signal: Signal, all: bool) -> Result<()> {
|
||||
if all {
|
||||
let pids = self.processes()?;
|
||||
for pid in pids {
|
||||
if !status::is_process_running(pid)? {
|
||||
continue;
|
||||
}
|
||||
kill(pid, signal)?;
|
||||
}
|
||||
} else {
|
||||
// If --all option is not specified and the container is stopped,
|
||||
// kill operation generates an error in accordance with the OCI runtime spec.
|
||||
if self.state == ContainerState::Stopped {
|
||||
return Err(anyhow!(
|
||||
"container {} can't be killed because it is {:?}",
|
||||
self.status.id,
|
||||
self.state
|
||||
)
|
||||
// This error message mustn't be chagned because the containerd integration tests
|
||||
// expect that OCI container runtimes return the message.
|
||||
// Ref. https://github.com/containerd/containerd/blob/release/1.7/pkg/process/utils.go#L135
|
||||
.context("container not running"));
|
||||
}
|
||||
|
||||
let pid = Pid::from_raw(self.status.pid);
|
||||
if status::is_process_running(pid)? {
|
||||
kill(pid, signal)?;
|
||||
}
|
||||
}
|
||||
// For cgroup v1, killing a process in a frozen cgroup does nothing until it's thawed.
|
||||
// Only thaw the cgroup for SIGKILL.
|
||||
// Ref: https://github.com/opencontainers/runc/pull/3217
|
||||
if !is_cgroup2_unified_mode() && self.state == ContainerState::Paused && signal == SIGKILL {
|
||||
freeze(&self.cgroup, FreezerState::Thawed)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn delete(&self, force: bool, logger: &Logger) -> Result<()> {
|
||||
let status = &self.status;
|
||||
let spec = status
|
||||
.config
|
||||
.spec
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("spec config was not present in the status"))?;
|
||||
|
||||
let oci_state = OCIState {
|
||||
version: status.oci_version.clone(),
|
||||
id: status.id.clone(),
|
||||
status: self.state,
|
||||
pid: status.pid,
|
||||
bundle: status
|
||||
.bundle
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow!("invalid bundle path"))?
|
||||
.to_string(),
|
||||
annotations: spec.annotations().clone().unwrap_or_default(),
|
||||
};
|
||||
|
||||
if let Some(hooks) = spec.hooks().as_ref() {
|
||||
info!(&logger, "Poststop Hooks");
|
||||
let mut poststop_hookstates = HookStates::new();
|
||||
poststop_hookstates.execute_hooks(
|
||||
&hooks.poststop().clone().unwrap_or_default(),
|
||||
Some(oci_state.clone()),
|
||||
)?;
|
||||
}
|
||||
|
||||
match oci_state.status {
|
||||
ContainerState::Stopped => {
|
||||
self.destroy()?;
|
||||
}
|
||||
ContainerState::Created => {
|
||||
// Kill an init process
|
||||
self.kill(SIGKILL, false)?;
|
||||
self.destroy()?;
|
||||
}
|
||||
_ => {
|
||||
if force {
|
||||
self.kill(SIGKILL, true)?;
|
||||
self.destroy()?;
|
||||
} else {
|
||||
return Err(anyhow!(
|
||||
"cannot delete container {} that is not stopped",
|
||||
&status.id
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn pause(&self) -> Result<()> {
|
||||
if self.state != ContainerState::Running && self.state != ContainerState::Created {
|
||||
return Err(anyhow!(
|
||||
"failed to pause container: current status is: {:?}",
|
||||
self.state
|
||||
));
|
||||
}
|
||||
freeze(&self.cgroup, FreezerState::Frozen)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn resume(&self) -> Result<()> {
|
||||
if self.state != ContainerState::Paused {
|
||||
return Err(anyhow!(
|
||||
"failed to resume container: current status is: {:?}",
|
||||
self.state
|
||||
));
|
||||
}
|
||||
freeze(&self.cgroup, FreezerState::Thawed)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn destroy(&self) -> Result<()> {
|
||||
remove_cgroup_dir(&self.cgroup)?;
|
||||
self.status.remove_dir()
|
||||
}
|
||||
}
|
||||
|
||||
/// Used to run a process. If init is set, it will create a container and run the process in it.
|
||||
/// If init is not set, it will run the process in an existing container.
|
||||
#[derive(Debug)]
|
||||
pub struct ContainerLauncher {
|
||||
pub id: String,
|
||||
pub bundle: PathBuf,
|
||||
pub state_root: PathBuf,
|
||||
pub init: bool,
|
||||
pub runner: LinuxContainer,
|
||||
pub pid_file: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl ContainerLauncher {
|
||||
pub fn new(
|
||||
id: &str,
|
||||
bundle: &Path,
|
||||
state_root: &Path,
|
||||
init: bool,
|
||||
runner: LinuxContainer,
|
||||
pid_file: Option<PathBuf>,
|
||||
) -> Self {
|
||||
ContainerLauncher {
|
||||
id: id.to_string(),
|
||||
bundle: bundle.to_path_buf(),
|
||||
state_root: state_root.to_path_buf(),
|
||||
init,
|
||||
runner,
|
||||
pid_file,
|
||||
}
|
||||
}
|
||||
|
||||
/// Launch a process. For init containers, we will create a container. For non-init, it will join an existing container.
|
||||
pub async fn launch(&mut self, action: ContainerAction, logger: &Logger) -> Result<()> {
|
||||
if self.init {
|
||||
self.spawn_container(action, logger).await?;
|
||||
} else {
|
||||
if action == ContainerAction::Create {
|
||||
return Err(anyhow!(
|
||||
"ContainerAction::Create is used for init-container only"
|
||||
));
|
||||
}
|
||||
self.spawn_process(action, logger).await?;
|
||||
}
|
||||
if let Some(pid_file) = self.pid_file.as_ref() {
|
||||
fs::write(
|
||||
pid_file,
|
||||
format!("{}", self.runner.get_process(self.id.as_str())?.pid()),
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create the container by invoking runner to spawn the first process and save status.
|
||||
async fn spawn_container(&mut self, action: ContainerAction, logger: &Logger) -> Result<()> {
|
||||
// State root path root/id has been created in LinuxContainer::new(),
|
||||
// so we don't have to create it again.
|
||||
|
||||
// Spawn a new process in the container by using the agent's codes.
|
||||
self.spawn_process(action, logger).await?;
|
||||
|
||||
let status = self.get_status()?;
|
||||
status.save()?;
|
||||
debug!(logger, "saved status is {:?}", status);
|
||||
|
||||
// Clean up the fifo file created by LinuxContainer, which is used for block the created process.
|
||||
if action == ContainerAction::Run || action == ContainerAction::Start {
|
||||
let fifo_path = get_fifo_path(&status);
|
||||
if fifo_path.exists() {
|
||||
unlink(&fifo_path)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Generate rustjail::Process from OCI::Process
|
||||
fn get_process(&self, logger: &Logger) -> Result<Process> {
|
||||
let spec = self.runner.config.spec.as_ref().unwrap();
|
||||
if spec.process().is_some() {
|
||||
Ok(Process::new(
|
||||
logger,
|
||||
spec.process().as_ref().unwrap(),
|
||||
// rustjail::LinuxContainer use the exec_id to identify processes in a container,
|
||||
// so we can get the spawned process by ctr.get_process(exec_id) later.
|
||||
// Since LinuxContainer is temporarily created to spawn one process in each runk invocation,
|
||||
// we can use arbitrary string as the exec_id. Here we choose the container id.
|
||||
&self.id,
|
||||
self.init,
|
||||
0,
|
||||
None,
|
||||
)?)
|
||||
} else {
|
||||
Err(anyhow!("no process configuration"))
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn a new process in the container by invoking runner.
|
||||
async fn spawn_process(&mut self, action: ContainerAction, logger: &Logger) -> Result<()> {
|
||||
// Agent will chdir to bundle_path before creating LinuxContainer. Just do the same as agent.
|
||||
let current_dir = current_dir()?;
|
||||
chdir(&self.bundle)?;
|
||||
defer! {
|
||||
chdir(¤t_dir).unwrap();
|
||||
}
|
||||
|
||||
let process = self.get_process(logger)?;
|
||||
match action {
|
||||
ContainerAction::Create => {
|
||||
self.runner.start(process).await?;
|
||||
}
|
||||
ContainerAction::Start => {
|
||||
self.runner.exec().await?;
|
||||
}
|
||||
ContainerAction::Run => {
|
||||
self.runner.run(process).await?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Generate runk specified Status
|
||||
fn get_status(&self) -> Result<Status> {
|
||||
let oci_state = self.runner.oci_state()?;
|
||||
// read start time from /proc/<pid>/stat
|
||||
let proc = procfs::process::Process::new(self.runner.init_process_pid)?;
|
||||
let process_start_time = proc.stat()?.starttime;
|
||||
Status::new(
|
||||
&self.state_root,
|
||||
&self.bundle,
|
||||
oci_state,
|
||||
process_start_time,
|
||||
self.runner.created,
|
||||
self.runner
|
||||
.cgroup_manager
|
||||
.as_ref()
|
||||
.as_any()?
|
||||
.downcast_ref::<CgroupManager>()
|
||||
.unwrap()
|
||||
.clone(),
|
||||
self.runner.config.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_linux_container(
|
||||
id: &str,
|
||||
root: &Path,
|
||||
config: CreateOpts,
|
||||
console_socket: Option<PathBuf>,
|
||||
logger: &Logger,
|
||||
) -> Result<LinuxContainer> {
|
||||
let mut container = LinuxContainer::new(
|
||||
id,
|
||||
root.to_str()
|
||||
.map(|s| s.to_string())
|
||||
.ok_or_else(|| anyhow!("failed to convert bundle path"))?
|
||||
.as_str(),
|
||||
None,
|
||||
config,
|
||||
logger,
|
||||
)?;
|
||||
if let Some(socket_path) = console_socket.as_ref() {
|
||||
container.set_console_socket(socket_path)?;
|
||||
}
|
||||
Ok(container)
|
||||
}
|
||||
|
||||
// Load rustjail's Linux container.
|
||||
// "uid_map_path" and "gid_map_path" are always empty, so they are not set.
|
||||
pub fn load_linux_container(
|
||||
status: &Status,
|
||||
console_socket: Option<PathBuf>,
|
||||
logger: &Logger,
|
||||
) -> Result<LinuxContainer> {
|
||||
let mut container = LinuxContainer::new(
|
||||
&status.id,
|
||||
&status
|
||||
.root
|
||||
.to_str()
|
||||
.map(|s| s.to_string())
|
||||
.ok_or_else(|| anyhow!("failed to convert a root path"))?,
|
||||
None,
|
||||
status.config.clone(),
|
||||
logger,
|
||||
)?;
|
||||
if let Some(socket_path) = console_socket.as_ref() {
|
||||
container.set_console_socket(socket_path)?;
|
||||
}
|
||||
|
||||
container.init_process_pid = status.pid;
|
||||
container.init_process_start_time = status.process_start_time;
|
||||
container.created = status.created.into();
|
||||
Ok(container)
|
||||
}
|
||||
|
||||
pub fn get_config_path<P: AsRef<Path>>(bundle: P) -> PathBuf {
|
||||
bundle.as_ref().join(CONFIG_FILE_NAME)
|
||||
}
|
||||
|
||||
pub fn get_fifo_path(status: &Status) -> PathBuf {
|
||||
status.root.join(&status.id).join(EXEC_FIFO_FILENAME)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::test_utils::*;
    use rustjail::container::EXEC_FIFO_FILENAME;
    use std::path::PathBuf;

    /// The config path is the bundle path with the config file name appended.
    #[test]
    fn test_get_config_path() {
        let mut expected = PathBuf::from(TEST_BUNDLE_PATH);
        expected.push(CONFIG_FILE_NAME);

        assert_eq!(get_config_path(TEST_BUNDLE_PATH), expected);
    }

    /// The fifo path is `<state root>/<container id>/<fifo name>`.
    #[test]
    fn test_get_fifo_path() {
        let mut expected = PathBuf::from(TEST_STATE_ROOT_PATH);
        expected.push(TEST_CONTAINER_ID);
        expected.push(EXEC_FIFO_FILENAME);

        let status = create_dummy_status();
        assert_eq!(get_fifo_path(&status), expected);
    }
}
|
||||
@@ -1,140 +0,0 @@
|
||||
// Copyright 2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use crate::container::{load_linux_container, Container, ContainerLauncher};
|
||||
use anyhow::{anyhow, Result};
|
||||
use derive_builder::Builder;
|
||||
use runtime_spec::ContainerState;
|
||||
use slog::{debug, Logger};
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Used for start command. It will prepare the options used for starting a new container.
|
||||
#[derive(Default, Builder, Debug, Clone)]
|
||||
#[builder(build_fn(validate = "Self::validate"))]
|
||||
pub struct CreatedContainer {
|
||||
id: String,
|
||||
root: PathBuf,
|
||||
}
|
||||
|
||||
impl CreatedContainerBuilder {
|
||||
/// pre-validate before building CreatedContainer
|
||||
fn validate(&self) -> Result<(), String> {
|
||||
// ensure container exists
|
||||
let id = self.id.as_ref().unwrap();
|
||||
let root = self.root.as_ref().unwrap();
|
||||
let path = root.join(id);
|
||||
if !path.as_path().exists() {
|
||||
return Err(format!("container {} does not exist", id));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl CreatedContainer {
|
||||
/// Create ContainerLauncher that can be used to start a process from an existing init container.
|
||||
/// It reads the spec from status file of the init container.
|
||||
pub fn create_launcher(self, logger: &Logger) -> Result<ContainerLauncher> {
|
||||
debug!(logger, "enter CreatedContainer::create_launcher {:?}", self);
|
||||
let container = Container::load(&self.root, &self.id)?;
|
||||
|
||||
if container.state != ContainerState::Created {
|
||||
return Err(anyhow!(
|
||||
"cannot start a container in the {:?} state",
|
||||
container.state
|
||||
));
|
||||
}
|
||||
|
||||
let config = container.status.config.clone();
|
||||
|
||||
debug!(
|
||||
logger,
|
||||
"Prepare LinuxContainer for starting with config: {:?}", config
|
||||
);
|
||||
let runner = load_linux_container(&container.status, None, logger)?;
|
||||
|
||||
Ok(ContainerLauncher::new(
|
||||
&self.id,
|
||||
&container.status.bundle,
|
||||
&self.root,
|
||||
true,
|
||||
runner,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::status::Status;
    use crate::utils::test_utils::*;
    use nix::sys::stat::Mode;
    use nix::unistd::{self, getpid};
    use rustjail::container::EXEC_FIFO_FILENAME;
    use scopeguard::defer;
    use slog::o;
    use std::fs::create_dir_all;
    use std::path::Path;
    use tempfile::tempdir;
    use test_utils::skip_if_not_root;

    /// Lays out what a created container leaves on disk: the status directory with its
    /// exec fifo, and the rootfs directory inside the bundle.
    fn create_created_container_dirs(root: &Path, id: &str, bundle: &Path) {
        Status::create_dir(root, id).unwrap();

        let fifo_path = root.join(id).join(EXEC_FIFO_FILENAME);
        let fifo_mode = Mode::from_bits(0o644).unwrap();
        unistd::mkfifo(&fifo_path, fifo_mode).unwrap();

        let rootfs = bundle.join(TEST_ROOTFS_PATH);
        create_dir_all(rootfs).unwrap();
    }

    /// Building must fail when the container directory does not exist.
    #[test]
    fn test_created_container_validate() {
        let root = tempdir().unwrap();
        let built = CreatedContainerBuilder::default()
            .id(TEST_CONTAINER_ID.to_string())
            .root(root.path().to_path_buf())
            .build();
        assert!(built.is_err());
    }

    #[test]
    fn test_created_container_create_launcher() {
        // create cgroup directory needs root permission
        skip_if_not_root!();
        let logger = slog::Logger::root(slog::Discard, o!());
        let bundle_dir = tempdir().unwrap();
        let root = tempdir().unwrap();
        // Tests run concurrently, so every test that touches a cgroup uses its own container
        // id; otherwise another test could remove the cgroup directory first.
        let id = "test_created_container_create".to_string();
        create_created_container_dirs(root.path(), &id, bundle_dir.path());
        let pid = getpid().as_raw();

        let mut spec = create_dummy_spec();
        let rootfs = bundle_dir.path().join(TEST_ROOTFS_PATH);
        spec.root_mut().as_mut().unwrap().set_path(rootfs);

        let status = create_custom_dummy_status(&id, pid, root.path(), &spec);
        status.save().unwrap();

        // An empty cgroup directory keeps the pause-state check from failing.
        let cgroup = create_dummy_cgroup(Path::new(id.as_str()));
        defer!(cgroup.delete().unwrap());

        let created = CreatedContainerBuilder::default()
            .id(id.clone())
            .root(root.into_path())
            .build()
            .unwrap();
        let launcher = created.create_launcher(&logger).unwrap();

        assert!(launcher.init);
        assert_eq!(launcher.runner.config.spec.unwrap(), spec);
        assert_eq!(launcher.runner.id, id);
    }
}
|
||||
@@ -1,215 +0,0 @@
|
||||
// Copyright 2021-2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use crate::container::{create_linux_container, get_config_path, ContainerLauncher};
|
||||
use crate::status::Status;
|
||||
use crate::utils::{canonicalize_spec_root, validate_spec};
|
||||
use anyhow::{anyhow, Result};
|
||||
use derive_builder::Builder;
|
||||
use oci_spec::runtime::Spec;
|
||||
use rustjail::specconv::CreateOpts;
|
||||
use slog::{debug, Logger};
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Used for create and run commands. It will prepare the options used for creating a new container.
|
||||
#[derive(Default, Builder, Debug, Clone)]
|
||||
#[builder(build_fn(validate = "Self::validate"))]
|
||||
pub struct InitContainer {
|
||||
id: String,
|
||||
bundle: PathBuf,
|
||||
root: PathBuf,
|
||||
console_socket: Option<PathBuf>,
|
||||
pid_file: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl InitContainerBuilder {
|
||||
/// pre-validate before building InitContainer
|
||||
fn validate(&self) -> Result<(), String> {
|
||||
// ensure container hasn't already been created
|
||||
let id = self.id.as_ref().unwrap();
|
||||
let root = self.root.as_ref().unwrap();
|
||||
let status_path = Status::get_dir_path(root, id);
|
||||
if status_path.exists() {
|
||||
return Err(format!(
|
||||
"container {} already exists at path {:?}",
|
||||
id, root
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl InitContainer {
|
||||
/// Create ContainerLauncher that can be used to launch a new container.
|
||||
/// It will read the spec under bundle path.
|
||||
pub fn create_launcher(self, logger: &Logger) -> Result<ContainerLauncher> {
|
||||
debug!(logger, "enter InitContainer::create_launcher {:?}", self);
|
||||
let bundle_canon = self.bundle.canonicalize()?;
|
||||
let config_path = get_config_path(&bundle_canon);
|
||||
let mut spec = Spec::load(
|
||||
config_path
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow!("invalid config path"))?,
|
||||
)?;
|
||||
// Only absolute rootfs path is valid when creating LinuxContainer later.
|
||||
canonicalize_spec_root(&mut spec, &bundle_canon)?;
|
||||
debug!(logger, "load spec from config file: {:?}", spec);
|
||||
validate_spec(&spec, &self.console_socket)?;
|
||||
|
||||
let config = CreateOpts {
|
||||
cgroup_name: "".to_string(),
|
||||
use_systemd_cgroup: false,
|
||||
// TODO: liboci-cli does not support --no-pivot option for create and run command.
|
||||
// After liboci-cli supports the option, we will change the following code.
|
||||
// no_pivot_root: self.no_pivot,
|
||||
no_pivot_root: false,
|
||||
no_new_keyring: false,
|
||||
spec: Some(spec),
|
||||
rootless_euid: false,
|
||||
rootless_cgroup: false,
|
||||
container_name: "".to_string(),
|
||||
};
|
||||
debug!(logger, "create LinuxContainer with config: {:?}", config);
|
||||
let container =
|
||||
create_linux_container(&self.id, &self.root, config, self.console_socket, logger)?;
|
||||
|
||||
Ok(ContainerLauncher::new(
|
||||
&self.id,
|
||||
&bundle_canon,
|
||||
&self.root,
|
||||
true,
|
||||
container,
|
||||
self.pid_file,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::container::CONFIG_FILE_NAME;
    use crate::utils::test_utils::*;
    use oci_spec::runtime::Process;
    use slog::o;
    use std::fs::{create_dir, File};
    use std::path::Path;
    use tempfile::tempdir;

    /// Building must fail when the status directory of the container already exists.
    #[test]
    fn test_init_container_validate() {
        let root = tempdir().unwrap();
        let id = TEST_CONTAINER_ID.to_string();
        Status::create_dir(root.path(), id.as_str()).unwrap();

        let built = InitContainerBuilder::default()
            .id(id)
            .root(root.path().to_path_buf())
            .bundle(PathBuf::from(TEST_BUNDLE_PATH))
            .pid_file(None)
            .console_socket(None)
            .build();
        assert!(built.is_err());
    }

    #[test]
    fn test_init_container_create_launcher() {
        #[cfg(all(target_arch = "powerpc64", target_endian = "little"))]
        skip_if_not_root!();
        let logger = slog::Logger::root(slog::Discard, o!());
        let root_dir = tempdir().unwrap();
        let bundle_dir = tempdir().unwrap();
        let rootfs = bundle_dir.path().join(TEST_ROOTFS_PATH);
        // create dummy rootfs
        create_dir(&rootfs).unwrap();

        // The config file is written before the root path is rewritten below.
        let config_file = bundle_dir.path().join(CONFIG_FILE_NAME);
        let mut spec = create_dummy_spec();
        let file = File::create(config_file).unwrap();
        serde_json::to_writer(&file, &spec).unwrap();

        spec.root_mut().as_mut().unwrap().set_path(rootfs);
        let expected = TestContainerData {
            // Tests run concurrently, so every test that touches a cgroup uses its own
            // container id; otherwise another test could remove the cgroup directory first.
            id: String::from("test_init_container_create_launcher"),
            bundle: bundle_dir.path().to_path_buf(),
            root: root_dir.into_path(),
            console_socket: Some(PathBuf::from(TEST_CONSOLE_SOCKET_PATH)),
            config: CreateOpts {
                spec: Some(spec),
                ..Default::default()
            },
            pid_file: Some(PathBuf::from(TEST_PID_FILE_PATH)),
        };

        let init_container = InitContainerBuilder::default()
            .id(expected.id.clone())
            .bundle(expected.bundle.clone())
            .root(expected.root.clone())
            .console_socket(expected.console_socket.clone())
            .pid_file(expected.pid_file.clone())
            .build()
            .unwrap();
        let launcher = init_container.create_launcher(&logger).unwrap();

        // LinuxContainer doesn't impl PartialEq, so we need to compare the fields manually.
        assert!(launcher.init);
        assert_eq!(launcher.bundle, expected.bundle);
        assert_eq!(launcher.state_root, expected.root);
        assert_eq!(launcher.pid_file, expected.pid_file);
        assert_eq!(launcher.runner.id, expected.id);
        assert_eq!(launcher.runner.config.spec, expected.config.spec);
        assert_eq!(
            Some(launcher.runner.console_socket),
            expected.console_socket
        );
        // If it is run by root, create_launcher will create cgroup dirs successfully. So we need to do some cleanup stuff.
        if nix::unistd::Uid::effective().is_root() {
            clean_up_cgroup(Path::new(&expected.id));
        }
    }

    /// A spec that requests a terminal without a console socket must be rejected.
    #[test]
    fn test_init_container_tty_err() {
        let logger = slog::Logger::root(slog::Discard, o!());
        let bundle_dir = tempdir().unwrap();
        let config_file = bundle_dir.path().join(CONFIG_FILE_NAME);

        let mut spec = Spec::default();
        spec.set_process(Some(Process::default()));
        spec.process_mut()
            .as_mut()
            .unwrap()
            .set_terminal(Some(true));

        let file = File::create(config_file).unwrap();
        serde_json::to_writer(&file, &spec).unwrap();

        let input = TestContainerData {
            id: String::from(TEST_CONTAINER_ID),
            bundle: bundle_dir.into_path(),
            root: tempdir().unwrap().into_path(),
            console_socket: None,
            config: CreateOpts {
                spec: Some(spec),
                ..Default::default()
            },
            pid_file: None,
        };

        let init_container = InitContainerBuilder::default()
            .id(input.id.clone())
            .bundle(input.bundle.clone())
            .root(input.root.clone())
            .console_socket(input.console_socket.clone())
            .pid_file(input.pid_file)
            .build()
            .unwrap();
        let outcome = init_container.create_launcher(&logger);

        assert!(outcome.is_err());
    }
}
|
||||
@@ -1,12 +0,0 @@
|
||||
// Copyright 2021-2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
pub mod activated_builder;
|
||||
pub mod cgroup;
|
||||
pub mod container;
|
||||
pub mod created_builder;
|
||||
pub mod init_builder;
|
||||
pub mod status;
|
||||
pub mod utils;
|
||||
@@ -1,236 +0,0 @@
|
||||
// Copyright 2021-2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use crate::cgroup::is_paused;
|
||||
use crate::container::get_fifo_path;
|
||||
use crate::utils::*;
|
||||
use anyhow::{anyhow, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use libc::pid_t;
|
||||
use nix::{
|
||||
errno::Errno,
|
||||
sys::{signal::kill, stat::Mode},
|
||||
unistd::Pid,
|
||||
};
|
||||
use procfs::process::ProcState;
|
||||
use runtime_spec::{ContainerState, State as OCIState};
|
||||
use rustjail::{cgroups::fs::Manager as CgroupManager, specconv::CreateOpts};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{
|
||||
fs::{self, File, OpenOptions},
|
||||
path::{Path, PathBuf},
|
||||
time::SystemTime,
|
||||
};
|
||||
|
||||
/// File name of the serialized status, stored as `<state_root>/<id>/status.json`.
const STATUS_FILE: &str = "status.json";
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Status {
|
||||
pub oci_version: String,
|
||||
pub id: String,
|
||||
pub pid: pid_t,
|
||||
pub root: PathBuf,
|
||||
pub bundle: PathBuf,
|
||||
pub rootfs: String,
|
||||
pub process_start_time: u64,
|
||||
pub created: DateTime<Utc>,
|
||||
// Methods of Manager traits in rustjail are invisible, and CgroupManager.cgroup can't be serialized.
|
||||
// So it is cumbersome to manage cgroups by this field. Instead, we use cgroups-rs::cgroup directly in Container to manager cgroups.
|
||||
// Another solution is making some methods public outside rustjail and adding getter/setter for CgroupManager.cgroup.
|
||||
// Temporarily keep this field for compatibility.
|
||||
pub cgroup_manager: CgroupManager,
|
||||
pub config: CreateOpts,
|
||||
}
|
||||
|
||||
impl Status {
|
||||
pub fn new(
|
||||
root: &Path,
|
||||
bundle: &Path,
|
||||
oci_state: OCIState,
|
||||
process_start_time: u64,
|
||||
created_time: SystemTime,
|
||||
cgroup_mg: CgroupManager,
|
||||
config: CreateOpts,
|
||||
) -> Result<Self> {
|
||||
let created = DateTime::from(created_time);
|
||||
let rootfs = config
|
||||
.clone()
|
||||
.spec
|
||||
.ok_or_else(|| anyhow!("spec config was not present"))?
|
||||
.root()
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("root config was not present in the spec"))?
|
||||
.path()
|
||||
.clone();
|
||||
|
||||
Ok(Self {
|
||||
oci_version: oci_state.version,
|
||||
id: oci_state.id,
|
||||
pid: oci_state.pid,
|
||||
root: root.to_path_buf(),
|
||||
bundle: bundle.to_path_buf(),
|
||||
rootfs: rootfs.display().to_string(),
|
||||
process_start_time,
|
||||
created,
|
||||
cgroup_manager: cgroup_mg,
|
||||
config,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn save(&self) -> Result<()> {
|
||||
let state_file_path = Self::get_file_path(&self.root, &self.id);
|
||||
|
||||
if !&self.root.exists() {
|
||||
create_dir_with_mode(&self.root, Mode::S_IRWXU, true)?;
|
||||
}
|
||||
|
||||
let file = OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.open(state_file_path)?;
|
||||
|
||||
serde_json::to_writer(&file, self)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn load(state_root: &Path, id: &str) -> Result<Self> {
|
||||
let state_file_path = Self::get_file_path(state_root, id);
|
||||
if !state_file_path.exists() {
|
||||
return Err(anyhow!("container \"{}\" does not exist", id));
|
||||
}
|
||||
|
||||
let file = File::open(&state_file_path)?;
|
||||
let state: Self = serde_json::from_reader(&file)?;
|
||||
|
||||
Ok(state)
|
||||
}
|
||||
|
||||
pub fn create_dir(state_root: &Path, id: &str) -> Result<()> {
|
||||
let state_dir_path = Self::get_dir_path(state_root, id);
|
||||
if !state_dir_path.exists() {
|
||||
create_dir_with_mode(state_dir_path, Mode::S_IRWXU, true)?;
|
||||
} else {
|
||||
return Err(anyhow!("container with id exists: \"{}\"", id));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn remove_dir(&self) -> Result<()> {
|
||||
let state_dir_path = Self::get_dir_path(&self.root, &self.id);
|
||||
fs::remove_dir_all(state_dir_path)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_dir_path(state_root: &Path, id: &str) -> PathBuf {
|
||||
state_root.join(id)
|
||||
}
|
||||
|
||||
pub fn get_file_path(state_root: &Path, id: &str) -> PathBuf {
|
||||
state_root.join(id).join(STATUS_FILE)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_process_running(pid: Pid) -> Result<bool> {
|
||||
match kill(pid, None) {
|
||||
Err(errno) => {
|
||||
if errno != Errno::ESRCH {
|
||||
return Err(anyhow!("failed to kill process {}: {:?}", pid, errno));
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
Ok(()) => Ok(true),
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the current state of a container. It will read cgroupfs and procfs to determine the state.
|
||||
// https://github.com/opencontainers/runc/blob/86d6898f3052acba1ebcf83aa2eae3f6cc5fb471/libcontainer/container_linux.go#L1953
|
||||
pub fn get_current_container_state(
|
||||
status: &Status,
|
||||
cgroup: &cgroups::Cgroup,
|
||||
) -> Result<ContainerState> {
|
||||
if is_paused(cgroup)? {
|
||||
return Ok(ContainerState::Paused);
|
||||
}
|
||||
let proc = procfs::process::Process::new(status.pid);
|
||||
// if reading /proc/<pid> occurs error, then the process is not running
|
||||
if proc.is_err() {
|
||||
return Ok(ContainerState::Stopped);
|
||||
}
|
||||
let proc_stat = proc.unwrap().stat()?;
|
||||
// if start time is not equal, then the pid is reused, and the process is not running
|
||||
if proc_stat.starttime != status.process_start_time {
|
||||
return Ok(ContainerState::Stopped);
|
||||
}
|
||||
match proc_stat.state()? {
|
||||
ProcState::Zombie | ProcState::Dead => Ok(ContainerState::Stopped),
|
||||
_ => {
|
||||
let fifo = get_fifo_path(status);
|
||||
if fifo.exists() {
|
||||
return Ok(ContainerState::Created);
|
||||
}
|
||||
Ok(ContainerState::Running)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::test_utils::*;
    use ::test_utils::skip_if_not_root;
    use chrono::{DateTime, Utc};
    use nix::unistd::getpid;
    use runtime_spec::ContainerState;
    use rustjail::cgroups::fs::Manager as CgroupManager;
    use scopeguard::defer;
    use std::path::Path;
    use std::time::SystemTime;

    /// `Status::new` copies id/pid from the OCI state and keeps the given
    /// start time and creation timestamp.
    #[test]
    fn test_status() {
        let created_at = SystemTime::now();
        let oci_state = create_dummy_oci_state();
        let cgroup_manager: CgroupManager = serde_json::from_str(TEST_CGM_DATA).unwrap();

        let status = Status::new(
            Path::new(TEST_STATE_ROOT_PATH),
            Path::new(TEST_BUNDLE_PATH),
            oci_state.clone(),
            1,
            created_at,
            cgroup_manager,
            create_dummy_opts(),
        )
        .unwrap();

        assert_eq!(status.id, oci_state.id);
        assert_eq!(status.pid, oci_state.pid);
        assert_eq!(status.process_start_time, 1);
        assert_eq!(status.created, DateTime::<Utc>::from(created_at));
    }

    /// The test process itself must be reported as running.
    #[test]
    fn test_is_process_running() {
        assert!(is_process_running(getpid()).unwrap());
    }

    /// A status pointing at the test process, with no fifo, is `Running`.
    #[test]
    fn test_get_current_container_state() {
        skip_if_not_root!();
        let status = Status {
            id: "test_get_current_container_state".to_string(),
            ..create_dummy_status()
        };
        // create a dummy cgroup to make sure is_paused doesn't return an error
        let cgroup = create_dummy_cgroup(Path::new(&status.id));
        defer!(cgroup.delete().unwrap());

        let current = get_current_container_state(&status, &cgroup).unwrap();
        assert_eq!(current, ContainerState::Running);
    }
}
|
||||
@@ -1,294 +0,0 @@
|
||||
// Copyright 2021-2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use nix::sys::stat::Mode;
|
||||
use oci_spec::runtime::{Process, Spec};
|
||||
use std::{
|
||||
fs::{DirBuilder, File},
|
||||
io::{prelude::*, BufReader},
|
||||
os::unix::fs::DirBuilderExt,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
pub fn lines_from_file<P: AsRef<Path>>(path: P) -> Result<Vec<String>> {
|
||||
let file = File::open(&path)?;
|
||||
let buf = BufReader::new(file);
|
||||
Ok(buf
|
||||
.lines()
|
||||
.map(|v| v.expect("could not parse line"))
|
||||
.collect())
|
||||
}
|
||||
|
||||
pub fn create_dir_with_mode<P: AsRef<Path>>(path: P, mode: Mode, recursive: bool) -> Result<()> {
|
||||
let path = path.as_ref();
|
||||
if path.exists() {
|
||||
return Err(anyhow!("{} already exists", path.display()));
|
||||
}
|
||||
|
||||
Ok(DirBuilder::new()
|
||||
.recursive(recursive)
|
||||
.mode(mode.bits())
|
||||
.create(path)?)
|
||||
}
|
||||
|
||||
/// If root in spec is a relative path, make it absolute.
|
||||
pub fn canonicalize_spec_root(spec: &mut Spec, bundle_canon: &Path) -> Result<()> {
|
||||
let spec_root = spec
|
||||
.root_mut()
|
||||
.as_mut()
|
||||
.ok_or_else(|| anyhow!("root config was not present in the spec file"))?;
|
||||
let rootfs_path = &spec_root.path();
|
||||
if !rootfs_path.is_absolute() {
|
||||
let bundle_canon_path = bundle_canon.join(rootfs_path).canonicalize()?;
|
||||
spec_root.set_path(bundle_canon_path);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check whether spec is valid. Now runk only support detach mode.
|
||||
pub fn validate_spec(spec: &Spec, console_socket: &Option<PathBuf>) -> Result<()> {
|
||||
validate_process_spec(spec.process())?;
|
||||
if let Some(process) = spec.process().as_ref() {
|
||||
// runk always launches containers with detached mode, so users have to
|
||||
// use a console socket with run or create operation when a terminal is used.
|
||||
if process.terminal().is_some() && console_socket.is_none() {
|
||||
return Err(anyhow!(
|
||||
"cannot allocate a pseudo-TTY without setting a console socket"
|
||||
));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Validate process just like runc, https://github.com/opencontainers/runc/pull/623
|
||||
pub fn validate_process_spec(process: &Option<Process>) -> Result<()> {
|
||||
let process = process
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("process property must not be empty"))?;
|
||||
if process.cwd().as_os_str().is_empty() {
|
||||
return Err(anyhow!("cwd property must not be empty"));
|
||||
}
|
||||
let cwd = process.cwd();
|
||||
if !cwd.is_absolute() {
|
||||
return Err(anyhow!("cwd must be an absolute path"));
|
||||
}
|
||||
if process.args().is_none() {
|
||||
return Err(anyhow!("args must not be empty"));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test_utils {
|
||||
use super::*;
|
||||
use crate::status::Status;
|
||||
use chrono::DateTime;
|
||||
use nix::unistd::getpid;
|
||||
use oci::{LinuxBuilder, LinuxNamespaceBuilder, Process, Root, Spec};
|
||||
use oci_spec::runtime as oci;
|
||||
use runtime_spec::{ContainerState, State as OCIState};
|
||||
use rustjail::{
|
||||
cgroups::fs::Manager as CgroupManager, container::TYPETONAME, specconv::CreateOpts,
|
||||
};
|
||||
use std::{fs::create_dir_all, path::Path, time::SystemTime};
|
||||
use tempfile::tempdir;
|
||||
|
||||
// Fixed values shared by the unit tests.

/// Container id used by the dummy OCI state.
pub const TEST_CONTAINER_ID: &str = "test";
/// State root path used when building dummy statuses.
pub const TEST_STATE_ROOT_PATH: &str = "/state";
/// Bundle path used by the dummy OCI state and dummy statuses.
pub const TEST_BUNDLE_PATH: &str = "/bundle";
/// Relative rootfs path used by the dummy spec.
pub const TEST_ROOTFS_PATH: &str = "rootfs";
/// Used as both key and value of the dummy OCI state's single annotation.
pub const TEST_ANNOTATION: &str = "test-annotation";
pub const TEST_CONSOLE_SOCKET_PATH: &str = "/test-console-sock";
pub const TEST_PROCESS_FILE_NAME: &str = "process.json";
pub const TEST_PID_FILE_PATH: &str = "/test-pid";
/// Hostname set in the dummy spec.
pub const TEST_HOST_NAME: &str = "test-host";
/// Version string used by the dummy spec and the dummy OCI state.
pub const TEST_OCI_SPEC_VERSION: &str = "1.0.2";
/// JSON that tests deserialize into a `CgroupManager`.
pub const TEST_CGM_DATA: &str = r#"{
"paths": {
"devices": "/sys/fs/cgroup/devices"
},
"mounts": {
"devices": "/sys/fs/cgroup/devices"
},
"cpath": "test"
}"#;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TestContainerData {
|
||||
pub id: String,
|
||||
pub bundle: PathBuf,
|
||||
pub root: PathBuf,
|
||||
pub console_socket: Option<PathBuf>,
|
||||
pub pid_file: Option<PathBuf>,
|
||||
pub config: CreateOpts,
|
||||
}
|
||||
|
||||
pub fn create_dummy_spec() -> Spec {
|
||||
let linux = LinuxBuilder::default()
|
||||
.namespaces(
|
||||
TYPETONAME
|
||||
.iter()
|
||||
.filter(|&(_, &name)| name != "user")
|
||||
.map(|ns| {
|
||||
LinuxNamespaceBuilder::default()
|
||||
.typ(ns.0.clone())
|
||||
.path(PathBuf::from(""))
|
||||
.build()
|
||||
.unwrap()
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let mut process = Process::default();
|
||||
process.set_args(Some(vec!["sleep".to_string(), "10".to_string()]));
|
||||
process.set_env(Some(vec!["PATH=/bin:/usr/bin".to_string()]));
|
||||
process.set_cwd(PathBuf::from("/"));
|
||||
|
||||
let mut root = Root::default();
|
||||
root.set_path(PathBuf::from(TEST_ROOTFS_PATH));
|
||||
root.set_readonly(Some(false));
|
||||
|
||||
let mut spec = Spec::default();
|
||||
spec.set_version(TEST_OCI_SPEC_VERSION.to_string());
|
||||
spec.set_process(Some(process));
|
||||
spec.set_hostname(Some(TEST_HOST_NAME.to_string()));
|
||||
spec.set_root(Some(root));
|
||||
spec.set_linux(Some(linux));
|
||||
|
||||
spec
|
||||
}
|
||||
|
||||
pub fn create_dummy_opts() -> CreateOpts {
|
||||
let mut spec = Spec::default();
|
||||
spec.set_root(Some(Root::default()));
|
||||
|
||||
CreateOpts {
|
||||
cgroup_name: "".to_string(),
|
||||
use_systemd_cgroup: false,
|
||||
no_pivot_root: false,
|
||||
no_new_keyring: false,
|
||||
spec: Some(spec),
|
||||
rootless_euid: false,
|
||||
rootless_cgroup: false,
|
||||
container_name: "".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_dummy_oci_state() -> OCIState {
|
||||
OCIState {
|
||||
version: TEST_OCI_SPEC_VERSION.to_string(),
|
||||
id: TEST_CONTAINER_ID.to_string(),
|
||||
status: ContainerState::Running,
|
||||
pid: getpid().as_raw(),
|
||||
bundle: TEST_BUNDLE_PATH.to_string(),
|
||||
annotations: [(TEST_ANNOTATION.to_string(), TEST_ANNOTATION.to_string())]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_dummy_status() -> Status {
|
||||
let cgm: CgroupManager = serde_json::from_str(TEST_CGM_DATA).unwrap();
|
||||
let oci_state = create_dummy_oci_state();
|
||||
let created = SystemTime::now();
|
||||
let start_time = procfs::process::Process::new(oci_state.pid)
|
||||
.unwrap()
|
||||
.stat()
|
||||
.unwrap()
|
||||
.starttime;
|
||||
let status = Status::new(
|
||||
Path::new(TEST_STATE_ROOT_PATH),
|
||||
Path::new(TEST_BUNDLE_PATH),
|
||||
oci_state,
|
||||
start_time,
|
||||
created,
|
||||
cgm,
|
||||
create_dummy_opts(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
status
|
||||
}
|
||||
|
||||
pub fn create_custom_dummy_status(id: &str, pid: i32, root: &Path, spec: &Spec) -> Status {
|
||||
let start_time = procfs::process::Process::new(pid)
|
||||
.unwrap()
|
||||
.stat()
|
||||
.unwrap()
|
||||
.starttime;
|
||||
Status {
|
||||
oci_version: spec.version().clone(),
|
||||
id: id.to_string(),
|
||||
pid,
|
||||
root: root.to_path_buf(),
|
||||
bundle: PathBuf::from(TEST_BUNDLE_PATH),
|
||||
rootfs: TEST_ROOTFS_PATH.to_string(),
|
||||
process_start_time: start_time,
|
||||
created: DateTime::from(SystemTime::now()),
|
||||
cgroup_manager: serde_json::from_str(TEST_CGM_DATA).unwrap(),
|
||||
config: CreateOpts {
|
||||
spec: Some(spec.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a cgroup at `cpath` in the auto-detected hierarchy, panicking on
/// failure.
pub fn create_dummy_cgroup(cpath: &Path) -> cgroups::Cgroup {
    let hierarchy = cgroups::hierarchies::auto();
    cgroups::Cgroup::new(hierarchy, cpath).unwrap()
}
|
||||
|
||||
pub fn clean_up_cgroup(cpath: &Path) {
|
||||
let cgroup = cgroups::Cgroup::load(cgroups::hierarchies::auto(), cpath);
|
||||
cgroup.delete().unwrap();
|
||||
}
|
||||
|
||||
/// Both an absolute root and a bundle-relative root must end up as the same
/// absolute path.
#[test]
fn test_canonicalize_spec_root() {
    /// Spec whose only content is a writable root at `p`.
    fn spec_with_root(p: &str) -> Spec {
        let mut root = Root::default();
        root.set_path(PathBuf::from(p));
        root.set_readonly(Some(false));

        let mut spec = Spec::default();
        spec.set_root(Some(root));
        spec
    }

    let temp_dir = tempdir().unwrap();
    let bundle_dir = temp_dir.path();
    let abs_root = bundle_dir.join(TEST_ROOTFS_PATH);
    create_dir_all(&abs_root).unwrap();

    // Already absolute: left as is.
    let mut spec = spec_with_root(abs_root.to_str().unwrap());
    assert!(canonicalize_spec_root(&mut spec, bundle_dir).is_ok());
    let root = spec.root().clone().unwrap();
    assert_eq!(root.path(), &abs_root);

    // Relative: resolved against the bundle directory.
    let mut spec = spec_with_root(TEST_ROOTFS_PATH);
    assert!(canonicalize_spec_root(&mut spec, bundle_dir).is_ok());
    let root = spec.root().clone().unwrap();
    assert_eq!(root.path(), &abs_root);
}
|
||||
|
||||
/// A process needs args and a non-empty, absolute cwd; a missing process is
/// rejected.
#[test]
pub fn test_validate_process_spec() {
    let mut valid_process = Process::default();
    valid_process.set_args(Some(vec!["test".to_string()]));
    valid_process.set_cwd(PathBuf::from("/"));

    assert!(validate_process_spec(&None).is_err());
    assert!(validate_process_spec(&Some(valid_process.clone())).is_ok());

    // Missing args.
    let mut without_args = valid_process.clone();
    without_args.set_args(None);
    assert!(validate_process_spec(&Some(without_args)).is_err());

    // Empty cwd.
    let mut empty_cwd = valid_process.clone();
    empty_cwd.set_cwd(PathBuf::from(""));
    assert!(validate_process_spec(&Some(empty_cwd)).is_err());

    // Relative cwd.
    let mut relative_cwd = valid_process;
    relative_cwd.set_cwd(PathBuf::from("test/"));
    assert!(validate_process_spec(&Some(relative_cwd)).is_err());
}
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
// Copyright 2021-2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::Result;
|
||||
use libcontainer::{container::ContainerAction, init_builder::InitContainerBuilder};
|
||||
|
||||
use liboci_cli::Create;
|
||||
use slog::{info, Logger};
|
||||
use std::path::Path;
|
||||
|
||||
pub async fn run(opts: Create, root: &Path, logger: &Logger) -> Result<()> {
|
||||
let mut launcher = InitContainerBuilder::default()
|
||||
.id(opts.container_id)
|
||||
.bundle(opts.bundle)
|
||||
.root(root.to_path_buf())
|
||||
.console_socket(opts.console_socket)
|
||||
.pid_file(opts.pid_file)
|
||||
.build()?
|
||||
.create_launcher(logger)?;
|
||||
|
||||
launcher.launch(ContainerAction::Create, logger).await?;
|
||||
|
||||
info!(&logger, "create command finished successfully");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
// Copyright 2021-2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use libcontainer::{container::Container, status::Status};
|
||||
use liboci_cli::Delete;
|
||||
use slog::{info, Logger};
|
||||
use std::{fs, path::Path};
|
||||
|
||||
pub async fn run(opts: Delete, root: &Path, logger: &Logger) -> Result<()> {
|
||||
let container_id = &opts.container_id;
|
||||
let status_dir = Status::get_dir_path(root, container_id);
|
||||
if !status_dir.exists() {
|
||||
return Err(anyhow!("container {} does not exist", container_id));
|
||||
}
|
||||
|
||||
let container = if let Ok(value) = Container::load(root, container_id) {
|
||||
value
|
||||
} else {
|
||||
fs::remove_dir_all(status_dir)?;
|
||||
return Ok(());
|
||||
};
|
||||
container.delete(opts.force, logger).await?;
|
||||
|
||||
info!(&logger, "delete command finished successfully");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::Result;
|
||||
use libcontainer::activated_builder::ActivatedContainerBuilder;
|
||||
use libcontainer::container::ContainerAction;
|
||||
use liboci_cli::Exec;
|
||||
use slog::{info, Logger};
|
||||
use std::path::Path;
|
||||
|
||||
pub async fn run(opts: Exec, root: &Path, logger: &Logger) -> Result<()> {
|
||||
let mut launcher = ActivatedContainerBuilder::default()
|
||||
.id(opts.container_id)
|
||||
.root(root.to_path_buf())
|
||||
.console_socket(opts.console_socket)
|
||||
.pid_file(opts.pid_file)
|
||||
.tty(opts.tty)
|
||||
.cwd(opts.cwd)
|
||||
.env(opts.env)
|
||||
.no_new_privs(opts.no_new_privs)
|
||||
.process(opts.process)
|
||||
.args(opts.command)
|
||||
.build()?
|
||||
.create_launcher(logger)?;
|
||||
|
||||
launcher.launch(ContainerAction::Run, logger).await?;
|
||||
|
||||
info!(&logger, "exec command finished successfully");
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
// Copyright 2021-2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::Result;
|
||||
use libcontainer::container::Container;
|
||||
use liboci_cli::Kill;
|
||||
use nix::sys::signal::Signal;
|
||||
use slog::{info, Logger};
|
||||
use std::{convert::TryFrom, path::Path, str::FromStr};
|
||||
|
||||
pub fn run(opts: Kill, state_root: &Path, logger: &Logger) -> Result<()> {
|
||||
let container_id = &opts.container_id;
|
||||
let container = Container::load(state_root, container_id)?;
|
||||
let sig = parse_signal(&opts.signal)?;
|
||||
|
||||
let all = opts.all;
|
||||
container.kill(sig, all)?;
|
||||
|
||||
info!(&logger, "kill command finished successfully");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_signal(signal: &str) -> Result<Signal> {
|
||||
if let Ok(num) = signal.parse::<i32>() {
|
||||
return Ok(Signal::try_from(num)?);
|
||||
}
|
||||
|
||||
let mut signal_upper = signal.to_uppercase();
|
||||
if !signal_upper.starts_with("SIG") {
|
||||
signal_upper = "SIG".to_string() + &signal_upper;
|
||||
}
|
||||
|
||||
Ok(Signal::from_str(&signal_upper)?)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use nix::sys::signal::Signal;

    /// Numeric, lower-case, upper-case, prefixed and unprefixed spellings all
    /// resolve to the same signal.
    #[test]
    fn test_parse_signal() {
        let cases: [(Signal, [&str; 5]); 2] = [
            (Signal::SIGHUP, ["1", "sighup", "hup", "SIGHUP", "HUP"]),
            (Signal::SIGKILL, ["9", "sigkill", "kill", "SIGKILL", "KILL"]),
        ];

        for (expected, spellings) in cases.iter() {
            for spelling in spellings.iter() {
                assert_eq!(*expected, parse_signal(spelling).unwrap());
            }
        }
    }
}
|
||||
@@ -1,68 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use super::state::get_container_state_name;
|
||||
use anyhow::Result;
|
||||
use libcontainer::container::Container;
|
||||
use liboci_cli::List;
|
||||
use runtime_spec::ContainerState;
|
||||
use slog::{info, Logger};
|
||||
use std::fmt::Write as _;
|
||||
use std::{fs, os::unix::prelude::MetadataExt, path::Path};
|
||||
use std::{io, io::Write};
|
||||
use tabwriter::TabWriter;
|
||||
use uzers::get_user_by_uid;
|
||||
|
||||
pub fn run(_: List, root: &Path, logger: &Logger) -> Result<()> {
|
||||
let mut content = String::new();
|
||||
for entry in fs::read_dir(root)? {
|
||||
let entry = entry?;
|
||||
// Possibly race with other command of runk, so continue loop when any error occurs below
|
||||
let metadata = match entry.metadata() {
|
||||
Ok(metadata) => metadata,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if !metadata.is_dir() {
|
||||
continue;
|
||||
}
|
||||
let container_id = match entry.file_name().into_string() {
|
||||
Ok(id) => id,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let container = match Container::load(root, &container_id) {
|
||||
Ok(container) => container,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let state = container.state;
|
||||
// Just like runc, pid of stopped container is 0
|
||||
let pid = match state {
|
||||
ContainerState::Stopped => 0,
|
||||
_ => container.status.pid,
|
||||
};
|
||||
// May replace get_user_by_uid with getpwuid(3)
|
||||
let owner = match get_user_by_uid(metadata.uid()) {
|
||||
Some(user) => String::from(user.name().to_string_lossy()),
|
||||
None => format!("#{}", metadata.uid()),
|
||||
};
|
||||
let _ = writeln!(
|
||||
content,
|
||||
"{}\t{}\t{}\t{}\t{}\t{}",
|
||||
container_id,
|
||||
pid,
|
||||
get_container_state_name(state),
|
||||
container.status.bundle.display(),
|
||||
container.status.created,
|
||||
owner
|
||||
);
|
||||
}
|
||||
|
||||
let mut tab_writer = TabWriter::new(io::stdout());
|
||||
writeln!(&mut tab_writer, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tOWNER")?;
|
||||
write!(&mut tab_writer, "{}", content)?;
|
||||
tab_writer.flush()?;
|
||||
|
||||
info!(&logger, "list command finished successfully");
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
// Copyright 2021-2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
pub mod create;
|
||||
pub mod delete;
|
||||
pub mod exec;
|
||||
pub mod kill;
|
||||
pub mod list;
|
||||
pub mod pause;
|
||||
pub mod ps;
|
||||
pub mod resume;
|
||||
pub mod run;
|
||||
pub mod spec;
|
||||
pub mod start;
|
||||
pub mod state;
|
||||
@@ -1,18 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::Result;
|
||||
use libcontainer::container::Container;
|
||||
use liboci_cli::Pause;
|
||||
use slog::{info, Logger};
|
||||
use std::path::Path;
|
||||
|
||||
pub fn run(opts: Pause, root: &Path, logger: &Logger) -> Result<()> {
|
||||
let container = Container::load(root, &opts.container_id)?;
|
||||
container.pause()?;
|
||||
|
||||
info!(&logger, "pause command finished successfully");
|
||||
Ok(())
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user