Compare commits
34 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
65dd151e0a | ||
|
|
d6437435a2 | ||
|
|
38801e5bf1 | ||
|
|
8f8b93d753 | ||
|
|
25b1a52291 | ||
|
|
5532930107 | ||
|
|
15e5394073 | ||
|
|
54d97c2104 | ||
|
|
d04c7e582f | ||
|
|
2e3ae3f230 | ||
|
|
14a4551d56 | ||
|
|
012837260c | ||
|
|
69505695b7 | ||
|
|
48a94f36a6 | ||
|
|
1a396a1784 | ||
|
|
b2c6ff0b05 | ||
|
|
cd898d28c7 | ||
|
|
e8512320c3 | ||
|
|
c0b5ba2303 | ||
|
|
94c7f6e5a9 | ||
|
|
da875e7473 | ||
|
|
05b2096c08 | ||
|
|
1b930156cb | ||
|
|
01c889fb66 | ||
|
|
59bd5c2e0a | ||
|
|
22c005f551 | ||
|
|
8220e54787 | ||
|
|
7fabdf975e | ||
|
|
4f0ca40e0c | ||
|
|
79e1249603 | ||
|
|
da24fd88e2 | ||
|
|
0e8e10540b | ||
|
|
ed25d2cf5b | ||
|
|
dfc1413e4a |
@@ -1,40 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2022 Red Hat
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
script_dir=$(dirname "$(readlink -f "$0")")
|
||||
parent_dir=$(realpath "${script_dir}/../..")
|
||||
cidir="${parent_dir}/ci"
|
||||
source "${cidir}/lib.sh"
|
||||
|
||||
cargo_deny_file="${script_dir}/action.yaml"
|
||||
|
||||
cat cargo-deny-skeleton.yaml.in > "${cargo_deny_file}"
|
||||
|
||||
changed_files_status=$(run_get_pr_changed_file_details)
|
||||
changed_files_status=$(echo "$changed_files_status" | grep "Cargo\.toml$" || true)
|
||||
changed_files=$(echo "$changed_files_status" | awk '{print $NF}' || true)
|
||||
|
||||
if [ -z "$changed_files" ]; then
|
||||
cat >> "${cargo_deny_file}" << EOF
|
||||
- run: echo "No Cargo.toml files to check"
|
||||
shell: bash
|
||||
EOF
|
||||
fi
|
||||
|
||||
for path in $changed_files
|
||||
do
|
||||
cat >> "${cargo_deny_file}" << EOF
|
||||
|
||||
- name: ${path}
|
||||
continue-on-error: true
|
||||
shell: bash
|
||||
run: |
|
||||
pushd $(dirname ${path})
|
||||
cargo deny check
|
||||
popd
|
||||
EOF
|
||||
done
|
||||
@@ -1,30 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2022 Red Hat
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
name: 'Cargo Crates Check'
|
||||
description: 'Checks every Cargo.toml file using cargo-deny'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Cache
|
||||
uses: Swatinem/rust-cache@v2
|
||||
|
||||
- name: Install Cargo deny
|
||||
shell: bash
|
||||
run: |
|
||||
which cargo
|
||||
cargo install --locked cargo-deny || true
|
||||
100
.github/workflows/add-backport-label.yaml
vendored
@@ -1,100 +0,0 @@
|
||||
name: Add backport label
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
- reopened
|
||||
- edited
|
||||
- labeled
|
||||
- unlabeled
|
||||
|
||||
jobs:
|
||||
check-issues:
|
||||
if: ${{ github.event.label.name != 'auto-backport' }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code to allow hub to communicate with the project
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Install hub extension script
|
||||
run: |
|
||||
pushd $(mktemp -d) &>/dev/null
|
||||
git clone --single-branch --depth 1 "https://github.com/kata-containers/.github" && cd .github/scripts
|
||||
sudo install hub-util.sh /usr/local/bin
|
||||
popd &>/dev/null
|
||||
|
||||
- name: Determine whether to add label
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
CONTAINS_AUTO_BACKPORT: ${{ contains(github.event.pull_request.labels.*.name, 'auto-backport') }}
|
||||
id: add_label
|
||||
run: |
|
||||
pr=${{ github.event.pull_request.number }}
|
||||
linked_issue_urls=$(hub-util.sh \
|
||||
list-issues-for-pr "$pr" |\
|
||||
grep -v "^\#" |\
|
||||
cut -d';' -f3 || true)
|
||||
[ -z "$linked_issue_urls" ] && {
|
||||
echo "::error::No linked issues for PR $pr"
|
||||
exit 1
|
||||
}
|
||||
has_bug=false
|
||||
for issue_url in $(echo "$linked_issue_urls")
|
||||
do
|
||||
issue=$(echo "$issue_url"| awk -F\/ '{print $NF}' || true)
|
||||
[ -z "$issue" ] && {
|
||||
echo "::error::Cannot determine issue number from $issue_url for PR $pr"
|
||||
exit 1
|
||||
}
|
||||
labels=$(hub-util.sh list-labels-for-issue "$issue")
|
||||
|
||||
label_names=$(echo $labels | jq -r '.[].name' || true)
|
||||
if [[ "$label_names" =~ "bug" ]]; then
|
||||
has_bug=true
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
has_backport_needed_label=${{ contains(github.event.pull_request.labels.*.name, 'needs-backport') }}
|
||||
has_no_backport_needed_label=${{ contains(github.event.pull_request.labels.*.name, 'no-backport-needed') }}
|
||||
|
||||
echo "::set-output name=add_backport_label::false"
|
||||
if [ $has_backport_needed_label = true ] || [ $has_bug = true ]; then
|
||||
if [[ $has_no_backport_needed_label = false ]]; then
|
||||
echo "::set-output name=add_backport_label::true"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Do not spam comment, only if auto-backport label is going to be newly added.
|
||||
echo "::set-output name=auto_backport_added::$CONTAINS_AUTO_BACKPORT"
|
||||
|
||||
- name: Add comment
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') && steps.add_label.outputs.add_backport_label == 'true' && steps.add_label.outputs.auto_backport_added == 'false' }}
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
script: |
|
||||
github.rest.issues.createComment({
|
||||
issue_number: context.issue.number,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body: 'This issue has been marked for auto-backporting. Add label(s) backport-to-BRANCHNAME to backport to them'
|
||||
})
|
||||
|
||||
# Allow label to be removed by adding no-backport-needed label
|
||||
- name: Remove auto-backport label
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') && steps.add_label.outputs.add_backport_label == 'false' }}
|
||||
uses: andymckay/labeler@e6c4322d0397f3240f0e7e30a33b5c5df2d39e90
|
||||
with:
|
||||
remove-labels: "auto-backport"
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Add auto-backport label
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') && steps.add_label.outputs.add_backport_label == 'true' }}
|
||||
uses: andymckay/labeler@e6c4322d0397f3240f0e7e30a33b5c5df2d39e90
|
||||
with:
|
||||
add-labels: "auto-backport"
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
29
.github/workflows/auto-backport.yaml
vendored
@@ -1,29 +0,0 @@
|
||||
on:
|
||||
pull_request_target:
|
||||
types: ["labeled", "closed"]
|
||||
|
||||
jobs:
|
||||
backport:
|
||||
name: Backport PR
|
||||
runs-on: ubuntu-latest
|
||||
if: |
|
||||
github.event.pull_request.merged == true
|
||||
&& contains(github.event.pull_request.labels.*.name, 'auto-backport')
|
||||
&& (
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'auto-backport')
|
||||
|| (github.event.action == 'closed')
|
||||
)
|
||||
steps:
|
||||
- name: Backport Action
|
||||
uses: sqren/backport-github-action@v8.9.2
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
auto_backport_label_prefix: backport-to-
|
||||
|
||||
- name: Info log
|
||||
if: ${{ success() }}
|
||||
run: cat /home/runner/.backport/backport.info.log
|
||||
|
||||
- name: Debug log
|
||||
if: ${{ failure() }}
|
||||
run: cat /home/runner/.backport/backport.debug.log
|
||||
26
.github/workflows/cargo-deny-runner.yaml
vendored
@@ -1,26 +0,0 @@
|
||||
name: Cargo Crates Check Runner
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- edited
|
||||
- reopened
|
||||
- synchronize
|
||||
paths-ignore: [ '**.md', '**.png', '**.jpg', '**.jpeg', '**.svg', '/docs/**' ]
|
||||
jobs:
|
||||
cargo-deny-runner:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout Code
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
uses: actions/checkout@v3
|
||||
- name: Generate Action
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: bash cargo-deny-generator.sh
|
||||
working-directory: ./.github/cargo-deny-composite-action/
|
||||
env:
|
||||
GOPATH: ${{ runner.workspace }}/kata-containers
|
||||
- name: Run Action
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
uses: ./.github/cargo-deny-composite-action
|
||||
10
.github/workflows/darwin-tests.yaml
vendored
@@ -5,16 +5,20 @@ on:
|
||||
- edited
|
||||
- reopened
|
||||
- synchronize
|
||||
paths-ignore: [ '**.md', '**.png', '**.jpg', '**.jpeg', '**.svg', '/docs/**' ]
|
||||
|
||||
name: Darwin tests
|
||||
jobs:
|
||||
test:
|
||||
runs-on: macos-latest
|
||||
strategy:
|
||||
matrix:
|
||||
go-version: [1.16.x, 1.17.x]
|
||||
os: [macos-latest]
|
||||
runs-on: ${{ matrix.os }}
|
||||
steps:
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v2
|
||||
with:
|
||||
go-version: 1.19.3
|
||||
go-version: ${{ matrix.go-version }}
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
- name: Build utils
|
||||
|
||||
15
.github/workflows/docs-url-alive-check.yaml
vendored
@@ -5,33 +5,40 @@ on:
|
||||
name: Docs URL Alive Check
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-20.04
|
||||
# don't run this action on forks
|
||||
if: github.repository_owner == 'kata-containers'
|
||||
strategy:
|
||||
matrix:
|
||||
go-version: [1.17.x]
|
||||
os: [ubuntu-20.04]
|
||||
runs-on: ${{ matrix.os }}
|
||||
env:
|
||||
target_branch: ${{ github.base_ref }}
|
||||
steps:
|
||||
- name: Install Go
|
||||
if: github.repository_owner == 'kata-containers'
|
||||
uses: actions/setup-go@v2
|
||||
with:
|
||||
go-version: 1.19.3
|
||||
go-version: ${{ matrix.go-version }}
|
||||
env:
|
||||
GOPATH: ${{ runner.workspace }}/kata-containers
|
||||
- name: Set env
|
||||
if: github.repository_owner == 'kata-containers'
|
||||
run: |
|
||||
echo "GOPATH=${{ github.workspace }}" >> $GITHUB_ENV
|
||||
echo "${{ github.workspace }}/bin" >> $GITHUB_PATH
|
||||
- name: Checkout code
|
||||
if: github.repository_owner == 'kata-containers'
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
path: ./src/github.com/${{ github.repository }}
|
||||
- name: Setup
|
||||
if: github.repository_owner == 'kata-containers'
|
||||
run: |
|
||||
cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/setup.sh
|
||||
env:
|
||||
GOPATH: ${{ runner.workspace }}/kata-containers
|
||||
# docs url alive check
|
||||
- name: Docs URL Alive Check
|
||||
if: github.repository_owner == 'kata-containers'
|
||||
run: |
|
||||
cd ${GOPATH}/src/github.com/${{ github.repository }} && make docs-url-alive-check
|
||||
|
||||
8
.github/workflows/kata-deploy-push.yaml
vendored
@@ -18,7 +18,6 @@ jobs:
|
||||
matrix:
|
||||
asset:
|
||||
- kernel
|
||||
- kernel-dragonball-experimental
|
||||
- shim-v2
|
||||
- qemu
|
||||
- cloud-hypervisor
|
||||
@@ -26,9 +25,14 @@ jobs:
|
||||
- rootfs-image
|
||||
- rootfs-initrd
|
||||
- virtiofsd
|
||||
- nydus
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Install docker
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
curl -fsSL https://test.docker.com -o test-docker.sh
|
||||
sh test-docker.sh
|
||||
|
||||
- name: Build ${{ matrix.asset }}
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
|
||||
43
.github/workflows/kata-deploy-test.yaml
vendored
@@ -1,10 +1,5 @@
|
||||
on:
|
||||
workflow_dispatch: # this is used to trigger the workflow on non-main branches
|
||||
inputs:
|
||||
pr:
|
||||
description: 'PR number from the selected branch to test'
|
||||
type: string
|
||||
required: true
|
||||
issue_comment:
|
||||
types: [created, edited]
|
||||
|
||||
@@ -18,20 +13,19 @@ jobs:
|
||||
&& github.event_name == 'issue_comment'
|
||||
&& github.event.action == 'created'
|
||||
&& startsWith(github.event.comment.body, '/test_kata_deploy')
|
||||
|| github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- name: Check membership on comment or dispatch
|
||||
- name: Check membership
|
||||
uses: kata-containers/is-organization-member@1.0.1
|
||||
id: is_organization_member
|
||||
with:
|
||||
organization: kata-containers
|
||||
username: ${{ github.event.comment.user.login || github.event.sender.login }}
|
||||
username: ${{ github.event.comment.user.login }}
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Fail if not member
|
||||
run: |
|
||||
result=${{ steps.is_organization_member.outputs.result }}
|
||||
if [ $result == false ]; then
|
||||
user=${{ github.event.comment.user.login || github.event.sender.login }}
|
||||
user=${{ github.event.comment.user.login }}
|
||||
echo Either ${user} is not part of the kata-containers organization
|
||||
echo or ${user} has its Organization Visibility set to Private at
|
||||
echo https://github.com/orgs/kata-containers/people?query=${user}
|
||||
@@ -50,8 +44,6 @@ jobs:
|
||||
- cloud-hypervisor
|
||||
- firecracker
|
||||
- kernel
|
||||
- kernel-dragonball-experimental
|
||||
- nydus
|
||||
- qemu
|
||||
- rootfs-image
|
||||
- rootfs-initrd
|
||||
@@ -61,17 +53,18 @@ jobs:
|
||||
- name: get-PR-ref
|
||||
id: get-PR-ref
|
||||
run: |
|
||||
if [ ${{ github.event_name }} == 'issue_comment' ]; then
|
||||
ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
|
||||
else # workflow_dispatch
|
||||
ref="refs/pull/${{ github.event.inputs.pr }}/merge"
|
||||
fi
|
||||
echo "reference for PR: " ${ref} "event:" ${{ github.event_name }}
|
||||
ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
|
||||
echo "reference for PR: " ${ref}
|
||||
echo "##[set-output name=pr-ref;]${ref}"
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
ref: ${{ steps.get-PR-ref.outputs.pr-ref }}
|
||||
|
||||
- name: Install docker
|
||||
run: |
|
||||
curl -fsSL https://test.docker.com -o test-docker.sh
|
||||
sh test-docker.sh
|
||||
|
||||
- name: Build ${{ matrix.asset }}
|
||||
run: |
|
||||
make "${KATA_ASSET}-tarball"
|
||||
@@ -96,12 +89,8 @@ jobs:
|
||||
- name: get-PR-ref
|
||||
id: get-PR-ref
|
||||
run: |
|
||||
if [ ${{ github.event_name }} == 'issue_comment' ]; then
|
||||
ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
|
||||
else # workflow_dispatch
|
||||
ref="refs/pull/${{ github.event.inputs.pr }}/merge"
|
||||
fi
|
||||
echo "reference for PR: " ${ref} "event:" ${{ github.event_name }}
|
||||
ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
|
||||
echo "reference for PR: " ${ref}
|
||||
echo "##[set-output name=pr-ref;]${ref}"
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
@@ -127,12 +116,8 @@ jobs:
|
||||
- name: get-PR-ref
|
||||
id: get-PR-ref
|
||||
run: |
|
||||
if [ ${{ github.event_name }} == 'issue_comment' ]; then
|
||||
ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
|
||||
else # workflow_dispatch
|
||||
ref="refs/pull/${{ github.event.inputs.pr }}/merge"
|
||||
fi
|
||||
echo "reference for PR: " ${ref} "event:" ${{ github.event_name }}
|
||||
ref=$(cat $GITHUB_EVENT_PATH | jq -r '.issue.pull_request.url' | sed 's#^.*\/pulls#refs\/pull#' | sed 's#$#\/merge#')
|
||||
echo "reference for PR: " ${ref}
|
||||
echo "##[set-output name=pr-ref;]${ref}"
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
|
||||
14
.github/workflows/release.yaml
vendored
@@ -1,8 +1,8 @@
|
||||
name: Publish Kata release artifacts
|
||||
name: Publish Kata 2.x release artifacts
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '[0-9]+.[0-9]+.[0-9]+*'
|
||||
- '2.*'
|
||||
|
||||
jobs:
|
||||
build-asset:
|
||||
@@ -13,8 +13,6 @@ jobs:
|
||||
- cloud-hypervisor
|
||||
- firecracker
|
||||
- kernel
|
||||
- kernel-dragonball-experimental
|
||||
- nydus
|
||||
- qemu
|
||||
- rootfs-image
|
||||
- rootfs-initrd
|
||||
@@ -22,6 +20,11 @@ jobs:
|
||||
- virtiofsd
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Install docker
|
||||
run: |
|
||||
curl -fsSL https://test.docker.com -o test-docker.sh
|
||||
sh test-docker.sh
|
||||
|
||||
- name: Build ${{ matrix.asset }}
|
||||
run: |
|
||||
./tools/packaging/kata-deploy/local-build/kata-deploy-copy-yq-installer.sh
|
||||
@@ -119,7 +122,8 @@ jobs:
|
||||
name: kata-static-tarball
|
||||
- name: install hub
|
||||
run: |
|
||||
wget -q -O- https://github.com/mislav/hub/releases/download/v2.14.2/hub-linux-amd64-2.14.2.tgz | \
|
||||
HUB_VER=$(curl -s "https://api.github.com/repos/github/hub/releases/latest" | jq -r .tag_name | sed 's/^v//')
|
||||
wget -q -O- https://github.com/github/hub/releases/download/v$HUB_VER/hub-linux-amd64-$HUB_VER.tgz | \
|
||||
tar xz --strip-components=2 --wildcards '*/bin/hub' && sudo mv hub /usr/local/bin/hub
|
||||
- name: push static tarball to github
|
||||
run: |
|
||||
|
||||
21
.github/workflows/snap-release.yaml
vendored
@@ -1,12 +1,8 @@
|
||||
name: Release Kata in snapcraft store
|
||||
name: Release Kata 2.x in snapcraft store
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '[0-9]+.[0-9]+.[0-9]+*'
|
||||
|
||||
env:
|
||||
SNAPCRAFT_STORE_CREDENTIALS: ${{ secrets.snapcraft_token }}
|
||||
|
||||
- '2.*'
|
||||
jobs:
|
||||
release-snap:
|
||||
runs-on: ubuntu-20.04
|
||||
@@ -17,16 +13,9 @@ jobs:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install Snapcraft
|
||||
run: |
|
||||
# Required to avoid snapcraft install failure
|
||||
sudo chown root:root /
|
||||
|
||||
# "--classic" is needed for the GitHub action runner
|
||||
# environment.
|
||||
sudo snap install snapcraft --classic
|
||||
|
||||
# Allow other parts to access snap binaries
|
||||
echo /snap/bin >> "$GITHUB_PATH"
|
||||
uses: samuelmeuli/action-snapcraft@v1
|
||||
with:
|
||||
snapcraft_token: ${{ secrets.snapcraft_token }}
|
||||
|
||||
- name: Build snap
|
||||
run: |
|
||||
|
||||
12
.github/workflows/snap.yaml
vendored
@@ -6,7 +6,6 @@ on:
|
||||
- synchronize
|
||||
- reopened
|
||||
- edited
|
||||
paths-ignore: [ '**.md', '**.png', '**.jpg', '**.jpeg', '**.svg', '/docs/**' ]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
@@ -20,16 +19,7 @@ jobs:
|
||||
|
||||
- name: Install Snapcraft
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
# Required to avoid snapcraft install failure
|
||||
sudo chown root:root /
|
||||
|
||||
# "--classic" is needed for the GitHub action runner
|
||||
# environment.
|
||||
sudo snap install snapcraft --classic
|
||||
|
||||
# Allow other parts to access snap binaries
|
||||
echo /snap/bin >> "$GITHUB_PATH"
|
||||
uses: samuelmeuli/action-snapcraft@v1
|
||||
|
||||
- name: Build snap
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
|
||||
33
.github/workflows/static-checks-dragonball.yaml
vendored
@@ -1,33 +0,0 @@
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- edited
|
||||
- reopened
|
||||
- synchronize
|
||||
paths-ignore: [ '**.md', '**.png', '**.jpg', '**.jpeg', '**.svg', '/docs/**' ]
|
||||
|
||||
name: Static checks dragonball
|
||||
jobs:
|
||||
test-dragonball:
|
||||
runs-on: self-hosted
|
||||
env:
|
||||
RUST_BACKTRACE: "1"
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set env
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
echo "GOPATH=${{ github.workspace }}" >> $GITHUB_ENV
|
||||
- name: Install Rust
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
./ci/install_rust.sh
|
||||
PATH=$PATH:"$HOME/.cargo/bin"
|
||||
- name: Run Unit Test
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
cd src/dragonball
|
||||
cargo version
|
||||
rustc --version
|
||||
sudo -E env PATH=$PATH LIBC=gnu SUPPORT_VIRTUALIZATION=true make test
|
||||
66
.github/workflows/static-checks.yaml
vendored
@@ -8,16 +8,12 @@ on:
|
||||
|
||||
name: Static checks
|
||||
jobs:
|
||||
static-checks:
|
||||
runs-on: ubuntu-20.04
|
||||
test:
|
||||
strategy:
|
||||
matrix:
|
||||
cmd:
|
||||
- "make vendor"
|
||||
- "make static-checks"
|
||||
- "make check"
|
||||
- "make test"
|
||||
- "sudo -E PATH=\"$PATH\" make test"
|
||||
go-version: [1.16.x, 1.17.x]
|
||||
os: [ubuntu-20.04]
|
||||
runs-on: ${{ matrix.os }}
|
||||
env:
|
||||
TRAVIS: "true"
|
||||
TRAVIS_BRANCH: ${{ github.base_ref }}
|
||||
@@ -26,33 +22,13 @@ jobs:
|
||||
RUST_BACKTRACE: "1"
|
||||
target_branch: ${{ github.base_ref }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
path: ./src/github.com/${{ github.repository }}
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v3
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
uses: actions/setup-go@v2
|
||||
with:
|
||||
go-version: 1.19.3
|
||||
go-version: ${{ matrix.go-version }}
|
||||
env:
|
||||
GOPATH: ${{ runner.workspace }}/kata-containers
|
||||
- name: Check kernel config version
|
||||
run: |
|
||||
cd "${{ github.workspace }}/src/github.com/${{ github.repository }}"
|
||||
kernel_dir="tools/packaging/kernel/"
|
||||
kernel_version_file="${kernel_dir}kata_config_version"
|
||||
modified_files=$(git diff --name-only origin/main..HEAD)
|
||||
result=$(git whatchanged origin/main..HEAD "${kernel_dir}" >>"/dev/null")
|
||||
if git whatchanged origin/main..HEAD "${kernel_dir}" >>"/dev/null"; then
|
||||
echo "Kernel directory has changed, checking if $kernel_version_file has been updated"
|
||||
if echo "$modified_files" | grep -v "README.md" | grep "${kernel_dir}" >>"/dev/null"; then
|
||||
echo "$modified_files" | grep "$kernel_version_file" >>/dev/null || ( echo "Please bump version in $kernel_version_file" && exit 1)
|
||||
else
|
||||
echo "Readme file changed, no need for kernel config version update."
|
||||
fi
|
||||
echo "Check passed"
|
||||
fi
|
||||
- name: Setup GOPATH
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
@@ -65,6 +41,12 @@ jobs:
|
||||
run: |
|
||||
echo "GOPATH=${{ github.workspace }}" >> $GITHUB_ENV
|
||||
echo "${{ github.workspace }}/bin" >> $GITHUB_PATH
|
||||
- name: Checkout code
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
path: ./src/github.com/${{ github.repository }}
|
||||
- name: Setup travis references
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
@@ -84,7 +66,6 @@ jobs:
|
||||
rustup target add x86_64-unknown-linux-musl
|
||||
rustup component add rustfmt clippy
|
||||
- name: Setup seccomp
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
libseccomp_install_dir=$(mktemp -d -t libseccomp.XXXXXXXXXX)
|
||||
gperf_install_dir=$(mktemp -d -t gperf.XXXXXXXXXX)
|
||||
@@ -92,7 +73,24 @@ jobs:
|
||||
echo "Set environment variables for the libseccomp crate to link the libseccomp library statically"
|
||||
echo "LIBSECCOMP_LINK_TYPE=static" >> $GITHUB_ENV
|
||||
echo "LIBSECCOMP_LIB_PATH=${libseccomp_install_dir}/lib" >> $GITHUB_ENV
|
||||
- name: Run check
|
||||
# Check whether the vendored code is up-to-date & working as the first thing
|
||||
- name: Check vendored code
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
cd ${GOPATH}/src/github.com/${{ github.repository }} && ${{ matrix.cmd }}
|
||||
cd ${GOPATH}/src/github.com/${{ github.repository }} && make vendor
|
||||
- name: Static Checks
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
cd ${GOPATH}/src/github.com/${{ github.repository }} && make static-checks
|
||||
- name: Run Compiler Checks
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
cd ${GOPATH}/src/github.com/${{ github.repository }} && make check
|
||||
- name: Run Unit Tests
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
cd ${GOPATH}/src/github.com/${{ github.repository }} && make test
|
||||
- name: Run Unit Tests As Root User
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
|
||||
run: |
|
||||
cd ${GOPATH}/src/github.com/${{ github.repository }} && sudo -E PATH="$PATH" make test
|
||||
|
||||
3
.gitignore
vendored
@@ -4,8 +4,6 @@
|
||||
**/*.rej
|
||||
**/target
|
||||
**/.vscode
|
||||
**/.idea
|
||||
**/.fleet
|
||||
pkg/logging/Cargo.lock
|
||||
src/agent/src/version.rs
|
||||
src/agent/kata-agent.service
|
||||
@@ -13,3 +11,4 @@ src/agent/protocols/src/*.rs
|
||||
!src/agent/protocols/src/lib.rs
|
||||
build
|
||||
src/tools/log-parser/kata-log-parser
|
||||
|
||||
|
||||
24
Makefile
@@ -6,25 +6,26 @@
|
||||
# List of available components
|
||||
COMPONENTS =
|
||||
|
||||
COMPONENTS += libs
|
||||
COMPONENTS += agent
|
||||
COMPONENTS += dragonball
|
||||
COMPONENTS += runtime
|
||||
COMPONENTS += runtime-rs
|
||||
|
||||
# List of available tools
|
||||
TOOLS =
|
||||
|
||||
TOOLS += agent-ctl
|
||||
TOOLS += kata-ctl
|
||||
TOOLS += log-parser
|
||||
TOOLS += runk
|
||||
TOOLS += trace-forwarder
|
||||
TOOLS += runk
|
||||
TOOLS += log-parser
|
||||
|
||||
STANDARD_TARGETS = build check clean install static-checks-build test vendor
|
||||
STANDARD_TARGETS = build check clean install test vendor
|
||||
|
||||
default: all
|
||||
|
||||
all: logging-crate-tests build
|
||||
|
||||
logging-crate-tests:
|
||||
make -C src/libs/logging
|
||||
|
||||
include utils.mk
|
||||
include ./tools/packaging/kata-deploy/local-build/Makefile
|
||||
|
||||
@@ -37,7 +38,7 @@ generate-protocols:
|
||||
make -C src/agent generate-protocols
|
||||
|
||||
# Some static checks rely on generated source files of components.
|
||||
static-checks: static-checks-build
|
||||
static-checks: build
|
||||
bash ci/static-checks.sh
|
||||
|
||||
docs-url-alive-check:
|
||||
@@ -45,8 +46,11 @@ docs-url-alive-check:
|
||||
|
||||
.PHONY: \
|
||||
all \
|
||||
kata-tarball \
|
||||
install-tarball \
|
||||
binary-tarball \
|
||||
default \
|
||||
install-binary-tarball \
|
||||
logging-crate-tests \
|
||||
static-checks \
|
||||
docs-url-alive-check
|
||||
|
||||
|
||||
|
||||
@@ -71,7 +71,6 @@ See the [official documentation](docs) including:
|
||||
- [Developer guide](docs/Developer-Guide.md)
|
||||
- [Design documents](docs/design)
|
||||
- [Architecture overview](docs/design/architecture)
|
||||
- [Architecture 3.0 overview](docs/design/architecture_3.0/)
|
||||
|
||||
## Configuration
|
||||
|
||||
@@ -117,10 +116,9 @@ The table below lists the core parts of the project:
|
||||
| Component | Type | Description |
|
||||
|-|-|-|
|
||||
| [runtime](src/runtime) | core | Main component run by a container manager and providing a containerd shimv2 runtime implementation. |
|
||||
| [runtime-rs](src/runtime-rs) | core | The Rust version runtime. |
|
||||
| [agent](src/agent) | core | Management process running inside the virtual machine / POD that sets up the container environment. |
|
||||
| [`dragonball`](src/dragonball) | core | An optional built-in VMM brings out-of-the-box Kata Containers experience with optimizations on container workloads |
|
||||
| [documentation](docs) | documentation | Documentation common to all components (such as design and install documentation). |
|
||||
| [libraries](src/libs) | core | Library crates shared by multiple Kata Container components or published to [`crates.io`](https://crates.io/index.html) |
|
||||
| [tests](https://github.com/kata-containers/tests) | tests | Excludes unit tests which live with the main code. |
|
||||
|
||||
### Additional components
|
||||
@@ -133,7 +131,6 @@ The table below lists the remaining parts of the project:
|
||||
| [kernel](https://www.kernel.org) | kernel | Linux kernel used by the hypervisor to boot the guest image. Patches are stored [here](tools/packaging/kernel). |
|
||||
| [osbuilder](tools/osbuilder) | infrastructure | Tool to create "mini O/S" rootfs and initrd images and kernel for the hypervisor. |
|
||||
| [`agent-ctl`](src/tools/agent-ctl) | utility | Tool that provides low-level access for testing the agent. |
|
||||
| [`kata-ctl`](src/tools/kata-ctl) | utility | Tool that provides advanced commands and debug facilities. |
|
||||
| [`trace-forwarder`](src/tools/trace-forwarder) | utility | Agent tracing helper. |
|
||||
| [`runk`](src/tools/runk) | utility | Standard OCI container runtime based on the agent. |
|
||||
| [`ci`](https://github.com/kata-containers/ci) | CI | Continuous Integration configuration files and scripts. |
|
||||
|
||||
@@ -23,27 +23,25 @@ arch=${ARCH:-$(uname -m)}
|
||||
workdir="$(mktemp -d --tmpdir build-libseccomp.XXXXX)"
|
||||
|
||||
# Variables for libseccomp
|
||||
libseccomp_version="${LIBSECCOMP_VERSION:-""}"
|
||||
if [ -z "${libseccomp_version}" ]; then
|
||||
libseccomp_version=$(get_version "externals.libseccomp.version")
|
||||
fi
|
||||
libseccomp_url="${LIBSECCOMP_URL:-""}"
|
||||
if [ -z "${libseccomp_url}" ]; then
|
||||
libseccomp_url=$(get_version "externals.libseccomp.url")
|
||||
fi
|
||||
# Currently, specify the libseccomp version directly without using `versions.yaml`
|
||||
# because the current Snap workflow is incomplete.
|
||||
# After solving the issue, replace this code by using the `versions.yaml`.
|
||||
# libseccomp_version=$(get_version "externals.libseccomp.version")
|
||||
# libseccomp_url=$(get_version "externals.libseccomp.url")
|
||||
libseccomp_version="2.5.1"
|
||||
libseccomp_url="https://github.com/seccomp/libseccomp"
|
||||
libseccomp_tarball="libseccomp-${libseccomp_version}.tar.gz"
|
||||
libseccomp_tarball_url="${libseccomp_url}/releases/download/v${libseccomp_version}/${libseccomp_tarball}"
|
||||
cflags="-O2"
|
||||
|
||||
# Variables for gperf
|
||||
gperf_version="${GPERF_VERSION:-""}"
|
||||
if [ -z "${gperf_version}" ]; then
|
||||
gperf_version=$(get_version "externals.gperf.version")
|
||||
fi
|
||||
gperf_url="${GPERF_URL:-""}"
|
||||
if [ -z "${gperf_url}" ]; then
|
||||
gperf_url=$(get_version "externals.gperf.url")
|
||||
fi
|
||||
# Currently, specify the gperf version directly without using `versions.yaml`
|
||||
# because the current Snap workflow is incomplete.
|
||||
# After solving the issue, replace this code by using the `versions.yaml`.
|
||||
# gperf_version=$(get_version "externals.gperf.version")
|
||||
# gperf_url=$(get_version "externals.gperf.url")
|
||||
gperf_version="3.1"
|
||||
gperf_url="https://ftp.gnu.org/gnu/gperf"
|
||||
gperf_tarball="gperf-${gperf_version}.tar.gz"
|
||||
gperf_tarball_url="${gperf_url}/${gperf_tarball}"
|
||||
|
||||
|
||||
@@ -43,16 +43,6 @@ function install_yq() {
|
||||
"aarch64")
|
||||
goarch=arm64
|
||||
;;
|
||||
"arm64")
|
||||
# If we're on an apple silicon machine, just assign amd64.
|
||||
# The version of yq we use doesn't have a darwin arm build,
|
||||
# but Rosetta can come to the rescue here.
|
||||
if [ $goos == "Darwin" ]; then
|
||||
goarch=amd64
|
||||
else
|
||||
goarch=arm64
|
||||
fi
|
||||
;;
|
||||
"ppc64le")
|
||||
goarch=ppc64le
|
||||
;;
|
||||
@@ -74,7 +64,7 @@ function install_yq() {
|
||||
fi
|
||||
|
||||
## NOTE: ${var,,} => gives lowercase value of var
|
||||
local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos}_${goarch}"
|
||||
local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos,,}_${goarch}"
|
||||
curl -o "${yq_path}" -LSsf "${yq_url}"
|
||||
[ $? -ne 0 ] && die "Download ${yq_url} failed"
|
||||
chmod +x "${yq_path}"
|
||||
|
||||
10
ci/lib.sh
@@ -54,13 +54,3 @@ run_docs_url_alive_check()
|
||||
git fetch -a
|
||||
bash "$tests_repo_dir/.ci/static-checks.sh" --docs --all "github.com/kata-containers/kata-containers"
|
||||
}
|
||||
|
||||
run_get_pr_changed_file_details()
|
||||
{
|
||||
clone_tests_repo
|
||||
# Make sure we have the targeting branch
|
||||
git remote set-branches --add origin "${branch}"
|
||||
git fetch -a
|
||||
source "$tests_repo_dir/.ci/lib.sh"
|
||||
get_pr_changed_file_details
|
||||
}
|
||||
|
||||
33
deny.toml
@@ -1,33 +0,0 @@
|
||||
targets = [
|
||||
{ triple = "x86_64-apple-darwin" },
|
||||
{ triple = "x86_64-unknown-linux-gnu" },
|
||||
{ triple = "x86_64-unknown-linux-musl" },
|
||||
]
|
||||
|
||||
[advisories]
|
||||
vulnerability = "deny"
|
||||
unsound = "deny"
|
||||
unmaintained = "deny"
|
||||
ignore = ["RUSTSEC-2020-0071"]
|
||||
|
||||
[bans]
|
||||
multiple-versions = "allow"
|
||||
deny = [
|
||||
{ name = "cmake" },
|
||||
{ name = "openssl-sys" },
|
||||
]
|
||||
|
||||
[licenses]
|
||||
unlicensed = "deny"
|
||||
allow-osi-fsf-free = "neither"
|
||||
copyleft = "allow"
|
||||
# We want really high confidence when inferring licenses from text
|
||||
confidence-threshold = 0.93
|
||||
allow = ["0BSD", "Apache-2.0", "BSD-2-Clause", "BSD-3-Clause", "CC0-1.0", "ISC", "MIT", "MPL-2.0"]
|
||||
private = { ignore = true}
|
||||
|
||||
exceptions = []
|
||||
|
||||
[sources]
|
||||
unknown-registry = "allow"
|
||||
unknown-git = "allow"
|
||||
@@ -33,41 +33,51 @@ You need to install the following to build Kata Containers components:
|
||||
- `make`.
|
||||
- `gcc` (required for building the shim and runtime).
|
||||
|
||||
# Build and install Kata Containers
|
||||
## Build and install the Kata Containers runtime
|
||||
# Build and install the Kata Containers runtime
|
||||
|
||||
```bash
|
||||
$ git clone https://github.com/kata-containers/kata-containers.git
|
||||
$ pushd kata-containers/src/runtime
|
||||
$ make && sudo -E "PATH=$PATH" make install
|
||||
$ sudo mkdir -p /etc/kata-containers/
|
||||
$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers
|
||||
$ popd
|
||||
```
|
||||
$ go get -d -u github.com/kata-containers/kata-containers
|
||||
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/src/runtime
|
||||
$ make && sudo -E PATH=$PATH make install
|
||||
```
|
||||
|
||||
The build will create the following:
|
||||
|
||||
- runtime binary: `/usr/local/bin/kata-runtime` and `/usr/local/bin/containerd-shim-kata-v2`
|
||||
- configuration file: `/usr/share/defaults/kata-containers/configuration.toml` and `/etc/kata-containers/configuration.toml`
|
||||
- configuration file: `/usr/share/defaults/kata-containers/configuration.toml`
|
||||
|
||||
# Check hardware requirements
|
||||
|
||||
You can check if your system is capable of creating a Kata Container by running the following:
|
||||
|
||||
```
|
||||
$ sudo kata-runtime check
|
||||
```
|
||||
|
||||
If your system is *not* able to run Kata Containers, the previous command will error out and explain why.
|
||||
|
||||
## Configure to use initrd or rootfs image
|
||||
|
||||
Kata containers can run with either an initrd image or a rootfs image.
|
||||
|
||||
If you want to test with `initrd`, make sure you have uncommented `initrd = /usr/share/kata-containers/kata-containers-initrd.img`
|
||||
in your configuration file, commenting out the `image` line in
|
||||
`/etc/kata-containers/configuration.toml`. For example:
|
||||
If you want to test with `initrd`, make sure you have `initrd = /usr/share/kata-containers/kata-containers-initrd.img`
|
||||
in your configuration file, commenting out the `image` line:
|
||||
|
||||
```bash
|
||||
`/usr/share/defaults/kata-containers/configuration.toml` and comment out the `image` line with the following. For example:
|
||||
|
||||
```
|
||||
$ sudo mkdir -p /etc/kata-containers/
|
||||
$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers
|
||||
$ sudo sed -i 's/^\(image =.*\)/# \1/g' /etc/kata-containers/configuration.toml
|
||||
$ sudo sed -i 's/^# \(initrd =.*\)/\1/g' /etc/kata-containers/configuration.toml
|
||||
```
|
||||
You can create the initrd image as shown in the [create an initrd image](#create-an-initrd-image---optional) section.
|
||||
|
||||
If you want to test with a rootfs `image`, make sure you have uncommented `image = /usr/share/kata-containers/kata-containers.img`
|
||||
If you want to test with a rootfs `image`, make sure you have `image = /usr/share/kata-containers/kata-containers.img`
|
||||
in your configuration file, commenting out the `initrd` line. For example:
|
||||
|
||||
```bash
|
||||
```
|
||||
$ sudo mkdir -p /etc/kata-containers/
|
||||
$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers
|
||||
$ sudo sed -i 's/^\(initrd =.*\)/# \1/g' /etc/kata-containers/configuration.toml
|
||||
```
|
||||
The rootfs image is created as shown in the [create a rootfs image](#create-a-rootfs-image) section.
|
||||
@@ -80,38 +90,19 @@ rootfs `image`(100MB+).
|
||||
|
||||
Enable seccomp as follows:
|
||||
|
||||
```bash
|
||||
```
|
||||
$ sudo sed -i '/^disable_guest_seccomp/ s/true/false/' /etc/kata-containers/configuration.toml
|
||||
```
|
||||
|
||||
This will pass container seccomp profiles to the kata agent.
|
||||
|
||||
## Enable SELinux on the guest
|
||||
|
||||
> **Note:**
|
||||
>
|
||||
> - To enable SELinux on the guest, SELinux MUST be also enabled on the host.
|
||||
> - You MUST create and build a rootfs image for SELinux in advance.
|
||||
> See [Create a rootfs image](#create-a-rootfs-image) and [Build a rootfs image](#build-a-rootfs-image).
|
||||
> - SELinux on the guest is supported in only a rootfs image currently, so
|
||||
> you cannot enable SELinux with the agent init (`AGENT_INIT=yes`) yet.
|
||||
|
||||
Enable guest SELinux in Enforcing mode as follows:
|
||||
|
||||
```
|
||||
$ sudo sed -i '/^disable_guest_selinux/ s/true/false/g' /etc/kata-containers/configuration.toml
|
||||
```
|
||||
|
||||
The runtime automatically will set `selinux=1` to the kernel parameters and `xattr` option to
|
||||
`virtiofsd` when `disable_guest_selinux` is set to `false`.
|
||||
|
||||
If you want to enable SELinux in Permissive mode, add `enforcing=0` to the kernel parameters.
|
||||
|
||||
## Enable full debug
|
||||
|
||||
Enable full debug as follows:
|
||||
|
||||
```bash
|
||||
```
|
||||
$ sudo mkdir -p /etc/kata-containers/
|
||||
$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers
|
||||
$ sudo sed -i -e 's/^# *\(enable_debug\).*=.*$/\1 = true/g' /etc/kata-containers/configuration.toml
|
||||
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.log=debug initcall_debug"/g' /etc/kata-containers/configuration.toml
|
||||
```
|
||||
@@ -184,7 +175,7 @@ and offers possible workarounds and fixes.
|
||||
it stores. When messages are suppressed, it is noted in the logs. This can be checked
|
||||
for by looking for those notifications, such as:
|
||||
|
||||
```bash
|
||||
```sh
|
||||
$ sudo journalctl --since today | fgrep Suppressed
|
||||
Jun 29 14:51:17 mymachine systemd-journald[346]: Suppressed 4150 messages from /system.slice/docker.service
|
||||
```
|
||||
@@ -209,7 +200,7 @@ RateLimitBurst=0
|
||||
|
||||
Restart `systemd-journald` for the changes to take effect:
|
||||
|
||||
```bash
|
||||
```sh
|
||||
$ sudo systemctl restart systemd-journald
|
||||
```
|
||||
|
||||
@@ -223,52 +214,39 @@ $ sudo systemctl restart systemd-journald
|
||||
|
||||
The agent is built with a statically linked `musl.` The default `libc` used is `musl`, but on `ppc64le` and `s390x`, `gnu` should be used. To configure this:
|
||||
|
||||
```bash
|
||||
$ export ARCH="$(uname -m)"
|
||||
```
|
||||
$ export ARCH=$(uname -m)
|
||||
$ if [ "$ARCH" = "ppc64le" -o "$ARCH" = "s390x" ]; then export LIBC=gnu; else export LIBC=musl; fi
|
||||
$ [ "${ARCH}" == "ppc64le" ] && export ARCH=powerpc64le
|
||||
$ rustup target add "${ARCH}-unknown-linux-${LIBC}"
|
||||
$ [ ${ARCH} == "ppc64le" ] && export ARCH=powerpc64le
|
||||
$ rustup target add ${ARCH}-unknown-linux-${LIBC}
|
||||
```
|
||||
|
||||
To build the agent:
|
||||
|
||||
```
|
||||
$ go get -d -u github.com/kata-containers/kata-containers
|
||||
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/src/agent && make
|
||||
```
|
||||
|
||||
The agent is built with seccomp capability by default.
|
||||
If you want to build the agent without the seccomp capability, you need to run `make` with `SECCOMP=no` as follows.
|
||||
|
||||
```bash
|
||||
$ make -C kata-containers/src/agent SECCOMP=no
|
||||
```
|
||||
|
||||
For building the agent with seccomp support using `musl`, set the environment
|
||||
variables for the [`libseccomp` crate](https://github.com/libseccomp-rs/libseccomp-rs).
|
||||
|
||||
```bash
|
||||
$ export LIBSECCOMP_LINK_TYPE=static
|
||||
$ export LIBSECCOMP_LIB_PATH="the path of the directory containing libseccomp.a"
|
||||
$ make -C kata-containers/src/agent
|
||||
$ make -C $GOPATH/src/github.com/kata-containers/kata-containers/src/agent SECCOMP=no
|
||||
```
|
||||
|
||||
If the compilation fails when the agent tries to link the `libseccomp` library statically
|
||||
against `musl`, you will need to build `libseccomp` manually with `-U_FORTIFY_SOURCE`.
|
||||
You can use [our script](https://github.com/kata-containers/kata-containers/blob/main/ci/install_libseccomp.sh)
|
||||
to install `libseccomp` for the agent.
|
||||
|
||||
```bash
|
||||
$ mkdir -p ${seccomp_install_path} ${gperf_install_path}
|
||||
$ kata-containers/ci/install_libseccomp.sh ${seccomp_install_path} ${gperf_install_path}
|
||||
$ export LIBSECCOMP_LIB_PATH="${seccomp_install_path}/lib"
|
||||
```
|
||||
|
||||
On `ppc64le` and `s390x`, `glibc` is used. You will need to install the `libseccomp` library
|
||||
provided by your distribution.
|
||||
|
||||
> e.g. `libseccomp-dev` for Ubuntu, or `libseccomp-devel` for CentOS
|
||||
|
||||
> **Note:**
|
||||
>
|
||||
> - If you enable seccomp in the main configuration file but build the agent without seccomp capability,
|
||||
> the runtime exits conservatively with an error message.
|
||||
|
||||
## Get the osbuilder
|
||||
|
||||
```
|
||||
$ go get -d -u github.com/kata-containers/kata-containers
|
||||
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder
|
||||
```
|
||||
|
||||
## Create a rootfs image
|
||||
### Create a local rootfs
|
||||
|
||||
@@ -276,32 +254,24 @@ As a prerequisite, you need to install Docker. Otherwise, you will not be
|
||||
able to run the `rootfs.sh` script with `USE_DOCKER=true` as expected in
|
||||
the following example.
|
||||
|
||||
```bash
|
||||
$ export distro="ubuntu" # example
|
||||
$ export ROOTFS_DIR="$(realpath kata-containers/tools/osbuilder/rootfs-builder/rootfs)"
|
||||
$ sudo rm -rf "${ROOTFS_DIR}"
|
||||
$ pushd kata-containers/tools/osbuilder/rootfs-builder
|
||||
$ script -fec 'sudo -E USE_DOCKER=true ./rootfs.sh "${distro}"'
|
||||
$ popd
|
||||
```
|
||||
$ export ROOTFS_DIR=${GOPATH}/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder/rootfs
|
||||
$ sudo rm -rf ${ROOTFS_DIR}
|
||||
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder
|
||||
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true ./rootfs.sh ${distro}'
|
||||
```
|
||||
|
||||
You MUST choose a distribution (e.g., `ubuntu`) for `${distro}`.
|
||||
You can get a supported distributions list in the Kata Containers by running the following.
|
||||
|
||||
```bash
|
||||
$ ./kata-containers/tools/osbuilder/rootfs-builder/rootfs.sh -l
|
||||
```
|
||||
$ ./rootfs.sh -l
|
||||
```
|
||||
|
||||
If you want to build the agent without seccomp capability, you need to run the `rootfs.sh` script with `SECCOMP=no` as follows.
|
||||
|
||||
```bash
|
||||
$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh "${distro}"'
|
||||
```
|
||||
|
||||
If you want to enable SELinux on the guest, you MUST choose `centos` and run the `rootfs.sh` script with `SELINUX=yes` as follows.
|
||||
|
||||
```
|
||||
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SELINUX=yes ./rootfs.sh centos'
|
||||
$ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}'
|
||||
```
|
||||
|
||||
> **Note:**
|
||||
@@ -317,32 +287,18 @@ $ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SELINUX=yes ./rootfs.sh ce
|
||||
>
|
||||
> - You should only do this step if you are testing with the latest version of the agent.
|
||||
|
||||
```bash
|
||||
$ sudo install -o root -g root -m 0550 -t "${ROOTFS_DIR}/usr/bin" "${ROOTFS_DIR}/../../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent"
|
||||
$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-agent.service" "${ROOTFS_DIR}/usr/lib/systemd/system/"
|
||||
$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-containers.target" "${ROOTFS_DIR}/usr/lib/systemd/system/"
|
||||
```
|
||||
$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/usr/bin ../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent
|
||||
$ sudo install -o root -g root -m 0440 ../../../src/agent/kata-agent.service ${ROOTFS_DIR}/usr/lib/systemd/system/
|
||||
$ sudo install -o root -g root -m 0440 ../../../src/agent/kata-containers.target ${ROOTFS_DIR}/usr/lib/systemd/system/
|
||||
```
|
||||
|
||||
### Build a rootfs image
|
||||
|
||||
```bash
|
||||
$ pushd kata-containers/tools/osbuilder/image-builder
|
||||
$ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh "${ROOTFS_DIR}"'
|
||||
$ popd
|
||||
```
|
||||
|
||||
If you want to enable SELinux on the guest, you MUST run the `image_builder.sh` script with `SELINUX=yes`
|
||||
to label the guest image as follows.
|
||||
To label the image on the host, you need to make sure that SELinux is enabled (`selinuxfs` is mounted) on the host
|
||||
and the rootfs MUST be created by running the `rootfs.sh` with `SELINUX=yes`.
|
||||
|
||||
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/image-builder
|
||||
$ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
|
||||
```
|
||||
$ script -fec 'sudo -E USE_DOCKER=true SELINUX=yes ./image_builder.sh ${ROOTFS_DIR}'
|
||||
```
|
||||
|
||||
Currently, the `image_builder.sh` uses `chcon` as an interim solution in order to apply `container_runtime_exec_t`
|
||||
to the `kata-agent`. Hence, if you run `restorecon` to the guest image after running the `image_builder.sh`,
|
||||
the `kata-agent` needs to be labeled `container_runtime_exec_t` again by yourself.
|
||||
|
||||
> **Notes:**
|
||||
>
|
||||
@@ -353,31 +309,25 @@ the `kata-agent` needs to be labeled `container_runtime_exec_t` again by yoursel
|
||||
> variable in the previous command and ensure the `qemu-img` command is
|
||||
> available on your system.
|
||||
> - If `qemu-img` is not installed, you will likely see errors such as `ERROR: File /dev/loop19p1 is not a block device` and `losetup: /tmp/tmp.bHz11oY851: Warning: file is smaller than 512 bytes; the loop device may be useless or invisible for system tools`. These can be mitigated by installing the `qemu-img` command (available in the `qemu-img` package on Fedora or the `qemu-utils` package on Debian).
|
||||
> - If `loop` module is not probed, you will likely see errors such as `losetup: cannot find an unused loop device`. Execute `modprobe loop` could resolve it.
|
||||
|
||||
|
||||
### Install the rootfs image
|
||||
|
||||
```bash
|
||||
$ pushd kata-containers/tools/osbuilder/image-builder
|
||||
$ commit="$(git log --format=%h -1 HEAD)"
|
||||
$ date="$(date +%Y-%m-%d-%T.%N%z)"
|
||||
```
|
||||
$ commit=$(git log --format=%h -1 HEAD)
|
||||
$ date=$(date +%Y-%m-%d-%T.%N%z)
|
||||
$ image="kata-containers-${date}-${commit}"
|
||||
$ sudo install -o root -g root -m 0640 -D kata-containers.img "/usr/share/kata-containers/${image}"
|
||||
$ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers.img)
|
||||
$ popd
|
||||
```
|
||||
|
||||
## Create an initrd image - OPTIONAL
|
||||
### Create a local rootfs for initrd image
|
||||
|
||||
```bash
|
||||
$ export distro="ubuntu" # example
|
||||
$ export ROOTFS_DIR="$(realpath kata-containers/tools/osbuilder/rootfs-builder/rootfs)"
|
||||
$ sudo rm -rf "${ROOTFS_DIR}"
|
||||
$ pushd kata-containers/tools/osbuilder/rootfs-builder/
|
||||
$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true ./rootfs.sh "${distro}"'
|
||||
$ popd
|
||||
```
|
||||
$ export ROOTFS_DIR="${GOPATH}/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder/rootfs"
|
||||
$ sudo rm -rf ${ROOTFS_DIR}
|
||||
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder
|
||||
$ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true ./rootfs.sh ${distro}'
|
||||
```
|
||||
`AGENT_INIT` controls if the guest image uses the Kata agent as the guest `init` process. When you create an initrd image,
|
||||
always set `AGENT_INIT` to `yes`.
|
||||
@@ -385,14 +335,14 @@ always set `AGENT_INIT` to `yes`.
|
||||
You MUST choose a distribution (e.g., `ubuntu`) for `${distro}`.
|
||||
You can get a supported distributions list in the Kata Containers by running the following.
|
||||
|
||||
```bash
|
||||
$ ./kata-containers/tools/osbuilder/rootfs-builder/rootfs.sh -l
|
||||
```
|
||||
$ ./rootfs.sh -l
|
||||
```
|
||||
|
||||
If you want to build the agent without seccomp capability, you need to run the `rootfs.sh` script with `SECCOMP=no` as follows.
|
||||
|
||||
```bash
|
||||
$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh "${distro}"'
|
||||
```
|
||||
$ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}'
|
||||
```
|
||||
|
||||
> **Note:**
|
||||
@@ -401,31 +351,28 @@ $ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh "${
|
||||
|
||||
Optionally, add your custom agent binary to the rootfs with the following commands. The default `$LIBC` used
|
||||
is `musl`, but on ppc64le and s390x, `gnu` should be used. Also, Rust refers to ppc64le as `powerpc64le`:
|
||||
```bash
|
||||
$ export ARCH="$(uname -m)"
|
||||
$ [ "${ARCH}" == "ppc64le" ] || [ "${ARCH}" == "s390x" ] && export LIBC=gnu || export LIBC=musl
|
||||
$ [ "${ARCH}" == "ppc64le" ] && export ARCH=powerpc64le
|
||||
$ sudo install -o root -g root -m 0550 -T "${ROOTFS_DIR}/../../../../src/agent/target/${ARCH}-unknown-linux-${LIBC}/release/kata-agent" "${ROOTFS_DIR}/sbin/init"
|
||||
```
|
||||
$ export ARCH=$(uname -m)
|
||||
$ [ ${ARCH} == "ppc64le" ] || [ ${ARCH} == "s390x" ] && export LIBC=gnu || export LIBC=musl
|
||||
$ [ ${ARCH} == "ppc64le" ] && export ARCH=powerpc64le
|
||||
$ sudo install -o root -g root -m 0550 -T ../../../src/agent/target/${ARCH}-unknown-linux-${LIBC}/release/kata-agent ${ROOTFS_DIR}/sbin/init
|
||||
```
|
||||
|
||||
### Build an initrd image
|
||||
|
||||
```bash
|
||||
$ pushd kata-containers/tools/osbuilder/initrd-builder
|
||||
$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true ./initrd_builder.sh "${ROOTFS_DIR}"'
|
||||
$ popd
|
||||
```
|
||||
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/initrd-builder
|
||||
$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true ./initrd_builder.sh ${ROOTFS_DIR}'
|
||||
```
|
||||
|
||||
### Install the initrd image
|
||||
|
||||
```bash
|
||||
$ pushd kata-containers/tools/osbuilder/initrd-builder
|
||||
$ commit="$(git log --format=%h -1 HEAD)"
|
||||
$ date="$(date +%Y-%m-%d-%T.%N%z)"
|
||||
```
|
||||
$ commit=$(git log --format=%h -1 HEAD)
|
||||
$ date=$(date +%Y-%m-%d-%T.%N%z)
|
||||
$ image="kata-containers-initrd-${date}-${commit}"
|
||||
$ sudo install -o root -g root -m 0640 -D kata-containers-initrd.img "/usr/share/kata-containers/${image}"
|
||||
$ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers-initrd.img)
|
||||
$ popd
|
||||
```
|
||||
|
||||
# Install guest kernel images
|
||||
@@ -444,43 +391,43 @@ Kata Containers makes use of upstream QEMU branch. The exact version
|
||||
and repository utilized can be found by looking at the [versions file](../versions.yaml).
|
||||
|
||||
Find the correct version of QEMU from the versions file:
|
||||
```bash
|
||||
$ source kata-containers/tools/packaging/scripts/lib.sh
|
||||
$ qemu_version="$(get_from_kata_deps "assets.hypervisor.qemu.version")"
|
||||
$ echo "${qemu_version}"
|
||||
```
|
||||
$ source ${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging/scripts/lib.sh
|
||||
$ qemu_version=$(get_from_kata_deps "assets.hypervisor.qemu.version")
|
||||
$ echo ${qemu_version}
|
||||
```
|
||||
Get source from the matching branch of QEMU:
|
||||
```bash
|
||||
$ git clone -b "${qemu_version}" https://github.com/qemu/qemu.git
|
||||
$ your_qemu_directory="$(realpath qemu)"
|
||||
```
|
||||
$ go get -d github.com/qemu/qemu
|
||||
$ cd ${GOPATH}/src/github.com/qemu/qemu
|
||||
$ git checkout ${qemu_version}
|
||||
$ your_qemu_directory=${GOPATH}/src/github.com/qemu/qemu
|
||||
```
|
||||
|
||||
There are scripts to manage the build and packaging of QEMU. For the examples below, set your
|
||||
environment as:
|
||||
```bash
|
||||
$ packaging_dir="$(realpath kata-containers/tools/packaging)"
|
||||
```
|
||||
$ go get -d github.com/kata-containers/kata-containers
|
||||
$ packaging_dir="${GOPATH}/src/github.com/kata-containers/kata-containers/tools/packaging"
|
||||
```
|
||||
|
||||
Kata often utilizes patches for not-yet-upstream and/or backported fixes for components,
|
||||
including QEMU. These can be found in the [packaging/QEMU directory](../tools/packaging/qemu/patches),
|
||||
and it's *recommended* that you apply them. For example, suppose that you are going to build QEMU
|
||||
version 5.2.0, do:
|
||||
```bash
|
||||
$ "$packaging_dir/scripts/apply_patches.sh" "$packaging_dir/qemu/patches/5.2.x/"
|
||||
```
|
||||
$ cd $your_qemu_directory
|
||||
$ $packaging_dir/scripts/apply_patches.sh $packaging_dir/qemu/patches/5.2.x/
|
||||
```
|
||||
|
||||
To build utilizing the same options as Kata, you should make use of the `configure-hypervisor.sh` script. For example:
|
||||
```bash
|
||||
$ pushd "$your_qemu_directory"
|
||||
$ "$packaging_dir/scripts/configure-hypervisor.sh" kata-qemu > kata.cfg
|
||||
$ eval ./configure "$(cat kata.cfg)"
|
||||
$ make -j $(nproc --ignore=1)
|
||||
# Optional
|
||||
$ sudo -E make install
|
||||
$ popd
|
||||
```
|
||||
|
||||
If you do not want to install the respective QEMU version, the configuration file can be modified to point to the correct binary. In `/etc/kata-containers/configuration.toml`, change `path = "/path/to/qemu/build/qemu-system-x86_64"` to point to the correct QEMU binary.
|
||||
$ cd $your_qemu_directory
|
||||
$ $packaging_dir/scripts/configure-hypervisor.sh kata-qemu > kata.cfg
|
||||
$ eval ./configure "$(cat kata.cfg)"
|
||||
$ make -j $(nproc)
|
||||
$ sudo -E make install
|
||||
```
|
||||
|
||||
See the [static-build script for QEMU](../tools/packaging/static-build/qemu/build-static-qemu.sh) for a reference on how to get, setup, configure and build QEMU for Kata.
|
||||
|
||||
@@ -492,33 +439,11 @@ See the [static-build script for QEMU](../tools/packaging/static-build/qemu/buil
|
||||
> under upstream review for supporting NVDIMM on aarch64.
|
||||
>
|
||||
You could build the custom `qemu-system-aarch64` as required with the following command:
|
||||
```bash
|
||||
$ git clone https://github.com/kata-containers/tests.git
|
||||
$ script -fec 'sudo -E tests/.ci/install_qemu.sh'
|
||||
```
|
||||
|
||||
## Build `virtiofsd`
|
||||
|
||||
When using the file system type virtio-fs (default), `virtiofsd` is required
|
||||
|
||||
```bash
|
||||
$ pushd kata-containers/tools/packaging/static-build/virtiofsd
|
||||
$ ./build.sh
|
||||
$ popd
|
||||
$ go get -d github.com/kata-containers/tests
|
||||
$ script -fec 'sudo -E ${GOPATH}/src/github.com/kata-containers/tests/.ci/install_qemu.sh'
|
||||
```
|
||||
|
||||
Modify `/etc/kata-containers/configuration.toml` and update value `virtio_fs_daemon = "/path/to/kata-containers/tools/packaging/static-build/virtiofsd/virtiofsd/virtiofsd"` to point to the binary.
|
||||
|
||||
# Check hardware requirements
|
||||
|
||||
You can check if your system is capable of creating a Kata Container by running the following:
|
||||
|
||||
```bash
|
||||
$ sudo kata-runtime check
|
||||
```
|
||||
|
||||
If your system is *not* able to run Kata Containers, the previous command will error out and explain why.
|
||||
|
||||
# Run Kata Containers with Containerd
|
||||
Refer to the [How to use Kata Containers and Containerd](how-to/containerd-kata.md) how-to guide.
|
||||
|
||||
@@ -549,7 +474,7 @@ See [Set up a debug console](#set-up-a-debug-console).
|
||||
|
||||
## Checking Docker default runtime
|
||||
|
||||
```bash
|
||||
```
|
||||
$ sudo docker info 2>/dev/null | grep -i "default runtime" | cut -d: -f2- | grep -q runc && echo "SUCCESS" || echo "ERROR: Incorrect default Docker runtime"
|
||||
```
|
||||
## Set up a debug console
|
||||
@@ -566,7 +491,7 @@ contain either `/bin/sh` or `/bin/bash`.
|
||||
|
||||
Enable debug_console_enabled in the `configuration.toml` configuration file:
|
||||
|
||||
```toml
|
||||
```
|
||||
[agent.kata]
|
||||
debug_console_enabled = true
|
||||
```
|
||||
@@ -577,7 +502,7 @@ This will pass `agent.debug_console agent.debug_console_vport=1026` to agent as
|
||||
|
||||
For Kata Containers `2.0.x` releases, the `kata-runtime exec` command depends on the`kata-monitor` running, in order to get the sandbox's `vsock` address to connect to. Thus, first start the `kata-monitor` process.
|
||||
|
||||
```bash
|
||||
```
|
||||
$ sudo kata-monitor
|
||||
```
|
||||
|
||||
@@ -597,7 +522,7 @@ bash-4.2# exit
|
||||
exit
|
||||
```
|
||||
|
||||
`kata-runtime exec` has a command-line option `runtime-namespace`, which is used to specify under which [runtime namespace](https://github.com/containerd/containerd/blob/main/docs/namespaces.md) the particular pod was created. By default, it is set to `k8s.io` and works for containerd when configured
|
||||
`kata-runtime exec` has a command-line option `runtime-namespace`, which is used to specify under which [runtime namespace](https://github.com/containerd/containerd/blob/master/docs/namespaces.md) the particular pod was created. By default, it is set to `k8s.io` and works for containerd when configured
|
||||
with Kubernetes. For CRI-O, the namespace should set to `default` explicitly. This should not be confused with [Kubernetes namespaces](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/).
|
||||
For other CRI-runtimes and configurations, you may need to set the namespace utilizing the `runtime-namespace` option.
|
||||
|
||||
@@ -639,10 +564,10 @@ an additional `coreutils` package.
|
||||
|
||||
For example using CentOS:
|
||||
|
||||
```bash
|
||||
$ pushd kata-containers/tools/osbuilder/rootfs-builder
|
||||
$ export ROOTFS_DIR="$(realpath ./rootfs)"
|
||||
$ script -fec 'sudo -E USE_DOCKER=true EXTRA_PKGS="bash coreutils" ./rootfs.sh centos'
|
||||
```
|
||||
$ cd $GOPATH/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder
|
||||
$ export ROOTFS_DIR=${GOPATH}/src/github.com/kata-containers/kata-containers/tools/osbuilder/rootfs-builder/rootfs
|
||||
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true EXTRA_PKGS="bash coreutils" ./rootfs.sh centos'
|
||||
```
|
||||
|
||||
#### Build the debug image
|
||||
@@ -657,10 +582,9 @@ Install the image:
|
||||
>**Note**: When using an initrd image, replace the below rootfs image name `kata-containers.img`
|
||||
>with the initrd image name `kata-containers-initrd.img`.
|
||||
|
||||
```bash
|
||||
```
|
||||
$ name="kata-containers-centos-with-debug-console.img"
|
||||
$ sudo install -o root -g root -m 0640 kata-containers.img "/usr/share/kata-containers/${name}"
|
||||
$ popd
|
||||
```
|
||||
|
||||
Next, modify the `image=` values in the `[hypervisor.qemu]` section of the
|
||||
@@ -669,7 +593,7 @@ to specify the full path to the image name specified in the previous code
|
||||
section. Alternatively, recreate the symbolic link so it points to
|
||||
the new debug image:
|
||||
|
||||
```bash
|
||||
```
|
||||
$ (cd /usr/share/kata-containers && sudo ln -sf "$name" kata-containers.img)
|
||||
```
|
||||
|
||||
@@ -680,7 +604,7 @@ to avoid all subsequently created containers from using the debug image.
|
||||
|
||||
Create a container as normal. For example using `crictl`:
|
||||
|
||||
```bash
|
||||
```
|
||||
$ sudo crictl run -r kata container.yaml pod.yaml
|
||||
```
|
||||
|
||||
@@ -693,7 +617,7 @@ those for firecracker / cloud-hypervisor.
|
||||
|
||||
Add `agent.debug_console` to the guest kernel command line to allow the agent process to start a debug console.
|
||||
|
||||
```bash
|
||||
```
|
||||
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_console"/g' "${kata_configuration_file}"
|
||||
```
|
||||
|
||||
@@ -714,7 +638,7 @@ between the host and the guest. The kernel command line option `agent.debug_cons
|
||||
|
||||
Add the parameter `agent.debug_console_vport=1026` to the kernel command line
|
||||
as shown below:
|
||||
```bash
|
||||
```
|
||||
sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_console_vport=1026"/g' "${kata_configuration_file}"
|
||||
```
|
||||
|
||||
@@ -727,7 +651,7 @@ Next, connect to the debug console. The VSOCKS paths vary slightly between each
|
||||
VMM solution.
|
||||
|
||||
In case of cloud-hypervisor, connect to the `vsock` as shown:
|
||||
```bash
|
||||
```
|
||||
$ sudo su -c 'cd /var/run/vc/vm/${sandbox_id}/root/ && socat stdin unix-connect:clh.sock'
|
||||
CONNECT 1026
|
||||
```
|
||||
@@ -735,7 +659,7 @@ CONNECT 1026
|
||||
**Note**: You need to type `CONNECT 1026` and press `RETURN` key after entering the `socat` command.
|
||||
|
||||
For firecracker, connect to the `hvsock` as shown:
|
||||
```bash
|
||||
```
|
||||
$ sudo su -c 'cd /var/run/vc/firecracker/${sandbox_id}/root/ && socat stdin unix-connect:kata.hvsock'
|
||||
CONNECT 1026
|
||||
```
|
||||
@@ -744,7 +668,7 @@ CONNECT 1026
|
||||
|
||||
|
||||
For QEMU, connect to the `vsock` as shown:
|
||||
```bash
|
||||
```
|
||||
$ sudo su -c 'cd /var/run/vc/vm/${sandbox_id} && socat "stdin,raw,echo=0,escape=0x11" "unix-connect:console.sock"'
|
||||
```
|
||||
|
||||
@@ -757,7 +681,7 @@ If the image is created using
|
||||
[osbuilder](../tools/osbuilder), the following YAML
|
||||
file exists and contains details of the image and how it was created:
|
||||
|
||||
```bash
|
||||
```
|
||||
$ cat /var/lib/osbuilder/osbuilder.yaml
|
||||
```
|
||||
|
||||
|
||||
@@ -60,26 +60,17 @@ This section lists items that might be possible to fix.
|
||||
## OCI CLI commands
|
||||
|
||||
### Docker and Podman support
|
||||
Currently Kata Containers does not support Podman.
|
||||
Currently Kata Containers does not support Docker or Podman.
|
||||
|
||||
See issue https://github.com/kata-containers/kata-containers/issues/722 for more information.
|
||||
|
||||
Docker supports Kata Containers since 22.06:
|
||||
|
||||
```bash
|
||||
$ sudo docker run --runtime io.containerd.kata.v2
|
||||
```
|
||||
|
||||
Kata Containers works perfectly with containerd, we recommend to use
|
||||
containerd's Docker-style command line tool [`nerdctl`](https://github.com/containerd/nerdctl).
|
||||
|
||||
## Runtime commands
|
||||
|
||||
### checkpoint and restore
|
||||
|
||||
The runtime does not provide `checkpoint` and `restore` commands. There
|
||||
are discussions about using VM save and restore to give us a
|
||||
[`criu`](https://github.com/checkpoint-restore/criu)-like functionality,
|
||||
`[criu](https://github.com/checkpoint-restore/criu)`-like functionality,
|
||||
which might provide a solution.
|
||||
|
||||
Note that the OCI standard does not specify `checkpoint` and `restore`
|
||||
@@ -102,42 +93,6 @@ All other configurations are supported and are working properly.
|
||||
|
||||
## Networking
|
||||
|
||||
### Host network
|
||||
|
||||
Host network (`nerdctl/docker run --net=host`or [Kubernetes `HostNetwork`](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#hosts-namespaces)) is not supported.
|
||||
It is not possible to directly access the host networking configuration
|
||||
from within the VM.
|
||||
|
||||
The `--net=host` option can still be used with `runc` containers and
|
||||
inter-mixed with running Kata Containers, thus enabling use of `--net=host`
|
||||
when necessary.
|
||||
|
||||
It should be noted, currently passing the `--net=host` option into a
|
||||
Kata Container may result in the Kata Container networking setup
|
||||
modifying, re-configuring and therefore possibly breaking the host
|
||||
networking setup. Do not use `--net=host` with Kata Containers.
|
||||
|
||||
### Support for joining an existing VM network
|
||||
|
||||
Docker supports the ability for containers to join another containers
|
||||
namespace with the `docker run --net=containers` syntax. This allows
|
||||
multiple containers to share a common network namespace and the network
|
||||
interfaces placed in the network namespace. Kata Containers does not
|
||||
support network namespace sharing. If a Kata Container is setup to
|
||||
share the network namespace of a `runc` container, the runtime
|
||||
effectively takes over all the network interfaces assigned to the
|
||||
namespace and binds them to the VM. Consequently, the `runc` container loses
|
||||
its network connectivity.
|
||||
|
||||
### docker run --link
|
||||
|
||||
The runtime does not support the `docker run --link` command. This
|
||||
command is now deprecated by docker and we have no intention of adding support.
|
||||
Equivalent functionality can be achieved with the newer docker networking commands.
|
||||
|
||||
See more documentation at
|
||||
[docs.docker.com](https://docs.docker.com/network/links/).
|
||||
|
||||
## Resource management
|
||||
|
||||
Due to the way VMs differ in their CPU and memory allocation, and sharing
|
||||
|
||||
@@ -341,7 +341,7 @@ The main repository has the most comprehensive set of skip abilities. See:
|
||||
|
||||
One method is to use the `nix` crate along with some custom macros:
|
||||
|
||||
```rust
|
||||
```
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[allow(unused_macros)]
|
||||
|
||||
@@ -7,9 +7,7 @@ Kata Containers design documents:
|
||||
- [Design requirements for Kata Containers](kata-design-requirements.md)
|
||||
- [VSocks](VSocks.md)
|
||||
- [VCPU handling](vcpu-handling.md)
|
||||
- [VCPU threads pinning](vcpu-threads-pinning.md)
|
||||
- [Host cgroups](host-cgroups.md)
|
||||
- [Agent systemd cgroup](agent-systemd-cgroup.md)
|
||||
- [`Inotify` support](inotify.md)
|
||||
- [Metrics(Kata 2.0)](kata-2-0-metrics.md)
|
||||
- [Design for Kata Containers `Lazyload` ability with `nydus`](kata-nydus-design.md)
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
# Systemd Cgroup for Agent
|
||||
|
||||
As we know, we can interact with cgroups in two ways, **`cgroupfs`** and **`systemd`**. The former is achieved by reading and writing cgroup `tmpfs` files under `/sys/fs/cgroup` while the latter is done by configuring a transient unit by requesting systemd. Kata agent uses **`cgroupfs`** by default, unless you pass the parameter `--systemd-cgroup`.
|
||||
|
||||
## usage
|
||||
|
||||
For systemd, kata agent configures cgroups according to the following `linux.cgroupsPath` format standard provided by `runc` (`[slice]:[prefix]:[name]`). If you don't provide a valid `linux.cgroupsPath`, kata agent will treat it as `"system.slice:kata_agent:<container-id>"`.
|
||||
|
||||
> Here slice is a systemd slice under which the container is placed. If empty, it defaults to system.slice, except when cgroup v2 is used and rootless container is created, in which case it defaults to user.slice.
|
||||
>
|
||||
> Note that slice can contain dashes to denote a sub-slice (e.g. user-1000.slice is a correct notation, meaning a `subslice` of user.slice), but it must not contain slashes (e.g. user.slice/user-1000.slice is invalid).
|
||||
>
|
||||
> A slice of `-` represents a root slice.
|
||||
>
|
||||
> Next, prefix and name are used to compose the unit name, which is `<prefix>-<name>.scope`, unless name has `.slice` suffix, in which case prefix is ignored and the name is used as is.
|
||||
|
||||
## supported properties
|
||||
|
||||
The kata agent will translate the parameters in the `linux.resources` of `config.json` into systemd unit properties, and send it to systemd for configuration. Since systemd supports limited properties, only the following parameters in `linux.resources` will be applied. We will simply treat hybrid mode as legacy mode by the way.
|
||||
|
||||
- CPU
|
||||
|
||||
- v1
|
||||
|
||||
| runtime spec resource | systemd property name |
|
||||
| --------------------- | --------------------- |
|
||||
| `cpu.shares` | `CPUShares` |
|
||||
|
||||
- v2
|
||||
|
||||
| runtime spec resource | systemd property name |
|
||||
| -------------------------- | -------------------------- |
|
||||
| `cpu.shares` | `CPUShares` |
|
||||
| `cpu.period` | `CPUQuotaPeriodUSec`(v242) |
|
||||
| `cpu.period` & `cpu.quota` | `CPUQuotaPerSecUSec` |
|
||||
|
||||
- MEMORY
|
||||
|
||||
- v1
|
||||
|
||||
| runtime spec resource | systemd property name |
|
||||
| --------------------- | --------------------- |
|
||||
| `memory.limit` | `MemoryLimit` |
|
||||
|
||||
- v2
|
||||
|
||||
| runtime spec resource | systemd property name |
|
||||
| ------------------------------ | --------------------- |
|
||||
| `memory.low` | `MemoryLow` |
|
||||
| `memory.max` | `MemoryMax` |
|
||||
| `memory.swap` & `memory.limit` | `MemorySwapMax` |
|
||||
|
||||
- PIDS
|
||||
|
||||
| runtime spec resource | systemd property name |
|
||||
| --------------------- | --------------------- |
|
||||
| `pids.limit ` | `TasksMax` |
|
||||
|
||||
- CPUSET
|
||||
|
||||
| runtime spec resource | systemd property name |
|
||||
| --------------------- | -------------------------- |
|
||||
| `cpuset.cpus` | `AllowedCPUs`(v244) |
|
||||
| `cpuset.mems` | `AllowedMemoryNodes`(v244) |
|
||||
|
||||
## Systemd Interface
|
||||
|
||||
`session.rs` and `system.rs` in `src/agent/rustjail/src/cgroups/systemd/interface` are automatically generated by `zbus-xmlgen`, which is is an accompanying tool provided by `zbus` to generate Rust code from `D-Bus XML interface descriptions`. The specific commands to generate these two files are as follows:
|
||||
|
||||
```shell
|
||||
// system.rs
|
||||
zbus-xmlgen --system org.freedesktop.systemd1 /org/freedesktop/systemd1
|
||||
// session.rs
|
||||
zbus-xmlgen --session org.freedesktop.systemd1 /org/freedesktop/systemd1
|
||||
```
|
||||
|
||||
The current implementation of `cgroups/systemd` uses `system.rs` while `session.rs` could be used to build rootless containers in the future.
|
||||
|
||||
## references
|
||||
|
||||
- [runc - systemd cgroup driver](https://github.com/opencontainers/runc/blob/main/docs/systemd.md)
|
||||
|
||||
- [systemd.resource-control — Resource control unit settings](https://www.freedesktop.org/software/systemd/man/systemd.resource-control.html)
|
||||
|
||||
|
Before Width: | Height: | Size: 193 KiB |
@@ -1,169 +0,0 @@
|
||||
# Kata 3.0 Architecture
|
||||
## Overview
|
||||
In cloud-native scenarios, there is an increased demand for container startup speed, resource consumption, stability, and security, areas where the present Kata Containers runtime is challenged relative to other runtimes. To achieve this, we propose a solid, field-tested and secure Rust version of the kata-runtime.
|
||||
|
||||
Also, we provide the following designs:
|
||||
|
||||
- Turn key solution with builtin `Dragonball` Sandbox
|
||||
- Async I/O to reduce resource consumption
|
||||
- Extensible framework for multiple services, runtimes and hypervisors
|
||||
- Lifecycle management for sandbox and container associated resources
|
||||
|
||||
### Rationale for choosing Rust
|
||||
|
||||
We chose Rust because it is designed as a system language with a focus on efficiency.
|
||||
In contrast to Go, Rust makes a variety of design trade-offs in order to obtain
|
||||
good execution performance, with innovative techniques that, in contrast to C or
|
||||
C++, provide reasonable protection against common memory errors (buffer
|
||||
overflow, invalid pointers, range errors), error checking (ensuring errors are
|
||||
dealt with), thread safety, ownership of resources, and more.
|
||||
|
||||
These benefits were verified in our project when the Kata Containers guest agent
|
||||
was rewritten in Rust. We notably saw a significant reduction in memory usage
|
||||
with the Rust-based implementation.
|
||||
|
||||
|
||||
## Design
|
||||
### Architecture
|
||||

|
||||
### Built-in VMM
|
||||
#### Current Kata 2.x architecture
|
||||

|
||||
As shown in the figure, runtime and VMM are separate processes. The runtime process forks the VMM process and interacts through the inter-process RPC. Typically, process interaction consumes more resources than peers within the process, and it will result in relatively low efficiency. At the same time, the cost of resource operation and maintenance should be considered. For example, when performing resource recovery under abnormal conditions, the exception of any process must be detected by others and activate the appropriate resource recovery process. If there are additional processes, the recovery becomes even more difficult.
|
||||
#### How To Support Built-in VMM
|
||||
We provide `Dragonball` Sandbox to enable built-in VMM by integrating VMM's function into the Rust library. We could perform VMM-related functionalities by using the library. Because runtime and VMM are in the same process, there is a benefit in terms of message processing speed and API synchronization. It can also guarantee the consistency of the runtime and the VMM life cycle, reducing resource recovery and exception handling maintenance, as shown in the figure:
|
||||

|
||||
### Async Support
|
||||
#### Why Need Async
|
||||
**Async is already in stable Rust and allows us to write async code**
|
||||
|
||||
- Async provides significantly reduced CPU and memory overhead, especially for workloads with a large amount of IO-bound tasks
|
||||
- Async is zero-cost in Rust, which means that you only pay for what you use. Specifically, you can use async without heap allocations and dynamic dispatch, which greatly improves efficiency
|
||||
- For more (see [Why Async?](https://rust-lang.github.io/async-book/01_getting_started/02_why_async.html) and [The State of Asynchronous Rust](https://rust-lang.github.io/async-book/01_getting_started/03_state_of_async_rust.html)).
|
||||
|
||||
**There may be several problems if implementing kata-runtime with Sync Rust**
|
||||
|
||||
- Too many threads with a new TTRPC connection
|
||||
- TTRPC threads: reaper thread(1) + listener thread(1) + client handler(2)
|
||||
- Add 3 I/O threads with a new container
|
||||
- In Sync mode, implementing a timeout mechanism is challenging. For example, in TTRPC API interaction, the timeout mechanism is difficult to align with Golang
|
||||
#### How To Support Async
|
||||
The kata-runtime is controlled by TOKIO_RUNTIME_WORKER_THREADS to run the OS thread, which is 2 threads by default. For TTRPC and container-related threads run in the `tokio` thread in a unified manner, and related dependencies need to be switched to Async, such as Timer, File, Netlink, etc. With the help of Async, we can easily support no-block I/O and timer. Currently, we only utilize Async for kata-runtime. The built-in VMM keeps the OS thread because it can ensure that the threads are controllable.
|
||||
|
||||
**For N tokio worker threads and M containers**
|
||||
|
||||
- Sync runtime(both OS thread and `tokio` task are OS thread but without `tokio` worker thread) OS thread number: 4 + 12*M
|
||||
- Async runtime(only OS thread is OS thread) OS thread number: 2 + N
|
||||
```shell
|
||||
├─ main(OS thread)
|
||||
├─ async-logger(OS thread)
|
||||
└─ tokio worker(N * OS thread)
|
||||
├─ agent log forwarder(1 * tokio task)
|
||||
├─ health check thread(1 * tokio task)
|
||||
├─ TTRPC reaper thread(M * tokio task)
|
||||
├─ TTRPC listener thread(M * tokio task)
|
||||
├─ TTRPC client handler thread(7 * M * tokio task)
|
||||
├─ container stdin io thread(M * tokio task)
|
||||
├─ container stdout io thread(M * tokio task)
|
||||
└─ container stderr io thread(M * tokio task)
|
||||
```
|
||||
### Extensible Framework
|
||||
The Kata 3.x runtime is designed with the extension of service, runtime, and hypervisor, combined with configuration to meet the needs of different scenarios. At present, the service provides a register mechanism to support multiple services. Services could interact with runtime through messages. In addition, the runtime handler handles messages from services. To meet the needs of a binary that supports multiple runtimes and hypervisors, the startup must obtain the runtime handler type and hypervisor type through configuration.
|
||||
|
||||

|
||||
### Resource Manager
|
||||
In our case, there will be a variety of resources, and every resource has several subtypes. Especially for `Virt-Container`, every subtype of resource has different operations. And there may be dependencies, such as the share-fs rootfs and the share-fs volume will use share-fs resources to share files to the VM. Currently, network and share-fs are regarded as sandbox resources, while rootfs, volume, and cgroup are regarded as container resources. Also, we abstract a common interface for each resource and use subclass operations to evaluate the differences between different subtypes.
|
||||

|
||||
|
||||
## Roadmap
|
||||
|
||||
- Stage 1 (June): provide basic features (current delivered)
|
||||
- Stage 2 (September): support common features
|
||||
- Stage 3: support full features
|
||||
|
||||
| **Class** | **Sub-Class** | **Development Stage** | **Status** |
|
||||
| -------------------------- | ------------------- | --------------------- |------------|
|
||||
| Service | task service | Stage 1 | ✅ |
|
||||
| | extend service | Stage 3 | 🚫 |
|
||||
| | image service | Stage 3 | 🚫 |
|
||||
| Runtime handler | `Virt-Container` | Stage 1 | ✅ |
|
||||
| Endpoint | VETH Endpoint | Stage 1 | ✅ |
|
||||
| | Physical Endpoint | Stage 2 | ✅ |
|
||||
| | Tap Endpoint | Stage 2 | ✅ |
|
||||
| | `Tuntap` Endpoint | Stage 2 | ✅ |
|
||||
| | `IPVlan` Endpoint | Stage 2 | ✅ |
|
||||
| | `MacVlan` Endpoint | Stage 2 | ✅ |
|
||||
| | MACVTAP Endpoint | Stage 3 | 🚫 |
|
||||
| | `VhostUserEndpoint` | Stage 3 | 🚫 |
|
||||
| Network Interworking Model | Tc filter | Stage 1 | ✅ |
|
||||
| | `MacVtap` | Stage 3 | 🚧 |
|
||||
| Storage | Virtio-fs | Stage 1 | ✅ |
|
||||
| | `nydus` | Stage 2 | 🚧 |
|
||||
| | `device mapper` | Stage 2 | 🚫 |
|
||||
| `Cgroup V2` | | Stage 2 | 🚧 |
|
||||
| Hypervisor | `Dragonball` | Stage 1 | 🚧 |
|
||||
| | QEMU | Stage 2 | 🚫 |
|
||||
| | ACRN | Stage 3 | 🚫 |
|
||||
| | Cloud Hypervisor | Stage 3 | 🚫 |
|
||||
| | Firecracker | Stage 3 | 🚫 |
|
||||
|
||||
## FAQ
|
||||
|
||||
- Are the "service", "message dispatcher" and "runtime handler" all part of the single Kata 3.x runtime binary?
|
||||
|
||||
Yes. They are components in Kata 3.x runtime. And they will be packed into one binary.
|
||||
1. Service is an interface, which is responsible for handling multiple services like task service, image service and etc.
|
||||
2. Message dispatcher, it is used to match multiple requests from the service module.
|
||||
3. Runtime handler is used to deal with the operation for sandbox and container.
|
||||
- What is the name of the Kata 3.x runtime binary?
|
||||
|
||||
Apparently we can't use `containerd-shim-v2-kata` because it's already used. We are facing the hardest issue of "naming" again. Any suggestions are welcomed.
|
||||
Internally we use `containerd-shim-v2-rund`.
|
||||
|
||||
- Is the Kata 3.x design compatible with the containerd shimv2 architecture?
|
||||
|
||||
Yes. It is designed to follow the functionality of go version kata. And it implements the `containerd shim v2` interface/protocol.
|
||||
|
||||
- How will users migrate to the Kata 3.x architecture?
|
||||
|
||||
The migration plan will be provided before the Kata 3.x is merging into the main branch.
|
||||
|
||||
- Is `Dragonball` limited to its own built-in VMM? Can the `Dragonball` system be configured to work using an external `Dragonball` VMM/hypervisor?
|
||||
|
||||
The `Dragonball` could work as an external hypervisor. However, stability and performance is challenging in this case. Built in VMM could optimise the container overhead, and it's easy to maintain stability.
|
||||
|
||||
`runD` is the `containerd-shim-v2` counterpart of `runC` and can run a pod/containers. `Dragonball` is a `microvm`/VMM that is designed to run container workloads. Instead of `microvm`/VMM, we sometimes refer to it as secure sandbox.
|
||||
|
||||
- QEMU, Cloud Hypervisor and Firecracker support are planned, but how that would work. Are they working in separate process?
|
||||
|
||||
Yes. They are unable to work as built in VMM.
|
||||
|
||||
- What is `upcall`?
|
||||
|
||||
The `upcall` is used to hotplug CPU/memory/MMIO devices, and it solves two issues.
|
||||
1. avoid dependency on PCI/ACPI
|
||||
2. avoid dependency on `udevd` within guest and get deterministic results for hotplug operations. So `upcall` is an alternative to ACPI based CPU/memory/device hotplug. And we may cooperate with the community to add support for ACPI based CPU/memory/device hotplug if needed.
|
||||
|
||||
`Dbs-upcall` is a `vsock-based` direct communication tool between VMM and guests. The server side of the `upcall` is a driver in guest kernel (kernel patches are needed for this feature) and it'll start to serve the requests once the kernel has started. And the client side is in VMM , it'll be a thread that communicates with VSOCK through `uds`. We have accomplished device hotplug / hot-unplug directly through `upcall` in order to avoid virtualization of ACPI to minimize virtual machine's overhead. And there could be many other usage through this direct communication channel. It's already open source.
|
||||
https://github.com/openanolis/dragonball-sandbox/tree/main/crates/dbs-upcall
|
||||
|
||||
- The URL below says the kernel patches work with 4.19, but do they also work with 5.15+ ?
|
||||
|
||||
Forward compatibility should be achievable, we have ported it to 5.10 based kernel.
|
||||
|
||||
- Are these patches platform-specific or would they work for any architecture that supports VSOCK?
|
||||
|
||||
It's almost platform independent, but some message related to CPU hotplug are platform dependent.
|
||||
|
||||
- Could the kernel driver be replaced with a userland daemon in the guest using loopback VSOCK?
|
||||
|
||||
We need to create device nodes for hot-added CPU/memory/devices, so it's not easy for userspace daemon to do these tasks.
|
||||
|
||||
- The fact that `upcall` allows communication between the VMM and the guest suggests that this architecture might be incompatible with https://github.com/confidential-containers where the VMM should have no knowledge of what happens inside the VM.
|
||||
|
||||
1. `TDX` doesn't support CPU/memory hotplug yet.
|
||||
2. For ACPI based device hotplug, it depends on ACPI `DSDT` table, and the guest kernel will execute `ASL` code to handle during handling those hotplug event. And it should be easier to audit VSOCK based communication than ACPI `ASL` methods.
|
||||
|
||||
- What is the security boundary for the monolithic / "Built-in VMM" case?
|
||||
|
||||
It has the security boundary of virtualization. More details will be provided in next stage.
|
||||
|
Before Width: | Height: | Size: 95 KiB |
|
Before Width: | Height: | Size: 66 KiB |
|
Before Width: | Height: | Size: 136 KiB |
|
Before Width: | Height: | Size: 72 KiB |
|
Before Width: | Height: | Size: 139 KiB |
@@ -81,7 +81,7 @@ Notes: given that the `mountInfo` is persisted to the disk by the Kata runtime,
|
||||
Instead of the CSI node driver writing the mount info into a `csiPlugin.json` file under the volume root,
|
||||
as described in the original proposal, here we propose that the CSI node driver passes the mount information to
|
||||
the Kata Containers runtime through a new `kata-runtime` commandline command. The `kata-runtime` then writes the mount
|
||||
information to a `mountInfo.json` file in a predefined location (`/run/kata-containers/shared/direct-volumes/[volume_path]/`).
|
||||
information to a `mount-info.json` file in a predefined location (`/run/kata-containers/shared/direct-volumes/[volume_path]/`).
|
||||
|
||||
When the Kata Containers runtime starts a container, it verifies whether a volume mount is a direct-assigned volume by checking
|
||||
whether there is a `mountInfo` file under the computed Kata `direct-volumes` directory. If it is, the runtime parses the `mountInfo` file,
|
||||
|
||||
@@ -12,7 +12,7 @@ The OCI [runtime specification][linux-config] provides guidance on where the con
|
||||
> [`cgroupsPath`][cgroupspath]: (string, OPTIONAL) path to the cgroups. It can be used to either control the cgroups
|
||||
> hierarchy for containers or to run a new process in an existing container
|
||||
|
||||
The cgroups are hierarchical, and this can be seen with the following pod example:
|
||||
Cgroups are hierarchical, and this can be seen with the following pod example:
|
||||
|
||||
- Pod 1: `cgroupsPath=/kubepods/pod1`
|
||||
- Container 1: `cgroupsPath=/kubepods/pod1/container1`
|
||||
@@ -247,14 +247,14 @@ cgroup size and constraints accordingly.
|
||||
|
||||
# Supported cgroups
|
||||
|
||||
Kata Containers currently supports cgroups `v1` and `v2`.
|
||||
Kata Containers currently only supports cgroups `v1`.
|
||||
|
||||
In the following sections each cgroup is described briefly.
|
||||
|
||||
## cgroups v1
|
||||
## Cgroups V1
|
||||
|
||||
`cgroups v1` are under a [`tmpfs`][1] filesystem mounted at `/sys/fs/cgroup`, where each cgroup is
|
||||
mounted under a separate cgroup filesystem. A `cgroups v1` hierarchy may look like the following
|
||||
`Cgroups V1` are under a [`tmpfs`][1] filesystem mounted at `/sys/fs/cgroup`, where each cgroup is
|
||||
mounted under a separate cgroup filesystem. A `Cgroups v1` hierarchy may look like the following
|
||||
diagram:
|
||||
|
||||
```
|
||||
@@ -301,12 +301,13 @@ diagram:
|
||||
A process can join a cgroup by writing its process id (`pid`) to `cgroup.procs` file,
|
||||
or join a cgroup partially by writing the task (thread) id (`tid`) to the `tasks` file.
|
||||
|
||||
Kata Containers only supports `v1`.
|
||||
To know more about `cgroups v1`, see [cgroupsv1(7)][2].
|
||||
|
||||
## cgroups v2
|
||||
## Cgroups V2
|
||||
|
||||
`cgroups v2` are also known as unified cgroups, unlike `cgroups v1`, the cgroups are
|
||||
mounted under the same cgroup filesystem. A `cgroups v2` hierarchy may look like the following
|
||||
`Cgroups v2` are also known as unified cgroups, unlike `cgroups v1`, the cgroups are
|
||||
mounted under the same cgroup filesystem. A `Cgroups v2` hierarchy may look like the following
|
||||
diagram:
|
||||
|
||||
```
|
||||
@@ -353,6 +354,8 @@ Same as `cgroups v1`, a process can join the cgroup by writing its process id (`
|
||||
`cgroup.procs` file, or join a cgroup partially by writing the task (thread) id (`tid`) to
|
||||
`cgroup.threads` file.
|
||||
|
||||
Kata Containers does not support cgroups `v2` on the host.
|
||||
|
||||
### Distro Support
|
||||
|
||||
Many Linux distributions do not yet support `cgroups v2`, as it is quite a recent addition.
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
# Design Doc for Kata Containers' VCPUs Pinning Feature
|
||||
|
||||
## Background
|
||||
By now, vCPU threads of Kata Containers are scheduled randomly to CPUs. And each pod would request a specific set of CPUs which we call it CPU set (just the CPU set meaning in Linux cgroups).
|
||||
|
||||
If the number of vCPU threads are equal to that of CPUs claimed in CPU set, we can then pin each vCPU thread to one specified CPU, to reduce the cost of random scheduling.
|
||||
|
||||
## Detailed Design
|
||||
|
||||
### Passing Config Parameters
|
||||
Two ways are provided to use this vCPU thread pinning feature: through `QEMU` configuration file and through annotations. Finally the pinning parameter is passed to `HypervisorConfig`.
|
||||
|
||||
### Related Linux Thread Scheduling API
|
||||
|
||||
| API Info | Value |
|
||||
|-------------------|-----------------------------------------------------------|
|
||||
| Package | `golang.org/x/sys/unix` |
|
||||
| Method | `unix.SchedSetaffinity(thread_id, &unixCPUSet)` |
|
||||
| Official Doc Page | https://pkg.go.dev/golang.org/x/sys/unix#SchedSetaffinity |
|
||||
|
||||
### When is VCPUs Pinning Checked?
|
||||
|
||||
As shown in Section 1, when `num(vCPU threads) == num(CPUs in CPU set)`, we shall pin each vCPU thread to a specified CPU. And when this condition is broken, we should restore to the original random scheduling pattern.
|
||||
So when may `num(CPUs in CPU set)` change? There are 5 possible scenes:
|
||||
|
||||
| Possible scenes | Related Code |
|
||||
|-----------------------------------|--------------------------------------------|
|
||||
| when creating a container | File Sandbox.go, in method `CreateContainer` |
|
||||
| when starting a container | File Sandbox.go, in method `StartContainer` |
|
||||
| when deleting a container | File Sandbox.go, in method `DeleteContainer` |
|
||||
| when updating a container | File Sandbox.go, in method `UpdateContainer` |
|
||||
| when creating multiple containers | File Sandbox.go, in method `createContainers` |
|
||||
|
||||
### Core Pinning Logics
|
||||
|
||||
We can split the whole process into the following steps. Related methods are `checkVCPUsPinning` and `resetVCPUsPinning`, in file Sandbox.go.
|
||||

|
||||
@@ -110,7 +110,7 @@ Devices and features used:
|
||||
- VFIO
|
||||
- hotplug
|
||||
- seccomp filters
|
||||
- [HTTP OpenAPI](https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/vmm/src/api/openapi/cloud-hypervisor.yaml)
|
||||
- [HTTP OpenAPI](https://github.com/cloud-hypervisor/cloud-hypervisor/blob/master/vmm/src/api/openapi/cloud-hypervisor.yaml)
|
||||
|
||||
### Summary
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
- [Run Kata containers with `crictl`](run-kata-with-crictl.md)
|
||||
- [Run Kata Containers with Kubernetes](run-kata-with-k8s.md)
|
||||
- [How to use Kata Containers and Containerd](containerd-kata.md)
|
||||
- [How to use Kata Containers and containerd with Kubernetes](how-to-use-k8s-with-containerd-and-kata.md)
|
||||
- [How to use Kata Containers and CRI (containerd) with Kubernetes](how-to-use-k8s-with-cri-containerd-and-kata.md)
|
||||
- [Kata Containers and service mesh for Kubernetes](service-mesh.md)
|
||||
- [How to import Kata Containers logs into Fluentd](how-to-import-kata-logs-with-fluentd.md)
|
||||
|
||||
@@ -42,6 +42,4 @@
|
||||
- [How to setup swap devices in guest kernel](how-to-setup-swap-devices-in-guest-kernel.md)
|
||||
- [How to run rootless vmm](how-to-run-rootless-vmm.md)
|
||||
- [How to run Docker with Kata Containers](how-to-run-docker-with-kata.md)
|
||||
- [How to run Kata Containers with `nydus`](how-to-use-virtio-fs-nydus-with-kata.md)
|
||||
- [How to run Kata Containers with AMD SEV-SNP](how-to-run-kata-containers-with-SNP-VMs.md)
|
||||
- [How to use EROFS to build rootfs in Kata Containers](how-to-use-erofs-build-rootfs.md)
|
||||
- [How to run Kata Containers with `nydus`](how-to-use-virtio-fs-nydus-with-kata.md)
|
||||
@@ -40,7 +40,7 @@ use `RuntimeClass` instead of the deprecated annotations.
|
||||
### Containerd Runtime V2 API: Shim V2 API
|
||||
|
||||
The [`containerd-shim-kata-v2` (short as `shimv2` in this documentation)](../../src/runtime/cmd/containerd-shim-kata-v2/)
|
||||
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/main/runtime/v2) for Kata.
|
||||
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata.
|
||||
With `shimv2`, Kubernetes can launch Pod and OCI-compatible containers with one shim per Pod. Prior to `shimv2`, `2N+1`
|
||||
shims (i.e. a `containerd-shim` and a `kata-shim` for each container and the Pod sandbox itself) and no standalone `kata-proxy`
|
||||
process were used, even with VSOCK not available.
|
||||
@@ -77,8 +77,8 @@ $ command -v containerd
|
||||
You can manually install CNI plugins as follows:
|
||||
|
||||
```bash
|
||||
$ git clone https://github.com/containernetworking/plugins.git
|
||||
$ pushd plugins
|
||||
$ go get github.com/containernetworking/plugins
|
||||
$ pushd $GOPATH/src/github.com/containernetworking/plugins
|
||||
$ ./build_linux.sh
|
||||
$ sudo mkdir /opt/cni
|
||||
$ sudo cp -r bin /opt/cni/
|
||||
@@ -93,8 +93,8 @@ $ popd
|
||||
You can install the `cri-tools` from source code:
|
||||
|
||||
```bash
|
||||
$ git clone https://github.com/kubernetes-sigs/cri-tools.git
|
||||
$ pushd cri-tools
|
||||
$ go get github.com/kubernetes-sigs/cri-tools
|
||||
$ pushd $GOPATH/src/github.com/kubernetes-sigs/cri-tools
|
||||
$ make
|
||||
$ sudo -E make install
|
||||
$ popd
|
||||
@@ -132,9 +132,9 @@ The `RuntimeClass` is suggested.
|
||||
|
||||
The following configuration includes two runtime classes:
|
||||
- `plugins.cri.containerd.runtimes.runc`: the runc, and it is the default runtime.
|
||||
- `plugins.cri.containerd.runtimes.kata`: The function in containerd (reference [the document here](https://github.com/containerd/containerd/tree/main/runtime/v2#binary-naming))
|
||||
- `plugins.cri.containerd.runtimes.kata`: The function in containerd (reference [the document here](https://github.com/containerd/containerd/tree/master/runtime/v2#binary-naming))
|
||||
where the dot-connected string `io.containerd.kata.v2` is translated to `containerd-shim-kata-v2` (i.e. the
|
||||
binary name of the Kata implementation of [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/main/runtime/v2)).
|
||||
binary name of the Kata implementation of [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)).
|
||||
|
||||
```toml
|
||||
[plugins.cri.containerd]
|
||||
@@ -257,48 +257,6 @@ This launches a BusyBox container named `hello`, and it will be removed by `--rm
|
||||
The `--cni` flag enables CNI networking for the container. Without this flag, a container with just a
|
||||
loopback interface is created.
|
||||
|
||||
### Launch containers using `ctr` command line with rootfs bundle
|
||||
|
||||
#### Get rootfs
|
||||
Use the script to create rootfs
|
||||
```bash
|
||||
ctr i pull quay.io/prometheus/busybox:latest
|
||||
ctr i export rootfs.tar quay.io/prometheus/busybox:latest
|
||||
|
||||
rootfs_tar=rootfs.tar
|
||||
bundle_dir="./bundle"
|
||||
mkdir -p "${bundle_dir}"
|
||||
|
||||
# extract busybox rootfs
|
||||
rootfs_dir="${bundle_dir}/rootfs"
|
||||
mkdir -p "${rootfs_dir}"
|
||||
layers_dir="$(mktemp -d)"
|
||||
tar -C "${layers_dir}" -pxf "${rootfs_tar}"
|
||||
for ((i=0;i<$(cat ${layers_dir}/manifest.json | jq -r ".[].Layers | length");i++)); do
|
||||
tar -C ${rootfs_dir} -xf ${layers_dir}/$(cat ${layers_dir}/manifest.json | jq -r ".[].Layers[${i}]")
|
||||
done
|
||||
```
|
||||
#### Get `config.json`
|
||||
Use runc spec to generate `config.json`
|
||||
```bash
|
||||
cd ./bundle/rootfs
|
||||
runc spec
|
||||
mv config.json ../
|
||||
```
|
||||
Change the root `path` in `config.json` to the absolute path of rootfs
|
||||
|
||||
```JSON
|
||||
"root":{
|
||||
"path":"/root/test/bundle/rootfs",
|
||||
"readonly": false
|
||||
},
|
||||
```
|
||||
|
||||
#### Run container
|
||||
```bash
|
||||
sudo ctr run -d --runtime io.containerd.run.kata.v2 --config bundle/config.json hello
|
||||
sudo ctr t exec --exec-id ${ID} -t hello sh
|
||||
```
|
||||
### Launch Pods with `crictl` command line
|
||||
|
||||
With the `crictl` command line of `cri-tools`, you can specify runtime class with `-r` or `--runtime` flag.
|
||||
|
||||
@@ -45,9 +45,6 @@ spec:
|
||||
- name: containerdsocket
|
||||
mountPath: /run/containerd/containerd.sock
|
||||
readOnly: true
|
||||
- name: sbs
|
||||
mountPath: /run/vc/sbs/
|
||||
readOnly: true
|
||||
terminationGracePeriodSeconds: 30
|
||||
volumes:
|
||||
- name: containerdtask
|
||||
@@ -56,6 +53,3 @@ spec:
|
||||
- name: containerdsocket
|
||||
hostPath:
|
||||
path: /run/containerd/containerd.sock
|
||||
- name: sbs
|
||||
hostPath:
|
||||
path: /run/vc/sbs/
|
||||
|
||||
@@ -15,18 +15,6 @@ $ sudo .ci/aarch64/install_rom_aarch64.sh
|
||||
$ popd
|
||||
```
|
||||
|
||||
## Config KATA QEMU
|
||||
|
||||
After executing the above script, two files will be generated under the directory `/usr/share/kata-containers/` by default, namely `kata-flash0.img` and `kata-flash1.img`. Next we need to change the configuration file of `kata qemu`, which is in `/opt/kata/share/defaults/kata-containers/configuration-qemu.toml` by default, specify in the configuration file to use the UEFI ROM installed above. The above is an example of `kata deploy` installation. For package management installation, please use `kata-runtime env` to find the location of the configuration file. Please refer to the following configuration.
|
||||
|
||||
```
|
||||
[hypervisor.qemu]
|
||||
|
||||
# -pflash can add image file to VM. The arguments of it should be in format
|
||||
# of ["/path/to/flash0.img", "/path/to/flash1.img"]
|
||||
pflashes = ["/usr/share/kata-containers/kata-flash0.img", "/usr/share/kata-containers/kata-flash1.img"]
|
||||
```
|
||||
|
||||
## Run for test
|
||||
|
||||
Let's test if the memory hotplug is ready for Kata after install the UEFI ROM. Make sure containerd is ready to run Kata before test.
|
||||
|
||||
@@ -1,159 +0,0 @@
|
||||
# Kata Containers with AMD SEV-SNP VMs
|
||||
|
||||
## Disclaimer
|
||||
|
||||
This guide is designed for developers and is - same as the Developer Guide - not intended for production systems or end users. It is advisable to only follow this guide on non-critical development systems.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
To run Kata Containers in SNP-VMs, the following software stack is used.
|
||||
|
||||

|
||||
|
||||
The host BIOS and kernel must be capable of supporting AMD SEV-SNP and configured accordingly. For Kata Containers, the host kernel with branch [`sev-snp-iommu-avic_5.19-rc6_v3`](https://github.com/AMDESE/linux/tree/sev-snp-iommu-avic_5.19-rc6_v3) and commit [`3a88547`](https://github.com/AMDESE/linux/commit/3a885471cf89156ea555341f3b737ad2a8d9d3d0) is known to work in conjunction with SEV Firmware version 1.51.3 (0xh\_1.33.03) available on AMD's [SEV developer website](https://developer.amd.com/sev/). See [AMD's guide](https://github.com/AMDESE/AMDSEV/tree/sev-snp-devel) to configure the host accordingly. Verify that you are able to run SEV-SNP encrypted VMs first. The guest components required for Kata Containers are built as described below.
|
||||
|
||||
**Tip**: It is easiest to first have Kata Containers running on your system and then modify it to run containers in SNP-VMs. Follow the [Developer guide](../Developer-Guide.md#warning) and then follow the below steps. Nonetheless, you can just follow this guide from the start.
|
||||
|
||||
## How to build
|
||||
|
||||
Follow all of the below steps to install Kata Containers with SNP-support from scratch. These steps mostly follow the developer guide with modifications to support SNP
|
||||
|
||||
__Steps from the Developer Guide:__
|
||||
- Get all the [required components](../Developer-Guide.md#requirements-to-build-individual-components) for building the kata-runtime
|
||||
- [Build the and install kata-runtime](../Developer-Guide.md#build-and-install-the-kata-containers-runtime)
|
||||
- [Build a custom agent](../Developer-Guide.md#build-a-custom-kata-agent---optional)
|
||||
- [Create an initrd image](../Developer-Guide.md#create-an-initrd-image---optional) by first building a rootfs, then building the initrd based on the rootfs, use a custom agent and install. `ubuntu` works as the distribution of choice.
|
||||
- Get the [required components](../../tools/packaging/kernel/README.md#requirements) to build a custom kernel
|
||||
|
||||
__SNP-specific steps:__
|
||||
- Build the SNP-specific kernel as shown below (see this [guide](../../tools/packaging/kernel/README.md#build-kata-containers-kernel) for more information)
|
||||
```bash
|
||||
$ pushd kata-containers/tools/packaging/kernel/
|
||||
$ ./build-kernel.sh -a x86_64 -x snp setup
|
||||
$ ./build-kernel.sh -a x86_64 -x snp build
|
||||
$ sudo -E PATH="${PATH}" ./build-kernel.sh -x snp install
|
||||
$ popd
|
||||
```
|
||||
- Build a current OVMF capable of SEV-SNP:
|
||||
```bash
|
||||
$ pushd kata-containers/tools/packaging/static-build/ovmf
|
||||
$ ./build.sh
|
||||
$ tar -xvf edk2-x86_64.tar.gz
|
||||
$ popd
|
||||
```
|
||||
- Build a custom QEMU
|
||||
```bash
|
||||
$ source kata-containers/tools/packaging/scripts/lib.sh
|
||||
$ qemu_url="$(get_from_kata_deps "assets.hypervisor.qemu.snp.url")"
|
||||
$ qemu_branch="$(get_from_kata_deps "assets.hypervisor.qemu.snp.branch")"
|
||||
$ qemu_commit="$(get_from_kata_deps "assets.hypervisor.qemu.snp.commit")"
|
||||
$ git clone -b "${qemu_branch}" "${qemu_url}"
|
||||
$ pushd qemu
|
||||
$ git checkout "${qemu_commit}"
|
||||
$ ./configure --enable-virtfs --target-list=x86_64-softmmu --enable-debug
|
||||
$ make -j "$(nproc)"
|
||||
$ popd
|
||||
```
|
||||
|
||||
### Kata Containers Configuration for SNP
|
||||
|
||||
The configuration file located at `/etc/kata-containers/configuration.toml` must be adapted as follows to support SNP-VMs:
|
||||
- Use the SNP-specific kernel for the guest VM (change path)
|
||||
```toml
|
||||
kernel = "/usr/share/kata-containers/vmlinuz-snp.container"
|
||||
```
|
||||
- Enable the use of an initrd (uncomment)
|
||||
```toml
|
||||
initrd = "/usr/share/kata-containers/kata-containers-initrd.img"
|
||||
```
|
||||
- Disable the use of a rootfs (comment out)
|
||||
```toml
|
||||
# image = "/usr/share/kata-containers/kata-containers.img"
|
||||
```
|
||||
- Use the custom QEMU capable of SNP (change path)
|
||||
```toml
|
||||
path = "/path/to/qemu/build/qemu-system-x86_64"
|
||||
```
|
||||
- Use `virtio-9p` device since `virtio-fs` is unsupported due to bugs / shortcomings in QEMU version [`snp-v3`](https://github.com/AMDESE/qemu/tree/snp-v3) for SEV and SEV-SNP (change value)
|
||||
```toml
|
||||
shared_fs = "virtio-9p"
|
||||
```
|
||||
- Disable `virtiofsd` since it is no longer required (comment out)
|
||||
```toml
|
||||
# virtio_fs_daemon = "/usr/libexec/virtiofsd"
|
||||
```
|
||||
- Disable NVDIMM (uncomment)
|
||||
```toml
|
||||
disable_image_nvdimm = true
|
||||
```
|
||||
- Disable shared memory (uncomment)
|
||||
```toml
|
||||
file_mem_backend = ""
|
||||
```
|
||||
- Enable confidential guests (uncomment)
|
||||
```toml
|
||||
confidential_guest = true
|
||||
```
|
||||
- Enable SNP-VMs (uncomment)
|
||||
```toml
|
||||
sev_snp_guest = true
|
||||
```
|
||||
- Configure an OVMF (add path)
|
||||
```toml
|
||||
firmware = "/path/to/kata-containers/tools/packaging/static-build/ovmf/opt/kata/share/ovmf/OVMF.fd"
|
||||
```
|
||||
|
||||
## Test Kata Containers with Containerd
|
||||
|
||||
With Kata Containers configured to support SNP-VMs, we use containerd to test and deploy containers in these VMs.
|
||||
|
||||
### Install Containerd
|
||||
If not already present, follow [this guide](./containerd-kata.md#install) to install containerd and its related components including `CNI` and the `cri-tools` (skip Kata Containers since we already installed it)
|
||||
|
||||
### Containerd Configuration
|
||||
|
||||
Follow [this guide](./containerd-kata.md#configuration) to configure containerd to use Kata Containers
|
||||
|
||||
## Run Kata Containers in SNP-VMs
|
||||
|
||||
Run the below commands to start a container. See [this guide](./containerd-kata.md#run) for more information
|
||||
```bash
|
||||
$ sudo ctr image pull docker.io/library/busybox:latest
|
||||
$ sudo ctr run --cni --runtime io.containerd.run.kata.v2 -t --rm docker.io/library/busybox:latest hello sh
|
||||
```
|
||||
|
||||
### Check for active SNP:
|
||||
|
||||
Inside the running container, run the following commands to check if SNP is active. It should look something like this:
|
||||
```
|
||||
/ # dmesg | grep -i sev
|
||||
[ 0.299242] Memory Encryption Features active: AMD SEV SEV-ES SEV-SNP
|
||||
[ 0.472286] SEV: Using SNP CPUID table, 31 entries present.
|
||||
[ 0.514574] SEV: SNP guest platform device initialized.
|
||||
[ 0.885425] sev-guest sev-guest: Initialized SEV guest driver (using vmpck_id 0)
|
||||
```
|
||||
|
||||
### Obtain an SNP Attestation Report
|
||||
|
||||
To obtain an attestation report inside the container, the `/dev/sev-guest` must first be configured. As of now, the VM does not perform this step, however it can be performed inside the container, either in the terminal or in code.
|
||||
|
||||
Example for shell:
|
||||
```
|
||||
/ # SNP_MAJOR=$(cat /sys/devices/virtual/misc/sev-guest/dev | awk -F: '{print $1}')
|
||||
/ # SNP_MINOR=$(cat /sys/devices/virtual/misc/sev-guest/dev | awk -F: '{print $2}')
|
||||
/ # mknod -m 600 /dev/sev-guest c "${SNP_MAJOR}" "${SNP_MINOR}"
|
||||
```
|
||||
|
||||
## Known Issues
|
||||
|
||||
- Support for cgroups v2 is still [work in progress](https://github.com/kata-containers/kata-containers/issues/927). If issues occur due to cgroups v2 becoming the default in newer systems, one possible solution is to downgrade cgroups to v1:
|
||||
```bash
|
||||
sudo sed -i 's/^\(GRUB_CMDLINE_LINUX=".*\)"/\1 systemd.unified_cgroup_hierarchy=0"/' /etc/default/grub
|
||||
sudo update-grub
|
||||
sudo reboot
|
||||
```
|
||||
- If both SEV and SEV-SNP are supported by the host, Kata Containers uses SEV-SNP by default. You can verify what features are enabled by checking `/sys/module/kvm_amd/parameters/sev` and `sev_snp`. This means that Kata Containers can not run both SEV-SNP-VMs and SEV-VMs at the same time. If SEV is to be used by Kata Containers instead, reload the `kvm_amd` kernel module without SNP-support, this will disable SNP-support for the entire platform.
|
||||
```bash
|
||||
sudo rmmod kvm_amd && sudo modprobe kvm_amd sev_snp=0
|
||||
```
|
||||
|
||||
@@ -19,7 +19,7 @@ Also you should ensure that `kubectl` working correctly.
|
||||
> **Note**: More information about Kubernetes integrations:
|
||||
> - [Run Kata Containers with Kubernetes](run-kata-with-k8s.md)
|
||||
> - [How to use Kata Containers and Containerd](containerd-kata.md)
|
||||
> - [How to use Kata Containers and containerd with Kubernetes](how-to-use-k8s-with-containerd-and-kata.md)
|
||||
> - [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](how-to-use-k8s-with-cri-containerd-and-kata.md)
|
||||
|
||||
## Configure Prometheus
|
||||
|
||||
|
||||
@@ -57,7 +57,6 @@ There are several kinds of Kata configurations and they are listed below.
|
||||
| `io.katacontainers.config.hypervisor.enable_iothreads` | `boolean`| enable IO to be processed in a separate thread. Supported currently for virtio-`scsi` driver |
|
||||
| `io.katacontainers.config.hypervisor.enable_mem_prealloc` | `boolean` | the memory space used for `nvdimm` device by the hypervisor |
|
||||
| `io.katacontainers.config.hypervisor.enable_vhost_user_store` | `boolean` | enable vhost-user storage device (QEMU) |
|
||||
| `io.katacontainers.config.hypervisor.vhost_user_reconnect_timeout_sec` | `string`| the timeout for reconnecting vhost user socket (QEMU)
|
||||
| `io.katacontainers.config.hypervisor.enable_virtio_mem` | `boolean` | enable virtio-mem (QEMU) |
|
||||
| `io.katacontainers.config.hypervisor.entropy_source` (R) | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) |
|
||||
| `io.katacontainers.config.hypervisor.file_mem_backend` (R) | string | file based memory backend root directory |
|
||||
@@ -88,7 +87,7 @@ There are several kinds of Kata configurations and they are listed below.
|
||||
| `io.katacontainers.config.hypervisor.use_vsock` | `boolean` | specify use of `vsock` for agent communication |
|
||||
| `io.katacontainers.config.hypervisor.vhost_user_store_path` (R) | `string` | specify the directory path where vhost-user devices related folders, sockets and device nodes should be (QEMU) |
|
||||
| `io.katacontainers.config.hypervisor.virtio_fs_cache_size` | uint32 | virtio-fs DAX cache size in `MiB` |
|
||||
| `io.katacontainers.config.hypervisor.virtio_fs_cache` | string | the cache mode for virtio-fs, valid values are `always`, `auto` and `never` |
|
||||
| `io.katacontainers.config.hypervisor.virtio_fs_cache` | string | the cache mode for virtio-fs, valid values are `always`, `auto` and `none` |
|
||||
| `io.katacontainers.config.hypervisor.virtio_fs_daemon` | string | virtio-fs `vhost-user` daemon path |
|
||||
| `io.katacontainers.config.hypervisor.virtio_fs_extra_args` | string | extra options passed to `virtiofs` daemon |
|
||||
| `io.katacontainers.config.hypervisor.enable_guest_swap` | `boolean` | enable swap in the guest |
|
||||
|
||||
@@ -17,9 +17,9 @@ Enable setup swap device in guest kernel as follows:
|
||||
$ sudo sed -i -e 's/^#enable_guest_swap.*$/enable_guest_swap = true/g' /etc/kata-containers/configuration.toml
|
||||
```
|
||||
|
||||
## Run a Kata Containers utilizing swap device
|
||||
## Run a Kata Container utilizing swap device
|
||||
|
||||
Use following command to start a Kata Containers with swappiness 60 and 1GB swap device (swap_in_bytes - memory_limit_in_bytes).
|
||||
Use following command to start a Kata Container with swappiness 60 and 1GB swap device (swap_in_bytes - memory_limit_in_bytes).
|
||||
```
|
||||
$ pod_yaml=pod.yaml
|
||||
$ container_yaml=container.yaml
|
||||
@@ -43,12 +43,12 @@ command:
|
||||
- top
|
||||
EOF
|
||||
$ sudo crictl pull $image
|
||||
$ podid=$(sudo crictl runp --runtime kata $pod_yaml)
|
||||
$ podid=$(sudo crictl runp $pod_yaml)
|
||||
$ cid=$(sudo crictl create $podid $container_yaml $pod_yaml)
|
||||
$ sudo crictl start $cid
|
||||
```
|
||||
|
||||
Kata Containers setups swap device for this container only when `io.katacontainers.container.resource.swappiness` is set.
|
||||
Kata Container setups swap device for this container only when `io.katacontainers.container.resource.swappiness` is set.
|
||||
|
||||
The following table shows the swap size how to decide if `io.katacontainers.container.resource.swappiness` is set.
|
||||
|`io.katacontainers.container.resource.swap_in_bytes`|`memory_limit_in_bytes`|swap size|
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
# Configure Kata Containers to use EROFS build rootfs
|
||||
|
||||
## Introduction
|
||||
For kata containers, rootfs is used in the read-only way. EROFS can noticeably decrease metadata overhead.
|
||||
|
||||
`mkfs.erofs` can generate compressed and uncompressed EROFS images.
|
||||
|
||||
For uncompressed images, no files are compressed. However, it is optional to inline the data blocks at the end of the file with the metadata.
|
||||
|
||||
For compressed images, each file will be compressed using the lz4 or lz4hc algorithm, and it will be confirmed whether it can save space. Use No compression of the file if compression does not save space.
|
||||
|
||||
## Performance comparison
|
||||
| | EROFS | EXT4 | XFS |
|
||||
|-----------------|-------| --- | --- |
|
||||
| Image Size [MB] | 106(uncompressed) | 256 | 126 |
|
||||
|
||||
|
||||
## Guidance
|
||||
### Install the `erofs-utils`
|
||||
#### `apt/dnf` install
|
||||
On newer `Ubuntu/Debian` systems, it can be installed directly using the `apt` command, and on `Fedora` it can be installed directly using the `dnf` command.
|
||||
|
||||
```shell
|
||||
# Debian/Ubuntu
|
||||
$ apt install erofs-utils
|
||||
# Fedora
|
||||
$ dnf install erofs-utils
|
||||
```
|
||||
|
||||
#### Source install
|
||||
[https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git](https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git)
|
||||
|
||||
##### Compile dependencies
|
||||
If you need to enable the `Lz4` compression feature, `Lz4 1.8.0+` is required, and `Lz4 1.9.3+` is strongly recommended.
|
||||
|
||||
##### Compilation process
|
||||
For some old lz4 versions (lz4-1.8.0~1.8.3), if lz4-static is not installed, the lz4hc algorithm will not be supported. lz4-static can be installed with apt install lz4-static.x86_64. However, these versions have some bugs in compression, and it is not recommended to use these versions directly.
|
||||
If you use `lz4 1.9.0+`, you can directly use the following command to compile.
|
||||
|
||||
```shell
|
||||
$ ./autogen.sh
|
||||
$ ./configure
|
||||
$ make
|
||||
```
|
||||
|
||||
The compiled `mkfs.erofs` program will be saved in the `mkfs` directory. Afterwards, the generated tools can be installed to a system directory using make install (requires root privileges).
|
||||
|
||||
### Create a local rootfs
|
||||
```shell
|
||||
$ export distro="ubuntu"
|
||||
$ export FS_TYPE="erofs"
|
||||
$ export ROOTFS_DIR="realpath kata-containers/tools/osbuilder/rootfs-builder/rootfs"
|
||||
$ sudo rm -rf "${ROOTFS_DIR}"
|
||||
$ pushd kata-containers/tools/osbuilder/rootfs-builder
|
||||
$ script -fec 'sudo -E SECCOMP=no ./rootfs.sh "${distro}"'
|
||||
$ popd
|
||||
```
|
||||
|
||||
### Add a custom agent to the image - OPTIONAL
|
||||
> Note:
|
||||
> - You should only do this step if you are testing with the latest version of the agent.
|
||||
```shell
|
||||
$ sudo install -o root -g root -m 0550 -t "${ROOTFS_DIR}/usr/bin" "${ROOTFS_DIR}/../../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent"
|
||||
$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-agent.service" "${ROOTFS_DIR}/usr/lib/systemd/system/"
|
||||
$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-containers.target" "${ROOTFS_DIR}/usr/lib/systemd/system/"
|
||||
```
|
||||
|
||||
### Build a root image
|
||||
```shell
|
||||
$ pushd kata-containers/tools/osbuilder/image-builder
|
||||
$ script -fec 'sudo -E ./image_builder.sh "${ROOTFS_DIR}"'
|
||||
$ popd
|
||||
```
|
||||
|
||||
### Install the rootfs image
|
||||
```shell
|
||||
$ pushd kata-containers/tools/osbuilder/image-builder
|
||||
$ commit="$(git log --format=%h -1 HEAD)"
|
||||
$ date="$(date +%Y-%m-%d-%T.%N%z)"
|
||||
$ rootfs="erofs"
|
||||
$ image="kata-containers-${rootfs}-${date}-${commit}"
|
||||
$ sudo install -o root -g root -m 0640 -D kata-containers.img "/usr/share/kata-containers/${image}"
|
||||
$ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers.img)
|
||||
$ popd
|
||||
```
|
||||
|
||||
### Use `EROFS` in the runtime
|
||||
```shell
|
||||
$ sudo sed -i -e 's/^# *\(rootfs_type\).*=.*$/\1 = erofs/g' /etc/kata-containers/configuration.toml
|
||||
```
|
||||
@@ -1,15 +1,15 @@
|
||||
# How to use Kata Containers and containerd with Kubernetes
|
||||
# How to use Kata Containers and CRI (containerd plugin) with Kubernetes
|
||||
|
||||
This document describes how to set up a single-machine Kubernetes (k8s) cluster.
|
||||
|
||||
The Kubernetes cluster will use the
|
||||
[containerd](https://github.com/containerd/containerd/) and
|
||||
[Kata Containers](https://katacontainers.io) to launch workloads.
|
||||
[CRI containerd](https://github.com/containerd/containerd/) and
|
||||
[Kata Containers](https://katacontainers.io) to launch untrusted workloads.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Kubernetes, Kubelet, `kubeadm`
|
||||
- containerd
|
||||
- containerd with `cri` plug-in
|
||||
- Kata Containers
|
||||
|
||||
> **Note:** For information about the supported versions of these components,
|
||||
@@ -149,7 +149,7 @@ $ sudo -E kubectl taint nodes --all node-role.kubernetes.io/master-
|
||||
|
||||
## Create runtime class for Kata Containers
|
||||
|
||||
By default, all pods are created with the default runtime configured in containerd.
|
||||
By default, all pods are created with the default runtime configured in CRI containerd plugin.
|
||||
From Kubernetes v1.12, users can use [`RuntimeClass`](https://kubernetes.io/docs/concepts/containers/runtime-class/#runtime-class) to specify a different runtime for Pods.
|
||||
|
||||
```bash
|
||||
@@ -166,7 +166,7 @@ $ sudo -E kubectl apply -f runtime.yaml
|
||||
|
||||
## Run pod in Kata Containers
|
||||
|
||||
If a pod has the `runtimeClassName` set to `kata`, the CRI runs the pod with the
|
||||
If a pod has the `runtimeClassName` set to `kata`, the CRI plugin runs the pod with the
|
||||
[Kata Containers runtime](../../src/runtime/README.md).
|
||||
|
||||
- Create an pod configuration that using Kata Containers runtime
|
||||
@@ -104,7 +104,7 @@ sudo dmsetup create "${POOL_NAME}" \
|
||||
|
||||
cat << EOF
|
||||
#
|
||||
# Add this to your config.toml configuration file and restart containerd daemon
|
||||
# Add this to your config.toml configuration file and restart `containerd` daemon
|
||||
#
|
||||
[plugins]
|
||||
[plugins.devmapper]
|
||||
@@ -212,7 +212,7 @@ Next, we need to configure containerd. Add a file in your path (e.g. `/usr/local
|
||||
|
||||
```
|
||||
#!/bin/bash
|
||||
KATA_CONF_FILE=/etc/kata-containers/configuration-fc.toml /usr/local/bin/containerd-shim-kata-v2 $@
|
||||
KATA_CONF_FILE=/etc/containers/configuration-fc.toml /usr/local/bin/containerd-shim-kata-v2 $@
|
||||
```
|
||||
> **Note:** You may need to edit the paths of the configuration file and the `containerd-shim-kata-v2` to correspond to your setup.
|
||||
|
||||
|
||||
|
Before Width: | Height: | Size: 9.0 KiB |
@@ -40,7 +40,7 @@ See below example config:
|
||||
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration.toml"
|
||||
```
|
||||
|
||||
- [How to use Kata Containers and containerd with Kubernetes](how-to-use-k8s-with-containerd-and-kata.md)
|
||||
- [Kata Containers with Containerd and CRI documentation](how-to-use-k8s-with-cri-containerd-and-kata.md)
|
||||
- [Containerd CRI config documentation](https://github.com/containerd/containerd/blob/main/docs/cri/config.md)
|
||||
|
||||
#### CRI-O
|
||||
|
||||
@@ -15,7 +15,7 @@ After choosing one CRI implementation, you must make the appropriate configurati
|
||||
to ensure it integrates with Kata Containers.
|
||||
|
||||
Kata Containers 1.5 introduced the `shimv2` for containerd 1.2.0, reducing the components
|
||||
required to spawn pods and containers, and this is the preferred way to run Kata Containers with Kubernetes ([as documented here](../how-to/how-to-use-k8s-with-containerd-and-kata.md#configure-containerd-to-use-kata-containers)).
|
||||
required to spawn pods and containers, and this is the preferred way to run Kata Containers with Kubernetes ([as documented here](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers)).
|
||||
|
||||
An equivalent shim implementation for CRI-O is planned.
|
||||
|
||||
@@ -57,7 +57,7 @@ content shown below:
|
||||
|
||||
To customize containerd to select Kata Containers runtime, follow our
|
||||
"Configure containerd to use Kata Containers" internal documentation
|
||||
[here](../how-to/how-to-use-k8s-with-containerd-and-kata.md#configure-containerd-to-use-kata-containers).
|
||||
[here](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers).
|
||||
|
||||
## Install Kubernetes
|
||||
|
||||
@@ -85,7 +85,7 @@ Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-tim
|
||||
Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock"
|
||||
```
|
||||
For more information about containerd see the "Configure Kubelet to use containerd"
|
||||
documentation [here](../how-to/how-to-use-k8s-with-containerd-and-kata.md#configure-kubelet-to-use-containerd).
|
||||
documentation [here](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-kubelet-to-use-containerd).
|
||||
|
||||
## Run a Kubernetes pod with Kata Containers
|
||||
|
||||
|
||||
@@ -33,7 +33,6 @@ are available, their default values and how each setting can be used.
|
||||
[Cloud Hypervisor] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) | `configuration-clh.toml` |
|
||||
[Firecracker] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) | `configuration-fc.toml` |
|
||||
[QEMU] | C | all | Type 2 ([KVM]) | `configuration-qemu.toml` |
|
||||
[`Dragonball`] | rust | `aarch64`, `x86_64` | Type 2 ([KVM]) | `configuration-dragonball.toml` |
|
||||
|
||||
## Determine currently configured hypervisor
|
||||
|
||||
@@ -53,7 +52,6 @@ the hypervisors:
|
||||
[Cloud Hypervisor] | Low latency, small memory footprint, small attack surface | Minimal | | excellent | excellent | High performance modern cloud workloads | |
|
||||
[Firecracker] | Very slimline | Extremely minimal | Doesn't support all device types | excellent | excellent | Serverless / FaaS | |
|
||||
[QEMU] | Lots of features | Lots | | good | good | Good option for most users | | All users |
|
||||
[`Dragonball`] | Built-in VMM, low CPU and memory overhead| Minimal | | excellent | excellent | Optimized for most container workloads | `out-of-the-box` Kata Containers experience |
|
||||
|
||||
For further details, see the [Virtualization in Kata Containers](design/virtualization.md) document and the official documentation for each hypervisor.
|
||||
|
||||
@@ -62,4 +60,3 @@ For further details, see the [Virtualization in Kata Containers](design/virtuali
|
||||
[Firecracker]: https://github.com/firecracker-microvm/firecracker
|
||||
[KVM]: https://en.wikipedia.org/wiki/Kernel-based_Virtual_Machine
|
||||
[QEMU]: http://www.qemu-project.org
|
||||
[`Dragonball`]: https://github.com/openanolis/dragonball-sandbox
|
||||
|
||||
@@ -79,6 +79,3 @@ versions. This is not recommended for normal users.
|
||||
* [upgrading document](../Upgrading.md)
|
||||
* [developer guide](../Developer-Guide.md)
|
||||
* [runtime documentation](../../src/runtime/README.md)
|
||||
|
||||
## Kata Containers 3.0 rust runtime installation
|
||||
* [installation guide](../install/kata-containers-3.0-rust-runtime-installation-guide.md)
|
||||
|
||||
@@ -19,6 +19,12 @@
|
||||
> - If you decide to proceed and install a Kata Containers release, you can
|
||||
> still check for the latest version of Kata Containers by running
|
||||
> `kata-runtime check --only-list-releases`.
|
||||
>
|
||||
> - These instructions will not work for Fedora 31 and higher since those
|
||||
> distribution versions only support cgroups version 2 by default. However,
|
||||
> Kata Containers currently requires cgroups version 1 (on the host side). See
|
||||
> https://github.com/kata-containers/kata-containers/issues/927 for further
|
||||
> details.
|
||||
|
||||
## Install Kata Containers
|
||||
|
||||
|
||||
@@ -1,102 +0,0 @@
|
||||
# Kata Containers 3.0 rust runtime installation
|
||||
The following is an overview of the different installation methods available.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Kata Containers 3.0 rust runtime requires nested virtualization or bare metal. Check
|
||||
[hardware requirements](/src/runtime/README.md#hardware-requirements) to see if your system is capable of running Kata
|
||||
Containers.
|
||||
|
||||
### Platform support
|
||||
|
||||
Kata Containers 3.0 rust runtime currently runs on 64-bit systems supporting the following
|
||||
architectures:
|
||||
|
||||
> **Notes:**
|
||||
> For other architectures, see https://github.com/kata-containers/kata-containers/issues/4320
|
||||
|
||||
| Architecture | Virtualization technology |
|
||||
|-|-|
|
||||
| `x86_64`| [Intel](https://www.intel.com) VT-x |
|
||||
| `aarch64` ("`arm64`")| [ARM](https://www.arm.com) Hyp |
|
||||
|
||||
## Packaged installation methods
|
||||
|
||||
| Installation method | Description | Automatic updates | Use case | Availability
|
||||
|------------------------------------------------------|----------------------------------------------------------------------------------------------|-------------------|-----------------------------------------------------------------------------------------------|----------- |
|
||||
| [Using kata-deploy](#kata-deploy-installation) | The preferred way to deploy the Kata Containers distributed binaries on a Kubernetes cluster | **No!** | Best way to give it a try on kata-containers on an already up and running Kubernetes cluster. | Yes |
|
||||
| [Using official distro packages](#official-packages) | Kata packages provided by Linux distributions official repositories | yes | Recommended for most users. | No |
|
||||
| [Using snap](#snap-installation) | Easy to install | yes | Good alternative to official distro packages. | No |
|
||||
| [Automatic](#automatic-installation) | Run a single command to install a full system | **No!** | For those wanting the latest release quickly. | No |
|
||||
| [Manual](#manual-installation) | Follow a guide step-by-step to install a working system | **No!** | For those who want the latest release with more control. | No |
|
||||
| [Build from source](#build-from-source-installation) | Build the software components manually | **No!** | Power users and developers only. | Yes |
|
||||
|
||||
### Kata Deploy Installation
|
||||
|
||||
Follow the [`kata-deploy`](../../tools/packaging/kata-deploy/README.md).
|
||||
### Official packages
|
||||
`ToDo`
|
||||
### Snap Installation
|
||||
`ToDo`
|
||||
### Automatic Installation
|
||||
`ToDo`
|
||||
### Manual Installation
|
||||
`ToDo`
|
||||
|
||||
## Build from source installation
|
||||
|
||||
### Rust Environment Set Up
|
||||
|
||||
* Download `Rustup` and install `Rust`
|
||||
> **Notes:**
|
||||
> Rust version 1.62.0 is needed
|
||||
|
||||
Example for `x86_64`
|
||||
```
|
||||
$ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
||||
$ source $HOME/.cargo/env
|
||||
$ rustup install 1.62.0
|
||||
$ rustup default 1.62.0-x86_64-unknown-linux-gnu
|
||||
```
|
||||
|
||||
* Musl support for fully static binary
|
||||
|
||||
Example for `x86_64`
|
||||
```
|
||||
$ rustup target add x86_64-unknown-linux-musl
|
||||
```
|
||||
* [Musl `libc`](http://musl.libc.org/) install
|
||||
|
||||
Example for musl 1.2.3
|
||||
```
|
||||
$ curl -O https://git.musl-libc.org/cgit/musl/snapshot/musl-1.2.3.tar.gz
|
||||
$ tar vxf musl-1.2.3.tar.gz
|
||||
$ cd musl-1.2.3/
|
||||
$ ./configure --prefix=/usr/local/
|
||||
$ make && sudo make install
|
||||
```
|
||||
|
||||
|
||||
### Install Kata 3.0 Rust Runtime Shim
|
||||
|
||||
```
|
||||
$ git clone https://github.com/kata-containers/kata-containers.git
|
||||
$ cd kata-containers/src/runtime-rs
|
||||
$ make && sudo make install
|
||||
```
|
||||
After running the command above, the default config file `configuration.toml` will be installed under `/usr/share/defaults/kata-containers/`, the binary file `containerd-shim-kata-v2` will be installed under `/usr/local/bin/` .
|
||||
|
||||
### Build Kata Containers Kernel
|
||||
Follow the [Kernel installation guide](/tools/packaging/kernel/README.md).
|
||||
|
||||
### Build Kata Rootfs
|
||||
Follow the [Rootfs installation guide](../../tools/osbuilder/rootfs-builder/README.md).
|
||||
|
||||
### Build Kata Image
|
||||
Follow the [Image installation guide](../../tools/osbuilder/image-builder/README.md).
|
||||
|
||||
### Install Containerd
|
||||
|
||||
Follow the [Containerd installation guide](container-manager/containerd/containerd-install.md).
|
||||
|
||||
|
||||
@@ -55,11 +55,11 @@ Here are the features to set up a CRI-O based Minikube, and why you need them:
|
||||
|
||||
| what | why |
|
||||
| ---- | --- |
|
||||
| `--bootstrapper=kubeadm` | As recommended for [minikube CRI-O](https://minikube.sigs.k8s.io/docs/handbook/config/#runtime-configuration) |
|
||||
| `--bootstrapper=kubeadm` | As recommended for [minikube CRI-o](https://kubernetes.io/docs/setup/minikube/#cri-o) |
|
||||
| `--container-runtime=cri-o` | Using CRI-O for Kata |
|
||||
| `--enable-default-cni` | As recommended for [minikube CRI-O](https://minikube.sigs.k8s.io/docs/handbook/config/#runtime-configuration) |
|
||||
| `--enable-default-cni` | As recommended for [minikube CRI-o](https://kubernetes.io/docs/setup/minikube/#cri-o) |
|
||||
| `--memory 6144` | Allocate sufficient memory, as Kata Containers default to 1 or 2Gb |
|
||||
| `--network-plugin=cni` | As recommended for [minikube CRI-O](https://minikube.sigs.k8s.io/docs/handbook/config/#runtime-configuration) |
|
||||
| `--network-plugin=cni` | As recommended for [minikube CRI-o](https://kubernetes.io/docs/setup/minikube/#cri-o) |
|
||||
| `--vm-driver kvm2` | The host VM driver |
|
||||
|
||||
To use containerd, modify the `--container-runtime` argument:
|
||||
@@ -71,6 +71,12 @@ To use containerd, modify the `--container-runtime` argument:
|
||||
> **Notes:**
|
||||
> - Adjust the `--memory 6144` line to suit your environment and requirements. Kata Containers default to
|
||||
> requesting 2048MB per container. We recommended you supply more than that to the Minikube node.
|
||||
> - Prior to Minikube/Kubernetes v1.14, the beta `RuntimeClass` feature also needed enabling with
|
||||
> the following.
|
||||
>
|
||||
> | what | why |
|
||||
> | ---- | --- |
|
||||
> | `--feature-gates=RuntimeClass=true` | Kata needs to use the `RuntimeClass` Kubernetes feature |
|
||||
|
||||
The full command is therefore:
|
||||
|
||||
@@ -132,9 +138,17 @@ $ kubectl -n kube-system exec ${podname} -- ps -ef | fgrep infinity
|
||||
|
||||
## Enabling Kata Containers
|
||||
|
||||
> **Note:** Only Minikube/Kubernetes versions <= 1.13 require this step. Since version
|
||||
> v1.14, the `RuntimeClass` is enabled by default. Performing this step on Kubernetes > v1.14 is
|
||||
> however benign.
|
||||
|
||||
Now you have installed the Kata Containers components in the Minikube node. Next, you need to configure
|
||||
Kubernetes `RuntimeClass` to know when to use Kata Containers to run a pod.
|
||||
|
||||
```sh
|
||||
$ kubectl apply -f https://raw.githubusercontent.com/kubernetes/node-api/master/manifests/runtimeclass_crd.yaml > runtimeclass_crd.yaml
|
||||
```
|
||||
|
||||
### Register the runtime
|
||||
|
||||
Now register the `kata qemu` runtime with that class. This should result in no errors:
|
||||
|
||||
@@ -545,12 +545,6 @@ Create the hook execution file for Kata:
|
||||
/usr/bin/nvidia-container-toolkit -debug $@
|
||||
```
|
||||
|
||||
Make sure the hook shell is executable:
|
||||
|
||||
```sh
|
||||
chmod +x $ROOTFS_DIR/usr/share/oci/hooks/prestart/nvidia-container-toolkit.sh
|
||||
```
|
||||
|
||||
As the last step one can do some cleanup of files or package caches. Build the
|
||||
rootfs and configure it for use with Kata according to the development guide.
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ the latest driver.
|
||||
$ export QAT_DRIVER_VER=qat1.7.l.4.14.0-00031.tar.gz
|
||||
$ export QAT_DRIVER_URL=https://downloadmirror.intel.com/30178/eng/${QAT_DRIVER_VER}
|
||||
$ export QAT_CONF_LOCATION=~/QAT_conf
|
||||
$ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/main/demo/openssl-qat-engine/Dockerfile
|
||||
$ export QAT_DOCKERFILE=https://raw.githubusercontent.com/intel/intel-device-plugins-for-kubernetes/master/demo/openssl-qat-engine/Dockerfile
|
||||
$ export QAT_SRC=~/src/QAT
|
||||
$ export GOPATH=~/src/go
|
||||
$ export KATA_KERNEL_LOCATION=~/kata
|
||||
@@ -279,8 +279,8 @@ $ export KERNEL_EXTRAVERSION=$(awk '/^EXTRAVERSION =/{print $NF}' $GOPATH/$LINUX
|
||||
$ export KERNEL_ROOTFS_DIR=${KERNEL_MAJOR_VERSION}.${KERNEL_PATHLEVEL}.${KERNEL_SUBLEVEL}${KERNEL_EXTRAVERSION}
|
||||
$ cd $QAT_SRC
|
||||
$ KERNEL_SOURCE_ROOT=$GOPATH/$LINUX_VER ./configure --enable-icp-sriov=guest
|
||||
$ sudo -E make all -j $($(nproc ${CI:+--ignore 1}))
|
||||
$ sudo -E make INSTALL_MOD_PATH=$ROOTFS_DIR qat-driver-install -j $($(nproc ${CI:+--ignore 1}))
|
||||
$ sudo -E make all -j$(nproc)
|
||||
$ sudo -E make INSTALL_MOD_PATH=$ROOTFS_DIR qat-driver-install -j$(nproc)
|
||||
```
|
||||
|
||||
The `usdm_drv` module also needs to be copied into the rootfs modules path and
|
||||
|
||||
@@ -18,13 +18,16 @@ CONFIG_X86_SGX_KVM=y
|
||||
|
||||
* Kubernetes cluster configured with:
|
||||
* [`kata-deploy`](../../tools/packaging/kata-deploy) based Kata Containers installation
|
||||
* [Intel SGX Kubernetes device plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/main/cmd/sgx_plugin#deploying-with-pre-built-images) and associated components including [operator](https://github.com/intel/intel-device-plugins-for-kubernetes/blob/main/cmd/operator/README.md) and dependencies
|
||||
* [Intel SGX Kubernetes device plugin](https://github.com/intel/intel-device-plugins-for-kubernetes/tree/main/cmd/sgx_plugin#deploying-with-pre-built-images)
|
||||
|
||||
> Note: Kata Containers supports creating VM sandboxes with Intel® SGX enabled
|
||||
> using [cloud-hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor/) and [QEMU](https://www.qemu.org/) VMMs only.
|
||||
|
||||
### Kata Containers Configuration
|
||||
|
||||
Before running a Kata Container make sure that your version of `crio` or `containerd`
|
||||
supports annotations.
|
||||
|
||||
For `containerd` check in `/etc/containerd/config.toml` that the list of `pod_annotations` passed
|
||||
to the `sandbox` are: `["io.katacontainers.*", "sgx.intel.com/epc"]`.
|
||||
|
||||
@@ -61,9 +64,6 @@ spec:
|
||||
name: eosgx-demo-job-1
|
||||
image: oeciteam/oe-helloworld:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /dev
|
||||
name: dev-mount
|
||||
securityContext:
|
||||
readOnlyRootFilesystem: true
|
||||
capabilities:
|
||||
@@ -99,4 +99,4 @@ because socket passthrough is not supported. An alternative is to deploy the `ae
|
||||
container.
|
||||
* Projects like [Gramine Shielded Containers (GSC)](https://gramine-gsc.readthedocs.io/en/latest/) are
|
||||
also known to work. For GSC specifically, the Kata guest kernel needs to have the `CONFIG_NUMA=y`
|
||||
enabled and at least one CPU online when running the GSC container. The Kata Containers guest kernel currently has `CONFIG_NUMA=y` enabled by default.
|
||||
enabled and at least one CPU online when running the GSC container.
|
||||
|
||||
@@ -197,6 +197,11 @@ vhost_user_store_path = "<Path of the base directory for vhost-user device>"
|
||||
> under `[hypervisor.qemu]` section.
|
||||
|
||||
|
||||
For the subdirectories of `vhost_user_store_path`: `block` is used for block
|
||||
device; `block/sockets` is where we expect UNIX domain sockets for vhost-user
|
||||
block devices to live; `block/devices` is where simulated block device nodes
|
||||
for vhost-user block devices are created.
|
||||
|
||||
For the subdirectories of `vhost_user_store_path`:
|
||||
- `block` is used for block device;
|
||||
- `block/sockets` is where we expect UNIX domain sockets for vhost-user
|
||||
|
||||
@@ -82,39 +82,8 @@ parts:
|
||||
fi
|
||||
rustup component add rustfmt
|
||||
|
||||
docker:
|
||||
after: [metadata]
|
||||
plugin: nil
|
||||
prime:
|
||||
- -*
|
||||
build-packages:
|
||||
- ca-certificates
|
||||
- containerd
|
||||
- curl
|
||||
- gnupg
|
||||
- lsb-release
|
||||
- runc
|
||||
override-build: |
|
||||
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
|
||||
|
||||
curl -fsSL https://download.docker.com/linux/ubuntu/gpg |\
|
||||
sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
|
||||
distro_codename=$(lsb_release -cs)
|
||||
echo "deb [arch=${dpkg_arch} signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu ${distro_codename} stable" |\
|
||||
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
sudo apt-get -y update
|
||||
sudo apt-get -y install docker-ce docker-ce-cli containerd.io
|
||||
|
||||
echo "Unmasking docker service"
|
||||
sudo -E systemctl unmask docker.service || true
|
||||
sudo -E systemctl unmask docker.socket || true
|
||||
echo "Adding $USER into docker group"
|
||||
sudo -E gpasswd -a $USER docker
|
||||
echo "Starting docker"
|
||||
sudo -E systemctl start docker || true
|
||||
|
||||
image:
|
||||
after: [godeps, docker, qemu, kernel]
|
||||
after: [godeps, qemu, kernel]
|
||||
plugin: nil
|
||||
build-packages:
|
||||
- docker.io
|
||||
@@ -138,6 +107,14 @@ parts:
|
||||
# Copy yq binary. It's used in the container
|
||||
cp -a "${yq}" "${GOPATH}/bin/"
|
||||
|
||||
echo "Unmasking docker service"
|
||||
sudo -E systemctl unmask docker.service || true
|
||||
sudo -E systemctl unmask docker.socket || true
|
||||
echo "Adding $USER into docker group"
|
||||
sudo -E gpasswd -a $USER docker
|
||||
echo "Starting docker"
|
||||
sudo -E systemctl start docker || true
|
||||
|
||||
cd "${kata_dir}/tools/osbuilder"
|
||||
|
||||
# build image
|
||||
@@ -216,7 +193,7 @@ parts:
|
||||
# Setup and build kernel
|
||||
./build-kernel.sh -v "${kernel_version}" -d setup
|
||||
cd ${kernel_dir_prefix}*
|
||||
make -j $(nproc ${CI:+--ignore 1}) EXTRAVERSION=".container"
|
||||
make -j $(($(nproc)-1)) EXTRAVERSION=".container"
|
||||
|
||||
kernel_suffix="${kernel_version}.container"
|
||||
kata_kernel_dir="${SNAPCRAFT_PART_INSTALL}/usr/share/kata-containers"
|
||||
@@ -229,7 +206,7 @@ parts:
|
||||
|
||||
# Install raw kernel
|
||||
vmlinux_path="vmlinux"
|
||||
[ "${arch}" = "s390x" ] && vmlinux_path="arch/s390/boot/vmlinux"
|
||||
[ "${arch}" = "s390x" ] && vmlinux_path="arch/s390/boot/compressed/vmlinux"
|
||||
vmlinux_name="vmlinux-${kernel_suffix}"
|
||||
cp "${vmlinux_path}" "${kata_kernel_dir}/${vmlinux_name}"
|
||||
ln -sf "${vmlinux_name}" "${kata_kernel_dir}/vmlinux.container"
|
||||
@@ -305,7 +282,7 @@ parts:
|
||||
esac
|
||||
|
||||
# build and install
|
||||
make -j $(nproc ${CI:+--ignore 1})
|
||||
make -j $(($(nproc)-1))
|
||||
make install DESTDIR="${SNAPCRAFT_PART_INSTALL}"
|
||||
prime:
|
||||
- -snap/
|
||||
@@ -324,31 +301,54 @@ parts:
|
||||
|
||||
virtiofsd:
|
||||
plugin: nil
|
||||
after: [godeps, rustdeps, docker]
|
||||
after: [godeps, rustdeps]
|
||||
override-build: |
|
||||
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
|
||||
|
||||
echo "INFO: Building rust version of virtiofsd"
|
||||
# Currently, powerpc makes use of the QEMU's C implementation.
|
||||
# The other platforms make use of the new rust virtiofsd.
|
||||
#
|
||||
# See "tools/packaging/scripts/configure-hypervisor.sh".
|
||||
if [ "${arch}" == "ppc64le" ]
|
||||
then
|
||||
echo "INFO: Building QEMU's C version of virtiofsd"
|
||||
# Handled by the 'qemu' part, so nothing more to do here.
|
||||
exit 0
|
||||
else
|
||||
echo "INFO: Building rust version of virtiofsd"
|
||||
fi
|
||||
|
||||
cd "${SNAPCRAFT_PROJECT_DIR}"
|
||||
# Clean-up build dir in case it already exists
|
||||
sudo -E NO_TTY=true make virtiofsd-tarball
|
||||
cd "${kata_dir}"
|
||||
|
||||
export PATH=${PATH}:${HOME}/.cargo/bin
|
||||
# Download the rust implementation of virtiofsd
|
||||
tools/packaging/static-build/virtiofsd/build-static-virtiofsd.sh
|
||||
sudo install \
|
||||
--owner='root' \
|
||||
--group='root' \
|
||||
--mode=0755 \
|
||||
-D \
|
||||
--target-directory="${SNAPCRAFT_PART_INSTALL}/usr/libexec/" \
|
||||
build/virtiofsd/builddir/virtiofsd/virtiofsd
|
||||
virtiofsd/virtiofsd
|
||||
|
||||
cloud-hypervisor:
|
||||
plugin: nil
|
||||
after: [godeps, docker]
|
||||
after: [godeps]
|
||||
override-build: |
|
||||
source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"
|
||||
|
||||
if [ "${arch}" == "aarch64" ] || [ "${arch}" == "x86_64" ]; then
|
||||
sudo apt-get -y update
|
||||
sudo apt-get -y install ca-certificates curl gnupg lsb-release
|
||||
curl -fsSL https://download.docker.com/linux/ubuntu/gpg |\
|
||||
sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
|
||||
distro_codename=$(lsb_release -cs)
|
||||
echo "deb [arch=${dpkg_arch} signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu ${distro_codename} stable" |\
|
||||
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
sudo apt-get -y update
|
||||
sudo apt-get -y install docker-ce docker-ce-cli containerd.io
|
||||
sudo systemctl start docker.socket
|
||||
|
||||
cd "${SNAPCRAFT_PROJECT_DIR}"
|
||||
sudo -E NO_TTY=true make cloud-hypervisor-tarball
|
||||
|
||||
|
||||
849
src/agent/Cargo.lock
generated
@@ -3,17 +3,16 @@ name = "kata-agent"
|
||||
version = "0.1.0"
|
||||
authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
|
||||
edition = "2018"
|
||||
license = "Apache-2.0"
|
||||
|
||||
[dependencies]
|
||||
oci = { path = "../libs/oci" }
|
||||
rustjail = { path = "rustjail" }
|
||||
protocols = { path = "../libs/protocols", features = ["async"] }
|
||||
protocols = { path = "../libs/protocols" }
|
||||
lazy_static = "1.3.0"
|
||||
ttrpc = { version = "0.6.0", features = ["async"], default-features = false }
|
||||
protobuf = "2.27.0"
|
||||
ttrpc = { version = "0.5.0", features = ["async", "protobuf-codec"], default-features = false }
|
||||
protobuf = "=2.14.0"
|
||||
libc = "0.2.58"
|
||||
nix = "0.24.2"
|
||||
nix = "0.23"
|
||||
capctl = "0.2.0"
|
||||
serde_json = "1.0.39"
|
||||
scan_fmt = "0.2.3"
|
||||
@@ -21,8 +20,7 @@ scopeguard = "1.0.0"
|
||||
thiserror = "1.0.26"
|
||||
regex = "1.5.6"
|
||||
serial_test = "0.5.1"
|
||||
kata-sys-util = { path = "../libs/kata-sys-util" }
|
||||
kata-types = { path = "../libs/kata-types" }
|
||||
sysinfo = "0.23.0"
|
||||
|
||||
# Async helpers
|
||||
async-trait = "0.1.42"
|
||||
@@ -30,7 +28,7 @@ async-recursion = "0.3.2"
|
||||
futures = "0.3.17"
|
||||
|
||||
# Async runtime
|
||||
tokio = { version = "1.28.1", features = ["full"] }
|
||||
tokio = { version = "1.14.0", features = ["full"] }
|
||||
tokio-vsock = "0.3.1"
|
||||
|
||||
netlink-sys = { version = "0.7.0", features = ["tokio_socket",]}
|
||||
@@ -51,7 +49,7 @@ log = "0.4.11"
|
||||
prometheus = { version = "0.13.0", features = ["process"] }
|
||||
procfs = "0.12.0"
|
||||
anyhow = "1.0.32"
|
||||
cgroups = { package = "cgroups-rs", version = "0.3.2" }
|
||||
cgroups = { package = "cgroups-rs", version = "0.2.8" }
|
||||
|
||||
# Tracing
|
||||
tracing = "0.1.26"
|
||||
@@ -67,8 +65,6 @@ clap = { version = "3.0.1", features = ["derive"] }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.1.0"
|
||||
test-utils = { path = "../libs/test-utils" }
|
||||
which = "4.3.0"
|
||||
|
||||
[workspace]
|
||||
members = [
|
||||
|
||||
@@ -107,9 +107,10 @@ endef
|
||||
##TARGET default: build code
|
||||
default: $(TARGET) show-header
|
||||
|
||||
static-checks-build: $(GENERATED_CODE)
|
||||
$(TARGET): $(GENERATED_CODE) logging-crate-tests $(TARGET_PATH)
|
||||
|
||||
$(TARGET): $(GENERATED_CODE) $(TARGET_PATH)
|
||||
logging-crate-tests:
|
||||
make -C $(CWD)/../libs/logging
|
||||
|
||||
$(TARGET_PATH): show-summary
|
||||
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo build --target $(TRIPLE) $(if $(findstring release,$(BUILD_TYPE)),--release) $(EXTRA_RUSTFEATURES)
|
||||
@@ -202,6 +203,7 @@ codecov-html: check_tarpaulin
|
||||
|
||||
.PHONY: \
|
||||
help \
|
||||
logging-crate-tests \
|
||||
optimize \
|
||||
show-header \
|
||||
show-summary \
|
||||
|
||||
@@ -3,7 +3,6 @@ name = "rustjail"
|
||||
version = "0.1.0"
|
||||
authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
|
||||
edition = "2018"
|
||||
license = "Apache-2.0"
|
||||
|
||||
[dependencies]
|
||||
serde = "1.0.91"
|
||||
@@ -11,37 +10,32 @@ serde_json = "1.0.39"
|
||||
serde_derive = "1.0.91"
|
||||
oci = { path = "../../libs/oci" }
|
||||
protocols = { path ="../../libs/protocols" }
|
||||
kata-sys-util = { path = "../../libs/kata-sys-util" }
|
||||
caps = "0.5.0"
|
||||
nix = "0.24.2"
|
||||
nix = "0.23.0"
|
||||
scopeguard = "1.0.0"
|
||||
capctl = "0.2.0"
|
||||
lazy_static = "1.3.0"
|
||||
libc = "0.2.58"
|
||||
protobuf = "2.27.0"
|
||||
protobuf = "=2.14.0"
|
||||
slog = "2.5.2"
|
||||
slog-scope = "4.1.2"
|
||||
scan_fmt = "0.2.6"
|
||||
regex = "1.5.6"
|
||||
path-absolutize = "1.2.0"
|
||||
anyhow = "1.0.32"
|
||||
cgroups = { package = "cgroups-rs", version = "0.3.2" }
|
||||
cgroups = { package = "cgroups-rs", version = "0.2.8" }
|
||||
rlimit = "0.5.3"
|
||||
cfg-if = "0.1.0"
|
||||
|
||||
tokio = { version = "1.28.1", features = ["sync", "io-util", "process", "time", "macros", "rt"] }
|
||||
tokio = { version = "1.2.0", features = ["sync", "io-util", "process", "time", "macros"] }
|
||||
futures = "0.3.17"
|
||||
async-trait = "0.1.31"
|
||||
inotify = "0.9.2"
|
||||
libseccomp = { version = "0.3.0", optional = true }
|
||||
zbus = "2.3.0"
|
||||
bit-vec= "0.6.3"
|
||||
xattr = "0.2.3"
|
||||
libseccomp = { version = "0.2.3", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
serial_test = "0.5.0"
|
||||
tempfile = "3.1.0"
|
||||
test-utils = { path = "../../libs/test-utils" }
|
||||
|
||||
[features]
|
||||
seccomp = ["libseccomp"]
|
||||
|
||||
@@ -32,7 +32,6 @@ use protocols::agent::{
|
||||
BlkioStats, BlkioStatsEntry, CgroupStats, CpuStats, CpuUsage, HugetlbStats, MemoryData,
|
||||
MemoryStats, PidsStats, ThrottlingData,
|
||||
};
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
@@ -76,7 +75,7 @@ macro_rules! set_resource {
|
||||
|
||||
impl CgroupManager for Manager {
|
||||
fn apply(&self, pid: pid_t) -> Result<()> {
|
||||
self.cgroup.add_task_by_tgid(CgroupPid::from(pid as u64))?;
|
||||
self.cgroup.add_task(CgroupPid::from(pid as u64))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -175,7 +174,7 @@ impl CgroupManager for Manager {
|
||||
freezer_controller.freeze()?;
|
||||
}
|
||||
_ => {
|
||||
return Err(anyhow!("Invalid FreezerState"));
|
||||
return Err(anyhow!(nix::Error::EINVAL));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -194,83 +193,6 @@ impl CgroupManager for Manager {
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn update_cpuset_path(&self, guest_cpuset: &str, container_cpuset: &str) -> Result<()> {
|
||||
if guest_cpuset.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
info!(sl!(), "update_cpuset_path to: {}", guest_cpuset);
|
||||
|
||||
let h = cgroups::hierarchies::auto();
|
||||
let root_cg = h.root_control_group();
|
||||
|
||||
let root_cpuset_controller: &CpuSetController = root_cg.controller_of().unwrap();
|
||||
let path = root_cpuset_controller.path();
|
||||
let root_path = Path::new(path);
|
||||
info!(sl!(), "root cpuset path: {:?}", &path);
|
||||
|
||||
let container_cpuset_controller: &CpuSetController = self.cgroup.controller_of().unwrap();
|
||||
let path = container_cpuset_controller.path();
|
||||
let container_path = Path::new(path);
|
||||
info!(sl!(), "container cpuset path: {:?}", &path);
|
||||
|
||||
let mut paths = vec![];
|
||||
for ancestor in container_path.ancestors() {
|
||||
if ancestor == root_path {
|
||||
break;
|
||||
}
|
||||
paths.push(ancestor);
|
||||
}
|
||||
info!(sl!(), "parent paths to update cpuset: {:?}", &paths);
|
||||
|
||||
let mut i = paths.len();
|
||||
loop {
|
||||
if i == 0 {
|
||||
break;
|
||||
}
|
||||
i -= 1;
|
||||
|
||||
// remove cgroup root from path
|
||||
let r_path = &paths[i]
|
||||
.to_str()
|
||||
.unwrap()
|
||||
.trim_start_matches(root_path.to_str().unwrap());
|
||||
info!(sl!(), "updating cpuset for parent path {:?}", &r_path);
|
||||
let cg = new_cgroup(cgroups::hierarchies::auto(), r_path)?;
|
||||
let cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
|
||||
cpuset_controller.set_cpus(guest_cpuset)?;
|
||||
}
|
||||
|
||||
if !container_cpuset.is_empty() {
|
||||
info!(
|
||||
sl!(),
|
||||
"updating cpuset for container path: {:?} cpuset: {}",
|
||||
&container_path,
|
||||
container_cpuset
|
||||
);
|
||||
container_cpuset_controller.set_cpus(container_cpuset)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_cgroup_path(&self, cg: &str) -> Result<String> {
|
||||
if cgroups::hierarchies::is_cgroup2_unified_mode() {
|
||||
let cg_path = format!("/sys/fs/cgroup/{}", self.cpath);
|
||||
return Ok(cg_path);
|
||||
}
|
||||
|
||||
// for cgroup v1
|
||||
Ok(self.paths.get(cg).map(|s| s.to_string()).unwrap())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> Result<&dyn Any> {
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"cgroupfs"
|
||||
}
|
||||
}
|
||||
|
||||
fn set_network_resources(
|
||||
@@ -330,28 +252,19 @@ fn set_devices_resources(
|
||||
}
|
||||
|
||||
fn set_hugepages_resources(
|
||||
cg: &cgroups::Cgroup,
|
||||
_cg: &cgroups::Cgroup,
|
||||
hugepage_limits: &[LinuxHugepageLimit],
|
||||
res: &mut cgroups::Resources,
|
||||
) {
|
||||
info!(sl!(), "cgroup manager set hugepage");
|
||||
let mut limits = vec![];
|
||||
let hugetlb_controller = cg.controller_of::<HugeTlbController>();
|
||||
|
||||
for l in hugepage_limits.iter() {
|
||||
if hugetlb_controller.is_some() && hugetlb_controller.unwrap().size_supported(&l.page_size)
|
||||
{
|
||||
let hr = HugePageResource {
|
||||
size: l.page_size.clone(),
|
||||
limit: l.limit,
|
||||
};
|
||||
limits.push(hr);
|
||||
} else {
|
||||
warn!(
|
||||
sl!(),
|
||||
"{} page size support cannot be verified, dropping requested limit", l.page_size
|
||||
);
|
||||
}
|
||||
let hr = HugePageResource {
|
||||
size: l.page_size.clone(),
|
||||
limit: l.limit,
|
||||
};
|
||||
limits.push(hr);
|
||||
}
|
||||
res.hugepages.limits = limits;
|
||||
}
|
||||
@@ -695,6 +608,17 @@ fn get_cpuacct_stats(cg: &cgroups::Cgroup) -> SingularPtrField<CpuUsage> {
|
||||
});
|
||||
}
|
||||
|
||||
if cg.v2() {
|
||||
return SingularPtrField::some(CpuUsage {
|
||||
total_usage: 0,
|
||||
percpu_usage: vec![],
|
||||
usage_in_kernelmode: 0,
|
||||
usage_in_usermode: 0,
|
||||
unknown_fields: UnknownFields::default(),
|
||||
cached_size: CachedSize::default(),
|
||||
});
|
||||
}
|
||||
|
||||
// try to get from cpu controller
|
||||
let cpu_controller: &CpuController = get_controller_or_return_singular_none!(cg);
|
||||
let stat = cpu_controller.cpu().stat;
|
||||
@@ -725,7 +649,7 @@ fn get_memory_stats(cg: &cgroups::Cgroup) -> SingularPtrField<MemoryStats> {
|
||||
let value = memory.use_hierarchy;
|
||||
let use_hierarchy = value == 1;
|
||||
|
||||
// get memory data
|
||||
// gte memory datas
|
||||
let usage = SingularPtrField::some(MemoryData {
|
||||
usage: memory.usage_in_bytes,
|
||||
max_usage: memory.max_usage_in_bytes,
|
||||
@@ -987,8 +911,9 @@ pub fn get_paths() -> Result<HashMap<String, String>> {
|
||||
Ok(m)
|
||||
}
|
||||
|
||||
pub fn get_mounts(paths: &HashMap<String, String>) -> Result<HashMap<String, String>> {
|
||||
pub fn get_mounts() -> Result<HashMap<String, String>> {
|
||||
let mut m = HashMap::new();
|
||||
let paths = get_paths()?;
|
||||
|
||||
for l in fs::read_to_string(MOUNTS)?.lines() {
|
||||
let p: Vec<&str> = l.splitn(2, " - ").collect();
|
||||
@@ -1016,9 +941,9 @@ pub fn get_mounts(paths: &HashMap<String, String>) -> Result<HashMap<String, Str
|
||||
Ok(m)
|
||||
}
|
||||
|
||||
fn new_cgroup(h: Box<dyn cgroups::Hierarchy>, path: &str) -> Result<Cgroup> {
|
||||
fn new_cgroup(h: Box<dyn cgroups::Hierarchy>, path: &str) -> Cgroup {
|
||||
let valid_path = path.trim_start_matches('/').to_string();
|
||||
cgroups::Cgroup::new(h, valid_path.as_str()).map_err(anyhow::Error::from)
|
||||
cgroups::Cgroup::new(h, valid_path.as_str())
|
||||
}
|
||||
|
||||
impl Manager {
|
||||
@@ -1026,7 +951,7 @@ impl Manager {
|
||||
let mut m = HashMap::new();
|
||||
|
||||
let paths = get_paths()?;
|
||||
let mounts = get_mounts(&paths)?;
|
||||
let mounts = get_mounts()?;
|
||||
|
||||
for key in paths.keys() {
|
||||
let mnt = mounts.get(key);
|
||||
@@ -1040,16 +965,83 @@ impl Manager {
|
||||
m.insert(key.to_string(), p);
|
||||
}
|
||||
|
||||
let cg = new_cgroup(cgroups::hierarchies::auto(), cpath)?;
|
||||
|
||||
Ok(Self {
|
||||
paths: m,
|
||||
mounts,
|
||||
// rels: paths,
|
||||
cpath: cpath.to_string(),
|
||||
cgroup: cg,
|
||||
cgroup: new_cgroup(cgroups::hierarchies::auto(), cpath),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn update_cpuset_path(&self, guest_cpuset: &str, container_cpuset: &str) -> Result<()> {
|
||||
if guest_cpuset.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
info!(sl!(), "update_cpuset_path to: {}", guest_cpuset);
|
||||
|
||||
let h = cgroups::hierarchies::auto();
|
||||
let root_cg = h.root_control_group();
|
||||
|
||||
let root_cpuset_controller: &CpuSetController = root_cg.controller_of().unwrap();
|
||||
let path = root_cpuset_controller.path();
|
||||
let root_path = Path::new(path);
|
||||
info!(sl!(), "root cpuset path: {:?}", &path);
|
||||
|
||||
let container_cpuset_controller: &CpuSetController = self.cgroup.controller_of().unwrap();
|
||||
let path = container_cpuset_controller.path();
|
||||
let container_path = Path::new(path);
|
||||
info!(sl!(), "container cpuset path: {:?}", &path);
|
||||
|
||||
let mut paths = vec![];
|
||||
for ancestor in container_path.ancestors() {
|
||||
if ancestor == root_path {
|
||||
break;
|
||||
}
|
||||
paths.push(ancestor);
|
||||
}
|
||||
info!(sl!(), "parent paths to update cpuset: {:?}", &paths);
|
||||
|
||||
let mut i = paths.len();
|
||||
loop {
|
||||
if i == 0 {
|
||||
break;
|
||||
}
|
||||
i -= 1;
|
||||
|
||||
// remove cgroup root from path
|
||||
let r_path = &paths[i]
|
||||
.to_str()
|
||||
.unwrap()
|
||||
.trim_start_matches(root_path.to_str().unwrap());
|
||||
info!(sl!(), "updating cpuset for parent path {:?}", &r_path);
|
||||
let cg = new_cgroup(cgroups::hierarchies::auto(), r_path);
|
||||
let cpuset_controller: &CpuSetController = cg.controller_of().unwrap();
|
||||
cpuset_controller.set_cpus(guest_cpuset)?;
|
||||
}
|
||||
|
||||
if !container_cpuset.is_empty() {
|
||||
info!(
|
||||
sl!(),
|
||||
"updating cpuset for container path: {:?} cpuset: {}",
|
||||
&container_path,
|
||||
container_cpuset
|
||||
);
|
||||
container_cpuset_controller.set_cpus(container_cpuset)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_cg_path(&self, cg: &str) -> Option<String> {
|
||||
if cgroups::hierarchies::is_cgroup2_unified_mode() {
|
||||
let cg_path = format!("/sys/fs/cgroup/{}", self.cpath);
|
||||
return Some(cg_path);
|
||||
}
|
||||
|
||||
// for cgroup v1
|
||||
self.paths.get(cg).map(|s| s.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
// get the guest's online cpus.
|
||||
|
||||
@@ -11,7 +11,6 @@ use anyhow::Result;
|
||||
use cgroups::freezer::FreezerState;
|
||||
use libc::{self, pid_t};
|
||||
use oci::LinuxResources;
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
use std::string::String;
|
||||
|
||||
@@ -54,22 +53,6 @@ impl CgroupManager for Manager {
|
||||
fn get_pids(&self) -> Result<Vec<pid_t>> {
|
||||
Ok(Vec::new())
|
||||
}
|
||||
|
||||
fn update_cpuset_path(&self, _: &str, _: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_cgroup_path(&self, _: &str) -> Result<String> {
|
||||
Ok("".to_string())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> Result<&dyn Any> {
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"mock"
|
||||
}
|
||||
}
|
||||
|
||||
impl Manager {
|
||||
@@ -80,4 +63,12 @@ impl Manager {
|
||||
cpath: cpath.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn update_cpuset_path(&self, _: &str, _: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_cg_path(&self, _: &str) -> Option<String> {
|
||||
Some("".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,10 +4,8 @@
|
||||
//
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use core::fmt::Debug;
|
||||
use oci::LinuxResources;
|
||||
use protocols::agent::CgroupStats;
|
||||
use std::any::Any;
|
||||
|
||||
use cgroups::freezer::FreezerState;
|
||||
|
||||
@@ -40,24 +38,4 @@ pub trait Manager {
|
||||
fn set(&self, _container: &LinuxResources, _update: bool) -> Result<()> {
|
||||
Err(anyhow!("not supported!"))
|
||||
}
|
||||
|
||||
fn update_cpuset_path(&self, _: &str, _: &str) -> Result<()> {
|
||||
Err(anyhow!("not supported!"))
|
||||
}
|
||||
|
||||
fn get_cgroup_path(&self, _: &str) -> Result<String> {
|
||||
Err(anyhow!("not supported!"))
|
||||
}
|
||||
|
||||
fn as_any(&self) -> Result<&dyn Any> {
|
||||
Err(anyhow!("not supported!"))
|
||||
}
|
||||
|
||||
fn name(&self) -> &str;
|
||||
}
|
||||
|
||||
impl Debug for dyn Manager + Send + Sync {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
write!(f, "{}", self.name())
|
||||
}
|
||||
}
|
||||
|
||||
10
src/agent/rustjail/src/cgroups/systemd.rs
Normal file
@@ -0,0 +1,10 @@
|
||||
// Copyright (c) 2019 Ant Financial
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use crate::cgroups::Manager as CgroupManager;
|
||||
|
||||
pub struct Manager {}
|
||||
|
||||
impl CgroupManager for Manager {}
|
||||
@@ -1,95 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
|
||||
use super::common::{DEFAULT_SLICE, SCOPE_SUFFIX, SLICE_SUFFIX};
|
||||
use std::string::String;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct CgroupsPath {
|
||||
pub slice: String,
|
||||
pub prefix: String,
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
impl CgroupsPath {
|
||||
pub fn new(cgroups_path_str: &str) -> Result<Self> {
|
||||
let path_vec: Vec<&str> = cgroups_path_str.split(':').collect();
|
||||
if path_vec.len() != 3 {
|
||||
return Err(anyhow!("invalid cpath: {:?}", cgroups_path_str));
|
||||
}
|
||||
|
||||
Ok(CgroupsPath {
|
||||
slice: if path_vec[0].is_empty() {
|
||||
DEFAULT_SLICE.to_string()
|
||||
} else {
|
||||
path_vec[0].to_owned()
|
||||
},
|
||||
prefix: path_vec[1].to_owned(),
|
||||
name: path_vec[2].to_owned(),
|
||||
})
|
||||
}
|
||||
|
||||
// ref: https://github.com/opencontainers/runc/blob/main/docs/systemd.md
|
||||
// return: (parent_slice, unit_name)
|
||||
pub fn parse(&self) -> Result<(String, String)> {
|
||||
Ok((
|
||||
parse_parent(self.slice.to_owned())?,
|
||||
get_unit_name(self.prefix.to_owned(), self.name.to_owned()),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_parent(slice: String) -> Result<String> {
|
||||
if !slice.ends_with(SLICE_SUFFIX) || slice.contains('/') {
|
||||
return Err(anyhow!("invalid slice name: {}", slice));
|
||||
} else if slice == "-.slice" {
|
||||
return Ok(String::new());
|
||||
}
|
||||
|
||||
let mut slice_path = String::new();
|
||||
let mut prefix = String::new();
|
||||
for subslice in slice.trim_end_matches(SLICE_SUFFIX).split('-') {
|
||||
if subslice.is_empty() {
|
||||
return Err(anyhow!("invalid slice name: {}", slice));
|
||||
}
|
||||
slice_path = format!("{}/{}{}{}", slice_path, prefix, subslice, SLICE_SUFFIX);
|
||||
prefix = format!("{}{}-", prefix, subslice);
|
||||
}
|
||||
slice_path.remove(0);
|
||||
Ok(slice_path)
|
||||
}
|
||||
|
||||
fn get_unit_name(prefix: String, name: String) -> String {
|
||||
if name.ends_with(SLICE_SUFFIX) {
|
||||
name
|
||||
} else if prefix.is_empty() {
|
||||
format!("{}{}", name, SCOPE_SUFFIX)
|
||||
} else {
|
||||
format!("{}-{}{}", prefix, name, SCOPE_SUFFIX)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::CgroupsPath;
|
||||
|
||||
#[test]
|
||||
fn test_cgroup_path_parse() {
|
||||
let slice = "system.slice";
|
||||
let prefix = "kata_agent";
|
||||
let name = "123";
|
||||
let cgroups_path =
|
||||
CgroupsPath::new(format!("{}:{}:{}", slice, prefix, name).as_str()).unwrap();
|
||||
assert_eq!(slice, cgroups_path.slice.as_str());
|
||||
assert_eq!(prefix, cgroups_path.prefix.as_str());
|
||||
assert_eq!(name, cgroups_path.name.as_str());
|
||||
|
||||
let (parent_slice, unit_name) = cgroups_path.parse().unwrap();
|
||||
assert_eq!(format!("{}", slice), parent_slice);
|
||||
assert_eq!(format!("{}-{}.scope", prefix, name), unit_name);
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
pub const DEFAULT_SLICE: &str = "system.slice";
|
||||
pub const SLICE_SUFFIX: &str = ".slice";
|
||||
pub const SCOPE_SUFFIX: &str = ".scope";
|
||||
pub const UNIT_MODE: &str = "replace";
|
||||
|
||||
pub type Properties<'a> = Vec<(&'a str, zbus::zvariant::Value<'a>)>;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub enum CgroupHierarchy {
|
||||
Legacy,
|
||||
Unified,
|
||||
}
|
||||
@@ -1,129 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::vec;
|
||||
|
||||
use super::common::CgroupHierarchy;
|
||||
use super::common::{Properties, SLICE_SUFFIX, UNIT_MODE};
|
||||
use super::interface::system::ManagerProxyBlocking as SystemManager;
|
||||
use anyhow::{Context, Result};
|
||||
use zbus::zvariant::Value;
|
||||
|
||||
pub trait SystemdInterface {
|
||||
fn start_unit(
|
||||
&self,
|
||||
pid: i32,
|
||||
parent: &str,
|
||||
unit_name: &str,
|
||||
cg_hierarchy: &CgroupHierarchy,
|
||||
) -> Result<()>;
|
||||
|
||||
fn set_properties(&self, unit_name: &str, properties: &Properties) -> Result<()>;
|
||||
|
||||
fn stop_unit(&self, unit_name: &str) -> Result<()>;
|
||||
|
||||
fn get_version(&self) -> Result<String>;
|
||||
|
||||
fn unit_exists(&self, unit_name: &str) -> Result<bool>;
|
||||
|
||||
fn add_process(&self, pid: i32, unit_name: &str) -> Result<()>;
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct DBusClient {}
|
||||
|
||||
impl DBusClient {
|
||||
fn build_proxy(&self) -> Result<SystemManager<'static>> {
|
||||
let connection =
|
||||
zbus::blocking::Connection::system().context("Establishing a D-Bus connection")?;
|
||||
let proxy = SystemManager::new(&connection).context("Building a D-Bus proxy manager")?;
|
||||
Ok(proxy)
|
||||
}
|
||||
}
|
||||
|
||||
impl SystemdInterface for DBusClient {
|
||||
fn start_unit(
|
||||
&self,
|
||||
pid: i32,
|
||||
parent: &str,
|
||||
unit_name: &str,
|
||||
cg_hierarchy: &CgroupHierarchy,
|
||||
) -> Result<()> {
|
||||
let proxy = self.build_proxy()?;
|
||||
|
||||
// enable CPUAccounting & MemoryAccounting & (Block)IOAccounting by default
|
||||
let mut properties: Properties = vec![
|
||||
("CPUAccounting", Value::Bool(true)),
|
||||
("DefaultDependencies", Value::Bool(false)),
|
||||
("MemoryAccounting", Value::Bool(true)),
|
||||
("TasksAccounting", Value::Bool(true)),
|
||||
("Description", Value::Str("kata-agent container".into())),
|
||||
("PIDs", Value::Array(vec![pid as u32].into())),
|
||||
];
|
||||
|
||||
match *cg_hierarchy {
|
||||
CgroupHierarchy::Legacy => properties.push(("IOAccounting", Value::Bool(true))),
|
||||
CgroupHierarchy::Unified => properties.push(("BlockIOAccounting", Value::Bool(true))),
|
||||
}
|
||||
|
||||
if unit_name.ends_with(SLICE_SUFFIX) {
|
||||
properties.push(("Wants", Value::Str(parent.into())));
|
||||
} else {
|
||||
properties.push(("Slice", Value::Str(parent.into())));
|
||||
properties.push(("Delegate", Value::Bool(true)));
|
||||
}
|
||||
|
||||
proxy
|
||||
.start_transient_unit(unit_name, UNIT_MODE, &properties, &[])
|
||||
.with_context(|| format!("failed to start transient unit {}", unit_name))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_properties(&self, unit_name: &str, properties: &Properties) -> Result<()> {
|
||||
let proxy = self.build_proxy()?;
|
||||
|
||||
proxy
|
||||
.set_unit_properties(unit_name, true, properties)
|
||||
.with_context(|| format!("failed to set unit properties {}", unit_name))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn stop_unit(&self, unit_name: &str) -> Result<()> {
|
||||
let proxy = self.build_proxy()?;
|
||||
|
||||
proxy
|
||||
.stop_unit(unit_name, UNIT_MODE)
|
||||
.with_context(|| format!("failed to stop unit {}", unit_name))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Result<String> {
|
||||
let proxy = self.build_proxy()?;
|
||||
|
||||
let systemd_version = proxy
|
||||
.version()
|
||||
.with_context(|| "failed to get systemd version".to_string())?;
|
||||
Ok(systemd_version)
|
||||
}
|
||||
|
||||
fn unit_exists(&self, unit_name: &str) -> Result<bool> {
|
||||
let proxy = self
|
||||
.build_proxy()
|
||||
.with_context(|| format!("Checking if systemd unit {} exists", unit_name))?;
|
||||
|
||||
Ok(proxy.get_unit(unit_name).is_ok())
|
||||
}
|
||||
|
||||
fn add_process(&self, pid: i32, unit_name: &str) -> Result<()> {
|
||||
let proxy = self.build_proxy()?;
|
||||
|
||||
proxy
|
||||
.attach_processes_to_unit(unit_name, "/", &[pid as u32])
|
||||
.with_context(|| format!("failed to add process {}", unit_name))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
pub(crate) mod session;
|
||||
pub(crate) mod system;
|
||||
@@ -1,133 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use crate::cgroups::Manager as CgroupManager;
|
||||
use crate::protocols::agent::CgroupStats;
|
||||
use anyhow::Result;
|
||||
use cgroups::freezer::FreezerState;
|
||||
use libc::{self, pid_t};
|
||||
use oci::LinuxResources;
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryInto;
|
||||
use std::string::String;
|
||||
use std::vec;
|
||||
|
||||
use super::super::fs::Manager as FsManager;
|
||||
|
||||
use super::cgroups_path::CgroupsPath;
|
||||
use super::common::{CgroupHierarchy, Properties};
|
||||
use super::dbus_client::{DBusClient, SystemdInterface};
|
||||
use super::subsystem::transformer::Transformer;
|
||||
use super::subsystem::{cpu::Cpu, cpuset::CpuSet, memory::Memory, pids::Pids};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct Manager {
|
||||
pub paths: HashMap<String, String>,
|
||||
pub mounts: HashMap<String, String>,
|
||||
pub cgroups_path: CgroupsPath,
|
||||
pub cpath: String,
|
||||
pub unit_name: String,
|
||||
// dbus client for set properties
|
||||
dbus_client: DBusClient,
|
||||
// fs manager for get properties
|
||||
fs_manager: FsManager,
|
||||
// cgroup version for different dbus properties
|
||||
cg_hierarchy: CgroupHierarchy,
|
||||
}
|
||||
|
||||
impl CgroupManager for Manager {
|
||||
fn apply(&self, pid: pid_t) -> Result<()> {
|
||||
let unit_name = self.unit_name.as_str();
|
||||
if self.dbus_client.unit_exists(unit_name)? {
|
||||
self.dbus_client.add_process(pid, self.unit_name.as_str())?;
|
||||
} else {
|
||||
self.dbus_client.start_unit(
|
||||
(pid as u32).try_into().unwrap(),
|
||||
self.cgroups_path.slice.as_str(),
|
||||
self.unit_name.as_str(),
|
||||
&self.cg_hierarchy,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set(&self, r: &LinuxResources, _: bool) -> Result<()> {
|
||||
let mut properties: Properties = vec![];
|
||||
|
||||
let systemd_version = self.dbus_client.get_version()?;
|
||||
let systemd_version_str = systemd_version.as_str();
|
||||
|
||||
Cpu::apply(r, &mut properties, &self.cg_hierarchy, systemd_version_str)?;
|
||||
Memory::apply(r, &mut properties, &self.cg_hierarchy, systemd_version_str)?;
|
||||
Pids::apply(r, &mut properties, &self.cg_hierarchy, systemd_version_str)?;
|
||||
CpuSet::apply(r, &mut properties, &self.cg_hierarchy, systemd_version_str)?;
|
||||
|
||||
self.dbus_client
|
||||
.set_properties(self.unit_name.as_str(), &properties)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_stats(&self) -> Result<CgroupStats> {
|
||||
self.fs_manager.get_stats()
|
||||
}
|
||||
|
||||
fn freeze(&self, state: FreezerState) -> Result<()> {
|
||||
self.fs_manager.freeze(state)
|
||||
}
|
||||
|
||||
fn destroy(&mut self) -> Result<()> {
|
||||
self.dbus_client.stop_unit(self.unit_name.as_str())?;
|
||||
self.fs_manager.destroy()
|
||||
}
|
||||
|
||||
fn get_pids(&self) -> Result<Vec<pid_t>> {
|
||||
self.fs_manager.get_pids()
|
||||
}
|
||||
|
||||
fn update_cpuset_path(&self, guest_cpuset: &str, container_cpuset: &str) -> Result<()> {
|
||||
self.fs_manager
|
||||
.update_cpuset_path(guest_cpuset, container_cpuset)
|
||||
}
|
||||
|
||||
fn get_cgroup_path(&self, cg: &str) -> Result<String> {
|
||||
self.fs_manager.get_cgroup_path(cg)
|
||||
}
|
||||
|
||||
fn as_any(&self) -> Result<&dyn Any> {
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"systemd"
|
||||
}
|
||||
}
|
||||
|
||||
impl Manager {
|
||||
pub fn new(cgroups_path_str: &str) -> Result<Self> {
|
||||
let cgroups_path = CgroupsPath::new(cgroups_path_str)?;
|
||||
let (parent_slice, unit_name) = cgroups_path.parse()?;
|
||||
let cpath = parent_slice + "/" + &unit_name;
|
||||
|
||||
let fs_manager = FsManager::new(cpath.as_str())?;
|
||||
|
||||
Ok(Manager {
|
||||
paths: fs_manager.paths.clone(),
|
||||
mounts: fs_manager.mounts.clone(),
|
||||
cgroups_path,
|
||||
cpath,
|
||||
unit_name,
|
||||
dbus_client: DBusClient {},
|
||||
fs_manager,
|
||||
cg_hierarchy: if cgroups::hierarchies::is_cgroup2_unified_mode() {
|
||||
CgroupHierarchy::Unified
|
||||
} else {
|
||||
CgroupHierarchy::Legacy
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
pub mod manager;
|
||||
|
||||
mod cgroups_path;
|
||||
mod common;
|
||||
mod dbus_client;
|
||||
mod interface;
|
||||
mod subsystem;
|
||||
@@ -1,139 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use super::super::common::{CgroupHierarchy, Properties};
|
||||
use super::transformer::Transformer;
|
||||
|
||||
use anyhow::Result;
|
||||
use oci::{LinuxCpu, LinuxResources};
|
||||
use zbus::zvariant::Value;
|
||||
|
||||
const BASIC_SYSTEMD_VERSION: &str = "242";
|
||||
const DEFAULT_CPUQUOTAPERIOD: u64 = 100 * 1000;
|
||||
const SEC2MICROSEC: u64 = 1000 * 1000;
|
||||
const BASIC_INTERVAL: u64 = 10 * 1000;
|
||||
|
||||
pub struct Cpu {}
|
||||
|
||||
impl Transformer for Cpu {
|
||||
fn apply(
|
||||
r: &LinuxResources,
|
||||
properties: &mut Properties,
|
||||
cgroup_hierarchy: &CgroupHierarchy,
|
||||
systemd_version: &str,
|
||||
) -> Result<()> {
|
||||
if let Some(cpu_resources) = &r.cpu {
|
||||
match cgroup_hierarchy {
|
||||
CgroupHierarchy::Legacy => {
|
||||
Self::legacy_apply(cpu_resources, properties, systemd_version)?
|
||||
}
|
||||
CgroupHierarchy::Unified => {
|
||||
Self::unified_apply(cpu_resources, properties, systemd_version)?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Cpu {
|
||||
// v1:
|
||||
// cpu.shares <-> CPUShares
|
||||
// cpu.period <-> CPUQuotaPeriodUSec
|
||||
// cpu.period & cpu.quota <-> CPUQuotaPerSecUSec
|
||||
fn legacy_apply(
|
||||
cpu_resources: &LinuxCpu,
|
||||
properties: &mut Properties,
|
||||
systemd_version: &str,
|
||||
) -> Result<()> {
|
||||
if let Some(shares) = cpu_resources.shares {
|
||||
properties.push(("CPUShares", Value::U64(shares)));
|
||||
}
|
||||
|
||||
if let Some(period) = cpu_resources.period {
|
||||
if period != 0 && systemd_version >= BASIC_SYSTEMD_VERSION {
|
||||
properties.push(("CPUQuotaPeriodUSec", Value::U64(period)));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(quota) = cpu_resources.quota {
|
||||
let period = cpu_resources.period.unwrap_or(DEFAULT_CPUQUOTAPERIOD);
|
||||
if period != 0 {
|
||||
let cpu_quota_per_sec_usec = resolve_cpuquota(quota, period);
|
||||
properties.push(("CPUQuotaPerSecUSec", Value::U64(cpu_quota_per_sec_usec)));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// v2:
|
||||
// cpu.shares <-> CPUWeight
|
||||
// cpu.period <-> CPUQuotaPeriodUSec
|
||||
// cpu.period & cpu.quota <-> CPUQuotaPerSecUSec
|
||||
fn unified_apply(
|
||||
cpu_resources: &LinuxCpu,
|
||||
properties: &mut Properties,
|
||||
systemd_version: &str,
|
||||
) -> Result<()> {
|
||||
if let Some(shares) = cpu_resources.shares {
|
||||
let weight = shares_to_weight(shares);
|
||||
properties.push(("CPUWeight", Value::U64(weight)));
|
||||
}
|
||||
|
||||
if let Some(period) = cpu_resources.period {
|
||||
if period != 0 && systemd_version >= BASIC_SYSTEMD_VERSION {
|
||||
properties.push(("CPUQuotaPeriodUSec", Value::U64(period)));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(quota) = cpu_resources.quota {
|
||||
let period = cpu_resources.period.unwrap_or(DEFAULT_CPUQUOTAPERIOD);
|
||||
if period != 0 {
|
||||
let cpu_quota_per_sec_usec = resolve_cpuquota(quota, period);
|
||||
properties.push(("CPUQuotaPerSecUSec", Value::U64(cpu_quota_per_sec_usec)));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// ref: https://github.com/containers/crun/blob/main/crun.1.md#cgroup-v2
|
||||
// [2-262144] to [1-10000]
|
||||
fn shares_to_weight(shares: u64) -> u64 {
|
||||
if shares == 0 {
|
||||
return 100;
|
||||
}
|
||||
|
||||
1 + ((shares - 2) * 9999) / 262142
|
||||
}
|
||||
|
||||
fn resolve_cpuquota(quota: i64, period: u64) -> u64 {
|
||||
let mut cpu_quota_per_sec_usec = u64::MAX;
|
||||
if quota > 0 {
|
||||
cpu_quota_per_sec_usec = (quota as u64) * SEC2MICROSEC / period;
|
||||
if cpu_quota_per_sec_usec % BASIC_INTERVAL != 0 {
|
||||
cpu_quota_per_sec_usec =
|
||||
((cpu_quota_per_sec_usec / BASIC_INTERVAL) + 1) * BASIC_INTERVAL;
|
||||
}
|
||||
}
|
||||
cpu_quota_per_sec_usec
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::cgroups::systemd::subsystem::cpu::resolve_cpuquota;
|
||||
|
||||
#[test]
|
||||
fn test_unified_cpuquota() {
|
||||
let quota: i64 = 1000000;
|
||||
let period: u64 = 500000;
|
||||
let cpu_quota_per_sec_usec = resolve_cpuquota(quota, period);
|
||||
|
||||
assert_eq!(2000000, cpu_quota_per_sec_usec);
|
||||
}
|
||||
}
|
||||
@@ -1,124 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use super::super::common::{CgroupHierarchy, Properties};
|
||||
|
||||
use super::transformer::Transformer;
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use bit_vec::BitVec;
|
||||
use oci::{LinuxCpu, LinuxResources};
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use zbus::zvariant::Value;
|
||||
|
||||
const BASIC_SYSTEMD_VERSION: &str = "244";
|
||||
|
||||
pub struct CpuSet {}
|
||||
|
||||
impl Transformer for CpuSet {
|
||||
fn apply(
|
||||
r: &LinuxResources,
|
||||
properties: &mut Properties,
|
||||
_: &CgroupHierarchy,
|
||||
systemd_version: &str,
|
||||
) -> Result<()> {
|
||||
if let Some(cpuset_resources) = &r.cpu {
|
||||
Self::apply(cpuset_resources, properties, systemd_version)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// v1 & v2:
|
||||
// cpuset.cpus <-> AllowedCPUs (v244)
|
||||
// cpuset.mems <-> AllowedMemoryNodes (v244)
|
||||
impl CpuSet {
|
||||
fn apply(
|
||||
cpuset_resources: &LinuxCpu,
|
||||
properties: &mut Properties,
|
||||
systemd_version: &str,
|
||||
) -> Result<()> {
|
||||
if systemd_version < BASIC_SYSTEMD_VERSION {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let cpus = cpuset_resources.cpus.as_str();
|
||||
if !cpus.is_empty() {
|
||||
let cpus_vec: BitMask = cpus.try_into()?;
|
||||
properties.push(("AllowedCPUs", Value::Array(cpus_vec.0.into())));
|
||||
}
|
||||
|
||||
let mems = cpuset_resources.mems.as_str();
|
||||
if !mems.is_empty() {
|
||||
let mems_vec: BitMask = mems.try_into()?;
|
||||
properties.push(("AllowedMemoryNodes", Value::Array(mems_vec.0.into())));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct BitMask(Vec<u8>);
|
||||
|
||||
impl TryFrom<&str> for BitMask {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(bitmask_str: &str) -> Result<Self, Self::Error> {
|
||||
let mut bitmask_vec = BitVec::from_elem(8, false);
|
||||
let bitmask_str_vec: Vec<&str> = bitmask_str.split(',').collect();
|
||||
for bitmask in bitmask_str_vec.iter() {
|
||||
let range: Vec<&str> = bitmask.split('-').collect();
|
||||
match range.len() {
|
||||
1 => {
|
||||
let idx: usize = range[0].parse()?;
|
||||
while idx >= bitmask_vec.len() {
|
||||
bitmask_vec.grow(8, false);
|
||||
}
|
||||
bitmask_vec.set(adjust_index(idx), true);
|
||||
}
|
||||
2 => {
|
||||
let left_index = range[0].parse()?;
|
||||
let right_index = range[1].parse()?;
|
||||
while right_index >= bitmask_vec.len() {
|
||||
bitmask_vec.grow(8, false);
|
||||
}
|
||||
for idx in left_index..=right_index {
|
||||
bitmask_vec.set(adjust_index(idx), true);
|
||||
}
|
||||
}
|
||||
_ => bail!("invalid bitmask str {}", bitmask_str),
|
||||
}
|
||||
}
|
||||
let mut result_vec = bitmask_vec.to_bytes();
|
||||
result_vec.reverse();
|
||||
|
||||
Ok(BitMask(result_vec))
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn adjust_index(idx: usize) -> usize {
|
||||
idx / 8 * 8 + 7 - idx % 8
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::cgroups::systemd::subsystem::cpuset::BitMask;
|
||||
|
||||
#[test]
|
||||
fn test_bitmask_conversion() {
|
||||
let cpus_vec: BitMask = "2-4".try_into().unwrap();
|
||||
assert_eq!(vec![0b11100 as u8], cpus_vec.0);
|
||||
|
||||
let cpus_vec: BitMask = "1,7".try_into().unwrap();
|
||||
assert_eq!(vec![0b10000010 as u8], cpus_vec.0);
|
||||
|
||||
let cpus_vec: BitMask = "0,2-3,7".try_into().unwrap();
|
||||
assert_eq!(vec![0b10001101 as u8], cpus_vec.0);
|
||||
}
|
||||
}
|
||||
@@ -1,117 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use super::super::common::{CgroupHierarchy, Properties};
|
||||
|
||||
use super::transformer::Transformer;
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use oci::{LinuxMemory, LinuxResources};
|
||||
use zbus::zvariant::Value;
|
||||
|
||||
pub struct Memory {}
|
||||
|
||||
impl Transformer for Memory {
|
||||
fn apply(
|
||||
r: &LinuxResources,
|
||||
properties: &mut Properties,
|
||||
cgroup_hierarchy: &CgroupHierarchy,
|
||||
_: &str,
|
||||
) -> Result<()> {
|
||||
if let Some(memory_resources) = &r.memory {
|
||||
match cgroup_hierarchy {
|
||||
CgroupHierarchy::Legacy => Self::legacy_apply(memory_resources, properties)?,
|
||||
CgroupHierarchy::Unified => Self::unified_apply(memory_resources, properties)?,
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Memory {
|
||||
// v1:
|
||||
// memory.limit <-> MemoryLimit
|
||||
fn legacy_apply(memory_resources: &LinuxMemory, properties: &mut Properties) -> Result<()> {
|
||||
if let Some(limit) = memory_resources.limit {
|
||||
let limit = match limit {
|
||||
1..=i64::MAX => limit as u64,
|
||||
0 => u64::MAX,
|
||||
_ => bail!("invalid memory.limit"),
|
||||
};
|
||||
properties.push(("MemoryLimit", Value::U64(limit)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// v2:
|
||||
// memory.low <-> MemoryLow
|
||||
// memory.max <-> MemoryMax
|
||||
// memory.swap & memory.limit <-> MemorySwapMax
|
||||
fn unified_apply(memory_resources: &LinuxMemory, properties: &mut Properties) -> Result<()> {
|
||||
if let Some(limit) = memory_resources.limit {
|
||||
let limit = match limit {
|
||||
1..=i64::MAX => limit as u64,
|
||||
0 => u64::MAX,
|
||||
_ => bail!("invalid memory.limit: {}", limit),
|
||||
};
|
||||
properties.push(("MemoryMax", Value::U64(limit)));
|
||||
}
|
||||
|
||||
if let Some(reservation) = memory_resources.reservation {
|
||||
let reservation = match reservation {
|
||||
1..=i64::MAX => reservation as u64,
|
||||
0 => u64::MAX,
|
||||
_ => bail!("invalid memory.reservation: {}", reservation),
|
||||
};
|
||||
properties.push(("MemoryLow", Value::U64(reservation)));
|
||||
}
|
||||
|
||||
let swap = match memory_resources.swap {
|
||||
Some(0) => u64::MAX,
|
||||
Some(1..=i64::MAX) => match memory_resources.limit {
|
||||
Some(1..=i64::MAX) => {
|
||||
(memory_resources.limit.unwrap() - memory_resources.swap.unwrap()) as u64
|
||||
}
|
||||
_ => bail!("invalid memory.limit when memory.swap specified"),
|
||||
},
|
||||
None => u64::MAX,
|
||||
_ => bail!("invalid memory.swap"),
|
||||
};
|
||||
|
||||
properties.push(("MemorySwapMax", Value::U64(swap)));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::Memory;
|
||||
use super::Properties;
|
||||
use super::Value;
|
||||
|
||||
#[test]
|
||||
fn test_unified_memory() {
|
||||
let memory_resources = oci::LinuxMemory {
|
||||
limit: Some(736870912),
|
||||
reservation: Some(536870912),
|
||||
swap: Some(536870912),
|
||||
kernel: Some(0),
|
||||
kernel_tcp: Some(0),
|
||||
swappiness: Some(0),
|
||||
disable_oom_killer: Some(false),
|
||||
};
|
||||
let mut properties: Properties = vec![];
|
||||
|
||||
assert_eq!(
|
||||
true,
|
||||
Memory::unified_apply(&memory_resources, &mut properties).is_ok()
|
||||
);
|
||||
|
||||
assert_eq!(Value::U64(200000000), properties[2].1);
|
||||
}
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
pub mod cpu;
|
||||
pub mod cpuset;
|
||||
pub mod memory;
|
||||
pub mod pids;
|
||||
pub mod transformer;
|
||||
@@ -1,60 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use super::super::common::{CgroupHierarchy, Properties};
|
||||
|
||||
use super::transformer::Transformer;
|
||||
|
||||
use anyhow::Result;
|
||||
use oci::{LinuxPids, LinuxResources};
|
||||
use zbus::zvariant::Value;
|
||||
|
||||
pub struct Pids {}
|
||||
|
||||
impl Transformer for Pids {
|
||||
fn apply(
|
||||
r: &LinuxResources,
|
||||
properties: &mut Properties,
|
||||
_: &CgroupHierarchy,
|
||||
_: &str,
|
||||
) -> Result<()> {
|
||||
if let Some(pids_resources) = &r.pids {
|
||||
Self::apply(pids_resources, properties)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// pids.limit <-> TasksMax
|
||||
impl Pids {
|
||||
fn apply(pids_resources: &LinuxPids, properties: &mut Properties) -> Result<()> {
|
||||
let limit = if pids_resources.limit > 0 {
|
||||
pids_resources.limit as u64
|
||||
} else {
|
||||
u64::MAX
|
||||
};
|
||||
|
||||
properties.push(("TasksMax", Value::U64(limit)));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::Pids;
|
||||
use super::Properties;
|
||||
use super::Value;
|
||||
|
||||
#[test]
|
||||
fn test_subsystem_workflow() {
|
||||
let pids_resources = oci::LinuxPids { limit: 0 };
|
||||
let mut properties: Properties = vec![];
|
||||
|
||||
assert_eq!(true, Pids::apply(&pids_resources, &mut properties).is_ok());
|
||||
|
||||
assert_eq!(Value::U64(u64::MAX), properties[0].1);
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
// Copyright 2021-2022 Kata Contributors
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use super::super::common::{CgroupHierarchy, Properties};
|
||||
use anyhow::Result;
|
||||
use oci::LinuxResources;
|
||||
|
||||
pub trait Transformer {
|
||||
fn apply(
|
||||
r: &LinuxResources,
|
||||
properties: &mut Properties,
|
||||
cgroup_hierarchy: &CgroupHierarchy,
|
||||
systemd_version: &str,
|
||||
) -> Result<()>;
|
||||
}
|
||||
@@ -6,9 +6,8 @@
|
||||
use anyhow::{anyhow, Result};
|
||||
use nix::errno::Errno;
|
||||
use nix::pty;
|
||||
use nix::sys::socket;
|
||||
use nix::sys::{socket, uio};
|
||||
use nix::unistd::{self, dup2};
|
||||
use std::io::IoSlice;
|
||||
use std::os::unix::io::{AsRawFd, RawFd};
|
||||
use std::path::Path;
|
||||
|
||||
@@ -24,7 +23,10 @@ pub fn setup_console_socket(csocket_path: &str) -> Result<Option<RawFd>> {
|
||||
None,
|
||||
)?;
|
||||
|
||||
match socket::connect(socket_fd, &socket::UnixAddr::new(Path::new(csocket_path))?) {
|
||||
match socket::connect(
|
||||
socket_fd,
|
||||
&socket::SockAddr::Unix(socket::UnixAddr::new(Path::new(csocket_path))?),
|
||||
) {
|
||||
Ok(()) => Ok(Some(socket_fd)),
|
||||
Err(errno) => Err(anyhow!("failed to open console fd: {}", errno)),
|
||||
}
|
||||
@@ -34,11 +36,11 @@ pub fn setup_master_console(socket_fd: RawFd) -> Result<()> {
|
||||
let pseudo = pty::openpty(None, None)?;
|
||||
|
||||
let pty_name: &[u8] = b"/dev/ptmx";
|
||||
let iov = [IoSlice::new(pty_name)];
|
||||
let iov = [uio::IoVec::from_slice(pty_name)];
|
||||
let fds = [pseudo.master];
|
||||
let cmsg = socket::ControlMessage::ScmRights(&fds);
|
||||
|
||||
socket::sendmsg::<()>(socket_fd, &iov, &[cmsg], socket::MsgFlags::empty(), None)?;
|
||||
socket::sendmsg(socket_fd, &iov, &[cmsg], socket::MsgFlags::empty(), None)?;
|
||||
|
||||
unistd::setsid()?;
|
||||
let ret = unsafe { libc::ioctl(pseudo.slave, libc::TIOCSCTTY) };
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use libc::pid_t;
|
||||
use oci::{ContainerState, LinuxDevice, LinuxIdMapping};
|
||||
use oci::{Linux, LinuxNamespace, LinuxResources, Spec};
|
||||
use oci::{Hook, Linux, LinuxNamespace, LinuxResources, Spec};
|
||||
use std::clone::Clone;
|
||||
use std::ffi::CString;
|
||||
use std::fmt::Display;
|
||||
@@ -22,7 +22,6 @@ use crate::capabilities;
|
||||
use crate::cgroups::fs::Manager as FsManager;
|
||||
#[cfg(test)]
|
||||
use crate::cgroups::mock::Manager as FsManager;
|
||||
use crate::cgroups::systemd::manager::Manager as SystemdManager;
|
||||
use crate::cgroups::Manager;
|
||||
#[cfg(feature = "standard-oci-runtime")]
|
||||
use crate::console;
|
||||
@@ -30,7 +29,6 @@ use crate::log_child;
|
||||
use crate::process::Process;
|
||||
#[cfg(feature = "seccomp")]
|
||||
use crate::seccomp;
|
||||
use crate::selinux;
|
||||
use crate::specconv::CreateOpts;
|
||||
use crate::{mount, validator};
|
||||
|
||||
@@ -51,7 +49,6 @@ use std::os::unix::io::AsRawFd;
|
||||
use protobuf::SingularPtrField;
|
||||
|
||||
use oci::State as OCIState;
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::os::unix::io::FromRawFd;
|
||||
use std::str::FromStr;
|
||||
@@ -67,9 +64,6 @@ use rlimit::{setrlimit, Resource, Rlim};
|
||||
use tokio::io::AsyncBufReadExt;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use kata_sys_util::hooks::HookStates;
|
||||
use kata_sys_util::validate::valid_env;
|
||||
|
||||
pub const EXEC_FIFO_FILENAME: &str = "exec.fifo";
|
||||
|
||||
const INIT: &str = "INIT";
|
||||
@@ -112,10 +106,6 @@ impl Default for ContainerStatus {
|
||||
}
|
||||
}
|
||||
|
||||
// We might want to change this to thiserror in the future
|
||||
const MissingLinux: &str = "no linux config";
|
||||
const InvalidNamespace: &str = "invalid namespace type";
|
||||
|
||||
pub type Config = CreateOpts;
|
||||
type NamespaceType = String;
|
||||
|
||||
@@ -206,8 +196,6 @@ lazy_static! {
|
||||
},
|
||||
]
|
||||
};
|
||||
|
||||
pub static ref SYSTEMD_CGROUP_PATH_FORMAT:Regex = Regex::new(r"^[\w\-.]*:[\w\-.]*:[\w\-.]*$").unwrap();
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
@@ -246,7 +234,7 @@ pub struct LinuxContainer {
|
||||
pub id: String,
|
||||
pub root: String,
|
||||
pub config: Config,
|
||||
pub cgroup_manager: Box<dyn Manager + Send + Sync>,
|
||||
pub cgroup_manager: Option<FsManager>,
|
||||
pub init_process_pid: pid_t,
|
||||
pub init_process_start_time: u64,
|
||||
pub uid_map_path: String,
|
||||
@@ -295,11 +283,16 @@ impl Container for LinuxContainer {
|
||||
));
|
||||
}
|
||||
|
||||
self.cgroup_manager.as_ref().freeze(FreezerState::Frozen)?;
|
||||
if self.cgroup_manager.is_some() {
|
||||
self.cgroup_manager
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.freeze(FreezerState::Frozen)?;
|
||||
|
||||
self.status.transition(ContainerState::Paused);
|
||||
|
||||
Ok(())
|
||||
self.status.transition(ContainerState::Paused);
|
||||
return Ok(());
|
||||
}
|
||||
Err(anyhow!("failed to get container's cgroup manager"))
|
||||
}
|
||||
|
||||
fn resume(&mut self) -> Result<()> {
|
||||
@@ -308,11 +301,16 @@ impl Container for LinuxContainer {
|
||||
return Err(anyhow!("container status is: {:?}, not paused", status));
|
||||
}
|
||||
|
||||
self.cgroup_manager.as_ref().freeze(FreezerState::Thawed)?;
|
||||
if self.cgroup_manager.is_some() {
|
||||
self.cgroup_manager
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.freeze(FreezerState::Thawed)?;
|
||||
|
||||
self.status.transition(ContainerState::Running);
|
||||
|
||||
Ok(())
|
||||
self.status.transition(ContainerState::Running);
|
||||
return Ok(());
|
||||
}
|
||||
Err(anyhow!("failed to get container's cgroup manager"))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -387,9 +385,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
|
||||
let buf = read_sync(crfd)?;
|
||||
let cm_str = std::str::from_utf8(&buf)?;
|
||||
|
||||
// deserialize cm_str into FsManager and SystemdManager separately
|
||||
let fs_cm: Result<FsManager, serde_json::Error> = serde_json::from_str(cm_str);
|
||||
let systemd_cm: Result<SystemdManager, serde_json::Error> = serde_json::from_str(cm_str);
|
||||
let cm: FsManager = serde_json::from_str(cm_str)?;
|
||||
|
||||
#[cfg(feature = "standard-oci-runtime")]
|
||||
let csocket_fd = console::setup_console_socket(&std::env::var(CONSOLE_SOCKET_FD)?)?;
|
||||
@@ -401,7 +397,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
|
||||
};
|
||||
|
||||
if spec.linux.is_none() {
|
||||
return Err(anyhow!(MissingLinux));
|
||||
return Err(anyhow!("no linux config"));
|
||||
}
|
||||
let linux = spec.linux.as_ref().unwrap();
|
||||
|
||||
@@ -415,7 +411,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
|
||||
for ns in &nses {
|
||||
let s = NAMESPACES.get(&ns.r#type.as_str());
|
||||
if s.is_none() {
|
||||
return Err(anyhow!(InvalidNamespace));
|
||||
return Err(anyhow!("invalid ns type"));
|
||||
}
|
||||
let s = s.unwrap();
|
||||
|
||||
@@ -530,8 +526,6 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
let selinux_enabled = selinux::is_enabled()?;
|
||||
|
||||
sched::unshare(to_new & !CloneFlags::CLONE_NEWUSER)?;
|
||||
|
||||
if userns {
|
||||
@@ -549,18 +543,7 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
|
||||
|
||||
if to_new.contains(CloneFlags::CLONE_NEWNS) {
|
||||
// setup rootfs
|
||||
if let Ok(systemd_cm) = systemd_cm {
|
||||
mount::init_rootfs(
|
||||
cfd_log,
|
||||
&spec,
|
||||
&systemd_cm.paths,
|
||||
&systemd_cm.mounts,
|
||||
bind_device,
|
||||
)?;
|
||||
} else {
|
||||
let fs_cm = fs_cm.unwrap();
|
||||
mount::init_rootfs(cfd_log, &spec, &fs_cm.paths, &fs_cm.mounts, bind_device)?;
|
||||
}
|
||||
mount::init_rootfs(cfd_log, &spec, &cm.paths, &cm.mounts, bind_device)?;
|
||||
}
|
||||
|
||||
if init {
|
||||
@@ -633,18 +616,6 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
|
||||
capctl::prctl::set_no_new_privs().map_err(|_| anyhow!("cannot set no new privileges"))?;
|
||||
}
|
||||
|
||||
// Set SELinux label
|
||||
if !oci_process.selinux_label.is_empty() {
|
||||
if !selinux_enabled {
|
||||
return Err(anyhow!(
|
||||
"SELinux label for the process is provided but SELinux is not enabled on the running kernel"
|
||||
));
|
||||
}
|
||||
|
||||
log_child!(cfd_log, "Set SELinux label to the container process");
|
||||
selinux::set_exec_label(&oci_process.selinux_label)?;
|
||||
}
|
||||
|
||||
// Log unknown seccomp system calls in advance before the log file descriptor closes.
|
||||
#[cfg(feature = "seccomp")]
|
||||
if let Some(ref scmp) = linux.seccomp {
|
||||
@@ -854,17 +825,22 @@ impl BaseContainer for LinuxContainer {
|
||||
}
|
||||
|
||||
fn stats(&self) -> Result<StatsContainerResponse> {
|
||||
let mut r = StatsContainerResponse::default();
|
||||
|
||||
if self.cgroup_manager.is_some() {
|
||||
r.cgroup_stats =
|
||||
SingularPtrField::some(self.cgroup_manager.as_ref().unwrap().get_stats()?);
|
||||
}
|
||||
|
||||
// what about network interface stats?
|
||||
|
||||
Ok(StatsContainerResponse {
|
||||
cgroup_stats: SingularPtrField::some(self.cgroup_manager.as_ref().get_stats()?),
|
||||
..Default::default()
|
||||
})
|
||||
Ok(r)
|
||||
}
|
||||
|
||||
fn set(&mut self, r: LinuxResources) -> Result<()> {
|
||||
self.cgroup_manager.as_ref().set(&r, true)?;
|
||||
|
||||
if self.cgroup_manager.is_some() {
|
||||
self.cgroup_manager.as_ref().unwrap().set(&r, true)?;
|
||||
}
|
||||
self.config
|
||||
.spec
|
||||
.as_mut()
|
||||
@@ -1037,8 +1013,7 @@ impl BaseContainer for LinuxContainer {
|
||||
&logger,
|
||||
spec,
|
||||
&p,
|
||||
self.cgroup_manager.as_ref(),
|
||||
self.config.use_systemd_cgroup,
|
||||
self.cgroup_manager.as_ref().unwrap(),
|
||||
&st,
|
||||
&mut pipe_w,
|
||||
&mut pipe_r,
|
||||
@@ -1101,14 +1076,12 @@ impl BaseContainer for LinuxContainer {
|
||||
}
|
||||
}
|
||||
|
||||
// guest Poststop hook
|
||||
// * should be executed after the container is deleted but before the delete operation returns
|
||||
// * the executable file is in agent namespace
|
||||
// * should also be executed in agent namespace.
|
||||
if let Some(hooks) = spec.hooks.as_ref() {
|
||||
info!(self.logger, "guest Poststop hook");
|
||||
let mut hook_states = HookStates::new();
|
||||
hook_states.execute_hooks(&hooks.poststop, Some(st))?;
|
||||
if spec.hooks.is_some() {
|
||||
info!(self.logger, "poststop");
|
||||
let hooks = spec.hooks.as_ref().unwrap();
|
||||
for h in hooks.poststop.iter() {
|
||||
execute_hook(&self.logger, h, &st).await?;
|
||||
}
|
||||
}
|
||||
|
||||
self.status.transition(ContainerState::Stopped);
|
||||
@@ -1118,19 +1091,19 @@ impl BaseContainer for LinuxContainer {
|
||||
)?;
|
||||
fs::remove_dir_all(&self.root)?;
|
||||
|
||||
let cgm = self.cgroup_manager.as_mut();
|
||||
// Kill all of the processes created in this container to prevent
|
||||
// the leak of some daemon process when this container shared pidns
|
||||
// with the sandbox.
|
||||
let pids = cgm.get_pids().context("get cgroup pids")?;
|
||||
for i in pids {
|
||||
if let Err(e) = signal::kill(Pid::from_raw(i), Signal::SIGKILL) {
|
||||
warn!(self.logger, "kill the process {} error: {:?}", i, e);
|
||||
if let Some(cgm) = self.cgroup_manager.as_mut() {
|
||||
// Kill all of the processes created in this container to prevent
|
||||
// the leak of some daemon process when this container shared pidns
|
||||
// with the sandbox.
|
||||
let pids = cgm.get_pids().context("get cgroup pids")?;
|
||||
for i in pids {
|
||||
if let Err(e) = signal::kill(Pid::from_raw(i), Signal::SIGKILL) {
|
||||
warn!(self.logger, "kill the process {} error: {:?}", i, e);
|
||||
}
|
||||
}
|
||||
|
||||
cgm.destroy().context("destroy cgroups")?;
|
||||
}
|
||||
|
||||
cgm.destroy().context("destroy cgroups")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1154,14 +1127,16 @@ impl BaseContainer for LinuxContainer {
|
||||
.ok_or_else(|| anyhow!("OCI spec was not found"))?;
|
||||
let st = self.oci_state()?;
|
||||
|
||||
// guest Poststart hook
|
||||
// * should be executed after the container is started but before the delete operation returns
|
||||
// * the executable file is in agent namespace
|
||||
// * should also be executed in agent namespace.
|
||||
if let Some(hooks) = spec.hooks.as_ref() {
|
||||
info!(self.logger, "guest Poststart hook");
|
||||
let mut hook_states = HookStates::new();
|
||||
hook_states.execute_hooks(&hooks.poststart, Some(st))?;
|
||||
// run poststart hook
|
||||
if spec.hooks.is_some() {
|
||||
info!(self.logger, "poststart hook");
|
||||
let hooks = spec
|
||||
.hooks
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("OCI hooks were not found"))?;
|
||||
for h in hooks.poststart.iter() {
|
||||
execute_hook(&self.logger, h, &st).await?;
|
||||
}
|
||||
}
|
||||
|
||||
unistd::close(fd)?;
|
||||
@@ -1206,7 +1181,7 @@ fn do_exec(args: &[String]) -> ! {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
pub fn update_namespaces(logger: &Logger, spec: &mut Spec, init_pid: RawFd) -> Result<()> {
|
||||
fn update_namespaces(logger: &Logger, spec: &mut Spec, init_pid: RawFd) -> Result<()> {
|
||||
info!(logger, "updating namespaces");
|
||||
let linux = spec
|
||||
.linux
|
||||
@@ -1300,13 +1275,11 @@ pub fn setup_child_logger(fd: RawFd, child_logger: Logger) -> tokio::task::JoinH
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn join_namespaces(
|
||||
logger: &Logger,
|
||||
spec: &Spec,
|
||||
p: &Process,
|
||||
cm: &(dyn Manager + Send + Sync),
|
||||
use_systemd_cgroup: bool,
|
||||
cm: &FsManager,
|
||||
st: &OCIState,
|
||||
pipe_w: &mut PipeStream,
|
||||
pipe_r: &mut PipeStream,
|
||||
@@ -1333,11 +1306,7 @@ async fn join_namespaces(
|
||||
info!(logger, "wait child received oci process");
|
||||
read_async(pipe_r).await?;
|
||||
|
||||
let cm_str = if use_systemd_cgroup {
|
||||
serde_json::to_string(cm.as_any()?.downcast_ref::<SystemdManager>().unwrap())
|
||||
} else {
|
||||
serde_json::to_string(cm.as_any()?.downcast_ref::<FsManager>().unwrap())
|
||||
}?;
|
||||
let cm_str = serde_json::to_string(cm)?;
|
||||
write_async(pipe_w, SYNC_DATA, cm_str.as_str()).await?;
|
||||
|
||||
// wait child setup user namespace
|
||||
@@ -1360,16 +1329,13 @@ async fn join_namespaces(
|
||||
}
|
||||
|
||||
// apply cgroups
|
||||
// For FsManger, it's no matter about the order of apply and set.
|
||||
// For SystemdManger, apply must be precede set because we can only create a systemd unit with specific processes(pids).
|
||||
if res.is_some() {
|
||||
info!(logger, "apply processes to cgroups!");
|
||||
cm.apply(p.pid)?;
|
||||
if p.init && res.is_some() {
|
||||
info!(logger, "apply cgroups!");
|
||||
cm.set(res.unwrap(), false)?;
|
||||
}
|
||||
|
||||
if p.init && res.is_some() {
|
||||
info!(logger, "set properties to cgroups!");
|
||||
cm.set(res.unwrap(), false)?;
|
||||
if res.is_some() {
|
||||
cm.apply(p.pid)?;
|
||||
}
|
||||
|
||||
info!(logger, "notify child to continue");
|
||||
@@ -1382,14 +1348,13 @@ async fn join_namespaces(
|
||||
|
||||
info!(logger, "get ready to run prestart hook!");
|
||||
|
||||
// guest Prestart hook
|
||||
// * should be executed during the start operation, and before the container command is executed
|
||||
// * the executable file is in agent namespace
|
||||
// * should also be executed in agent namespace.
|
||||
if let Some(hooks) = spec.hooks.as_ref() {
|
||||
info!(logger, "guest Prestart hook");
|
||||
let mut hook_states = HookStates::new();
|
||||
hook_states.execute_hooks(&hooks.prestart, Some(st.clone()))?;
|
||||
// run prestart hook
|
||||
if spec.hooks.is_some() {
|
||||
info!(logger, "prestart hook");
|
||||
let hooks = spec.hooks.as_ref().unwrap();
|
||||
for h in hooks.prestart.iter() {
|
||||
execute_hook(&logger, h, st).await?;
|
||||
}
|
||||
}
|
||||
|
||||
// notify child run prestart hooks completed
|
||||
@@ -1472,44 +1437,33 @@ impl LinuxContainer {
|
||||
Some(unistd::getuid()),
|
||||
Some(unistd::getgid()),
|
||||
)
|
||||
.context(format!("Cannot change owner of container {} root", id))?;
|
||||
.context(format!("cannot change onwer of container {} root", id))?;
|
||||
|
||||
if config.spec.is_none() {
|
||||
return Err(anyhow!(nix::Error::EINVAL));
|
||||
}
|
||||
|
||||
let spec = config.spec.as_ref().unwrap();
|
||||
|
||||
if spec.linux.is_none() {
|
||||
return Err(anyhow!(nix::Error::EINVAL));
|
||||
}
|
||||
|
||||
let linux = spec.linux.as_ref().unwrap();
|
||||
let cpath = if config.use_systemd_cgroup {
|
||||
if linux.cgroups_path.len() == 2 {
|
||||
format!("system.slice:kata_agent:{}", id.as_str())
|
||||
} else {
|
||||
linux.cgroups_path.clone()
|
||||
}
|
||||
} else if linux.cgroups_path.is_empty() {
|
||||
|
||||
let cpath = if linux.cgroups_path.is_empty() {
|
||||
format!("/{}", id.as_str())
|
||||
} else {
|
||||
// if we have a systemd cgroup path we need to convert it to a fs cgroup path
|
||||
linux.cgroups_path.replace(':', "/")
|
||||
linux.cgroups_path.clone()
|
||||
};
|
||||
|
||||
let cgroup_manager: Box<dyn Manager + Send + Sync> = if config.use_systemd_cgroup {
|
||||
Box::new(SystemdManager::new(cpath.as_str()).map_err(|e| {
|
||||
anyhow!(format!(
|
||||
"fail to create cgroup manager with path {}: {:}",
|
||||
cpath, e
|
||||
))
|
||||
})?)
|
||||
} else {
|
||||
Box::new(FsManager::new(cpath.as_str()).map_err(|e| {
|
||||
anyhow!(format!(
|
||||
"fail to create cgroup manager with path {}: {:}",
|
||||
cpath, e
|
||||
))
|
||||
})?)
|
||||
};
|
||||
let cgroup_manager = FsManager::new(cpath.as_str())?;
|
||||
info!(logger, "new cgroup_manager {:?}", &cgroup_manager);
|
||||
|
||||
Ok(LinuxContainer {
|
||||
id: id.clone(),
|
||||
root,
|
||||
cgroup_manager,
|
||||
cgroup_manager: Some(cgroup_manager),
|
||||
status: ContainerStatus::new(),
|
||||
uid_map_path: String::from(""),
|
||||
gid_map_path: "".to_string(),
|
||||
@@ -1561,16 +1515,154 @@ fn set_sysctls(sysctls: &HashMap<String, String>) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
use std::process::Stdio;
|
||||
use std::time::Duration;
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
|
||||
pub async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
|
||||
let logger = logger.new(o!("action" => "execute-hook"));
|
||||
|
||||
let binary = PathBuf::from(h.path.as_str());
|
||||
let path = binary.canonicalize()?;
|
||||
if !path.exists() {
|
||||
return Err(anyhow!(nix::Error::EINVAL));
|
||||
}
|
||||
|
||||
let mut args = h.args.clone();
|
||||
// the hook.args[0] is the hook binary name which shouldn't be included
|
||||
// in the Command.args
|
||||
if args.len() > 1 {
|
||||
args.remove(0);
|
||||
}
|
||||
|
||||
// all invalid envs will be omitted, only valid envs will be passed to hook.
|
||||
let env: HashMap<&str, &str> = h.env.iter().filter_map(|e| valid_env(e)).collect();
|
||||
|
||||
// Avoid the exit signal to be reaped by the global reaper.
|
||||
let _wait_locker = WAIT_PID_LOCKER.lock().await;
|
||||
let mut child = tokio::process::Command::new(path)
|
||||
.args(args.iter())
|
||||
.envs(env.iter())
|
||||
.kill_on_drop(true)
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.spawn()?;
|
||||
|
||||
// default timeout 10s
|
||||
let mut timeout: u64 = 10;
|
||||
|
||||
// if timeout is set if hook, then use the specified value
|
||||
if let Some(t) = h.timeout {
|
||||
if t > 0 {
|
||||
timeout = t as u64;
|
||||
}
|
||||
}
|
||||
|
||||
let state = serde_json::to_string(st)?;
|
||||
let path = h.path.clone();
|
||||
|
||||
let join_handle = tokio::spawn(async move {
|
||||
if let Some(mut stdin) = child.stdin.take() {
|
||||
match stdin.write_all(state.as_bytes()).await {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
info!(logger, "write to child stdin failed: {:?}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// read something from stdout and stderr for debug
|
||||
if let Some(stdout) = child.stdout.as_mut() {
|
||||
let mut out = String::new();
|
||||
match stdout.read_to_string(&mut out).await {
|
||||
Ok(_) => {
|
||||
info!(logger, "child stdout: {}", out.as_str());
|
||||
}
|
||||
Err(e) => {
|
||||
info!(logger, "read from child stdout failed: {:?}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut err = String::new();
|
||||
if let Some(stderr) = child.stderr.as_mut() {
|
||||
match stderr.read_to_string(&mut err).await {
|
||||
Ok(_) => {
|
||||
info!(logger, "child stderr: {}", err.as_str());
|
||||
}
|
||||
Err(e) => {
|
||||
info!(logger, "read from child stderr failed: {:?}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match child.wait().await {
|
||||
Ok(exit) => {
|
||||
let code = exit
|
||||
.code()
|
||||
.ok_or_else(|| anyhow!("hook exit status has no status code"))?;
|
||||
|
||||
if code != 0 {
|
||||
error!(
|
||||
logger,
|
||||
"hook {} exit status is {}, error message is {}", &path, code, err
|
||||
);
|
||||
return Err(anyhow!(nix::Error::UnknownErrno));
|
||||
}
|
||||
|
||||
debug!(logger, "hook {} exit status is 0", &path);
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => Err(anyhow!(
|
||||
"wait child error: {} {}",
|
||||
e,
|
||||
e.raw_os_error().unwrap()
|
||||
)),
|
||||
}
|
||||
});
|
||||
|
||||
match tokio::time::timeout(Duration::new(timeout, 0), join_handle).await {
|
||||
Ok(r) => r.unwrap(),
|
||||
Err(_) => Err(anyhow!(nix::Error::ETIMEDOUT)),
|
||||
}
|
||||
}
|
||||
|
||||
// valid environment variables according to https://doc.rust-lang.org/std/env/fn.set_var.html#panics
|
||||
fn valid_env(e: &str) -> Option<(&str, &str)> {
|
||||
// wherther key or value will contain NULL char.
|
||||
if e.as_bytes().contains(&b'\0') {
|
||||
return None;
|
||||
}
|
||||
|
||||
let v: Vec<&str> = e.splitn(2, '=').collect();
|
||||
|
||||
// key can't hold an `equal` sign, but value can
|
||||
if v.len() != 2 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let (key, value) = (v[0].trim(), v[1].trim());
|
||||
|
||||
// key can't be empty
|
||||
if key.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some((key, value))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::process::Process;
|
||||
use crate::skip_if_not_root;
|
||||
use nix::unistd::Uid;
|
||||
use std::fs;
|
||||
use std::os::unix::fs::MetadataExt;
|
||||
use std::os::unix::io::AsRawFd;
|
||||
use tempfile::tempdir;
|
||||
use test_utils::skip_if_not_root;
|
||||
use tokio::process::Command;
|
||||
|
||||
macro_rules! sl {
|
||||
() => {
|
||||
@@ -1578,6 +1670,113 @@ mod tests {
|
||||
};
|
||||
}
|
||||
|
||||
async fn which(cmd: &str) -> String {
|
||||
let output: std::process::Output = Command::new("which")
|
||||
.arg(cmd)
|
||||
.output()
|
||||
.await
|
||||
.expect("which command failed to run");
|
||||
|
||||
match String::from_utf8(output.stdout) {
|
||||
Ok(v) => v.trim_end_matches('\n').to_string(),
|
||||
Err(e) => panic!("Invalid UTF-8 sequence: {}", e),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_execute_hook() {
|
||||
let temp_file = "/tmp/test_execute_hook";
|
||||
|
||||
let touch = which("touch").await;
|
||||
|
||||
defer!(fs::remove_file(temp_file).unwrap(););
|
||||
let invalid_str = vec![97, b'\0', 98];
|
||||
let invalid_string = std::str::from_utf8(&invalid_str).unwrap();
|
||||
let invalid_env = format!("{}=value", invalid_string);
|
||||
|
||||
execute_hook(
|
||||
&slog_scope::logger(),
|
||||
&Hook {
|
||||
path: touch,
|
||||
args: vec!["touch".to_string(), temp_file.to_string()],
|
||||
env: vec![invalid_env],
|
||||
timeout: Some(10),
|
||||
},
|
||||
&OCIState {
|
||||
version: "1.2.3".to_string(),
|
||||
id: "321".to_string(),
|
||||
status: ContainerState::Running,
|
||||
pid: 2,
|
||||
bundle: "".to_string(),
|
||||
annotations: Default::default(),
|
||||
},
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(Path::new(&temp_file).exists(), true);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_execute_hook_with_error() {
|
||||
let ls = which("ls").await;
|
||||
|
||||
let res = execute_hook(
|
||||
&slog_scope::logger(),
|
||||
&Hook {
|
||||
path: ls,
|
||||
args: vec!["ls".to_string(), "/tmp/not-exist".to_string()],
|
||||
env: vec![],
|
||||
timeout: None,
|
||||
},
|
||||
&OCIState {
|
||||
version: "1.2.3".to_string(),
|
||||
id: "321".to_string(),
|
||||
status: ContainerState::Running,
|
||||
pid: 2,
|
||||
bundle: "".to_string(),
|
||||
annotations: Default::default(),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
let expected_err = nix::Error::UnknownErrno;
|
||||
assert_eq!(
|
||||
res.unwrap_err().downcast::<nix::Error>().unwrap(),
|
||||
expected_err
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_execute_hook_with_timeout() {
|
||||
let sleep = which("sleep").await;
|
||||
|
||||
let res = execute_hook(
|
||||
&slog_scope::logger(),
|
||||
&Hook {
|
||||
path: sleep,
|
||||
args: vec!["sleep".to_string(), "2".to_string()],
|
||||
env: vec![],
|
||||
timeout: Some(1),
|
||||
},
|
||||
&OCIState {
|
||||
version: "1.2.3".to_string(),
|
||||
id: "321".to_string(),
|
||||
status: ContainerState::Running,
|
||||
pid: 2,
|
||||
bundle: "".to_string(),
|
||||
annotations: Default::default(),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
let expected_err = nix::Error::ETIMEDOUT;
|
||||
assert_eq!(
|
||||
res.unwrap_err().downcast::<nix::Error>().unwrap(),
|
||||
expected_err
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_status_transtition() {
|
||||
let mut status = ContainerStatus::new();
|
||||
@@ -1730,12 +1929,20 @@ mod tests {
|
||||
assert!(format!("{:?}", ret).contains("failed to pause container"))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_linuxcontainer_pause_cgroupmgr_is_none() {
|
||||
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
||||
c.cgroup_manager = None;
|
||||
c.pause().map_err(|e| anyhow!(e))
|
||||
});
|
||||
|
||||
assert!(ret.is_err(), "Expecting error, Got {:?}", ret);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_linuxcontainer_pause() {
|
||||
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
||||
c.cgroup_manager = Box::new(FsManager::new("").map_err(|e| {
|
||||
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
|
||||
})?);
|
||||
c.cgroup_manager = FsManager::new("").ok();
|
||||
c.pause().map_err(|e| anyhow!(e))
|
||||
});
|
||||
|
||||
@@ -1754,12 +1961,21 @@ mod tests {
|
||||
assert!(format!("{:?}", ret).contains("not paused"))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_linuxcontainer_resume_cgroupmgr_is_none() {
|
||||
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
||||
c.status.transition(ContainerState::Paused);
|
||||
c.cgroup_manager = None;
|
||||
c.resume().map_err(|e| anyhow!(e))
|
||||
});
|
||||
|
||||
assert!(ret.is_err(), "Expecting error, Got {:?}", ret);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_linuxcontainer_resume() {
|
||||
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
||||
c.cgroup_manager = Box::new(FsManager::new("").map_err(|e| {
|
||||
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
|
||||
})?);
|
||||
c.cgroup_manager = FsManager::new("").ok();
|
||||
// Change status to paused, this way we can resume it
|
||||
c.status.transition(ContainerState::Paused);
|
||||
c.resume().map_err(|e| anyhow!(e))
|
||||
@@ -1892,4 +2108,49 @@ mod tests {
|
||||
let ret = do_init_child(std::io::stdin().as_raw_fd());
|
||||
assert!(ret.is_err(), "Expecting Err, Got {:?}", ret);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_valid_env() {
|
||||
let env = valid_env("a=b=c");
|
||||
assert_eq!(Some(("a", "b=c")), env);
|
||||
|
||||
let env = valid_env("a=b");
|
||||
assert_eq!(Some(("a", "b")), env);
|
||||
let env = valid_env("a =b");
|
||||
assert_eq!(Some(("a", "b")), env);
|
||||
|
||||
let env = valid_env(" a =b");
|
||||
assert_eq!(Some(("a", "b")), env);
|
||||
|
||||
let env = valid_env("a= b");
|
||||
assert_eq!(Some(("a", "b")), env);
|
||||
|
||||
let env = valid_env("a=b ");
|
||||
assert_eq!(Some(("a", "b")), env);
|
||||
let env = valid_env("a=b c ");
|
||||
assert_eq!(Some(("a", "b c")), env);
|
||||
|
||||
let env = valid_env("=b");
|
||||
assert_eq!(None, env);
|
||||
|
||||
let env = valid_env("a=");
|
||||
assert_eq!(Some(("a", "")), env);
|
||||
|
||||
let env = valid_env("a==");
|
||||
assert_eq!(Some(("a", "=")), env);
|
||||
|
||||
let env = valid_env("a");
|
||||
assert_eq!(None, env);
|
||||
|
||||
let invalid_str = vec![97, b'\0', 98];
|
||||
let invalid_string = std::str::from_utf8(&invalid_str).unwrap();
|
||||
|
||||
let invalid_env = format!("{}=value", invalid_string);
|
||||
let env = valid_env(&invalid_env);
|
||||
assert_eq!(None, env);
|
||||
|
||||
let invalid_env = format!("key={}", invalid_string);
|
||||
let env = valid_env(&invalid_env);
|
||||
assert_eq!(None, env);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,7 +38,6 @@ pub mod pipestream;
|
||||
pub mod process;
|
||||
#[cfg(feature = "seccomp")]
|
||||
pub mod seccomp;
|
||||
pub mod selinux;
|
||||
pub mod specconv;
|
||||
pub mod sync;
|
||||
pub mod sync_with_async;
|
||||
@@ -515,6 +514,15 @@ pub fn grpc_to_oci(grpc: &grpc::Spec) -> oci::Spec {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
#[macro_export]
|
||||
macro_rules! skip_if_not_root {
|
||||
() => {
|
||||
if !nix::unistd::Uid::effective().is_root() {
|
||||
println!("INFO: skipping {} which needs root", module_path!());
|
||||
return;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Parameters:
|
||||
//
|
||||
|
||||
@@ -25,7 +25,6 @@ use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
|
||||
use crate::container::DEFAULT_DEVICES;
|
||||
use crate::selinux;
|
||||
use crate::sync::write_count;
|
||||
use std::string::ToString;
|
||||
|
||||
@@ -182,8 +181,6 @@ pub fn init_rootfs(
|
||||
None => flags |= MsFlags::MS_SLAVE,
|
||||
}
|
||||
|
||||
let label = &linux.mount_label;
|
||||
|
||||
let root = spec
|
||||
.root
|
||||
.as_ref()
|
||||
@@ -247,7 +244,7 @@ pub fn init_rootfs(
|
||||
}
|
||||
}
|
||||
|
||||
mount_from(cfd_log, m, rootfs, flags, &data, label)?;
|
||||
mount_from(cfd_log, m, rootfs, flags, &data, "")?;
|
||||
// bind mount won't change mount options, we need remount to make mount options
|
||||
// effective.
|
||||
// first check that we have non-default options required before attempting a
|
||||
@@ -527,6 +524,7 @@ pub fn pivot_rootfs<P: ?Sized + NixPath + std::fmt::Debug>(path: &P) -> Result<(
|
||||
|
||||
fn rootfs_parent_mount_private(path: &str) -> Result<()> {
|
||||
let mount_infos = parse_mount_table(MOUNTINFO_PATH)?;
|
||||
|
||||
let mut max_len = 0;
|
||||
let mut mount_point = String::from("");
|
||||
let mut options = String::from("");
|
||||
@@ -769,9 +767,9 @@ fn mount_from(
|
||||
rootfs: &str,
|
||||
flags: MsFlags,
|
||||
data: &str,
|
||||
label: &str,
|
||||
_label: &str,
|
||||
) -> Result<()> {
|
||||
let mut d = String::from(data);
|
||||
let d = String::from(data);
|
||||
let dest = secure_join(rootfs, &m.destination);
|
||||
|
||||
let src = if m.r#type.as_str() == "bind" {
|
||||
@@ -782,31 +780,18 @@ fn mount_from(
|
||||
Path::new(&dest).parent().unwrap()
|
||||
};
|
||||
|
||||
fs::create_dir_all(dir).map_err(|e| {
|
||||
let _ = fs::create_dir_all(&dir).map_err(|e| {
|
||||
log_child!(
|
||||
cfd_log,
|
||||
"create dir {}: {}",
|
||||
dir.to_str().unwrap(),
|
||||
e.to_string()
|
||||
);
|
||||
e
|
||||
})?;
|
||||
)
|
||||
});
|
||||
|
||||
// make sure file exists so we can bind over it
|
||||
if !src.is_dir() {
|
||||
let _ = OpenOptions::new()
|
||||
.create(true)
|
||||
.write(true)
|
||||
.open(&dest)
|
||||
.map_err(|e| {
|
||||
log_child!(
|
||||
cfd_log,
|
||||
"open/create dest error. {}: {:?}",
|
||||
dest.as_str(),
|
||||
e
|
||||
);
|
||||
e
|
||||
})?;
|
||||
let _ = OpenOptions::new().create(true).write(true).open(&dest);
|
||||
}
|
||||
src.to_str().unwrap().to_string()
|
||||
} else {
|
||||
@@ -819,41 +804,8 @@ fn mount_from(
|
||||
}
|
||||
};
|
||||
|
||||
let _ = stat::stat(dest.as_str()).map_err(|e| {
|
||||
log_child!(cfd_log, "dest stat error. {}: {:?}", dest.as_str(), e);
|
||||
e
|
||||
})?;
|
||||
|
||||
// Set the SELinux context for the mounts
|
||||
let mut use_xattr = false;
|
||||
if !label.is_empty() {
|
||||
if selinux::is_enabled()? {
|
||||
let device = Path::new(&m.source)
|
||||
.file_name()
|
||||
.ok_or_else(|| anyhow!("invalid device source path: {}", &m.source))?
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow!("failed to convert device source path: {}", &m.source))?;
|
||||
|
||||
match device {
|
||||
// SELinux does not support labeling of /proc or /sys
|
||||
"proc" | "sysfs" => (),
|
||||
// SELinux does not support mount labeling against /dev/mqueue,
|
||||
// so we use setxattr instead
|
||||
"mqueue" => {
|
||||
use_xattr = true;
|
||||
}
|
||||
_ => {
|
||||
log_child!(cfd_log, "add SELinux mount label to {}", dest.as_str());
|
||||
selinux::add_mount_label(&mut d, label);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log_child!(
|
||||
cfd_log,
|
||||
"SELinux label for the mount is provided but SELinux is not enabled on the running kernel"
|
||||
);
|
||||
}
|
||||
}
|
||||
let _ = stat::stat(dest.as_str())
|
||||
.map_err(|e| log_child!(cfd_log, "dest stat error. {}: {:?}", dest.as_str(), e));
|
||||
|
||||
mount(
|
||||
Some(src.as_str()),
|
||||
@@ -867,10 +819,6 @@ fn mount_from(
|
||||
e
|
||||
})?;
|
||||
|
||||
if !label.is_empty() && selinux::is_enabled()? && use_xattr {
|
||||
xattr::set(dest.as_str(), "security.selinux", label.as_bytes())?;
|
||||
}
|
||||
|
||||
if flags.contains(MsFlags::MS_BIND)
|
||||
&& flags.intersects(
|
||||
!(MsFlags::MS_REC
|
||||
@@ -1057,7 +1005,9 @@ pub fn finish_rootfs(cfd_log: RawFd, spec: &Spec, process: &Process) -> Result<(
|
||||
}
|
||||
|
||||
fn mask_path(path: &str) -> Result<()> {
|
||||
check_paths(path)?;
|
||||
if !path.starts_with('/') || path.contains("..") {
|
||||
return Err(anyhow!(nix::Error::EINVAL));
|
||||
}
|
||||
|
||||
match mount(
|
||||
Some("/dev/null"),
|
||||
@@ -1075,7 +1025,9 @@ fn mask_path(path: &str) -> Result<()> {
|
||||
}
|
||||
|
||||
fn readonly_path(path: &str) -> Result<()> {
|
||||
check_paths(path)?;
|
||||
if !path.starts_with('/') || path.contains("..") {
|
||||
return Err(anyhow!(nix::Error::EINVAL));
|
||||
}
|
||||
|
||||
if let Err(e) = mount(
|
||||
Some(&path[1..]),
|
||||
@@ -1101,20 +1053,11 @@ fn readonly_path(path: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn check_paths(path: &str) -> Result<()> {
|
||||
if !path.starts_with('/') || path.contains("..") {
|
||||
return Err(anyhow!(
|
||||
"Cannot mount {} (path does not start with '/' or contains '..').",
|
||||
path
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::assert_result;
|
||||
use crate::skip_if_not_root;
|
||||
use std::fs::create_dir;
|
||||
use std::fs::create_dir_all;
|
||||
use std::fs::remove_dir_all;
|
||||
@@ -1122,7 +1065,6 @@ mod tests {
|
||||
use std::os::unix::fs;
|
||||
use std::os::unix::io::AsRawFd;
|
||||
use tempfile::tempdir;
|
||||
use test_utils::skip_if_not_root;
|
||||
|
||||
#[test]
|
||||
#[serial(chdir)]
|
||||
@@ -1463,55 +1405,6 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_check_paths() {
|
||||
#[derive(Debug)]
|
||||
struct TestData<'a> {
|
||||
name: &'a str,
|
||||
path: &'a str,
|
||||
result: Result<()>,
|
||||
}
|
||||
|
||||
let tests = &[
|
||||
TestData {
|
||||
name: "valid path",
|
||||
path: "/foo/bar",
|
||||
result: Ok(()),
|
||||
},
|
||||
TestData {
|
||||
name: "does not starts with /",
|
||||
path: "foo/bar",
|
||||
result: Err(anyhow!(
|
||||
"Cannot mount foo/bar (path does not start with '/' or contains '..')."
|
||||
)),
|
||||
},
|
||||
TestData {
|
||||
name: "contains ..",
|
||||
path: "../foo/bar",
|
||||
result: Err(anyhow!(
|
||||
"Cannot mount ../foo/bar (path does not start with '/' or contains '..')."
|
||||
)),
|
||||
},
|
||||
];
|
||||
|
||||
for (i, d) in tests.iter().enumerate() {
|
||||
let msg = format!("test[{}]: {:?}", i, d.name);
|
||||
|
||||
let result = check_paths(d.path);
|
||||
|
||||
let msg = format!("{}: result: {:?}", msg, result);
|
||||
|
||||
if d.result.is_ok() {
|
||||
assert!(result.is_ok());
|
||||
continue;
|
||||
}
|
||||
|
||||
let expected_error = format!("{}", d.result.as_ref().unwrap_err());
|
||||
let actual_error = format!("{}", result.unwrap_err());
|
||||
assert!(actual_error == expected_error, "{}", msg);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_check_proc_mount() {
|
||||
let mount = oci::Mount {
|
||||
|
||||
@@ -28,6 +28,7 @@ macro_rules! close_process_stream {
|
||||
($self: ident, $stream:ident, $stream_type: ident) => {
|
||||
if $self.$stream.is_some() {
|
||||
$self.close_stream(StreamType::$stream_type);
|
||||
let _ = unistd::close($self.$stream.unwrap());
|
||||
$self.$stream = None;
|
||||
}
|
||||
};
|
||||
@@ -224,7 +225,7 @@ impl Process {
|
||||
Some(writer)
|
||||
}
|
||||
|
||||
fn close_stream(&mut self, stream_type: StreamType) {
|
||||
pub fn close_stream(&mut self, stream_type: StreamType) {
|
||||
let _ = self.readers.remove(&stream_type);
|
||||
let _ = self.writers.remove(&stream_type);
|
||||
}
|
||||
|
||||
@@ -63,7 +63,7 @@ pub fn get_unknown_syscalls(scmp: &LinuxSeccomp) -> Option<Vec<String>> {
|
||||
// init_seccomp creates a seccomp filter and loads it for the current process
|
||||
// including all the child processes.
|
||||
pub fn init_seccomp(scmp: &LinuxSeccomp) -> Result<()> {
|
||||
let def_action = ScmpAction::from_str(scmp.default_action.as_str(), Some(libc::EPERM))?;
|
||||
let def_action = ScmpAction::from_str(scmp.default_action.as_str(), Some(libc::EPERM as i32))?;
|
||||
|
||||
// Create a new filter context
|
||||
let mut filter = ScmpFilterContext::new_filter(def_action)?;
|
||||
@@ -122,10 +122,10 @@ pub fn init_seccomp(scmp: &LinuxSeccomp) -> Result<()> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::skip_if_not_root;
|
||||
use libc::{dup3, process_vm_readv, EPERM, O_CLOEXEC};
|
||||
use std::io::Error;
|
||||
use std::ptr::null;
|
||||
use test_utils::skip_if_not_root;
|
||||
|
||||
macro_rules! syscall_assert {
|
||||
($e1: expr, $e2: expr) => {
|
||||
|
||||
@@ -1,80 +0,0 @@
|
||||
// Copyright 2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use nix::unistd::gettid;
|
||||
use std::fs::{self, OpenOptions};
|
||||
use std::io::prelude::*;
|
||||
use std::path::Path;
|
||||
|
||||
pub fn is_enabled() -> Result<bool> {
|
||||
let buf = fs::read_to_string("/proc/mounts")?;
|
||||
let enabled = buf.contains("selinuxfs");
|
||||
|
||||
Ok(enabled)
|
||||
}
|
||||
|
||||
pub fn add_mount_label(data: &mut String, label: &str) {
|
||||
if data.is_empty() {
|
||||
let context = format!("context=\"{}\"", label);
|
||||
data.push_str(&context);
|
||||
} else {
|
||||
let context = format!(",context=\"{}\"", label);
|
||||
data.push_str(&context);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_exec_label(label: &str) -> Result<()> {
|
||||
let mut attr_path = Path::new("/proc/thread-self/attr/exec").to_path_buf();
|
||||
if !attr_path.exists() {
|
||||
// Fall back to the old convention
|
||||
attr_path = Path::new("/proc/self/task")
|
||||
.join(gettid().to_string())
|
||||
.join("attr/exec")
|
||||
}
|
||||
|
||||
let mut file = OpenOptions::new()
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(attr_path)?;
|
||||
file.write_all(label.as_bytes())
|
||||
.with_context(|| "failed to apply SELinux label")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
const TEST_LABEL: &str = "system_u:system_r:unconfined_t:s0";
|
||||
|
||||
#[test]
|
||||
fn test_is_enabled() {
|
||||
let ret = is_enabled();
|
||||
assert!(ret.is_ok(), "Expecting Ok, Got {:?}", ret);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_add_mount_label() {
|
||||
let mut data = String::new();
|
||||
add_mount_label(&mut data, TEST_LABEL);
|
||||
assert_eq!(data, format!("context=\"{}\"", TEST_LABEL));
|
||||
|
||||
let mut data = String::from("defaults");
|
||||
add_mount_label(&mut data, TEST_LABEL);
|
||||
assert_eq!(data, format!("defaults,context=\"{}\"", TEST_LABEL));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_exec_label() {
|
||||
let ret = set_exec_label(TEST_LABEL);
|
||||
if is_enabled().unwrap() {
|
||||
assert!(ret.is_ok(), "Expecting Ok, Got {:?}", ret);
|
||||
} else {
|
||||
assert!(ret.is_err(), "Expecting error, Got {:?}", ret);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4,16 +4,17 @@
|
||||
//
|
||||
|
||||
use crate::container::Config;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use anyhow::{anyhow, Context, Error, Result};
|
||||
use oci::{Linux, LinuxIdMapping, LinuxNamespace, Spec};
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::path::{Component, PathBuf};
|
||||
|
||||
fn einval() -> Error {
|
||||
anyhow!(nix::Error::EINVAL)
|
||||
}
|
||||
|
||||
fn get_linux(oci: &Spec) -> Result<&Linux> {
|
||||
oci.linux
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("Unable to get Linux section from Spec"))
|
||||
oci.linux.as_ref().ok_or_else(einval)
|
||||
}
|
||||
|
||||
fn contain_namespace(nses: &[LinuxNamespace], key: &str) -> bool {
|
||||
@@ -30,10 +31,7 @@ fn rootfs(root: &str) -> Result<()> {
|
||||
let path = PathBuf::from(root);
|
||||
// not absolute path or not exists
|
||||
if !path.exists() || !path.is_absolute() {
|
||||
return Err(anyhow!(
|
||||
"Path from {:?} does not exist or is not absolute",
|
||||
root
|
||||
));
|
||||
return Err(einval());
|
||||
}
|
||||
|
||||
// symbolic link? ..?
|
||||
@@ -51,7 +49,7 @@ fn rootfs(root: &str) -> Result<()> {
|
||||
if let Some(v) = c.as_os_str().to_str() {
|
||||
stack.push(v.to_string());
|
||||
} else {
|
||||
return Err(anyhow!("Invalid path component (unable to convert to str)"));
|
||||
return Err(einval());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -60,13 +58,10 @@ fn rootfs(root: &str) -> Result<()> {
|
||||
cleaned.push(e);
|
||||
}
|
||||
|
||||
let canon = path.canonicalize().context("failed to canonicalize path")?;
|
||||
let canon = path.canonicalize().context("canonicalize")?;
|
||||
if cleaned != canon {
|
||||
// There is symbolic in path
|
||||
return Err(anyhow!(
|
||||
"There may be illegal symbols in the path name. Cleaned ({:?}) and canonicalized ({:?}) paths do not match",
|
||||
cleaned,
|
||||
canon));
|
||||
return Err(einval());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -79,7 +74,7 @@ fn hostname(oci: &Spec) -> Result<()> {
|
||||
|
||||
let linux = get_linux(oci)?;
|
||||
if !contain_namespace(&linux.namespaces, "uts") {
|
||||
return Err(anyhow!("Linux namespace does not contain uts"));
|
||||
return Err(einval());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -87,32 +82,17 @@ fn hostname(oci: &Spec) -> Result<()> {
|
||||
|
||||
fn security(oci: &Spec) -> Result<()> {
|
||||
let linux = get_linux(oci)?;
|
||||
let label_pattern = r".*_u:.*_r:.*_t:s[0-9]|1[0-5].*";
|
||||
let label_regex = Regex::new(label_pattern)?;
|
||||
|
||||
if let Some(ref process) = oci.process {
|
||||
if !process.selinux_label.is_empty() && !label_regex.is_match(&process.selinux_label) {
|
||||
return Err(anyhow!(
|
||||
"SELinux label for the process is invalid format: {}",
|
||||
&process.selinux_label
|
||||
));
|
||||
}
|
||||
}
|
||||
if !linux.mount_label.is_empty() && !label_regex.is_match(&linux.mount_label) {
|
||||
return Err(anyhow!(
|
||||
"SELinux label for the mount is invalid format: {}",
|
||||
&linux.mount_label
|
||||
));
|
||||
}
|
||||
|
||||
if linux.masked_paths.is_empty() && linux.readonly_paths.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if !contain_namespace(&linux.namespaces, "mount") {
|
||||
return Err(anyhow!("Linux namespace does not contain mount"));
|
||||
return Err(einval());
|
||||
}
|
||||
|
||||
// don't care about selinux at present
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -123,7 +103,7 @@ fn idmapping(maps: &[LinuxIdMapping]) -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
Err(anyhow!("No idmap has size > 0"))
|
||||
Err(einval())
|
||||
}
|
||||
|
||||
fn usernamespace(oci: &Spec) -> Result<()> {
|
||||
@@ -141,7 +121,7 @@ fn usernamespace(oci: &Spec) -> Result<()> {
|
||||
} else {
|
||||
// no user namespace but idmap
|
||||
if !linux.uid_mappings.is_empty() || !linux.gid_mappings.is_empty() {
|
||||
return Err(anyhow!("No user namespace, but uid or gid mapping exists"));
|
||||
return Err(einval());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -183,7 +163,7 @@ fn sysctl(oci: &Spec) -> Result<()> {
|
||||
if contain_namespace(&linux.namespaces, "ipc") {
|
||||
continue;
|
||||
} else {
|
||||
return Err(anyhow!("Linux namespace does not contain ipc"));
|
||||
return Err(einval());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -198,11 +178,11 @@ fn sysctl(oci: &Spec) -> Result<()> {
|
||||
}
|
||||
|
||||
if key == "kernel.hostname" {
|
||||
return Err(anyhow!("Kernel hostname specfied in Spec"));
|
||||
return Err(einval());
|
||||
}
|
||||
}
|
||||
|
||||
return Err(anyhow!("Sysctl config contains invalid settings"));
|
||||
return Err(einval());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -211,13 +191,12 @@ fn rootless_euid_mapping(oci: &Spec) -> Result<()> {
|
||||
let linux = get_linux(oci)?;
|
||||
|
||||
if !contain_namespace(&linux.namespaces, "user") {
|
||||
return Err(anyhow!("Linux namespace is missing user"));
|
||||
return Err(einval());
|
||||
}
|
||||
|
||||
if linux.uid_mappings.is_empty() || linux.gid_mappings.is_empty() {
|
||||
return Err(anyhow!(
|
||||
"Rootless containers require at least one UID/GID mapping"
|
||||
));
|
||||
// rootless containers requires at least one UID/GID mapping
|
||||
return Err(einval());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -241,7 +220,7 @@ fn rootless_euid_mount(oci: &Spec) -> Result<()> {
|
||||
let fields: Vec<&str> = opt.split('=').collect();
|
||||
|
||||
if fields.len() != 2 {
|
||||
return Err(anyhow!("Options has invalid field: {:?}", fields));
|
||||
return Err(einval());
|
||||
}
|
||||
|
||||
let id = fields[1]
|
||||
@@ -250,11 +229,11 @@ fn rootless_euid_mount(oci: &Spec) -> Result<()> {
|
||||
.context(format!("parse field {}", &fields[1]))?;
|
||||
|
||||
if opt.starts_with("uid=") && !has_idmapping(&linux.uid_mappings, id) {
|
||||
return Err(anyhow!("uid of {} does not have a valid mapping", id));
|
||||
return Err(einval());
|
||||
}
|
||||
|
||||
if opt.starts_with("gid=") && !has_idmapping(&linux.gid_mappings, id) {
|
||||
return Err(anyhow!("gid of {} does not have a valid mapping", id));
|
||||
return Err(einval());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -270,18 +249,15 @@ fn rootless_euid(oci: &Spec) -> Result<()> {
|
||||
|
||||
pub fn validate(conf: &Config) -> Result<()> {
|
||||
lazy_static::initialize(&SYSCTLS);
|
||||
let oci = conf
|
||||
.spec
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("Invalid config spec"))?;
|
||||
let oci = conf.spec.as_ref().ok_or_else(einval)?;
|
||||
|
||||
if oci.linux.is_none() {
|
||||
return Err(anyhow!("oci Linux is none"));
|
||||
return Err(einval());
|
||||
}
|
||||
|
||||
let root = match oci.root.as_ref() {
|
||||
Some(v) => v.path.as_str(),
|
||||
None => return Err(anyhow!("oci root is none")),
|
||||
None => return Err(einval()),
|
||||
};
|
||||
|
||||
rootfs(root).context("rootfs")?;
|
||||
@@ -301,7 +277,7 @@ pub fn validate(conf: &Config) -> Result<()> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use oci::{Mount, Process};
|
||||
use oci::Mount;
|
||||
|
||||
#[test]
|
||||
fn test_namespace() {
|
||||
@@ -404,29 +380,6 @@ mod tests {
|
||||
];
|
||||
spec.linux = Some(linux);
|
||||
security(&spec).unwrap();
|
||||
|
||||
// SELinux
|
||||
let valid_label = "system_u:system_r:container_t:s0:c123,c456";
|
||||
let mut process = Process::default();
|
||||
process.selinux_label = valid_label.to_string();
|
||||
spec.process = Some(process);
|
||||
security(&spec).unwrap();
|
||||
|
||||
let mut linux = Linux::default();
|
||||
linux.mount_label = valid_label.to_string();
|
||||
spec.linux = Some(linux);
|
||||
security(&spec).unwrap();
|
||||
|
||||
let invalid_label = "system_u:system_r:container_t";
|
||||
let mut process = Process::default();
|
||||
process.selinux_label = invalid_label.to_string();
|
||||
spec.process = Some(process);
|
||||
security(&spec).unwrap_err();
|
||||
|
||||
let mut linux = Linux::default();
|
||||
linux.mount_label = invalid_label.to_string();
|
||||
spec.linux = Some(linux);
|
||||
security(&spec).unwrap_err();
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -12,8 +12,6 @@ use std::str::FromStr;
|
||||
use std::time;
|
||||
use tracing::instrument;
|
||||
|
||||
use kata_types::config::default::DEFAULT_AGENT_VSOCK_PORT;
|
||||
|
||||
const DEBUG_CONSOLE_FLAG: &str = "agent.debug_console";
|
||||
const DEV_MODE_FLAG: &str = "agent.devmode";
|
||||
const TRACE_MODE_OPTION: &str = "agent.trace";
|
||||
@@ -30,6 +28,7 @@ const DEFAULT_LOG_LEVEL: slog::Level = slog::Level::Info;
|
||||
const DEFAULT_HOTPLUG_TIMEOUT: time::Duration = time::Duration::from_secs(3);
|
||||
const DEFAULT_CONTAINER_PIPE_SIZE: i32 = 0;
|
||||
const VSOCK_ADDR: &str = "vsock://-1";
|
||||
const VSOCK_PORT: u16 = 1024;
|
||||
|
||||
// Environment variables used for development and testing
|
||||
const SERVER_ADDR_ENV_VAR: &str = "KATA_AGENT_SERVER_ADDR";
|
||||
@@ -148,7 +147,7 @@ impl Default for AgentConfig {
|
||||
debug_console_vport: 0,
|
||||
log_vport: 0,
|
||||
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
|
||||
server_addr: format!("{}:{}", VSOCK_ADDR, DEFAULT_AGENT_VSOCK_PORT),
|
||||
server_addr: format!("{}:{}", VSOCK_ADDR, VSOCK_PORT),
|
||||
unified_cgroup_hierarchy: false,
|
||||
tracing: false,
|
||||
endpoints: Default::default(),
|
||||
@@ -433,7 +432,7 @@ fn get_container_pipe_size(param: &str) -> Result<i32> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use test_utils::assert_result;
|
||||
use crate::assert_result;
|
||||
|
||||
use super::*;
|
||||
use anyhow::anyhow;
|
||||
|
||||
@@ -9,7 +9,7 @@ use anyhow::{anyhow, Result};
|
||||
use nix::fcntl::{self, FcntlArg, FdFlag, OFlag};
|
||||
use nix::libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
|
||||
use nix::pty::{openpty, OpenptyResult};
|
||||
use nix::sys::socket::{self, AddressFamily, SockFlag, SockType, VsockAddr};
|
||||
use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
|
||||
use nix::sys::stat::Mode;
|
||||
use nix::sys::wait;
|
||||
use nix::unistd::{self, close, dup2, fork, setsid, ForkResult, Pid};
|
||||
@@ -67,7 +67,7 @@ pub async fn debug_console_handler(
|
||||
SockFlag::SOCK_CLOEXEC,
|
||||
None,
|
||||
)?;
|
||||
let addr = VsockAddr::new(libc::VMADDR_CID_ANY, port);
|
||||
let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, port);
|
||||
socket::bind(listenfd, &addr)?;
|
||||
socket::listen(listenfd, 1)?;
|
||||
|
||||
|
||||
@@ -414,7 +414,7 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
|
||||
|
||||
// Scan scsi host passing in the channel, SCSI id and LUN.
|
||||
// Channel is always 0 because we have only one SCSI controller.
|
||||
let scan_data = &format!("0 {} {}", tokens[0], tokens[1]);
|
||||
let scan_data = format!("0 {} {}", tokens[0], tokens[1]);
|
||||
|
||||
for entry in fs::read_dir(SYSFS_SCSI_HOST_PATH)? {
|
||||
let host = entry?.file_name();
|
||||
@@ -428,7 +428,7 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
|
||||
|
||||
let scan_path = PathBuf::from(&format!("{}/{}/{}", SYSFS_SCSI_HOST_PATH, host_str, "scan"));
|
||||
|
||||
fs::write(scan_path, scan_data)?;
|
||||
fs::write(scan_path, &scan_data)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -1531,7 +1531,7 @@ mod tests {
|
||||
pci_driver_override(syspci, dev0, "drv_b").unwrap();
|
||||
assert_eq!(fs::read_to_string(&dev0override).unwrap(), "drv_b");
|
||||
assert_eq!(fs::read_to_string(&probepath).unwrap(), dev0.to_string());
|
||||
assert_eq!(fs::read_to_string(drvaunbind).unwrap(), dev0.to_string());
|
||||
assert_eq!(fs::read_to_string(&drvaunbind).unwrap(), dev0.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1543,7 +1543,7 @@ mod tests {
|
||||
let dev0 = pci::Address::new(0, 0, pci::SlotFn::new(0, 0).unwrap());
|
||||
let dev0path = syspci.join("devices").join(dev0.to_string());
|
||||
|
||||
fs::create_dir_all(dev0path).unwrap();
|
||||
fs::create_dir_all(&dev0path).unwrap();
|
||||
|
||||
// Test dev0
|
||||
assert!(pci_iommu_group(&syspci, dev0).unwrap().is_none());
|
||||
@@ -1554,7 +1554,7 @@ mod tests {
|
||||
let dev1group = dev1path.join("iommu_group");
|
||||
|
||||
fs::create_dir_all(&dev1path).unwrap();
|
||||
std::os::unix::fs::symlink("../../../kernel/iommu_groups/12", dev1group).unwrap();
|
||||
std::os::unix::fs::symlink("../../../kernel/iommu_groups/12", &dev1group).unwrap();
|
||||
|
||||
// Test dev1
|
||||
assert_eq!(
|
||||
@@ -1567,7 +1567,7 @@ mod tests {
|
||||
let dev2path = syspci.join("devices").join(dev2.to_string());
|
||||
let dev2group = dev2path.join("iommu_group");
|
||||
|
||||
fs::create_dir_all(dev2group).unwrap();
|
||||
fs::create_dir_all(&dev2group).unwrap();
|
||||
|
||||
// Test dev2
|
||||
assert!(pci_iommu_group(&syspci, dev2).is_err());
|
||||
|
||||
@@ -22,7 +22,7 @@ extern crate slog;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use clap::{AppSettings, Parser};
|
||||
use nix::fcntl::OFlag;
|
||||
use nix::sys::socket::{self, AddressFamily, SockFlag, SockType, VsockAddr};
|
||||
use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
|
||||
use nix::unistd::{self, dup, Pid};
|
||||
use std::env;
|
||||
use std::ffi::OsStr;
|
||||
@@ -49,6 +49,8 @@ mod pci;
|
||||
pub mod random;
|
||||
mod sandbox;
|
||||
mod signal;
|
||||
#[cfg(test)]
|
||||
mod test_utils;
|
||||
mod uevent;
|
||||
mod util;
|
||||
mod version;
|
||||
@@ -108,6 +110,10 @@ enum SubCommand {
|
||||
fn announce(logger: &Logger, config: &AgentConfig) {
|
||||
info!(logger, "announce";
|
||||
"agent-commit" => version::VERSION_COMMIT,
|
||||
|
||||
// Avoid any possibility of confusion with the old agent
|
||||
"agent-type" => "rust",
|
||||
|
||||
"agent-version" => version::AGENT_VERSION,
|
||||
"api-version" => version::API_VERSION,
|
||||
"config" => format!("{:?}", config),
|
||||
@@ -126,7 +132,7 @@ async fn create_logger_task(rfd: RawFd, vsock_port: u32, shutdown: Receiver<bool
|
||||
None,
|
||||
)?;
|
||||
|
||||
let addr = VsockAddr::new(libc::VMADDR_CID_ANY, vsock_port);
|
||||
let addr = SockAddr::new_vsock(libc::VMADDR_CID_ANY, vsock_port);
|
||||
socket::bind(listenfd, &addr)?;
|
||||
socket::listen(listenfd, 1)?;
|
||||
|
||||
@@ -207,7 +213,7 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
|
||||
|
||||
if config.log_level == slog::Level::Trace {
|
||||
// Redirect ttrpc log calls to slog iff full debug requested
|
||||
ttrpc_log_guard = Ok(slog_stdlog::init()?);
|
||||
ttrpc_log_guard = Ok(slog_stdlog::init().map_err(|e| e)?);
|
||||
}
|
||||
|
||||
if config.tracing {
|
||||
@@ -339,7 +345,7 @@ async fn start_sandbox(
|
||||
sandbox.lock().await.sender = Some(tx);
|
||||
|
||||
// vsock:///dev/vsock, port
|
||||
let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str(), init_mode)?;
|
||||
let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str())?;
|
||||
server.start().await?;
|
||||
|
||||
rx.await?;
|
||||
@@ -399,8 +405,7 @@ use std::os::unix::io::{FromRawFd, RawFd};
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use test_utils::TestUserType;
|
||||
use test_utils::{assert_result, skip_if_not_root, skip_if_root};
|
||||
use crate::test_utils::test_utils::TestUserType;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_logger_task() {
|
||||
@@ -436,8 +441,9 @@ mod tests {
|
||||
let msg = format!("test[{}]: {:?}", i, d);
|
||||
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
|
||||
defer!({
|
||||
// XXX: Never try to close rfd, because it will be closed by PipeStream in
|
||||
// create_logger_task() and it's not safe to close the same fd twice time.
|
||||
// rfd is closed by the use of PipeStream in the crate_logger_task function,
|
||||
// but we will attempt to close in case of a failure
|
||||
let _ = unistd::close(rfd);
|
||||
unistd::close(wfd).unwrap();
|
||||
});
|
||||
|
||||
|
||||
@@ -5,11 +5,10 @@
|
||||
|
||||
extern crate procfs;
|
||||
|
||||
use prometheus::{Encoder, Gauge, GaugeVec, IntCounter, Opts, Registry, TextEncoder};
|
||||
use prometheus::{Encoder, Gauge, GaugeVec, IntCounter, TextEncoder};
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use anyhow::Result;
|
||||
use slog::warn;
|
||||
use std::sync::Mutex;
|
||||
use tracing::instrument;
|
||||
|
||||
const NAMESPACE_KATA_AGENT: &str = "kata_agent";
|
||||
@@ -24,70 +23,55 @@ macro_rules! sl {
|
||||
|
||||
lazy_static! {
|
||||
|
||||
static ref REGISTERED: Mutex<bool> = Mutex::new(false);
|
||||
static ref AGENT_SCRAPE_COUNT: IntCounter =
|
||||
prometheus::register_int_counter!(format!("{}_{}",NAMESPACE_KATA_AGENT,"scrape_count"), "Metrics scrape count").unwrap();
|
||||
|
||||
// custom registry
|
||||
static ref REGISTRY: Registry = Registry::new();
|
||||
static ref AGENT_THREADS: Gauge =
|
||||
prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"threads"), "Agent process threads").unwrap();
|
||||
|
||||
static ref AGENT_SCRAPE_COUNT: IntCounter =
|
||||
IntCounter::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"scrape_count"), "Metrics scrape count").unwrap();
|
||||
static ref AGENT_TOTAL_TIME: Gauge =
|
||||
prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_time"), "Agent process total time").unwrap();
|
||||
|
||||
// agent metrics
|
||||
static ref AGENT_THREADS: Gauge =
|
||||
Gauge::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"threads"), "Agent process threads").unwrap();
|
||||
static ref AGENT_TOTAL_VM: Gauge =
|
||||
prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_vm"), "Agent process total VM size").unwrap();
|
||||
|
||||
static ref AGENT_TOTAL_TIME: Gauge =
|
||||
Gauge::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_time"), "Agent process total time").unwrap();
|
||||
static ref AGENT_TOTAL_RSS: Gauge =
|
||||
prometheus::register_gauge!(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_rss"), "Agent process total RSS size").unwrap();
|
||||
|
||||
static ref AGENT_TOTAL_VM: Gauge =
|
||||
Gauge::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_vm"), "Agent process total VM size").unwrap() ;
|
||||
static ref AGENT_PROC_STATUS: GaugeVec =
|
||||
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_AGENT,"proc_status"), "Agent process status.", &["item"]).unwrap();
|
||||
|
||||
static ref AGENT_TOTAL_RSS: Gauge =
|
||||
Gauge::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"total_rss"), "Agent process total RSS size").unwrap();
|
||||
static ref AGENT_IO_STAT: GaugeVec =
|
||||
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_AGENT,"io_stat"), "Agent process IO statistics.", &["item"]).unwrap();
|
||||
|
||||
static ref AGENT_PROC_STATUS: GaugeVec =
|
||||
GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"proc_status"), "Agent process status."), &["item"]).unwrap();
|
||||
|
||||
static ref AGENT_IO_STAT: GaugeVec =
|
||||
GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"io_stat"), "Agent process IO statistics."), &["item"]).unwrap();
|
||||
|
||||
static ref AGENT_PROC_STAT: GaugeVec =
|
||||
GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_AGENT,"proc_stat"), "Agent process statistics."), &["item"]).unwrap();
|
||||
static ref AGENT_PROC_STAT: GaugeVec =
|
||||
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_AGENT,"proc_stat"), "Agent process statistics.", &["item"]).unwrap();
|
||||
|
||||
// guest os metrics
|
||||
static ref GUEST_LOAD: GaugeVec =
|
||||
GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"load"), "Guest system load."), &["item"]).unwrap();
|
||||
static ref GUEST_LOAD: GaugeVec =
|
||||
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"load") , "Guest system load.", &["item"]).unwrap();
|
||||
|
||||
static ref GUEST_TASKS: GaugeVec =
|
||||
GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"tasks"), "Guest system load."), &["item"]).unwrap();
|
||||
static ref GUEST_TASKS: GaugeVec =
|
||||
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"tasks") , "Guest system load.", &["item"]).unwrap();
|
||||
|
||||
static ref GUEST_CPU_TIME: GaugeVec =
|
||||
GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"cpu_time"), "Guest CPU statistics."), &["cpu","item"]).unwrap();
|
||||
static ref GUEST_CPU_TIME: GaugeVec =
|
||||
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"cpu_time") , "Guest CPU statistics.", &["cpu","item"]).unwrap();
|
||||
|
||||
static ref GUEST_VM_STAT: GaugeVec =
|
||||
GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"vm_stat"), "Guest virtual memory statistics."), &["item"]).unwrap();
|
||||
static ref GUEST_VM_STAT: GaugeVec =
|
||||
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"vm_stat") , "Guest virtual memory statistics.", &["item"]).unwrap();
|
||||
|
||||
static ref GUEST_NETDEV_STAT: GaugeVec =
|
||||
GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"netdev_stat"), "Guest net devices statistics."), &["interface","item"]).unwrap();
|
||||
static ref GUEST_NETDEV_STAT: GaugeVec =
|
||||
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"netdev_stat") , "Guest net devices statistics.", &["interface","item"]).unwrap();
|
||||
|
||||
static ref GUEST_DISKSTAT: GaugeVec =
|
||||
GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"diskstat"), "Disks statistics in system."), &["disk","item"]).unwrap();
|
||||
static ref GUEST_DISKSTAT: GaugeVec =
|
||||
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"diskstat") , "Disks statistics in system.", &["disk","item"]).unwrap();
|
||||
|
||||
static ref GUEST_MEMINFO: GaugeVec =
|
||||
GaugeVec::new(Opts::new(format!("{}_{}",NAMESPACE_KATA_GUEST,"meminfo"), "Statistics about memory usage in the system."), &["item"]).unwrap();
|
||||
static ref GUEST_MEMINFO: GaugeVec =
|
||||
prometheus::register_gauge_vec!(format!("{}_{}",NAMESPACE_KATA_GUEST,"meminfo") , "Statistics about memory usage in the system.", &["item"]).unwrap();
|
||||
}
|
||||
|
||||
#[instrument]
|
||||
pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
|
||||
let mut registered = REGISTERED
|
||||
.lock()
|
||||
.map_err(|e| anyhow!("failed to check agent metrics register status {:?}", e))?;
|
||||
|
||||
if !(*registered) {
|
||||
register_metrics()?;
|
||||
*registered = true;
|
||||
}
|
||||
|
||||
AGENT_SCRAPE_COUNT.inc();
|
||||
|
||||
// update agent process metrics
|
||||
@@ -97,7 +81,7 @@ pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
|
||||
update_guest_metrics();
|
||||
|
||||
// gather all metrics and return as a String
|
||||
let metric_families = REGISTRY.gather();
|
||||
let metric_families = prometheus::gather();
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
let encoder = TextEncoder::new();
|
||||
@@ -106,31 +90,6 @@ pub fn get_metrics(_: &protocols::agent::GetMetricsRequest) -> Result<String> {
|
||||
Ok(String::from_utf8(buffer)?)
|
||||
}
|
||||
|
||||
#[instrument]
|
||||
fn register_metrics() -> Result<()> {
|
||||
REGISTRY.register(Box::new(AGENT_SCRAPE_COUNT.clone()))?;
|
||||
|
||||
// agent metrics
|
||||
REGISTRY.register(Box::new(AGENT_THREADS.clone()))?;
|
||||
REGISTRY.register(Box::new(AGENT_TOTAL_TIME.clone()))?;
|
||||
REGISTRY.register(Box::new(AGENT_TOTAL_VM.clone()))?;
|
||||
REGISTRY.register(Box::new(AGENT_TOTAL_RSS.clone()))?;
|
||||
REGISTRY.register(Box::new(AGENT_PROC_STATUS.clone()))?;
|
||||
REGISTRY.register(Box::new(AGENT_IO_STAT.clone()))?;
|
||||
REGISTRY.register(Box::new(AGENT_PROC_STAT.clone()))?;
|
||||
|
||||
// guest metrics
|
||||
REGISTRY.register(Box::new(GUEST_LOAD.clone()))?;
|
||||
REGISTRY.register(Box::new(GUEST_TASKS.clone()))?;
|
||||
REGISTRY.register(Box::new(GUEST_CPU_TIME.clone()))?;
|
||||
REGISTRY.register(Box::new(GUEST_VM_STAT.clone()))?;
|
||||
REGISTRY.register(Box::new(GUEST_NETDEV_STAT.clone()))?;
|
||||
REGISTRY.register(Box::new(GUEST_DISKSTAT.clone()))?;
|
||||
REGISTRY.register(Box::new(GUEST_MEMINFO.clone()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[instrument]
|
||||
fn update_agent_metrics() -> Result<()> {
|
||||
let me = procfs::process::Process::myself();
|
||||
|
||||
@@ -169,12 +169,11 @@ pub fn baremount(
|
||||
|
||||
info!(
|
||||
logger,
|
||||
"baremount source={:?}, dest={:?}, fs_type={:?}, options={:?}, flags={:?}",
|
||||
"mount source={:?}, dest={:?}, fs_type={:?}, options={:?}",
|
||||
source,
|
||||
destination,
|
||||
fs_type,
|
||||
options,
|
||||
flags
|
||||
options
|
||||
);
|
||||
|
||||
nix::mount::mount(
|
||||
@@ -648,7 +647,7 @@ pub fn recursive_ownership_change(
|
||||
) -> Result<()> {
|
||||
let mut mask = if read_only { RO_MASK } else { RW_MASK };
|
||||
if path.is_dir() {
|
||||
for entry in fs::read_dir(path)? {
|
||||
for entry in fs::read_dir(&path)? {
|
||||
recursive_ownership_change(entry?.path().as_path(), uid, gid, read_only)?;
|
||||
}
|
||||
mask |= EXEC_MASK;
|
||||
@@ -779,20 +778,8 @@ pub async fn add_storages(
|
||||
}
|
||||
};
|
||||
|
||||
let mount_point = match res {
|
||||
Err(e) => {
|
||||
error!(
|
||||
logger,
|
||||
"add_storages failed, storage: {:?}, error: {:?} ", storage, e
|
||||
);
|
||||
let mut sb = sandbox.lock().await;
|
||||
sb.unset_sandbox_storage(&storage.mount_point)
|
||||
.map_err(|e| warn!(logger, "fail to unset sandbox storage {:?}", e))
|
||||
.ok();
|
||||
return Err(e);
|
||||
}
|
||||
Ok(m) => m,
|
||||
};
|
||||
// Todo need to rollback the mounted storage if err met.
|
||||
let mount_point = res?;
|
||||
|
||||
if !mount_point.is_empty() {
|
||||
mount_list.push(mount_point);
|
||||
@@ -853,8 +840,7 @@ pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Resul
|
||||
return Err(anyhow!("Invalid mount point {}", mount_point));
|
||||
}
|
||||
|
||||
let content = fs::read_to_string(mount_file)
|
||||
.map_err(|e| anyhow!("read mount file {}: {}", mount_file, e))?;
|
||||
let content = fs::read_to_string(mount_file)?;
|
||||
|
||||
let re = Regex::new(format!("device .+ mounted on {} with fstype (.+)", mount_point).as_str())?;
|
||||
|
||||
@@ -894,7 +880,7 @@ pub fn get_cgroup_mounts(
|
||||
}]);
|
||||
}
|
||||
|
||||
let file = File::open(cg_path)?;
|
||||
let file = File::open(&cg_path)?;
|
||||
let reader = BufReader::new(file);
|
||||
|
||||
let mut has_device_cgroup = false;
|
||||
@@ -1030,6 +1016,8 @@ fn parse_options(option_list: Vec<String>) -> HashMap<String, String> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::test_utils::test_utils::TestUserType;
|
||||
use crate::{skip_if_not_root, skip_loop_by_user, skip_loop_if_not_root, skip_loop_if_root};
|
||||
use protobuf::RepeatedField;
|
||||
use protocols::agent::FSGroup;
|
||||
use std::fs::File;
|
||||
@@ -1037,10 +1025,6 @@ mod tests {
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
use tempfile::tempdir;
|
||||
use test_utils::TestUserType;
|
||||
use test_utils::{
|
||||
skip_if_not_root, skip_loop_by_user, skip_loop_if_not_root, skip_loop_if_root,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_mount() {
|
||||
@@ -1777,7 +1761,7 @@ mod tests {
|
||||
let tempdir = tempdir().unwrap();
|
||||
|
||||
let src = if d.mask_src {
|
||||
tempdir.path().join(d.src)
|
||||
tempdir.path().join(&d.src)
|
||||
} else {
|
||||
Path::new(d.src).to_path_buf()
|
||||
};
|
||||
|
||||
@@ -78,7 +78,6 @@ impl Namespace {
|
||||
// setup creates persistent namespace without switching to it.
|
||||
// Note, pid namespaces cannot be persisted.
|
||||
#[instrument]
|
||||
#[allow(clippy::question_mark)]
|
||||
pub async fn setup(mut self) -> Result<Self> {
|
||||
fs::create_dir_all(&self.persistent_ns_dir)?;
|
||||
|
||||
@@ -89,7 +88,7 @@ impl Namespace {
|
||||
}
|
||||
let logger = self.logger.clone();
|
||||
|
||||
let new_ns_path = ns_path.join(ns_type.get());
|
||||
let new_ns_path = ns_path.join(&ns_type.get());
|
||||
|
||||
File::create(new_ns_path.as_path())?;
|
||||
|
||||
@@ -103,7 +102,7 @@ impl Namespace {
|
||||
let source = Path::new(&origin_ns_path);
|
||||
let destination = new_ns_path.as_path();
|
||||
|
||||
File::open(source)?;
|
||||
File::open(&source)?;
|
||||
|
||||
// Create a new netns on the current thread.
|
||||
let cf = ns_type.get_flags();
|
||||
@@ -188,10 +187,9 @@ impl fmt::Debug for NamespaceType {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{Namespace, NamespaceType};
|
||||
use crate::mount::remove_mounts;
|
||||
use crate::{mount::remove_mounts, skip_if_not_root};
|
||||
use nix::sched::CloneFlags;
|
||||
use tempfile::Builder;
|
||||
use test_utils::skip_if_not_root;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_setup_persistent_ns() {
|
||||
|
||||