docs: merge documentation repository

Generated by
git subtree add --prefix=docs git@github.com:kata-containers/documentation.git master

git-subtree-dir: docs
git-subtree-mainline: ec146a1b39
git-subtree-split: 510287204b

Fixes: #329
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
Peng Tao 2020-06-23 21:21:53 -07:00
commit a196c85e04
109 changed files with 10281 additions and 0 deletions

docs/.ci/lib.sh Normal file

@@ -0,0 +1,25 @@
#
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
export tests_repo="${tests_repo:-github.com/kata-containers/tests}"
export tests_repo_dir="$GOPATH/src/$tests_repo"
clone_tests_repo()
{
# KATA_CI_NO_NETWORK is (has to be) ignored if there is
# no existing clone.
if [ -d "$tests_repo_dir" -a -n "$KATA_CI_NO_NETWORK" ]
then
return
fi
go get -d -u "$tests_repo" || true
}
run_static_checks()
{
clone_tests_repo
bash "$tests_repo_dir/.ci/static-checks.sh" "github.com/kata-containers/documentation"
}

docs/.ci/run.sh Executable file

@@ -0,0 +1,11 @@
#!/bin/bash
#
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
set -e
cidir=$(dirname "$0")
bash "${cidir}/test-install-docs.sh"

docs/.ci/setup.sh Executable file

@@ -0,0 +1,17 @@
#!/bin/bash
#
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
set -e
cidir=$(dirname "$0")
source "${cidir}/lib.sh"
clone_tests_repo
pushd "${tests_repo_dir}"
.ci/setup.sh
popd

docs/.ci/static-checks.sh Executable file

@@ -0,0 +1,12 @@
#!/bin/bash
#
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
set -e
cidir=$(dirname "$0")
source "${cidir}/lib.sh"
run_static_checks

docs/.ci/test-install-docs.sh Executable file

@@ -0,0 +1,351 @@
#!/bin/bash
#
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
set -e
# The go binary isn't installed, but we checkout the repos to the standard
# golang locations.
export GOPATH=${GOPATH:-${HOME}/go}
typeset -r script_name="${0##*/}"
typeset -r script_dir="$(cd "$(dirname "${0}")" && pwd)"
typeset -r docker_image="busybox"
typeset -r kata_project_url="github.com/kata-containers"
typeset -r test_repo="${kata_project_url}/tests"
typeset -r test_repo_url="https://${test_repo}"
typeset -r test_repo_dir="${GOPATH}/src/${test_repo}"
typeset -r kata_project_dir="${GOPATH}/src/${kata_project_url}"
typeset -r mgr="${test_repo_dir}/cmd/kata-manager/kata-manager.sh"
typeset -r doc_to_script="${test_repo_dir}/.ci/kata-doc-to-script.sh"
die()
{
local msg="$*"
echo >&2 "ERROR: $msg"
exit 1
}
info()
{
local msg="$*"
echo "INFO: $msg"
}
usage()
{
cat <<EOT
Description: Run Kata documentation CI tests.
Usage: $script_name [options]
Options:
-h : Show this help.
-t <dir> : Run all scripts ("*.sh" files) in the specified
directory.
Notes:
- The '-t' option is not generally useful - it is used by this
script which re-exec's itself with this option.
EOT
}
# Re-execute the running script from a temporary directory to allow the
# script to continue executing even if the original source file is deleted.
reexec_in_tmpdir()
{
local -r test_dir="$1"
[ -d "${test_dir}" ] || die "invalid test dir: ${test_dir}"
if [ "${script_dir}" = "${test_dir}" ]
then
# Already running from temp directory so nothing else to do
return
fi
local new
new="${test_dir}/${script_name}"
install --mode 750 "${0}" "${new}"
info "Re-execing ${0} as ${new}"
cd "${test_dir}"
exec "${new}" -t "${test_dir}/tests"
}
# Grab a copy of the tests repository
get_tests_repo()
{
[ -d "${test_repo_dir}" ] && return
mkdir -p "${kata_project_dir}"
git clone "${test_repo_url}" "${test_repo_dir}"
}
# Delete all local github repo clones.
#
# This is required to ensure that the tests themselves (re-)create these
# clones.
delete_kata_repos()
{
[ -n "${KATA_DEV_MODE}" ] && die "Not continuing as this is a dev system"
[ -z "${CI}" ] && die "Not continuing as this is a non-CI environment"
local cwd="$PWD"
info "Deleting all local kata repositories below ${kata_project_dir}"
[ -d "${kata_project_dir}" ] && rm -rf "${kata_project_dir}" || true
# Recreate the empty directory, taking care to handle the scenario
# where the script is run from within the just-deleted directory.
mkdir -p "$cwd" && cd "$cwd"
}
setup()
{
source /etc/os-release || source /usr/lib/os-release
mkdir -p "${GOPATH}"
get_tests_repo
[ -e "$mgr" ] || die "cannot find $mgr"
}
# Perform a simple test to create a container
create_kata_container()
{
local -r test_name="$1"
local -r msg=$(info "Successfully tested ${test_name} on distro ${ID} ${VERSION}")
# Perform a basic test
sudo -E docker run --rm -i --runtime "kata-runtime" "${docker_image}" echo "$msg"
}
# Run the kata manager to "execute" the install guide to ensure the commands
# it specified result in a working system.
test_distro_install_guide()
{
info "Installing system from the $ID install guide"
$mgr install-docker-system
$mgr configure-image
$mgr enable-debug
local mgr_name="${mgr##*/}"
local test_name="${mgr_name} to test install guide"
info "Install using ${test_name}"
create_kata_container "${test_name}"
# Clean up
$mgr remove-packages
}
# Apart from the distro-specific install guides, users can choose to install
# using one of the following methods:
#
# - kata-manager ("Automatic" method).
# - kata-doc-to-script ("Scripted" method).
#
# Testing these is awkward because we need to "execute" the documents
# describing those install methods, but since those install methods should
# themselves entirely document/handle an installation method, we need to
# convert each install document to a script, then delete all the kata code
# repositories. This ensures that when each install method script is run, it
# does not rely on any local files (it should download anything it needs). But
# since we're deleting the repos, we need to copy this script to a temporary
# location, along with the install scripts this function generates, and then
# re-exec this script with an option to ask it to run the scripts the previous
# instance of this script just generated.
test_alternative_install_methods()
{
local -a files
files+=("installing-with-kata-manager.md")
files+=("installing-with-kata-doc-to-script.md")
local tmp_dir
tmp_dir=$(mktemp -d)
local script_file
local file
local tests_dir
tests_dir="${tmp_dir}/tests"
mkdir -p "${tests_dir}"
local -i num=0
# Convert the docs to scripts
for file in "${files[@]}"
do
num+=1
local file_path
local script_file
local script_file_path
local test_name
file_path="${script_dir}/../install/${file}"
script_file=${file/.md/.sh}
# Add a numeric prefix so the tests are run in the array order
test_name=$(printf "%.2d-%s" "${num}" "${script_file}")
script_file_path="${tests_dir}/${test_name}"
info "Creating test script ${test_name} from ${file}"
bash "${doc_to_script}" "${file_path}" "${script_file_path}"
done
reexec_in_tmpdir "${tmp_dir}"
# Not reached
die "re-exec failed"
}
run_tests()
{
# If docker was installed by default, zap it.
$mgr -v -f remove-docker
test_distro_install_guide
test_alternative_install_methods
}
# Detect if any installation documents changed. If so, execute all the
# documents to test they result in a working system.
check_install_docs()
{
if [ -n "$TRAVIS" ]
then
info "Not testing install guide as Travis lacks modern distro support and VT-x"
return
fi
# List of filters used to restrict the types of file changes.
# See git-diff-tree(1) for further info.
local filters=""
# Added file
filters+="A"
# Copied file
filters+="C"
# Modified file
filters+="M"
# Renamed file
filters+="R"
# Unmerged (U) and Unknown (X) files. These particular filters
# shouldn't be necessary but just in case...
filters+="UX"
# List of changed files
local files=$(git diff-tree \
--name-only \
--no-commit-id \
--diff-filter="${filters}" \
-r \
origin/master HEAD || true)
# No files were changed
[ -z "$files" ] && return
changed=$(echo "${files}" | grep "^install/.*\.md$" || true)
[ -z "$changed" ] && info "No install documents modified" && return
info "Found modified install documents: ${changed}"
# Regardless of which installation documents were changed, we test
# them all where possible.
run_tests
}
# Run the test scripts in the specified directory.
run_tests_from_dir()
{
local -r test_dir="$1"
[ -e "$test_dir" ] || die "invalid test dir: ${test_dir}"
cd "${test_dir}"
info "Looking for tests scripts to run in directory ${test_dir}"
for t in $(ls -- *.sh)
do
# Ensure the test script cannot access any local files
# (since it should be standalone and download any files
# it needs).
delete_kata_repos
info "Running test script '$t'"
bash -x "${t}"
# Ensure it is possible to use the installed system
create_kata_container "${t}"
# Re-run setup to recreate the tests repo that was deleted
# before the test ran.
setup
# Packaged install so clean up
# (Note that '$mgr' should now exist again)
$mgr remove-packages
done
# paranoia
[ -d "${test_dir}" ] && rm -rf "${test_dir}"
info "All tests passed"
}
main()
{
local opt
local test_dir
setup
while getopts "ht:" opt
do
case "$opt" in
h) usage; exit 0;;
t) test_dir="$OPTARG";;
*) die "invalid option: $opt";;
esac
done
if [ -n "$test_dir" ]
then
run_tests_from_dir "$test_dir"
exit 0
fi
check_install_docs
}
main "$@"

docs/.travis.yml Normal file

@@ -0,0 +1,25 @@
#
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
sudo: required
dist: xenial
language: go
os:
- linux
- linux-ppc64le
go:
- "1.10.x"
before_install:
- ".ci/setup.sh"
before_script:
- ".ci/static-checks.sh"
script:
- ".ci/run.sh"

docs/CODEOWNERS Normal file

@@ -0,0 +1,13 @@
# Copyright 2019 Intel Corporation.
#
# SPDX-License-Identifier: Apache-2.0
#
# Define any code owners for this repository.
# The code owners lists are used to help automatically enforce
# reviews and acks of the right groups on the right PRs.
# Order in this file is important. Only the last match will be
# used. See https://help.github.com/articles/about-code-owners/
*.md @kata-containers/documentation

docs/CODE_OF_CONDUCT.md Normal file

@@ -0,0 +1,3 @@
## Kata Containers Documentation Code of Conduct
Kata Containers follows the [OpenStack Foundation Code of Conduct](https://www.openstack.org/legal/community-code-of-conduct/).

docs/CONTRIBUTING.md Normal file

@@ -0,0 +1,5 @@
# Contributing
## This repo is part of [Kata Containers](https://katacontainers.io)
For details on how to contribute to the Kata Containers project, please see the main [contributing document](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md).

docs/Developer-Guide.md Normal file

@@ -0,0 +1,690 @@
* [Warning](#warning)
* [Assumptions](#assumptions)
* [Initial setup](#initial-setup)
* [Requirements to build individual components](#requirements-to-build-individual-components)
* [Build and install the Kata Containers runtime](#build-and-install-the-kata-containers-runtime)
* [Check hardware requirements](#check-hardware-requirements)
* [Configure to use initrd or rootfs image](#configure-to-use-initrd-or-rootfs-image)
* [Enable full debug](#enable-full-debug)
* [debug logs and shimv2](#debug-logs-and-shimv2)
* [Enabling full `containerd` debug](#enabling-full-containerd-debug)
* [Enabling just `containerd shim` debug](#enabling-just-containerd-shim-debug)
* [Enabling `CRI-O` and `shimv2` debug](#enabling-cri-o-and-shimv2-debug)
* [journald rate limiting](#journald-rate-limiting)
* [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
* [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
* [Build and install Kata proxy](#build-and-install-kata-proxy)
* [Build and install Kata shim](#build-and-install-kata-shim)
* [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
* [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
* [Get the osbuilder](#get-the-osbuilder)
* [Create a rootfs image](#create-a-rootfs-image)
* [Create a local rootfs](#create-a-local-rootfs)
* [Add a custom agent to the image - OPTIONAL](#add-a-custom-agent-to-the-image---optional)
* [Build a rootfs image](#build-a-rootfs-image)
* [Install the rootfs image](#install-the-rootfs-image)
* [Create an initrd image - OPTIONAL](#create-an-initrd-image---optional)
* [Create a local rootfs for initrd image](#create-a-local-rootfs-for-initrd-image)
* [Build an initrd image](#build-an-initrd-image)
* [Install the initrd image](#install-the-initrd-image)
* [Install guest kernel images](#install-guest-kernel-images)
* [Install a hypervisor](#install-a-hypervisor)
* [Build a custom QEMU](#build-a-custom-qemu)
* [Build a custom QEMU for aarch64/arm64 - REQUIRED](#build-a-custom-qemu-for-aarch64arm64---required)
* [Run Kata Containers with Docker](#run-kata-containers-with-docker)
* [Update the Docker systemd unit file](#update-the-docker-systemd-unit-file)
* [Create a container using Kata](#create-a-container-using-kata)
* [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
* [Troubleshoot Kata Containers](#troubleshoot-kata-containers)
* [Appendices](#appendices)
* [Checking Docker default runtime](#checking-docker-default-runtime)
* [Set up a debug console](#set-up-a-debug-console)
* [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
* [Create a debug systemd service](#create-a-debug-systemd-service)
* [Build the debug image](#build-the-debug-image)
* [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
* [Ensure debug options are valid](#ensure-debug-options-are-valid)
* [Create a container](#create-a-container)
* [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
* [Obtain details of the image](#obtain-details-of-the-image)
* [Capturing kernel boot logs](#capturing-kernel-boot-logs)
* [Running standalone](#running-standalone)
* [Create an OCI bundle](#create-an-oci-bundle)
* [Launch the runtime to create a container](#launch-the-runtime-to-create-a-container)
# Warning
This document is written **specifically for developers**: it is not intended for end users.
# Assumptions
- You are working on a non-critical test or development system.
# Initial setup
The recommended way to create a development environment is to first
[install the packaged versions of the Kata Containers components](install/README.md)
to create a working system.
The installation guide instructions will install all required Kata Containers
components, plus Docker*, the hypervisor, and the Kata Containers image and
guest kernel.
# Requirements to build individual components
You need to install the following to build Kata Containers components:
- [golang](https://golang.org/dl)
To view the versions of go known to work, see the `golang` entry in the
[versions database](https://github.com/kata-containers/runtime/blob/master/versions.yaml).
- `make`.
- `gcc` (required for building the shim and runtime).
# Build and install the Kata Containers runtime
```
$ go get -d -u github.com/kata-containers/runtime
$ cd $GOPATH/src/github.com/kata-containers/runtime
$ make && sudo -E PATH=$PATH make install
```
The build will create the following:
- runtime binary: `/usr/local/bin/kata-runtime`
- configuration file: `/usr/share/defaults/kata-containers/configuration.toml`
# Check hardware requirements
You can check if your system is capable of creating a Kata Container by running the following:
```
$ sudo kata-runtime kata-check
```
If your system is *not* able to run Kata Containers, the previous command will error out and explain why.
## Configure to use initrd or rootfs image
Kata containers can run with either an initrd image or a rootfs image.
If you want to test with `initrd`, make sure you have `initrd = /usr/share/kata-containers/kata-containers-initrd.img`
in your configuration file `/usr/share/defaults/kata-containers/configuration.toml`, and comment out the `image` line. For example:
```
$ sudo mkdir -p /etc/kata-containers/
$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers
$ sudo sed -i 's/^\(image =.*\)/# \1/g' /etc/kata-containers/configuration.toml
```
You can create the initrd image as shown in the [create an initrd image](#create-an-initrd-image---optional) section.
If you want to test with a rootfs `image`, make sure you have `image = /usr/share/kata-containers/kata-containers.img`
in your configuration file, commenting out the `initrd` line. For example:
```
$ sudo mkdir -p /etc/kata-containers/
$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers
$ sudo sed -i 's/^\(initrd =.*\)/# \1/g' /etc/kata-containers/configuration.toml
```
The rootfs image is created as shown in the [create a rootfs image](#create-a-rootfs-image) section.
Exactly one of the `initrd` and `image` options in the Kata runtime config file **MUST** be set, but **not both**.
The main difference between the options is size: an `initrd` (10MB+) is significantly smaller than a
rootfs `image` (100MB+).
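For illustration, a minimal sketch of the relevant `configuration.toml` excerpt when booting from a
rootfs image (using the default paths installed by the steps above) might look like:

```toml
[hypervisor.qemu]
# Boot from the rootfs image; leave the initrd option commented out.
image = "/usr/share/kata-containers/kata-containers.img"
# initrd = "/usr/share/kata-containers/kata-containers-initrd.img"
```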
## Enable full debug
Enable full debug as follows:
```
$ sudo mkdir -p /etc/kata-containers/
$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers
$ sudo sed -i -e 's/^# *\(enable_debug\).*=.*$/\1 = true/g' /etc/kata-containers/configuration.toml
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.log=debug initcall_debug"/g' /etc/kata-containers/configuration.toml
```
### debug logs and shimv2
If you are using `containerd` and the Kata `containerd-shimv2` to launch Kata Containers, and wish
to enable Kata debug logging, there are two ways to do so via the `containerd` configuration file,
as detailed below.
The Kata logs appear in the `containerd` log files, along with logs from `containerd` itself.
For more information about `containerd` debug, please see the
[`containerd` documentation](https://github.com/containerd/containerd/blob/master/docs/getting-started.md).
#### Enabling full `containerd` debug
Enabling full `containerd` debug also enables shimv2 debug. Edit the `containerd` configuration file
to include the top-level debug option, such as:
```toml
[debug]
level = "debug"
```
#### Enabling just `containerd shim` debug
If you only wish to enable debug for the `containerd` shims themselves, just enable the debug
option in the `plugins.linux` section of the `containerd` configuration file, such as:
```toml
[plugins.linux]
shim_debug = true
```
#### Enabling `CRI-O` and `shimv2` debug
Depending on the CRI-O version being used, one of the following configuration files can
be found: `/etc/crio/crio.conf` or `/etc/crio/crio.conf.d/00-default`.
If the latter is found, the change must be made there, as it takes precedence over
`/etc/crio/crio.conf`.
```toml
# Changes the verbosity of the logs based on the level it is set to. Options
# are fatal, panic, error, warn, info, debug and trace. This option supports
# live configuration reload.
log_level = "info"
```
Switching the default `log_level` from `info` to `debug` enables shimv2 debug logs.
CRI-O logs can be found by using the `crio` identifier, and Kata specific logs can
be found by using the `kata` identifier.
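For example, to enable shimv2 debug logs, the option shown above would become:

```toml
log_level = "debug"
```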
### journald rate limiting
Enabling [full debug](#enable-full-debug) results in the Kata components generating
large amounts of logging, which by default is stored in the system log. Depending on
your system configuration, it is possible that some events might be discarded by the
system logging daemon. The following shows how to determine this for `systemd-journald`,
and offers possible workarounds and fixes.
> **Note** The method of implementation can vary between Operating System installations.
> Amend these instructions as necessary to your system implementation,
> and consult with your system administrator for the appropriate configuration.
#### `systemd-journald` suppressing messages
`systemd-journald` can be configured to rate limit the number of journal entries
it stores. When messages are suppressed, this is noted in the logs. You can check
for this by looking for those notifications, such as:
```sh
$ sudo journalctl --since today | fgrep Suppressed
Jun 29 14:51:17 mymachine systemd-journald[346]: Suppressed 4150 messages from /system.slice/docker.service
```
This message indicates that a number of log messages from the `docker.service` slice were
suppressed. In such a case, you can expect to have incomplete logging information
stored from the Kata Containers components.
#### Disabling `systemd-journald` rate limiting
In order to capture complete logs from the Kata Containers components, you
need to reduce or disable the `systemd-journald` rate limit. Configure
this at the global `systemd-journald` level, and it will apply to all system slices.
To disable `systemd-journald` rate limiting at the global level, edit the file
`/etc/systemd/journald.conf`, and add/uncomment the following lines:
```
RateLimitInterval=0s
RateLimitBurst=0
```
Restart `systemd-journald` for the changes to take effect:
```sh
$ sudo systemctl restart systemd-journald
```
# Build and install Kata proxy
```
$ go get -d -u github.com/kata-containers/proxy
$ cd $GOPATH/src/github.com/kata-containers/proxy && make && sudo make install
```
# Build and install Kata shim
```
$ go get -d -u github.com/kata-containers/shim
$ cd $GOPATH/src/github.com/kata-containers/shim && make && sudo make install
```
# Create and install rootfs and initrd image
## Build a custom Kata agent - OPTIONAL
> **Note:**
>
> - You should only do this step if you are testing with the latest version of the agent.
```
$ go get -d -u github.com/kata-containers/agent
$ cd $GOPATH/src/github.com/kata-containers/agent && make
```
## Get the osbuilder
```
$ go get -d -u github.com/kata-containers/osbuilder
```
## Create a rootfs image
### Create a local rootfs
As a prerequisite, you need to install Docker. Otherwise, you will not be
able to run the `rootfs.sh` script with `USE_DOCKER=true` as expected in
the following example.
```
$ export ROOTFS_DIR=${GOPATH}/src/github.com/kata-containers/osbuilder/rootfs-builder/rootfs
$ sudo rm -rf ${ROOTFS_DIR}
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/rootfs-builder
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}'
```
You MUST choose one of `alpine`, `centos`, `clearlinux`, `debian`, `euleros`, `fedora`, `suse`, and `ubuntu` for `${distro}`. By default `seccomp` packages are not included in the rootfs image. Set `SECCOMP` to `yes` to include them.
> **Note:**
>
> - Check the [compatibility matrix](https://github.com/kata-containers/osbuilder#platform-distro-compatibility-matrix) before creating rootfs.
> - You must ensure that the *default Docker runtime* is `runc` to make use of
> the `USE_DOCKER` variable. If that is not the case, remove the variable
> from the previous command. See [Checking Docker default runtime](#checking-docker-default-runtime).
### Add a custom agent to the image - OPTIONAL
> **Note:**
>
> - You should only do this step if you are testing with the latest version of the agent.
```
$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/bin ../../agent/kata-agent
$ sudo install -o root -g root -m 0440 ../../agent/kata-agent.service ${ROOTFS_DIR}/usr/lib/systemd/system/
$ sudo install -o root -g root -m 0440 ../../agent/kata-containers.target ${ROOTFS_DIR}/usr/lib/systemd/system/
```
### Build a rootfs image
```
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/image-builder
$ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
```
> **Notes:**
>
> - You must ensure that the *default Docker runtime* is `runc` to make use of
> the `USE_DOCKER` variable. If that is not the case, remove the variable
> from the previous command. See [Checking Docker default runtime](#checking-docker-default-runtime).
> - If you do *not* wish to build under Docker, remove the `USE_DOCKER`
> variable in the previous command and ensure the `qemu-img` command is
> available on your system.
### Install the rootfs image
```
$ commit=$(git log --format=%h -1 HEAD)
$ date=$(date +%Y-%m-%d-%T.%N%z)
$ image="kata-containers-${date}-${commit}"
$ sudo install -o root -g root -m 0640 -D kata-containers.img "/usr/share/kata-containers/${image}"
$ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers.img)
```
## Create an initrd image - OPTIONAL
### Create a local rootfs for initrd image
```
$ export ROOTFS_DIR="${GOPATH}/src/github.com/kata-containers/osbuilder/rootfs-builder/rootfs"
$ sudo rm -rf ${ROOTFS_DIR}
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/rootfs-builder
$ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}'
```
`AGENT_INIT` controls whether the guest image uses the Kata agent as the guest `init` process. When you create an initrd image,
always set `AGENT_INIT` to `yes`. By default `seccomp` packages are not included in the initrd image. Set `SECCOMP` to `yes` to include them.
You MUST choose one of `alpine`, `centos`, `clearlinux`, `euleros`, and `fedora` for `${distro}`.
> **Note:**
>
> - Check the [compatibility matrix](https://github.com/kata-containers/osbuilder#platform-distro-compatibility-matrix) before creating rootfs.
Optionally, add your custom agent binary to the rootfs with the following:
```
$ sudo install -o root -g root -m 0550 -T ../../agent/kata-agent ${ROOTFS_DIR}/sbin/init
```
### Build an initrd image
```
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/initrd-builder
$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true ./initrd_builder.sh ${ROOTFS_DIR}'
```
### Install the initrd image
```
$ commit=$(git log --format=%h -1 HEAD)
$ date=$(date +%Y-%m-%d-%T.%N%z)
$ image="kata-containers-initrd-${date}-${commit}"
$ sudo install -o root -g root -m 0640 -D kata-containers-initrd.img "/usr/share/kata-containers/${image}"
$ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers-initrd.img)
```
# Install guest kernel images
You can build and install the guest kernel image as shown [here](https://github.com/kata-containers/packaging/tree/master/kernel#build-kata-containers-kernel).
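As a sketch, the steps take the following form (this assumes the `build-kernel.sh` helper from the
packaging repository; treat the linked document as authoritative):

```bash
$ # assumes the build-kernel.sh helper from the kata-containers/packaging repository
$ go get -d github.com/kata-containers/packaging
$ cd ${GOPATH}/src/github.com/kata-containers/packaging/kernel
$ ./build-kernel.sh setup
$ ./build-kernel.sh build
$ sudo -E ./build-kernel.sh install
```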
# Install a hypervisor
When setting up Kata using a [packaged installation method](https://github.com/kata-containers/documentation/tree/master/install#installing-on-a-linux-system), the `qemu-lite` hypervisor is installed automatically. For other installation methods, you will need to manually install a suitable hypervisor.
## Build a custom QEMU
Your QEMU directory needs to be prepared with source code. Alternatively, you can use the [Kata containers QEMU](https://github.com/kata-containers/qemu/tree/master) and check out the recommended branch:
```
$ go get -d github.com/kata-containers/qemu
$ qemu_branch=$(grep qemu-lite- ${GOPATH}/src/github.com/kata-containers/runtime/versions.yaml | cut -d '"' -f2)
$ cd ${GOPATH}/src/github.com/kata-containers/qemu
$ git checkout -b $qemu_branch remotes/origin/$qemu_branch
$ your_qemu_directory=${GOPATH}/src/github.com/kata-containers/qemu
```
To build a version of QEMU using the same options as the default `qemu-lite` version, you could use the `configure-hypervisor.sh` script:
```
$ go get -d github.com/kata-containers/packaging
$ cd $your_qemu_directory
$ ${GOPATH}/src/github.com/kata-containers/packaging/scripts/configure-hypervisor.sh qemu > kata.cfg
$ eval ./configure "$(cat kata.cfg)"
$ make -j $(nproc)
$ sudo -E make install
```
### Build a custom QEMU for aarch64/arm64 - REQUIRED
> **Note:**
>
> - You should only do this step if you are on aarch64/arm64.
> - You should include [Eric Auger's latest PCDIMM/NVDIMM patches](https://patchwork.kernel.org/cover/10647305/) which are
> under upstream review for supporting NVDIMM on aarch64.
>
You could build the custom `qemu-system-aarch64` as required with the following command:
```
$ go get -d github.com/kata-containers/tests
$ script -fec 'sudo -E ${GOPATH}/src/github.com/kata-containers/tests/.ci/install_qemu.sh'
```
# Run Kata Containers with Docker
## Update the Docker systemd unit file
```
$ dockerUnit=$(systemctl show -p FragmentPath docker.service | cut -d "=" -f 2)
$ unitFile=${dockerUnit:-/etc/systemd/system/docker.service.d/kata-containers.conf}
$ test -e "$unitFile" || { sudo mkdir -p "$(dirname $unitFile)"; echo -e "[Service]\nType=simple\nExecStart=\nExecStart=/usr/bin/dockerd -D --default-runtime runc" | sudo tee "$unitFile"; }
$ grep -q "kata-runtime=" $unitFile || sudo sed -i 's!^\(ExecStart=[^$].*$\)!\1 --add-runtime kata-runtime=/usr/local/bin/kata-runtime!g' "$unitFile"
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
## Create a container using Kata
```
$ sudo docker run -ti --runtime kata-runtime busybox sh
```
# Run Kata Containers with Kubernetes
Refer to the [Run Kata Containers with Kubernetes](how-to/run-kata-with-k8s.md) how-to guide.
# Troubleshoot Kata Containers
If you are unable to create a Kata Container first ensure you have
[enabled full debug](#enable-full-debug)
before attempting to create a container. Then run the
[`kata-collect-data.sh`](https://github.com/kata-containers/runtime/blob/master/data/kata-collect-data.sh.in)
script and paste its output directly into a
[GitHub issue](https://github.com/kata-containers/kata-containers/issues/new).
> **Note:**
>
> The `kata-collect-data.sh` script is built from the
> [runtime](https://github.com/kata-containers/runtime) repository.
To perform analysis on Kata logs, use the
[`kata-log-parser`](https://github.com/kata-containers/tests/tree/master/cmd/log-parser)
tool, which can convert the logs into a number of formats (e.g. JSON, TOML, XML, and YAML).
To obtain a full backtrace for the agent, proxy, runtime, or shim, send the
`SIGUSR1` signal to the process ID of the component. The component will send a
backtrace to the system log on the host system and continue to run without
interruption.
For example, to obtain a backtrace for `kata-proxy`:
```
$ sudo kill -USR1 $kata_proxy_pid
$ sudo journalctl -t kata-proxy
```
See [Set up a debug console](#set-up-a-debug-console).
# Appendices
## Checking Docker default runtime
```
$ sudo docker info 2>/dev/null | grep -i "default runtime" | cut -d: -f2- | grep -q runc && echo "SUCCESS" || echo "ERROR: Incorrect default Docker runtime"
```
## Set up a debug console
By default you cannot login to a virtual machine, since this can be sensitive
from a security perspective. Also, allowing logins would require additional
packages in the rootfs, which would increase the size of the image used to
boot the virtual machine.
If you want to login to a virtual machine that hosts your containers, complete
the following steps (using rootfs or initrd image).
> **Note:** The following debug console instructions assume a systemd-based guest
> O/S image. This means you must create a rootfs for a distro that supports systemd.
> Currently, all distros supported by [osbuilder](https://github.com/kata-containers/osbuilder) support systemd
> except for Alpine Linux.
>
> Look for `INIT_PROCESS=systemd` in the osbuilder rootfs `config.sh` file for the distro
> you want to build a rootfs for, to verify that the distro supports systemd.
> For an example, see the [Clear Linux config.sh file](https://github.com/kata-containers/osbuilder/blob/master/rootfs-builder/clearlinux/config.sh).
>
> For a non-systemd-based distro, create an equivalent system
> service using that distro's init system syntax. Alternatively, you can build a distro
> that contains a shell (e.g. `bash(1)`). In this circumstance it is likely you need to install
> additional packages in the rootfs and add `agent.debug_console` to the kernel parameters in the runtime
> config file. This tells the Kata agent to launch the console directly.
>
> Once these steps are taken you can connect to the virtual machine using the [debug console](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#connect-to-the-virtual-machine-using-the-debug-console).
### Create a custom image containing a shell
To login to a virtual machine, you must
[create a custom rootfs](#create-a-rootfs-image) or [custom initrd](#create-an-initrd-image---optional)
containing a shell such as `bash(1)`. For Clear Linux, you will need
an additional `coreutils` package.
For example using CentOS:
```
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/rootfs-builder
$ export ROOTFS_DIR=${GOPATH}/src/github.com/kata-containers/osbuilder/rootfs-builder/rootfs
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true EXTRA_PKGS="bash coreutils" ./rootfs.sh centos'
```
### Create a debug systemd service
Create the service file that starts the shell in the rootfs directory:
```
$ cat <<EOT | sudo tee ${ROOTFS_DIR}/lib/systemd/system/kata-debug.service
[Unit]
Description=Kata Containers debug console
[Service]
Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
StandardInput=tty
StandardOutput=tty
# Must be disabled to allow the job to access the real console
PrivateDevices=no
Type=simple
ExecStart=/bin/bash
Restart=always
EOT
```
**Note**: You might need to adjust the `ExecStart=` path.
Add a dependency to start the debug console:
```
$ sudo sed -i '$a Requires=kata-debug.service' ${ROOTFS_DIR}/lib/systemd/system/kata-containers.target
```
### Build the debug image
Follow the instructions in the [Build a rootfs image](#build-a-rootfs-image)
section when using rootfs, or when using initrd, complete the steps in the [Build an initrd image](#build-an-initrd-image) section.
### Configure runtime for custom debug image
Install the image:
>**Note**: When using an initrd image, replace the below rootfs image name `kata-containers.img`
>with the initrd image name `kata-containers-initrd.img`.
```
$ name="kata-containers-centos-with-debug-console.img"
$ sudo install -o root -g root -m 0640 kata-containers.img "/usr/share/kata-containers/${name}"
```
Next, modify the `image=` values in the `[hypervisor.qemu]` section of the
[configuration file](https://github.com/kata-containers/runtime#configuration)
to specify the full path to the image name specified in the previous code
section. Alternatively, recreate the symbolic link so it points to
the new debug image:
```
$ (cd /usr/share/kata-containers && sudo ln -sf "$name" kata-containers.img)
```
**Note**: You should take care to undo this change after you finish debugging
to prevent all subsequently created containers from using the debug image.
### Ensure debug options are valid
For the debug console to work, you **must** ensure that proxy debug is
**disabled** in the configuration file. If proxy debug is enabled, you will
not see any output when you connect to the virtual machine:
```
$ sudo mkdir -p /etc/kata-containers/
$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers
$ sudo awk '{if (/^\[proxy\.kata\]/) {got=1}; if (got == 1 && /^.*enable_debug/) {print "#enable_debug = true"; got=0; next; } else {print}}' /etc/kata-containers/configuration.toml > /tmp/configuration.toml
$ sudo install -o root -g root -m 0640 /tmp/configuration.toml /etc/kata-containers/
```
### Create a container
Create a container as normal. For example using Docker:
```
$ sudo docker run -ti busybox sh
```
### Connect to the virtual machine using the debug console
```
$ id=$(sudo docker ps -q --no-trunc)
$ console="/var/run/vc/vm/${id}/console.sock"
$ sudo socat "stdin,raw,echo=0,escape=0x11" "unix-connect:${console}"
```
**Note**: You need to press the `RETURN` key to see the shell prompt.
To disconnect from the virtual machine, type `CONTROL+q` (hold down the
`CONTROL` key and press `q`).
### Obtain details of the image
If the image is created using
[osbuilder](https://github.com/kata-containers/osbuilder), the following YAML
file exists and contains details of the image and how it was created:
```
$ cat /var/lib/osbuilder/osbuilder.yaml
```
## Capturing kernel boot logs
Sometimes it is useful to capture the kernel boot messages from a Kata Container
launch. If the container launches to the point where you can `exec` into it, and
if the container has the necessary components installed, you can often execute the `dmesg`
command inside the container to view the kernel boot logs.
If, however, you are unable to `exec` into the container, you can enable some debug
options to have the kernel boot messages logged into the system journal.
Which debug options you enable depends on whether you are using the hypervisor `vsock` mode,
as defined by the `use_vsock` setting in the `[hypervisor.qemu]` section of
the configuration file. The following details the settings:
- For `use_vsock = false`:
- Set `enable_debug = true` in both the `[hypervisor.qemu]` and `[proxy.kata]` sections
- For `use_vsock = true`:
- Set `enable_debug = true` in both the `[hypervisor.qemu]` and `[shim.kata]` sections
For generic information on enabling debug in the configuration file, see the
[Enable full debug](#enable-full-debug) section.
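For illustration, with `use_vsock = false` the relevant configuration file excerpt would look
something like:

```toml
[hypervisor.qemu]
enable_debug = true

[proxy.kata]
enable_debug = true
```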
The kernel boot messages will appear in the `kata-proxy` or `kata-shim` log, as appropriate,
such as:
```bash
$ sudo journalctl -t kata-proxy
-- Logs begin at Thu 2020-02-13 16:20:40 UTC, end at Thu 2020-02-13 16:30:23 UTC. --
...
Feb 13 16:20:56 minikube kata-proxy[17371]: time="2020-02-13T16:20:56.608714324Z" level=info msg="[ 1.418768] brd: module loaded\n" name=kata-proxy pid=17371 sandbox=a13ffb2b9b5a66f7787bdae9a427fa954a4d21ec4031d0179eee2573986a8a6e source=agent
Feb 13 16:20:56 minikube kata-proxy[17371]: time="2020-02-13T16:20:56.628493231Z" level=info msg="[ 1.438612] loop: module loaded\n" name=kata-proxy pid=17371 sandbox=a13ffb2b9b5a66f7787bdae9a427fa954a4d21ec4031d0179eee2573986a8a6e source=agent
Feb 13 16:20:56 minikube kata-proxy[17371]: time="2020-02-13T16:20:56.67707956Z" level=info msg="[ 1.487165] pmem0: p1\n" name=kata-proxy pid=17371 sandbox=a13ffb2b9b5a66f7787bdae9a427fa954a4d21ec4031d0179eee2573986a8a6e source=agent
...
```
## Running standalone
It is possible to start the runtime without a container manager. This is
mostly useful for testing and debugging purposes.
### Create an OCI bundle
To build an
[OCI bundle](https://github.com/opencontainers/runtime-spec/blob/master/bundle.md),
required by the runtime:
```
$ bundle="/tmp/bundle"
$ rootfs="$bundle/rootfs"
$ mkdir -p "$rootfs" && (cd "$bundle" && kata-runtime spec)
$ sudo docker export $(sudo docker create busybox) | tar -C "$rootfs" -xvf -
```
### Launch the runtime to create a container
Run the runtime standalone by providing it with the path to the
previously-created [OCI bundle](#create-an-oci-bundle):
```
$ sudo kata-runtime --log=/dev/stdout run --bundle "$bundle" foo
```

docs/Documentation-Requirements.md Normal file

@@ -0,0 +1,237 @@
* [Introduction](#introduction)
* [General requirements](#general-requirements)
* [Linking advice](#linking-advice)
* [Notes](#notes)
* [Warnings and other admonitions](#warnings-and-other-admonitions)
* [Files and command names](#files-and-command-names)
* [Code blocks](#code-blocks)
* [Images](#images)
* [Spelling](#spelling)
* [Names](#names)
* [Version numbers](#version-numbers)
* [The apostrophe](#the-apostrophe)
# Introduction
This document outlines the requirements for all documentation in the [Kata
Containers](https://github.com/kata-containers) project.
# General requirements
All documents must:
- Be written in simple English.
- Be written in [GitHub Flavored Markdown](https://github.github.com/gfm) format.
- Have a `.md` file extension.
- Include a TOC (table of contents) at the top of the document with links to
all heading sections. We recommend using the
[`kata-check-markdown`](https://github.com/kata-containers/tests/tree/master/cmd/check-markdown)
tool to generate the TOC.
- Be linked to from another document in the same repository.
Although GitHub allows navigation of the entire repository, it should be
possible to access all documentation purely by navigating links inside the
documents, starting from the repository's top-level `README`.
If you are adding a new document, ensure you add a link to it in the
"closest" `README` above the directory where you created your document.
- If the document needs to tell the user to manipulate files or commands, use a
[code block](#code-blocks) to specify the commands.
If at all possible, ensure that every command in the code blocks can be run
non-interactively. If this is possible, the document can be tested by the CI,
which can then execute the specified commands to ensure the instructions are
correct. This avoids documents becoming out of date over time.
# Linking advice
Linking between documents is strongly encouraged to help users and developers
navigate the material more easily. Linking also avoids repetition - if a
document needs to refer to a concept already well described in another section
or document, do not repeat it; link to it
(the [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself) principle).
Another advantage of this approach is that changes only need to be applied in
one place: where the concept is defined (not the potentially many places where
the concept is referred to using a link).
# Notes
Important information that is not part of the main document flow should be
added as a Note in bold with all content contained within a block quote:
> **Note:** This is a really important point!
>
> This particular note also spans multiple lines. The entire note should be
> included inside the quoted block.
If there are multiple notes, bullets should be used:
> **Notes:**
>
> - I am important point 1.
>
> - I am important point 2.
>
> - I am important point *n*.
# Warnings and other admonitions
Use the same approach as for [notes](#notes). For example:
> **Warning:** Running this command assumes you understand the risks of doing so.
Other examples:
> **Warnings:**
>
> - Do not unplug your computer!
> - Always read the label.
> - Do not pass go. Do not collect $200.
> **Tip:** Read the manual page for further information on available options.
> **Hint:** Look behind you!
# Files and command names
All filenames and command names should be rendered in a fixed-format font
using backticks:
> Run the `foo` command to make it work.
> Modify the `bar` option in file `/etc/baz/baz.conf`.
Render any options that need to be specified to the command in the same manner:
> Run `bar -axz --apply foo.yaml` to make the changes.
For standard system commands, it is also acceptable to specify the name along
with the manual page section that documents the command in brackets:
> The command to list files in a directory is called `ls(1)`.
# Code blocks
This section lists requirements for displaying commands and command output.
The requirements must be adhered to since documentation containing code blocks
is validated by the CI system, which executes the command blocks with the help
of the
[doc-to-script](https://github.com/kata-containers/tests/tree/master/.ci/kata-doc-to-script.sh)
utility.
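For example, a conversion invocation has the form used by the `test-install-docs.sh` script
earlier in this commit (the file names below are purely illustrative):

```bash
$ # file names are illustrative; pass the source document and the output script path
$ bash kata-doc-to-script.sh some-install-guide.md some-install-guide.sh
```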
- If a document includes commands the user should run, they **MUST** be shown
in a *bash code block* with every command line prefixed with `$ ` to denote
a shell prompt:
<pre>
```bash
$ echo "Hi - I am some bash code"
$ sudo docker run -ti busybox true
$ [ $? -eq 0 ] && echo "success"
```
</pre>
- If a command needs to be run as the `root` user, it must be run using
`sudo(8)`.
```bash
$ sudo echo "I'm running as root"
```
- All lines beginning with `# ` should be comment lines, *NOT* commands to run as
the `root` user.
- Try to avoid showing the *output* of commands.
The reasons for this:
- Command output can change, leading to confusion when the output the user
sees does not match the output in the documentation.
- There is the risk the user will get confused between what parts of the
block refer to the commands they should type and the output that they
should not.
- It can make the document look overly "busy" or complex.
In the unusual case that you need to display command *output*, use an
unadorned code block (\`\`\`):
<pre>
The output of the `ls(1)` command is expected to be:
```
ls: cannot access '/foo': No such file or directory
```
</pre>
- Long lines should not be split across multiple lines with the `\`
continuation character.
GitHub automatically renders such blocks with scrollbars. Consequently,
backslash continuation characters are not necessary and are a visual
distraction. These characters also mess up a user's shell history when
commands are pasted into a terminal.
# Images
All binary image files must be in a standard and well-supported format such as
PNG. This format is preferred for line-art images such as diagrams because the
information is stored more efficiently, leading to smaller file sizes. JPEG
images are acceptable, but this format is more appropriate for storing
photographic images.
When possible, generate images using freely available software.
Every binary image file **MUST** be accompanied by the "source" file used to
generate it. This guarantees that the image can be modified by updating the
source file and re-generating the binary format image file.
Ideally, the format of all image source files is an open standard, non-binary
one such as SVG. Text formats are highly preferable because you can manipulate
and compare them with standard tools (e.g. `diff(1)`).
# Spelling
Since this project uses a number of terms not found in conventional
dictionaries, we have a
[spell checking tool](https://github.com/kata-containers/tests/tree/master/cmd/check-spelling)
that checks both dictionary words and the additional terms we use.
Run the spell checking tool on your document before raising a PR to ensure it
is free of mistakes.
If your document introduces new terms, you need to update the custom
dictionary used by the spell checking tool to incorporate the new words.
# Names
Occasionally documents need to specify the names of people. Write such names in
backticks. The main reason for this is to keep the [spell checker](#spelling) happy (since
it cannot manage all possible names). Additionally, since backticks render in a
fixed-width font, they make the names clearer:
> Welcome to `Clark Kent`, the newest member of the Kata Containers Architecture Committee.
# Version numbers
Write version numbers in backticks. This keeps the [spell checker](#spelling)
happy and, since backticks render in a fixed-width font, it also makes the
numbers clearer:
> Ensure you are using at least version `1.2.3-alpha3.wibble.1` of the tool.
# The apostrophe
The apostrophe character (`'`) must **only** be used for showing possession
("Peter's book") and for standard contractions (such as "don't").
Use double quotes ("...") in all other circumstances where you use quotes outside of
[code blocks](#code-blocks).

docs/LICENSE Normal file

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


@ -0,0 +1,25 @@
# Licensing strategy
* [Project License](#project-license)
* [License file](#license-file)
* [License for individual files](#license-for-individual-files)
## Project License
The license for the [Kata Containers](https://github.com/kata-containers)
project is [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0).
## License file
All repositories in the project have a top-level file called `LICENSE`. This
file lists full details of all licenses used by the repository.
## License for individual files
Where possible, all files in all repositories also contain an
[SPDX](https://spdx.org) license identifier. This provides fine-grained
licensing and allows automated tooling to check the license of individual
files.
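For illustration, the identifier normally appears in the header comment of each file. A typical shell script header might look like the following (a sketch; the copyright line naturally varies per file):
```
#!/bin/bash
#
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
```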
This SPDX license identifier requirement is enforced by the
[CI (Continuous Integration) system](https://github.com/kata-containers/tests/blob/master/.ci/static-checks.sh).

280
docs/Limitations.md Normal file

@ -0,0 +1,280 @@
* [Overview](#overview)
* [Definition of a limitation](#definition-of-a-limitation)
* [Scope](#scope)
* [Contributing](#contributing)
* [Pending items](#pending-items)
* [Runtime commands](#runtime-commands)
* [checkpoint and restore](#checkpoint-and-restore)
* [events command](#events-command)
* [update command](#update-command)
* [Networking](#networking)
* [Docker swarm and compose support](#docker-swarm-and-compose-support)
* [Resource management](#resource-management)
* [docker run and shared memory](#docker-run-and-shared-memory)
* [docker run and sysctl](#docker-run-and-sysctl)
* [Docker daemon features](#docker-daemon-features)
* [SELinux support](#selinux-support)
* [Architectural limitations](#architectural-limitations)
* [Networking limitations](#networking-limitations)
* [Support for joining an existing VM network](#support-for-joining-an-existing-vm-network)
* [docker --net=host](#docker---nethost)
* [docker run --link](#docker-run---link)
* [Host resource sharing](#host-resource-sharing)
* [docker run --privileged](#docker-run---privileged)
* [Miscellaneous](#miscellaneous)
* [Docker --security-opt option partially supported](#docker---security-opt-option-partially-supported)
* [Appendices](#appendices)
* [The constraints challenge](#the-constraints-challenge)
---
# Overview
A [Kata Container](https://github.com/kata-containers) utilizes a Virtual Machine (VM) to enhance security and
isolation of container workloads. As a result, the system has a number of differences
and limitations when compared with the default [Docker*](https://www.docker.com/) runtime,
[`runc`](https://github.com/opencontainers/runc).
Some of these limitations have potential solutions, whereas others exist
due to fundamental architectural differences generally related to the
use of VMs.
The [Kata Container runtime](https://github.com/kata-containers/runtime)
launches each container within its own hardware isolated VM, and each VM has
its own kernel. Due to this higher degree of isolation, certain container
capabilities cannot be supported or are implicitly enabled through the VM.
# Definition of a limitation
The [Open Container Initiative](https://www.opencontainers.org/)
[Runtime Specification](https://github.com/opencontainers/runtime-spec) ("OCI spec")
defines the minimum specifications a runtime must support to interoperate with
container managers such as Docker. If a runtime does not support some aspect
of the OCI spec, it is by definition a limitation.
However, the OCI runtime reference implementation (`runc`) does not perfectly
align with the OCI spec itself.
Further, since the default OCI runtime used by Docker is `runc`, Docker
expects runtimes to behave as `runc` does. This implies that another form of
limitation arises if the behavior of a runtime implementation does not align
with that of `runc`. Having two standards complicates the challenge of
supporting a Docker environment since a runtime must support the official OCI
spec and the non-standard extensions provided by `runc`.
# Scope
Each known limitation is captured in a separate GitHub issue that contains
detailed information about the issue. These issues are tagged with the
`limitation` label. This document is a curated summary of important known
limitations and provides links to the relevant GitHub issues.
The following link shows the latest list of limitations:
- https://github.com/pulls?utf8=%E2%9C%93&q=is%3Aopen+label%3Alimitation+org%3Akata-containers
# Contributing
If you would like to work on resolving a limitation, please refer to the
[contributors guide](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md).
If you wish to raise an issue for a new limitation, either
[raise an issue directly on the runtime](https://github.com/kata-containers/runtime/issues/new)
or see the
[project table of contents](https://github.com/kata-containers/kata-containers)
for advice on which repository to raise the issue against.
# Pending items
This section lists items that might be possible to fix.
## Runtime commands
### checkpoint and restore
The runtime does not provide `checkpoint` and `restore` commands. There
are discussions about using VM save and restore to give [`criu`](https://github.com/checkpoint-restore/criu)-like functionality, which might provide a solution.
Note that the OCI standard does not specify `checkpoint` and `restore`
commands.
See issue https://github.com/kata-containers/runtime/issues/184 for more information.
### events command
The runtime does not fully implement the `events` command. `OOM` notifications and `Intel RDT` stats are not fully supported.
Note that the OCI standard does not specify an `events` command.
See issues https://github.com/kata-containers/runtime/issues/308 and https://github.com/kata-containers/runtime/issues/309 for more information.
### update command
Currently, only block I/O weight is not supported;
all other configurations are supported and work properly.
## Networking
### Docker swarm and compose support
The newest version of Docker supported is specified by the
`externals.docker.version` variable in the
[versions database](https://github.com/kata-containers/runtime/blob/master/versions.yaml).
Basic Docker swarm support works. However, if you want to use custom networks
with Docker's swarm, an older version of Docker is required. This is specified
by the `externals.docker.meta.swarm-version` variable in the
[versions database](https://github.com/kata-containers/runtime/blob/master/versions.yaml).
See issue https://github.com/kata-containers/runtime/issues/175 for more information.
Docker Compose normally uses custom networks, so it is subject to the same limitations.
## Resource management
Because VMs differ from traditional Linux containers in how their CPU and
memory are allocated and shared across the host system, implementing an
equivalent method for these commands is potentially challenging.
See issue https://github.com/clearcontainers/runtime/issues/341 and [the constraints challenge](#the-constraints-challenge) for more information.
For CPU resource management, see
[CPU constraints](design/vcpu-handling.md).
### docker run and shared memory
The runtime does not implement the `docker run --shm-size` option to
set the size of the `/dev/shm` `tmpfs` within the container. It is possible to pass this configuration value into the VM so the appropriate mount command happens at launch time.
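For reference, this is the kind of invocation affected (a hedged example; the size value and image are arbitrary):
```
$ docker run --runtime kata-runtime --shm-size=512m busybox df /dev/shm
```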
See issue https://github.com/kata-containers/kata-containers/issues/21 for more information.
### docker run and sysctl
The `docker run --sysctl` feature is not implemented. At the runtime
level, this equates to the `linux.sysctl` OCI configuration. Docker
allows configuring the sysctl settings that support namespacing. From a security and isolation point of view, it might make sense to set them in the VM, which isolates sysctl settings. Also, given that each Kata Container has its own kernel, we can support setting sysctl settings that are not namespaced. In some cases, we might need to support configuring some of the settings on both the host-side Kata Container namespace and the Kata Containers kernel.
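For reference, the feature corresponds to invocations such as the following (a hedged example; the sysctl shown is just one of the namespaced settings Docker accepts):
```
$ docker run --runtime kata-runtime --sysctl net.ipv4.ip_forward=1 busybox cat /proc/sys/net/ipv4/ip_forward
```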
See issue https://github.com/kata-containers/runtime/issues/185 for more information.
## Docker daemon features
Some features enabled or implemented via the
[`dockerd` daemon](https://docs.docker.com/config/daemon/) configuration are not yet
implemented.
### SELinux support
The `dockerd` configuration option `"selinux-enabled": true` is not presently implemented
in Kata Containers. Enabling this option causes an OCI runtime error.
See issue https://github.com/kata-containers/runtime/issues/784 for more information.
The consequence of this is that the [Docker --security-opt is only partially supported](#docker---security-opt-option-partially-supported).
Kubernetes [SELinux labels](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#assign-selinux-labels-to-a-container) will also not be applied.
# Architectural limitations
This section lists items that might not be fixed due to fundamental
architectural differences between "soft containers" (i.e. traditional Linux*
containers) and those based on VMs.
## Networking limitations
### Support for joining an existing VM network
Docker supports the ability for containers to join another container's
network namespace with the `docker run --net=container:<name|id>` syntax. This allows
multiple containers to share a common network namespace and the network
interfaces placed in the network namespace. Kata Containers does not
support network namespace sharing. If a Kata Container is set up to
share the network namespace of a `runc` container, the runtime
effectively takes over all the network interfaces assigned to the
namespace and binds them to the VM. Consequently, the `runc` container loses
its network connectivity.
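To illustrate, the following is the kind of namespace-sharing request that does not work as expected with Kata Containers (a hedged sketch; the container name and runtime names are arbitrary):
```
$ docker run -d --runtime runc --name first busybox sleep 99999
$ docker run --runtime kata-runtime --net=container:first busybox ip addr
```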
### docker --net=host
Docker host networking (`docker run --net=host`) is not supported.
It is not possible to directly access the host networking configuration
from within the VM.
The `--net=host` option can still be used with `runc` containers and
inter-mixed with running Kata Containers, thus enabling use of `--net=host`
when necessary.
Note that currently, passing the `--net=host` option to a
Kata Container may result in the Kata Containers networking setup
modifying, re-configuring, and therefore possibly breaking the host
networking setup. Do not use `--net=host` with Kata Containers.
### docker run --link
The runtime does not support the `docker run --link` command. This
command is now deprecated by Docker and we have no intention of adding support.
Equivalent functionality can be achieved with the newer Docker networking commands.
See more documentation at
[docs.docker.com](https://docs.docker.com/engine/userguide/networking/default_network/dockerlinks/).
## Host resource sharing
### docker run --privileged
Privileged support in Kata Containers is essentially different from that of `runc` containers.
Kata Containers does support the `docker run --privileged` command, but in this case full access
to the guest VM is provided in addition to some host access.
The container runs with elevated capabilities within the guest and is granted
access to guest devices instead of the host devices.
This is also true when using `privileged: true` in a Kubernetes `securityContext`.
The container may also be granted full access to a subset of host devices
(https://github.com/kata-containers/runtime/issues/1568).
See [Privileged Kata Containers](how-to/privileged.md) for how to configure some of this behavior.
# Miscellaneous
This section lists limitations where the possible solutions are uncertain.
## Docker --security-opt option partially supported
The `--security-opt=` option used by Docker is partially supported.
As of today, only the `--security-opt=no-new-privileges` and
`--security-opt seccomp=/path/to/seccomp/profile.json` options are supported.
Note: The `--security-opt apparmor=your_profile` is not yet supported. See https://github.com/kata-containers/runtime/issues/707.
# Appendices
## The constraints challenge
Applying resource constraints such as cgroup, CPU, memory, and storage to a workload is not always straightforward with a VM-based system. A Kata Container runs in an isolated environment inside a virtual machine. This, coupled with the architecture of Kata Containers, offers many more possibilities than are available to traditional Linux containers due to the various layers and contexts.
In some cases it might be necessary to apply the constraints to multiple levels. In other cases, the hardware isolated VM provides equivalent functionality to the requested constraint.
The following examples outline some of the various areas where constraints can be applied:
- Inside the VM
Constrain the guest kernel. This can be achieved by passing particular values through the kernel command line used to boot the guest kernel (see the sketch after this list). Alternatively, sysctl values can be applied at early boot.
- Inside the container
Constrain the container created inside the VM.
- Outside the VM:
- Constrain the hypervisor process by applying host-level constraints.
- Constrain all processes running inside the hypervisor.
This can be achieved by specifying particular hypervisor configuration options.
- Constrain the [shim](https://github.com/kata-containers/shim) process.
This process represents the container workload running inside the VM.
- Constrain the [proxy](https://github.com/kata-containers/proxy) process.
Note that in some circumstances it might be necessary to apply particular constraints
to more than one of the previous areas to achieve the desired level of isolation and resource control.
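As a concrete illustration of the "Inside the VM" case, guest kernel parameters can be supplied through the runtime configuration file. This is a hedged sketch based on the Kata 1.x `configuration.toml`; check the option name against your installed configuration, and note that the value shown is only an example:
```
[hypervisor.qemu]
# Extra parameters appended to the guest kernel command line (example value only).
kernel_params = "loglevel=4"
```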

8
docs/Makefile Normal file

@ -0,0 +1,8 @@
#
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
default:
@true

76
docs/README.md Normal file

@ -0,0 +1,76 @@
# Documentation
* [Getting Started](#getting-started)
* [More User Guides](#more-user-guides)
* [Kata Use-Cases](#kata-use-cases)
* [Developer Guide](#developer-guide)
* [Design and Implementations](#design-and-implementations)
* [How to Contribute](#how-to-contribute)
* [Code Licensing](#code-licensing)
* [The Release Process](#the-release-process)
* [Help Improving the Documents](#help-improving-the-documents)
* [Website Changes](#website-changes)
The [Kata Containers](https://github.com/kata-containers)
documentation repository hosts overall system documentation, with information
common to multiple components.
For details of the other Kata Containers repositories, see the
[repository summary](https://github.com/kata-containers/kata-containers).
## Getting Started
* [Installation guides](./install/README.md): Install and run Kata Containers with Docker or Kubernetes
## More User Guides
* [Upgrading](Upgrading.md): how to upgrade from [Clear Containers](https://github.com/clearcontainers) and [runV](https://github.com/hyperhq/runv) to [Kata Containers](https://github.com/kata-containers) and how to upgrade an existing Kata Containers system to the latest version.
* [Limitations](Limitations.md): differences and limitations compared with the default [Docker](https://www.docker.com/) runtime,
[`runc`](https://github.com/opencontainers/runc).
### Howto guides
See the [howto documentation](how-to).
## Kata Use-Cases
* [GPU Passthrough with Kata](./use-cases/GPU-passthrough-and-Kata.md)
* [OpenStack Zun with Kata Containers](./use-cases/zun_kata.md)
* [SR-IOV with Kata](./use-cases/using-SRIOV-and-kata.md)
* [Intel QAT with Kata](./use-cases/using-Intel-QAT-and-kata.md)
* [VPP with Kata](./use-cases/using-vpp-and-kata.md)
* [SPDK vhost-user with Kata](./use-cases/using-SPDK-vhostuser-and-kata.md)
## Developer Guide
Documents that help to understand and contribute to Kata Containers.
### Design and Implementations
* [Kata Containers Architecture](design/architecture.md): Architectural overview of Kata Containers
* [Kata Containers design](./design/README.md): More Kata Containers design documents
### How to Contribute
* [Developer Guide](Developer-Guide.md): Set up the Kata Containers development environment
* [How to contribute to Kata Containers](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md)
* [Code of Conduct](CODE_OF_CONDUCT.md)
### Code Licensing
* [Licensing](Licensing-strategy.md): About the licensing strategy of Kata Containers.
### The Release Process
* [Release strategy](Stable-Branch-Strategy.md)
* [Release Process](Release-Process.md)
## Help Improving the Documents
* [Documentation Requirements](Documentation-Requirements.md)
## Website Changes
If you have a suggestion for how we can improve the
[website](https://katacontainers.io), please raise an issue (or a PR) on
[the repository that holds the source for the website](https://github.com/OpenStackweb/kata-netlify-refresh).

118
docs/Release-Process.md Normal file

@ -0,0 +1,118 @@
# How to do a Kata Containers Release
This document lists the tasks required to create a Kata Release.
<!-- TOC START min:1 max:3 link:true asterisk:false update:true -->
- [How to do a Kata Containers Release](#how-to-do-a-kata-containers-release)
- [Requirements](#requirements)
- [Release Process](#release-process)
- [Bump all Kata repositories](#bump-all-kata-repositories)
- [Merge all bump version Pull requests](#merge-all-bump-version-pull-requests)
- [Tag all Kata repositories](#tag-all-kata-repositories)
- [Check GitHub Actions](#check-github-actions)
- [Create OBS Packages](#create-obs-packages)
- [Create release notes](#create-release-notes)
- [Announce the release](#announce-the-release)
<!-- TOC END -->
## Requirements
- [hub](https://github.com/github/hub)
- OBS account with permissions on [`/home:katacontainers`](https://build.opensuse.org/project/subprojects/home:katacontainers)
- GitHub permissions to push tags and create releases in Kata repositories.
- GPG configured to sign git tags. https://help.github.com/articles/generating-a-new-gpg-key/
- You should configure your GitHub account to use your SSH keys (to push to branches). See https://help.github.com/articles/adding-a-new-ssh-key-to-your-github-account/.
  - As an alternative, configure `hub` to push and fork with HTTPS: `git config --global hub.protocol https` (not tested yet).
## Release Process
### Bump all Kata repositories
- We have set up a Jenkins job to bump the version in the `VERSION` file in all Kata repositories. Go to the [Jenkins bump-job page](http://jenkins.katacontainers.io/job/release/build) to trigger a new job.
- Start a new job with variables for the job passed as:
- `BRANCH=<the-branch-you-want-to-bump>`
- `NEW_VERSION=<the-new-kata-version>`
For example, in the case where you want to make a patch release `1.10.2`, the variable `NEW_VERSION` should be `1.10.2` and `BRANCH` should point to `stable-1.10`. In the case of an alpha or release candidate release, `BRANCH` should point to the `master` branch.
Alternatively, you can also bump the repositories using a script in the Kata packaging repo
```
$ cd ${GOPATH}/src/github.com/kata-containers/packaging/release
$ export NEW_VERSION=<the-new-kata-version>
$ export BRANCH=<the-branch-you-want-to-bump>
$ ./update-repository-version.sh -p "$NEW_VERSION" "$BRANCH"
```
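For instance, the patch release example above (`1.10.2` cut from `stable-1.10`) would look like:
```
$ cd ${GOPATH}/src/github.com/kata-containers/packaging/release
$ export NEW_VERSION=1.10.2
$ export BRANCH=stable-1.10
$ ./update-repository-version.sh -p "$NEW_VERSION" "$BRANCH"
```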
### Merge all bump version Pull requests
- The above step will create a GitHub pull request in the Kata projects. Trigger the CI using the `/test` command on each bump pull request.
- Check any failures and fix if needed.
- Work with the Kata approvers to verify that the CI works and the pull requests are merged.
### Tag all Kata repositories
Once all the pull requests to bump versions in all Kata repositories are merged,
tag all the repositories as shown below.
```
$ cd ${GOPATH}/src/github.com/kata-containers/packaging/release
$ git checkout <kata-branch-to-release>
$ git pull
$ ./tag_repos.sh -p -b "$BRANCH" tag
```
### Check GitHub Actions
We make use of [GitHub Actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/master/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is automatically triggered by the above step when a new tag is pushed to the `kata-containers/kata-containers` repository.
Check the [actions status page](https://github.com/kata-containers/kata-containers/actions) to verify all steps in the actions workflow have completed successfully. On success, a static tarball containing Kata release artifacts will be uploaded to the [Release page](https://github.com/kata-containers/runtime/releases).
### Create OBS Packages
- We have set up an [Azure Pipelines](https://azure.microsoft.com/en-us/services/devops/pipelines/) job
to trigger generation of Kata packages in [OBS](https://build.opensuse.org/).
Go to the [Azure Pipelines job that creates OBS packages](https://dev.azure.com/kata-containers/release-process/_release?_a=releases&view=mine&definitionId=1).
- Click on "Create release" (blue button, at top right corner).
It should prompt you for variables to be passed to the release job. They should look like:
```
BRANCH="<the-kata-branch-being-released>"
BUILD_HEAD=false
OBS_BRANCH="<the-kata-branch-being-released>"
```
Note: If the release is `Alpha`, `Beta`, or `RC` (that is, part of a `master` release), please use `OBS_BRANCH=master`.
The above step creates OBS packages for Kata for the various distributions that Kata supports and tests them as well.
- Verify that the packages have built successfully by checking the [Kata OBS project page](https://build.opensuse.org/project/subprojects/home:katacontainers).
- Make sure the packages work correctly. This can be done manually or via the [package testing pipeline](http://jenkins.katacontainers.io/job/package-release-testing).
Ensure the packages have already been published by OBS before this step.
The pipeline prompts you for variables to be passed to it:
```
BRANCH="<kata-branch-to-release>"
NEW_VERSION=<the-version-you-expect-to-be-packaged|latest>
```
Note: `latest` will verify that a package provides the latest Kata tag in that branch.
### Create release notes
We have a script in place in the packaging repository to create release notes that include a short-log of the commits across Kata components.
Run the script as shown below:
```
$ cd ${GOPATH}/src/github.com/kata-containers/packaging/release
# Note: OLD_VERSION is where the script should start to get changes.
$ ./runtime-release-notes.sh ${OLD_VERSION} ${NEW_VERSION} > notes.md
# Edit the `notes.md` file to review and make any changes to the release notes.
# Add the release notes in GitHub runtime.
$ hub -C "${GOPATH}/src/github.com/kata-containers/runtime" release edit -F notes.md "${NEW_VERSION}"
```
### Announce the release
Announce in [Slack and the Kata mailing list](https://github.com/kata-containers/community#join-us) that the new release is ready.


@ -0,0 +1,151 @@
Branch and release maintenance for the Kata Containers project.
## Introduction
This document provides details about Kata Containers releases.
## Versioning
The Kata Containers project uses [semantic versioning](http://semver.org/) for all releases.
Semantic versions are comprised of three fields in the form:
```
MAJOR.MINOR.PATCH
```
For example: `1.0.0`, `1.0.0-rc.5`, and `99.123.77+foo.bar.baz.5`.
Semantic versioning is used since the version number conveys clear
information about how a new version relates to the previous version.
For example, semantic versioning provides assurances that allow users to know
when they must upgrade compared with when they might want to upgrade:
- When `PATCH` increases, the new release contains important **security fixes**
and an upgrade is recommended.
The patch field can contain extra details after the number.
Dashes denote pre-release versions. `1.0.0-rc.5` in the example denotes the fifth release
candidate for release `1.0.0`. Plus signs denote other details. In our example, `+foo.bar.baz.5`
provides additional information regarding release `99.123.77` in the previous example.
- When `MINOR` increases, the new release adds **new features** but *without
changing the existing behavior*.
- When `MAJOR` increases, the new release adds **new features, bug fixes, or
both** and *changes the behavior from the previous release* (incompatible with previous releases).
A major release will also likely require a change of the container manager version used,
for example Docker\*. Please refer to the release notes for further details.
## Release Strategy
Any new features added since the last release will be available in the next minor
release. These will include bug fixes as well. To facilitate a stable user environment,
Kata provides stable branch-based releases and a master branch release.
## Stable branch patch criteria
No new features should be introduced to stable branches. This is intended to limit risk to users,
providing only bug and security fixes.
## Branch Management
Kata Containers will maintain two stable release branches in addition to the master branch.
Once a new MAJOR or MINOR release is created from master, a new stable branch is created for
the prior MAJOR or MINOR release and the older stable branch is no longer maintained. End of
maintenance for a branch is announced on the Kata Containers mailing list. Users can determine
the version currently installed by running `kata-runtime kata-env`. It is recommended to use the
latest stable branch available.
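For example, one quick way to pick the version out of the `kata-runtime kata-env` output (a hedged sketch; the exact field names can vary between releases):
```
$ kata-runtime kata-env | grep -i version
```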
A couple of examples follow to help clarify this process.
### New bug fix introduced
A bug fix is submitted against the runtime which does not introduce new inter-component dependencies.
This fix is applied to both the master and stable branches, and there is no need to create a new
stable branch.
| Branch | Original version | New version |
|--|--|--|
| `master` | `1.3.0-rc0` | `1.3.0-rc1` |
| `stable-1.2` | `1.2.0` | `1.2.1` |
| `stable-1.1` | `1.1.2` | `1.1.3` |
### New release with a feature or change adding a new inter-component dependency
A new feature is introduced, which adds a new inter-component dependency. In this case a new stable
branch is created (stable-1.3) starting from master and the older stable branch (stable-1.1)
is dropped from maintenance.
| Branch | Original version | New version |
|--|--|--|
| `master` | `1.3.0-rc1` | `1.3.0` |
| `stable-1.3` | N/A| `1.3.0` |
| `stable-1.2` | `1.2.1` | `1.2.2` |
| `stable-1.1` | `1.1.3` | (unmaintained) |
Note that the stable-1.1 branch will still exist with tag 1.1.3, but under current plans it is
not maintained further. The next tag applied to master will be 1.4.0-alpha0. We would then
create a couple of alpha releases gathering features targeted for that particular release (in
this case 1.4.0), followed by a release candidate. The release candidate marks a feature freeze.
A new stable branch is created for the release candidate. Only bug fixes and any security issues
are added to the branch going forward until release 1.4.0 is made.
## Backporting Process
Development occurs against the master branch, and applicable code commits should also be submitted
against the stable branches. Some guidelines for this process follow:
1. Only bug and security fixes which do not introduce inter-component dependencies are
candidates for stable branches. These PRs should be marked with "bug" in GitHub.
2. Once a PR is created against master which meets the requirements of (1), a comparable one
should also be submitted against the stable branches. It is the responsibility of the submitter
to apply their pull request against stable, and it is the responsibility of the
reviewers to help identify stable-candidate pull requests.
## Continuous Integration Testing
The test repository is forked to create stable branches from master. Full CI
runs on each stable and master PR using its respective tests repository branch.
### An alternative method for CI testing
Ideally, the continuous integration infrastructure will run the same test suite on both master
and the stable branches. When tests are modified or new feature tests are introduced, explicit
logic should exist within the testing CI to make sure only applicable tests are executed against
stable and master. While this is not in place currently, it should be considered in the long term.
## Release Management
### Patch releases
Releases are made every three weeks and include a GitHub release as
well as binary packages. These patch releases are made for both stable branches, and a "release candidate"
for the next `MAJOR` or `MINOR` is created from master. If there are no changes across all the repositories, no
release is created and an announcement is made on the developer mailing list to highlight this.
If a release is being made, each repository is tagged for this release, regardless
of whether changes are introduced. The release schedule can be seen on the
[release rotation wiki page](https://github.com/kata-containers/community/wiki/Release-Team-Rota).
If there is urgent need for a fix, a patch release will be made outside of the planned schedule.
The process followed for making a release can be found at [Release Process](Release-Process.md).
## Minor releases
### Frequency
Minor releases are less frequent in order to provide a more stable baseline for users. They are currently
made on a twelve-week cadence. As the Kata Containers code base has reached a certain level of
maturity, we have lengthened the release cycle from six weeks to twelve weeks. The release schedule can be seen on the
[release rotation wiki page](https://github.com/kata-containers/community/wiki/Release-Team-Rota).
### Compatibility
Kata guarantees compatibility between components that are within one minor release of each other.
This is critical for dependencies which cross between host (runtime, shim, proxy) and
the guest (hypervisor, rootfs and agent). For example, consider a cluster with a long-running
deployment, workload-never-dies, all on Kata version 1.1.3 components. If the operator updates
the Kata components to the next minor release (i.e. 1.2.0), we need to guarantee that the 1.2.0
runtime still communicates with the 1.1.3 agent within workload-never-dies.
Handling live-update is out of the scope of this document. See this [`kata-runtime` issue](https://github.com/kata-containers/runtime/issues/492) for details.

185
docs/Upgrading.md Normal file

@ -0,0 +1,185 @@
* [Introduction](#introduction)
* [Unsupported scenarios](#unsupported-scenarios)
* [Maintenance Warning](#maintenance-warning)
* [Upgrade from Clear Containers](#upgrade-from-clear-containers)
* [Stop all running Clear Container instances](#stop-all-running-clear-container-instances)
* [Configuration migration](#configuration-migration)
* [Remove Clear Containers packages](#remove-clear-containers-packages)
* [Fedora](#fedora)
* [Ubuntu](#ubuntu)
* [Disable old container manager configuration](#disable-old-container-manager-configuration)
* [Install Kata Containers](#install-kata-containers)
* [Create a Kata Container](#create-a-kata-container)
* [Upgrade from runV](#upgrade-from-runv)
* [Upgrade Kata Containers](#upgrade-kata-containers)
* [Appendices](#appendices)
* [Assets](#assets)
* [Guest kernel](#guest-kernel)
* [Image](#image)
* [Determining asset versions](#determining-asset-versions)
# Introduction
This document explains how to upgrade from
[Clear Containers](https://github.com/clearcontainers) and [runV](https://github.com/hyperhq/runv) to
[Kata Containers](https://github.com/kata-containers) and how to upgrade an existing
Kata Containers system to the latest version.
# Unsupported scenarios
Upgrading a Clear Containers system on the following distributions is **not**
supported since the installation process for these distributions makes use of
unpackaged components:
- [CentOS](https://github.com/clearcontainers/runtime/blob/master/docs/centos-installation-guide.md)
- [BCLinux](https://github.com/clearcontainers/runtime/blob/master/docs/bclinux-installation-guide.md)
- [RHEL](https://github.com/clearcontainers/runtime/blob/master/docs/rhel-installation-guide.md)
- [SLES](https://github.com/clearcontainers/runtime/blob/master/docs/sles-installation-guide.md)
Additionally, upgrading
[Clear Linux](https://github.com/clearcontainers/runtime/blob/master/docs/clearlinux-installation-guide.md)
is not supported as Kata Containers packages do not yet exist.
# Maintenance Warning
The Clear Containers codebase is no longer being developed. New releases
will be considered only for significant bug fixes.
The main development focus is now on Kata Containers. All Clear Containers
users are encouraged to switch to Kata Containers.
# Upgrade from Clear Containers
Since Kata Containers can co-exist on the same system as Clear Containers, if
you already have Clear Containers installed, the upgrade process is simply to
install Kata Containers. However, since Clear Containers is
[no longer being actively developed](#maintenance-warning),
you are encouraged to remove Clear Containers from your systems.
## Stop all running Clear Container instances
Assuming a Docker\* system, to stop all currently running Clear Containers:
```
$ for container in $(sudo docker ps -q); do sudo docker stop $container; done
```
## Configuration migration
The automatic migration of
[Clear Containers configuration](https://github.com/clearcontainers/runtime#configuration) to
[Kata Containers configuration](https://github.com/kata-containers/runtime#configuration) is
not supported.
If you have made changes to your Clear Containers configuration, you should
review those changes and decide whether to manually apply those changes to the
Kata Containers configuration.
> **Note**: This step must be completed before continuing to
> [remove the Clear Containers packages](#remove-clear-containers-packages) since doing so will
> *delete the default Clear Containers configuration file from your system*.
## Remove Clear Containers packages
> **Warning**: If you have modified your
> [Clear Containers configuration](https://github.com/clearcontainers/runtime#configuration),
> you might want to make a safe copy of the configuration file before removing the
> packages since doing so will *delete the default configuration file*
### Fedora
```
$ sudo -E dnf remove cc-runtime\* cc-proxy\* cc-shim\* linux-container clear-containers-image qemu-lite cc-ksm-throttler
$ sudo rm /etc/yum.repos.d/home:clearcontainers:clear-containers-3.repo
```
### Ubuntu
```
$ sudo apt-get purge cc-runtime\* cc-proxy\* cc-shim\* linux-container clear-containers-image qemu-lite cc-ksm-throttler
$ sudo rm /etc/apt/sources.list.d/clear-containers.list
```
## Disable old container manager configuration
Assuming a Docker installation, remove the Docker configuration for Clear
Containers:
```
$ sudo rm /etc/systemd/system/docker.service.d/clear-containers.conf
```
## Install Kata Containers
Follow one of the [installation guides](https://github.com/kata-containers/documentation/tree/master/install).
## Create a Kata Container
```
$ sudo docker run -ti busybox sh
```
# Upgrade from runV
runV and Kata Containers can run together on the same system without affecting each other, as long as they are
not configured to use the same container root storage. Currently, runV defaults to `/run/runv` and Kata Containers
defaults to `/var/run/kata-containers`.
To upgrade from runV, you need to perform a fresh install of Kata Containers by following one of
the [installation guides](https://github.com/kata-containers/documentation/tree/master/install).
# Upgrade Kata Containers
As shown in the
[installation instructions](https://github.com/kata-containers/documentation/blob/master/install),
Kata Containers provides binaries for popular distributions in their native
packaging formats. This allows Kata Containers to be upgraded using the
standard package management tools for your distribution.
# Appendices
## Assets
Kata Containers requires additional resources to create a virtual machine
container. These resources are called
[Kata Containers assets](./design/architecture.md#assets),
which comprise a guest kernel and a root filesystem or initrd image. This
section describes when these components are updated.
Since the official assets are packaged, they are automatically upgraded when
new package versions are published.
> **Warning**: Note that if you use custom assets (by modifying the
> [Kata Runtime configuration file](https://github.com/kata-containers/runtime/#configuration)),
> it is your responsibility to ensure they are updated as necessary.
### Guest kernel
The `kata-linux-container` package contains a Linux\* kernel based on the
latest vanilla version of the
[long-term kernel](https://www.kernel.org/)
plus a small number of
[patches](https://github.com/kata-containers/packaging/tree/master/kernel).
The `Longterm` branch is only updated with
[important bug fixes](https://www.kernel.org/category/releases.html),
meaning this package is only updated when necessary.
The guest kernel package is updated when a new long-term kernel is released
and when any patch updates are required.
### Image
The `kata-containers-image` package is updated only when critical updates are
available for the packages used to create it, such as:
- systemd
- [Kata Containers Agent](https://github.com/kata-containers/agent)
### Determining asset versions
To see which versions of the assets are being used, run:
```
$ kata-runtime kata-env
```

1
docs/VERSION Normal file

@ -0,0 +1 @@
1.11.0-rc0

10
docs/design/README.md Normal file

@ -0,0 +1,10 @@
# Design
Kata Containers design documents:
- [Kata Containers architecture](architecture.md)
- [API Design of Kata Containers](kata-api-design.md)
- [Design requirements for Kata Containers](kata-design-requirements.md)
- [VSocks](VSocks.md)
- [VCPU handling](vcpu-handling.md)
- [Host cgroups](host-cgroups.md)

134
docs/design/VSocks.md Normal file

@ -0,0 +1,134 @@
# Kata Containers and VSOCKs
- [Introduction](#introduction)
- [proxy communication diagram](#proxy-communication-diagram)
- [VSOCK communication diagram](#vsock-communication-diagram)
- [System requirements](#system-requirements)
- [Advantages of using VSOCKs](#advantages-of-using-vsocks)
- [High density](#high-density)
- [Reliability](#reliability)
## Introduction
There are two different ways processes in the virtual machine can communicate
with processes in the host. The first one is by using serial ports, where the
processes in the virtual machine can read/write data from/to a serial port
device and the processes in the host can read/write data from/to a Unix socket.
Most GNU/Linux distributions have support for serial ports, making it the most
portable solution. However, the serial link limits read/write access to one
process at a time. To deal with this limitation, the resources (serial port and
Unix socket) must be multiplexed. In Kata Containers those resources are
multiplexed by using [`kata-proxy`][2] and [Yamux][3]; the following diagram shows
how this is implemented.
### proxy communication diagram
```
.----------------------.
| .------------------. |
| | .-----. .-----. | |
| | |cont1| |cont2| | |
| | `-----' `-----' | |
| | \ / | |
| | .---------. | |
| | | agent | | |
| | `---------' | |
| | | | |
| | .-----------. | |
| |POD |serial port| | |
| `----|-----------|-' |
| | socket | |
| `-----------' |
| | |
| .-------. |
| | proxy | |
| `-------' |
| | |
| .------./ \.------. |
| | shim | | shim | |
| `------' `------' |
| Host |
`----------------------'
```
A newer, simpler method is [VSOCKs][4], which can accept connections from
multiple clients and does not require multiplexers ([`kata-proxy`][2] and
[Yamux][3]). The following diagram shows how it's implemented in Kata Containers.
### VSOCK communication diagram
```
.----------------------.
| .------------------. |
| | .-----. .-----. | |
| | |cont1| |cont2| | |
| | `-----' `-----' | |
| | | | | |
| | .---------. | |
| | | agent | | |
| | `---------' | |
| | | | | |
| | POD .-------. | |
| `-----| vsock |----' |
| `-------' |
| | | |
| .------. .------. |
| | shim | | shim | |
| `------' `------' |
| Host |
`----------------------'
```
## System requirements
The host Linux kernel version must be greater than or equal to v4.8, and the
`vhost_vsock` module must be loaded or built-in (`CONFIG_VHOST_VSOCK=y`). To
load the module, run the following command:
```
$ sudo modprobe -i vhost_vsock
```
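To check that the module is available, something like the following can be used (note that a built-in `vhost_vsock` does not appear in `lsmod`; in that case, look for the `/dev/vhost-vsock` device instead):
```
$ lsmod | grep vhost_vsock
$ ls /dev/vhost-vsock
```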
The Kata Containers version must be greater than or equal to 1.2.0 and `use_vsock`
must be set to `true` in the runtime [configuration file][1].
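The setting looks as follows in the configuration file (a hedged sketch; in Kata 1.x the option lives in the hypervisor section of `configuration.toml`, for example under `[hypervisor.qemu]`):
```
[hypervisor.qemu]
# Use a vsock device for agent communication instead of
# a virtio-serial port multiplexed by kata-proxy.
use_vsock = true
```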
### With a VMware guest
To use Kata Containers with VSOCKs in a VMware guest environment, first stop the `vmware-tools` service and unload the VMware Linux kernel module.
```
$ sudo systemctl stop vmware-tools
$ sudo modprobe -r vmw_vsock_vmci_transport
$ sudo modprobe -i vhost_vsock
```
## Advantages of using VSOCKs
### High density
Using a proxy to multiplex the connections between the VM and the host uses
4.5MB per [POD][5]. In a high-density deployment this could add up to GBs of
memory that could have been used to host more PODs. When we talk about density,
each kilobyte matters and it might be the decisive factor between running another
POD or not. For example, if you have 500 PODs running on a server, the same
number of [`kata-proxy`][2] processes will be running and consuming around
2250MB of RAM. Before deciding not to use VSOCKs, you should ask
yourself: how many more containers could run in the RAM consumed by the
Kata proxies?
### Reliability
[`kata-proxy`][2] is in charge of multiplexing the connections between virtual
machine and host processes; if it dies, all connections are broken. For example,
if you have a [POD][5] with 10 containers running and `kata-proxy` dies, it would
be impossible to contact your containers, even though they would still be running.
Since communication via VSOCKs is direct, the only way to lose communication
with the containers is if the VM itself or the [shim][6] dies; if this happens,
the containers are removed automatically.
[1]: https://github.com/kata-containers/runtime#configuration
[2]: https://github.com/kata-containers/proxy
[3]: https://github.com/hashicorp/yamux
[4]: https://wiki.qemu.org/Features/VirtioVsock
[5]: ./vcpu-handling.md#virtual-cpus-and-kubernetes-pods
[6]: https://github.com/kata-containers/shim

(Six binary image files added; contents not shown: 23 KiB, 25 KiB, 21 KiB, 293 KiB, 114 KiB, 46 KiB.)

@ -0,0 +1,47 @@
@startuml
User->CLI: network add-interface
CLI->virtcontainers: AddInterface
virtcontainers->QEMU:QMP-hot-add-network
virtcontainers->agent:UpdateInterface
note right
the agent's UpdateInterface code will need to be augmented
to have a timeout/wait associated with this for the network
device to appear (i.e. wait for QMP to complete)
end note
agent->User: err, interface detail
User->CLI: network del-interface
CLI->virtcontainers: DeleteInterface
note right
There will be no call to the agent. We rely on guest kernel
to clean up any state associated with the interface.
end note
virtcontainers->QEMU:QMP-hot-delete-network
virtcontainers->User: err, interface detail
User->CLI: network list-interface
CLI->virtcontainers: ListInterfaces
virtcontainers->agent:ListInterfaces
agent->User: err, list of interface details
User->CLI: network update-routes
CLI->virtcontainers: UpdateRoutes
note right
routes are handled on a 'one shot' basis,
setting all of the routes for the network. This needs to
be called after interfaces are added, and should be called
after interfaces are removed. It should be fine to call once
after adding all of the expected interfaces. If you know all
the resulting routes, simply calling set routes with the
complete list should suffice.
end note
virtcontainers->agent:UpdateRoutes
agent->User: err, list of routes
User->CLI: network list-routes
CLI->virtcontainers: ListRoutes
virtcontainers->agent:ListRoutes
agent->User: err, list of routes
@enduml

(Two binary image files added; contents not shown: 51 KiB, 509 KiB.)

@ -0,0 +1,174 @@
Title: Kata Flow
participant CRI
participant CRIO
participant Kata Runtime
participant virtcontainers
participant hypervisor
participant agent
participant shim-pod
participant shim-ctr
participant proxy
# Run the sandbox
CRI->CRIO: RunPodSandbox()
CRIO->Kata Runtime: create
Kata Runtime->virtcontainers: CreateSandbox()
Note left of virtcontainers: Sandbox\nReady
virtcontainers->virtcontainers: createNetwork()
virtcontainers->virtcontainers: Execute PreStart Hooks
virtcontainers->+hypervisor: Start VM (inside the netns)
hypervisor-->-virtcontainers: VM started
virtcontainers->proxy: Start Proxy
proxy->hypervisor: Connect the VM
virtcontainers->+agent: CreateSandbox()
agent-->-virtcontainers: Sandbox Created
virtcontainers->+agent: CreateContainer()
agent-->-virtcontainers: Container Created
virtcontainers->shim-pod: Start Shim
shim-pod->agent: ReadStdout() (blocking call)
shim-pod->agent: ReadStderr() (blocking call)
shim-pod->agent: WaitProcess() (blocking call)
Note left of virtcontainers: Container-pod\nReady
virtcontainers-->Kata Runtime: End of CreateSandbox()
Kata Runtime-->CRIO: End of create
CRIO->Kata Runtime: start
Kata Runtime->virtcontainers: StartSandbox()
Note left of virtcontainers: Sandbox\nRunning
virtcontainers->+agent: StartContainer()
agent-->-virtcontainers: Container Started
Note left of virtcontainers: Container-pod\nRunning
virtcontainers->virtcontainers: Execute PostStart Hooks
virtcontainers-->Kata Runtime: End of StartSandbox()
Kata Runtime-->CRIO: End of start
CRIO-->CRI: End of RunPodSandbox()
# Create the container
CRI->CRIO: CreateContainer()
CRIO->Kata Runtime: create
Kata Runtime->virtcontainers: CreateContainer()
virtcontainers->+agent: CreateContainer()
agent-->-virtcontainers: Container Created
virtcontainers->shim-ctr: Start Shim
shim-ctr->agent: ReadStdout() (blocking call)
shim-ctr->agent: ReadStderr() (blocking call)
shim-ctr->agent: WaitProcess() (blocking call)
Note left of virtcontainers: Container-ctr\nReady
virtcontainers-->Kata Runtime: End of CreateContainer()
Kata Runtime-->CRIO: End of create
CRIO-->CRI: End of CreateContainer()
# Start the container
CRI->CRIO: StartContainer()
CRIO->Kata Runtime: start
Kata Runtime->virtcontainers: StartContainer()
virtcontainers->+agent: StartContainer()
agent-->-virtcontainers: Container Started
Note left of virtcontainers: Container-ctr\nRunning
virtcontainers-->Kata Runtime: End of StartContainer()
Kata Runtime-->CRIO: End of start
CRIO-->CRI: End of StartContainer()
# Stop the container
CRI->CRIO: StopContainer()
CRIO->Kata Runtime: kill
Kata Runtime->virtcontainers: KillContainer()
virtcontainers->+agent: SignalProcess()
alt SIGTERM OR SIGKILL
agent-->shim-ctr: WaitProcess() returns
end
agent-->-virtcontainers: Process Signalled
virtcontainers-->Kata Runtime: End of KillContainer()
alt SIGTERM OR SIGKILL
Kata Runtime->virtcontainers: StopContainer()
virtcontainers->+shim-ctr: waitForShim()
alt Timeout exceeded
virtcontainers->+agent: SignalProcess(SIGKILL)
agent-->shim-ctr: WaitProcess() returns
agent-->-virtcontainers: Process Signalled by SIGKILL
virtcontainers->shim-ctr: waitForShim()
end
shim-ctr-->-virtcontainers: Shim terminated
virtcontainers->+agent: SignalProcess(SIGKILL)
agent-->-virtcontainers: Process Signalled by SIGKILL
virtcontainers->+agent: RemoveContainer()
agent-->-virtcontainers: Container Removed
Note left of virtcontainers: Container-ctr\nStopped
virtcontainers-->Kata Runtime: End of StopContainer()
end
Kata Runtime-->CRIO: End of kill
CRIO-->CRI: End of StopContainer()
# Remove the container
CRI->CRIO: RemoveContainer()
CRIO->Kata Runtime: delete
Kata Runtime->virtcontainers: DeleteContainer()
virtcontainers->virtcontainers: Delete container resources
virtcontainers-->Kata Runtime: End of DeleteContainer()
Kata Runtime-->CRIO: End of delete
CRIO-->CRI: End of RemoveContainer()
# Stop the sandbox
CRI->CRIO: StopPodSandbox()
CRIO->Kata Runtime: kill
Kata Runtime->virtcontainers: KillContainer()
virtcontainers->+agent: SignalProcess()
alt SIGTERM OR SIGKILL
agent-->shim-pod: WaitProcess() returns
end
agent-->-virtcontainers: Process Signalled
virtcontainers-->Kata Runtime: End of KillContainer()
alt SIGTERM OR SIGKILL
Kata Runtime->virtcontainers: StopSandbox()
loop for each container
alt Container-ctr
virtcontainers->+shim-ctr: waitForShim()
alt Timeout exceeded
virtcontainers->+agent: SignalProcess(SIGKILL)
agent-->shim-ctr: WaitProcess() returns
agent-->-virtcontainers: Process Signalled by SIGKILL
virtcontainers->shim-ctr: waitForShim()
end
shim-ctr-->-virtcontainers: Shim terminated
virtcontainers->+agent: SignalProcess(SIGKILL)
agent-->-virtcontainers: Process Signalled by SIGKILL
virtcontainers->+agent: RemoveContainer()
agent-->-virtcontainers: Container Removed
Note left of virtcontainers: Container-ctr\nStopped
else Container-pod
virtcontainers->+shim-pod: waitForShim()
alt Timeout exceeded
virtcontainers->+agent: SignalProcess(SIGKILL)
agent-->shim-pod: WaitProcess() returns
agent-->-virtcontainers: Process Signalled by SIGKILL
virtcontainers->shim-pod: waitForShim()
end
shim-pod-->-virtcontainers: Shim terminated
virtcontainers->+agent: SignalProcess(SIGKILL)
agent-->-virtcontainers: Process Signalled by SIGKILL
virtcontainers->+agent: RemoveContainer()
agent-->-virtcontainers: Container Removed
Note left of virtcontainers: Container-pod\nStopped
end
end
virtcontainers->+agent: DestroySandbox()
agent-->-virtcontainers: Sandbox Destroyed
virtcontainers->hypervisor: Stop VM
Note left of virtcontainers: Sandbox\nStopped
virtcontainers->virtcontainers: removeNetwork()
virtcontainers->virtcontainers: Execute PostStop Hooks
virtcontainers-->Kata Runtime: End of StopSandbox()
end
Kata Runtime-->CRIO: End of kill
CRIO-->CRI: End of StopPodSandbox()
# Remove the sandbox
CRI->CRIO: RemovePodSandbox()
CRIO->Kata Runtime: delete
Kata Runtime->virtcontainers: DeleteSandbox()
loop for each container
virtcontainers->virtcontainers: Delete container resources
end
virtcontainers->virtcontainers: Delete sandbox resources
virtcontainers-->Kata Runtime: End of DeleteSandbox()
Kata Runtime-->CRIO: End of delete
CRIO-->CRI: End of RemovePodSandbox()

(Image file added; diff suppressed because one or more lines are too long: 14 KiB.)

@ -0,0 +1,31 @@
Title: Kata Flow
participant Docker
participant Kata Runtime
participant virtcontainers
participant hypervisor
participant agent
participant shim-pod
participant shim-ctr
participant proxy
#Docker Create!
Docker->Kata Runtime: create
Kata Runtime->virtcontainers: CreateSandbox()
Note left of virtcontainers: Sandbox\nReady
virtcontainers->virtcontainers: createNetwork()
virtcontainers->virtcontainers: Execute PreStart Hooks
virtcontainers->+hypervisor: Start VM (inside the netns)
hypervisor-->-virtcontainers: VM started
virtcontainers->proxy: Start Proxy
proxy->hypervisor: Connect the VM
virtcontainers->+agent: CreateSandbox()
agent-->-virtcontainers: Sandbox Created
virtcontainers->+agent: CreateContainer()
agent-->-virtcontainers: Container Created
virtcontainers->shim-pod: Start Shim
shim-pod->agent: ReadStdout() (blocking call)
shim-pod->agent: ReadStderr() (blocking call)
shim-pod->agent: WaitProcess() (blocking call)
Note left of virtcontainers: Container\nReady
virtcontainers-->Kata Runtime: End of CreateSandbox()
Kata Runtime-->Docker: End of create

(Image file added; diff suppressed because one or more lines are too long: 7.8 KiB.)

@ -0,0 +1,20 @@
Title: Docker Exec
participant Docker
participant kata-runtime
participant virtcontainers
participant shim
participant hypervisor
participant agent
participant proxy
#Docker Exec
Docker->kata-runtime: exec
kata-runtime->virtcontainers: EnterContainer()
virtcontainers->agent: exec
agent->virtcontainers: Process started in the container
virtcontainers->shim: start shim
shim->agent: ReadStdout()
shim->agent: ReadStderr()
shim->agent: WaitProcess()
virtcontainers->kata-runtime: End of EnterContainer()
kata-runtime-->Docker: End of exec

(Image file added; diff suppressed because one or more lines are too long: 7.3 KiB.)

@ -0,0 +1,20 @@
Title: Docker Start
participant Docker
participant Kata Runtime
participant virtcontainers
participant hypervisor
participant agent
participant shim-pod
participant shim-ctr
participant proxy
#Docker Start
Docker->Kata Runtime: start
Kata Runtime->virtcontainers: StartSandbox()
Note left of virtcontainers: Sandbox\nRunning
virtcontainers->+agent: StartContainer()
agent-->-virtcontainers: Container Started
Note left of virtcontainers: Container-pod\nRunning
virtcontainers->virtcontainers: Execute PostStart Hooks
virtcontainers-->Kata Runtime: End of StartSandbox()
Kata Runtime-->Docker: End of start

(Three image files added; contents not shown: 163 KiB, 190 KiB, 102 KiB.)
654
docs/design/architecture.md Normal file

@ -0,0 +1,654 @@
# Kata Containers Architecture
* [Overview](#overview)
* [Virtualization](#virtualization)
* [Guest assets](#guest-assets)
* [Guest kernel](#guest-kernel)
* [Guest Image](#guest-image)
* [Root filesystem image](#root-filesystem-image)
* [Initrd image](#initrd-image)
* [Agent](#agent)
* [Runtime](#runtime)
* [Configuration](#configuration)
* [Significant OCI commands](#significant-oci-commands)
* [create](#create)
* [start](#start)
* [exec](#exec)
* [kill](#kill)
* [delete](#delete)
* [Proxy](#proxy)
* [Shim](#shim)
* [Networking](#networking)
* [Storage](#storage)
* [Kubernetes Support](#kubernetes-support)
* [Problem Statement](#problem-statement)
* [Containerd](#containerd)
* [CRI-O](#cri-o)
* [OCI Annotations](#oci-annotations)
* [Mixing VM based and namespace based runtimes](#mixing-vm-based-and-namespace-based-runtimes)
* [Appendices](#appendices)
* [DAX](#dax)
## Overview
This is an architectural overview of Kata Containers, based on the 1.5.0 release.
The two primary deliverables of the Kata Containers project are a container runtime
and a CRI-friendly shim. There is also a CRI-friendly library API behind them.
The [Kata Containers runtime (`kata-runtime`)](https://github.com/kata-containers/runtime)
is compatible with the [OCI](https://github.com/opencontainers) [runtime specification](https://github.com/opencontainers/runtime-spec)
and therefore works seamlessly with the
[Docker\* Engine](https://www.docker.com/products/docker-engine) pluggable runtime
architecture. It also supports the [Kubernetes\* Container Runtime Interface (CRI)](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-node/container-runtime-interface.md)
through the [CRI-O\*](https://github.com/kubernetes-incubator/cri-o) and
[Containerd CRI Plugin\*](https://github.com/containerd/cri) implementation. In other words, you can transparently
select between the [default Docker and CRI shim runtime (runc)](https://github.com/opencontainers/runc)
and `kata-runtime`.
`kata-runtime` creates a QEMU\*/KVM virtual machine for each container or pod
that the Docker engine or `kubelet` (Kubernetes) creates, respectively.
![Docker and Kata Containers](arch-images/docker-kata.png)
The [`containerd-shim-kata-v2` (shown as `shimv2` from this point onwards)](https://github.com/kata-containers/runtime/tree/master/containerd-shim-v2)
is another Kata Containers entrypoint, which
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata.
With `shimv2`, Kubernetes can launch Pod and OCI compatible containers with one shim (the `shimv2`) per Pod instead
of `2N+1` shims (a `containerd-shim` and a `kata-shim` for each container and the Pod sandbox itself), and no standalone
`kata-proxy` process even if no VSOCK is available.
![Kubernetes integration with shimv2](arch-images/shimv2.svg)
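As a sketch of how `shimv2` is typically wired up, containerd's CRI plugin can be pointed at the Kata shim with a runtime entry such as the following (treat this as illustrative; the exact section naming varies across containerd versions):
```
# /etc/containerd/config.toml (fragment)
[plugins.cri.containerd.runtimes.kata]
  runtime_type = "io.containerd.kata.v2"
```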
The container process is then spawned by the
[agent](https://github.com/kata-containers/agent), a process running
as a daemon inside the virtual machine. `kata-agent` runs a gRPC server in
the guest using a VIRTIO serial or VSOCK interface which QEMU exposes as a socket
file on the host. `kata-runtime` uses a gRPC protocol to communicate with
the agent. This protocol allows the runtime to send container management
commands to the agent. The protocol is also used to carry the I/O streams (stdout,
stderr, stdin) between the containers and the management engines (e.g. Docker Engine).
For any given container, both the init process and all potentially executed
commands within that container, together with their related I/O streams, need
to go through the VIRTIO serial or VSOCK interface exported by QEMU.
In the VIRTIO serial case, a [Kata Containers
proxy (`kata-proxy`)](https://github.com/kata-containers/proxy) instance is
launched for each virtual machine to handle multiplexing and demultiplexing
those commands and streams.
On the host, each container process's removal is handled by a reaper in the higher
layers of the container stack. In the case of Docker or containerd it is handled by `containerd-shim`.
In the case of CRI-O it is handled by `conmon`. For clarity, for the remainder
of this document the term "container process reaper" will be used to refer to
either reaper. As Kata Containers processes run inside their own virtual machines,
the container process reaper cannot monitor, control
or reap them. `kata-runtime` fixes that issue by creating an [additional shim process
(`kata-shim`)](https://github.com/kata-containers/shim) between the container process
reaper and `kata-proxy`. A `kata-shim` instance will both forward signals and `stdin`
streams to the container process on the guest and pass the container `stdout`
and `stderr` streams back up the stack to the CRI shim or Docker via the container process
reaper. `kata-runtime` creates a `kata-shim` daemon for each container and for each
OCI command received to run within an already running container (for example, `docker
exec`).
Since Kata Containers version 1.5, the newly introduced `shimv2` has integrated the
functionality of the reaper, the `kata-runtime`, the `kata-shim`, and the `kata-proxy`.
As a result, none of the additional processes previously listed are required.
The container workload, that is, the actual OCI bundle rootfs, is exported from the
host to the virtual machine. In the case where a block-based graph driver is
configured, `virtio-scsi` will be used. In all other cases a 9pfs VIRTIO mount point
will be used. `kata-agent` uses this mount point as the root filesystem for the
container processes.
## Virtualization
How Kata Containers maps container concepts to virtual machine technologies, and how this is realized in the multiple
hypervisors and VMMs that Kata supports is described within the [virtualization documentation](./virtualization.md)
## Guest assets
The hypervisor will launch a virtual machine which includes a minimal guest kernel
and a guest image.
### Guest kernel
The guest kernel is passed to the hypervisor and used to boot the virtual
machine. The default kernel provided in Kata Containers is highly optimized for
kernel boot time and minimal memory footprint, providing only those services
required by a container workload. This is based on a very current upstream Linux
kernel.
### Guest image
Kata Containers supports both an `initrd` and `rootfs` based minimal guest image.
#### Root filesystem image
The default packaged root filesystem image, sometimes referred to as the "mini O/S", is a
highly optimized container bootstrap system based on [Clear Linux](https://clearlinux.org/). It provides an extremely minimal environment and
has a highly optimized boot path.
The only services running in the context of the mini O/S are the init daemon
(`systemd`) and the [Agent](#agent). The real workload the user wishes to run
is created using libcontainer, creating a container in the same manner that is done
by `runc`.
For example, when `docker run -ti ubuntu date` is run:
- The hypervisor will boot the mini-OS image using the guest kernel.
- `systemd`, running inside the mini-OS context, will launch the `kata-agent` in
the same context.
- The agent will create a new confined context to run the specified command in
(`date` in this example).
- The agent will then execute the command (`date` in this example) inside this
new context, first setting the root filesystem to the expected Ubuntu\* root
filesystem.
#### Initrd image
A compressed `cpio(1)` archive, created from a rootfs which is loaded into memory and used as part of the Linux startup process. During startup, the kernel unpacks it into a special instance of a `tmpfs` that becomes the initial root filesystem.
The only service running in the context of the initrd is the [Agent](#agent) as the init daemon. The real workload the user wishes to run is created using libcontainer, creating a container in the same manner that is done by `runc`.
## Agent
[`kata-agent`](https://github.com/kata-containers/agent) is a process running in the
guest as a supervisor for managing containers and processes running within
those containers.
The `kata-agent` execution unit is the sandbox. A `kata-agent` sandbox is a container sandbox defined by a set of namespaces (NS, UTS, IPC and PID). `kata-runtime` can
run several containers per VM to support container engines that require multiple
containers running inside a pod. In the case of Docker, `kata-runtime` creates a
single container per pod.
`kata-agent` communicates with the other Kata components over gRPC.
It also runs a [`yamux`](https://github.com/hashicorp/yamux) server on the same gRPC URL.
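The following is a minimal sketch of that pattern: a gRPC server served over a
[`yamux`](https://github.com/hashicorp/yamux) session, in the same way `kata-agent`
exposes its services over the VIRTIO serial or VSOCK connection. The socket path is
a placeholder and the service registration is elided; this is illustrative, not the
real agent code.

```Go
package main

import (
	"log"
	"net"

	"github.com/hashicorp/yamux"
	"google.golang.org/grpc"
)

func main() {
	// A Unix socket stands in for the VIRTIO serial port or VSOCK
	// endpoint the agent would open inside the guest.
	l, err := net.Listen("unix", "/tmp/agent.sock")
	if err != nil {
		log.Fatal(err)
	}
	conn, err := l.Accept()
	if err != nil {
		log.Fatal(err)
	}
	// A yamux server session implements net.Listener: every stream the
	// peer opens is handed to the gRPC server as a new connection.
	session, err := yamux.Server(conn, nil)
	if err != nil {
		log.Fatal(err)
	}
	server := grpc.NewServer()
	// Service registration elided: the real agent would register its
	// gRPC services here before serving.
	log.Fatal(server.Serve(session))
}
```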
The `kata-agent` makes use of [`libcontainer`](https://github.com/opencontainers/runc/tree/master/libcontainer)
to manage the lifecycle of the container. This way the `kata-agent` reuses most
of the code used by [`runc`](https://github.com/opencontainers/runc).
### Agent gRPC protocol
placeholder
## Runtime
`kata-runtime` is an OCI compatible container runtime and is responsible for handling
all commands specified by
[the OCI runtime specification](https://github.com/opencontainers/runtime-spec)
and launching `kata-shim` instances.
`kata-runtime` heavily utilizes the
[virtcontainers project](https://github.com/containers/virtcontainers), which
provides a generic, runtime-specification agnostic, hardware-virtualized containers
library.
### Configuration
The runtime uses a TOML format configuration file called `configuration.toml`. By
default this file is installed in the `/usr/share/defaults/kata-containers`
directory and contains various settings such as the paths to the hypervisor,
the guest kernel and the mini-OS image.
Most users will not need to modify the configuration file.
The file is well commented and provides a few "knobs" that can be used to modify
the behavior of the runtime.
The configuration file is also used to enable runtime [debug output](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#enable-full-debug).
### Significant OCI commands
Here we describe how `kata-runtime` handles the most important OCI commands.
#### `create`
When handling the OCI
[`create`](https://github.com/kata-containers/runtime/blob/master/cli/create.go)
command, `kata-runtime` goes through the following steps:
1. Create the network namespace where the VM and shim processes will be spawned.
2. Call into the pre-start hooks. One of them should be responsible for creating
the `veth` network pair between the host network namespace and the freshly
created network namespace.
3. Scan the network from the new network namespace, and create a MACVTAP connection
between the `veth` interface and a `tap` interface into the VM.
4. Start the VM inside the network namespace by providing the `tap` interface
previously created.
5. Wait for the VM to be ready.
6. Start `kata-proxy`, which will connect to the created VM. The `kata-proxy` process
will take care of proxying all communications with the VM. Kata has a single proxy
per VM.
7. Communicate with `kata-agent` (through the proxy) to configure the sandbox
inside the VM.
8. Communicate with `kata-agent` to create the container, relying on the OCI
configuration file `config.json` initially provided to `kata-runtime`. This
spawns the container process inside the VM, leveraging the `libcontainer` package.
9. Start `kata-shim`, which will connect to the gRPC server socket provided by the `kata-proxy`. `kata-shim` will spawn a few Go routines to parallelize the blocking calls `ReadStdout()`, `ReadStderr()` and `WaitProcess()`. Both `ReadStdout()` and `ReadStderr()` are run through infinite loops since `kata-shim` wants the output of those until the container process terminates. `WaitProcess()` is a unique call which returns the exit code of the container process when it terminates inside the VM. Note that `kata-shim` is started inside the network namespace, to allow upper layers to determine which network namespace has been created, by checking the `kata-shim` process. It also creates a new PID namespace by entering into it. This ensures that all `kata-shim` processes belonging to the same container will get killed when the `kata-shim` representing the container process terminates.
At this point the container process is running inside of the VM, and it is represented
on the host system by the `kata-shim` process.
![`kata-oci-create`](arch-images/kata-oci-create.svg)
#### `start`
With traditional containers, [`start`](https://github.com/kata-containers/runtime/blob/master/cli/start.go) launches a container process in its own set of namespaces. With Kata Containers, the main task of `kata-runtime` is to ask [`kata-agent`](#agent) to start the container workload inside the virtual machine. `kata-runtime` will run through the following steps:
1. Communicate with `kata-agent` (through the proxy) to start the container workload
inside the VM. If, for example, the command to execute inside of the container is `top`,
the `kata-shim`'s `ReadStdout()` will start returning the text output of `top`, and
`WaitProcess()` will continue to block as long as the `top` process runs.
2. Call into the post-start hooks. Usually, this is a no-op since nothing is provided
(this needs clarification)
![`kata-oci-start`](arch-images/kata-oci-start.svg)
#### `exec`
OCI [`exec`](https://github.com/kata-containers/runtime/blob/master/cli/exec.go) allows you to run an additional command within an already running
container. In Kata Containers, this is handled as follows:
1. A request is sent to the `kata agent` (through the proxy) to start a new process
inside an existing container running within the VM.
2. A new `kata-shim` is created within the same network and PID namespaces as the
original `kata-shim` representing the container process. This new `kata-shim` is
used for the new exec process.
Now the process started with `exec` is running within the VM, sharing `uts`, `pid`, `mnt` and `ipc` namespaces with the container process.
![`kata-oci-exec`](arch-images/kata-oci-exec.svg)
#### `kill`
When sending the OCI [`kill`](https://github.com/kata-containers/runtime/blob/master/cli/kill.go) command, the container runtime should send a
[UNIX signal](https://en.wikipedia.org/wiki/Unix_signal) to the container process.
A `kill` sending a termination signal such as `SIGKILL` or `SIGTERM` is expected
to terminate the container process. In the context of a traditional container,
this means stopping the container. For `kata-runtime`, this translates to stopping
the container and the VM associated with it.
1. Send a request to kill the container process to the `kata-agent` (through the proxy).
2. Wait for the `kata-shim` process to exit.
3. Force kill the container process if the `kata-shim` process did not return before a
timeout. This is done by communicating with `kata-agent` (through the proxy) and
sending a `SIGKILL` signal to the container process inside the VM.
4. Wait for the `kata-shim` process to exit, and return an error if the timeout is
reached again.
5. Communicate with `kata-agent` (through the proxy) to remove the container
configuration from the VM.
6. Communicate with `kata-agent` (through the proxy) to destroy the sandbox
configuration from the VM.
7. Stop the VM.
8. Remove all network configurations inside the network namespace and delete the
namespace.
9. Execute post-stop hooks.
If `kill` was invoked with a non-termination signal, this simply signals the container process. Otherwise, everything has been torn down, and the VM has been removed.
#### `delete`
[`delete`](https://github.com/kata-containers/runtime/blob/master/cli/delete.go) removes all internal resources related to a container. A running container
cannot be deleted unless the OCI runtime is explicitly asked to, using the
`--force` flag.
If the sandbox is not stopped, but the particular container process has already
returned on its own, `kata-runtime` will first go through most of the steps a `kill`
would go through for a termination signal. After this process, or if the sandbox was already stopped to begin with, `kata-runtime` will:
1. Remove container resources. Every file kept under `/var/{lib,run}/virtcontainers/sandboxes/<sandboxID>/<containerID>`.
2. Remove sandbox resources. Every file kept under `/var/{lib,run}/virtcontainers/sandboxes/<sandboxID>`.
At this point, everything related to the container should have been removed from the host system, and no related process should be running.
#### `state`
[`state`](https://github.com/kata-containers/runtime/blob/master/cli/state.go)
returns the status of the container. For `kata-runtime`, this means being
able to detect if the container is still running by looking at the state of `kata-shim`
process representing this container process.
1. Obtain the container status by checking information stored on disk. (clarification needed)
2. Check `kata-shim` process representing the container.
3. In case the container status on disk was supposed to be `ready` or `running`,
and the `kata-shim` process no longer exists, this involves the detection of a
stopped container. This means that before returning the container status,
the container has to be properly stopped. Here are the steps involved in this detection:
1. Wait for the `kata-shim` process to exit.
2. Force kill the container process if the `kata-shim` process did not return before a timeout. This is done by communicating with `kata-agent` (through the proxy) and sending a `SIGKILL` signal to the container process inside the VM.
3. Wait for the `kata-shim` process to exit, and return an error if the timeout is reached again.
4. Communicate with `kata-agent` (through the proxy) to remove the container configuration from the VM.
4. Return container status.
## Proxy
Communication with the VM can be achieved by either `virtio-serial` or, if the host
kernel is newer than v4.8, a virtual socket (`vsock`). The default is `virtio-serial`.
The VM will likely be running multiple container processes. In the event `virtio-serial`
is used, the I/O streams associated with each process need to be multiplexed and demultiplexed on the host. On systems with `vsock` support, this component becomes optional.
`kata-proxy` is a process offering access to the VM [`kata-agent`](https://github.com/kata-containers/agent)
to multiple `kata-shim` and `kata-runtime` clients associated with the VM. Its
main role is to route the I/O streams and signals between each `kata-shim`
instance and the `kata-agent`.
`kata-proxy` connects to `kata-agent` on a Unix domain socket that `kata-runtime` provides
while spawning `kata-proxy`.
`kata-proxy` uses [`yamux`](https://github.com/hashicorp/yamux) to multiplex gRPC
requests on its connection to the `kata-agent`.
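A minimal sketch of that client side is shown below, assuming a hypothetical Unix
socket path: a custom gRPC dialer opens the connection, wraps it in a `yamux` client
session, and returns a multiplexed stream for gRPC to use.

```Go
package main

import (
	"context"
	"log"
	"net"

	"github.com/hashicorp/yamux"
	"google.golang.org/grpc"
)

// dialAgent returns a gRPC connection whose underlying transport is a
// yamux stream over a Unix socket (a per-dial session; a real proxy
// would share one session across all clients).
func dialAgent(socketPath string) (*grpc.ClientConn, error) {
	return grpc.Dial(socketPath,
		grpc.WithInsecure(),
		grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) {
			conn, err := net.Dial("unix", addr)
			if err != nil {
				return nil, err
			}
			session, err := yamux.Client(conn, nil)
			if err != nil {
				return nil, err
			}
			// Each gRPC connection rides on its own multiplexed stream.
			return session.Open()
		}))
}

func main() {
	conn, err := dialAgent("/tmp/agent.sock")
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()
}
```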
When proxy type is configured as `proxyBuiltIn`, we do not spawn a separate
process to proxy gRPC connections. Instead a built-in Yamux gRPC dialer is used to connect
directly to `kata-agent`. This is used by CRI container runtime server `frakti` which
calls directly into `kata-runtime`.
## Shim
A container process reaper, such as Docker's `containerd-shim` or CRI-O's `conmon`,
is designed around the assumption that it can monitor and reap the actual container
process. As the container process reaper runs on the host, it cannot directly
monitor a process running within a virtual machine. At most it can see the QEMU
process, but that is not enough. With Kata Containers, `kata-shim` acts as the
container process that the container process reaper can monitor. Therefore
`kata-shim` needs to handle all container I/O streams (`stdout`, `stdin` and `stderr`)
and forward all signals the container process reaper decides to send to the container
process.
`kata-shim` has an implicit knowledge about which VM agent will handle those streams
and signals and thus acts as an encapsulation layer between the container process
reaper and the `kata-agent`. `kata-shim`:
- Connects to `kata-proxy` on a Unix domain socket. The socket URL is passed from
`kata-runtime` to `kata-shim` when the former spawns the latter along with a
`containerID` and `execID`. The `containerID` and `execID` are used to identify
the true container process that the shim process will be shadowing or representing.
- Forwards the standard input stream from the container process reaper into
`kata-proxy` using the `WriteStdin` gRPC API.
- Reads the standard output/error from the container process.
- Forwards signals it receives from the container process reaper to `kata-proxy`
using the `SignalProcessRequest` gRPC API (see the sketch after this list).
- Monitors terminal changes and forwards them to `kata-proxy` using the `TtyWinResize`
gRPC API.
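The signal-forwarding part of that job can be sketched as follows. The `proxyClient`
interface is a local stand-in for the shim's real gRPC client; only the relaying
pattern is the point here.

```Go
package shim

import (
	"os"
	"os/signal"
	"syscall"
)

// proxyClient is a local stand-in for the gRPC client a shim would hold;
// SignalProcess mirrors the SignalProcessRequest API mentioned above.
type proxyClient interface {
	SignalProcess(containerID, execID string, sig syscall.Signal) error
}

// forwardSignals relays every signal the container process reaper sends
// to the shim on to the real container process inside the VM.
func forwardSignals(client proxyClient, containerID, execID string) {
	sigCh := make(chan os.Signal, 8)
	signal.Notify(sigCh) // no arguments: relay all incoming signals
	go func() {
		for s := range sigCh {
			sig, ok := s.(syscall.Signal)
			if !ok || sig == syscall.SIGCHLD {
				continue // SIGCHLD is for the shim itself, not the workload
			}
			_ = client.SignalProcess(containerID, execID, sig)
		}
	}()
}
```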
## Networking
Containers will typically live in their own, possibly shared, networking namespace.
At some point in a container lifecycle, container engines will set up that namespace
to add the container to a network which is isolated from the host network, but
which is shared between containers.
In order to do so, container engines will usually add one end of a virtual
ethernet (`veth`) pair into the container networking namespace. The other end of
the `veth` pair is added to the host networking namespace.
This is a very namespace-centric approach as many hypervisors (in particular QEMU)
cannot handle `veth` interfaces. Typically, `TAP` interfaces are created for VM
connectivity.
To overcome the incompatibility between typical container engines' expectations
and virtual machines, `kata-runtime` networking transparently connects `veth`
interfaces with `TAP` ones using MACVTAP:
![Kata Containers networking](arch-images/network.png)
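A rough sketch of creating such a MACVTAP interface on top of the `veth` endpoint,
using the `github.com/vishvananda/netlink` package (interface names are illustrative):

```Go
package main

import (
	"log"

	"github.com/vishvananda/netlink"
)

func main() {
	// The veth endpoint that the container engine placed inside the
	// network namespace.
	veth, err := netlink.LinkByName("eth0")
	if err != nil {
		log.Fatal(err)
	}
	// Create a MACVTAP link on top of it; the resulting /dev/tapN
	// character device can then be handed to the VMM.
	tap := &netlink.Macvtap{
		Macvlan: netlink.Macvlan{
			LinkAttrs: netlink.LinkAttrs{
				Name:        "tap0_kata",
				ParentIndex: veth.Attrs().Index,
			},
			Mode: netlink.MACVLAN_MODE_BRIDGE,
		},
	}
	if err := netlink.LinkAdd(tap); err != nil {
		log.Fatal(err)
	}
}
```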
Kata Containers supports both
[CNM](https://github.com/docker/libnetwork/blob/master/docs/design.md#the-container-network-model)
and [CNI](https://github.com/containernetworking/cni) for networking management.
### CNM
![High-level CNM Diagram](arch-images/CNM_overall_diagram.png)
__CNM lifecycle__
1. `RequestPool`
2. `CreateNetwork`
3. `RequestAddress`
4. `CreateEndPoint`
5. `CreateContainer`
6. Create `config.json`
7. Create PID and network namespace
8. `ProcessExternalKey`
9. `JoinEndPoint`
10. `LaunchContainer`
11. Launch
12. Run container
![Detailed CNM Diagram](arch-images/CNM_detailed_diagram.png)
__Runtime network setup with CNM__
1. Read `config.json`
2. Create the network namespace
3. Call the `prestart` hook (from inside the netns)
4. Scan network interfaces inside netns and get the name of the interface
created by prestart hook
5. Create bridge, TAP, and link all together with network interface previously
created
### Network Hotplug
Kata Containers has developed a set of network sub-commands and APIs to add, list and
remove a guest network endpoint and to manipulate the guest route table.
The following diagram illustrates the Kata Containers network hotplug workflow.
![Network Hotplug](arch-images/kata-containers-network-hotplug.png)
## Storage
Container workloads are shared with the virtualized environment through [9pfs](https://www.kernel.org/doc/Documentation/filesystems/9p.txt).
The devicemapper storage driver is a special case. The driver uses dedicated block
devices rather than formatted filesystems, and operates at the block level rather
than the file level. This knowledge is used to directly use the underlying block
device instead of the overlay file system for the container root file system. The
block device maps to the top read-write layer for the overlay. This approach gives
much better I/O performance compared to using 9pfs to share the container file system.
The approach above does introduce a limitation in terms of dynamic file copy
in/out of the container using the `docker cp` operations. The copy operation from
host to container accesses the mounted file system on the host-side. This is
not expected to work and may lead to inconsistencies as the block device will
be simultaneously written to from two different mounts. The copy operation from
container to host will work, provided the user calls `sync(1)` from within the
container prior to the copy to make sure any outstanding cached data is written
to the block device.
```
docker cp [OPTIONS] CONTAINER:SRC_PATH DEST_PATH
docker cp [OPTIONS] SRC_PATH CONTAINER:DEST_PATH
```
Kata Containers has the ability to hotplug and remove block devices, which makes it
possible to use block devices for containers started after the VM has been launched.
Users can check to see if the container uses the devicemapper block device as its
rootfs by calling `mount(8)` within the container. If the devicemapper block device
is used, `/` will be mounted on `/dev/vda`. Users can disable direct mounting
of the underlying block device through the runtime configuration.
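For example, a small program run inside the container could make the same check
programmatically by reading `/proc/mounts`. This is a sketch, not part of Kata itself:

```Go
package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

// rootDevice returns the device backing the "/" mount point.
func rootDevice() (string, error) {
	f, err := os.Open("/proc/mounts")
	if err != nil {
		return "", err
	}
	defer f.Close()
	s := bufio.NewScanner(f)
	for s.Scan() {
		// /proc/mounts format: device mountpoint fstype options dump pass
		fields := strings.Fields(s.Text())
		if len(fields) >= 2 && fields[1] == "/" {
			return fields[0], nil
		}
	}
	return "", s.Err()
}

func main() {
	dev, err := rootDevice()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("rootfs device:", dev) // "/dev/vda" indicates a direct block device mount
}
```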
## Kubernetes support
[Kubernetes\*](https://github.com/kubernetes/kubernetes/) is a popular open source
container orchestration engine. In Kubernetes, a set of containers sharing resources
such as networking, storage, mount, PID, etc. is called a
[Pod](https://kubernetes.io/docs/user-guide/pods/).
A node can have multiple pods, but at a minimum, a node within a Kubernetes cluster
only needs to run a container runtime and a container agent (called a
[Kubelet](https://kubernetes.io/docs/admin/kubelet/)).
A Kubernetes cluster runs a control plane where a scheduler (typically running on a
dedicated master node) calls into a compute Kubelet. This Kubelet instance is
responsible for managing the lifecycle of pods within the nodes and eventually relies
on a container runtime to handle execution. The Kubelet architecture decouples
lifecycle management from container execution through the dedicated
`gRPC` based [Container Runtime Interface (CRI)](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/container-runtime-interface-v1.md).
In other words, a Kubelet is a CRI client and expects a CRI implementation to
handle the server side of the interface.
[CRI-O\*](https://github.com/kubernetes-incubator/cri-o) and [Containerd CRI Plugin\*](https://github.com/containerd/cri) are CRI implementations that rely on [OCI](https://github.com/opencontainers/runtime-spec)
compatible runtimes for managing container instances.
Kata Containers is an officially supported CRI-O and Containerd CRI Plugin runtime. It is OCI compatible and therefore aligns with the project's architecture and requirements.
However, due to the fact that Kubernetes execution units are sets of containers (also
known as pods) rather than single containers, the Kata Containers runtime needs to
get extra information to seamlessly integrate with Kubernetes.
### Problem statement
The Kubernetes\* execution unit is a pod that has specifications detailing constraints
such as namespaces, groups, hardware resources, security contexts, *etc.*, shared by all
the containers within that pod.
By default the Kubelet will send a container creation request to its CRI runtime for
each pod and container creation. Without additional metadata from the CRI runtime,
the Kata Containers runtime will thus create one virtual machine for each pod and for
each container within a pod. However, the task of providing the Kubernetes pod semantics
when creating one virtual machine for each container within the same pod is complex given
that the resources of these virtual machines (such as networking or PID) need to be shared.
The challenge with Kata Containers when working as a Kubernetes\* runtime is thus to know
when to create a full virtual machine (for pods) and when to create a new container inside
a previously created virtual machine. In both cases it will get called with very similar
arguments, so it needs the help of the Kubernetes CRI runtime to be able to distinguish a
pod creation request from a container one.
### Containerd
As of Kata Containers 1.5, using `shimv2` with containerd 1.2.0 or above is the preferred
way to run Kata Containers with Kubernetes ([see the howto](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers)).
CRI-O will catch up soon ([`kubernetes-sigs/cri-o#2024`](https://github.com/kubernetes-sigs/cri-o/issues/2024)).
Refer to the following how-to guides:
- [How to use Kata Containers and Containerd](/how-to/containerd-kata.md)
- [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md)
### CRI-O
#### OCI annotations
In order for the Kata Containers runtime (or any virtual machine based OCI compatible
runtime) to be able to understand if it needs to create a full virtual machine or if it
has to create a new container inside an existing pod's virtual machine, CRI-O adds
specific annotations to the OCI configuration file (`config.json`) which is passed to
the OCI compatible runtime.
Before calling its runtime, CRI-O will always add a `io.kubernetes.cri-o.ContainerType`
annotation to the `config.json` configuration file it produces from the Kubelet CRI
request. The `io.kubernetes.cri-o.ContainerType` annotation can either be set to `sandbox`
or `container`. Kata Containers will then use this annotation to decide if it needs to
respectively create a virtual machine or a container inside a virtual machine associated
with a Kubernetes pod:
```Go
containerType, err := ociSpec.ContainerType()
if err != nil {
return err
}
handleFactory(ctx, runtimeConfig)
disableOutput := noNeedForOutput(detach, ociSpec.Process.Terminal)
var process vc.Process
switch containerType {
case vc.PodSandbox:
process, err = createSandbox(ctx, ociSpec, runtimeConfig, containerID, bundlePath, console, disableOutput, systemdCgroup)
if err != nil {
return err
}
case vc.PodContainer:
process, err = createContainer(ctx, ociSpec, containerID, bundlePath, console, disableOutput)
if err != nil {
return err
}
}
```
#### Mixing VM based and namespace based runtimes
> **Note:** Since Kubernetes 1.12, the [`Kubernetes RuntimeClass`](/how-to/containerd-kata.md#kubernetes-runtimeclass)
> has been supported and the user can specify runtime without the non-standardized annotations.
One interesting evolution of the CRI-O support for `kata-runtime` is the ability
to run virtual machine based pods alongside namespace ones. With CRI-O and Kata
Containers, one can introduce the concept of workload trust inside a Kubernetes
cluster.
A cluster operator can now tag (through Kubernetes annotations) container workloads
as `trusted` or `untrusted`. The former labels known to be safe workloads while
the latter describes potentially malicious or misbehaving workloads that need the
highest degree of isolation. In a software development context, an example of a `trusted` workload would be a containerized continuous integration engine whereas all
developers' applications would be `untrusted` by default. Developers' workloads can
be buggy, unstable or even include malicious code and thus from a security perspective
it makes sense to tag them as `untrusted`. A CRI-O and Kata Containers based
Kubernetes cluster handles this use case transparently as long as the deployed
containers are properly tagged. All `untrusted` containers will be handled by Kata Containers and thus run in a hardware virtualized secure sandbox while `runc`, for
example, could handle the `trusted` ones.
CRI-O's default behavior is to trust all pods, except when they're annotated with
`io.kubernetes.cri-o.TrustedSandbox` set to `false`. The default CRI-O trust level
is set through its `configuration.toml` configuration file. Generally speaking,
the CRI-O runtime selection between its trusted runtime (typically `runc`) and its untrusted one (`kata-runtime`) is a function of the pod `Privileged` setting, the `io.kubernetes.cri-o.TrustedSandbox` annotation value, and the default CRI-O trust
level. When a pod is `Privileged`, the runtime will always be `runc`. However, when
a pod is **not** `Privileged` the runtime selection is done as follows:
| | `io.kubernetes.cri-o.TrustedSandbox` not set | `io.kubernetes.cri-o.TrustedSandbox` = `true` | `io.kubernetes.cri-o.TrustedSandbox` = `false` |
| :--- | :---: | :---: | :---: |
| Default CRI-O trust level: `trusted` | runc | runc | Kata Containers |
| Default CRI-O trust level: `untrusted` | Kata Containers | Kata Containers | Kata Containers |
# Appendices
## DAX
Kata Containers utilizes the Linux kernel DAX [(Direct Access filesystem)](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/dax.txt)
feature to efficiently map some host-side files into the guest VM space.
In particular, Kata Containers uses the QEMU NVDIMM feature to provide a
memory-mapped virtual device that can be used to DAX map the virtual machine's
root filesystem into the guest memory address space.
Mapping files using DAX provides a number of benefits over more traditional VM
file and device mapping mechanisms:
- Mapping as a direct access device allows the guest to directly access
the host memory pages (such as via Execute In Place (XIP)), bypassing the guest
page cache. This provides both time and space optimizations.
- Mapping as a direct access device inside the VM allows pages from the
host to be demand loaded using page faults, rather than having to make requests
via a virtualized device (causing expensive VM exits/hypercalls), thus providing
a speed optimization.
- Utilizing `MAP_SHARED` shared memory on the host allows the host to efficiently
share pages.
Kata Containers uses the following steps to set up the DAX mappings:
1. QEMU is configured with an NVDIMM memory device, with a memory file
backend to map in the host-side file into the virtual NVDIMM space.
2. The guest kernel command line mounts this NVDIMM device with the DAX
feature enabled, allowing direct page mapping and access, thus bypassing the
guest page cache.
![DAX](arch-images/DAX.png)
Information on the use of NVDIMM via QEMU is available in the [QEMU source code](http://git.qemu-project.org/?p=qemu.git;a=blob;f=docs/nvdimm.txt;hb=HEAD).
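As a rough sketch of step 1 above, the QEMU invocation follows the pattern shown in
the NVDIMM notes linked above. The sketch below assembles such a command line in Go;
the image path and sizes are illustrative only:

```Go
package main

import (
	"fmt"
	"os/exec"
)

// qemuWithNVDIMM assembles a QEMU command line with an NVDIMM device whose
// backing file is the guest rootfs image (paths and sizes are illustrative).
func qemuWithNVDIMM(image string) *exec.Cmd {
	return exec.Command("qemu-system-x86_64",
		"-machine", "pc,nvdimm=on", // enable NVDIMM support on the machine
		"-m", "2G,slots=2,maxmem=3G", // keep a memory slot free for the NVDIMM
		"-object", "memory-backend-file,id=mem0,share=on,mem-path="+image+",size=128M",
		"-device", "nvdimm,id=nv0,memdev=mem0",
	)
}

func main() {
	cmd := qemuWithNVDIMM("/usr/share/kata-containers/kata-containers.img")
	fmt.Println(cmd.String())
}
```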

docs/design/host-cgroups.md Normal file

@@ -0,0 +1,335 @@
- [Host cgroup management](#host-cgroup-management)
- [Introduction](#introduction)
- [`SandboxCgroupOnly` enabled](#sandboxcgrouponly-enabled)
- [What does Kata do in this configuration?](#what-does-kata-do-in-this-configuration)
- [Why create a Kata-cgroup under the parent cgroup?](#why-create-a-kata-cgroup-under-the-parent-cgroup)
- [Improvements](#improvements)
- [`SandboxCgroupOnly` disabled (default, legacy)](#sandboxcgrouponly-disabled-default-legacy)
- [What does this method do?](#what-does-this-method-do)
- [Impact](#impact)
- [Supported cgroups](#supported-cgroups)
- [Cgroups V1](#cgroups-v1)
- [Cgroups V2](#cgroups-v2)
- [Distro Support](#distro-support)
- [Summary](#summary)
# Host cgroup management
## Introduction
In Kata Containers, workloads run in a virtual machine that is managed by a virtual
machine monitor (VMM) running on the host. As a result, Kata Containers run over two layers of cgroups. The
first layer is in the guest where the workload is placed, while the second layer is on the host where the
VMM and associated threads are running.
The OCI [runtime specification][linux-config] provides guidance on where the container cgroups should be placed:
> [`cgroupsPath`][cgroupspath]: (string, OPTIONAL) path to the cgroups. It can be used to either control the cgroups
> hierarchy for containers or to run a new process in an existing container
cgroups are hierarchical, and this can be seen with the following pod example:
- Pod 1: `cgroupsPath=/kubepods/pod1`
- Container 1:
`cgroupsPath=/kubepods/pod1/container1`
- Container 2:
`cgroupsPath=/kubepods/pod1/container2`
- Pod 2: `cgroupsPath=/kubepods/pod2`
- Container 1:
`cgroupsPath=/kubepods/pod2/container1`
- Container 2:
`cgroupsPath=/kubepods/pod2/container2`
Depending on the upper-level orchestrator, the cgroup under which the pod is placed is
managed by the orchestrator. In the case of Kubernetes, the pod-cgroup is created by Kubelet,
while the container cgroups are to be handled by the runtime. Kubelet will size the pod-cgroup
based on the container resource requirements.
Kata Containers introduces a non-negligible overhead for running a sandbox (pod). Based on this, two scenarios are possible:
1) The upper-layer orchestrator takes the overhead of running a sandbox into account when sizing the pod-cgroup, or
2) Kata Containers do not fully constrain the VMM and associated processes, instead placing a subset of them outside of the pod-cgroup.
Kata Containers provides two options for how cgroups are handled on the host. Selection of these options is done through
the `SandboxCgroupOnly` flag within the Kata Containers [configuration](https://github.com/kata-containers/runtime#configuration)
file.
## `SandboxCgroupOnly` enabled
With `SandboxCgroupOnly` enabled, it is expected that the parent cgroup is sized to take the overhead of running
a sandbox into account. This is ideal, as all the applicable Kata Containers components can be placed within the
given cgroup-path.
In the context of Kubernetes, Kubelet will size the pod-cgroup to take the overhead of running a Kata-based sandbox
into account. This will be feasible in the 1.16 Kubernetes release through the `PodOverhead` feature.
```
+----------------------------------------------------------+
| +---------------------------------------------------+ |
| | +---------------------------------------------+ | |
| | | +--------------------------------------+ | | |
| | | | kata-shimv2, VMM and threads: | | | |
| | | | (VMM, IO-threads, vCPU threads, etc)| | | |
| | | | | | | |
| | | | kata_<sandbox-id> | | | |
| | | +--------------------------------------+ | | |
| | | | | |
| | |Pod 1 | | |
| | +---------------------------------------------+ | |
| | | |
| | +---------------------------------------------+ | |
| | | +--------------------------------------+ | | |
| | | | kata-shimv2, VMM and threads: | | | |
| | | | (VMM, IO-threads, vCPU threads, etc)| | | |
| | | | | | | |
| | | | kata_<sandbox-id> | | | |
| | | +--------------------------------------+ | | |
| | |Pod 2 | | |
| | +---------------------------------------------+ | |
| |kubepods | |
| +---------------------------------------------------+ |
| |
|Node |
+----------------------------------------------------------+
```
### What does Kata do in this configuration?
1. Given a `PodSandbox` container creation, let:
```
podCgroup=Parent(container.CgroupsPath)
KataSandboxCgroup=<podCgroup>/kata_<PodSandboxID>
```
2. Create the cgroup, `KataSandboxCgroup`
3. Join the `KataSandboxCgroup`
Any process created by the runtime will be created in `KataSandboxCgroup`.
The runtime will limit the cgroup in the host only if the sandbox doesn't have a
container type annotation, but the caller is free to set the proper limits for the `podCgroup`.
In the example above the pod cgroups are `/kubepods/pod1` and `/kubepods/pod2`.
Kata creates the unrestricted sandbox cgroup under the pod cgroup.
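Steps 2 and 3 boil down to standard cgroup filesystem operations. A minimal sketch
for `cgroups v1` follows; the controller mount and the pod/sandbox names are
illustrative, not the exact paths the runtime uses:

```Go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
)

// joinSandboxCgroup creates kata_<sandbox-id> under the pod cgroup and
// moves the calling process into it (cgroups v1 cpu controller shown).
func joinSandboxCgroup(podCgroup, sandboxID string) error {
	path := filepath.Join("/sys/fs/cgroup/cpu", podCgroup, "kata_"+sandboxID)
	if err := os.MkdirAll(path, 0o755); err != nil {
		return err
	}
	// Writing our PID to cgroup.procs moves the whole process into the
	// cgroup; children (VMM, I/O threads) are then created inside it.
	pid := strconv.Itoa(os.Getpid())
	return os.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(pid), 0o644)
}

func main() {
	if err := joinSandboxCgroup("kubepods/pod1", "abc123"); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```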
### Why create a Kata-cgroup under the parent cgroup?
`Docker` does not have a notion of pods, and will not create a cgroup directory
to place a particular container in (i.e., all containers would be in a path like
`/docker/container-id`). To simplify the implementation and continue to support `Docker`,
Kata Containers creates the sandbox-cgroup, in the case of Kubernetes, or a container cgroup, in the case
of docker.
### Improvements
- Get statistics about pod resources
If the Kata caller wants to know the resource usage on the host it can get
statistics from the pod cgroup. All cgroup stats in the hierarchy will include
the Kata overhead. This gives the possibility of gathering usage statistics at the
pod level and the container level.
- Better host resource isolation
Because the Kata runtime will place all the Kata processes in the pod cgroup,
the resource limits that the caller applies to the pod cgroup will affect all
processes that belong to the Kata sandbox on the host. This will improve the
isolation on the host, preventing Kata from becoming a noisy neighbor.
## `SandboxCgroupOnly` disabled (default, legacy)
If the cgroup provided to Kata is not sized appropriately, instability will be
introduced when fully constraining Kata components, and the user-workload will
see a subset of resources that were requested. Based on this, the default
handling for Kata Containers is to not fully constrain the VMM and Kata
components on the host.
```
+----------------------------------------------------------+
| +---------------------------------------------------+ |
| | +---------------------------------------------+ | |
| | | +--------------------------------------+ | | |
| | | |Container 1 |-|Container 2 | | | |
| | | | |-| | | | |
| | | | Shim+container1 |-| Shim+container2 | | | |
| | | +--------------------------------------+ | | |
| | | | | |
| | |Pod 1 | | |
| | +---------------------------------------------+ | |
| | | |
| | +---------------------------------------------+ | |
| | | +--------------------------------------+ | | |
| | | |Container 1 |-|Container 2 | | | |
| | | | |-| | | | |
| | | | Shim+container1 |-| Shim+container2 | | | |
| | | +--------------------------------------+ | | |
| | | | | |
| | |Pod 2 | | |
| | +---------------------------------------------+ | |
| |kubepods | |
| +---------------------------------------------------+ |
| +---------------------------------------------------+ |
| | Hypervisor | |
| |Kata | |
| +---------------------------------------------------+ |
| |
|Node |
+----------------------------------------------------------+
```
### What does this method do?
1. Given a container creation let `containerCgroupHost=container.CgroupsPath`
1. Rename `containerCgroupHost` path to add `kata_`
1. Let `PodCgroupPath=PodSandboxContainerCgroup` where `PodSandboxContainerCgroup` is the cgroup of a container of type `PodSandbox`
1. Limit the `PodCgroupPath` with the sum of all the container limits in the Sandbox
1. Move only vCPU threads of hypervisor to `PodCgroupPath`
1. Per each container, move its `kata-shim` to its own `containerCgroupHost`
1. Move hypervisor and applicable threads to memory cgroup `/kata`
_Note_: the Kata Containers runtime will not add all the hypervisor threads to
the cgroup path requested, only the vCPU threads. The remaining threads run
unconstrained. This mitigates the risk of the VMM and other threads being killed in an out of memory (`OOM`) scenario.
#### Impact
If resources are reserved at a system level to account for the overheads of
running sandbox containers, this configuration can be utilized with adequate
stability. In this scenario, non-negligible amounts of CPU and memory will be
utilized unaccounted for on the host.
[linux-config]: https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md
[cgroupspath]: https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path
# Supported cgroups
Kata Containers supports cgroups `v1` and `v2`. In the following sections each cgroup version is
described briefly, along with the changes needed in Kata Containers to support it.
## Cgroups V1
`Cgroups V1` are under a [`tmpfs`][1] filesystem mounted at `/sys/fs/cgroup`, where each cgroup is
mounted under a separate cgroup filesystem. A `Cgroups v1` hierarchy may look like the following
diagram:
```
/sys/fs/cgroup/
├── blkio
│ ├── cgroup.procs
│ └── tasks
├── cpu -> cpu,cpuacct
├── cpuacct -> cpu,cpuacct
├── cpu,cpuacct
│ ├── cgroup.procs
│ └── tasks
├── cpuset
│ ├── cgroup.procs
│ └── tasks
├── devices
│ ├── cgroup.procs
│ └── tasks
├── freezer
│ ├── cgroup.procs
│ └── tasks
├── hugetlb
│ ├── cgroup.procs
│ └── tasks
├── memory
│ ├── cgroup.procs
│ └── tasks
├── net_cls -> net_cls,net_prio
├── net_cls,net_prio
│ ├── cgroup.procs
│ └── tasks
├── net_prio -> net_cls,net_prio
├── perf_event
│ ├── cgroup.procs
│ └── tasks
├── pids
│ ├── cgroup.procs
│ └── tasks
└── systemd
├── cgroup.procs
└── tasks
```
A process can join a cgroup by writing its process id (`pid`) to `cgroup.procs` file,
or join a cgroup partially by writing the task (thread) id (`tid`) to the `tasks` file.
Kata Containers supports `v1` by default and no change in the configuration file is needed.
To know more about `cgroups v1`, see [cgroupsv1(7)][2].
## Cgroups V2
`Cgroups v2` are also known as unified cgroups. Unlike `cgroups v1`, all cgroups are
mounted under the same cgroup filesystem. A `Cgroups v2` hierarchy may look like the following
diagram:
```
/sys/fs/cgroup/system.slice
├── cgroup.controllers
├── cgroup.events
├── cgroup.freeze
├── cgroup.max.depth
├── cgroup.max.descendants
├── cgroup.procs
├── cgroup.stat
├── cgroup.subtree_control
├── cgroup.threads
├── cgroup.type
├── cpu.max
├── cpu.pressure
├── cpu.stat
├── cpu.weight
├── cpu.weight.nice
├── io.bfq.weight
├── io.latency
├── io.max
├── io.pressure
├── io.stat
├── memory.current
├── memory.events
├── memory.events.local
├── memory.high
├── memory.low
├── memory.max
├── memory.min
├── memory.oom.group
├── memory.pressure
├── memory.stat
├── memory.swap.current
├── memory.swap.events
├── memory.swap.max
├── pids.current
├── pids.events
└── pids.max
```
As with `cgroups v1`, a process can join a cgroup by writing its process id (`pid`) to the
`cgroup.procs` file, or join a cgroup partially by writing a task (thread) id (`tid`) to the
`cgroup.threads` file.
For backwards compatibility, Kata Containers supports `cgroups v1` by default.
To change this to `v2`, set `sandbox_cgroup_only=true` in the `configuration.toml` file.
To know more about `cgroups v2`, see [cgroupsv2(7)][3].
### Distro Support
Many Linux distributions do not yet support `cgroups v2`, as it is quite a recent addition.
For more information about the status of this feature see [issue #2494][4].
# Summary
| cgroup option | default? | status | pros | cons | cgroups |
|-|-|-|-|-|-|
| `SandboxCgroupOnly=false` | yes | legacy | Easiest to make Kata work | Unaccounted for memory and resource utilization | v1 |
| `SandboxCgroupOnly=true` | no | recommended | Complete tracking of Kata memory and CPU utilization. In Kubernetes, the Kubelet can fully constrain Kata via the pod cgroup | Requires upper layer orchestrator which sizes sandbox cgroup appropriately | v1, v2 |
[1]: http://man7.org/linux/man-pages/man5/tmpfs.5.html
[2]: http://man7.org/linux/man-pages/man7/cgroups.7.html#CGROUPS_VERSION_1
[3]: http://man7.org/linux/man-pages/man7/cgroups.7.html#CGROUPS_VERSION_2
[4]: https://github.com/kata-containers/runtime/issues/2494

@@ -0,0 +1,117 @@
# Kata API Design
To fulfill the [Kata design requirements](kata-design-requirements.md), and based on the discussion on [Virtcontainers API extensions](https://docs.google.com/presentation/d/1dbGrD1h9cpuqAPooiEgtiwWDGCYhVPdatq7owsKHDEQ), the Kata runtime library features the following APIs:
- Sandbox based top API
- Storage and network hotplug API
- Plugin frameworks for external proprietary Kata runtime extensions
- Built-in shim and proxy types and capabilities
## Sandbox Based API
### Sandbox Management API
|Name|Description|
|---|---|
|`CreateSandbox(SandboxConfig)`| Create and start a sandbox, and return the sandbox structure.|
|`FetchSandbox(ID)`| Connect to an existing sandbox and return the sandbox structure.|
|`ListSandboxes()`| List all existing sandboxes with status. |
### Sandbox Operation API
|Name|Description|
|---|---|
|`sandbox.Pause()`| Pause the sandbox.|
|`sandbox.Resume()`| Resume the paused sandbox.|
|`sandbox.Release()`| Release a sandbox data structure, close connections to the agent, and quit any goroutines associated with the sandbox. Mostly used for daemon restart.|
|`sandbox.Delete()`| Destroy the sandbox and remove all persistent metadata.|
|`sandbox.Status()`| Get the status of the sandbox and containers.|
|`sandbox.Monitor()`| Return a context handler for caller to monitor sandbox callbacks such as error termination.|
|`sandbox.CreateContainer()`| Create new container in the sandbox.|
|`sandbox.DeleteContainer()`| Delete a container from the sandbox.|
|`sandbox.StartContainer()`| Start a container in the sandbox.|
|`sandbox.StatusContainer()`| Get the status of a container in the sandbox.|
|`sandbox.EnterContainer()`| Run a new process in a container.|
|`sandbox.WaitProcess()`| Wait on a process to terminate.|
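To illustrate how a caller is expected to combine these calls, the sketch below drives
a sandbox through a minimal lifecycle. The types are local stand-ins mirroring the API
names above, not the real virtcontainers types:

```Go
package apidemo

// SandboxConfig and Sandbox are stand-ins mirroring the API surface above.
type SandboxConfig struct{ ID string }

type Sandbox interface {
	CreateContainer(id string) error
	StartContainer(id string) error
	Release() error
}

// runWorkload creates a sandbox, starts one container in it, and then
// releases the sandbox structure (the sandbox itself keeps running).
func runWorkload(create func(SandboxConfig) (Sandbox, error)) error {
	sb, err := create(SandboxConfig{ID: "sandbox-1"})
	if err != nil {
		return err
	}
	defer sb.Release()
	if err := sb.CreateContainer("container-1"); err != nil {
		return err
	}
	return sb.StartContainer("container-1")
}
```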
### Sandbox Hotplug API
|Name|Description|
|---|---|
|`sandbox.AddDevice()`| Add new storage device to the sandbox.|
|`sandbox.AddInterface()`| Add new NIC to the sandbox.|
|`sandbox.RemoveInterface()`| Remove a NIC from the sandbox.|
|`sandbox.ListInterfaces()`| List all NICs and their configurations in the sandbox.|
|`sandbox.UpdateRoutes()`| Update the sandbox route table (e.g. for portmapping support).|
|`sandbox.ListRoutes()`| List the sandbox route table.|
### Sandbox Relay API
|Name|Description|
|---|---|
|`sandbox.WinsizeProcess(containerID, processID, Height, Width)`|Relay TTY resize request to a process.|
|`sandbox.SignalProcess(containerID, processID, signalID, signalALL)`| Relay a signal to a process or all processes in a container.|
|`sandbox.IOStream(containerID, processID)`| Relay a process stdio. Return stdin/stdout/stderr pipes to the process stdin/stdout/stderr streams.|
## Plugin framework for external proprietary Kata runtime extensions
### Hypervisor plugin
TBD.
### Metadata storage plugin
The metadata storage plugin controls where sandbox metadata is saved.
All metadata storage plugins must implement the following API:
|Name|Description|
|---|---|
|`storage.Save(key, value)`| Save a record.|
|`storage.Load(key)`| Load a record.|
|`storage.Delete(key)`| Delete a record.|
Built-in implementations include:
- Filesystem storage
- LevelDB storage
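A minimal filesystem-backed implementation of that three-call API might look like the
following sketch; the package name and on-disk layout are illustrative:

```Go
package fsstore

import (
	"os"
	"path/filepath"
)

// Store persists each record as one file named after its key.
type Store struct{ Dir string }

func (s Store) Save(key string, value []byte) error {
	if err := os.MkdirAll(s.Dir, 0o755); err != nil {
		return err
	}
	return os.WriteFile(filepath.Join(s.Dir, key), value, 0o644)
}

func (s Store) Load(key string) ([]byte, error) {
	return os.ReadFile(filepath.Join(s.Dir, key))
}

func (s Store) Delete(key string) error {
	return os.Remove(filepath.Join(s.Dir, key))
}
```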
### VM Factory plugin
The VM factory plugin controls how a sandbox factory creates new VMs.
All VM factory plugins must implement the following API:
|Name|Description|
|---|---|
|`VMFactory.NewVM(HypervisorConfig)`|Create a new VM based on `HypervisorConfig`.|
Built-in implementations include:
|Name|Description|
|---|---|
|`CreateNew()`| Create brand new VM based on `HypervisorConfig`.|
|`CreateFromTemplate()`| Create new VM from template.|
|`CreateFromCache()`| Create new VM from VM caches.|
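In Go terms, the plugin surface could be expressed as a single-method interface, with
each built-in strategy providing its own implementation. A sketch with stand-in types:

```Go
package vmfactory

// HypervisorConfig and VM are stand-ins for the real virtcontainers types.
type HypervisorConfig struct{ KernelPath, ImagePath string }

type VM struct{ ID string }

// Factory is the contract every VM factory plugin satisfies.
type Factory interface {
	NewVM(cfg HypervisorConfig) (*VM, error)
}

// createNew models the CreateNew() strategy: always boot a brand new VM.
type createNew struct{}

func (createNew) NewVM(cfg HypervisorConfig) (*VM, error) {
	// A real implementation would launch the hypervisor with cfg here.
	return &VM{ID: "vm-0"}, nil
}
```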
### Sandbox Creation Plugin Workflow
![Sandbox Creation Plugin Workflow](https://raw.githubusercontent.com/bergwolf/raw-contents/master/kata/Kata-sandbox-creation.png "Sandbox Creation Plugin Workflow")
### Sandbox Connection Plugin Workflow
![Sandbox Connection Plugin Workflow](https://raw.githubusercontent.com/bergwolf/raw-contents/master/kata/Sandbox-Connection.png "Sandbox Connection Plugin Workflow")
## Built-in Shim and Proxy Types and Capabilities
### Built-in shim/proxy sandbox configurations
- Supported shim configurations:
|Name|Description|
|---|---|
|`noopshim`|Do not start any shim process.|
|`ccshim`| Start the cc-shim binary.|
|`katashim`| Start the `kata-shim` binary.|
|`katashimbuiltin`|No standalone shim process but shim functionality APIs are exported.|
- Supported proxy configurations:
|Name|Description|
|---|---|
|`noopProxy`| a dummy proxy implementation of the proxy interface, only used for testing purposes.|
|`noProxy`|generic implementation for any case where no actual proxy is needed.|
|`ccProxy`|run `ccProxy` to proxy between runtime and agent.|
|`kataProxy`|run `kata-proxy` to translate Yamux connections between runtime and Kata agent. |
|`kataProxyBuiltin`| no standalone proxy process and connect to Kata agent with internal Yamux translation.|
### Built-in Shim Capability
Built-in shim capability is implemented by removing the standalone shim process and
supporting the shim-related APIs.
### Built-in Proxy Capability
Built-in proxy capability is achieved by removing the standalone proxy process and
connecting to the Kata agent with a custom gRPC dialer that performs the Yamux translation internally.
This behavior is enabled when the proxy is configured as `kataProxyBuiltin`.

@@ -0,0 +1,95 @@
## Design requirements
The Kata Containers runtime **MUST** fulfill all of the following requirements:
### OCI compatibility
The Kata Containers runtime **MUST** implement the [OCI runtime specification](https://github.com/opencontainers/runtime-spec) and support all
the OCI runtime operations.
### [`runc`](https://github.com/opencontainers/runc) CLI compatibility
In theory, being OCI compatible should be enough. In practice, the Kata Containers runtime
should comply with the latest *stable* `runc` CLI. In particular, it **MUST** implement the
following `runc` commands:
* `create`
* `delete`
* `exec`
* `kill`
* `list`
* `pause`
* `ps`
* `start`
* `state`
* `version`
The Kata Containers runtime **MUST** implement the following command line options:
* `--console-socket`
* `--pid-file`
### [CRI](http://blog.kubernetes.io/2016/12/container-runtime-interface-cri-in-kubernetes.html) and [Kubernetes](https://kubernetes.io) support
The Kata Containers project **MUST** provide two interfaces for CRI shims to manage hardware
virtualization based Kubernetes pods and containers:
- An OCI and `runc` compatible command line interface, as described in the previous section.
This interface is used by implementations such as [`CRI-O`](http://cri-o.io) and [`cri-containerd`](https://github.com/containerd/cri-containerd), for example.
- A hardware virtualization runtime library API for CRI shims to consume and provide a more
CRI native implementation. The [`frakti`](https://github.com/kubernetes/frakti) CRI shim is an example of such a consumer.
### Multiple hardware architectures support
The Kata Containers runtime **MUST NOT** be architecture-specific. It should be able to support
multiple hardware architectures and provide a modular and flexible design for adding support
for additional ones.
### Multiple hypervisor support
The Kata Containers runtime **MUST NOT** be tied to any specific hardware virtualization technology,
hypervisor, or virtual machine monitor implementation.
It should support multiple hypervisors and provide a pluggable and flexible design to add support
for additional ones.
#### Nesting
The Kata Containers runtime **MUST** support nested virtualization environments.
### Networking
* The Kata Containers runtime **MUST** support the CNI plugin.
* The Kata Containers runtime **MUST** support both legacy (IPv4) and IPv6 networks.
### I/O
#### Devices direct assignment
In order for containers to directly consume host hardware resources, the Kata Containers runtime
**MUST** provide containers with secure pass through for generic devices such as GPUs, SRIOV,
RDMA, QAT, by leveraging I/O virtualization technologies (IOMMU, interrupt remapping).
#### Acceleration
The Kata Containers runtime **MUST** support accelerated and user-space-based I/O operations
for networking (e.g. DPDK) as well as storage through `vhost-user` sockets.
#### Scalability
The Kata Containers runtime **MUST** support scalable I/O through the SRIOV technology.
### Virtualization overhead reduction
A compelling aspect of containers is their minimal overhead compared to bare metal applications.
A container runtime should keep the overhead to a minimum in order to provide the expected user
experience.
The Kata Containers runtime implementation **SHOULD** be optimized for:
* Minimal workload boot and shutdown times
* Minimal workload memory footprint
* Maximal networking throughput
* Minimal networking latency
### Testing and debugging
#### Continuous Integration
Each Kata Containers runtime pull request **MUST** pass at least the following set of container-related
tests:
* Unit tests: runtime unit tests coverage >75%
* Functional tests: the entire runtime CLI and APIs
* Integration tests: Docker and Kubernetes
#### Debugging
The Kata Containers runtime implementation **MUST** use structured logging in order to namespace
log messages to facilitate debugging.

@@ -0,0 +1,175 @@
- [Virtual machine vCPU sizing in Kata Containers](#virtual-machine-vcpu-sizing-in-kata-containers)
* [Default number of virtual CPUs](#default-number-of-virtual-cpus)
* [Virtual CPUs and Kubernetes pods](#virtual-cpus-and-kubernetes-pods)
* [Container lifecycle](#container-lifecycle)
* [Container without CPU constraint](#container-without-cpu-constraint)
* [Container with CPU constraint](#container-with-cpu-constraint)
* [Do not waste resources](#do-not-waste-resources)
# Virtual machine vCPU sizing in Kata Containers
## Default number of virtual CPUs
Before starting a container, the [runtime][6] reads the `default_vcpus` option
from the [configuration file][7] to determine the number of virtual CPUs
(vCPUs) needed to start the virtual machine. By default, `default_vcpus` is
equal to 1 for fast boot time and a small memory footprint per virtual machine.
Be aware that increasing this value negatively impacts the virtual machine's
boot time and memory footprint.
In general, we recommend that you do not edit this variable, unless you know
what you are doing. If your container needs more than one vCPU, use
[docker `--cpus`][1], [docker update][4], or [Kubernetes `cpu` limits][2] to
assign more resources.
*Docker*
```sh
$ docker run --name foo -ti --cpus 2 debian bash
$ docker update --cpus 4 foo
```
*Kubernetes*
```yaml
# ~/cpu-demo.yaml
apiVersion: v1
kind: Pod
metadata:
name: cpu-demo
namespace: sandbox
spec:
containers:
- name: cpu0
image: vish/stress
resources:
limits:
cpu: "3"
args:
- -cpus
- "5"
```
```sh
$ sudo -E kubectl create -f ~/cpu-demo.yaml
```
## Virtual CPUs and Kubernetes pods
A Kubernetes pod is a group of one or more containers, with shared storage and
network, and a specification for how to run the containers [[specification][3]].
In Kata Containers this group of containers, which is called a sandbox, runs inside
the same virtual machine. If you do not specify a CPU constraint, the runtime does
not add more vCPUs and the container is not placed inside a CPU cgroup.
Instead, the container uses the number of vCPUs specified by `default_vcpus`
and shares these resources with other containers in the same situation
(without a CPU constraint).
## Container lifecycle
When you create a container with a CPU constraint, the runtime adds the
number of vCPUs required by the container. Similarly, when the container terminates,
the runtime removes these resources.
## Container without CPU constraint
A container without a CPU constraint uses the default number of vCPUs specified
in the configuration file. In the case of Kubernetes pods, containers without a
CPU constraint use and share between them the default number of vCPUs. For
example, if `default_vcpus` is equal to 1 and you have 2 containers without CPU
constraints, with each container trying to consume 100% of a vCPU, the resources
are divided in two parts, 50% of a vCPU for each container, because your virtual
machine does not have enough resources to satisfy the containers' needs. If you want
to give access to a greater or lesser portion of vCPUs to a specific container,
use [`docker --cpu-shares`][1] or [Kubernetes `cpu` requests][2].
*Docker*
```sh
$ docker run -ti --cpu-shares=512 debian bash
```
*Kubernetes*
```yaml
# ~/cpu-demo.yaml
apiVersion: v1
kind: Pod
metadata:
name: cpu-demo
namespace: sandbox
spec:
containers:
- name: cpu0
image: vish/stress
resources:
requests:
cpu: "0.7"
args:
- -cpus
- "3"
```
```sh
$ sudo -E kubectl create -f ~/cpu-demo.yaml
```
Before running containers without CPU constraint, consider that your containers
are not running alone. Since your containers run inside a virtual machine, other
processes use the vCPUs as well (e.g. `systemd` and the Kata Containers
[agent][5]). In general, we recommend setting `default_vcpus` equal to 1 to
allow non-container processes to run on this vCPU and to specify a CPU
constraint for each container. If your container is already running and needs
more vCPUs, you can add more using [docker update][4].
## Container with CPU constraint
The runtime calculates the number of vCPUs required by a container with CPU
constraints using the following formula: `vCPUs = ceiling( quota / period )`, where
`quota` specifies the number of microseconds per CPU period that the container is
guaranteed CPU access and `period` specifies the CPU CFS scheduler period of time
in microseconds. The result determines the number of vCPUs to hot plug into the
virtual machine. Once the vCPUs have been added, the [agent][5] places the
container inside a CPU cgroup. This placement allows the container to use only
its assigned resources.
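For example, Docker translates `--cpus 2.5` into `quota` = 250000 with its default `period` of 100000, so the runtime hot plugs `ceiling(250000 / 100000) = 3` vCPUs. The same integer arithmetic in shell:
```sh
$ quota=250000 period=100000
$ echo $(( (quota + period - 1) / period ))   # ceiling(quota / period)
3
```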
## Do not waste resources
If you already know the number of vCPUs needed for each container and pod, or
just want to run them with the same number of vCPUs, you can specify that
number using the `default_vcpus` option in the configuration file; each virtual
machine then starts with that number of vCPUs. One limitation of this approach is
that these vCPUs cannot be removed later and you might be wasting
resources. For example, if you set `default_vcpus` to 8 and run only one
container with a CPU constraint of 1 vCPU, you might be wasting 7 vCPUs since
the virtual machine starts with 8 vCPUs and 1 vCPU is added and assigned
to the container. Non-container processes might be able to use 8 vCPUs but they
use at most 1 vCPU, hence 7 vCPUs might go unused.
*Container without CPU constraint*
```sh
$ docker run -ti debian bash -c "nproc; cat /sys/fs/cgroup/cpu,cpuacct/cpu.cfs_*"
1 # number of vCPUs
100000 # cfs period
-1 # cfs quota
```
*Container with CPU constraint*
```sh
$ docker run --cpus 4 -ti debian bash -c "nproc; cat /sys/fs/cgroup/cpu,cpuacct/cpu.cfs_*"
5 # number of vCPUs
100000 # cfs period
400000 # cfs quota
```
Note that the container reports 5 vCPUs: the 1 vCPU the virtual machine boots with (`default_vcpus`) plus the 4 vCPUs hot added to satisfy the `--cpus 4` constraint.
[1]: https://docs.docker.com/config/containers/resource_constraints/#cpu
[2]: https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource
[3]: https://kubernetes.io/docs/concepts/workloads/pods/pod/
[4]: https://docs.docker.com/engine/reference/commandline/update/
[5]: https://github.com/kata-containers/agent
[6]: https://github.com/kata-containers/runtime
[7]: https://github.com/kata-containers/runtime#configuration


@ -0,0 +1,129 @@
# Virtualization in Kata Containers
- [Virtualization in Kata Containers](#virtualization-in-kata-containers)
- [Mapping container concepts to virtual machine technologies](#mapping-container-concepts-to-virtual-machine-technologies)
- [Kata Containers Hypervisor and VMM support](#kata-containers-hypervisor-and-vmm-support)
- [QEMU/KVM](#qemukvm)
- [Machine accelerators](#machine-accelerators)
- [Hotplug devices](#hotplug-devices)
- [Firecracker/KVM](#firecrackerkvm)
- [Cloud Hypervisor/KVM](#cloud-hypervisorkvm)
- [Summary](#summary)
With Kata Containers, a second layer of isolation is created on top of that provided by traditional namespace containers. The
hardware virtualization interface is the basis of this additional layer. Kata will launch a lightweight virtual machine,
and use the guest's Linux kernel to create a container workload, or workloads in the case of multi-container pods. In Kubernetes
and in the Kata implementation, the sandbox is carried out at the pod level. In Kata, this sandbox is created using a virtual machine.
This document describes how Kata Containers maps container technologies to virtual machine technologies, and how this is realized in
the multiple hypervisors and virtual machine monitors that Kata supports.
## Mapping container concepts to virtual machine technologies
A typical deployment of Kata Containers will be in Kubernetes by way of a Container Runtime Interface (CRI) implementation. On every node,
Kubelet will interact with a CRI implementor (such as containerd or CRI-O), which will in turn interface with Kata Containers (an OCI based runtime).
The CRI API, as defined at the [Kubernetes CRI-API repo](https://github.com/kubernetes/cri-api/), implies a few constructs being supported by the
CRI implementation, and ultimately in Kata Containers. In order to support the full [API](https://github.com/kubernetes/cri-api/blob/a6f63f369f6d50e9d0886f2eda63d585fbd1ab6a/pkg/apis/runtime/v1alpha2/api.proto#L34-L110) with the CRI-implementor, Kata must provide the following constructs:
![API to construct](./arch-images/api-to-construct.png)
These constructs can then be further mapped to what devices are necessary for interfacing with the virtual machine:
![construct to VM concept](./arch-images/construct-to-vm-concept.png)
Ultimately, these concepts map to specific para-virtualized devices or virtualization technologies.
![VM concept to underlying technology](./arch-images/vm-concept-to-tech.png)
Each hypervisor or VMM varies on how or if it handles each of these.
## Kata Containers Hypervisor and VMM support
Kata Containers is designed to support multiple virtual machine monitors (VMMs) and hypervisors.
Kata Containers supports:
- [ACRN hypervisor](https://projectacrn.org/)
- [Cloud Hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor)/[KVM](https://www.linux-kvm.org/page/Main_Page)
- [Firecracker](https://github.com/firecracker-microvm/firecracker)/KVM
- [QEMU](http://www.qemu-project.org/)/KVM
Which configuration to use will depend on the end user's requirements. Details of each solution and a summary are provided below.
### QEMU/KVM
Kata Containers with QEMU has complete compatibility with Kubernetes.
Depending on the host architecture, Kata Containers supports various machine types,
for example `pc` and `q35` on x86 systems, `virt` on ARM systems and `pseries` on IBM Power systems. The default Kata Containers
machine type is `pc`. The machine type and its [`Machine accelerators`](#machine-accelerators) can
be changed by editing the runtime [`configuration`](./architecture.md#configuration) file.
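For illustration, the relevant fragment of that file looks roughly like this (a sketch; the exact section name, machine type and accelerators depend on your architecture and Kata release):
```toml
[hypervisor.qemu]
machine_type = "q35"
machine_accelerators = "nvdimm"
```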
Devices and features used:
- virtio VSOCK or virtio serial
- virtio block or virtio SCSI
- virtio net
- virtio fs or virtio 9p (recommended: virtio fs)
- VFIO
- hotplug
- machine accelerators
Machine accelerators and hotplug are used in Kata Containers to manage resource constraints, improve boot time and reduce memory footprint. These are documented below.
#### Machine accelerators
Machine accelerators are architecture specific and can be used to improve the performance
and enable specific features of the machine types. The following machine accelerators
are used in Kata Containers:
- NVDIMM: This machine accelerator is x86 specific and only supported by `pc` and
`q35` machine types. `nvdimm` is used to provide the root filesystem as a persistent
memory device to the Virtual Machine.
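To illustrate the mechanism, plain QEMU usage along the following lines exposes an image file to the guest as an NVDIMM. This is a sketch, not the exact command line Kata generates, and the image path and size are examples only:
```sh
# Sketch: expose a rootfs image to the guest as an emulated persistent memory device
qemu-system-x86_64 \
    -machine pc,nvdimm=on \
    -m 2G,slots=2,maxmem=4G \
    -object memory-backend-file,id=mem0,share=on,mem-path=/usr/share/kata-containers/kata-containers.img,size=128M \
    -device nvdimm,id=nv0,memdev=mem0
```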
#### Hotplug devices
The Kata Containers VM starts with a minimum amount of resources, allowing for faster boot time and a reduction in memory footprint. As the container launch progresses,
devices are hotplugged to the VM. For example, when a CPU constraint is specified which includes additional CPUs, they can be hot added. Kata Containers has support
for hot-adding the following devices:
- Virtio block
- Virtio SCSI
- VFIO
- CPU
### Firecracker/KVM
Firecracker, built on many rust crates that are within [rust-VMM](https://github.com/rust-vmm), has a very limited device model, providing a lighter
footprint and attack surface, focusing on function-as-a-service like use cases. As a result, Kata Containers with Firecracker VMM supports a subset of the CRI API.
Firecracker does not support file-system sharing, and as a result only block-based storage drivers are supported. Firecracker does not support device
hotplug nor does it support VFIO. As a result, Kata Containers with Firecracker VMM does not support updating container resources after boot, nor
does it support device passthrough.
Devices used:
- virtio VSOCK
- virtio block
- virtio net
### Cloud Hypervisor/KVM
Cloud Hypervisor, based on [rust-VMM](https://github.com/rust-vmm), is designed to have a lighter footprint and attack surface. For Kata Containers,
relative to Firecracker, the Cloud Hypervisor configuration provides better compatibility at the expense of exposing additional devices: file system
sharing and direct device assignment. As of the 1.10 release of Kata Containers, Cloud Hypervisor does not support device hotplug, and as a result
does not support updating container resources after boot or utilizing block based volumes. While Cloud Hypervisor does support VFIO, Kata is still adding
that support, so as of 1.10 Kata with Cloud Hypervisor supports neither block based volumes nor direct device assignment. See the [Cloud Hypervisor device support documentation](https://github.com/cloud-hypervisor/cloud-hypervisor/blob/master/docs/device_model.md)
for more details on Cloud Hypervisor.
Devices used:
- virtio VSOCK
- virtio block
- virtio net
- virtio fs
### Summary
| Solution | release introduced | brief summary |
|-|-|-|
| QEMU | 1.0 | upstream QEMU, with support for hotplug and filesystem sharing |
| NEMU | 1.4 | Deprecated, removed as of 1.10 release. Slimmed down fork of QEMU, with experimental support of virtio-fs |
| Firecracker | 1.5 | upstream Firecracker, rust-VMM based, no VFIO, no FS sharing, no memory/CPU hotplug |
| QEMU-virtio-fs | 1.7 | upstream QEMU with support for virtio-fs. Will be removed once virtio-fs lands in upstream QEMU |
| Cloud Hypervisor | 1.10 | rust-VMM based, includes VFIO and FS sharing through virtio-fs, no hotplug |

28
docs/how-to/README.md Normal file

@ -0,0 +1,28 @@
# Howto Guides
* [Howto Guides](#howto-guides)
* [Kubernetes Integration](#kubernetes-integration)
* [Hypervisors Integration](#hypervisors-integration)
* [Advanced Topics](#advanced-topics)
## Kubernetes Integration
- [Run Kata Containers with Kubernetes](run-kata-with-k8s.md)
- [How to use Kata Containers and Containerd](containerd-kata.md)
- [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](how-to-use-k8s-with-cri-containerd-and-kata.md)
- [Kata Containers and service mesh for Kubernetes](service-mesh.md)
- [How to import Kata Containers logs into Fluentd](how-to-import-kata-logs-with-fluentd.md)
## Hypervisors Integration
- [Kata Containers with Firecracker](https://github.com/kata-containers/documentation/wiki/Initial-release-of-Kata-Containers-with-Firecracker-support)
- [Kata Containers with NEMU](how-to-use-kata-containers-with-nemu.md)
- [Kata Containers with ACRN Hypervisor](how-to-use-kata-containers-with-acrn.md)
## Advanced Topics
- [How to use Kata Containers with virtio-fs](how-to-use-virtio-fs-with-kata.md)
- [Setting Sysctls with Kata](how-to-use-sysctls-with-kata.md)
- [What Is VMCache and How To Enable It](what-is-vm-cache-and-how-do-I-use-it.md)
- [What Is VM Templating and How To Enable It](what-is-vm-templating-and-how-do-I-use-it.md)
- [Privileged Kata Containers](privileged.md)
- [How to load kernel modules in Kata Containers](how-to-load-kernel-modules-with-kata.md)
- [How to use Kata Containers with `virtio-mem`](how-to-use-virtio-mem-with-kata.md)
- [How to set sandbox Kata Containers configurations with pod annotations](how-to-set-sandbox-config-kata.md)


@ -0,0 +1,368 @@
# How to use Kata Containers and Containerd
- [Concepts](#concepts)
- [Kubernetes `RuntimeClass`](#kubernetes-runtimeclass)
- [Containerd Runtime V2 API: Shim V2 API](#containerd-runtime-v2-api-shim-v2-api)
- [Install](#install)
- [Install Kata Containers](#install-kata-containers)
- [Install containerd with CRI plugin](#install-containerd-with-cri-plugin)
- [Install CNI plugins](#install-cni-plugins)
- [Install `cri-tools`](#install-cri-tools)
- [Configuration](#configuration)
- [Configure containerd to use Kata Containers](#configure-containerd-to-use-kata-containers)
- [Kata Containers as a `RuntimeClass`](#kata-containers-as-a-runtimeclass)
- [Kata Containers as the runtime for untrusted workload](#kata-containers-as-the-runtime-for-untrusted-workload)
- [Kata Containers as the default runtime](#kata-containers-as-the-default-runtime)
- [Configuration for `cri-tools`](#configuration-for-cri-tools)
- [Run](#run)
- [Launch containers with `ctr` command line](#launch-containers-with-ctr-command-line)
- [Launch Pods with `crictl` command line](#launch-pods-with-crictl-command-line)
This document covers the installation and configuration of [containerd](https://containerd.io/)
and [Kata Containers](https://katacontainers.io). containerd provides not only the `ctr`
command line tool, but also the [CRI](https://kubernetes.io/blog/2016/12/container-runtime-interface-cri-in-kubernetes/)
interface for [Kubernetes](https://kubernetes.io) and other CRI clients.
This document is primarily written for Kata Containers v1.5.0-rc2 or above, and containerd v1.2.0 or above.
Previous versions are also addressed, but we suggest users upgrade to the newer versions for better support.
## Concepts
### Kubernetes `RuntimeClass`
[`RuntimeClass`](https://kubernetes.io/docs/concepts/containers/runtime-class/) is a Kubernetes feature first
introduced in Kubernetes 1.12 as alpha. It is the feature for selecting the container runtime configuration to
use to run a pod's containers. This feature is supported in `containerd` since [v1.2.0](https://github.com/containerd/containerd/releases/tag/v1.2.0).
Before `RuntimeClass` was introduced, Kubernetes was not aware of the differences between runtimes on the node. `kubelet`
creates Pod sandboxes and containers through CRI implementations, and treats all the Pods equally. However, there
are requirements to run trusted Pods (e.g. Kubernetes plugins) in a native container like runc, and to run untrusted
workloads with isolated sandboxes (e.g. Kata Containers).
As a result, the CRI implementations extended their semantics for the requirements:
- At the beginning, [Frakti](https://github.com/kubernetes/frakti) checks the network configuration of a Pod, and
treats Pods with `host` network as trusted, while others are treated as untrusted.
- containerd introduced an annotation for untrusted Pods in [v1.0](https://github.com/containerd/cri/blob/v1.0.0-rc.0/docs/config.md):
```yaml
annotations:
  io.kubernetes.cri.untrusted-workload: "true"
```
- Similarly, CRI-O introduced the annotation `io.kubernetes.cri-o.TrustedSandbox` for untrusted Pods.
To eliminate the complexity of user configuration introduced by the non-standardized annotations and provide
extensibility, `RuntimeClass` was introduced. This gives users the ability to affect the runtime behavior
through `RuntimeClass` without the knowledge of the CRI daemons. We suggest that users with multiple runtimes
use `RuntimeClass` instead of the deprecated annotations.
### Containerd Runtime V2 API: Shim V2 API
The [`containerd-shim-kata-v2` (short as `shimv2` in this documentation)](https://github.com/kata-containers/runtime/tree/master/containerd-shim-v2)
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata.
With `shimv2`, Kubernetes can launch Pod and OCI-compatible containers with one shim per Pod, instead of the `2N+1`
shims needed previously (i.e. a `containerd-shim` and a `kata-shim` for each container plus the Pod sandbox itself), and no standalone `kata-proxy`
process is used, even when VSOCK is not available.
![Kubernetes integration with shimv2](../design/arch-images/shimv2.svg)
The shim v2 is introduced in containerd [v1.2.0](https://github.com/containerd/containerd/releases/tag/v1.2.0) and Kata `shimv2`
is implemented in Kata Containers v1.5.0.
## Install
### Install Kata Containers
Follow the instructions to [install Kata Containers](https://github.com/kata-containers/documentation/blob/master/install/README.md).
### Install containerd with CRI plugin
> **Note:** `cri` is a native plugin of containerd 1.1 and above. It is built into containerd and enabled by default.
> You do not need to install `cri` if you have containerd 1.1 or above. Just remove the `cri` plugin from the list of
> `disabled_plugins` in the containerd configuration file (`/etc/containerd/config.toml`).
Follow the instructions from the [CRI installation guide](http://github.com/containerd/cri/blob/master/docs/installation.md).
Then, check if `containerd` is now available:
```bash
$ command -v containerd
```
### Install CNI plugins
> **Note:** You do not need to install CNI plugins if you do not want to use containerd with Kubernetes.
> If you have installed Kubernetes with `kubeadm`, you might have already installed the CNI plugins.
You can manually install CNI plugins as follows:
```bash
$ go get github.com/containernetworking/plugins
$ pushd $GOPATH/src/github.com/containernetworking/plugins
$ ./build_linux.sh
$ sudo mkdir /opt/cni
$ sudo cp -r bin /opt/cni/
$ popd
```
### Install `cri-tools`
> **Note:** `cri-tools` is a set of tools for CRI used for development and testing. Users who only want
> to use containerd with Kubernetes can skip the `cri-tools`.
You can install the `cri-tools` from source code:
```bash
$ go get github.com/kubernetes-incubator/cri-tools
$ pushd $GOPATH/src/github.com/kubernetes-incubator/cri-tools
$ make
$ sudo -E make install
$ popd
```
## Configuration
### Configure containerd to use Kata Containers
By default, the configuration of containerd is located at `/etc/containerd/config.toml`, and the
`cri` plugins are placed in the following section:
```toml
[plugins]
[plugins.cri]
[plugins.cri.containerd]
[plugins.cri.containerd.default_runtime]
#runtime_type = "io.containerd.runtime.v1.linux"
[plugins.cri.cni]
# conf_dir is the directory in which the admin places a CNI conf.
conf_dir = "/etc/cni/net.d"
```
The following sections outline how to add Kata Containers to the configurations.
#### Kata Containers as a `RuntimeClass`
For
- Kata Containers v1.5.0 or above (including `1.5.0-rc`)
- Containerd v1.2.0 or above
- Kubernetes v1.12.0 or above
the `RuntimeClass` method is suggested.
The following configuration includes three runtime classes:
- `plugins.cri.containerd.runtimes.runc`: runc, which is the default runtime.
- `plugins.cri.containerd.runtimes.kata`: the Kata shimv2 runtime, resolved through the containerd binary naming convention (see [the document here](https://github.com/containerd/containerd/tree/master/runtime/v2#binary-naming)),
where the dot-connected string `io.containerd.kata.v2` is translated to `containerd-shim-kata-v2` (i.e. the
binary name of the Kata implementation of [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)).
- `plugins.cri.containerd.runtimes.katacli`: the legacy setup, where `containerd-shim-runc-v1` calls `kata-runtime`.
```toml
[plugins.cri.containerd]
no_pivot = false
[plugins.cri.containerd.runtimes]
[plugins.cri.containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v1"
[plugins.cri.containerd.runtimes.runc.options]
NoPivotRoot = false
NoNewKeyring = false
ShimCgroup = ""
IoUid = 0
IoGid = 0
BinaryName = "runc"
Root = ""
CriuPath = ""
SystemdCgroup = false
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
[plugins.cri.containerd.runtimes.katacli]
runtime_type = "io.containerd.runc.v1"
[plugins.cri.containerd.runtimes.katacli.options]
NoPivotRoot = false
NoNewKeyring = false
ShimCgroup = ""
IoUid = 0
IoGid = 0
BinaryName = "/usr/bin/kata-runtime"
Root = ""
CriuPath = ""
SystemdCgroup = false
```
Since containerd v1.2.4 and Kata v1.6.0, a new runtime option is supported, which allows you to specify a specific Kata configuration file as follows:
```toml
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
[plugins.cri.containerd.runtimes.kata.options]
ConfigPath = "/etc/kata-containers/config.toml"
```
This `ConfigPath` option is optional. If you do not specify it, shimv2 first tries to get the configuration file from the environment variable `KATA_CONF_FILE`. If neither are set, shimv2 will use the default Kata configuration file paths (`/etc/kata-containers/configuration.toml` and `/usr/share/defaults/kata-containers/configuration.toml`).
If you use containerd older than v1.2.4 or Kata older than v1.6.0 and want to specify a configuration file, you can use the following workaround: shimv2 accepts the environment variable `KATA_CONF_FILE` for the configuration file path, so you can create a
shell script with the following:
```bash
#!/bin/bash
KATA_CONF_FILE=/etc/kata-containers/firecracker.toml containerd-shim-kata-v2 "$@"
```
Name it `/usr/local/bin/containerd-shim-katafc-v2` and reference it in the containerd configuration:
```toml
[plugins.cri.containerd.runtimes.kata-firecracker]
runtime_type = "io.containerd.katafc.v2"
```
#### Kata Containers as the runtime for untrusted workload
For cases without `RuntimeClass` support, we can use the legacy annotation method to support using Kata Containers
for an untrusted workload. With the following configuration, you can run trusted workloads with a runtime such as `runc`
and then, run an untrusted workload with Kata Containers:
```toml
[plugins.cri.containerd]
# "plugins.cri.containerd.default_runtime" is the runtime to use in containerd.
[plugins.cri.containerd.default_runtime]
# runtime_type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
runtime_type = "io.containerd.runtime.v1.linux"
# "plugins.cri.containerd.untrusted_workload_runtime" is a runtime to run untrusted workloads on it.
[plugins.cri.containerd.untrusted_workload_runtime]
# runtime_type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
runtime_type = "io.containerd.kata.v2"
```
For the earlier versions of Kata Containers and containerd that do not support Runtime V2 (Shim API), you can use the following alternative configuration:
```toml
[plugins.cri.containerd]
# "plugins.cri.containerd.default_runtime" is the runtime to use in containerd.
[plugins.cri.containerd.default_runtime]
# runtime_type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
runtime_type = "io.containerd.runtime.v1.linux"
# "plugins.cri.containerd.untrusted_workload_runtime" is a runtime to run untrusted workloads on it.
[plugins.cri.containerd.untrusted_workload_runtime]
# runtime_type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
runtime_type = "io.containerd.runtime.v1.linux"
# runtime_engine is the name of the runtime engine used by containerd.
runtime_engine = "/usr/bin/kata-runtime"
```
You can find more information in the [containerd config documentation](https://github.com/containerd/cri/blob/master/docs/config.md).
#### Kata Containers as the default runtime
If you want to set Kata Containers as the only runtime in the deployment, you can simply configure as follows:
```toml
[plugins.cri.containerd]
[plugins.cri.containerd.default_runtime]
runtime_type = "io.containerd.kata.v2"
```
Alternatively, for the earlier versions of Kata Containers and containerd that do not support Runtime V2 (Shim API), you can use the following alternative configuration:
```toml
[plugins.cri.containerd]
[plugins.cri.containerd.default_runtime]
runtime_type = "io.containerd.runtime.v1.linux"
runtime_engine = "/usr/bin/kata-runtime"
```
### Configuration for `cri-tools`
> **Note:** If you skipped the [Install `cri-tools`](#install-cri-tools) section, you can skip this section too.
First, add the CNI configuration in the containerd configuration.
The following configuration assumes you installed CNI as outlined in the *[Install CNI plugins](#install-cni-plugins)* section.
Put the CNI configuration as `/etc/cni/net.d/10-mynet.conf`:
```json
{
  "cniVersion": "0.2.0",
  "name": "mynet",
  "type": "bridge",
  "bridge": "cni0",
  "isGateway": true,
  "ipMasq": true,
  "ipam": {
    "type": "host-local",
    "subnet": "172.19.0.0/24",
    "routes": [
      { "dst": "0.0.0.0/0" }
    ]
  }
}
```
Next, reference the configuration directory through containerd `config.toml`:
```toml
[plugins.cri.cni]
# conf_dir is the directory in which the admin places a CNI conf.
conf_dir = "/etc/cni/net.d"
```
The configuration file of the `crictl` command line tool in `cri-tools` is located at `/etc/crictl.yaml`:
```yaml
runtime-endpoint: unix:///var/run/containerd/containerd.sock
image-endpoint: unix:///var/run/containerd/containerd.sock
timeout: 10
debug: true
```
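With that file in place, you can verify that `crictl` can reach containerd before moving on, for example:
```bash
$ sudo crictl info
```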
## Run
### Launch containers with `ctr` command line
To run a container with Kata Containers through the containerd command line, you can run the following:
```bash
$ sudo ctr image pull docker.io/library/busybox:latest
$ sudo ctr run --runtime io.containerd.run.kata.v2 -t --rm docker.io/library/busybox:latest hello sh
```
This launches a BusyBox container named `hello`, and it will be removed by `--rm` after it quits.
### Launch Pods with `crictl` command line
With the `crictl` command line of `cri-tools`, you can specify the runtime class with the `-r` or `--runtime` flag.
Use the following to launch a Pod with the `kata` runtime class, using the pod configuration from [the examples](https://github.com/kubernetes-sigs/cri-tools/tree/master/docs/examples)
of `cri-tools`:
```bash
$ sudo crictl runp -r kata podsandbox-config.yaml
36e23521e8f89fabd9044924c9aeb34890c60e85e1748e8daca7e2e673f8653e
```
You can add a container to the launched Pod with the following:
```bash
$ sudo crictl create 36e23521e8f89 container-config.yaml podsandbox-config.yaml
1aab7585530e62c446734f12f6899f095ce53422dafcf5a80055ba11b95f2da7
```
Now, start it with the following:
```bash
$ sudo crictl start 1aab7585530e6
1aab7585530e6
```
In Kubernetes, you need to create a `RuntimeClass` resource and set the `runtimeClassName` field in the Pod spec
(see this [document](https://kubernetes.io/docs/concepts/containers/runtime-class/) for more information).
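A minimal sketch (for Kubernetes v1.14 or newer, assuming the `kata` runtime entry configured in containerd above, so the handler name is `kata`):
```yaml
apiVersion: node.k8s.io/v1beta1
kind: RuntimeClass
metadata:
  name: kata
handler: kata
---
apiVersion: v1
kind: Pod
metadata:
  name: nginx-kata
spec:
  runtimeClassName: kata
  containers:
  - name: nginx
    image: nginx
```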
If `RuntimeClass` is not supported, you can use the following annotation in a Kubernetes pod to identify it as an untrusted workload:
```yaml
annotations:
  io.kubernetes.cri.untrusted-workload: "true"
```


@ -0,0 +1,463 @@
# Importing Kata Containers logs with Fluentd
* [Introduction](#introduction)
* [Overview](#overview)
* [Test stack](#test-stack)
* [Importing the logs](#importing-the-logs)
* [Direct import `logfmt` from `systemd`](#direct-import-logfmt-from-systemd)
* [Configuring `minikube`](#configuring-minikube)
* [Pull from `systemd`](#pull-from-systemd)
* [Systemd Summary](#systemd-summary)
* [Directly importing JSON](#directly-importing-json)
* [JSON in files](#json-in-files)
* [Prefixing all keys](#prefixing-all-keys)
* [Kata `shimv2`](#kata-shimv2)
* [Caveats](#caveats)
* [Summary](#summary)
# Introduction
This document describes how to import Kata Containers logs into [Fluentd](https://www.fluentd.org/),
typically for importing into an
Elastic/Fluentd/Kibana([EFK](https://github.com/kubernetes/kubernetes/tree/master/cluster/addons/fluentd-elasticsearch#running-efk-stack-in-production))
or Elastic/Logstash/Kibana([ELK](https://www.elastic.co/elastic-stack)) stack.
The majority of this document focuses on the CRI-O based (classic) Kata runtime. Much of that information
also applies to the Kata `shimv2` runtime. Differences pertaining to Kata `shimv2` can be found in their
[own section](#kata-shimv2).
> **Note:** This document does not cover any aspect of "log rotation". It is expected that any production
> stack already has a method in place to control node log growth.
# Overview
Kata generates logs. The logs can come from numerous parts of the Kata stack (the runtime, proxy, shim
and even the agent). By default the logs
[go to the system journal](https://github.com/kata-containers/runtime#logging),
but they can also be configured to be stored in files.
The default log format is [`logfmt` structured logging](https://brandur.org/logfmt), but it can be switched to
be JSON with a command line option.
Provided below are some examples of Kata log import and processing using
[Fluentd](https://www.fluentd.org/).
## Test stack
Some of the testing can be performed locally, but at other times we really need a live stack for testing.
We will use a [`minikube`](https://github.com/kubernetes/minikube/) stack with EFK enabled and Kata
installed to do our tests. Some details such as specific paths and versions of components may need
to be adapted to your specific installation.
The [Kata minikube installation guide](../install/minikube-installation-guide.md) was used to install
`minikube` with Kata Containers enabled.
The minikube EFK stack `addon` is then enabled:
```bash
$ minikube addons enable efk
```
> *Note*: Installing and booting EFK can take a little while - check progress with
> `kubectl get pods -n=kube-system` and wait for all the pods to get to the `Running` state.
## Importing the logs
Kata offers us two choices to make when storing the logs:
- Do we store them to the system log, or to separate files?
- Do we store them in `logfmt` format, or `JSON`?
We will start by examining the Kata default setup (`logfmt` stored in the system log), and then look
at other options.
## Direct import `logfmt` from `systemd`
Fluentd contains both a component that can read the `systemd` system journals, and a component
that can parse `logfmt` entries. We will utilise these in two separate steps to evaluate how well
the Kata logs import to the EFK stack.
### Configuring `minikube`
> **Note:** Setting up, configuration and deployment of `minikube` is not covered in exacting
> detail in this guide. It is presumed the user has their own Kubernetes/Fluentd
> stack and is able to modify and test it as necessary.
Minikube by default
[configures](https://github.com/kubernetes/minikube/blob/master/deploy/iso/minikube-iso/board/coreos/minikube/rootfs-overlay/etc/systemd/journald.conf)
the `systemd-journald` with the
[`Storage=volatile`](https://www.freedesktop.org/software/systemd/man/journald.conf.html) option,
which results in the journal being stored in `/run/log/journal`. Unfortunately, the Minikube EFK
Fluentd install collects most of its logs from `/var/log`, and therefore does not mount `/run/log`
into the Fluentd pod by default. This prevents us from reading the system journal by default.
This can be worked around by patching the Minikube EFK `addon` YAML to mount `/run/log` into the
Fluentd container:
```patch
diff --git a/deploy/addons/efk/fluentd-es-rc.yaml.tmpl b/deploy/addons/efk/fluentd-es-rc.yaml.tmpl
index 75e386984..83bea48b9 100644
--- a/deploy/addons/efk/fluentd-es-rc.yaml.tmpl
+++ b/deploy/addons/efk/fluentd-es-rc.yaml.tmpl
@@ -44,6 +44,8 @@ spec:
volumeMounts:
- name: varlog
mountPath: /var/log
+ - name: runlog
+ mountPath: /run/log
- name: varlibdockercontainers
mountPath: /var/lib/docker/containers
readOnly: true
@@ -57,6 +59,9 @@ spec:
- name: varlog
hostPath:
path: /var/log
+ - name: runlog
+ hostPath:
+ path: /run/log
- name: varlibdockercontainers
hostPath:
path: /var/lib/docker/containers
```
> **Note:** After making this change you will need to build your own `minikube` to encapsulate
> and use this change, or find another method to (re-)launch the Fluentd containers for the change
> to take effect.
### Pull from `systemd`
We will start with testing Fluentd pulling the Kata logs directly from the system journal with the
Fluentd [systemd plugin](https://github.com/fluent-plugin-systemd/fluent-plugin-systemd).
We modify the Fluentd config file with the following fragment. For reference, the Minikube
YAML can be found
[on GitHub](https://github.com/kubernetes/minikube/blob/master/deploy/addons/efk/fluentd-es-configmap.yaml.tmpl):
> **Note:** The below Fluentd config fragment is in the "older style" to match the Minikube version of
> Fluentd. If using a more up to date version of Fluentd, you may need to update some parameters, such as
> using `matches` rather than `filters` and placing `@` before `type`. Your Fluentd should warn you in its
> logs if such updates are necessary.
```
<source>
type systemd
tag kata-containers
path /run/log/journal
pos_file /run/log/journal/kata-journald.pos
filters [{"SYSLOG_IDENTIFIER": "kata-runtime"}, {"SYSLOG_IDENTIFIER": "kata-proxy"}, {"SYSLOG_IDENTIFIER": "kata-shim"}]
read_from_head true
</source>
```
We then apply the new YAML, and restart the Fluentd pod (by killing it, and letting the `ReplicationController`
start a new instance, which will pick up the new `ConfigMap`):
```bash
$ kubectl apply -f new-fluentd-cm.yaml
$ kubectl delete pod -n=kube-system fluentd-es-XXXXX
```
Now open the Kibana UI to the Minikube EFK `addon`, and launch a Kata QEMU based test pod in order to
generate some Kata specific log entries:
```bash
$ minikube addons open efk
$ cd $GOPATH/src/github.com/kata-containers/packaging/kata-deploy
$ kubectl apply -f examples/nginx-deployment-qemu.yaml
```
Looking at the Kibana UI, we can now see that some `kata-runtime` tagged records have appeared:
![Kata tags in EFK](./images/efk_kata_tag.png)
If we now filter on that tag, we can see just the Kata related entries
![Kata tags in EFK](./images/efk_filter_on_tag.png)
If we expand one of those entries, we can see we have imported useful information. You can then
sub-filter on, for instance, the `SYSLOG_IDENTIFIER` to differentiate the Kata components, and
on the `PRIORITY` to filter out critical issues etc.
Kata generates a significant amount of Kata specific information, which can be seen as
[`logfmt`](https://github.com/kata-containers/tests/tree/master/cmd/log-parser#logfile-requirements)
data contained in the `MESSAGE` field. Imported as-is, there is no easy way to filter on that data
in Kibana:
![Kata tags in EFK](./images/efk_syslog_entry_detail.png)
We can however further sub-parse the Kata entries using the
[Fluentd plugins](https://docs.fluentbit.io/manual/parser/logfmt) that will parse
`logfmt` formatted data. We can utilise these to parse the sub-fields using a Fluentd filter
section. At the same time, we will prefix the new fields with `kata_` to make it clear where
they have come from:
```
<filter kata-containers>
@type parser
key_name MESSAGE
format logfmt
reserve_data true
inject_key_prefix kata_
</filter>
```
The Minikube Fluentd version does not come with the `logfmt` parser installed, so we will run a local
test to check the parsing works. The resulting output from Fluentd is:
```
2020-02-21 10:31:27.810781647 +0000 kata-containers:
{"_BOOT_ID":"590edceeef5545a784ec8c6181a10400",
"_MACHINE_ID":"3dd49df65a1b467bac8d51f2eaa17e92",
"_HOSTNAME":"minikube",
"PRIORITY":"6",
"_UID":"0",
"_GID":"0",
"_SYSTEMD_SLICE":"system.slice",
"_SELINUX_CONTEXT":"kernel",
"_CAP_EFFECTIVE":"3fffffffff",
"_TRANSPORT":"syslog",
"_SYSTEMD_CGROUP":"/system.slice/crio.service",
"_SYSTEMD_UNIT":"crio.service",
"_SYSTEMD_INVOCATION_ID":"f2d99c784e6f406c87742f4bca16a4f6",
"SYSLOG_IDENTIFIER":"kata-runtime",
"_COMM":"kata-runtime",
"_EXE":"/opt/kata/bin/kata-runtime",
"SYSLOG_TIMESTAMP":"Feb 21 10:31:27 ",
"_CMDLINE":"/opt/kata/bin/kata-runtime --kata-config /opt/kata/share/defaults/kata-containers/configuration-qemu.toml --root /run/runc state 7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997",
"SYSLOG_PID":"14314",
"_PID":"14314",
"MESSAGE":"time=\"2020-02-21T10:31:27.810781647Z\" level=info msg=\"release sandbox\" arch=amd64 command=state container=7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997 name=kata-runtime pid=14314 sandbox=1c3e77cad66aa2b6d8cc846f818370f79cb0104c0b840f67d0f502fd6562b68c source=virtcontainers subsystem=sandbox",
"SYSLOG_RAW":"<6>Feb 21 10:31:27 kata-runtime[14314]: time=\"2020-02-21T10:31:27.810781647Z\" level=info msg=\"release sandbox\" arch=amd64 command=state container=7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997 name=kata-runtime pid=14314 sandbox=1c3e77cad66aa2b6d8cc846f818370f79cb0104c0b840f67d0f502fd6562b68c source=virtcontainers subsystem=sandbox\n",
"_SOURCE_REALTIME_TIMESTAMP":"1582281087810805",
"kata_level":"info",
"kata_msg":"release sandbox",
"kata_arch":"amd64",
"kata_command":"state",
"kata_container":"7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997",
"kata_name":"kata-runtime",
"kata_pid":14314,
"kata_sandbox":"1c3e77cad66aa2b6d8cc846f818370f79cb0104c0b840f67d0f502fd6562b68c",
"kata_source":"virtcontainers",
"kata_subsystem":"sandbox"}
```
Here we can see that the `MESSAGE` field has been parsed out and pre-pended into the `kata_*` fields,
which contain usefully filterable fields such as `kata_level`, `kata_command` and `kata_subsystem` etc.
### Systemd Summary
We have managed to configure Fluentd to capture the Kata logs entries from the system
journal, and further managed to then parse out the `logfmt` message into JSON to allow further analysis
inside Elastic/Kibana.
## Directly importing JSON
The underlying basic data format used by Fluentd and Elastic is JSON. If we output JSON
directly from Kata, that should make overall import and processing of the log entries more efficient.
There are potentially two things we can do with Kata here:
- Get Kata to [output its logs in `JSON` format](https://github.com/kata-containers/runtime#logging) rather
than `logfmt`.
- Get Kata to log directly into a file, rather than via the system journal. This would allow us to not need
to parse the systemd format files, and capture the Kata log lines directly. It would also avoid Fluentd
having to parse or skip over the many non-Kata related systemd journal entries that it is not at all
interested in.
In theory we could get Kata to post its messages in JSON format to the systemd journal by adding the
`--log-format=json` option to the Kata runtime, and then swapping the `logfmt` parser for the `json`
parser, but we would still need to parse the systemd files. We will skip this setup in this document, and
go directly to a full Kata specific JSON format logfile test.
### JSON in files
The Kata runtime has the ability to generate JSON logs directly, rather than its default `logfmt` format. Passing
the `--log-format=json` argument to the Kata runtime enables this. The easiest way to pass in this extra
parameter from a [Kata deploy](https://github.com/kata-containers/packaging/tree/master/kata-deploy) installation
is to edit the `/opt/kata/bin/kata-qemu` shell script (generated by the
[Kata packaging release scripts](https://github.com/kata-containers/packaging/blob/master/release/kata-deploy-binaries.sh)).
At the same time, we will add the `--log=/var/log/kata-runtime.log` argument to store the Kata logs in their
own file (rather than into the system journal).
```bash
#!/bin/bash
/opt/kata/bin/kata-runtime --kata-config "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml" --log-format=json --log=/var/log/kata-runtime.log "$@"
```
And then we'll add the Fluentd config section to parse that file. Note, we inform the parser that Kata is
generating timestamps in `iso8601` format. Kata places these timestamps into a field called `time`, which
is the default field the Fluentd parser looks for:
```
<source>
type tail
tag kata-containers
path /var/log/kata-runtime.log
pos_file /var/log/kata-runtime.pos
format json
time_format %iso8601
read_from_head true
</source>
```
This imports the `kata-runtime` logs, with the resulting records looking like:
![Kata tags in EFK](./images/efk_direct_from_json.png)
Something to note here is that we seem to have gained an awful lot of fairly identical looking fields in the
Elastic database:
![Kata tags in EFK](./images/efk_direct_json_fields.png)
In reality, they are not all identical, but do come out of one of the Kata log entries - from the
`kill` command. A JSON fragment showing an example is below:
```json
{
...
"EndpointProperties": {
"Iface": {
"Index": 4,
"MTU": 1460,
"TxQLen": 0,
"Name": "eth0",
"HardwareAddr": "ClgKAQAL",
"Flags": 19,
"RawFlags": 69699,
"ParentIndex": 15,
"MasterIndex": 0,
"Namespace": null,
"Alias": "",
"Statistics": {
"RxPackets": 1,
"TxPackets": 5,
"RxBytes": 42,
"TxBytes": 426,
"RxErrors": 0,
"TxErrors": 0,
"RxDropped": 0,
"TxDropped": 0,
"Multicast": 0,
"Collisions": 0,
"RxLengthErrors": 0,
"RxOverErrors": 0,
"RxCrcErrors": 0,
"RxFrameErrors": 0,
"RxFifoErrors": 0,
"RxMissedErrors": 0,
"TxAbortedErrors": 0,
"TxCarrierErrors": 0,
"TxFifoErrors": 0,
"TxHeartbeatErrors": 0,
"TxWindowErrors": 0,
"RxCompressed": 0,
"TxCompressed": 0
...
```
If these new fields are not required, then a Fluentd
[`record_transformer` filter](https://docs.fluentd.org/filter/record_transformer#remove_keys)
could be used to delete them before they are injected into Elastic.
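For example, a filter fragment along the following lines (a sketch; choose the keys you actually want to drop) removes the bulky interface details before injection:
```
<filter kata-containers>
  @type record_transformer
  # drop the verbose network endpoint details seen in the kill command entries
  remove_keys EndpointProperties
</filter>
```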
#### Prefixing all keys
It may be noted above that all the fields are imported with their base native name, such as
`arch` and `level`. It may be better for data storage and processing if all the fields were
identifiable as having come from Kata, to avoid namespace clashes with other imports.
This can be achieved by prefixing all the keys with, say, `kata_`. It appears `fluentd` cannot
do this directly in the input or match phases, but can in the filter/parse phase (as was done
when processing `logfmt` data for instance). To achieve this, we can first input the Kata
JSON data as a single line, and then add the prefix using a JSON filter section:
```
# Pull in as a single line...
<source>
@type tail
path /var/log/kata-runtime.log
pos_file /var/log/kata-runtime.pos
read_from_head true
tag kata-runtime
<parse>
@type none
</parse>
</source>
<filter kata-runtime>
@type parser
key_name message
# drop the original single line `message` entry
reserve_data false
inject_key_prefix kata_
<parse>
@type json
</parse>
</filter>
```
# Kata `shimv2`
When using the Kata `shimv2` runtime with `containerd`, as described in this
[how-to guide](containerd-kata.md#containerd-runtime-v2-api-shim-v2-api), the Kata logs are routed
differently, and some adjustments to the above methods will be necessary to filter them in Fluentd.
The Kata `shimv2` logs are different in two primary ways:
- The Kata logs are directed via `containerd`, and will be captured along with the `containerd` logs,
such as on the containerd stdout or in the system journal.
- In parallel, Kata `shimv2` places its logs into the system journal under the systemd name of `kata`.
Below is an example Fluentd configuration fragment showing one possible method of extracting and separating
the `containerd` and Kata logs from the system journal by filtering on the Kata `SYSLOG_IDENTIFIER` field,
using the [Fluentd v0.12 rewrite_tag_filter](https://docs.fluentd.org/v/0.12/output/rewrite_tag_filter):
```
<source>
type systemd
path /path/to/journal
# capture the containerd logs
filters [{ "_SYSTEMD_UNIT": "containerd.service" }]
pos_file /tmp/systemd-containerd.pos
read_from_head true
# tag those temporarily, as we will filter them and rewrite the tags
tag containerd_tmp_tag
</source>
# filter out and split between kata entries and containerd entries
<match containerd_tmp_tag>
@type rewrite_tag_filter
# Tag Kata entries
<rule>
key SYSLOG_IDENTIFIER
pattern kata
tag kata_tag
</rule>
# Anything that was not matched so far, tag as containerd
<rule>
key MESSAGE
pattern /.+/
tag containerd_tag
</rule>
</match>
```
# Caveats
> **Warning:** You should be aware of the following caveats, which may disrupt or change what and how
> you capture and process the Kata Containers logs.
The following caveats should be noted:
- There is a [known issue](https://github.com/kata-containers/runtime/issues/985) whereby enabling
full debug in Kata, particularly enabling agent kernel log messages, can result in corrupt log lines
being generated by Kata (due to overlapping multiple output streams).
- Presently only the `kata-runtime` can generate JSON logs, and direct them to files. Other components
such as the `proxy` and `shim` can only presently report to the system journal. Hopefully these
components will be extended with extra functionality.
# Summary
We have shown how native Kata logs (`logfmt` data in the systemd journal) can be imported, and also
how Kata can be instructed to generate JSON logs directly, and how to import those into Fluentd.
We have detailed a few known caveats, and leave it to the implementer to choose the best method for their
system.


@ -0,0 +1,106 @@
# Loading kernel modules
A new feature for loading kernel modules was introduced in Kata Containers 1.9.
The list of kernel modules and their parameters can be provided using the
configuration file or OCI annotations. The [Kata runtime][1] gives that
information to the [Kata Agent][2] through gRPC when the sandbox is created.
The [Kata Agent][2] will insert the kernel modules using `modprobe(8)`, hence
module dependencies are resolved automatically.
The sandbox will not be started when:
* A kernel module is specified and the `modprobe(8)` command is not installed in
the guest or it fails loading the module.
* The module is not available in the guest or it doesn't meet the guest kernel
requirements, like architecture and version.
The following sections document the different ways of
loading kernel modules in Kata Containers.
- [Using Kata Configuration file](#using-kata-configuration-file)
- [Using annotations](#using-annotations)
# Using Kata Configuration file
```
NOTE: Use this method only if you need to pass the kernel modules to all
containers. Otherwise, use the annotations described below to set them per pod.
```
The list of kernel modules and parameters can be set in the `kernel_modules`
option as a comma separated list, where each entry in the list specifies a kernel
module and its parameters. Each list element comprises one or more space separated
fields. The first field specifies the module name and subsequent fields specify
individual parameters for the module.
The following example specifies two modules to load: `e1000e` and `i915`. Two parameters
are specified for the `e1000e` module: `InterruptThrottleRate` (which takes an array
of integer values) and `EEE` (which requires a single integer value).
```toml
kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915"]
```
Not all container managers allow users to provide custom annotations, hence
this is the only way that Kata Containers provides for loading modules when
custom annotations are not supported.
There are some limitations with this approach:
* Write access to the Kata configuration file is required.
* The configuration file must be updated when a new container is created,
otherwise the same list of modules is used, even if they are not needed in the
container.
# Using annotations
As was mentioned above, not all containers need the same modules, therefore using
the configuration file for specifying the list of kernel modules per [POD][3] can
be a pain. Unlike the configuration file, annotations provide a way to specify
custom configurations per POD.
The list of kernel modules and parameters can be set using the annotation
`io.katacontainers.config.agent.kernel_modules` as a semicolon separated
list, where the first word of each element is considered as the module name and
the rest as its parameters.
In the following example two PODs are created, but the kernel modules `e1000e`
and `i915` are inserted only in the POD `pod1`.
```yaml
apiVersion: v1
kind: Pod
metadata:
  name: pod1
  annotations:
    io.katacontainers.config.agent.kernel_modules: "e1000e EEE=1; i915"
spec:
  runtimeClassName: kata
  containers:
  - name: c1
    image: busybox
    command:
    - sh
    stdin: true
    tty: true
---
apiVersion: v1
kind: Pod
metadata:
  name: pod2
spec:
  runtimeClassName: kata
  containers:
  - name: c2
    image: busybox
    command:
    - sh
    stdin: true
    tty: true
```
[1]: https://github.com/kata-containers/runtime
[2]: https://github.com/kata-containers/agent
[3]: https://kubernetes.io/docs/concepts/workloads/pods/pod/


@ -0,0 +1,160 @@
# Per-Pod Kata Configurations
Kata Containers gives users the freedom to customize at the per-pod level by setting
a wide range of Kata specific annotations in the pod specification.
# Kata Configuration Annotations
There are several kinds of Kata configurations and they are listed below.
## Global Options
| Key | Value Type | Comments |
|-------| ----- | ----- |
| `io.katacontainers.config_path` | string | Kata config file location that overrides the default config paths |
| `io.katacontainers.pkg.oci.bundle_path` | string | OCI bundle path |
| `io.katacontainers.pkg.oci.container_type`| string | OCI container type. Only accepts `pod_container` and `pod_sandbox` |
## Runtime Options
| Key | Value Type | Comments |
|-------| ----- | ----- |
| `io.katacontainers.config.runtime.experimental` | `boolean` | determines if experimental features are enabled |
| `io.katacontainers.config.runtime.disable_guest_seccomp`| `boolean` | determines if `seccomp` should be applied inside guest |
| `io.katacontainers.config.runtime.disable_new_netns` | `boolean` | determines if a new netns is created for the hypervisor process |
| `io.katacontainers.config.runtime.internetworking_model` | string| determines how the VM should be connected to the container network interface. Valid values are `macvtap`, `tcfilter` and `none` |
| `io.katacontainers.config.runtime.sandbox_cgroup_only`| `boolean` | determines if Kata processes are managed only in sandbox cgroup |
## Agent Options
| Key | Value Type | Comments |
|-------| ----- | ----- |
| `io.katacontainers.config.agent.enable_tracing` | `boolean` | enable tracing for the agent |
| `io.katacontainers.config.agent.kernel_modules` | string | the list of kernel modules and their parameters that will be loaded in the guest kernel. Semicolon separated list of kernel modules and their parameters. These modules will be loaded in the guest kernel using `modprobe`(8). E.g., `e1000e InterruptThrottleRate=3000,3000,3000 EEE=1; i915 enable_ppgtt=0` |
| `io.katacontainers.config.agent.trace_mode` | string | the trace mode for the agent |
| `io.katacontainers.config.agent.trace_type` | string | the trace type for the agent |
## Hypervisor Options
| Key | Value Type | Comments |
|-------| ----- | ----- |
| `io.katacontainers.config.hypervisor.asset_hash_type` | string | the hash type used for assets verification, default is `sha512` |
| `io.katacontainers.config.hypervisor.block_device_cache_direct` | `boolean` | Denotes whether use of `O_DIRECT` (bypass the host page cache) is enabled |
| `io.katacontainers.config.hypervisor.block_device_cache_noflush` | `boolean` | Denotes whether flush requests for the device are ignored |
| `io.katacontainers.config.hypervisor.block_device_cache_set` | `boolean` | determines whether cache-related options are set for block devices |
| `io.katacontainers.config.hypervisor.block_device_driver` | string | the driver to be used for block device, valid values are `virtio-blk`, `virtio-scsi`, `nvdimm`|
| `io.katacontainers.config.hypervisor.default_max_vcpus` | uint32| the maximum number of vCPUs allocated for the VM by the hypervisor |
| `io.katacontainers.config.hypervisor.default_memory` | uint32| the memory assigned for a VM by the hypervisor in `MiB` |
| `io.katacontainers.config.hypervisor.default_vcpus` | uint32| the default vCPUs assigned for a VM by the hypervisor |
| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disallow a block device from being used |
| `io.katacontainers.config.hypervisor.disable_vhost_net` | `boolean` | specify if `vhost-net` is not available on the host |
| `io.katacontainers.config.hypervisor.enable_hugepages` | `boolean` | if the memory should be `pre-allocated` from huge pages |
| `io.katacontainers.config.hypervisor.enable_iothreads` | `boolean`| enable IO to be processed in a separate thread. Currently supported for the `virtio-scsi` driver |
| `io.katacontainers.config.hypervisor.enable_mem_prealloc` | `boolean` | pre-allocate the VM memory up front (typically used together with huge pages) |
| `io.katacontainers.config.hypervisor.enable_swap` | `boolean` | enable swap of VM memory |
| `io.katacontainers.config.hypervisor.entropy_source` | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) |
| `io.katacontainers.config.hypervisor.file_mem_backend` | string | file based memory backend root directory |
| `io.katacontainers.config.hypervisor.firmware_hash` | string | container firmware SHA-512 hash value |
| `io.katacontainers.config.hypervisor.firmware` | string | the guest firmware that will run the container VM |
| `io.katacontainers.config.hypervisor.guest_hook_path` | string | the path within the VM that will be used for drop in hooks |
| `io.katacontainers.config.hypervisor.hotplug_vfio_on_root_bus` | `boolean` | indicate if devices need to be hotplugged on the root bus instead of a bridge|
| `io.katacontainers.config.hypervisor.hypervisor_hash` | string | container hypervisor binary SHA-512 hash value |
| `io.katacontainers.config.hypervisor.image_hash` | string | container guest image SHA-512 hash value |
| `io.katacontainers.config.hypervisor.image` | string | the guest image that will run in the container VM |
| `io.katacontainers.config.hypervisor.initrd_hash` | string | container guest initrd SHA-512 hash value |
| `io.katacontainers.config.hypervisor.initrd` | string | the guest initrd image that will run in the container VM |
| `io.katacontainers.config.hypervisor.jailer_hash` | string | container jailer SHA-512 hash value |
| `io.katacontainers.config.hypervisor.jailer_path` | string | the jailer that will constrain the container VM |
| `io.katacontainers.config.hypervisor.kernel_hash` | string | container kernel image SHA-512 hash value |
| `io.katacontainers.config.hypervisor.kernel_params` | string | additional guest kernel parameters |
| `io.katacontainers.config.hypervisor.kernel` | string | the kernel used to boot the container VM |
| `io.katacontainers.config.hypervisor.machine_accelerators` | string | machine specific accelerators for the hypervisor |
| `io.katacontainers.config.hypervisor.machine_type` | string | the type of machine being emulated by the hypervisor |
| `io.katacontainers.config.hypervisor.memory_offset` | uint32| the memory space used for `nvdimm` device by the hypervisor |
| `io.katacontainers.config.hypervisor.memory_slots` | uint32| the memory slots assigned to the VM by the hypervisor |
| `io.katacontainers.config.hypervisor.msize_9p` | uint32 | the `msize` for 9p shares |
| `io.katacontainers.config.hypervisor.path` | string | the hypervisor that will run the container VM |
| `io.katacontainers.config.hypervisor.shared_fs` | string | the shared file system type, either `virtio-9p` or `virtio-fs` |
| `io.katacontainers.config.hypervisor.use_vsock` | `boolean` | specify use of `vsock` for agent communication |
| `io.katacontainers.config.hypervisor.virtio_fs_cache_size` | uint32 | virtio-fs DAX cache size in `MiB` |
| `io.katacontainers.config.hypervisor.virtio_fs_cache` | string | the cache mode for virtio-fs, valid values are `always`, `auto` and `none` |
| `io.katacontainers.config.hypervisor.virtio_fs_daemon` | string | virtio-fs `vhost-user` daemon path |
| `io.katacontainers.config.hypervisor.virtio_fs_extra_args` | string | extra options passed to `virtiofs` daemon |
# CRI Configuration
In case of CRI-O, all annotations specified in the pod spec are passed down to Kata.
For containerd, annotations specified in the pod spec are passed down to Kata
starting with version `1.3.0`. Additionally, extra configuration is needed for containerd,
by providing a `pod_annotations` field in the containerd config file. The `pod_annotations`
field is a list of annotations that can be passed down to Kata as OCI annotations.
It supports golang match patterns. Since annotations supported by Kata follow the pattern
`io.katacontainers.*`, the following configuration would work for passing annotations to
Kata from containerd:
```
$ cat /etc/containerd/config
....
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.runc.v1"
pod_annotations = ["io.katacontainers.*"]
[plugins.cri.containerd.runtimes.kata.options]
BinaryName = "/usr/bin/kata-runtime"
....
```
Additional documentation on the above configuration can be found in the
[containerd docs](https://github.com/containerd/cri/blob/8d5a8355d07783ba2f8f451209f6bdcc7c412346/docs/config.md).
# Example - Using annotations
As mentioned above, not all containers need the same modules, therefore using
the configuration file for specifying the list of kernel modules per POD can
be a pain. Unlike the configuration file, annotations provide a way to specify
custom configurations per POD.
The list of kernel modules and parameters can be set using the annotation
`io.katacontainers.config.agent.kernel_modules` as a semicolon separated
list, where the first word of each element is considered as the module name and
the rest as its parameters.
Also users might want to enable guest `seccomp` to provide better isolation with a
little performance sacrifice. The annotation
`io.katacontainers.config.runtime.disable_guest_seccomp` can be used for this purpose.
In the following example two PODs are created, but the kernel modules `e1000e`
and `i915` are inserted only in the POD `pod1`. Also guest `seccomp` is only enabled
in the POD `pod2`.
```yaml
apiVersion: v1
kind: Pod
metadata:
  name: pod1
  annotations:
    io.katacontainers.config.agent.kernel_modules: "e1000e EEE=1; i915"
spec:
  runtimeClassName: kata
  containers:
  - name: c1
    image: busybox
    command:
    - sh
    stdin: true
    tty: true
---
apiVersion: v1
kind: Pod
metadata:
  name: pod2
  annotations:
    io.katacontainers.config.runtime.disable_guest_seccomp: "false"
spec:
  runtimeClassName: kata
  containers:
  - name: c2
    image: busybox
    command:
    - sh
    stdin: true
    tty: true
```

View File

@ -0,0 +1,220 @@
# How to use Kata Containers and CRI (containerd plugin) with Kubernetes
* [Requirements](#requirements)
* [Install and configure containerd](#install-and-configure-containerd)
* [Install and configure Kubernetes](#install-and-configure-kubernetes)
* [Install Kubernetes](#install-kubernetes)
* [Configure Kubelet to use containerd](#configure-kubelet-to-use-containerd)
* [Configure HTTP proxy - OPTIONAL](#configure-http-proxy---optional)
* [Start Kubernetes](#start-kubernetes)
* [Install a Pod Network](#install-a-pod-network)
* [Allow pods to run in the master node](#allow-pods-to-run-in-the-master-node)
* [Create an untrusted pod using Kata Containers](#create-an-untrusted-pod-using-kata-containers)
* [Delete created pod](#delete-created-pod)
This document describes how to set up a single-machine Kubernetes (k8s) cluster.
The Kubernetes cluster will use the
[CRI containerd plugin](https://github.com/containerd/cri) and
[Kata Containers](https://katacontainers.io) to launch untrusted workloads.
For Kata Containers 1.5.0-rc2 and above, we will use `containerd-shim-kata-v2` (`shimv2` for short in this documentation)
to launch Kata Containers. For previous versions of Kata Containers, pods are launched with `kata-runtime`.
## Requirements
- Kubernetes, Kubelet, `kubeadm`
- containerd with `cri` plug-in
- Kata Containers
> **Note:** For information about the supported versions of these components,
> see the Kata Containers
> [`versions.yaml`](https://github.com/kata-containers/runtime/blob/master/versions.yaml)
> file.
## Install and configure containerd
First, follow the [How to use Kata Containers and Containerd](containerd-kata.md) guide to install and configure containerd.
Then, make sure containerd works with the [examples in it](containerd-kata.md#run).
## Install and configure Kubernetes
### Install Kubernetes
- Follow the instructions for
[`kubeadm` installation](https://kubernetes.io/docs/setup/independent/install-kubeadm/).
- Check `kubeadm` is now available
```bash
$ command -v kubeadm
```
### Configure Kubelet to use containerd
In order to allow Kubelet to use containerd (using the CRI interface), configure the service to point to the `containerd` socket.
- Configure Kubernetes to use `containerd`
```bash
$ sudo mkdir -p /etc/systemd/system/kubelet.service.d/
$ cat << EOF | sudo tee /etc/systemd/system/kubelet.service.d/0-containerd.conf
[Service]
Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock"
EOF
```
- Inform systemd about the new configuration
```bash
$ sudo systemctl daemon-reload
```
### Configure HTTP proxy - OPTIONAL
If you are behind a proxy, use the following script to configure your proxy for docker, Kubelet, and containerd:
```bash
$ services="
kubelet
containerd
docker
"
$ for service in ${services}; do
service_dir="/etc/systemd/system/${service}.service.d/"
sudo mkdir -p ${service_dir}
cat << EOT | sudo tee "${service_dir}/proxy.conf"
[Service]
Environment="HTTP_PROXY=${http_proxy}"
Environment="HTTPS_PROXY=${https_proxy}"
Environment="NO_PROXY=${no_proxy}"
EOT
done
$ sudo systemctl daemon-reload
```
## Start Kubernetes
- Make sure `containerd` is up and running
```bash
$ sudo systemctl restart containerd
$ sudo systemctl status containerd
```
- Prevent conflicts between `docker` iptables (packet filtering) rules and k8s pod communication
If Docker is installed on the node, you must set the `iptables` FORWARD policy
as shown below. See https://github.com/kubernetes/kubernetes/issues/40182 for further
details.
```bash
$ sudo iptables -P FORWARD ACCEPT
```
- Start cluster using `kubeadm`
```bash
$ sudo kubeadm init --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16
$ export KUBECONFIG=/etc/kubernetes/admin.conf
$ sudo -E kubectl get nodes
$ sudo -E kubectl get pods
```
## Install a Pod Network
A pod network plugin is needed to allow pods to communicate with each other.
- Install the `flannel` plugin by following the
[Using `kubeadm` to Create a Cluster](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/#instructions)
guide, starting from the **Installing a pod network** section.
- Create a pod network using flannel
> **Note:** There is no known way to determine programmatically the best version (commit) to use.
> See https://github.com/coreos/flannel/issues/995.
```bash
$ sudo -E kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
```
- Wait for the pod network to become available
```bash
# number of seconds to wait for pod network to become available
$ timeout_dns=420
$ while [ "$timeout_dns" -gt 0 ]; do
if sudo -E kubectl get pods --all-namespaces | grep dns | grep Running; then
break
fi
sleep 1s
((timeout_dns--))
done
```
- Check the pod network is running
```bash
$ sudo -E kubectl get pods --all-namespaces | grep dns | grep Running && echo "OK" || ( echo "FAIL" && false )
```
## Allow pods to run in the master node
By default, the cluster will not schedule pods in the master node. To enable master node scheduling:
```bash
$ sudo -E kubectl taint nodes --all node-role.kubernetes.io/master-
```
## Create an untrusted pod using Kata Containers
By default, all pods are created with the default runtime configured in the CRI containerd plugin.
If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true"`, the CRI plugin runs the pod with the
[Kata Containers runtime](https://github.com/kata-containers/runtime/blob/master/README.md).
- Create an untrusted pod configuration
```bash
$ cat << EOT | tee nginx-untrusted.yaml
apiVersion: v1
kind: Pod
metadata:
  name: nginx-untrusted
  annotations:
    io.kubernetes.cri.untrusted-workload: "true"
spec:
  containers:
  - name: nginx
    image: nginx
EOT
```
- Create an untrusted pod
```bash
$ sudo -E kubectl apply -f nginx-untrusted.yaml
```
- Check pod is running
```bash
$ sudo -E kubectl get pods
```
- Check hypervisor is running
```bash
$ ps aux | grep qemu
```
## Delete created pod
```bash
$ sudo -E kubectl delete -f nginx-untrusted.yaml
```

View File

@ -0,0 +1,130 @@
# Kata Containers with ACRN
This document provides an overview on how to run Kata containers with ACRN hypervisor and device model.
- [Introduction](#introduction)
- [Pre-requisites](#pre-requisites)
- [Configure Docker](#configure-docker)
- [Configure Kata Containers with ACRN](#configure-kata-containers-with-acrn)
## Introduction
ACRN is a flexible, lightweight Type-1 reference hypervisor built with real-time and safety-criticality in mind. ACRN uses an open source platform and is optimized to streamline embedded development.
Some of its key features are:
- Small footprint - approximately 25K lines of code (LOC).
- Real time - low latency, fast boot time, and improved overall responsiveness with hardware.
- Adaptability - Multi-OS support for guest operating systems like Linux, Android, RTOSes.
- Rich I/O mediators - Allows sharing of various I/O devices across VMs.
- Optimized for a variety of IoT (Internet of Things) and embedded device solutions.
Please refer to ACRN [documentation](https://projectacrn.github.io/latest/index.html) for more details on ACRN hypervisor and device model.
## Pre-requisites
This document requires the presence of the ACRN hypervisor and Kata Containers on your system. Install using the instructions available through the following links:
- ACRN supported [Hardware](https://projectacrn.github.io/latest/hardware.html#supported-hardware).
> **Note:** Please make sure to have a minimum of 4 logical processors (HT) or cores.
- ACRN [software](https://projectacrn.github.io/latest/tutorials/kbl-nuc-sdc.html#use-the-script-to-set-up-acrn-automatically) setup.
- For networking, ACRN supports either MACVTAP or TAP. If MACVTAP is not enabled in the Service OS, please follow the below steps to update the kernel:
```sh
$ git clone https://github.com/projectacrn/acrn-kernel.git
$ cd acrn-kernel
$ cp kernel_config_sos .config
$ sed -i "s/# CONFIG_MACVLAN is not set/CONFIG_MACVLAN=y/" .config
$ sed -i '$ i CONFIG_MACVTAP=y' .config
$ make clean && make olddefconfig && make && sudo make modules_install INSTALL_MOD_PATH=out/
```
Log in to the Service OS and update the kernel with MACVTAP support:
```sh
$ sudo mount /dev/sda1 /mnt
$ sudo scp -r <user name>@<host address>:<your workspace>/acrn-kernel/arch/x86/boot/bzImage /mnt/EFI/org.clearlinux/
$ sudo scp -r <user name>@<host address>:<your workspace>/acrn-kernel/out/lib/modules/* /lib/modules/
$ conf_file=$(sed -n '$ s/default //p' /mnt/loader/loader.conf).conf
$ kernel_img=$(sed -n 2p /mnt/loader/entries/$conf_file | cut -d'/' -f4)
$ sudo sed -i "s/$kernel_img/bzImage/g" /mnt/loader/entries/$conf_file
$ sync && sudo umount /mnt && sudo reboot
```
- Kata Containers installation: Automated installation does not seem to be supported for Clear Linux, so please use [manual installation](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md) steps.
> **Note:** Create rootfs image and not initrd image.
In order to run Kata with ACRN, your container stack must provide block-based storage, such as device-mapper.
> **Note:** Currently, by design you can only launch one VM from Kata Containers using the ACRN hypervisor (SDC scenario). Based on community feedback, this limit may be increased.
## Configure Docker
To configure Docker for device-mapper and Kata,
1. Stop Docker daemon if it is already running.
```bash
$ sudo systemctl stop docker
```
2. Set `/etc/docker/daemon.json` with the following contents.
```
{
"storage-driver": "devicemapper"
}
```
3. Restart Docker.
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Configure [Docker](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#update-the-docker-systemd-unit-file) to use `kata-runtime`.
## Configure Kata Containers with ACRN
To configure Kata Containers with ACRN, copy the generated `configuration-acrn.toml` file when building the `kata-runtime` to either `/etc/kata-containers/configuration.toml` or `/usr/share/defaults/kata-containers/configuration.toml`.
The following command shows full paths to the `configuration.toml` files that the runtime loads. It will use the first path that exists. (Please make sure the kernel and image paths are set correctly in the `configuration.toml` file)
```bash
$ sudo kata-runtime --kata-show-default-config-paths
```
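For example, to make the ACRN settings take precedence you can copy the generated file into the override location (a sketch; it assumes `configuration-acrn.toml` is in your current directory):
```bash
$ sudo mkdir -p /etc/kata-containers
$ sudo cp configuration-acrn.toml /etc/kata-containers/configuration.toml
```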
>**Warning:** Please offline CPUs using [this](offline_cpu.sh) script, otherwise VM launches will fail.
```bash
$ sudo ./offline_cpu.sh
```
Start an ACRN-based Kata Container:
```bash
$ sudo docker run -ti --runtime=kata-runtime busybox sh
```
You will see ACRN (`acrn-dm`) running on your system, as well as `kata-shim` and `kata-proxy` processes. You should obtain an interactive shell prompt. Verify that all the Kata processes terminate once you exit the container.
```bash
$ ps -ef | grep -E "kata|acrn"
```
Validate the ACRN hypervisor by using `kata-runtime kata-env`:
```sh
$ kata-runtime kata-env | awk -v RS= '/\[Hypervisor\]/'
[Hypervisor]
MachineType = ""
Version = "DM version is: 1.2-unstable-254577a6-dirty (daily tag:acrn-2019w27.4-140000p)
Path = "/usr/bin/acrn-dm"
BlockDeviceDriver = "virtio-blk"
EntropySource = "/dev/urandom"
Msize9p = 0
MemorySlots = 10
Debug = false
UseVSock = false
SharedFS = ""
```

View File

@ -0,0 +1,115 @@
# Kata Containers with NEMU
* [Introduction](#introduction)
* [Pre-requisites](#pre-requisites)
* [NEMU](#nemu)
* [Download and build](#download-and-build)
* [x86_64](#x86_64)
* [aarch64](#aarch64)
* [Configure Kata Containers](#configure-kata-containers)
Kata Containers relies by default on the QEMU hypervisor in order to spawn the virtual machines running containers. [NEMU](https://github.com/intel/nemu) is a fork of QEMU that:
- Reduces the number of lines of code.
- Removes all legacy devices.
- Reduces the emulation as far as possible.
## Introduction
This document describes how to run Kata Containers with NEMU, first by explaining how to download, build and install it. Then it walks through the steps needed to update your Kata Containers configuration in order to run with NEMU.
## Pre-requisites
This document requires Kata Containers to be [installed](https://github.com/kata-containers/documentation/blob/master/install/README.md) on your system.
Also, it's worth noting that NEMU only supports the `x86_64` and `aarch64` architectures.
## NEMU
### Download and build
```bash
$ git clone https://github.com/intel/nemu.git
$ cd nemu
$ git fetch origin
$ git checkout origin/experiment/automatic-removal
```
#### x86_64
```
$ SRCDIR=$PWD ./tools/build_x86_64_virt.sh
```
#### aarch64
```
$ SRCDIR=$PWD ./tools/build_aarch64.sh
```
> **Note:** The branch `experiment/automatic-removal` is a branch published by Jenkins after it has applied the automatic removal script to the `topic/virt-x86` branch. The purpose of this code removal is to reduce the source tree by removing files not used by NEMU.
After those commands have successfully returned, you will find the NEMU binary at `$HOME/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt` (__x86__), or `$HOME/build-aarch64/aarch64-softmmu/qemu-system-aarch64` (__ARM__).
You also need the `OVMF` firmware in order to boot the virtual machine's kernel. It can currently be found at this [location](https://github.com/intel/ovmf-virt/releases).
```bash
$ sudo mkdir -p /usr/share/nemu
$ OVMF_URL=$(curl -sL https://api.github.com/repos/intel/ovmf-virt/releases/latest | jq -S '.assets[0].browser_download_url')
$ curl -o OVMF.fd -L $(sed -e 's/^"//' -e 's/"$//' <<<"$OVMF_URL")
$ sudo install -o root -g root -m 0640 OVMF.fd /usr/share/nemu/
```
> **Note:** The OVMF firmware will be located at this temporary location until the changes can be pushed upstream.
## Configure Kata Containers
All you need from this section is to modify the configuration file `/usr/share/defaults/kata-containers/configuration.toml` to specify the options related to the hypervisor.
```diff
[hypervisor.qemu]
-path = "/usr/bin/qemu-lite-system-x86_64"
+path = "/home/foo/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt"
kernel = "/usr/share/kata-containers/vmlinuz.container"
initrd = "/usr/share/kata-containers/kata-containers-initrd.img"
image = "/usr/share/kata-containers/kata-containers.img"
-machine_type = "pc"
+machine_type = "virt"
# Optional space-separated list of options to pass to the guest kernel.
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
@@ -31,7 +31,7 @@
# Path to the firmware.
# If you want that qemu uses the default firmware leave this option empty
-firmware = ""
+firmware = "/usr/share/nemu/OVMF.fd"
# Machine accelerators
# comma-separated list of machine accelerators to pass to the hypervisor.
```
As you can see from the snippet above, all you need to change is:
- The path to the hypervisor binary, `/home/foo/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt` in this example.
- The machine type from `pc` to `virt`.
- The path to the firmware binary, `/usr/share/nemu/OVMF.fd` in this example.
Once you have saved those modifications, you can start a new container:
```bash
$ docker run --runtime=kata-runtime -it busybox
```
You can verify that this new container is running with the NEMU hypervisor by looking for the hypervisor path and the machine type in the `qemu` process running on your system:
```bash
$ ps -aux | grep qemu
root ... /home/foo/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt
... -machine virt,accel=kvm,kernel_irqchip,nvdimm ...
```
Relying on `kata-runtime kata-env` is also a reliable way to validate that you are using the expected hypervisor:
```bash
$ kata-runtime kata-env | awk -v RS= '/\[Hypervisor\]/'
[Hypervisor]
MachineType = "virt"
Version = "NEMU (like QEMU) version 3.0.0 (v3.0.0-179-gaf9a791)\nCopyright (c) 2003-2017 Fabrice Bellard and the QEMU Project developers"
Path = "/home/foo/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt"
BlockDeviceDriver = "virtio-scsi"
EntropySource = "/dev/urandom"
Msize9p = 8192
MemorySlots = 10
Debug = true
UseVSock = false
```

View File

@ -0,0 +1,143 @@
# Setting Sysctls with Kata
## Sysctls
In Linux, the sysctl interface allows an administrator to modify kernel
parameters at runtime. Parameters are available via the `/proc/sys/` virtual
process file system.
The parameters include the following subsystems among others:
- `fs` (file systems)
- `kernel` (kernel)
- `net` (networking)
- `vm` (virtual memory)
To get a complete list of kernel parameters, run:
```
$ sudo sysctl -a
```
Both Docker and Kubernetes provide mechanisms for setting namespaced sysctls.
Namespaced sysctls can be set per pod in the case of Kubernetes, or per container
in the case of Docker.
The following sysctls are known to be namespaced and can be set with
Docker and Kubernetes:
- `kernel.shm*`
- `kernel.msg*`
- `kernel.sem`
- `fs.mqueue.*`
- `net.*`
### Namespaced Sysctls:
Kata Containers supports setting namespaced sysctls with Docker and Kubernetes.
All namespaced sysctls can be set in the same way as with regular Linux based
containers; the difference is that, in the case of Kata, they are set inside the guest.
#### Setting Namespaced Sysctls with Docker:
```
$ sudo docker run --runtime=kata-runtime -it alpine cat /proc/sys/fs/mqueue/queues_max
256
$ sudo docker run --runtime=kata-runtime --sysctl fs.mqueue.queues_max=512 -it alpine cat /proc/sys/fs/mqueue/queues_max
512
```
... and:
```
$ sudo docker run --runtime=kata-runtime -it alpine cat /proc/sys/kernel/shmmax
18446744073692774399
$ sudo docker run --runtime=kata-runtime --sysctl kernel.shmmax=1024 -it alpine cat /proc/sys/kernel/shmmax
1024
```
For additional documentation on setting sysctls with Docker please refer to [Docker-sysctl-doc](https://docs.docker.com/engine/reference/commandline/run/#configure-namespaced-kernel-parameters-sysctls-at-runtime).
#### Setting Namespaced Sysctls with Kubernetes:
Kubernetes considers certain sysctls as safe and others as unsafe. For detailed
information about which sysctls are considered unsafe, please refer to the [Kubernetes sysctl docs](https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/).
To use unsafe sysctls, the cluster admin needs to allow them, for example on the kubelet command line:
```
$ kubelet --allowed-unsafe-sysctls 'kernel.msg*,net.ipv4.route.min_pmtu' ...
```
or using the declarative approach:
```
$ cat kubeadm.yaml
apiVersion: kubeadm.k8s.io/v1alpha3
kind: InitConfiguration
nodeRegistration:
  kubeletExtraArgs:
    allowed-unsafe-sysctls: "kernel.msg*,kernel.shm.*,net.*"
...
```
The above YAML can then be passed to `kubeadm init` as:
```
$ sudo -E kubeadm init --config=kubeadm.yaml
```
Both safe and unsafe sysctls can be enabled in the same way in the Pod YAML:
```
apiVersion: v1
kind: Pod
metadata:
  name: sysctl-example
spec:
  securityContext:
    sysctls:
    - name: kernel.shm_rmid_forced
      value: "0"
    - name: net.ipv4.route.min_pmtu
      value: "1024"
```
### Non-Namespaced Sysctls:
Docker and Kubernetes disallow sysctls without a namespace.
The recommendation is to set them directly on the host or to use a privileged
container in the case of Kubernetes.
In the case of Kata, setting sysctls on the host does not work, since host
sysctls have no effect on a Kata Container running inside a guest. Kata gives
you the ability to set non-namespaced sysctls using a privileged container.
This has the advantage that the non-namespaced sysctls are set inside the guest
without having any effect on the `/proc/sys` values of any other pod or the
host itself.
The recommended approach to do this would be to set the sysctl value in a
privileged init container. In this way, the application containers do not need any elevated
privileges.
```
apiVersion: v1
kind: Pod
metadata:
  name: busybox-kata
spec:
  runtimeClassName: kata-qemu
  securityContext:
    sysctls:
    - name: kernel.shm_rmid_forced
      value: "0"
  containers:
  - name: busybox-container
    securityContext:
      privileged: true
    image: debian
    command:
      - sleep
      - "3000"
  initContainers:
  - name: init-sys
    securityContext:
      privileged: true
    image: busybox
    command: ['sh', '-c', 'echo "64000" > /proc/sys/vm/max_map_count']
```
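Once the pod is running, you can check that the init container took effect inside the guest (a sketch; it assumes the pod and container names from the example above):
```bash
$ kubectl exec busybox-kata -c busybox-container -- cat /proc/sys/vm/max_map_count
64000
```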
```

View File

@ -0,0 +1,51 @@
# Kata Containers with virtio-fs
- [Introduction](#introduction)
- [Pre-requisites](#pre-requisites)
- [Install Kata Containers with virtio-fs support](#install-kata-containers-with-virtio-fs-support)
- [Run a Kata Container utilizing virtio-fs](#run-a-kata-container-utilizing-virtio-fs)
## Introduction
Container deployments utilize explicit or implicit file sharing between host filesystem and containers. From a trust perspective, avoiding a shared file-system between the trusted host and untrusted container is recommended. This is not always feasible. In Kata Containers, block-based volumes are preferred as they allow usage of either device pass through or `virtio-blk` for access within the virtual machine.
As of the 1.7 release of Kata Containers, [9pfs](https://www.kernel.org/doc/Documentation/filesystems/9p.txt) is the default filesystem sharing mechanism. While this does allow for workload compatibility, it does so with degraded performance and potential for POSIX compliance limitations.
To help address these limitations, [virtio-fs](https://virtio-fs.gitlab.io/) has been developed. virtio-fs is a shared file system that lets virtual machines access a directory tree on the host. In Kata Containers, virtio-fs can be used to share container volumes, secrets, config-maps, configuration files (hostname, hosts, `resolv.conf`) and the container rootfs on the host with the guest. virtio-fs provides significant performance and POSIX compliance improvements compared to 9pfs.
Enabling virtio-fs requires changes in the guest kernel as well as the VMM. For Kata Containers, experimental virtio-fs support is enabled through the [NEMU VMM](https://github.com/intel/nemu).
**Note: virtio-fs support is experimental in the 1.7 release of Kata Containers. Work is underway to improve stability, performance and upstream integration. This is available for early preview - use at your own risk**
This document describes how to get Kata Containers to work with virtio-fs.
## Pre-requisites
* Before Kata 1.8 this feature required the host to have hugepages support enabled. Enable this with the `sysctl vm.nr_hugepages=1024` command on the host.
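A minimal sketch of enabling this, including persisting the setting across reboots (appending to `/etc/sysctl.conf` is the conventional approach; your distribution may prefer a file under `/etc/sysctl.d/`):
```bash
$ sudo sysctl -w vm.nr_hugepages=1024
$ echo "vm.nr_hugepages = 1024" | sudo tee -a /etc/sysctl.conf
```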
## Install Kata Containers with virtio-fs support
The Kata Containers NEMU configuration, the NEMU VMM and the `virtiofs` daemon are available in the [Kata Container release](https://github.com/kata-containers/runtime/releases) artifacts starting with the 1.7 release. While the feature is experimental, distribution packages are not provided; installation is available through [`kata-deploy`](https://github.com/kata-containers/packaging/tree/master/kata-deploy).
Install the latest release of Kata as follows:
```
$ docker run --runtime=runc -v /opt/kata:/opt/kata -v /var/run/dbus:/var/run/dbus -v /run/systemd:/run/systemd -v /etc/docker:/etc/docker -it katadocker/kata-deploy kata-deploy-docker install
```
This will place the Kata release artifacts in `/opt/kata`, and update Docker's configuration to include a runtime target, `kata-nemu`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](https://github.com/kata-containers/packaging/tree/master/kata-deploy#kubernetes-quick-start).
## Run a Kata Container utilizing virtio-fs
Once installed, start a new container, utilizing NEMU + `virtiofs`:
```bash
$ docker run --runtime=kata-nemu -it busybox
```
Verify the new container is running with the NEMU hypervisor as well as using `virtiofsd`. To do this, look for the hypervisor path and the `virtiofs` daemon process on the host:
```bash
$ ps -aux | grep virtiofs
root ... /home/foo/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt
... -machine virt,accel=kvm,kernel_irqchip,nvdimm ...
root ... /home/foo/build-x86_64_virt/virtiofsd-x86_64 ...
```

View File

@ -0,0 +1,53 @@
# Kata Containers with `virtio-mem`
- [Introduction](#introduction)
- [Requisites](#requisites)
- [Run a Kata Container utilizing `virtio-mem`](#run-a-kata-container-utilizing-virtio-mem)
## Introduction
The basic idea of `virtio-mem` is to provide a flexible, cross-architecture memory hot plug and hot unplug solution that avoids many limitations imposed by existing technologies, architectures, and interfaces.
More details can be found at https://lkml.org/lkml/2019/12/12/681.
Kata Containers with `virtio-mem` supports memory resizing.
## Requisites
Kata Containers with `virtio-mem` requires a Linux kernel and a QEMU that support `virtio-mem`.
The upstream Linux kernel and QEMU do not support `virtio-mem` yet; @davidhildenbrand is working on them.
Please use the following unofficial versions of the Linux kernel and QEMU that support `virtio-mem` with Kata Containers.
The Linux kernel is at https://github.com/davidhildenbrand/linux/tree/virtio-mem-rfc-v4.
A Linux kernel config that works with Kata Containers is at https://gist.github.com/teawater/016194ee84748c768745a163d08b0fb9.
The QEMU is at https://github.com/teawater/qemu/tree/kata-virtio-mem. (The original source is at https://github.com/davidhildenbrand/qemu/tree/virtio-mem, but its base version of QEMU cannot work with Kata Containers, so the `virtio-mem` commits were merged onto upstream QEMU.)
Point Kata Containers at the `virtio-mem`-capable Linux kernel and QEMU with the following lines in the Kata Containers QEMU configuration `configuration-qemu.toml`:
```toml
[hypervisor.qemu]
path = "qemu-dir"
kernel = "vmlinux-dir"
```
Enable `virtio-mem` with the following line in the Kata Containers configuration:
```toml
enable_virtio_mem = true
```
## Run a Kata Container utilizing `virtio-mem`
Use the following command to enable memory overcommit in the Linux kernel, because the QEMU `virtio-mem` device needs to allocate a large amount of memory:
```
$ echo 1 | sudo tee /proc/sys/vm/overcommit_memory
```
Use the following command to start a Kata Container:
```
$ docker run --rm -it --runtime=kata --name test busybox
```
Use the following command to set the memory size of `test` to `default_memory` + 512 MiB:
```
$ docker update -m 512m --memory-swap -1 test
```
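To confirm the resize took effect, check the memory visible inside the container (a sketch; it assumes the `busybox` image provides the `free` applet):
```bash
$ docker exec test free -m
```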

Binary file not shown.

After

Width:  |  Height:  |  Size: 130 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 144 KiB

View File

@ -0,0 +1,24 @@
#!/bin/bash
# Copyright (c) 2019 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Description: Offline SOS CPUs except BSP before launching the UOS
[ $(id -u) -eq 0 ] || { echo >&2 "ERROR: run as root"; exit 1; }
for i in $(ls -d /sys/devices/system/cpu/cpu[1-9]*); do
online=`cat $i/online`
idx=`echo $i | tr -cd "[0-9]"`
echo "INFO:$0: cpu$idx online=$online"
if [ "$online" = "1" ]; then
echo 0 > $i/online
while [ "$online" = "1" ]; do
sleep 1
echo 0 > $i/online
online=`cat $i/online`
done
echo $idx > /sys/class/vhm/acrn_vhm/offline_cpu
fi
done

79
docs/how-to/privileged.md Normal file
View File

@ -0,0 +1,79 @@
# Privileged Kata Containers
Kata Containers supports creation of containers that are "privileged" (i.e. have additional capabilities and access
that is not normally granted).
* [Warnings](#warnings)
* [Host Devices](#host-devices)
* [Containerd and CRI](#containerd-and-cri)
* [CRI-O](#cri-o)
## Warnings
**Warning:** Whilst this functionality is supported, it can decrease the security of Kata Containers if not configured
correctly.
### Host Devices
By default, when privileged is enabled for a container, all the `/dev/*` block devices from the host are mounted
into the guest. This allows the privileged container inside the Kata guest to mount any block device
from the host, a potentially undesirable side-effect that decreases the security of Kata.
The following sections document how to configure this behavior in different container runtimes.
#### Containerd and CRI
The Containerd CRI allows configuring the privileged host devices behavior for each runtime in the CRI config. This is
done with the `privileged_without_host_devices` option. Setting this to `true` will disable hot plugging of the host
devices into the guest, even when privileged is enabled.
Support for configuring privileged host devices behavior was added in containerd `1.3.0`.
See the example config below:
```toml
[plugins]
[plugins.cri]
[plugins.cri.containerd]
[plugins.cri.containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v1"
privileged_without_host_devices = false
[plugins.cri.containerd.runtimes.kata]
runtime_type = "io.containerd.kata.v2"
privileged_without_host_devices = true
[plugins.cri.containerd.runtimes.kata.options]
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration.toml"
```
- [Kata Containers with Containerd and CRI documentation](how-to-use-k8s-with-cri-containerd-and-kata.md)
- [Containerd CRI config documentation](https://github.com/containerd/cri/blob/master/docs/config.md)
#### CRI-O
Similar to containerd, CRI-O allows configuring the privileged host devices
behavior for each runtime in the CRI config. This is done with the
`privileged_without_host_devices` option. Setting this to `true` will disable
hot plugging of the host devices into the guest, even when privileged is enabled.
Support for configuring privileged host devices behavior was added in CRI-O `1.16.0`.
See the example config below:
```toml
[crio.runtime.runtimes.runc]
runtime_path = "/usr/local/bin/crio-runc"
runtime_type = "oci"
runtime_root = "/run/runc"
privileged_without_host_devices = false
[crio.runtime.runtimes.kata]
runtime_path = "/usr/bin/kata-runtime"
runtime_type = "oci"
privileged_without_host_devices = true
[crio.runtime.runtimes.kata-shim2]
runtime_path = "/usr/local/bin/containerd-shim-kata-v2"
runtime_type = "vm"
privileged_without_host_devices = true
```
- [Kata Containers with CRI-O](https://github.com/kata-containers/documentation/blob/master/how-to/run-kata-with-k8s.md#cri-o)

View File

@ -0,0 +1,204 @@
# Run Kata Containers with Kubernetes
* [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
* [Prerequisites](#prerequisites)
* [Install a CRI implementation](#install-a-cri-implementation)
* [CRI-O](#cri-o)
* [Kubernetes Runtime Class (CRI-O v1.12 )](#kubernetes-runtime-class-cri-o-v112)
* [Untrusted annotation (until CRI-O v1.12)](#untrusted-annotation-until-cri-o-v112)
* [Network namespace management](#network-namespace-management)
* [containerd with CRI plugin](#containerd-with-cri-plugin)
* [Install Kubernetes](#install-kubernetes)
* [Configure for CRI-O](#configure-for-cri-o)
* [Configure for containerd](#configure-for-containerd)
* [Run a Kubernetes pod with Kata Containers](#run-a-kubernetes-pod-with-kata-containers)
## Prerequisites
This guide requires Kata Containers available on your system, installable by following [this guide](https://github.com/kata-containers/documentation/blob/master/install/README.md).
## Install a CRI implementation
Kubernetes CRI (Container Runtime Interface) implementations allow using any
OCI-compatible runtime with Kubernetes, such as the Kata Containers runtime.
Kata Containers supports both the [CRI-O](https://github.com/kubernetes-incubator/cri-o) and
[CRI-containerd](https://github.com/containerd/cri) CRI implementations.
After choosing one CRI implementation, you must make the appropriate configuration
to ensure it integrates with Kata Containers.
Kata Containers 1.5 introduced `shimv2` support for containerd 1.2.0, reducing the components
required to spawn pods and containers, and this is the preferred way to run Kata Containers with Kubernetes ([as documented here](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers)).
An equivalent shim implementation for CRI-O is planned.
### CRI-O
For CRI-O installation instructions, refer to the [CRI-O Tutorial](https://github.com/kubernetes-incubator/cri-o/blob/master/tutorial.md) page.
The following sections show how to set up the CRI-O configuration file (default path: `/etc/crio/crio.conf`) for Kata.
Unless otherwise stated, all the following settings are specific to the `crio.runtime` table:
```toml
# The "crio.runtime" table contains settings pertaining to the OCI
# runtime used and options for how to set up and manage the OCI runtime.
[crio.runtime]
```
A comprehensive documentation of the configuration file can be found [here](https://github.com/cri-o/cri-o/blob/master/docs/crio.conf.5.md).
> **Note**: After any change to this file, the CRI-O daemon has to be restarted with:
>````
>$ sudo systemctl restart crio
>````
#### Kubernetes Runtime Class (CRI-O v1.12+)
The [Kubernetes Runtime Class](https://kubernetes.io/docs/concepts/containers/runtime-class/)
is the preferred way of specifying the container runtime configuration to run a Pod's containers.
To use this feature, Kata must be added as a runtime handler with:
```toml
[crio.runtime.runtimes.kata-runtime]
runtime_path = "/usr/bin/kata-runtime"
runtime_type = "oci"
```
You can also add multiple entries to specify alternative hypervisors, e.g.:
```toml
[crio.runtime.runtimes.kata-qemu]
runtime_path = "/usr/bin/kata-runtime"
runtime_type = "oci"
[crio.runtime.runtimes.kata-fc]
runtime_path = "/usr/bin/kata-runtime"
runtime_type = "oci"
```
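On the Kubernetes side, each handler is then referenced from a `RuntimeClass` object. A minimal sketch, assuming the `kata-qemu` handler defined above and a Kubernetes version with the `node.k8s.io/v1beta1` API (1.14+; older clusters used `v1alpha1` with a different schema):
```bash
$ cat << EOF | kubectl apply -f -
apiVersion: node.k8s.io/v1beta1
kind: RuntimeClass
metadata:
  name: kata-qemu
handler: kata-qemu
EOF
```
A pod then selects this runtime by setting `runtimeClassName: kata-qemu` in its spec.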
#### Untrusted annotation (until CRI-O v1.12)
The untrusted annotation is used to specify a runtime for __untrusted__ workloads, i.e.
a runtime to be used when the workload cannot be trusted and a higher level of security
is required. An additional flag can be used to let CRI-O know if a workload
should be considered _trusted_ or _untrusted_ by default.
For further details, see the documentation
[here](https://github.com/kata-containers/documentation/blob/master/design/architecture.md#mixing-vm-based-and-namespace-based-runtimes).
```toml
# runtime is the OCI compatible runtime used for trusted container workloads.
# This is a mandatory setting as this runtime will be the default one
# and will also be used for untrusted container workloads if
# runtime_untrusted_workload is not set.
runtime = "/usr/bin/runc"
# runtime_untrusted_workload is the OCI compatible runtime used for untrusted
# container workloads. This is an optional setting, except if
# default_container_trust is set to "untrusted".
runtime_untrusted_workload = "/usr/bin/kata-runtime"
# default_workload_trust is the default level of trust crio puts in container
# workloads. It can either be "trusted" or "untrusted", and the default
# is "trusted".
# Containers can be run through different container runtimes, depending on
# the trust hints we receive from kubelet:
# - If kubelet tags a container workload as untrusted, crio will try first to
# run it through the untrusted container workload runtime. If it is not set,
# crio will use the trusted runtime.
# - If kubelet does not provide any information about the container workload trust
# level, the selected runtime will depend on the default_container_trust setting.
# If it is set to "untrusted", then all containers except for the host privileged
# ones, will be run by the runtime_untrusted_workload runtime. Host privileged
# containers are by definition trusted and will always use the trusted container
# runtime. If default_container_trust is set to "trusted", crio will use the trusted
# container runtime for all containers.
default_workload_trust = "untrusted"
```
#### Network namespace management
To enable networking for the workloads run by Kata, CRI-O needs to be configured to
manage network namespaces, by setting the following key to `true`.
In CRI-O v1.16:
```toml
manage_network_ns_lifecycle = true
```
In CRI-O v1.17+:
```toml
manage_ns_lifecycle = true
```
### containerd with CRI plugin
If you select containerd with the `cri` plugin, follow the "Getting Started for Developers"
instructions [here](https://github.com/containerd/cri#getting-started-for-developers)
to properly install it.
To customize containerd to select Kata Containers runtime, follow our
"Configure containerd to use Kata Containers" internal documentation
[here](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers).
## Install Kubernetes
Depending on what your needs are and what you expect to do with Kubernetes,
please refer to the following
[documentation](https://kubernetes.io/docs/setup/) to install it correctly.
Kubernetes talks with CRI implementations through a `container-runtime-endpoint`,
also called the CRI socket. This socket path is different depending on which CRI
implementation you choose, and the Kubelet service has to be updated accordingly.
### Configure for CRI-O
`/etc/systemd/system/kubelet.service.d/0-crio.conf`
```
[Service]
Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///var/run/crio/crio.sock"
```
### Configure for containerd
`/etc/systemd/system/kubelet.service.d/0-cri-containerd.conf`
```
[Service]
Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock"
```
For more information about containerd see the "Configure Kubelet to use containerd"
documentation [here](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-kubelet-to-use-containerd).
## Run a Kubernetes pod with Kata Containers
After you update your Kubelet service based on the CRI implementation you
are using, reload and restart Kubelet. Then, start your cluster:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart kubelet
# If using CRI-O
$ sudo kubeadm init --skip-preflight-checks --cri-socket /var/run/crio/crio.sock --pod-network-cidr=10.244.0.0/16
# If using CRI-containerd
$ sudo kubeadm init --skip-preflight-checks --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16
$ export KUBECONFIG=/etc/kubernetes/admin.conf
```
You can force Kubelet to use Kata Containers by adding the `untrusted`
annotation to your pod configuration. In our case, this ensures Kata
Containers is the selected runtime to run the described workload.
`nginx-untrusted.yaml`
```yaml
apiVersion: v1
kind: Pod
metadata:
  name: nginx-untrusted
  annotations:
    io.kubernetes.cri.untrusted-workload: "true"
spec:
  containers:
  - name: nginx
    image: nginx
```
Next, run your pod:
```
$ sudo -E kubectl apply -f nginx-untrusted.yaml
```
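To confirm the pod was launched with Kata rather than `runc`, you can look for a hypervisor process on the node (a sketch; the process name depends on the hypervisor configured for Kata):
```bash
$ ps aux | grep qemu
```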

246
docs/how-to/service-mesh.md Normal file
View File

@ -0,0 +1,246 @@
# Kata Containers and service mesh for Kubernetes
* [Assumptions](#assumptions)
* [How they work](#how-they-work)
* [Prerequisites](#prerequisites)
* [Kata and Kubernetes](#kata-and-kubernetes)
* [Restrictions](#restrictions)
* [Install and deploy your service mesh](#install-and-deploy-your-service-mesh)
* [Service Mesh Istio](#service-mesh-istio)
* [Service Mesh Linkerd](#service-mesh-linkerd)
* [Inject your services with sidecars](#inject-your-services-with-sidecars)
* [Sidecar Istio](#sidecar-istio)
* [Sidecar Linkerd](#sidecar-linkerd)
* [Run your services with Kata](#run-your-services-with-kata)
* [Lower privileges](#lower-privileges)
* [Add annotations](#add-annotations)
* [Deploy](#deploy)
A service mesh is a way to monitor and control the traffic between
micro-services running in your Kubernetes cluster. It is a powerful
tool that you might want to use in combination with the security
brought by Kata Containers.
## Assumptions
You are expected to be familiar with concepts such as __pods__,
__containers__, __control plane__, __data plane__, and __sidecar__.
## How they work
Istio and Linkerd both rely on the same model, where they run controller
applications in the control plane, and inject a proxy as a sidecar inside
the pod running the service. The proxy registers in the control plane as
a first step, and it constantly sends different sorts of information about
the service running inside the pod. That information comes from the
filtering performed when receiving all the traffic initially intended for
the service. That is how the interaction between the control plane and the
proxy allows the user to apply load balancing and authentication rules to
the incoming and outgoing traffic, inside the cluster, and between multiple
micro-services.
This cannot happen without a good number of `iptables` rules ensuring
the packets reach the proxy instead of the expected service. The rules are
set up through an __init__ container because they have to be in place as soon
as the proxy starts.
## Prerequisites
### Kata and Kubernetes
Follow the [instructions](https://github.com/kata-containers/documentation/blob/master/install/README.md)
to get Kata Containers properly installed and configured with Kubernetes.
You can choose between CRI-O and CRI-containerd, both are supported
through this document.
For both cases, select the workloads as _trusted_ by default. This way,
your cluster and your service mesh run with `runc`, and only the containers
you choose to annotate run with Kata Containers.
### Restrictions
As documented [here](https://github.com/linkerd/linkerd2/issues/982),
a kernel version between 4.14.22 and 4.14.40 causes a deadlock when
`getsockopt()` gets called with the `SO_ORIGINAL_DST` option. Unfortunately,
both service meshes use this system call with this same option from the
proxy container running inside the VM. This means that you cannot run
this kernel version range as the guest kernel for Kata if you want your
service mesh to work.
As mentioned when explaining the basic functioning of these service meshes,
`iptables` is heavily used, and it needs to be properly enabled through
the guest kernel config. If it is not properly enabled, the init container
is not able to set up the rules, as sketched below.
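As an illustrative sketch only, the guest kernel config typically needs at least options along these lines; the exact set depends on the mesh and kernel version, and these names are standard upstream netfilter options rather than a list taken from the Kata guest config:
```bash
# Illustrative guest kernel .config fragment; adjust to your mesh's needs.
CONFIG_NETFILTER=y
CONFIG_NETFILTER_XTABLES=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_NAT=y
CONFIG_IP_NF_TARGET_REDIRECT=y
CONFIG_NETFILTER_XT_MATCH_OWNER=y
```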
## Install and deploy your service mesh
### Service Mesh Istio
As a reference, you can follow Istio [instructions](https://istio.io/docs/setup/kubernetes/quick-start/#download-and-prepare-for-the-installation).
The following is a summary of what you need to install Istio on your system:
```
$ curl -L https://git.io/getLatestIstio | sh -
$ cd istio-*
$ export PATH=$PWD/bin:$PATH
```
Now deploy Istio in the control plane of your cluster with the following:
```
$ kubectl apply -f install/kubernetes/istio-demo.yaml
```
To verify that the control plane is properly deployed, you can use both of
the following commands:
```
$ kubectl get svc -n istio-system
$ kubectl get pods -n istio-system -o wide
```
### Service Mesh Linkerd
As a reference, follow the Linkerd [instructions](https://linkerd.io/2/getting-started/index.html).
The following is a summary of what you need to install Linkerd on your system:
```
$ curl https://run.linkerd.io/install | sh
$ export PATH=$PATH:$HOME/.linkerd/bin
```
Now deploy Linkerd in the control plane of your cluster with the following:
```
$ linkerd install | kubectl apply -f -
```
To verify that the control plane is properly deployed, you can use both of
the following commands:
```
$ kubectl get svc -n linkerd
$ kubectl get pods -n linkerd -o wide
```
## Inject your services with sidecars
Once the control plane is running, you need a deployment to define a few
services that rely on each other. Then, you inject the YAML file with the
sidecar proxy using the tools provided by each service mesh.
If you do not have such a deployment ready, refer to the samples provided
by each project.
### Sidecar Istio
Istio provides a [`bookinfo`](https://istio.io/docs/examples/bookinfo/)
sample, which you can rely on to inject their `envoy` proxy as a
sidecar.
You need to use their tool called `istioctl kube-inject` to inject
your YAML file. We use their `bookinfo` sample as an example:
```
$ istioctl kube-inject -f samples/bookinfo/kube/bookinfo.yaml -o bookinfo-injected.yaml
```
### Sidecar Linkerd
Linkerd provides an [`emojivoto`](https://linkerd.io/2/getting-started/index.html)
sample, which you can rely on to inject their `linkerd` proxy as a
sidecar.
You need to use their tool called `linkerd inject` to inject your YAML
file. We use their `emojivoto` sample as an example:
```
$ wget https://raw.githubusercontent.com/runconduit/conduit-examples/master/emojivoto/emojivoto.yml
$ linkerd inject emojivoto.yml > emojivoto-injected.yaml
```
## Run your services with Kata
Now that your service deployment is injected with the appropriate sidecar
containers, manually edit your deployment to make it work with Kata.
### Lower privileges
In Kubernetes, the __init__ container is often `privileged` as it needs to
set up the environment, which often requires root privileges. In the case
of these service meshes, all they need is the `NET_ADMIN` capability to
modify the underlying network rules. Linkerd, by default, does not use a
`privileged` container, but Istio does.
For this reason, if you use Istio you need to switch all
containers with `privileged: true` to `privileged: false`.
### Add annotations
There is no difference between Istio and Linkerd in this section. It is
about which CRI implementation you use.
For both CRI-O and CRI-containerd, you have to add an annotation indicating
the workload for this deployment is not _trusted_, which will trigger
`kata-runtime` to be called instead of `runc`.
__CRI-O:__
Add the following annotation for CRI-O
```yaml
io.kubernetes.cri-o.TrustedSandbox: "false"
```
The following is an example of what your YAML can look like:
```yaml
...
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  creationTimestamp: null
  name: details-v1
spec:
  replicas: 1
  strategy: {}
  template:
    metadata:
      annotations:
        io.kubernetes.cri-o.TrustedSandbox: "false"
        sidecar.istio.io/status: '{"version":"55c9e544b52e1d4e45d18a58d0b34ba4b72531e45fb6d1572c77191422556ffc","initContainers":["istio-init"],"containers":["istio-proxy"],"volumes":["istio-envoy","istio-certs"],"imagePullSecrets":null}'
      creationTimestamp: null
      labels:
        app: details
        version: v1
...
```
__CRI-containerd:__
Add the following annotation for CRI-containerd
```yaml
io.kubernetes.cri.untrusted-workload: "true"
```
The following is an example of what your YAML can look like:
```yaml
...
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  creationTimestamp: null
  name: details-v1
spec:
  replicas: 1
  strategy: {}
  template:
    metadata:
      annotations:
        io.kubernetes.cri.untrusted-workload: "true"
        sidecar.istio.io/status: '{"version":"55c9e544b52e1d4e45d18a58d0b34ba4b72531e45fb6d1572c77191422556ffc","initContainers":["istio-init"],"containers":["istio-proxy"],"volumes":["istio-envoy","istio-certs"],"imagePullSecrets":null}'
      creationTimestamp: null
      labels:
        app: details
        version: v1
...
```
### Deploy
Deploy your application by using the following:
```
$ kubectl apply -f myapp-injected.yaml
```

View File

@ -0,0 +1,47 @@
# What Is VMCache and How To Enable It
* [What is VMCache](#what-is-vmcache)
* [How is this different to VM templating](#how-is-this-different-to-vm-templating)
* [How to enable VMCache](#how-to-enable-vmcache)
* [Limitations](#limitations)
### What is VMCache
VMCache is a new function that creates VMs as caches before using them.
It helps speed up new container creation.
The function consists of a server and some clients communicating
through a Unix socket. The protocol is gRPC, defined in [`protocols/cache/cache.proto`](https://github.com/kata-containers/runtime/blob/master/protocols/cache/cache.proto).
The VMCache server will create some VMs and cache them by factory cache.
It will convert a VM to the gRPC format and transport it when it gets
requested by a client.
Factory `grpccache` is the VMCache client. It will request a gRPC-format
VM and convert it back to a VM. If the VMCache function is enabled,
`kata-runtime` will request a VM from factory `grpccache` when it creates
a new sandbox.
### How is this different to VM templating
Both [VM templating](https://github.com/kata-containers/documentation/blob/master/how-to/what-is-vm-templating-and-how-do-I-use-it.md) and VMCache help speed up new container creation.
When VM templating is enabled, new VMs are created by cloning from a pre-created template VM, and they will share the same initramfs, kernel and agent memory in readonly mode. So it saves a lot of memory if there are many Kata Containers running on the same host.
VMCache is not vulnerable to the [shared memory CVE](https://github.com/kata-containers/documentation/blob/master/how-to/what-is-vm-templating-and-how-do-I-use-it.md#what-are-the-cons) because the VMs do not share memory.
### How to enable VMCache
VMCache can be enabled by changing your Kata Containers config file (`/usr/share/defaults/kata-containers/configuration.toml`,
overridden by `/etc/kata-containers/configuration.toml` if provided) such that:
* `vm_cache_number` specifies the number of caches of VMCache:
  * unspecified or `0`: VMCache is disabled
  * `> 0`: will be set to the specified number
* `vm_cache_endpoint` specifies the address of the Unix socket; see the sketch after this list.
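A minimal sketch of the corresponding configuration fragment, assuming these options live under the `[factory]` section and using an illustrative cache count and socket path (merge into the existing file rather than blindly appending duplicate sections):
```bash
$ cat << EOF | sudo tee -a /etc/kata-containers/configuration.toml
[factory]
vm_cache_number = 3
vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
EOF
```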
Then you can start the VMCache server by calling:
```
$ sudo kata-runtime factory init
```
and stop it (purging the cached VMs) by pressing `ctrl-c`.
### Limitations
* Cannot work with VM templating.
* Only supports the QEMU hypervisor.

View File

@ -0,0 +1,60 @@
# What Is VM Templating and How To Enable It
### What is VM templating
VM templating is a Kata Containers feature that enables new VM
creation using a cloning technique. When enabled, new VMs are created
by cloning from a pre-created template VM, and they will share the
same initramfs, kernel and agent memory in readonly mode. It is very
much like a process fork done by the kernel but here we *fork* VMs.
### How is this different from VMCache
Both [VMCache](https://github.com/kata-containers/documentation/blob/master/how-to/what-is-vm-cache-and-how-do-I-use-it.md) and VM templating help speed up new container creation.
When VMCache is enabled, new VMs are created by the VMCache server. So it is not vulnerable to the shared memory CVE because the VMs do not share memory.
VM templating saves a lot of memory if there are many Kata Containers running on the same host.
### What are the Pros
VM templating helps speed up new container creation and saves a lot
of memory if there are many Kata Containers running on the same host.
If you are running a density workload, or care a lot about container
startup speed, VM templating can be very useful.
In one example, we created 100 Kata Containers each claiming 128MB
guest memory and ended up saving 9GB of memory in total when VM templating
is enabled, which is about 72% of the total guest memory. See [full results
here](https://github.com/kata-containers/runtime/pull/303#issuecomment-395846767).
In another example, we created ten Kata Containers with containerd shimv2
and calculated the average boot up speed for each of them. The result
showed that VM templating speeds up Kata Containers creation by as much as
38.68%. See [full results here](https://gist.github.com/bergwolf/06974a3c5981494a40e2c408681c085d).
### What are the Cons
One drawback of VM templating is that it cannot avoid cross-VM side-channel
attacks such as [CVE-2015-2877](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-2877)
that originally targeted the Linux KSM feature.
It was concluded that "Share-until-written approaches for memory conservation among
mutually untrusting tenants are inherently detectable for information disclosure,
and can be classified as potentially misunderstood behaviors rather than vulnerabilities."
**Warning**: If you care about such attack vector, do not use VM templating or KSM.
### How to enable VM templating
VM templating can be enabled by changing your Kata Containers config file (`/usr/share/defaults/kata-containers/configuration.toml`,
overridden by `/etc/kata-containers/configuration.toml` if provided) such that:
- `qemu-lite` is specified in `hypervisor.qemu`->`path` section
- `enable_template = true`
- `initrd =` is set
- `image =` option is commented out or removed
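A minimal sketch of the relevant fragment, under the assumption that `enable_template` lives in the `[factory]` section and that `qemu-lite` is installed at the path shown (merge into the existing sections rather than appending duplicates):
```bash
$ cat << EOF | sudo tee -a /etc/kata-containers/configuration.toml
[hypervisor.qemu]
path = "/usr/bin/qemu-lite-system-x86_64"
initrd = "/usr/share/kata-containers/kata-containers-initrd.img"
# image = ...   (leave the image option commented out or removed)

[factory]
enable_template = true
EOF
```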
Then you can create a VM template for later use by calling
```
$ sudo kata-runtime factory init
```
and purge it by calling
```
$ sudo kata-runtime factory destroy
```
If you do not want to call `kata-runtime factory init` by hand,
the very first Kata Container you create will automatically create a VM template.

127
docs/install/README.md Normal file
View File

@ -0,0 +1,127 @@
# Kata Containers installation user guides
* [Prerequisites](#prerequisites)
* [Packaged installation methods](#packaged-installation-methods)
* [Supported Distributions](#supported-distributions)
* [Official packages](#official-packages)
* [Automatic Installation](#automatic-installation)
* [Snap Installation](#snap-installation)
* [Scripted Installation](#scripted-installation)
* [Manual Installation](#manual-installation)
* [Build from source installation](#build-from-source-installation)
* [Installing on a Cloud Service Platform](#installing-on-a-cloud-service-platform)
* [Further information](#further-information)
The following is an overview of the different installation methods available. All of these methods equally result
in a system configured to run Kata Containers.
## Prerequisites
Kata Containers requires nested virtualization or bare metal.
See the
[hardware requirements](https://github.com/kata-containers/runtime/blob/master/README.md#hardware-requirements)
to see if your system is capable of running Kata Containers.
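On x86 hosts, a quick sanity check for hardware virtualization support is to count the CPU flags (a non-zero result means VT-x or AMD-V is exposed):
```bash
$ egrep -c '(vmx|svm)' /proc/cpuinfo
```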
## Packaged installation methods
> **Notes:**
>
> - Packaged installation methods use your distribution's native package format (such as RPM or DEB).
| Installation method | Description | Distributions supported |
|------------------------------------------------------|-----------------------------------------------------------------------------------------|--------------------------------------|
| [Automatic](#automatic-installation) |Run a single command to install a full system |[see table](#supported-distributions) |
| [Using snap](#snap-installation) |Easy to install and automatic updates |any distro that supports snapd |
| [Using official distro packages](#official-packages) |Kata packages provided by Linux distributions official repositories |[see table](#supported-distributions) |
| [Scripted](#scripted-installation) |Generates an installation script which will result in a working system when executed |[see table](#supported-distributions) |
| [Manual](#manual-installation) |Allows the user to read a brief document and execute the specified commands step-by-step |[see table](#supported-distributions) |
### Supported Distributions
Kata is packaged by the Kata community for:
|Distribution (link to installation guide) | Versions |
|-----------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------|
|[CentOS](centos-installation-guide.md) | 7 |
|[Debian](debian-installation-guide.md) | 9, 10 |
|[Fedora](fedora-installation-guide.md) | 28, 29, 30 |
|[openSUSE](opensuse-installation-guide.md) | [Leap](opensuse-leap-installation-guide.md) (15, 15.1)<br>[Tumbleweed](opensuse-tumbleweed-installation-guide.md) |
|[Red Hat Enterprise Linux (RHEL)](rhel-installation-guide.md) | 7 |
|[SUSE Linux Enterprise Server (SLES)](sles-installation-guide.md)| SLES 12 SP3 |
|[Ubuntu](ubuntu-installation-guide.md) | 16.04, 18.04 |
#### Official packages
Kata packages are provided by official distribution repositories for:
|Distribution (link to packages) | Versions |
|-----------------------------------------------------------------|------------|
|[openSUSE](https://software.opensuse.org/package/katacontainers) | Tumbleweed |
### Automatic Installation
[Use `kata-manager`](installing-with-kata-manager.md) to automatically install Kata packages.
### Snap Installation
[![Get it from the Snap Store](https://snapcraft.io/static/images/badges/en/snap-store-black.svg)](https://snapcraft.io/kata-containers)
[Use snap](snap-installation-guide.md) to install Kata Containers from https://snapcraft.io.
### Scripted Installation
[Use `kata-doc-to-script`](installing-with-kata-doc-to-script.md) to generate installation scripts that can be reviewed before they are executed.
### Manual Installation
Manual installation instructions are available for [these distributions](#supported-distributions) and document how to:
1. Add the Kata Containers repository to your distro package manager, and import the package signing key.
2. Install the Kata Containers packages.
3. Install a supported container manager.
4. Configure the container manager to use `kata-runtime` as the default OCI runtime. Or, for Kata Containers 1.5.0 or above, configure
`io.containerd.kata.v2` as the runtime shim, as sketched after this list (see [containerd runtime v2 (shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)
and [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md)).
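As an illustration of the shim v2 option in step 4, a minimal containerd registration might look like the following sketch. The configuration path and section layout vary between containerd versions, so treat this as an assumption to adapt rather than a definitive recipe:

```bash
# Sketch: register the Kata shim v2 with containerd (section names assume
# the containerd 1.2/1.3-era CRI plugin layout).
$ sudo mkdir -p /etc/containerd
$ cat <<EOF | sudo tee -a /etc/containerd/config.toml
[plugins.cri.containerd.runtimes.kata]
  runtime_type = "io.containerd.kata.v2"
EOF
$ sudo systemctl restart containerd
```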
> **Notes on upgrading**:
> - If you are installing Kata Containers on a system that already has Clear Containers or `runv` installed,
> first read [the upgrading document](../Upgrading.md).
> **Notes on releases**:
> - [This download server](http://download.opensuse.org/repositories/home:/katacontainers:/releases:/)
> hosts the Kata Containers packages built by OBS for all the supported architectures.
> Packages are available for the latest and stable releases (more info [here](https://github.com/kata-containers/documentation/blob/master/Stable-Branch-Strategy.md)).
>
> - The following guides apply to the latest Kata Containers release
> (a.k.a. `master` release).
>
> - When choosing a stable release, replace all `master` occurrences in the URLs
> with a `stable-x.y` version available on the [download server](http://download.opensuse.org/repositories/home:/katacontainers:/releases:/).
> **Notes on packages source verification**:
> - The Kata packages hosted on the download server are signed with GPG to ensure integrity and authenticity.
>
> - The public key used to sign packages is available [at this link](https://raw.githubusercontent.com/kata-containers/tests/master/data/rpm-signkey.pub); the fingerprint is `9FDC0CB6 3708CF80 3696E2DC D0B37B82 6063F3ED`.
>
> - Only trust the signing key and fingerprint listed in the previous bullet point. Do not disable GPG checks;
>   otherwise the source and authenticity of the packages are not guaranteed. A verification example follows below.
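For example, one way to fetch the key, inspect its fingerprint, and only then import it on an RPM-based system (a sketch, assuming GnuPG 2.2 or newer is installed):

```bash
# Download the signing key, display its fingerprint for comparison with the
# one listed above, then import it.
$ curl -sL -o /tmp/rpm-signkey.pub https://raw.githubusercontent.com/kata-containers/tests/master/data/rpm-signkey.pub
$ gpg --show-keys --with-fingerprint /tmp/rpm-signkey.pub
$ sudo rpm --import /tmp/rpm-signkey.pub
```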
## Build from source installation
> **Notes:**
>
> - Power users who decide to build from sources should be aware of the
> implications of using an unpackaged system which will not be automatically
> updated as new [releases](../Stable-Branch-Strategy.md) are made available.
[Building from sources](../Developer-Guide.md#initial-setup) allows power users
who are comfortable building software from source to use the latest component
versions. This is not recommended for normal users.
## Installing on a Cloud Service Platform
* [Amazon Web Services (AWS)](aws-installation-guide.md)
* [Google Compute Engine (GCE)](gce-installation-guide.md)
* [Microsoft Azure](azure-installation-guide.md)
* [Minikube](minikube-installation-guide.md)
* [VEXXHOST OpenStack Cloud](vexxhost-installation-guide.md)
## Further information
* The [upgrading document](../Upgrading.md).
* The [developer guide](../Developer-Guide.md).
* The [runtime documentation](https://github.com/kata-containers/runtime/blob/master/README.md).
@ -0,0 +1,140 @@
# Install Kata Containers on Amazon Web Services
* [Install and Configure AWS CLI](#install-and-configure-aws-cli)
* [Create or Import an EC2 SSH key pair](#create-or-import-an-ec2-ssh-key-pair)
* [Launch i3.metal instance](#launch-i3metal-instance)
* [Install Kata](#install-kata)
Kata Containers on Amazon Web Services (AWS) makes use of [i3.metal](https://aws.amazon.com/ec2/instance-types/i3/) instances. Most of the installation procedure is identical to that for Kata on your preferred distribution, except that you have to run it on bare metal instances since AWS doesn't support nested virtualization yet. This guide walks you through creating an i3.metal instance.
## Install and Configure AWS CLI
### Requirements
* Python:
* Python 2 version 2.6.5+
* Python 3 version 3.3+
### Install
Install with this command:
```bash
$ pip install awscli --upgrade --user
```
### Configure
First, verify the installation:
```bash
$ aws --version
```
Then configure it:
```bash
$ aws configure
```
Specify the required parameters:
```
AWS Access Key ID []: <your-key-id-from-iam>
AWS Secret Access Key []: <your-secret-access-key-from-iam>
Default region name []: <your-aws-region-for-your-i3-metal-instance>
Default output format [None]: <yaml-or-json-or-empty>
```
Alternatively, you can create the files: `~/.aws/credentials` and `~/.aws/config`:
```bash
$ cat <<EOF > ~/.aws/credentials
[default]
aws_access_key_id = <your-key-id-from-iam>
aws_secret_access_key = <your-secret-access-key-from-iam>
EOF
$ cat <<EOF > ~/.aws/config
[default]
region = <your-aws-region-for-your-i3-metal-instance>
EOF
```
For more information on how to get AWS credentials please refer to [this guide](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html). Alternatively, you can ask the administrator of your AWS account to issue one with the AWS CLI:
```sh
$ aws_username="myusername"
$ aws iam create-access-key --user-name="$aws_username"
```
More general AWS CLI guidelines can be found [here](https://docs.aws.amazon.com/cli/latest/userguide/installing.html).
## Create or Import an EC2 SSH key pair
You will need this to access your instance.
To create:
```bash
$ aws ec2 create-key-pair --key-name MyKeyPair | grep KeyMaterial | cut -d: -f2- | tr -d ' \n\"\,' > MyKeyPair.pem
$ chmod 400 MyKeyPair.pem
```
Alternatively, import your existing public SSH key:
```bash
$ aws ec2 import-key-pair --key-name "MyKeyPair" --public-key-material file://MyKeyPair.pub
```
## Launch i3.metal instance
Get the latest Bionic Ubuntu AMI (Amazon Machine Image) or the latest AMI for the Linux distribution you would like to use. For example:
```bash
$ aws ec2 describe-images --owners 099720109477 --filters "Name=name,Values=ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server*" --query 'sort_by(Images, &CreationDate)[].ImageId '
```
This command will produce output similar to the following:
```
[
...
"ami-063aa838bd7631e0b",
"ami-03d5270fcb641f79b"
]
```
Launch the EC2 instance and record the `INSTANCEID`:
```bash
$ aws ec2 run-instances --image-id ami-03d5270fcb641f79b --count 1 --instance-type i3.metal --key-name MyKeyPair --associate-public-ip-address > /tmp/aws.json
$ export INSTANCEID=$(grep InstanceId /tmp/aws.json | cut -d: -f2- | tr -d ' \n\"\,')
```
Wait for the instance to come up; the output of the following command should be `running`:
```bash
$ aws ec2 describe-instances --instance-id=${INSTANCEID} | grep running | cut -d: -f2- | tr -d ' \"\,'
```
Get the public IP address for the instance:
```bash
$ export IP=$(aws ec2 describe-instances --instance-id=${INSTANCEID} | grep PublicIpAddress | cut -d: -f2- | tr -d ' \n\"\,')
```
Refer to [this guide](https://docs.aws.amazon.com/cli/latest/userguide/cli-ec2-launch.html) for more details on how to launch instances with the AWS CLI.
SSH into the machine:
```bash
$ ssh -i MyKeyPair.pem ubuntu@${IP}
```
Then go on to the next step.
## Install Kata
The process for installing Kata itself on bare metal is identical to that of a virtualization-enabled VM.
For detailed information to install Kata on your distribution of choice, see the [Kata Containers installation user guides](https://github.com/kata-containers/documentation/blob/master/install/README.md).
@ -0,0 +1,18 @@
# Install Kata Containers on Microsoft Azure
Kata Containers on Azure use nested virtualization to provide an identical installation
experience to Kata on your preferred Linux distribution.
This guide assumes you have an Azure account set up and tools to remotely login to your virtual
machine (SSH). Instructions will use [Azure Portal](https://portal.azure.com/) to avoid
local dependencies and setup.
## Create a new virtual machine with nesting support
Create a new virtual machine with:
* Nesting support (v3 series)
* Your distro of choice
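If you prefer scripting over the Portal, a rough Azure CLI equivalent might look like the following. The resource group, VM name, image alias, and size are illustrative assumptions; any nesting-capable v3 size should work:

```bash
# Sketch: create a nesting-capable VM with the Azure CLI.
$ az group create --name kata-rg --location eastus
$ az vm create --resource-group kata-rg --name kata-vm \
    --image UbuntuLTS --size Standard_D4s_v3 --generate-ssh-keys
```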
## Set up with distribution specific quick start
Follow distribution specific [install guides](https://github.com/kata-containers/documentation/tree/master/install#supported-distributions).
@ -0,0 +1,17 @@
# Install Kata Containers on CentOS
1. Install the Kata Containers components with the following commands:
```bash
$ source /etc/os-release
$ sudo yum -y install yum-utils
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ sudo -E yum-config-manager --add-repo "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/CentOS_${VERSION_ID}/home:katacontainers:releases:${ARCH}:${BRANCH}.repo"
$ sudo -E yum -y install kata-runtime kata-proxy kata-shim
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/centos-docker-install.md)
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
@ -0,0 +1,22 @@
# Install Kata Containers on Debian
1. Install the Kata Containers components with the following commands:
```bash
$ export DEBIAN_FRONTEND=noninteractive
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ source /etc/os-release
$ [ "$ID" = debian ] && [ -z "$VERSION_ID" ] && echo >&2 "ERROR: Debian unstable not supported.
You can try stable packages here:
http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}" && exit 1
$ sudo sh -c "echo 'deb http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/Debian_${VERSION_ID}/ /' > /etc/apt/sources.list.d/kata-containers.list"
$ curl -sL http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/Debian_${VERSION_ID}/Release.key | sudo apt-key add -
$ sudo -E apt-get update
$ sudo -E apt-get -y install kata-runtime kata-proxy kata-shim
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/debian-docker-install.md)
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
@ -0,0 +1,75 @@
# Install Docker for Kata Containers on CentOS
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../centos-installation-guide.md).
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
$ sudo yum -y install docker-ce
```
For more information on installing Docker please refer to the
[Docker Guide](https://docs.docker.com/engine/installation/linux/centos).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
EOF
```
2. Docker `daemon.json`
Create the Docker configuration folder:
```bash
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with the following commands:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.
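To make the comparison explicit, you can print just the kernel release on the host and inside a container:

```bash
$ uname -r
$ sudo docker run busybox uname -r
```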
@ -0,0 +1,103 @@
# Install Docker for Kata Containers on Debian
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../debian-installation-guide.md).
> - This guide allows for installation with `systemd` or `sysVinit` init systems.
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ sudo apt-get -y install apt-transport-https ca-certificates curl gnupg2 software-properties-common
$ curl -fsSL https://download.docker.com/linux/$(. /etc/os-release; echo "$ID")/gpg | sudo apt-key add -
$ sudo add-apt-repository "deb https://download.docker.com/linux/$(. /etc/os-release; echo "$ID") $(lsb_release -cs) stable"
$ sudo apt-get update
$ sudo -E apt-get -y install docker-ce
```
For more information on installing Docker please refer to the
[Docker Guide](https://docs.docker.com/engine/installation/linux/debian).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
a. `sysVinit`
- With `sysVinit`, the Docker configuration is stored in `/etc/default/docker`. Edit the options similar to the following:
```sh
$ sudo sh -c "echo '# specify docker runtime for kata-containers
DOCKER_OPTS=\"-D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime\"' >> /etc/default/docker"
```
b. systemd (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
EOF
```
c. Docker `daemon.json`
Create the Docker configuration folder:
```bash
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with one of the following (depending on init choice):
a. `sysVinit`
```sh
$ sudo /etc/init.d/docker stop
$ sudo /etc/init.d/docker start
```
To watch for errors:
```sh
$ tail -f /var/log/docker.log
```
b. systemd
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.
@ -0,0 +1,77 @@
# Install Docker for Kata Containers on Fedora
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../fedora-installation-guide.md).
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ source /etc/os-release
$ sudo dnf config-manager --add-repo https://download.docker.com/linux/fedora/docker-ce.repo
$ sudo dnf makecache
$ sudo dnf -y install docker-ce
```
For more information on installing Docker please refer to the
[Docker Guide](https://docs.docker.com/engine/installation/linux/fedora).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
EOF
```
2. Docker `daemon.json`
Create the Docker configuration folder:
```bash
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with the following commands:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.
@ -0,0 +1,75 @@
# Install Docker for Kata Containers on openSUSE
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../opensuse-installation-guide.md).
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ sudo zypper -n install docker
```
For more information on installing Docker please refer to the
[Docker Guide](https://software.opensuse.org/package/docker).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. Specify the runtime options in `/etc/sysconfig/docker` (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
```bash
$ DOCKER_SYSCONFIG=/etc/sysconfig/docker
# Add kata-runtime to the list of available runtimes, if not already listed
$ grep -qE "^ *DOCKER_OPTS=.+--add-runtime[= ] *kata-runtime" $DOCKER_SYSCONFIG || sudo -E sed -i -E "s|^( *DOCKER_OPTS=.+)\" *$|\1 --add-runtime kata-runtime=/usr/bin/kata-runtime\"|g" $DOCKER_SYSCONFIG
# If a current default runtime is specified, overwrite it with kata-runtime
$ sudo -E sed -i -E "s|^( *DOCKER_OPTS=.+--default-runtime[= ] *)[^ \"]+(.*\"$)|\1kata-runtime\2|g" $DOCKER_SYSCONFIG
# Add kata-runtime as default runtime, if no default runtime is specified
$ grep -qE "^ *DOCKER_OPTS=.+--default-runtime" $DOCKER_SYSCONFIG || sudo -E sed -i -E "s|^( *DOCKER_OPTS=.+)(\"$)|\1 --default-runtime=kata-runtime\2|g" $DOCKER_SYSCONFIG
```
2. Docker `daemon.json`
Create the Docker configuration folder:
```bash
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with the following commands:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.
@ -0,0 +1,14 @@
# Install Docker for Kata Containers on openSUSE Leap
Follow the instructions in the generic [openSUSE Docker install guide](opensuse-docker-install.md).
<!--
You can ignore the content of this comment.
(test code run by test-install-docs.sh to validate code blocks this document)
```bash
$ echo "NOTE: this document is just a link to the generic openSUSE install guide located at:
https://raw.githubusercontent.com/kata-containers/documentation/master/install/docker/opensuse-docker-install.md
Please download this file and run kata-doc-to-script.sh again."
```
-->
@ -0,0 +1,14 @@
# Install Docker for Kata Containers on openSUSE Tumbleweed
Follow the instructions in the generic [openSUSE Docker install guide](opensuse-docker-install.md).
<!--
You can ignore the content of this comment.
(test code run by test-install-docs.sh to validate code blocks this document)
```bash
$ echo "NOTE: this document is just a link to the generic openSUSE install guide located at:
https://raw.githubusercontent.com/kata-containers/documentation/master/install/docker/opensuse-docker-install.md
Please download this file and run kata-doc-to-script.sh again."
```
-->
@ -0,0 +1,76 @@
# Install Docker for Kata Containers on RHEL
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../rhel-installation-guide.md).
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ export rhel_devtoolset_version="7"
$ sudo subscription-manager repos --enable=rhel-${rhel_devtoolset_version}-server-extras-rpms
$ sudo yum -y install docker && sudo systemctl enable --now docker
```
For more information on installing Docker please refer to the
[Docker Guide](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux_atomic_host/7/html-single/getting_started_with_containers/#getting_docker_in_rhel_7).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
EOF
```
2. Docker `daemon.json`
Create the Docker configuration folder:
```bash
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with the following commands:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.
@ -0,0 +1,74 @@
# Install Docker for Kata Containers on SLES
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../sles-installation-guide.md).
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ sudo zypper -n install docker
```
For more information on installing Docker please refer to the
[Docker Guide](https://www.suse.com/documentation/sles-12/singlehtml/book_sles_docker/book_sles_docker.html).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
EOF
```
2. Docker `daemon.json`
Create the Docker configuration folder:
```bash
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with the following commands:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.
@ -0,0 +1,79 @@
# Install Docker for Kata Containers on Ubuntu
> **Note:**
>
> - This guide assumes you have
> [already installed the Kata Containers packages](../ubuntu-installation-guide.md).
1. Install the latest version of Docker with the following commands:
> **Notes:**
>
> - This step is only required if Docker is not installed on the system.
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
```bash
$ sudo -E apt-get -y install apt-transport-https ca-certificates software-properties-common
$ curl -sL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
$ arch=$(dpkg --print-architecture)
$ sudo -E add-apt-repository "deb [arch=${arch}] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
$ sudo -E apt-get update
$ sudo -E apt-get -y install docker-ce
```
For more information on installing Docker please refer to the
[Docker Guide](https://docs.docker.com/engine/installation/linux/ubuntu).
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
[Service]
ExecStart=
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
EOF
```
2. Docker `daemon.json`
Create the Docker configuration folder:
```bash
$ sudo mkdir -p /etc/docker
```
Add the following definitions to `/etc/docker/daemon.json`:
```json
{
"default-runtime": "kata-runtime",
"runtimes": {
"kata-runtime": {
"path": "/usr/bin/kata-runtime"
}
}
}
```
3. Restart the Docker systemd service with the following commands:
```bash
$ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Run Kata Containers
You are now ready to run Kata Containers:
```bash
$ sudo docker run busybox uname -a
```
The previous command shows details of the kernel version running inside the
container, which is different to the host kernel version.
@ -0,0 +1,17 @@
# Install Kata Containers on Fedora
1. Install the Kata Containers components with the following commands:
```bash
$ source /etc/os-release
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ sudo dnf -y install dnf-plugins-core
$ sudo -E dnf config-manager --add-repo "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/Fedora_${VERSION_ID}/home:katacontainers:releases:${ARCH}:${BRANCH}.repo"
$ sudo -E dnf -y install kata-runtime kata-proxy kata-shim
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/fedora-docker-install.md)
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
@ -0,0 +1,133 @@
# Install Kata Containers on Google Compute Engine
* [Create an Image with Nested Virtualization Enabled](#create-an-image-with-nested-virtualization-enabled)
* [Create the Image](#create-the-image)
* [Verify VMX is Available](#verify-vmx-is-available)
* [Install Kata](#install-kata)
* [Create a Kata-enabled Image](#create-a-kata-enabled-image)
Kata Containers on Google Compute Engine (GCE) makes use of [nested virtualization](https://cloud.google.com/compute/docs/instances/enable-nested-virtualization-vm-instances). Most of the installation procedure is identical to that for Kata on your preferred distribution, but enabling nested virtualization currently requires extra steps on GCE. This guide walks you through creating an image and instance with nested virtualization enabled. Note that `kata-runtime kata-check` checks for nested virtualization, but does not fail if support is not found.
As a prerequisite, this guide assumes an installed and configured instance of the [Google Cloud SDK](https://cloud.google.com/sdk/downloads). For a zero-configuration option, all of the commands below have been tested under [Google Cloud Shell](https://cloud.google.com/shell/) (as of June 2018). Verify your `gcloud` installation and configuration:
```bash
$ gcloud info || { echo "ERROR: no Google Cloud SDK"; exit 1; }
```
## Create an Image with Nested Virtualization Enabled
VM images on GCE are grouped into families under projects. Officially supported images are automatically discoverable with `gcloud compute images list`. That command produces a list similar to the following (likely with different image names):
```bash
$ gcloud compute images list
NAME PROJECT FAMILY DEPRECATED STATUS
centos-7-v20180523 centos-cloud centos-7 READY
coreos-stable-1745-5-0-v20180531 coreos-cloud coreos-stable READY
cos-beta-67-10575-45-0 cos-cloud cos-beta READY
cos-stable-66-10452-89-0 cos-cloud cos-stable READY
debian-9-stretch-v20180510 debian-cloud debian-9 READY
rhel-7-v20180522 rhel-cloud rhel-7 READY
sles-11-sp4-v20180523 suse-cloud sles-11 READY
ubuntu-1604-xenial-v20180522 ubuntu-os-cloud ubuntu-1604-lts READY
ubuntu-1804-bionic-v20180522 ubuntu-os-cloud ubuntu-1804-lts READY
```
Each distribution has its own project, and each project can host images for multiple versions of the distribution, typically grouped into families. We recommend you select images by project and family rather than by name. This ensures any scripts or other automation always work with a non-deprecated image, including security updates, updates to GCE-specific scripts, and so on.
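For instance, to resolve the current (non-deprecated) image of a family without hard-coding an image name, you can use the following (shown for the Ubuntu family used in the next step):

```bash
$ gcloud compute images describe-from-family ubuntu-1804-lts \
    --project ubuntu-os-cloud
```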
### Create the Image
The following example (substitute your preferred distribution project and image family) produces an image with nested virtualization enabled in your currently active GCE project:
```bash
$ SOURCE_IMAGE_PROJECT=ubuntu-os-cloud
$ SOURCE_IMAGE_FAMILY=ubuntu-1804-lts
$ IMAGE_NAME=${SOURCE_IMAGE_FAMILY}-nested
$ gcloud compute images create \
--source-image-project $SOURCE_IMAGE_PROJECT \
--source-image-family $SOURCE_IMAGE_FAMILY \
--licenses=https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx \
$IMAGE_NAME
```
If successful, `gcloud` reports that the image was created. Verify that the image has the nested virtualization license with `gcloud compute images describe $IMAGE_NAME`. This produces output like the following (some fields have been removed for clarity and to redact personal info):
```yaml
diskSizeGb: '10'
kind: compute#image
licenseCodes:
- '1002001'
- '5926592092274602096'
licenses:
- https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx
- https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/licenses/ubuntu-1804-lts
name: ubuntu-1804-lts-nested
sourceImage: https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20180522
sourceImageId: '3280575157699667619'
sourceType: RAW
status: READY
```
The primary criterion of interest here is the presence of the `enable-vmx` license. Without that license, Kata does not work. The presence of that license instructs the Google Compute Engine hypervisor to enable Intel's VT-x instructions in virtual machines created from the image. Note that nested virtualization is only available in VMs running on Intel Haswell or later CPU micro-architectures.
### Verify VMX is Available
Assuming you created a nested-enabled image using the previous instructions, verify that VMs created from this image are VMX-enabled with the following:
1. Create a VM from the image created previously:
```bash
$ gcloud compute instances create \
--image $IMAGE_NAME \
--machine-type n1-standard-2 \
--min-cpu-platform "Intel Broadwell" \
kata-testing
```
> **NOTE**: In most zones the `--min-cpu-platform` argument can be omitted. It is only necessary in GCE zones that include hosts based on Intel's Ivy Bridge platform.
2. Verify that the VMX CPUID flag is set:
```bash
$ gcloud compute ssh kata-testing
# While ssh'd into the VM, check for an Intel CPU and the VMX CPUID flag:
$ [ -z "$(lscpu|grep GenuineIntel)" ] && { echo "ERROR: Need an Intel CPU"; exit 1; }
$ grep -q vmx /proc/cpuinfo || { echo "ERROR: VMX CPUID flag not set"; exit 1; }
```
If this fails, ensure you created your instance from the correct image and that the previously listed `enable-vmx` license is included.
## Install Kata
The process for installing Kata itself on a virtualization-enabled VM is identical to that for bare metal.
For detailed information to install Kata on your distribution of choice, see the [Kata Containers installation user guides](https://github.com/kata-containers/documentation/blob/master/install/README.md).
## Create a Kata-enabled Image
Optionally, after installing Kata, create an image to preserve the fruits of your labor:
```bash
$ gcloud compute instances stop kata-testing
$ gcloud compute images create \
--source-disk kata-testing \
kata-base
```
The result is an image that includes any changes made to the `kata-testing` instance as well as the `enable-vmx` flag. Verify this with `gcloud compute images describe kata-base`. The result, which omits some fields for clarity, should be similar to the following:
```yaml
diskSizeGb: '10'
kind: compute#image
licenseCodes:
- '1002001'
- '5926592092274602096'
licenses:
- https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx
- https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/licenses/ubuntu-1804-lts
name: kata-base
selfLink: https://www.googleapis.com/compute/v1/projects/my-kata-project/global/images/kata-base
sourceDisk: https://www.googleapis.com/compute/v1/projects/my-kata-project/zones/us-west1-a/disks/kata-testing
sourceType: RAW
status: READY
```
@ -0,0 +1,47 @@
# Installing with `kata-doc-to-script`
* [Introduction](#introduction)
* [Packages Installation](#packages-installation)
* [Docker Installation and Setup](#docker-installation-and-setup)
## Introduction
Use [these installation instructions](README.md#supported-distributions) together with
[`kata-doc-to-script`](https://github.com/kata-containers/tests/blob/master/.ci/kata-doc-to-script.sh)
to generate installation bash scripts.
> Note:
> - Only the Docker container manager installation can be scripted. For other setups you must
> install and configure the container manager manually.
## Packages Installation
```bash
$ source /etc/os-release
$ curl -fsSL -O https://raw.githubusercontent.com/kata-containers/documentation/master/install/${ID}-installation-guide.md
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/.ci/kata-doc-to-script.sh) ${ID}-installation-guide.md ${ID}-install.sh"
```
For example, if your distribution is CentOS, the previous example will generate a runnable shell script called `centos-install.sh`.
To proceed with the installation, run:
```bash
$ source /etc/os-release
$ bash "./${ID}-install.sh"
```
## Docker Installation and Setup
```bash
$ source /etc/os-release
$ curl -fsSL -O https://raw.githubusercontent.com/kata-containers/documentation/master/install/docker/${ID}-docker-install.md
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/.ci/kata-doc-to-script.sh) ${ID}-docker-install.md ${ID}-docker-install.sh"
```
For example, if your distribution is CentOS, this will generate a runnable shell script called `centos-docker-install.sh`.
To proceed with the Docker installation, run:
```bash
$ source /etc/os-release
$ bash "./${ID}-docker-install.sh"
```
@ -0,0 +1,47 @@
# Installing with `kata-manager`
* [Introduction](#introduction)
* [Full Installation](#full-installation)
* [Install the Kata packages only](#install-the-kata-packages-only)
* [Further Information](#further-information)
## Introduction
`kata-manager` automates the Kata Containers installation procedure documented for [these Linux distributions](README.md#supported-distributions).
> **Note**:
> - `kata-manager` requires `curl` and `sudo` to be installed on your system.
>
> - Full installation mode is only available for the Docker container manager. For other setups, you
>   can still use `kata-manager` to [install the Kata packages](#install-the-kata-packages-only), and then set up your container manager manually.
>
> - You can run `kata-manager` in dry run mode by passing the `-n` flag. Dry run mode allows you to review the
>   commands that `kata-manager` would run, without making any changes to your system.
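For example, a dry run of the full Docker-based installation could look like the following; the flag placement shown here is an assumption based on the note above:

```bash
# Sketch: dry run (-n) of the full install; review the output, then re-run
# without -n to apply.
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/cmd/kata-manager/kata-manager.sh) -n install-docker-system"
```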
## Full Installation
This command does the following:
1. Installs Kata Containers packages
2. Installs Docker
3. Configures Docker to use the Kata OCI runtime by default
```bash
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/cmd/kata-manager/kata-manager.sh) install-docker-system"
```
<!--
You can ignore the content of this comment.
(test code run by test-install-docs.sh to validate code blocks this document)
```bash
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/cmd/kata-manager/kata-manager.sh) remove-packages"
```
-->
## Install the Kata packages only
Use the following command to only install Kata Containers packages.
```bash
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/cmd/kata-manager/kata-manager.sh) install-packages"
```
## Further Information
For more information on what `kata-manager` can do, refer to the [`kata-manager` page](https://github.com/kata-containers/tests/blob/master/cmd/kata-manager).
@ -0,0 +1,238 @@
# Installing Kata Containers in Minikube
* [Installing Kata Containers in Minikube](#installing-kata-containers-in-minikube)
* [Introduction](#introduction)
* [Prerequisites](#prerequisites)
* [Setting up Minikube](#setting-up-minikube)
* [Checking for nested virtualization](#checking-for-nested-virtualization)
* [Check Minikube is running](#check-minikube-is-running)
* [Installing Kata Containers](#installing-kata-containers)
* [Enabling Kata Containers](#enabling-kata-containers)
* [Register the runtime](#register-the-runtime)
* [Testing Kata Containers](#testing-kata-containers)
* [Wrapping up](#wrapping-up)
## Introduction
[Minikube](https://kubernetes.io/docs/setup/minikube/) is an easy way to try out a Kubernetes (k8s)
cluster locally. It creates a single node Kubernetes stack in a local VM.
[Kata Containers](https://github.com/kata-containers) can be installed into a Minikube cluster using
[`kata-deploy`](https://github.com/kata-containers/packaging/tree/master/kata-deploy).
This document details the pre-requisites, installation steps, and how to check
the installation has been successful.
## Prerequisites
This installation guide has only been verified under a Minikube Linux installation, using the
[`kvm2`](https://minikube.sigs.k8s.io/docs/drivers/kvm2/) driver.
> **Notes:**
> - This installation guide may not work for macOS installations of Minikube, due to the lack of
nested virtualization support on that platform.
> - This installation guide has not been tested on a Windows installation.
> - Kata under Minikube does not currently support Kata Firecracker (`kata-fc`).
> Although the `kata-fc` binary will be installed as part of these instructions,
> via `kata-deploy`, pods cannot be launched with `kata-fc`, and will fail to start.
Before commencing installation, it is strongly recommended you read the
[Minikube installation guide](https://kubernetes.io/docs/tasks/tools/install-minikube/).
## Checking for nested virtualization
For Kata Containers to work under a Minikube VM, your host system must support
nested virtualization. If you are using a Linux system utilizing Intel VT-x
and the `kvm_intel` driver, you can perform the following check:
```sh
$ cat /sys/module/kvm_intel/parameters/nested
```
If your system does not report `Y` from the `nested` parameter, then details on how
to enable nested virtualization can be found on the
[KVM Nested Guests page](https://www.linux-kvm.org/page/Nested_Guests).
Alternatively, and for other architectures, the Kata Containers built-in
[`kata-check`](https://github.com/kata-containers/runtime#hardware-requirements)
command can be used *inside Minikube*, once Kata has been installed, to check for compatibility.
## Setting up Minikube
To enable Kata Containers under Minikube, you need to add a few configuration options to the
default Minikube setup. You can easily accomplish this, as Minikube supports them on the setup command line.
Minikube can be set up to use either CRI-O or containerd.
Here are the features to set up a CRI-O based Minikube, and why you need them:
| what | why |
| ---- | --- |
| `--bootstrapper=kubeadm` | As recommended for [minikube CRI-O](https://kubernetes.io/docs/setup/minikube/#cri-o) |
| `--container-runtime=cri-o` | Using CRI-O for Kata |
| `--enable-default-cni` | As recommended for [minikube CRI-O](https://kubernetes.io/docs/setup/minikube/#cri-o) |
| `--memory 6144` | Allocate sufficient memory, as Kata Containers default to 1 or 2 GB |
| `--network-plugin=cni` | As recommended for [minikube CRI-O](https://kubernetes.io/docs/setup/minikube/#cri-o) |
| `--vm-driver kvm2` | The host VM driver |
To use containerd, modify the `--container-runtime` argument:
| what | why |
| ---- | --- |
| `--container-runtime=containerd` | Using containerd for Kata |
> **Notes:**
> - Adjust the `--memory 6144` line to suit your environment and requirements. Kata Containers default to
>   requesting 2048MB per container. We recommend you supply more than that to the Minikube node.
> - Prior to Minikube/Kubernetes v1.14, the beta `RuntimeClass` feature also needed enabling with
> the following.
>
> | what | why |
> | ---- | --- |
> | `--feature-gates=RuntimeClass=true` | Kata needs to use the `RuntimeClass` Kubernetes feature |
The full command is therefore:
```sh
$ minikube start --vm-driver kvm2 --memory 6144 --network-plugin=cni --enable-default-cni --container-runtime=cri-o --bootstrapper=kubeadm
```
> **Note:** For Kata Containers later than v1.6.1, the now default `tcfilter` networking of Kata Containers
> does not work for Minikube versions less than v1.1.1. Please ensure you use Minikube version v1.1.1
> or above.
## Check Minikube is running
Before you install Kata Containers, check that your Minikube is operating. On your guest:
```sh
$ kubectl get nodes
```
You should see your `master` node listed as being `Ready`.
Check you have virtualization enabled inside your Minikube. The following should return
a number larger than `0` if you have either of the `vmx` or `svm` nested virtualization features
available:
```sh
$ minikube ssh "egrep -c 'vmx|svm' /proc/cpuinfo"
```
## Installing Kata Containers
You can now install the Kata Containers runtime components. You will need a local copy of some Kata
Containers components to help with this, and then use `kubectl` on the host (that Minikube has already
configured for you) to deploy them:
```sh
$ git clone https://github.com/kata-containers/packaging.git
$ cd packaging/kata-deploy
$ kubectl apply -f kata-rbac.yaml
$ kubectl apply -f kata-deploy.yaml
```
This installs the Kata Containers components into `/opt/kata` inside the Minikube node. It can take
a few minutes for the operation to complete. You can check the installation has worked by checking
the status of the `kata-deploy` pod, which will be executing
[this script](https://github.com/kata-containers/packaging/blob/master/kata-deploy/scripts/kata-deploy.sh),
and will be executing a `sleep infinity` once it has successfully completed its work.
You can accomplish this by running the following:
```sh
$ podname=$(kubectl -n kube-system get pods -o=name | fgrep kata-deploy | sed 's?pod/??')
$ kubectl -n kube-system exec ${podname} -- ps -ef | fgrep infinity
```
> *NOTE:* This check only works for single node clusters, which is the default for Minikube.
> For multi-node clusters, the check would need to be adapted to check `kata-deploy` had
> completed on all nodes.
## Enabling Kata Containers
> **Note:** Only Minikube/Kubernetes versions <= 1.13 require this step. Since version
> v1.14, the `RuntimeClass` feature is enabled by default. Performing this step on Kubernetes > v1.14 is,
> however, benign.
Now you have installed the Kata Containers components in the Minikube node. Next, you need to configure
Kubernetes `RuntimeClass` to know when to use Kata Containers to run a pod.
```sh
$ kubectl apply -f https://raw.githubusercontent.com/kubernetes/node-api/master/manifests/runtimeclass_crd.yaml
```
### Register the runtime
Now register the `kata-qemu` runtime with that class. This should result in no errors:
```sh
$ cd packaging/kata-deploy/k8s-1.14
$ kubectl apply -f kata-qemu-runtimeClass.yaml
```
The Kata Containers installation process should be complete and enabled in the Minikube cluster.
## Testing Kata Containers
Launch a container that has been defined to run on Kata Containers. Kata is enabled by
the following lines in the YAML file. See the Kubernetes
[Runtime Class Documentation](https://kubernetes.io/docs/concepts/containers/runtime-class/#usage)
for more details.
```yaml
spec:
runtimeClassName: kata-qemu
```
Perform the following action to launch a Kata Containers based Apache PHP pod:
```sh
$ cd packaging/kata-deploy/examples
$ kubectl apply -f test-deploy-kata-qemu.yaml
```
This may take a few moments if the container image needs to be pulled down into the cluster.
Check progress using:
```sh
$ kubectl rollout status deployment php-apache-kata-qemu
```
There are a couple of ways to verify it is running with Kata Containers.
In theory, you should not be able to tell that your pod is running as a Kata Containers container,
but careful examination can verify that it is in fact a Kata Containers pod.
First, look on the node for a running `qemu` process. You should see QEMU command line output here,
indicating that your pod is running inside a Kata Containers VM:
```sh
$ minikube ssh -- pgrep -a qemu
```
Another way to verify Kata Containers is running is to look in the container itself and check
which kernel is running there. For a normal software container you will be running
the same kernel as the node. For a Kata Container you will be running a Kata Containers kernel
inside the Kata Containers VM.
First, examine which kernel is running inside the Minikube node itself:
```sh
$ minikube ssh -- uname -a
```
And then compare that against the kernel that is running inside the container:
```sh
$ podname=$(kubectl get pods -o=name | fgrep php-apache-kata-qemu | sed 's?pod/??')
$ kubectl exec ${podname} -- uname -a
```
You should see the node and pod are running different kernel versions.
## Wrapping up
This guide has shown an easy way to set up Minikube with Kata Containers.
Be aware that this is only a small single-node Kubernetes cluster running under a nested virtualization setup.
As such it has limitations, but as a first introduction to Kata Containers, and how to install it under Kubernetes,
it should suffice for initial learning and experimentation.
@ -0,0 +1,23 @@
# Install Kata Containers on openSUSE
1. Install the Kata Containers components with the following commands:
```bash
$ source /etc/os-release
$ DISTRO_REPO=$(sed "s/ /_/g" <<< "$NAME")
$ [ -n "$VERSION" ] && DISTRO_REPO+="_${VERSION}"
$ DISTRO_REPO=$(echo $DISTRO_REPO | tr -d ' ')
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ REPO_ALIAS="kata-${BRANCH}"
$ PUBKEY="/tmp/rpm-signkey.pub"
$ curl -SsL -o "$PUBKEY" "https://raw.githubusercontent.com/kata-containers/tests/master/data/rpm-signkey.pub"
$ sudo -E rpm --import "$PUBKEY"
$ zypper lr "$REPO_ALIAS" && sudo -E zypper -n removerepo "$REPO_ALIAS"
$ sudo -E zypper addrepo --refresh "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/${DISTRO_REPO}/" "$REPO_ALIAS"
$ sudo -E zypper -n install kata-runtime
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/opensuse-docker-install.md)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
@ -0,0 +1,19 @@
# Install Kata Containers on openSUSE Leap
1. Install Kata Containers on openSUSE by following the instructions in the
[openSUSE install guide](opensuse-installation-guide.md).
<!--
You can ignore the content of this comment.
(test code run by test-install-docs.sh to validate code blocks this document)
```bash
$ echo "NOTE: this document is just a link to the generic openSUSE install guide located at:
https://raw.githubusercontent.com/kata-containers/documentation/master/install/opensuse-installation-guide.md
Please download this file and run kata-doc-to-script.sh again."
```
-->
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/opensuse-leap-docker-install.md)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
@ -0,0 +1,19 @@
# Install Kata Containers on openSUSE Tumbleweed
1. Install Kata Containers on openSUSE by following the instructions in the
[openSUSE install guide](opensuse-installation-guide.md).
<!--
You can ignore the content of this comment.
(test code run by test-install-docs.sh to validate code blocks this document)
```bash
$ echo "NOTE: this document is just a link to the generic openSUSE install guide located at:
https://raw.githubusercontent.com/kata-containers/documentation/master/install/opensuse-installation-guide.md
Please download this file and run kata-doc-to-script.sh again."
```
-->
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/opensuse-tumbleweed-docker-install.md)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
@ -0,0 +1,16 @@
# Install Kata Containers on RHEL
1. Install the Kata Containers components with the following commands:
```bash
$ source /etc/os-release
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ sudo -E yum-config-manager --add-repo "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/RHEL_${VERSION_ID}/home:katacontainers:releases:${ARCH}:${BRANCH}.repo"
$ sudo -E yum -y install kata-runtime kata-proxy kata-shim
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/rhel-docker-install.md)
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
@ -0,0 +1,15 @@
# Install Kata Containers on SLES
1. Install the Kata Containers components with the following commands:
```bash
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ sudo -E zypper addrepo "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/SLE_15_SP1/home:katacontainers:releases:${ARCH}:${BRANCH}.repo"
$ sudo -E zypper -n --no-gpg-checks install kata-runtime kata-proxy kata-shim
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/sles-docker-install.md)
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
@ -0,0 +1,13 @@
# Install Kata Containers from `snapcraft.io`
Kata Containers can be installed in any Linux distribution that supports
[snapd](https://docs.snapcraft.io/installing-snapd).
Run the following command to install Kata Containers:
```bash
$ sudo snap install kata-containers --classic
```
For further information on integrating and configuring the `snap` Kata Containers install,
refer to the [Kata Containers packaging `snap` documentation](https://github.com/kata-containers/packaging/blob/master/snap/README.md#configure-kata-containers).
@ -0,0 +1,17 @@
# Install Kata Containers on Ubuntu
1. Install the Kata Containers components with the following commands:
```bash
$ ARCH=$(arch)
$ BRANCH="${BRANCH:-master}"
$ sudo sh -c "echo 'deb http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/ /' > /etc/apt/sources.list.d/kata-containers.list"
$ curl -sL http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/Release.key | sudo apt-key add -
$ sudo -E apt-get update
$ sudo -E apt-get -y install kata-runtime kata-proxy kata-shim
```
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/ubuntu-docker-install.md)
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
@ -0,0 +1,16 @@
# Install Kata Containers on VEXXHOST
Kata Containers on VEXXHOST use nested virtualization to provide an identical
installation experience to Kata on your preferred Linux distribution.
This guide assumes you have an OpenStack public cloud account set up and tools
to remotely connect to your virtual machine (SSH).
## Create a new virtual machine with nesting support
All regions support nested virtualization using the V2 flavors (those prefixed
with `v2-`). The recommended machine type for container workloads is the `v2-highcpu` range.
## Set up with distribution specific quick start
Follow distribution specific [install guides](https://github.com/kata-containers/documentation/tree/master/install#supported-distributions).
@ -0,0 +1,6 @@
# Using GPUs with Kata Containers
Kata Containers supports passing certain GPUs from the host into the container. Select the GPU vendor for detailed information:
- [Intel](Intel-GPU-passthrough-and-Kata.md)
- [Nvidia](Nvidia-GPU-passthrough-and-Kata.md)
@ -0,0 +1,295 @@
# Using Intel GPU device with Kata Containers
- [Using Intel GPU device with Kata Containers](#using-intel-gpu-device-with-kata-containers)
- [Hardware Requirements](#hardware-requirements)
- [Host Kernel Requirements](#host-kernel-requirements)
- [Install and configure Kata Containers](#install-and-configure-kata-containers)
- [Build Kata Containers kernel with GPU support](#build-kata-containers-kernel-with-gpu-support)
- [GVT-d with Kata Containers](#gvt-d-with-kata-containers)
- [GVT-g with Kata Containers](#gvt-g-with-kata-containers)
An Intel Graphics device can be passed to a Kata Containers container using GPU
passthrough (Intel GVT-d) as well as GPU mediated passthrough (Intel GVT-g).
Intel GVT-d (one VM to one physical GPU), also known as the Intel Graphics Device
passthrough feature, is one flavor of the graphics virtualization approach.
This flavor allows direct assignment of an entire GPU to a single user,
passing the native driver capabilities through the hypervisor without any limitations.
Intel GVT-g (multiple VMs to one physical GPU) is a full GPU virtualization solution
with mediated passthrough.<br/>
A virtual GPU instance is maintained for each VM, with part of the performance-critical
resources directly assigned. The ability to run a native graphics driver inside a
VM, without hypervisor intervention in performance-critical paths, achieves a good
balance among performance, features, and sharing capability.
| Technology | Description | Behaviour | Detail |
|-|-|-|-|
| Intel GVT-d | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
| Intel GVT-g | GPU sharing | Physical GPU shared by multiple VMs | Mediated passthrough |
## Hardware Requirements
- For client platforms, 5th generation Intel® Core Processor Graphics or higher are required.
- For server platforms, E3_v4 or higher Xeon Processor Graphics are required.
The following steps outline the workflow for using an Intel Graphics device with Kata.
## Host Kernel Requirements
The following configurations need to be enabled on your host kernel:
```
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
CONFIG_VFIO_MDEV=m
CONFIG_VFIO_MDEV_DEVICE=m
CONFIG_DRM_I915_GVT=m
CONFIG_DRM_I915_GVT_KVMGT=m
```
Your host kernel needs to be booted with `intel_iommu=on` on the kernel command
line.
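Assuming your distribution ships the kernel configuration at `/boot/config-$(uname -r)`, a quick way to verify both requirements on the host is:
```
# The GVT and VFIO options should be reported as =m (or =y)
$ grep -E 'CONFIG_(VFIO|DRM_I915_GVT)' /boot/config-$(uname -r)
# The kernel command line should contain intel_iommu=on
$ grep -o intel_iommu=on /proc/cmdline
```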
## Install and configure Kata Containers
To use this feature, you need Kata version 1.3.0 or above.
Follow the [Kata Containers setup instructions](https://github.com/kata-containers/documentation/blob/master/install/README.md)
to install the latest version of Kata.
In order to pass a GPU to a Kata Container, you need to enable the `hotplug_vfio_on_root_bus`
configuration in the Kata `configuration.toml` file as shown below.
```
$ sudo sed -i -e 's/^# *\(hotplug_vfio_on_root_bus\).*=.*$/\1 = true/g' /usr/share/defaults/kata-containers/configuration.toml
```
Make sure you are using the `pc` machine type by verifying `machine_type = "pc"` is
set in the `configuration.toml`.
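You can verify both settings, assuming the default packaged configuration path (your installation may use `/etc/kata-containers/configuration.toml` instead):
```
$ grep -E '^(machine_type|hotplug_vfio_on_root_bus)' /usr/share/defaults/kata-containers/configuration.toml
machine_type = "pc"
hotplug_vfio_on_root_bus = true
```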
## Build Kata Containers kernel with GPU support
The default guest kernel installed with Kata Containers does not provide GPU support.
To use an Intel GPU with Kata Containers, you need to build a kernel with the necessary
GPU support.
The following i915 kernel config options need to be enabled:
```
CONFIG_DRM=y
CONFIG_DRM_I915=y
CONFIG_DRM_I915_USERPTR=y
```
Build the Kata Containers kernel with the previous config options, using the instructions
described in [Building Kata Containers kernel](https://github.com/kata-containers/packaging/tree/master/kernel).
For further details on building and installing guest kernels, see [the developer guide](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#install-guest-kernel-images).
There is an easy way to build a guest kernel that supports Intel GPU:
```
## Build guest kernel with https://github.com/kata-containers/packaging/tree/master/kernel
# Prepare (download guest kernel source, generate .config)
$ ./build-kernel.sh -g intel -f setup
# Build guest kernel
$ ./build-kernel.sh -g intel build
# Install guest kernel
$ sudo -E ./build-kernel.sh -g intel install
/usr/share/kata-containers/vmlinux-intel-gpu.container -> vmlinux-5.4.15-70-intel-gpu
/usr/share/kata-containers/vmlinuz-intel-gpu.container -> vmlinuz-5.4.15-70-intel-gpu
```
Before using the new guest kernel, please update the `kernel` parameter in `configuration.toml`:
```
kernel = "/usr/share/kata-containers/vmlinuz-intel-gpu.container"
```
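If you prefer to script this change, a `sed` one-liner along these lines can update the default configuration in place (adjust the path if your configuration lives elsewhere):
```
$ sudo sed -i \
    -e 's|^kernel = .*|kernel = "/usr/share/kata-containers/vmlinuz-intel-gpu.container"|' \
    /usr/share/defaults/kata-containers/configuration.toml
```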
## GVT-d with Kata Containers
Use the following steps to pass an Intel Graphics device in GVT-d mode with Kata:
1. Find the Bus-Device-Function (BDF) for GPU device:
```
$ sudo lspci -nn -D | grep Graphics
0000:00:02.0 VGA compatible controller [0300]: Intel Corporation Broadwell-U Integrated Graphics [8086:1616] (rev 09)
```
Run the previous command to determine the BDF for the GPU device on the host.<br/>
From the previous output, PCI address `0000:00:02.0` is assigned to the hardware GPU device.<br/>
This BDF is used later to unbind the GPU device from the host.<br/>
`8086 1616` is the vendor and device ID of the hardware GPU device. It is used later to
rebind the GPU device to the `vfio-pci` driver.
2. Find the IOMMU group for the GPU device:
```
$ BDF="0000:00:02.0"
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
/sys/kernel/iommu_groups/1
```
The previous output shows that the GPU belongs to IOMMU group 1.
3. Unbind the GPU:
```
$ echo $BDF | sudo tee /sys/bus/pci/devices/$BDF/driver/unbind
```
4. Bind the GPU to the `vfio-pci` device driver:
```
$ sudo modprobe vfio-pci
$ echo 8086 1616 | sudo tee /sys/bus/pci/drivers/vfio-pci/new_id
$ echo $BDF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/bind
```
After you run the previous commands, the GPU is bound to `vfio-pci` driver.<br/>
A new directory with the IOMMU group number is created under `/dev/vfio`:
```
$ ls -l /dev/vfio
total 0
crw------- 1 root root 241, 0 May 18 15:38 1
crw-rw-rw- 1 root root 10, 196 May 18 15:37 vfio
```
5. Start a Kata container with GPU device:
```
$ sudo docker run -it --runtime=kata-runtime --rm --device /dev/vfio/1 -v /dev:/dev debian /bin/bash
```
Run `lspci` within the container to verify the GPU device is seen in the list of
the PCI devices. Note the vendor-device id of the GPU ("8086:1616") in the `lspci` output.
```
$ lspci -nn -D
0000:00:00.0 Class [0600]: Device [8086:1237] (rev 02)
0000:00:01.0 Class [0601]: Device [8086:7000]
0000:00:01.1 Class [0101]: Device [8086:7010]
0000:00:01.3 Class [0680]: Device [8086:7113] (rev 03)
0000:00:02.0 Class [0604]: Device [1b36:0001]
0000:00:03.0 Class [0780]: Device [1af4:1003]
0000:00:04.0 Class [0100]: Device [1af4:1004]
0000:00:05.0 Class [0002]: Device [1af4:1009]
0000:00:06.0 Class [0200]: Device [1af4:1000]
0000:00:0f.0 Class [0300]: Device [8086:1616] (rev 09)
```
Additionally, you can access the device node for the graphics device:
```
$ ls /dev/dri
card0 renderD128
```
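When the container no longer needs the GPU, you can return the device to the host graphics driver. The following is a hedged sketch of the reverse of steps 3 and 4, reusing the same `$BDF` and device ID from above:
```
# Unbind the GPU from vfio-pci and drop the registered device ID
$ echo $BDF | sudo tee /sys/bus/pci/drivers/vfio-pci/unbind
$ echo 8086 1616 | sudo tee /sys/bus/pci/drivers/vfio-pci/remove_id
# Rebind the GPU to the host i915 driver
$ echo $BDF | sudo tee /sys/bus/pci/drivers/i915/bind
```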
## GVT-g with Kata Containers
For GVT-g, you append `i915.enable_gvt=1` in addition to `intel_iommu=on`
on your host kernel command line and then reboot your host.
Use the following steps to pass an Intel Graphics device in GVT-g mode to a Kata Container:
1. Find the BDF for GPU device:
```
$ sudo lspci -nn -D | grep Graphics
0000:00:02.0 VGA compatible controller [0300]: Intel Corporation Broadwell-U Integrated Graphics [8086:1616] (rev 09)
```
Run the previous command to find out the BDF for the GPU device on the host.
The previous output shows that PCI address `0000:00:02.0` is assigned to the GPU device.
2. Choose the MDEV (Mediated Device) type for VGPU (Virtual GPU):
For background on `mdev` types, please follow this [kernel documentation](https://github.com/torvalds/linux/blob/master/Documentation/driver-api/vfio-mediated-device.rst).
* List out the `mdev` types for the VGPU:
```
$ BDF="0000:00:02.0"
$ ls /sys/devices/pci0000:00/$BDF/mdev_supported_types
i915-GVTg_V4_1 i915-GVTg_V4_2 i915-GVTg_V4_4 i915-GVTg_V4_8
```
* Inspect the `mdev` types and choose one that fits your requirement:
```
$ cd /sys/devices/pci0000:00/0000:00:02.0/mdev_supported_types/i915-GVTg_V4_8 && ls
available_instances create description device_api devices
$ cat description
low_gm_size: 64MB
high_gm_size: 384MB
fence: 4
resolution: 1024x768
weight: 2
$ cat available_instances
7
```
The `description` file shows the GPU resources that are assigned to a VGPU of the
specified MDEV type. The `available_instances` file shows the remaining number of
VGPUs you can create with the specified MDEV type.
3. Create a VGPU:
* Generate a UUID:
```
$ gpu_uuid=$(uuid)
```
* Write the UUID to the `create` file under the chosen `mdev` type:
```
$ echo ${gpu_uuid} | sudo tee /sys/devices/pci0000:00/0000:00:02.0/mdev_supported_types/i915-GVTg_V4_8/create
```
4. Find the IOMMU group for the VGPU:
```
$ ls -la /sys/devices/pci0000:00/0000:00:02.0/mdev_supported_types/i915-GVTg_V4_8/devices/${gpu_uuid}/iommu_group
lrwxrwxrwx 1 root root 0 May 18 14:35 devices/bbc4aafe-5807-11e8-a43e-03533cceae7d/iommu_group -> ../../../../kernel/iommu_groups/0
$ ls -l /dev/vfio
total 0
crw------- 1 root root 241, 0 May 18 11:30 0
crw-rw-rw- 1 root root 10, 196 May 18 11:29 vfio
```
The previous output shows that IOMMU group `0` was created for the VGPU.<br/>
Now you can use the device node `/dev/vfio/0` on the docker command line to pass
the VGPU to a Kata Container.
5. Start Kata container with GPU device enabled:
```
$ sudo docker run -it --runtime=kata-runtime --rm --device /dev/vfio/0 -v /dev:/dev debian /bin/bash
$ lspci -nn -D
0000:00:00.0 Class [0600]: Device [8086:1237] (rev 02)
0000:00:01.0 Class [0601]: Device [8086:7000]
0000:00:01.1 Class [0101]: Device [8086:7010]
0000:00:01.3 Class [0680]: Device [8086:7113] (rev 03)
0000:00:02.0 Class [0604]: Device [1b36:0001]
0000:00:03.0 Class [0780]: Device [1af4:1003]
0000:00:04.0 Class [0100]: Device [1af4:1004]
0000:00:05.0 Class [0002]: Device [1af4:1009]
0000:00:06.0 Class [0200]: Device [1af4:1000]
0000:00:0f.0 Class [0300]: Device [8086:1616] (rev 09)
```
BDF "0000:00:0f.0" is assigned to the VGPU device.
Additionally, you can access the device node for the graphics device:
```
$ ls /dev/dri
card0 renderD128
```
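When the VGPU is no longer needed, it can be destroyed through the same mediated device interface used to create it (see the kernel documentation linked in step 2). A minimal sketch, reusing the UUID generated earlier:
```
# Remove the VGPU instance created in step 3
$ echo 1 | sudo tee /sys/bus/mdev/devices/${gpu_uuid}/remove
```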


@ -0,0 +1,313 @@
# Using Nvidia GPU device with Kata Containers
- [Using Nvidia GPU device with Kata Containers](#using-nvidia-gpu-device-with-kata-containers)
- [Hardware Requirements](#hardware-requirements)
- [Host BIOS Requirements](#host-bios-requirements)
- [Host Kernel Requirements](#host-kernel-requirements)
- [Install and configure Kata Containers](#install-and-configure-kata-containers)
- [Build Kata Containers kernel with GPU support](#build-kata-containers-kernel-with-gpu-support)
- [Nvidia GPU pass-through mode with Kata Containers](#nvidia-gpu-pass-through-mode-with-kata-containers)
- [Nvidia vGPU mode with Kata Containers](#nvidia-vgpu-mode-with-kata-containers)
- [Install Nvidia Driver in Kata Containers](#install-nvidia-driver-in-kata-containers)
- [References](#references)
An Nvidia GPU device can be passed to a Kata Containers container using GPU passthrough
(Nvidia GPU pass-through mode) as well as GPU mediated passthrough (Nvidia vGPU mode).
In Nvidia GPU pass-through mode, an entire physical GPU is directly assigned to one VM,
bypassing the Nvidia Virtual GPU Manager. In this mode of operation, the GPU is accessed
exclusively by the Nvidia driver running in the VM to which it is assigned.
The GPU is not shared among VMs.
Nvidia Virtual GPU (vGPU) enables multiple virtual machines (VMs) to have simultaneous,
direct access to a single physical GPU, using the same Nvidia graphics drivers that are
deployed on non-virtualized operating systems. By doing this, Nvidia vGPU provides VMs
with unparalleled graphics performance, compute performance, and application compatibility,
together with the cost-effectiveness and scalability brought about by sharing a GPU
among multiple workloads.
| Technology | Description | Behaviour | Detail |
| --- | --- | --- | --- |
| Nvidia GPU pass-through mode | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
| Nvidia vGPU mode | GPU sharing | Physical GPU shared by multiple VMs | Mediated passthrough |
## Hardware Requirements
Nvidia GPUs Recommended for Virtualization:
- Nvidia Tesla (T4, M10, P6, V100 or newer)
- Nvidia Quadro RTX 6000/8000
## Host BIOS Requirements
Some hardware requires a larger PCI BAR window, for example the Nvidia Tesla P100 or K40m:
```
$ lspci -s 04:00.0 -vv | grep Region
Region 0: Memory at c6000000 (32-bit, non-prefetchable) [size=16M]
Region 1: Memory at 383800000000 (64-bit, prefetchable) [size=16G] #above 4G
Region 3: Memory at 383c00000000 (64-bit, prefetchable) [size=32M]
```
For large BARs devices, MMIO mapping above 4G address space should be `enabled`
in the PCI configuration of the BIOS.
Some hardware vendors use a different name for this setting in the BIOS, such as:
- Above 4G Decoding
- Memory Hole for PCI MMIO
- Memory Mapped I/O above 4GB
The following steps outline the workflow for using an Nvidia GPU with Kata.
## Host Kernel Requirements
The following configurations need to be enabled on your host kernel:
- `CONFIG_VFIO`
- `CONFIG_VFIO_IOMMU_TYPE1`
- `CONFIG_VFIO_MDEV`
- `CONFIG_VFIO_MDEV_DEVICE`
- `CONFIG_VFIO_PCI`
Your host kernel needs to be booted with `intel_iommu=on` on the kernel command line.
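A quick way to check these requirements on the host, assuming the kernel configuration is available at `/boot/config-$(uname -r)`:
```
# Each option should be reported as =y or =m
$ for opt in VFIO VFIO_IOMMU_TYPE1 VFIO_MDEV VFIO_MDEV_DEVICE VFIO_PCI; do
      grep "^CONFIG_${opt}=" /boot/config-$(uname -r)
  done
# The kernel command line should contain intel_iommu=on
$ grep -o intel_iommu=on /proc/cmdline
```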
## Install and configure Kata Containers
To use non-large BARs devices (for example, Nvidia Tesla T4), you need Kata version 1.3.0 or above.
Follow the [Kata Containers setup instructions](https://github.com/kata-containers/documentation/blob/master/install/README.md)
to install the latest version of Kata.
The following configuration in the Kata `configuration.toml` file works for these devices:
```
machine_type = "pc"
hotplug_vfio_on_root_bus = true
```
To use large BARs devices (for example, the Nvidia Tesla P100), you need Kata version 1.11.0 or above.
Use one of the following two configurations in the Kata `configuration.toml` file.
Hotplug for PCI devices through `shpchp` (Linux's SHPC PCI hotplug driver):
```
machine_type = "q35"
hotplug_vfio_on_root_bus = false
```
Hotplug for PCIe devices through `pciehp` (Linux's PCIe hotplug driver):
```
machine_type = "q35"
hotplug_vfio_on_root_bus = true
pcie_root_port = 1
```
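If you want to apply the `pciehp` variant non-interactively, commands along the following lines can work. This is a sketch against the default packaged configuration path used elsewhere in this guide; adjust it for your installation:
```
$ conf="/usr/share/defaults/kata-containers/configuration.toml"
$ sudo sed -i -e 's/^machine_type = .*/machine_type = "q35"/' "$conf"
$ sudo sed -i -e 's/^# *\(hotplug_vfio_on_root_bus\).*=.*$/\1 = true/' "$conf"
# Add the PCIe root port count if it is not already present
$ grep -q '^pcie_root_port' "$conf" || echo 'pcie_root_port = 1' | sudo tee -a "$conf"
```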
## Build Kata Containers kernel with GPU support
The default guest kernel installed with Kata Containers does not provide GPU support.
To use an Nvidia GPU with Kata Containers, you need to build a kernel with the
necessary GPU support.
The following kernel config options need to be enabled:
```
# Support PCI/PCIe device hotplug (Required for large BARs device)
CONFIG_HOTPLUG_PCI_PCIE=y
CONFIG_HOTPLUG_PCI_SHPC=y
# Support for loading modules (Required to load Nvidia drivers)
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# Enable the MMIO access method for PCIe devices (Required for large BARs device)
CONFIG_PCI_MMCONFIG=y
```
The following kernel config options need to be disabled:
```
# Disable Open Source Nvidia driver nouveau
# It conflicts with Nvidia official driver
CONFIG_DRM_NOUVEAU=n
```
> **Note**: `CONFIG_DRM_NOUVEAU` is normally disabled by default.
> It is worth checking that it is not enabled in your kernel configuration to prevent any conflicts.
Build the Kata Containers kernel with the previous config options,
using the instructions described in [Building Kata Containers kernel](https://github.com/kata-containers/packaging/tree/master/kernel).
For further details on building and installing guest kernels,
see [the developer guide](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#install-guest-kernel-images).
There is an easy way to build a guest kernel that supports Nvidia GPU:
```
## Build guest kernel with https://github.com/kata-containers/packaging/tree/master/kernel
# Prepare (download guest kernel source, generate .config)
$ ./build-kernel.sh -v 4.19.86 -g nvidia -f setup
# Build guest kernel
$ ./build-kernel.sh -v 4.19.86 -g nvidia build
# Install guest kernel
$ sudo -E ./build-kernel.sh -v 4.19.86 -g nvidia install
/usr/share/kata-containers/vmlinux-nvidia-gpu.container -> vmlinux-4.19.86-70-nvidia-gpu
/usr/share/kata-containers/vmlinuz-nvidia-gpu.container -> vmlinuz-4.19.86-70-nvidia-gpu
```
To build the Nvidia driver in a Kata container, the `kernel-devel` package is required.
One way to generate RPM packages for `kernel-devel` is:
```
$ cd kata-linux-4.19.86-68
$ make rpm-pkg
Output RPMs:
~/rpmbuild/RPMS/x86_64/kernel-devel-4.19.86_nvidia_gpu-1.x86_64.rpm
```
> **Note**:
> - `kernel-devel` should be installed in the Kata container before running the Nvidia driver installer.
> - Run `make deb-pkg` to build the deb package.
Before using the new guest kernel, please update the `kernel` parameter in `configuration.toml`:
```
kernel = "/usr/share/kata-containers/vmlinuz-nvidia-gpu.container"
```
## Nvidia GPU pass-through mode with Kata Containers
Use the following steps to pass an Nvidia GPU device in pass-through mode with Kata:
1. Find the Bus-Device-Function (BDF) for GPU device on host:
```
$ sudo lspci -nn -D | grep -i nvidia
0000:04:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:15f8] (rev a1)
0000:84:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:15f8] (rev a1)
```
> PCI address `0000:04:00.0` is assigned to the hardware GPU device.
> `10de:15f8` is the device ID of the hardware GPU device.
2. Find the IOMMU group for the GPU device:
```
$ BDF="0000:04:00.0"
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
/sys/kernel/iommu_groups/45
```
The previous output shows that the GPU belongs to IOMMU group 45.
3. Check the IOMMU group number under `/dev/vfio`:
```
$ ls -l /dev/vfio
total 0
crw------- 1 root root 248, 0 Feb 28 09:57 45
crw------- 1 root root 248, 1 Feb 28 09:57 54
crw-rw-rw- 1 root root 10, 196 Feb 28 09:57 vfio
```
4. Start a Kata container with GPU device:
```
$ sudo docker run -it --runtime=kata-runtime --cap-add=ALL --device /dev/vfio/45 centos /bin/bash
```
5. Run `lspci` within the container to verify the GPU device is seen in the list
of the PCI devices. Note the vendor-device id of the GPU (`10de:15f8`) in the `lspci` output.
```
$ lspci -nn -D | grep '10de:15f8'
0000:01:01.0 3D controller [0302]: NVIDIA Corporation GP100GL [Tesla P100 PCIe 16GB] [10de:15f8] (rev a1)
```
6. Additionally, you can check the PCI BARs space of the Nvidia GPU device in the container:
```
$ lspci -s 01:01.0 -vv | grep Region
Region 0: Memory at c0000000 (32-bit, non-prefetchable) [disabled] [size=16M]
Region 1: Memory at 4400000000 (64-bit, prefetchable) [disabled] [size=16G]
Region 3: Memory at 4800000000 (64-bit, prefetchable) [disabled] [size=32M]
```
> **Note**: If you see output similar to the above, the BAR space of the Nvidia
> GPU has been successfully allocated.
## Nvidia vGPU mode with Kata Containers
Nvidia vGPU is a licensed product on all supported GPU boards. A software license
is required to enable all vGPU features within the guest VM.
> **Note**: No suitable test environment was available, so detailed steps are not documented here.
## Install Nvidia Driver in Kata Containers
Download the official Nvidia driver from
[https://www.nvidia.com/Download/index.aspx](https://www.nvidia.com/Download/index.aspx),
for example `NVIDIA-Linux-x86_64-418.87.01.run`.
Install the `kernel-devel` package (generated in the previous steps) for the guest kernel:
```
$ sudo rpm -ivh kernel-devel-4.19.86_gpu-1.x86_64.rpm
```
Here is an example of how to extract, compile, and install the Nvidia driver:
```
## Extract
$ sh ./NVIDIA-Linux-x86_64-418.87.01.run -x
## Compile and install (It will take some time)
$ cd NVIDIA-Linux-x86_64-418.87.01
$ sudo ./nvidia-installer -a -q --ui=none \
--no-cc-version-check \
--no-opengl-files --no-install-libglvnd \
--kernel-source-path=/usr/src/kernels/`uname -r`
```
Alternatively, run everything as a single command:
```
$ sudo sh ./NVIDIA-Linux-x86_64-418.87.01.run -a -q --ui=none \
--no-cc-version-check \
--no-opengl-files --no-install-libglvnd \
--kernel-source-path=/usr/src/kernels/`uname -r`
```
To view detailed logs of the installer:
```
$ tail -f /var/log/nvidia-installer.log
```
Load the Nvidia driver modules manually:
```
# Optional: generate modules.dep and map files for the Nvidia driver
$ sudo depmod
# Load module
$ sudo modprobe nvidia-drm
# Check module
$ lsmod | grep nvidia
nvidia_drm 45056 0
nvidia_modeset 1093632 1 nvidia_drm
nvidia 18202624 1 nvidia_modeset
drm_kms_helper 159744 1 nvidia_drm
drm 364544 3 nvidia_drm,drm_kms_helper
i2c_core 65536 3 nvidia,drm_kms_helper,drm
ipmi_msghandler 49152 1 nvidia
```
Check the Nvidia device status with `nvidia-smi`:
```
$ nvidia-smi
Tue Mar 3 00:03:49 2020
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.87.01 Driver Version: 418.87.01 CUDA Version: 10.1 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 Tesla P100-PCIE... Off | 00000000:01:01.0 Off | 0 |
| N/A 27C P0 25W / 250W | 0MiB / 16280MiB | 0% Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
```
## References
- [Configuring a VM for GPU Pass-Through by Using the QEMU Command Line](https://docs.nvidia.com/grid/latest/grid-vgpu-user-guide/index.html#using-gpu-pass-through-red-hat-el-qemu-cli)
- https://gitlab.com/nvidia/container-images/driver/-/tree/master
- https://github.com/NVIDIA/nvidia-docker/wiki/Driver-containers-(Beta)
