docs: merge documentation repository
Generated by git subtree add --prefix=docs git@github.com:kata-containers/documentation.git master git-subtree-dir: docs git-subtree-mainline:ec146a1b39
git-subtree-split:510287204b
Fixes: #329 Signed-off-by: Peng Tao <bergwolf@hyper.sh>
25
docs/.ci/lib.sh
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2018 Intel Corporation
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
# Tests repository providing the shared CI helper scripts
# (overridable via the environment).
export tests_repo="${tests_repo:-github.com/kata-containers/tests}"

# Local checkout location of the tests repository inside GOPATH.
export tests_repo_dir="$GOPATH/src/$tests_repo"

# Clone (or update) the tests repository into $tests_repo_dir.
#
# If a clone already exists and KATA_CI_NO_NETWORK is set, the network
# is not touched. KATA_CI_NO_NETWORK is (has to be) ignored if there is
# no existing clone.
clone_tests_repo()
{
	# Use two separate tests joined with '&&' rather than the
	# deprecated/ambiguous '[ ... -a ... ]' form.
	if [ -d "$tests_repo_dir" ] && [ -n "$KATA_CI_NO_NETWORK" ]
	then
		return
	fi

	# Best effort: "go get" can return non-zero even when the checkout
	# succeeds (e.g. no buildable Go files), so do not fail the caller.
	go get -d -u "$tests_repo" || true
}

# Run the common static checks from the tests repository against this
# documentation repository.
run_static_checks()
{
	clone_tests_repo
	bash "$tests_repo_dir/.ci/static-checks.sh" "github.com/kata-containers/documentation"
}
|
11
docs/.ci/run.sh
Executable file
@ -0,0 +1,11 @@
|
|||||||
|
#!/bin/bash
#
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#

# CI entry point: run the documentation repository tests.

# Fail the job as soon as any command fails.
set -e

# Directory containing this script (so it works from any cwd).
cidir=$(dirname "$0")

# The install-guide tests are the only tests this repository runs.
bash "${cidir}/test-install-docs.sh"
|
17
docs/.ci/setup.sh
Executable file
@ -0,0 +1,17 @@
|
|||||||
|
#!/bin/bash
#
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#

# CI setup: obtain the tests repository and run its generic setup script.

set -e

# Directory containing this script.
cidir=$(dirname "$0")
source "${cidir}/lib.sh"

# Ensure a local clone of the tests repository exists.
clone_tests_repo

# Run the shared setup from within the tests repository, restoring the
# previous working directory afterwards.
pushd "${tests_repo_dir}"
.ci/setup.sh
popd
|
12
docs/.ci/static-checks.sh
Executable file
@ -0,0 +1,12 @@
|
|||||||
|
#!/bin/bash
#
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# CI entry point: run the common static checks on this repository.

set -e

# Directory containing this script.
cidir=$(dirname "$0")
source "${cidir}/lib.sh"

run_static_checks
|
351
docs/.ci/test-install-docs.sh
Executable file
@ -0,0 +1,351 @@
|
|||||||
|
#!/bin/bash
#
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

set -e

# The go binary isn't installed, but we checkout the repos to the standard
# golang locations.
export GOPATH=${GOPATH:-${HOME}/go}

# Name and directory of this script (used for re-exec and log messages).
typeset -r script_name="${0##*/}"
typeset -r script_dir="$(cd "$(dirname "${0}")" && pwd)"

# Image used for the basic container smoke test.
typeset -r docker_image="busybox"
typeset -r kata_project_url="github.com/kata-containers"
typeset -r test_repo="${kata_project_url}/tests"
typeset -r test_repo_url="https://${test_repo}"
typeset -r test_repo_dir="${GOPATH}/src/${test_repo}"
typeset -r kata_project_dir="${GOPATH}/src/${kata_project_url}"

# Helper scripts provided by the tests repository.
typeset -r mgr="${test_repo_dir}/cmd/kata-manager/kata-manager.sh"
typeset -r doc_to_script="${test_repo_dir}/.ci/kata-doc-to-script.sh"
|
||||||
|
|
||||||
|
# Print an error message to stderr and abort the script.
die()
{
	printf 'ERROR: %s\n' "$*" >&2
	exit 1
}
|
||||||
|
|
||||||
|
# Print an informational message to stdout.
info()
{
	printf 'INFO: %s\n' "$*"
}
|
||||||
|
|
||||||
|
# Display the help/usage text on stdout.
# Note: the here-document expands $script_name, so this must be called
# after the script constants are defined.
usage()
{
	cat <<EOT
Description: Run Kata documentation CI tests.

Usage: $script_name [options]

Options:

 -h       : Show this help.
 -t <dir> : Run all scripts ("\*.sh" files) in the specified
            directory.

Notes:

- The '-t' option is not generally useful - it is used by this
  script which re-exec's itself with this option.

EOT
}
|
||||||
|
|
||||||
|
# Re-execute the running script from a temporary directory to allow the
|
||||||
|
# script to continue executing even if the original source file is deleted.
|
||||||
|
# Re-execute the running script from a temporary directory to allow the
# script to continue executing even if the original source file is deleted.
#
# Arguments: $1 - temporary test directory (must exist).
# Note: does not return when a re-exec happens ('exec' replaces the
# current process).
reexec_in_tmpdir()
{
	local -r test_dir="$1"

	[ -d "${test_dir}" ] || die "invalid test dir: ${test_dir}"

	if [ "${script_dir}" = "${test_dir}" ]
	then
		# Already running from temp directory so nothing else to do
		return
	fi

	local new
	new="${test_dir}/${script_name}"

	# Copy this script into the temp dir (mode 750 keeps it executable).
	install --mode 750 "${0}" "${new}"

	info "Re-execing ${0} as ${new}"

	cd "${test_dir}"

	# Replace the current process; the new instance runs the generated
	# test scripts via the '-t' option.
	exec "${new}" -t "${test_dir}/tests"
}
|
||||||
|
|
||||||
|
# Grab a copy of the tests repository
|
||||||
|
# Grab a copy of the tests repository.
# No-op if a clone already exists at ${test_repo_dir}.
get_tests_repo()
{
	if [ -d "${test_repo_dir}" ]
	then
		return
	fi

	mkdir -p "${kata_project_dir}"

	git clone "${test_repo_url}" "${test_repo_dir}"
}
|
||||||
|
|
||||||
|
# Delete all local github repo clones.
|
||||||
|
#
|
||||||
|
# This is required to ensure that the tests themselves (re-)create these
|
||||||
|
# clones.
|
||||||
|
# Delete all local github repo clones.
#
# This is required to ensure that the tests themselves (re-)create these
# clones.
#
# Safety: refuses to run on developer systems (KATA_DEV_MODE set) and
# outside CI (CI unset), since it deletes the entire kata project tree.
delete_kata_repos()
{
	[ -n "${KATA_DEV_MODE}" ] && die "Not continuing as this is a dev system"
	[ -z "${CI}" ] && die "Not continuing as this is a non-CI environment"

	local cwd="$PWD"

	info "Deleting all local kata repositories below ${kata_project_dir}"

	# ':?' aborts if the variable is empty/unset and '--' stops option
	# parsing, guarding against an accidental 'rm -rf' of the wrong path.
	if [ -d "${kata_project_dir}" ]
	then
		rm -rf -- "${kata_project_dir:?}"
	fi

	# Recreate the empty directory, taking care to handle the scenario
	# where the script is run from within the just-deleted directory.
	mkdir -p "$cwd" && cd "$cwd"
}
|
||||||
|
|
||||||
|
# Prepare the environment: load distro details, create GOPATH and fetch
# the tests repository (which provides the kata-manager script, $mgr).
setup()
{
	# Provides $ID and $VERSION used in test messages; fall back to the
	# alternate location used by some distros.
	source /etc/os-release || source /usr/lib/os-release

	mkdir -p "${GOPATH}"

	get_tests_repo

	[ -e "$mgr" ] || die "cannot find $mgr"
}
|
||||||
|
|
||||||
|
# Perform a simple test to create a container
|
||||||
|
# Perform a simple test to create a container
#
# Arguments: $1 - test name, included in the success message.
create_kata_container()
{
	local -r test_name="$1"

	local -r msg=$(info "Successfully tested ${test_name} on distro ${ID} ${VERSION}")

	# Perform a basic test: echoing the message from inside a container
	# run with the kata runtime proves the installed stack works.
	sudo -E docker run --rm -i --runtime "kata-runtime" "${docker_image}" echo "$msg"
}
|
||||||
|
|
||||||
|
# Run the kata manager to "execute" the install guide to ensure the commands
|
||||||
|
# it specified result in a working system.
|
||||||
|
# Run the kata manager to "execute" the install guide to ensure the commands
# it specified result in a working system.
test_distro_install_guide()
{
	info "Installing system from the $ID install guide"

	# Install the packaged kata + docker stack for this distro.
	$mgr install-docker-system

	# NOTE(review): presumably switches the runtime to the rootfs image
	# and turns on debug output — confirm against kata-manager.sh.
	$mgr configure-image
	$mgr enable-debug

	local mgr_name="${mgr##*/}"

	local test_name="${mgr_name} to test install guide"

	info "Install using ${test_name}"

	# Smoke-test the freshly installed system.
	create_kata_container "${test_name}"

	# Clean up
	$mgr remove-packages
}
|
||||||
|
|
||||||
|
# Apart from the distro-specific install guides, users can choose to install
|
||||||
|
# using one of the following methods:
|
||||||
|
#
|
||||||
|
# - kata-manager ("Automatic" method).
|
||||||
|
# - kata-doc-to-script ("Scripted" method).
|
||||||
|
#
|
||||||
|
# Testing these is awkward because we need to "execute" the documents
|
||||||
|
# describing those install methods, but since those install methods should
|
||||||
|
# themselves entirely document/handle an installation method, we need to
|
||||||
|
# convert each install document to a script, then delete all the kata code
|
||||||
|
# repositories. This ensures that when each install method script is run, it
|
||||||
|
# does not rely on any local files (it should download anything it needs). But
|
||||||
|
# since we're deleting the repos, we need to copy this script to a temporary
|
||||||
|
# location, along with the install scripts this function generates, and then
|
||||||
|
# re-exec this script with an option to ask it to run the scripts the previous
|
||||||
|
# instance of this script just generated.
|
||||||
|
# Test the "Automatic" (kata-manager) and "Scripted" (kata-doc-to-script)
# install methods: convert each install document into a runnable script,
# then re-exec this script from a temporary directory so the generated
# scripts can run after all local kata repos are deleted.
test_alternative_install_methods()
{
	local -a files
	files+=("installing-with-kata-manager.md")
	files+=("installing-with-kata-doc-to-script.md")

	local tmp_dir

	tmp_dir=$(mktemp -d)

	local script_file

	local file

	local tests_dir
	tests_dir="${tmp_dir}/tests"

	mkdir -p "${tests_dir}"

	local -i num=0

	# Convert the docs to scripts
	for file in "${files[@]}"
	do
		num+=1

		local file_path
		local script_file
		local script_file_path
		local test_name

		file_path="${script_dir}/../install/${file}"
		# Derive the script name from the document name (.md -> .sh).
		script_file=${file/.md/.sh}

		# Add a numeric prefix so the tests are run in the array order
		test_name=$(printf "%.2d-%s" "${num}" "${script_file}")

		script_file_path="${tests_dir}/${test_name}"

		info "Creating test script ${test_name} from ${file}"

		bash "${doc_to_script}" "${file_path}" "${script_file_path}"
	done

	# Does not return on success ('exec' replaces the process).
	reexec_in_tmpdir "${tmp_dir}"

	# Not reached
	die "re-exec failed"
}
|
||||||
|
|
||||||
|
# Run the full install-documentation test suite: first the distro install
# guide, then the alternative (manager/scripted) install methods.
run_tests()
{
	# If docker was installed by default, zap it.
	$mgr -v -f remove-docker

	test_distro_install_guide
	test_alternative_install_methods
}
|
||||||
|
|
||||||
|
# Detect if any installation documents changed. If so, execute all the
|
||||||
|
# documents to test they result in a working system.
|
||||||
|
# Detect if any installation documents changed. If so, execute all the
# documents to test they result in a working system.
check_install_docs()
{
	if [ -n "$TRAVIS" ]
	then
		info "Not testing install guide as Travis lacks modern distro support and VT-x"
		return
	fi

	# List of filters used to restrict the types of file changes.
	# See git-diff-tree(1) for further info.
	local filters=""

	# Added file
	filters+="A"

	# Copied file
	filters+="C"

	# Modified file
	filters+="M"

	# Renamed file
	filters+="R"

	# Unmerged (U) and Unknown (X) files. These particular filters
	# shouldn't be necessary but just in case...
	filters+="UX"

	# List of changed files.
	# Declaration split from assignment so 'local' does not mask the
	# command substitution's exit status.
	local files
	files=$(git diff-tree \
		--name-only \
		--no-commit-id \
		--diff-filter="${filters}" \
		-r \
		origin/master HEAD || true)

	# No files were changed
	[ -z "$files" ] && return

	# Only install documents are of interest here.
	local changed
	changed=$(echo "${files}" | grep "^install/.*\.md$" || true)

	[ -z "$changed" ] && info "No install documents modified" && return

	info "Found modified install documents: ${changed}"

	# Regardless of which installation documents were changed, we test
	# them all where possible.
	run_tests
}
|
||||||
|
|
||||||
|
# Run the test scripts in the specified directory.
|
||||||
|
# Run the test scripts in the specified directory.
#
# Arguments: $1 - directory containing generated "*.sh" test scripts.
#
# Each script runs against a pristine environment (all local kata repos
# are deleted first), then the resulting installation is smoke-tested
# and the packages removed again.
run_tests_from_dir()
{
	local -r test_dir="$1"

	[ -e "$test_dir" ] || die "invalid test dir: ${test_dir}"

	cd "${test_dir}"

	info "Looking for tests scripts to run in directory ${test_dir}"

	# NOTE(review): iterating over "$(ls -- *.sh)" breaks on names with
	# whitespace; the names generated by this script are safe, but a
	# plain glob would be more robust.
	for t in $(ls -- *.sh)
	do
		# Ensure the test script cannot access any local files
		# (since it should be standalone and download any files
		# it needs).
		delete_kata_repos

		info "Running test script '$t'"
		# '-x' traces the generated script for easier CI debugging.
		bash -x "${t}"

		# Ensure it is possible to use the installed system
		create_kata_container "${t}"

		# Re-run setup to recreate the tests repo that was deleted
		# before the test ran.
		setup

		# Packaged install so clean up
		# (Note that '$mgr' should now exist again)
		$mgr remove-packages
	done

	# paranoia
	[ -d "${test_dir}" ] && rm -rf "${test_dir}"

	info "All tests passed"
}
|
||||||
|
|
||||||
|
# Entry point: parse options and either run the scripts in a given
# directory (-t, used by the re-exec'd instance of this script) or check
# whether any install documents changed and test them.
main()
{
	local opt
	local test_dir

	setup

	while getopts "ht:" opt
	do
		case "$opt" in
			h) usage; exit 0;;
			t) test_dir="$OPTARG";;
			*) die "invalid option: $opt";;
		esac
	done

	if [ -n "$test_dir" ]
	then
		# Second-stage invocation: run the generated install scripts.
		run_tests_from_dir "$test_dir"
		exit 0
	fi

	check_install_docs
}
|
||||||
|
|
||||||
|
main "$@"
|
25
docs/.travis.yml
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#
# Copyright (c) 2018 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#

# Travis CI configuration for the documentation repository.

sudo: required
dist: xenial

language: go
os:
  - linux
  - linux-ppc64le

go:
  - "1.10.x"

# Clone the tests repository and run its generic setup.
before_install:
  - ".ci/setup.sh"

# Run the shared static checks before the main test run.
before_script:
  - ".ci/static-checks.sh"

# Run the documentation tests.
script:
  - ".ci/run.sh"
|
13
docs/CODEOWNERS
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
# Copyright 2019 Intel Corporation.
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
#
|
||||||
|
# Define any code owners for this repository.
|
||||||
|
# The code owners lists are used to help automatically enforce
|
||||||
|
# reviews and acks of the right groups on the right PRs.
|
||||||
|
|
||||||
|
# Order in this file is important. Only the last match will be
|
||||||
|
# used. See https://help.github.com/articles/about-code-owners/
|
||||||
|
|
||||||
|
*.md @kata-containers/documentation
|
||||||
|
|
3
docs/CODE_OF_CONDUCT.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
## Kata Containers Documentation Code of Conduct
|
||||||
|
|
||||||
|
Kata Containers follows the [OpenStack Foundation Code of Conduct](https://www.openstack.org/legal/community-code-of-conduct/).
|
5
docs/CONTRIBUTING.md
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# Contributing
|
||||||
|
|
||||||
|
## This repo is part of [Kata Containers](https://katacontainers.io)
|
||||||
|
|
||||||
|
For details on how to contribute to the Kata Containers project, please see the main [contributing document](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md).
|
690
docs/Developer-Guide.md
Normal file
@ -0,0 +1,690 @@
|
|||||||
|
* [Warning](#warning)
|
||||||
|
* [Assumptions](#assumptions)
|
||||||
|
* [Initial setup](#initial-setup)
|
||||||
|
* [Requirements to build individual components](#requirements-to-build-individual-components)
|
||||||
|
* [Build and install the Kata Containers runtime](#build-and-install-the-kata-containers-runtime)
|
||||||
|
* [Check hardware requirements](#check-hardware-requirements)
|
||||||
|
* [Configure to use initrd or rootfs image](#configure-to-use-initrd-or-rootfs-image)
|
||||||
|
* [Enable full debug](#enable-full-debug)
|
||||||
|
* [debug logs and shimv2](#debug-logs-and-shimv2)
|
||||||
|
* [Enabling full `containerd` debug](#enabling-full-containerd-debug)
|
||||||
|
* [Enabling just `containerd shim` debug](#enabling-just-containerd-shim-debug)
|
||||||
|
* [Enabling `CRI-O` and `shimv2` debug](#enabling-cri-o-and-shimv2-debug)
|
||||||
|
* [journald rate limiting](#journald-rate-limiting)
|
||||||
|
* [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
|
||||||
|
* [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
|
||||||
|
* [Build and install Kata proxy](#build-and-install-kata-proxy)
|
||||||
|
* [Build and install Kata shim](#build-and-install-kata-shim)
|
||||||
|
* [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
|
||||||
|
* [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
|
||||||
|
* [Get the osbuilder](#get-the-osbuilder)
|
||||||
|
* [Create a rootfs image](#create-a-rootfs-image)
|
||||||
|
* [Create a local rootfs](#create-a-local-rootfs)
|
||||||
|
* [Add a custom agent to the image - OPTIONAL](#add-a-custom-agent-to-the-image---optional)
|
||||||
|
* [Build a rootfs image](#build-a-rootfs-image)
|
||||||
|
* [Install the rootfs image](#install-the-rootfs-image)
|
||||||
|
* [Create an initrd image - OPTIONAL](#create-an-initrd-image---optional)
|
||||||
|
* [Create a local rootfs for initrd image](#create-a-local-rootfs-for-initrd-image)
|
||||||
|
* [Build an initrd image](#build-an-initrd-image)
|
||||||
|
* [Install the initrd image](#install-the-initrd-image)
|
||||||
|
* [Install guest kernel images](#install-guest-kernel-images)
|
||||||
|
* [Install a hypervisor](#install-a-hypervisor)
|
||||||
|
* [Build a custom QEMU](#build-a-custom-qemu)
|
||||||
|
* [Build a custom QEMU for aarch64/arm64 - REQUIRED](#build-a-custom-qemu-for-aarch64arm64---required)
|
||||||
|
* [Run Kata Containers with Docker](#run-kata-containers-with-docker)
|
||||||
|
* [Update the Docker systemd unit file](#update-the-docker-systemd-unit-file)
|
||||||
|
* [Create a container using Kata](#create-a-container-using-kata)
|
||||||
|
* [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
|
||||||
|
* [Troubleshoot Kata Containers](#troubleshoot-kata-containers)
|
||||||
|
* [Appendices](#appendices)
|
||||||
|
* [Checking Docker default runtime](#checking-docker-default-runtime)
|
||||||
|
* [Set up a debug console](#set-up-a-debug-console)
|
||||||
|
* [Create a custom image containing a shell](#create-a-custom-image-containing-a-shell)
|
||||||
|
* [Create a debug systemd service](#create-a-debug-systemd-service)
|
||||||
|
* [Build the debug image](#build-the-debug-image)
|
||||||
|
* [Configure runtime for custom debug image](#configure-runtime-for-custom-debug-image)
|
||||||
|
* [Ensure debug options are valid](#ensure-debug-options-are-valid)
|
||||||
|
* [Create a container](#create-a-container)
|
||||||
|
* [Connect to the virtual machine using the debug console](#connect-to-the-virtual-machine-using-the-debug-console)
|
||||||
|
* [Obtain details of the image](#obtain-details-of-the-image)
|
||||||
|
* [Capturing kernel boot logs](#capturing-kernel-boot-logs)
|
||||||
|
* [Running standalone](#running-standalone)
|
||||||
|
* [Create an OCI bundle](#create-an-oci-bundle)
|
||||||
|
* [Launch the runtime to create a container](#launch-the-runtime-to-create-a-container)
|
||||||
|
|
||||||
|
# Warning
|
||||||
|
|
||||||
|
This document is written **specifically for developers**: it is not intended for end users.
|
||||||
|
|
||||||
|
# Assumptions
|
||||||
|
|
||||||
|
- You are working on a non-critical test or development system.
|
||||||
|
|
||||||
|
# Initial setup
|
||||||
|
|
||||||
|
The recommended way to create a development environment is to first
|
||||||
|
[install the packaged versions of the Kata Containers components](install/README.md)
|
||||||
|
to create a working system.
|
||||||
|
|
||||||
|
The installation guide instructions will install all required Kata Containers
|
||||||
|
components, plus Docker*, the hypervisor, and the Kata Containers image and
|
||||||
|
guest kernel.
|
||||||
|
|
||||||
|
# Requirements to build individual components
|
||||||
|
|
||||||
|
You need to install the following to build Kata Containers components:
|
||||||
|
|
||||||
|
- [golang](https://golang.org/dl)
|
||||||
|
|
||||||
|
To view the versions of go known to work, see the `golang` entry in the
|
||||||
|
[versions database](https://github.com/kata-containers/runtime/blob/master/versions.yaml).
|
||||||
|
|
||||||
|
- `make`.
|
||||||
|
- `gcc` (required for building the shim and runtime).
|
||||||
|
|
||||||
|
# Build and install the Kata Containers runtime
|
||||||
|
|
||||||
|
```
|
||||||
|
$ go get -d -u github.com/kata-containers/runtime
|
||||||
|
$ cd $GOPATH/src/github.com/kata-containers/runtime
|
||||||
|
$ make && sudo -E PATH=$PATH make install
|
||||||
|
```
|
||||||
|
|
||||||
|
The build will create the following:
|
||||||
|
|
||||||
|
- runtime binary: `/usr/local/bin/kata-runtime`
|
||||||
|
- configuration file: `/usr/share/defaults/kata-containers/configuration.toml`
|
||||||
|
|
||||||
|
# Check hardware requirements
|
||||||
|
|
||||||
|
You can check if your system is capable of creating a Kata Container by running the following:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo kata-runtime kata-check
|
||||||
|
```
|
||||||
|
|
||||||
|
If your system is *not* able to run Kata Containers, the previous command will error out and explain why.
|
||||||
|
|
||||||
|
## Configure to use initrd or rootfs image
|
||||||
|
|
||||||
|
Kata containers can run with either an initrd image or a rootfs image.
|
||||||
|
|
||||||
|
If you want to test with `initrd`, make sure you have `initrd = /usr/share/kata-containers/kata-containers-initrd.img`
in your configuration file `/usr/share/defaults/kata-containers/configuration.toml`, commenting out the `image` line. For example:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo mkdir -p /etc/kata-containers/
|
||||||
|
$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers
|
||||||
|
$ sudo sed -i 's/^\(image =.*\)/# \1/g' /etc/kata-containers/configuration.toml
|
||||||
|
```
|
||||||
|
You can create the initrd image as shown in the [create an initrd image](#create-an-initrd-image---optional) section.
|
||||||
|
|
||||||
|
If you want to test with a rootfs `image`, make sure you have `image = /usr/share/kata-containers/kata-containers.img`
|
||||||
|
in your configuration file, commenting out the `initrd` line. For example:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo mkdir -p /etc/kata-containers/
|
||||||
|
$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers
|
||||||
|
$ sudo sed -i 's/^\(initrd =.*\)/# \1/g' /etc/kata-containers/configuration.toml
|
||||||
|
```
|
||||||
|
The rootfs image is created as shown in the [create a rootfs image](#create-a-rootfs-image) section.
|
||||||
|
|
||||||
|
One of the `initrd` and `image` options in Kata runtime config file **MUST** be set but **not both**.
|
||||||
|
The main difference between the options is that the size of `initrd`(10MB+) is significantly smaller than
|
||||||
|
rootfs `image`(100MB+).
|
||||||
|
|
||||||
|
## Enable full debug
|
||||||
|
|
||||||
|
Enable full debug as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo mkdir -p /etc/kata-containers/
|
||||||
|
$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers
|
||||||
|
$ sudo sed -i -e 's/^# *\(enable_debug\).*=.*$/\1 = true/g' /etc/kata-containers/configuration.toml
|
||||||
|
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.log=debug initcall_debug"/g' /etc/kata-containers/configuration.toml
|
||||||
|
```
|
||||||
|
|
||||||
|
### debug logs and shimv2
|
||||||
|
|
||||||
|
If you are using `containerd` and the Kata `containerd-shimv2` to launch Kata Containers, and wish
|
||||||
|
to enable Kata debug logging, there are two ways this can be enabled via the `containerd` configuration file,
|
||||||
|
detailed below.
|
||||||
|
|
||||||
|
The Kata logs appear in the `containerd` log files, along with logs from `containerd` itself.
|
||||||
|
|
||||||
|
For more information about `containerd` debug, please see the
|
||||||
|
[`containerd` documentation](https://github.com/containerd/containerd/blob/master/docs/getting-started.md).
|
||||||
|
|
||||||
|
#### Enabling full `containerd` debug
|
||||||
|
|
||||||
|
Enabling full `containerd` debug also enables the shimv2 debug. Edit the `containerd` configuration file
|
||||||
|
to include the top level debug option such as:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[debug]
|
||||||
|
level = "debug"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Enabling just `containerd shim` debug
|
||||||
|
|
||||||
|
If you only wish to enable debug for the `containerd` shims themselves, just enable the debug
|
||||||
|
option in the `plugins.linux` section of the `containerd` configuration file, such as:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[plugins.linux]
|
||||||
|
shim_debug = true
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Enabling `CRI-O` and `shimv2` debug
|
||||||
|
|
||||||
|
Depending on the CRI-O version being used one of the following configuration files can
|
||||||
|
be found: `/etc/crio/crio.conf` or `/etc/crio/crio.conf.d/00-default`.
|
||||||
|
|
||||||
|
If the latter is found, the change must be done there as it'll take precedence, overriding
|
||||||
|
`/etc/crio/crio.conf`.
|
||||||
|
|
||||||
|
```toml
|
||||||
|
# Changes the verbosity of the logs based on the level it is set to. Options
|
||||||
|
# are fatal, panic, error, warn, info, debug and trace. This option supports
|
||||||
|
# live configuration reload.
|
||||||
|
log_level = "info"
|
||||||
|
```
|
||||||
|
|
||||||
|
Switching the default `log_level` from `info` to `debug` enables shimv2 debug logs.
|
||||||
|
CRI-O logs can be found by using the `crio` identifier, and Kata specific logs can
|
||||||
|
be found by using the `kata` identifier.
|
||||||
|
|
||||||
|
### journald rate limiting
|
||||||
|
|
||||||
|
Enabling [full debug](#enable-full-debug) results in the Kata components generating
|
||||||
|
large amounts of logging, which by default is stored in the system log. Depending on
|
||||||
|
your system configuration, it is possible that some events might be discarded by the
|
||||||
|
system logging daemon. The following shows how to determine this for `systemd-journald`,
|
||||||
|
and offers possible workarounds and fixes.
|
||||||
|
|
||||||
|
> **Note** The method of implementation can vary between Operating System installations.
|
||||||
|
> Amend these instructions as necessary to your system implementation,
|
||||||
|
> and consult with your system administrator for the appropriate configuration.
|
||||||
|
|
||||||
|
#### `systemd-journald` suppressing messages
|
||||||
|
|
||||||
|
`systemd-journald` can be configured to rate limit the number of journal entries
|
||||||
|
it stores. When messages are suppressed, it is noted in the logs. This can be checked
|
||||||
|
for by looking for those notifications, such as:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ sudo journalctl --since today | fgrep Suppressed
|
||||||
|
Jun 29 14:51:17 mymachine systemd-journald[346]: Suppressed 4150 messages from /system.slice/docker.service
|
||||||
|
```
|
||||||
|
|
||||||
|
This message indicates that a number of log messages from the `docker.service` slice were
|
||||||
|
suppressed. In such a case, you can expect to have incomplete logging information
|
||||||
|
stored from the Kata Containers components.
|
||||||
|
|
||||||
|
#### Disabling `systemd-journald` rate limiting
|
||||||
|
|
||||||
|
In order to capture complete logs from the Kata Containers components, you
|
||||||
|
need to reduce or disable the `systemd-journald` rate limit. Configure
|
||||||
|
this at the global `systemd-journald` level, and it will apply to all system slices.
|
||||||
|
|
||||||
|
To disable `systemd-journald` rate limiting at the global level, edit the file
|
||||||
|
`/etc/systemd/journald.conf`, and add/uncomment the following lines:
|
||||||
|
|
||||||
|
```
|
||||||
|
RateLimitInterval=0s
|
||||||
|
RateLimitBurst=0
|
||||||
|
```
|
||||||
|
|
||||||
|
Restart `systemd-journald` for the changes to take effect:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ sudo systemctl restart systemd-journald
|
||||||
|
```
|
||||||
|
|
||||||
|
# Build and install Kata proxy
|
||||||
|
|
||||||
|
```
|
||||||
|
$ go get -d -u github.com/kata-containers/proxy
|
||||||
|
$ cd $GOPATH/src/github.com/kata-containers/proxy && make && sudo make install
|
||||||
|
```
|
||||||
|
|
||||||
|
# Build and install Kata shim
|
||||||
|
|
||||||
|
```
|
||||||
|
$ go get -d -u github.com/kata-containers/shim
|
||||||
|
$ cd $GOPATH/src/github.com/kata-containers/shim && make && sudo make install
|
||||||
|
```
|
||||||
|
|
||||||
|
# Create and install rootfs and initrd image
|
||||||
|
|
||||||
|
## Build a custom Kata agent - OPTIONAL
|
||||||
|
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> - You should only do this step if you are testing with the latest version of the agent.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ go get -d -u github.com/kata-containers/agent
|
||||||
|
$ cd $GOPATH/src/github.com/kata-containers/agent && make
|
||||||
|
```
|
||||||
|
|
||||||
|
## Get the osbuilder
|
||||||
|
|
||||||
|
```
|
||||||
|
$ go get -d -u github.com/kata-containers/osbuilder
|
||||||
|
```
|
||||||
|
|
||||||
|
## Create a rootfs image
|
||||||
|
### Create a local rootfs
|
||||||
|
|
||||||
|
As a prerequisite, you need to install Docker. Otherwise, you will not be
|
||||||
|
able to run the `rootfs.sh` script with `USE_DOCKER=true` as expected in
|
||||||
|
the following example.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ export ROOTFS_DIR=${GOPATH}/src/github.com/kata-containers/osbuilder/rootfs-builder/rootfs
|
||||||
|
$ sudo rm -rf ${ROOTFS_DIR}
|
||||||
|
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/rootfs-builder
|
||||||
|
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}'
|
||||||
|
```
|
||||||
|
You MUST choose one of `alpine`, `centos`, `clearlinux`, `debian`, `euleros`, `fedora`, `suse`, and `ubuntu` for `${distro}`. By default `seccomp` packages are not included in the rootfs image. Set `SECCOMP` to `yes` to include them.
|
||||||
|
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> - Check the [compatibility matrix](https://github.com/kata-containers/osbuilder#platform-distro-compatibility-matrix) before creating rootfs.
|
||||||
|
> - You must ensure that the *default Docker runtime* is `runc` to make use of
|
||||||
|
> the `USE_DOCKER` variable. If that is not the case, remove the variable
|
||||||
|
> from the previous command. See [Checking Docker default runtime](#checking-docker-default-runtime).
|
||||||
|
|
||||||
|
### Add a custom agent to the image - OPTIONAL
|
||||||
|
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> - You should only do this step if you are testing with the latest version of the agent.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo install -o root -g root -m 0550 -t ${ROOTFS_DIR}/bin ../../agent/kata-agent
|
||||||
|
$ sudo install -o root -g root -m 0440 ../../agent/kata-agent.service ${ROOTFS_DIR}/usr/lib/systemd/system/
|
||||||
|
$ sudo install -o root -g root -m 0440 ../../agent/kata-containers.target ${ROOTFS_DIR}/usr/lib/systemd/system/
|
||||||
|
```
|
||||||
|
|
||||||
|
### Build a rootfs image
|
||||||
|
|
||||||
|
```
|
||||||
|
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/image-builder
|
||||||
|
$ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh ${ROOTFS_DIR}'
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Notes:**
|
||||||
|
>
|
||||||
|
> - You must ensure that the *default Docker runtime* is `runc` to make use of
|
||||||
|
> the `USE_DOCKER` variable. If that is not the case, remove the variable
|
||||||
|
> from the previous command. See [Checking Docker default runtime](#checking-docker-default-runtime).
|
||||||
|
> - If you do *not* wish to build under Docker, remove the `USE_DOCKER`
|
||||||
|
> variable in the previous command and ensure the `qemu-img` command is
|
||||||
|
> available on your system.
|
||||||
|
|
||||||
|
|
||||||
|
### Install the rootfs image
|
||||||
|
|
||||||
|
```
|
||||||
|
$ commit=$(git log --format=%h -1 HEAD)
|
||||||
|
$ date=$(date +%Y-%m-%d-%T.%N%z)
|
||||||
|
$ image="kata-containers-${date}-${commit}"
|
||||||
|
$ sudo install -o root -g root -m 0640 -D kata-containers.img "/usr/share/kata-containers/${image}"
|
||||||
|
$ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers.img)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Create an initrd image - OPTIONAL
|
||||||
|
### Create a local rootfs for initrd image
|
||||||
|
```
|
||||||
|
$ export ROOTFS_DIR="${GOPATH}/src/github.com/kata-containers/osbuilder/rootfs-builder/rootfs"
|
||||||
|
$ sudo rm -rf ${ROOTFS_DIR}
|
||||||
|
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/rootfs-builder
|
||||||
|
$ script -fec 'sudo -E GOPATH=$GOPATH AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh ${distro}'
|
||||||
|
```
|
||||||
|
`AGENT_INIT` controls if the guest image uses the Kata agent as the guest `init` process. When you create an initrd image,
|
||||||
|
always set `AGENT_INIT` to `yes`. By default `seccomp` packages are not included in the initrd image. Set `SECCOMP` to `yes` to include them.
|
||||||
|
|
||||||
|
You MUST choose one of `alpine`, `centos`, `clearlinux`, `euleros`, and `fedora` for `${distro}`.
|
||||||
|
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> - Check the [compatibility matrix](https://github.com/kata-containers/osbuilder#platform-distro-compatibility-matrix) before creating rootfs.
|
||||||
|
|
||||||
|
Optionally, add your custom agent binary to the rootfs with the following:
|
||||||
|
```
|
||||||
|
$ sudo install -o root -g root -m 0550 -T ../../agent/kata-agent ${ROOTFS_DIR}/sbin/init
|
||||||
|
```
|
||||||
|
|
||||||
|
### Build an initrd image
|
||||||
|
|
||||||
|
```
|
||||||
|
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/initrd-builder
|
||||||
|
$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true ./initrd_builder.sh ${ROOTFS_DIR}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Install the initrd image
|
||||||
|
|
||||||
|
```
|
||||||
|
$ commit=$(git log --format=%h -1 HEAD)
|
||||||
|
$ date=$(date +%Y-%m-%d-%T.%N%z)
|
||||||
|
$ image="kata-containers-initrd-${date}-${commit}"
|
||||||
|
$ sudo install -o root -g root -m 0640 -D kata-containers-initrd.img "/usr/share/kata-containers/${image}"
|
||||||
|
$ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers-initrd.img)
|
||||||
|
```
|
||||||
|
|
||||||
|
# Install guest kernel images
|
||||||
|
|
||||||
|
You can build and install the guest kernel image as shown [here](https://github.com/kata-containers/packaging/tree/master/kernel#build-kata-containers-kernel).
|
||||||
|
|
||||||
|
# Install a hypervisor
|
||||||
|
|
||||||
|
When setting up Kata using a [packaged installation method](https://github.com/kata-containers/documentation/tree/master/install#installing-on-a-linux-system), the `qemu-lite` hypervisor is installed automatically. For other installation methods, you will need to manually install a suitable hypervisor.
|
||||||
|
|
||||||
|
## Build a custom QEMU
|
||||||
|
|
||||||
|
Your QEMU directory needs to be prepared with source code. Alternatively, you can use the [Kata containers QEMU](https://github.com/kata-containers/qemu/tree/master) and checkout the recommended branch:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ go get -d github.com/kata-containers/qemu
|
||||||
|
$ qemu_branch=$(grep qemu-lite- ${GOPATH}/src/github.com/kata-containers/runtime/versions.yaml | cut -d '"' -f2)
|
||||||
|
$ cd ${GOPATH}/src/github.com/kata-containers/qemu
|
||||||
|
$ git checkout -b $qemu_branch remotes/origin/$qemu_branch
|
||||||
|
$ your_qemu_directory=${GOPATH}/src/github.com/kata-containers/qemu
|
||||||
|
```
|
||||||
|
|
||||||
|
To build a version of QEMU using the same options as the default `qemu-lite` version, you could use the `configure-hypervisor.sh` script:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ go get -d github.com/kata-containers/packaging
|
||||||
|
$ cd $your_qemu_directory
|
||||||
|
$ ${GOPATH}/src/github.com/kata-containers/packaging/scripts/configure-hypervisor.sh qemu > kata.cfg
|
||||||
|
$ eval ./configure "$(cat kata.cfg)"
|
||||||
|
$ make -j $(nproc)
|
||||||
|
$ sudo -E make install
|
||||||
|
```
|
||||||
|
|
||||||
|
### Build a custom QEMU for aarch64/arm64 - REQUIRED
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> - You should only do this step if you are on aarch64/arm64.
|
||||||
|
> - You should include [Eric Auger's latest PCDIMM/NVDIMM patches](https://patchwork.kernel.org/cover/10647305/) which are
|
||||||
|
> under upstream review for supporting NVDIMM on aarch64.
|
||||||
|
>
|
||||||
|
You could build the custom `qemu-system-aarch64` as required with the following command:
|
||||||
|
```
|
||||||
|
$ go get -d github.com/kata-containers/tests
|
||||||
|
$ script -fec 'sudo -E ${GOPATH}/src/github.com/kata-containers/tests/.ci/install_qemu.sh'
|
||||||
|
```
|
||||||
|
|
||||||
|
# Run Kata Containers with Docker
|
||||||
|
|
||||||
|
## Update the Docker systemd unit file
|
||||||
|
|
||||||
|
```
|
||||||
|
$ dockerUnit=$(systemctl show -p FragmentPath docker.service | cut -d "=" -f 2)
|
||||||
|
$ unitFile=${dockerUnit:-/etc/systemd/system/docker.service.d/kata-containers.conf}
|
||||||
|
$ test -e "$unitFile" || { sudo mkdir -p "$(dirname $unitFile)"; echo -e "[Service]\nType=simple\nExecStart=\nExecStart=/usr/bin/dockerd -D --default-runtime runc" | sudo tee "$unitFile"; }
|
||||||
|
$ grep -q "kata-runtime=" $unitFile || sudo sed -i 's!^\(ExecStart=[^$].*$\)!\1 --add-runtime kata-runtime=/usr/local/bin/kata-runtime!g' "$unitFile"
|
||||||
|
$ sudo systemctl daemon-reload
|
||||||
|
$ sudo systemctl restart docker
|
||||||
|
```
|
||||||
|
|
||||||
|
## Create a container using Kata
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo docker run -ti --runtime kata-runtime busybox sh
|
||||||
|
```
|
||||||
|
|
||||||
|
# Run Kata Containers with Kubernetes
|
||||||
|
Refer to the [Run Kata Containers with Kubernetes](how-to/run-kata-with-k8s.md) how-to guide.
|
||||||
|
|
||||||
|
# Troubleshoot Kata Containers
|
||||||
|
|
||||||
|
If you are unable to create a Kata Container first ensure you have
|
||||||
|
[enabled full debug](#enable-full-debug)
|
||||||
|
before attempting to create a container. Then run the
|
||||||
|
[`kata-collect-data.sh`](https://github.com/kata-containers/runtime/blob/master/data/kata-collect-data.sh.in)
|
||||||
|
script and paste its output directly into a
|
||||||
|
[GitHub issue](https://github.com/kata-containers/kata-containers/issues/new).
|
||||||
|
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> The `kata-collect-data.sh` script is built from the
|
||||||
|
> [runtime](https://github.com/kata-containers/runtime) repository.
|
||||||
|
|
||||||
|
To perform analysis on Kata logs, use the
|
||||||
|
[`kata-log-parser`](https://github.com/kata-containers/tests/tree/master/cmd/log-parser)
|
||||||
|
tool, which can convert the logs into formats (e.g. JSON, TOML, XML, and YAML).
|
||||||
|
|
||||||
|
To obtain a full backtrace for the agent, proxy, runtime, or shim send the
|
||||||
|
`SIGUSR1` signal to the process ID of the component. The component will send a
|
||||||
|
backtrace to the system log on the host system and continue to run without
|
||||||
|
interruption.
|
||||||
|
|
||||||
|
For example, to obtain a backtrace for `kata-proxy`:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo kill -USR1 $kata_proxy_pid
|
||||||
|
$ sudo journalctl -t kata-proxy
|
||||||
|
```
|
||||||
|
|
||||||
|
See [Set up a debug console](#set-up-a-debug-console).
|
||||||
|
|
||||||
|
# Appendices
|
||||||
|
|
||||||
|
## Checking Docker default runtime
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo docker info 2>/dev/null | grep -i "default runtime" | cut -d: -f2- | grep -q runc && echo "SUCCESS" || echo "ERROR: Incorrect default Docker runtime"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Set up a debug console
|
||||||
|
|
||||||
|
By default you cannot log in to a virtual machine, since this can be sensitive
|
||||||
|
from a security perspective. Also, allowing logins would require additional
|
||||||
|
packages in the rootfs, which would increase the size of the image used to
|
||||||
|
boot the virtual machine.
|
||||||
|
|
||||||
|
If you want to log in to a virtual machine that hosts your containers, complete
|
||||||
|
the following steps (using rootfs or initrd image).
|
||||||
|
|
||||||
|
> **Note:** The following debug console instructions assume a systemd-based guest
|
||||||
|
> O/S image. This means you must create a rootfs for a distro that supports systemd.
|
||||||
|
> Currently, all distros supported by [osbuilder](https://github.com/kata-containers/osbuilder) support systemd
|
||||||
|
> except for Alpine Linux.
|
||||||
|
>
|
||||||
|
> Look for `INIT_PROCESS=systemd` in the `config.sh` osbuilder rootfs config file
|
||||||
|
> to verify an osbuilder distro supports systemd for the distro you want to build rootfs for.
|
||||||
|
> For an example, see the [Clear Linux config.sh file](https://github.com/kata-containers/osbuilder/blob/master/rootfs-builder/clearlinux/config.sh).
|
||||||
|
>
|
||||||
|
> For a non-systemd-based distro, create an equivalent system
|
||||||
|
> service using that distro’s init system syntax. Alternatively, you can build a distro
|
||||||
|
> that contains a shell (e.g. `bash(1)`). In this circumstance it is likely you need to install
|
||||||
|
> additional packages in the rootfs and add `agent.debug_console` to kernel parameters in the runtime
|
||||||
|
> config file. This tells the Kata agent to launch the console directly.
|
||||||
|
>
|
||||||
|
> Once these steps are taken you can connect to the virtual machine using the [debug console](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#connect-to-the-virtual-machine-using-the-debug-console).
|
||||||
|
|
||||||
|
### Create a custom image containing a shell
|
||||||
|
|
||||||
|
To log in to a virtual machine, you must
|
||||||
|
[create a custom rootfs](#create-a-rootfs-image) or [custom initrd](#create-an-initrd-image---optional)
|
||||||
|
containing a shell such as `bash(1)`. For Clear Linux, you will need
|
||||||
|
an additional `coreutils` package.
|
||||||
|
|
||||||
|
For example using CentOS:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ cd $GOPATH/src/github.com/kata-containers/osbuilder/rootfs-builder
|
||||||
|
$ export ROOTFS_DIR=${GOPATH}/src/github.com/kata-containers/osbuilder/rootfs-builder/rootfs
|
||||||
|
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true EXTRA_PKGS="bash coreutils" ./rootfs.sh centos'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Create a debug systemd service
|
||||||
|
|
||||||
|
Create the service file that starts the shell in the rootfs directory:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ cat <<EOT | sudo tee ${ROOTFS_DIR}/lib/systemd/system/kata-debug.service
|
||||||
|
[Unit]
|
||||||
|
Description=Kata Containers debug console
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||||
|
StandardInput=tty
|
||||||
|
StandardOutput=tty
|
||||||
|
# Must be disabled to allow the job to access the real console
|
||||||
|
PrivateDevices=no
|
||||||
|
Type=simple
|
||||||
|
ExecStart=/bin/bash
|
||||||
|
Restart=always
|
||||||
|
EOT
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note**: You might need to adjust the `ExecStart=` path.
|
||||||
|
|
||||||
|
Add a dependency to start the debug console:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo sed -i '$a Requires=kata-debug.service' ${ROOTFS_DIR}/lib/systemd/system/kata-containers.target
|
||||||
|
```
|
||||||
|
|
||||||
|
### Build the debug image
|
||||||
|
|
||||||
|
Follow the instructions in the [Build a rootfs image](#build-a-rootfs-image)
|
||||||
|
section when using rootfs, or when using initrd, complete the steps in the [Build an initrd image](#build-an-initrd-image) section.
|
||||||
|
|
||||||
|
### Configure runtime for custom debug image
|
||||||
|
|
||||||
|
Install the image:
|
||||||
|
|
||||||
|
>**Note**: When using an initrd image, replace the below rootfs image name `kata-containers.img`
|
||||||
|
>with the initrd image name `kata-containers-initrd.img`.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ name="kata-containers-centos-with-debug-console.img"
|
||||||
|
$ sudo install -o root -g root -m 0640 kata-containers.img "/usr/share/kata-containers/${name}"
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, modify the `image=` values in the `[hypervisor.qemu]` section of the
|
||||||
|
[configuration file](https://github.com/kata-containers/runtime#configuration)
|
||||||
|
to specify the full path to the image name specified in the previous code
|
||||||
|
section. Alternatively, recreate the symbolic link so it points to
|
||||||
|
the new debug image:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ (cd /usr/share/kata-containers && sudo ln -sf "$name" kata-containers.img)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note**: You should take care to undo this change after you finish debugging
|
||||||
|
to avoid all subsequently created containers from using the debug image.
|
||||||
|
|
||||||
|
### Ensure debug options are valid
|
||||||
|
|
||||||
|
For the debug console to work, you **must** ensure that proxy debug is
|
||||||
|
**disabled** in the configuration file. If proxy debug is enabled, you will
|
||||||
|
not see any output when you connect to the virtual machine:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo mkdir -p /etc/kata-containers/
|
||||||
|
$ sudo install -o root -g root -m 0640 /usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers
|
||||||
|
$ sudo awk '{if (/^\[proxy\.kata\]/) {got=1}; if (got == 1 && /^.*enable_debug/) {print "#enable_debug = true"; got=0; next; } else {print}}' /etc/kata-containers/configuration.toml > /tmp/configuration.toml
|
||||||
|
$ sudo install -o root -g root -m 0640 /tmp/configuration.toml /etc/kata-containers/
|
||||||
|
```
|
||||||
|
|
||||||
|
### Create a container
|
||||||
|
|
||||||
|
Create a container as normal. For example using Docker:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo docker run -ti busybox sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Connect to the virtual machine using the debug console
|
||||||
|
|
||||||
|
```
|
||||||
|
$ id=$(sudo docker ps -q --no-trunc)
|
||||||
|
$ console="/var/run/vc/vm/${id}/console.sock"
|
||||||
|
$ sudo socat "stdin,raw,echo=0,escape=0x11" "unix-connect:${console}"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note**: You need to press the `RETURN` key to see the shell prompt.
|
||||||
|
|
||||||
|
To disconnect from the virtual machine, type `CONTROL+q` (hold down the
|
||||||
|
`CONTROL` key and press `q`).
|
||||||
|
|
||||||
|
### Obtain details of the image
|
||||||
|
|
||||||
|
If the image is created using
|
||||||
|
[osbuilder](https://github.com/kata-containers/osbuilder), the following YAML
|
||||||
|
file exists and contains details of the image and how it was created:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ cat /var/lib/osbuilder/osbuilder.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
## Capturing kernel boot logs
|
||||||
|
|
||||||
|
Sometimes it is useful to capture the kernel boot messages from a Kata Container
|
||||||
|
launch. If the container launches to the point whereby you can `exec` into it, and
|
||||||
|
if the container has the necessary components installed, often you can execute the `dmesg`
|
||||||
|
command inside the container to view the kernel boot logs.
|
||||||
|
|
||||||
|
If however you are unable to `exec` into the container, you can enable some debug
|
||||||
|
options to have the kernel boot messages logged into the system journal.
|
||||||
|
|
||||||
|
Which debug options you enable depends on if you are using the hypervisor `vsock` mode
|
||||||
|
or not, as defined by the `use_vsock` setting in the `[hypervisor.qemu]` section of
|
||||||
|
the configuration file. The following details the settings:
|
||||||
|
|
||||||
|
- For `use_vsock = false`:
|
||||||
|
- Set `enable_debug = true` in both the `[hypervisor.qemu]` and `[proxy.kata]` sections
|
||||||
|
- For `use_vsock = true`:
|
||||||
|
- Set `enable_debug = true` in both the `[hypervisor.qemu]` and `[shim.kata]` sections
|
||||||
|
|
||||||
|
For generic information on enabling debug in the configuration file, see the
|
||||||
|
[Enable full debug](#enable-full-debug) section.
|
||||||
|
|
||||||
|
The kernel boot messages will appear in the `kata-proxy` or `kata-shim` log appropriately,
|
||||||
|
such as:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo journalctl -t kata-proxy
|
||||||
|
-- Logs begin at Thu 2020-02-13 16:20:40 UTC, end at Thu 2020-02-13 16:30:23 UTC. --
|
||||||
|
...
|
||||||
|
Feb 13 16:20:56 minikube kata-proxy[17371]: time="2020-02-13T16:20:56.608714324Z" level=info msg="[ 1.418768] brd: module loaded\n" name=kata-proxy pid=17371 sandbox=a13ffb2b9b5a66f7787bdae9a427fa954a4d21ec4031d0179eee2573986a8a6e source=agent
|
||||||
|
Feb 13 16:20:56 minikube kata-proxy[17371]: time="2020-02-13T16:20:56.628493231Z" level=info msg="[ 1.438612] loop: module loaded\n" name=kata-proxy pid=17371 sandbox=a13ffb2b9b5a66f7787bdae9a427fa954a4d21ec4031d0179eee2573986a8a6e source=agent
|
||||||
|
Feb 13 16:20:56 minikube kata-proxy[17371]: time="2020-02-13T16:20:56.67707956Z" level=info msg="[ 1.487165] pmem0: p1\n" name=kata-proxy pid=17371 sandbox=a13ffb2b9b5a66f7787bdae9a427fa954a4d21ec4031d0179eee2573986a8a6e source=agent
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
## Running standalone
|
||||||
|
|
||||||
|
It is possible to start the runtime without a container manager. This is
|
||||||
|
mostly useful for testing and debugging purposes.
|
||||||
|
|
||||||
|
### Create an OCI bundle
|
||||||
|
|
||||||
|
To build an
|
||||||
|
[OCI bundle](https://github.com/opencontainers/runtime-spec/blob/master/bundle.md),
|
||||||
|
required by the runtime:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ bundle="/tmp/bundle"
|
||||||
|
$ rootfs="$bundle/rootfs"
|
||||||
|
$ mkdir -p "$rootfs" && (cd "$bundle" && kata-runtime spec)
|
||||||
|
$ sudo docker export $(sudo docker create busybox) | tar -C "$rootfs" -xvf -
|
||||||
|
```
|
||||||
|
|
||||||
|
### Launch the runtime to create a container
|
||||||
|
|
||||||
|
Run the runtime standalone by providing it with the path to the
|
||||||
|
previously-created [OCI bundle](#create-an-oci-bundle):
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo kata-runtime --log=/dev/stdout run --bundle "$bundle" foo
|
||||||
|
```
|
237
docs/Documentation-Requirements.md
Normal file
@ -0,0 +1,237 @@
|
|||||||
|
* [Introduction](#introduction)
|
||||||
|
* [General requirements](#general-requirements)
|
||||||
|
* [Linking advice](#linking-advice)
|
||||||
|
* [Notes](#notes)
|
||||||
|
* [Warnings and other admonitions](#warnings-and-other-admonitions)
|
||||||
|
* [Files and command names](#files-and-command-names)
|
||||||
|
* [Code blocks](#code-blocks)
|
||||||
|
* [Images](#images)
|
||||||
|
* [Spelling](#spelling)
|
||||||
|
* [Names](#names)
|
||||||
|
* [Version numbers](#version-numbers)
|
||||||
|
* [The apostrophe](#the-apostrophe)
|
||||||
|
|
||||||
|
# Introduction
|
||||||
|
|
||||||
|
This document outlines the requirements for all documentation in the [Kata
|
||||||
|
Containers](https://github.com/kata-containers) project.
|
||||||
|
|
||||||
|
# General requirements
|
||||||
|
|
||||||
|
All documents must:
|
||||||
|
|
||||||
|
- Be written in simple English.
|
||||||
|
- Be written in [GitHub Flavored Markdown](https://github.github.com/gfm) format.
|
||||||
|
- Have a `.md` file extension.
|
||||||
|
- Include a TOC (table of contents) at the top of the document with links to
|
||||||
|
all heading sections. We recommend using the
|
||||||
|
[`kata-check-markdown`](https://github.com/kata-containers/tests/tree/master/cmd/check-markdown)
|
||||||
|
tool to generate the TOC.
|
||||||
|
- Be linked to from another document in the same repository.
|
||||||
|
|
||||||
|
Although GitHub allows navigation of the entire repository, it should be
|
||||||
|
possible to access all documentation purely by navigating links inside the
|
||||||
|
documents, starting from the repository's top-level `README`.
|
||||||
|
|
||||||
|
If you are adding a new document, ensure you add a link to it in the
|
||||||
|
"closest" `README` above the directory where you created your document.
|
||||||
|
- If the document needs to tell the user to manipulate files or commands, use a
|
||||||
|
[code block](#code-blocks) to specify the commands.
|
||||||
|
|
||||||
|
If at all possible, ensure that every command in the code blocks can be run
|
||||||
|
non-interactively. If this is possible, the document can be tested by the CI
|
||||||
|
which can then execute the commands specified to ensure the instructions are
|
||||||
|
correct. This avoids documents becoming out of date over time.
|
||||||
|
|
||||||
|
# Linking advice
|
||||||
|
|
||||||
|
Linking between documents is strongly encouraged to help users and developers
|
||||||
|
navigate the material more easily. Linking also avoids repetition - if a
|
||||||
|
document needs to refer to a concept already well described in another section
|
||||||
|
or document, do not repeat it, link to it
|
||||||
|
(the [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself) principle).
|
||||||
|
|
||||||
|
Another advantage of this approach is that changes only need to be applied in
|
||||||
|
one place: where the concept is defined (not the potentially many places where
|
||||||
|
the concept is referred to using a link).
|
||||||
|
|
||||||
|
# Notes
|
||||||
|
|
||||||
|
Important information that is not part of the main document flow should be
|
||||||
|
added as a Note in bold with all content contained within a block quote:
|
||||||
|
|
||||||
|
> **Note:** This is a really important point!
|
||||||
|
>
|
||||||
|
> This particular note also spans multiple lines. The entire note should be
|
||||||
|
> included inside the quoted block.
|
||||||
|
|
||||||
|
If there are multiple notes, bullets should be used:
|
||||||
|
|
||||||
|
> **Notes:**
|
||||||
|
>
|
||||||
|
> - I am important point 1.
|
||||||
|
>
|
||||||
|
> - I am important point 2.
|
||||||
|
>
|
||||||
|
> - I am important point *n*.
|
||||||
|
|
||||||
|
# Warnings and other admonitions
|
||||||
|
|
||||||
|
Use the same approach as for [notes](#notes). For example:
|
||||||
|
|
||||||
|
> **Warning:** Running this command assumes you understand the risks of doing so.
|
||||||
|
|
||||||
|
Other examples:
|
||||||
|
|
||||||
|
> **Warnings:**
|
||||||
|
>
|
||||||
|
> - Do not unplug your computer!
|
||||||
|
> - Always read the label.
|
||||||
|
> - Do not pass go. Do not collect $200.
|
||||||
|
|
||||||
|
> **Tip:** Read the manual page for further information on available options.
|
||||||
|
|
||||||
|
> **Hint:** Look behind you!
|
||||||
|
|
||||||
|
# Files and command names
|
||||||
|
|
||||||
|
All filenames and command names should be rendered in a fixed-format font
|
||||||
|
using backticks:
|
||||||
|
|
||||||
|
> Run the `foo` command to make it work.
|
||||||
|
|
||||||
|
> Modify the `bar` option in file `/etc/baz/baz.conf`.
|
||||||
|
|
||||||
|
Render any options that need to be specified to the command in the same manner:
|
||||||
|
|
||||||
|
> Run `bar -axz --apply foo.yaml` to make the changes.
|
||||||
|
|
||||||
|
For standard system commands, it is also acceptable to specify the name along
|
||||||
|
with the manual page section that documents the command in brackets:
|
||||||
|
|
||||||
|
> The command to list files in a directory is called `ls(1)`.
|
||||||
|
|
||||||
|
# Code blocks
|
||||||
|
|
||||||
|
This section lists requirements for displaying commands and command output.
|
||||||
|
|
||||||
|
The requirements must be adhered to since documentation containing code blocks
|
||||||
|
is validated by the CI system, which executes the command blocks with the help
|
||||||
|
of the
|
||||||
|
[doc-to-script](https://github.com/kata-containers/tests/tree/master/.ci/kata-doc-to-script.sh)
|
||||||
|
utility.
|
||||||
|
|
||||||
|
- If a document includes commands the user should run, they **MUST** be shown
|
||||||
|
in a *bash code block* with every command line prefixed with `$ ` to denote
|
||||||
|
a shell prompt:
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ echo "Hi - I am some bash code"
|
||||||
|
$ sudo docker run -ti busybox true
|
||||||
|
$ [ $? -eq 0 ] && echo "success"
|
||||||
|
```
|
||||||
|
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
- If a command needs to be run as the `root` user, it must be run using
|
||||||
|
`sudo(8)`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
|
||||||
|
$ sudo echo "I'm running as root"
|
||||||
|
```
|
||||||
|
|
||||||
|
- All lines beginning `# ` should be comment lines, *NOT* commands to run as
|
||||||
|
the `root` user.
|
||||||
|
|
||||||
|
- Try to avoid showing the *output* of commands.
|
||||||
|
|
||||||
|
The reasons for this:
|
||||||
|
|
||||||
|
- Command output can change, leading to confusion when the output the user
|
||||||
|
sees does not match the output in the documentation.
|
||||||
|
|
||||||
|
- There is the risk the user will get confused between what parts of the
|
||||||
|
block refer to the commands they should type and the output that they
|
||||||
|
should not.
|
||||||
|
|
||||||
|
- It can make the document look overly "busy" or complex.
|
||||||
|
|
||||||
|
In the unusual case that you need to display command *output*, use an
|
||||||
|
unadorned code block (\`\`\`):
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
|
||||||
|
The output of the `ls(1)` command is expected to be:
|
||||||
|
|
||||||
|
```
|
||||||
|
ls: cannot access '/foo': No such file or directory
|
||||||
|
```
|
||||||
|
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
- Long lines should not span across multiple lines by using the `\`
|
||||||
|
continuation character.
|
||||||
|
|
||||||
|
GitHub automatically renders such blocks with scrollbars. Consequently,
|
||||||
|
backslash continuation characters are not necessary and are a visual
|
||||||
|
distraction. These characters also mess up a user's shell history when
|
||||||
|
commands are pasted into a terminal.
|
||||||
|
|
||||||
|
# Images
|
||||||
|
|
||||||
|
All binary image files must be in a standard and well-supported format such as
|
||||||
|
PNG. This format is preferred for vector graphics such as diagrams because the
|
||||||
|
information is stored more efficiently, leading to smaller file sizes. JPEG
|
||||||
|
images are acceptable, but this format is more appropriate to store
|
||||||
|
photographic images.
|
||||||
|
|
||||||
|
When possible, generate images using freely available software.
|
||||||
|
|
||||||
|
Every binary image file **MUST** be accompanied by the "source" file used to
|
||||||
|
generate it. This guarantees that the image can be modified by updating the
|
||||||
|
source file and re-generating the binary format image file.
|
||||||
|
|
||||||
|
Ideally, the format of all image source files is an open standard, non-binary
|
||||||
|
one such as SVG. Text formats are highly preferable because you can manipulate
|
||||||
|
and compare them with standard tools (e.g. `diff(1)`).
|
||||||
|
|
||||||
|
# Spelling
|
||||||
|
|
||||||
|
Since this project uses a number of terms not found in conventional
|
||||||
|
dictionaries, we have a
|
||||||
|
[spell checking tool](https://github.com/kata-containers/tests/tree/master/cmd/check-spelling)
|
||||||
|
that checks both dictionary words and the additional terms we use.
|
||||||
|
|
||||||
|
Run the spell checking tool on your document before raising a PR to ensure it
|
||||||
|
is free of mistakes.
|
||||||
|
|
||||||
|
If your document introduces new terms, you need to update the custom
|
||||||
|
dictionary used by the spell checking tool to incorporate the new words.
|
||||||
|
|
||||||
|
# Names
|
||||||
|
|
||||||
|
Occasionally documents need to specify the name of people. Write such names in
|
||||||
|
backticks. The main reason for this is to keep the [spell checker](#spelling) happy (since
|
||||||
|
it cannot manage all possible names). However, since backticks render in a
|
||||||
|
fixed-width font, this makes the names clearer:
|
||||||
|
|
||||||
|
> Welcome to `Clark Kent`, the newest member of the Kata Containers Architecture Committee.
|
||||||
|
|
||||||
|
# Version numbers
|
||||||
|
|
||||||
|
Write version number in backticks. This keeps the [spell checker](#spelling)
|
||||||
|
happy and since backticks render in a fixed-width font, it also makes the
|
||||||
|
numbers clearer:
|
||||||
|
|
||||||
|
> Ensure you are using at least version `1.2.3-alpha3.wibble.1` of the tool.
|
||||||
|
|
||||||
|
# The apostrophe
|
||||||
|
|
||||||
|
The apostrophe character (`'`) must **only** be used for showing possession
|
||||||
|
("Peter's book") and for standard contractions (such as "don't").
|
||||||
|
|
||||||
|
Use double-quotes ("...") in all other circumstances you use quotes outside of
|
||||||
|
[code blocks](#code-blocks).
|
201
docs/LICENSE
Normal file
@ -0,0 +1,201 @@
|
|||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
25
docs/Licensing-strategy.md
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
# Licensing strategy
|
||||||
|
|
||||||
|
* [Project License](#project-license)
|
||||||
|
* [License file](#license-file)
|
||||||
|
* [License for individual files](#license-for-individual-files)
|
||||||
|
|
||||||
|
## Project License
|
||||||
|
|
||||||
|
The license for the [Kata Containers](https://github.com/kata-containers)
|
||||||
|
project is [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0).
|
||||||
|
|
||||||
|
## License file
|
||||||
|
|
||||||
|
All repositories in the project have a top level file called `LICENSE`. This
|
||||||
|
file lists full details of all licences used by the repository.
|
||||||
|
|
||||||
|
## License for individual files
|
||||||
|
|
||||||
|
Where possible all files in all repositories also contain a
|
||||||
|
[SPDX](https://spdx.org) license identifier. This provides fine-grained
|
||||||
|
licensing and allows automated tooling to check the license of individual
|
||||||
|
files.
|
||||||
|
|
||||||
|
This SPDX license identifier requirement is enforced by the
|
||||||
|
[CI (Continuous Integration) system](https://github.com/kata-containers/tests/blob/master/.ci/static-checks.sh).
|
280
docs/Limitations.md
Normal file
@ -0,0 +1,280 @@
|
|||||||
|
* [Overview](#overview)
|
||||||
|
* [Definition of a limitation](#definition-of-a-limitation)
|
||||||
|
* [Scope](#scope)
|
||||||
|
* [Contributing](#contributing)
|
||||||
|
* [Pending items](#pending-items)
|
||||||
|
* [Runtime commands](#runtime-commands)
|
||||||
|
* [checkpoint and restore](#checkpoint-and-restore)
|
||||||
|
* [events command](#events-command)
|
||||||
|
* [update command](#update-command)
|
||||||
|
* [Networking](#networking)
|
||||||
|
* [Docker swarm and compose support](#docker-swarm-and-compose-support)
|
||||||
|
* [Resource management](#resource-management)
|
||||||
|
* [docker run and shared memory](#docker-run-and-shared-memory)
|
||||||
|
* [docker run and sysctl](#docker-run-and-sysctl)
|
||||||
|
* [Docker daemon features](#docker-daemon-features)
|
||||||
|
* [SELinux support](#selinux-support)
|
||||||
|
* [Architectural limitations](#architectural-limitations)
|
||||||
|
* [Networking limitations](#networking-limitations)
|
||||||
|
* [Support for joining an existing VM network](#support-for-joining-an-existing-vm-network)
|
||||||
|
* [docker --net=host](#docker---nethost)
|
||||||
|
* [docker run --link](#docker-run---link)
|
||||||
|
* [Host resource sharing](#host-resource-sharing)
|
||||||
|
* [docker run --privileged](#docker-run---privileged)
|
||||||
|
* [Miscellaneous](#miscellaneous)
|
||||||
|
* [Docker --security-opt option partially supported](#docker---security-opt-option-partially-supported)
|
||||||
|
* [Appendices](#appendices)
|
||||||
|
* [The constraints challenge](#the-constraints-challenge)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
# Overview
|
||||||
|
|
||||||
|
A [Kata Container](https://github.com/kata-containers) utilizes a Virtual Machine (VM) to enhance security and
|
||||||
|
isolation of container workloads. As a result, the system has a number of differences
|
||||||
|
and limitations when compared with the default [Docker*](https://www.docker.com/) runtime,
|
||||||
|
[`runc`](https://github.com/opencontainers/runc).
|
||||||
|
|
||||||
|
Some of these limitations have potential solutions, whereas others exist
|
||||||
|
due to fundamental architectural differences generally related to the
|
||||||
|
use of VMs.
|
||||||
|
|
||||||
|
The [Kata Container runtime](https://github.com/kata-containers/runtime)
|
||||||
|
launches each container within its own hardware isolated VM, and each VM has
|
||||||
|
its own kernel. Due to this higher degree of isolation, certain container
|
||||||
|
capabilities cannot be supported or are implicitly enabled through the VM.
|
||||||
|
|
||||||
|
# Definition of a limitation
|
||||||
|
|
||||||
|
The [Open Container Initiative](https://www.opencontainers.org/)
|
||||||
|
[Runtime Specification](https://github.com/opencontainers/runtime-spec) ("OCI spec")
|
||||||
|
defines the minimum specifications a runtime must support to interoperate with
|
||||||
|
container managers such as Docker. If a runtime does not support some aspect
|
||||||
|
of the OCI spec, it is by definition a limitation.
|
||||||
|
|
||||||
|
However, the OCI runtime reference implementation (`runc`) does not perfectly
|
||||||
|
align with the OCI spec itself.
|
||||||
|
|
||||||
|
Further, since the default OCI runtime used by Docker is `runc`, Docker
|
||||||
|
expects runtimes to behave as `runc` does. This implies that another form of
|
||||||
|
limitation arises if the behavior of a runtime implementation does not align
|
||||||
|
with that of `runc`. Having two standards complicates the challenge of
|
||||||
|
supporting a Docker environment since a runtime must support the official OCI
|
||||||
|
spec and the non-standard extensions provided by `runc`.
|
||||||
|
|
||||||
|
# Scope
|
||||||
|
|
||||||
|
Each known limitation is captured in a separate GitHub issue that contains
|
||||||
|
detailed information about the issue. These issues are tagged with the
|
||||||
|
`limitation` label. This document is a curated summary of important known
|
||||||
|
limitations and provides links to the relevant GitHub issues.
|
||||||
|
|
||||||
|
The following link shows the latest list of limitations:
|
||||||
|
|
||||||
|
- https://github.com/pulls?utf8=%E2%9C%93&q=is%3Aopen+label%3Alimitation+org%3Akata-containers
|
||||||
|
|
||||||
|
# Contributing
|
||||||
|
|
||||||
|
If you would like to work on resolving a limitation, please refer to the
|
||||||
|
[contributors guide](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md).
|
||||||
|
If you wish to raise an issue for a new limitation, either
|
||||||
|
[raise an issue directly on the runtime](https://github.com/kata-containers/runtime/issues/new)
|
||||||
|
or see the
|
||||||
|
[project table of contents](https://github.com/kata-containers/kata-containers)
|
||||||
|
for advice on which repository to raise the issue against.
|
||||||
|
|
||||||
|
# Pending items
|
||||||
|
|
||||||
|
This section lists items that might be possible to fix.
|
||||||
|
|
||||||
|
## Runtime commands
|
||||||
|
|
||||||
|
### checkpoint and restore
|
||||||
|
|
||||||
|
The runtime does not provide `checkpoint` and `restore` commands. There
|
||||||
|
are discussions about using VM save and restore to give [`criu`](https://github.com/checkpoint-restore/criu)-like functionality, which might provide a solution.
|
||||||
|
|
||||||
|
Note that the OCI standard does not specify `checkpoint` and `restore`
|
||||||
|
commands.
|
||||||
|
|
||||||
|
See issue https://github.com/kata-containers/runtime/issues/184 for more information.
|
||||||
|
|
||||||
|
### events command
|
||||||
|
|
||||||
|
The runtime does not fully implement the `events` command. `OOM` notifications and `Intel RDT` stats are not fully supported.
|
||||||
|
|
||||||
|
Note that the OCI standard does not specify an `events` command.
|
||||||
|
|
||||||
|
See issue https://github.com/kata-containers/runtime/issues/308 and https://github.com/kata-containers/runtime/issues/309 for more information.
|
||||||
|
|
||||||
|
### update command
|
||||||
|
|
||||||
|
Currently, only block I/O weight is not supported.
|
||||||
|
All other configurations are supported and are working properly.
|
||||||
|
|
||||||
|
## Networking
|
||||||
|
|
||||||
|
### Docker swarm and compose support
|
||||||
|
|
||||||
|
The newest version of Docker supported is specified by the
|
||||||
|
`externals.docker.version` variable in the
|
||||||
|
[versions database](https://github.com/kata-containers/runtime/blob/master/versions.yaml).
|
||||||
|
|
||||||
|
Basic Docker swarm support works. However, if you want to use custom networks
|
||||||
|
with Docker's swarm, an older version of Docker is required. This is specified
|
||||||
|
by the `externals.docker.meta.swarm-version` variable in the
|
||||||
|
[versions database](https://github.com/kata-containers/runtime/blob/master/versions.yaml).
|
||||||
|
|
||||||
|
See issue https://github.com/kata-containers/runtime/issues/175 for more information.
|
||||||
|
|
||||||
|
Docker compose normally uses custom networks, so also has the same limitations.
|
||||||
|
|
||||||
|
## Resource management
|
||||||
|
|
||||||
|
Due to the way VMs differ in their CPU and memory allocation, and sharing
|
||||||
|
across the host system, the implementation of an equivalent method for
|
||||||
|
these commands is potentially challenging.
|
||||||
|
|
||||||
|
See issue https://github.com/clearcontainers/runtime/issues/341 and [the constraints challenge](#the-constraints-challenge) for more information.
|
||||||
|
|
||||||
|
For CPUs resource management see
|
||||||
|
[CPU constraints](design/vcpu-handling.md).
|
||||||
|
|
||||||
|
### docker run and shared memory
|
||||||
|
|
||||||
|
The runtime does not implement the `docker run --shm-size` command to
|
||||||
|
set the size of the `/dev/shm tmpfs` within the container. It is possible to pass this configuration value into the VM container so the appropriate mount command happens at launch time.
|
||||||
|
|
||||||
|
See issue https://github.com/kata-containers/kata-containers/issues/21 for more information.
|
||||||
|
|
||||||
|
### docker run and sysctl
|
||||||
|
|
||||||
|
The `docker run --sysctl` feature is not implemented. At the runtime
|
||||||
|
level, this equates to the `linux.sysctl` OCI configuration. Docker
|
||||||
|
allows configuring the sysctl settings that support namespacing. From a security and isolation point of view, it might make sense to set them in the VM, which isolates sysctl settings. Also, given that each Kata Container has its own kernel, we can support setting of sysctl settings that are not namespaced. In some cases, we might need to support configuring some of the settings on both the host side Kata Container namespace and the Kata Containers kernel.
|
||||||
|
|
||||||
|
See issue https://github.com/kata-containers/runtime/issues/185 for more information.
|
||||||
|
|
||||||
|
## Docker daemon features
|
||||||
|
|
||||||
|
Some features enabled or implemented via the
|
||||||
|
[`dockerd` daemon](https://docs.docker.com/config/daemon/) configuration are not yet
|
||||||
|
implemented.
|
||||||
|
|
||||||
|
### SELinux support
|
||||||
|
|
||||||
|
The `dockerd` configuration option `"selinux-enabled": true` is not presently implemented
|
||||||
|
in Kata Containers. Enabling this option causes an OCI runtime error.
|
||||||
|
|
||||||
|
See issue https://github.com/kata-containers/runtime/issues/784 for more information.
|
||||||
|
|
||||||
|
The consequence of this is that the [Docker --security-opt is only partially supported](#docker---security-opt-option-partially-supported).
|
||||||
|
|
||||||
|
Kubernetes [SELinux labels](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#assign-selinux-labels-to-a-container) will also not be applied.
|
||||||
|
|
||||||
|
# Architectural limitations
|
||||||
|
|
||||||
|
This section lists items that might not be fixed due to fundamental
|
||||||
|
architectural differences between "soft containers" (i.e. traditional Linux*
|
||||||
|
containers) and those based on VMs.
|
||||||
|
|
||||||
|
## Networking limitations
|
||||||
|
|
||||||
|
### Support for joining an existing VM network
|
||||||
|
|
||||||
|
Docker supports the ability for containers to join another container's
|
||||||
|
namespace with the `docker run --net=containers` syntax. This allows
|
||||||
|
multiple containers to share a common network namespace and the network
|
||||||
|
interfaces placed in the network namespace. Kata Containers does not
|
||||||
|
support network namespace sharing. If a Kata Container is set up to
|
||||||
|
share the network namespace of a `runc` container, the runtime
|
||||||
|
effectively takes over all the network interfaces assigned to the
|
||||||
|
namespace and binds them to the VM. Consequently, the `runc` container loses
|
||||||
|
its network connectivity.
|
||||||
|
|
||||||
|
### docker --net=host
|
||||||
|
|
||||||
|
Docker host network support (`docker --net=host run`) is not supported.
|
||||||
|
It is not possible to directly access the host networking configuration
|
||||||
|
from within the VM.
|
||||||
|
|
||||||
|
The `--net=host` option can still be used with `runc` containers and
|
||||||
|
inter-mixed with running Kata Containers, thus enabling use of `--net=host`
|
||||||
|
when necessary.
|
||||||
|
|
||||||
|
It should be noted, currently passing the `--net=host` option into a
|
||||||
|
Kata Container may result in the Kata Container networking setup
|
||||||
|
modifying, re-configuring and therefore possibly breaking the host
|
||||||
|
networking setup. Do not use `--net=host` with Kata Containers.
|
||||||
|
|
||||||
|
### docker run --link
|
||||||
|
|
||||||
|
The runtime does not support the `docker run --link` command. This
|
||||||
|
command is now deprecated by docker and we have no intention of adding support.
|
||||||
|
Equivalent functionality can be achieved with the newer docker networking commands.
|
||||||
|
|
||||||
|
See more documentation at
|
||||||
|
[docs.docker.com](https://docs.docker.com/engine/userguide/networking/default_network/dockerlinks/).
|
||||||
|
|
||||||
|
## Host resource sharing
|
||||||
|
|
||||||
|
### docker run --privileged
|
||||||
|
|
||||||
|
Privileged support in Kata is essentially different from `runc` containers.
|
||||||
|
Kata does support the `docker run --privileged` command, but in this case full access
|
||||||
|
to the guest VM is provided in addition to some host access.
|
||||||
|
|
||||||
|
The container runs with elevated capabilities within the guest and is granted
|
||||||
|
access to guest devices instead of the host devices.
|
||||||
|
This is also true when using `securityContext privileged=true` with Kubernetes.
|
||||||
|
|
||||||
|
The container may also be granted full access to a subset of host devices
|
||||||
|
(https://github.com/kata-containers/runtime/issues/1568).
|
||||||
|
|
||||||
|
See [Privileged Kata Containers](how-to/privileged.md) for how to configure some of this behavior.
|
||||||
|
|
||||||
|
# Miscellaneous
|
||||||
|
|
||||||
|
This section lists limitations where the possible solutions are uncertain.
|
||||||
|
|
||||||
|
## Docker --security-opt option partially supported
|
||||||
|
|
||||||
|
The `--security-opt=` option used by Docker is partially supported.
|
||||||
|
We only support `--security-opt=no-new-privileges` and `--security-opt seccomp=/path/to/seccomp/profile.json`
|
||||||
|
option as of today.
|
||||||
|
|
||||||
|
Note: The `--security-opt apparmor=your_profile` is not yet supported. See https://github.com/kata-containers/runtime/issues/707.
|
||||||
|
# Appendices
|
||||||
|
|
||||||
|
## The constraints challenge
|
||||||
|
|
||||||
|
Applying resource constraints such as cgroup, CPU, memory, and storage to a workload is not always straightforward with a VM based system. A Kata Container runs in an isolated environment inside a virtual machine. This, coupled with the architecture of Kata Containers, offers many more possibilities than are available to traditional Linux containers due to the various layers and contexts.
|
||||||
|
|
||||||
|
In some cases it might be necessary to apply the constraints to multiple levels. In other cases, the hardware isolated VM provides equivalent functionality to the requested constraint.
|
||||||
|
|
||||||
|
The following examples outline some of the various areas constraints can be applied:
|
||||||
|
|
||||||
|
- Inside the VM
|
||||||
|
|
||||||
|
Constrain the guest kernel. This can be achieved by passing particular values through the kernel command line used to boot the guest kernel. Alternatively, sysctl values can be applied at early boot.
|
||||||
|
|
||||||
|
- Inside the container
|
||||||
|
|
||||||
|
Constrain the container created inside the VM.
|
||||||
|
|
||||||
|
- Outside the VM:
|
||||||
|
|
||||||
|
- Constrain the hypervisor process by applying host-level constraints.
|
||||||
|
|
||||||
|
- Constrain all processes running inside the hypervisor.
|
||||||
|
|
||||||
|
This can be achieved by specifying particular hypervisor configuration options.
|
||||||
|
|
||||||
|
- Constrain the [shim](https://github.com/kata-containers/shim) process.
|
||||||
|
|
||||||
|
This process represents the container workload running inside the VM.
|
||||||
|
|
||||||
|
- Constrain the [proxy](https://github.com/kata-containers/proxy) process.
|
||||||
|
|
||||||
|
Note that in some circumstances it might be necessary to apply particular constraints
|
||||||
|
to more than one of the previous areas to achieve the desired level of isolation and resource control.
|
8
docs/Makefile
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2018 Intel Corporation
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
#
|
||||||
|
|
||||||
|
default:
|
||||||
|
@true
|
76
docs/README.md
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
# Documentation
|
||||||
|
|
||||||
|
* [Getting Started](#getting-started)
|
||||||
|
* [More User Guides](#more-user-guides)
|
||||||
|
* [Kata Use-Cases](#kata-use-cases)
|
||||||
|
* [Developer Guide](#developer-guide)
|
||||||
|
* [Design and Implementations](#design-and-implementations)
|
||||||
|
* [How to Contribute](#how-to-contribute)
|
||||||
|
* [Code Licensing](#code-licensing)
|
||||||
|
* [The Release Process](#the-release-process)
|
||||||
|
* [Help Improving the Documents](#help-improving-the-documents)
|
||||||
|
* [Website Changes](#website-changes)
|
||||||
|
|
||||||
|
The [Kata Containers](https://github.com/kata-containers)
|
||||||
|
documentation repository hosts overall system documentation, with information
|
||||||
|
common to multiple components.
|
||||||
|
|
||||||
|
For details of the other Kata Containers repositories, see the
|
||||||
|
[repository summary](https://github.com/kata-containers/kata-containers).
|
||||||
|
|
||||||
|
## Getting Started
|
||||||
|
|
||||||
|
* [Installation guides](./install/README.md): Install and run Kata Containers with Docker or Kubernetes
|
||||||
|
|
||||||
|
## More User Guides
|
||||||
|
|
||||||
|
* [Upgrading](Upgrading.md): how to upgrade from [Clear Containers](https://github.com/clearcontainers) and [runV](https://github.com/hyperhq/runv) to [Kata Containers](https://github.com/kata-containers) and how to upgrade an existing Kata Containers system to the latest version.
|
||||||
|
* [Limitations](Limitations.md): differences and limitations compared with the default [Docker](https://www.docker.com/) runtime,
|
||||||
|
[`runc`](https://github.com/opencontainers/runc).
|
||||||
|
|
||||||
|
### Howto guides
|
||||||
|
|
||||||
|
See the [howto documentation](how-to).
|
||||||
|
|
||||||
|
## Kata Use-Cases
|
||||||
|
|
||||||
|
* [GPU Passthrough with Kata](./use-cases/GPU-passthrough-and-Kata.md)
|
||||||
|
* [OpenStack Zun with Kata Containers](./use-cases/zun_kata.md)
|
||||||
|
* [SR-IOV with Kata](./use-cases/using-SRIOV-and-kata.md)
|
||||||
|
* [Intel QAT with Kata](./use-cases/using-Intel-QAT-and-kata.md)
|
||||||
|
* [VPP with Kata](./use-cases/using-vpp-and-kata.md)
|
||||||
|
* [SPDK vhost-user with Kata](./use-cases/using-SPDK-vhostuser-and-kata.md)
|
||||||
|
|
||||||
|
## Developer Guide
|
||||||
|
|
||||||
|
Documents that help to understand and contribute to Kata Containers.
|
||||||
|
|
||||||
|
### Design and Implementations
|
||||||
|
|
||||||
|
* [Kata Containers Architecture](design/architecture.md): Architectural overview of Kata Containers
|
||||||
|
* [Kata Containers design](./design/README.md): More Kata Containers design documents
|
||||||
|
|
||||||
|
### How to Contribute
|
||||||
|
|
||||||
|
* [Developer Guide](Developer-Guide.md): Setup the Kata Containers developing environments
|
||||||
|
* [How to contribute to Kata Containers](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md)
|
||||||
|
* [Code of Conduct](CODE_OF_CONDUCT.md)
|
||||||
|
|
||||||
|
### Code Licensing
|
||||||
|
|
||||||
|
* [Licensing](Licensing-strategy.md): About the licensing strategy of Kata Containers.
|
||||||
|
|
||||||
|
### The Release Process
|
||||||
|
|
||||||
|
* [Release strategy](Stable-Branch-Strategy.md)
|
||||||
|
* [Release Process](Release-Process.md)
|
||||||
|
|
||||||
|
## Help Improving the Documents
|
||||||
|
|
||||||
|
* [Documentation Requirements](Documentation-Requirements.md)
|
||||||
|
|
||||||
|
## Website Changes
|
||||||
|
|
||||||
|
If you have a suggestion for how we can improve the
|
||||||
|
[website](https://katacontainers.io), please raise an issue (or a PR) on
|
||||||
|
[the repository that holds the source for the website](https://github.com/OpenStackweb/kata-netlify-refresh).
|
118
docs/Release-Process.md
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
|
||||||
|
# How to do a Kata Containers Release
|
||||||
|
This document lists the tasks required to create a Kata Release.
|
||||||
|
|
||||||
|
<!-- TOC START min:1 max:3 link:true asterisk:false update:true -->
|
||||||
|
- [How to do a Kata Containers Release](#how-to-do-a-kata-containers-release)
|
||||||
|
- [Requirements](#requirements)
|
||||||
|
- [Release Process](#release-process)
|
||||||
|
- [Bump all Kata repositories](#bump-all-kata-repositories)
|
||||||
|
- [Merge all bump version Pull requests](#merge-all-bump-version-pull-requests)
|
||||||
|
- [Tag all Kata repositories](#tag-all-kata-repositories)
|
||||||
|
- [Check Git-hub Actions](#check-git-hub-actions)
|
||||||
|
- [Create OBS Packages](#create-obs-packages)
|
||||||
|
- [Create release notes](#create-release-notes)
|
||||||
|
- [Announce the release](#announce-the-release)
|
||||||
|
<!-- TOC END -->
|
||||||
|
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- [hub](https://github.com/github/hub)
|
||||||
|
|
||||||
|
- OBS account with permissions on [`/home:katacontainers`](https://build.opensuse.org/project/subprojects/home:katacontainers)
|
||||||
|
|
||||||
|
- GitHub permissions to push tags and create releases in Kata repositories.
|
||||||
|
|
||||||
|
- GPG configured to sign git tags. https://help.github.com/articles/generating-a-new-gpg-key/
|
||||||
|
|
||||||
|
- You should configure your GitHub to use your ssh keys (to push to branches). See https://help.github.com/articles/adding-a-new-ssh-key-to-your-github-account/.
|
||||||
|
* As an alternative, configure hub to push and fork with HTTPS, `git config --global hub.protocol https` (Not tested yet) *
|
||||||
|
|
||||||
|
## Release Process
|
||||||
|
|
||||||
|
### Bump all Kata repositories
|
||||||
|
|
||||||
|
- We have set up a Jenkins job to bump the version in the `VERSION` file in all Kata repositories. Go to the [Jenkins bump-job page](http://jenkins.katacontainers.io/job/release/build) to trigger a new job.
|
||||||
|
- Start a new job with variables for the job passed as:
|
||||||
|
- `BRANCH=<the-branch-you-want-to-bump>`
|
||||||
|
- `NEW_VERSION=<the-new-kata-version>`
|
||||||
|
|
||||||
|
For example, in the case where you want to make a patch release `1.10.2`, the variable `NEW_VERSION` should be `1.10.2` and `BRANCH` should point to `stable-1.10`. In case of an alpha or release candidate release, `BRANCH` should point to `master` branch.
|
||||||
|
|
||||||
|
Alternatively, you can also bump the repositories using a script in the Kata packaging repo
|
||||||
|
```
|
||||||
|
$ cd ${GOPATH}/src/github.com/kata-containers/packaging/release
|
||||||
|
$ export NEW_VERSION=<the-new-kata-version>
|
||||||
|
$ export BRANCH=<the-branch-you-want-to-bump>
|
||||||
|
$ ./update-repository-version.sh -p "$NEW_VERSION" "$BRANCH"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Merge all bump version Pull requests
|
||||||
|
|
||||||
|
- The above step will create a GitHub pull request in the Kata projects. Trigger the CI using `/test` command on each bump Pull request.
|
||||||
|
- Check any failures and fix if needed.
|
||||||
|
- Work with the Kata approvers to verify that the CI works and the pull requests are merged.
|
||||||
|
|
||||||
|
### Tag all Kata repositories
|
||||||
|
|
||||||
|
Once all the pull requests to bump versions in all Kata repositories are merged,
|
||||||
|
tag all the repositories as shown below.
|
||||||
|
```
|
||||||
|
$ cd ${GOPATH}/src/github.com/kata-containers/packaging/release
|
||||||
|
$ git checkout <kata-branch-to-release>
|
||||||
|
$ git pull
|
||||||
|
$ ./tag_repos.sh -p -b "$BRANCH" tag
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check Git-hub Actions
|
||||||
|
|
||||||
|
We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/master/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-containers` repository.
|
||||||
|
|
||||||
|
Check the [actions status page](https://github.com/kata-containers/kata-containers/actions) to verify all steps in the actions workflow have completed successfully. On success, a static tarball containing Kata release artifacts will be uploaded to the [Release page](https://github.com/kata-containers/runtime/releases).
|
||||||
|
|
||||||
|
### Create OBS Packages
|
||||||
|
|
||||||
|
- We have set up an [Azure Pipelines](https://azure.microsoft.com/en-us/services/devops/pipelines/) job
|
||||||
|
to trigger generation of Kata packages in [OBS](https://build.opensuse.org/).
|
||||||
|
Go to the [Azure Pipelines job that creates OBS packages](https://dev.azure.com/kata-containers/release-process/_release?_a=releases&view=mine&definitionId=1).
|
||||||
|
- Click on "Create release" (blue button, at top right corner).
|
||||||
|
It should prompt you for variables to be passed to the release job. They should look like:
|
||||||
|
|
||||||
|
```
|
||||||
|
BRANCH="the-kata-branch-that-is-release"
|
||||||
|
BUILD_HEAD=false
|
||||||
|
OBS_BRANCH="the-kata-branch-that-is-release"
|
||||||
|
```
|
||||||
|
Note: If the release is `Alpha` , `Beta` , or `RC` (that is part of a `master` release), please use `OBS_BRANCH=master`.
|
||||||
|
|
||||||
|
The above step shall create OBS packages for Kata for various distributions that Kata supports and test them as well.
|
||||||
|
- Verify that the packages have built successfully by checking the [Kata OBS project page](https://build.opensuse.org/project/subprojects/home:katacontainers).
|
||||||
|
- Make sure packages work correctly. This can be done manually or via the [package testing pipeline](http://jenkins.katacontainers.io/job/package-release-testing).
|
||||||
|
You have to make sure the packages are already published by OBS before this step.
|
||||||
|
It should prompt you for variables to be passed to the pipeline:
|
||||||
|
|
||||||
|
```
|
||||||
|
BRANCH="<kata-branch-to-release>"
|
||||||
|
NEW_VERSION=<the-version-you-expect-to-be-packaged|latest>
|
||||||
|
```
|
||||||
|
Note: `latest` will verify that a package provides the latest Kata tag in that branch.
|
||||||
|
|
||||||
|
### Create release notes
|
||||||
|
|
||||||
|
We have a script in place in the packaging repository to create release notes that include a short-log of the commits across Kata components.
|
||||||
|
|
||||||
|
Run the script as shown below:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ cd ${GOPATH}/src/github.com/kata-containers/packaging/release
|
||||||
|
# Note: OLD_VERSION is where the script should start to get changes.
|
||||||
|
$ ./runtime-release-notes.sh ${OLD_VERSION} ${NEW_VERSION} > notes.md
|
||||||
|
# Edit the `notes.md` file to review and make any changes to the release notes.
|
||||||
|
# Add the release notes in GitHub runtime.
|
||||||
|
$ hub -C "${GOPATH}/src/github.com/kata-containers/runtime" release edit -F notes.md "${NEW_VERSION}"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Announce the release
|
||||||
|
|
||||||
|
Publish in [Slack and Kata mailing list](https://github.com/kata-containers/community#join-us) that new release is ready.
|
151
docs/Stable-Branch-Strategy.md
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
Branch and release maintenance for the Kata Containers project.
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
This document provides details about Kata Containers releases.
|
||||||
|
|
||||||
|
## Versioning
|
||||||
|
|
||||||
|
The Kata Containers project uses [semantic versioning](http://semver.org/) for all releases.
|
||||||
|
Semantic versions are comprised of three fields in the form:
|
||||||
|
|
||||||
|
```
|
||||||
|
MAJOR.MINOR.PATCH
|
||||||
|
```
|
||||||
|
|
||||||
|
For examples: `1.0.0`, `1.0.0-rc.5`, and `99.123.77+foo.bar.baz.5`.
|
||||||
|
|
||||||
|
Semantic versioning is used since the version number is able to convey clear
|
||||||
|
information about how a new version relates to the previous version.
|
||||||
|
For example, semantic versioning can also provide assurances to allow users to know
|
||||||
|
when they must upgrade compared with when they might want to upgrade:
|
||||||
|
|
||||||
|
- When `PATCH` increases, the new release contains important **security fixes**
|
||||||
|
and an upgrade is recommended.
|
||||||
|
|
||||||
|
The patch field can contain extra details after the number.
|
||||||
|
Dashes denote pre-release versions. `1.0.0-rc.5` in the example denotes the fifth release
|
||||||
|
candidate for release `1.0.0`. Plus signs denote other details. In our example, `+foo.bar.baz.5`
|
||||||
|
provides additional information regarding release `99.123.77` in the previous example.
|
||||||
|
|
||||||
|
- When `MINOR` increases, the new release adds **new features** but *without
|
||||||
|
changing the existing behavior*.
|
||||||
|
|
||||||
|
- When `MAJOR` increases, the new release adds **new features, bug fixes, or
|
||||||
|
both** and which *changes the behavior from the previous release* (incompatible with previous releases).
|
||||||
|
|
||||||
|
A major release will also likely require a change of the container manager version used,
|
||||||
|
for example Docker\*. Please refer to the release notes for further details.
|
||||||
|
|
||||||
|
## Release Strategy
|
||||||
|
|
||||||
|
Any new features added since the last release will be available in the next minor
|
||||||
|
release. These will include bug fixes as well. To facilitate a stable user environment,
|
||||||
|
Kata provides stable branch-based releases and a master branch release.
|
||||||
|
|
||||||
|
## Stable branch patch criteria
|
||||||
|
|
||||||
|
No new features should be introduced to stable branches. This is intended to limit risk to users,
|
||||||
|
providing only bug and security fixes.
|
||||||
|
|
||||||
|
## Branch Management
|
||||||
|
Kata Containers will maintain two stable release branches in addition to the master branch.
|
||||||
|
Once a new MAJOR or MINOR release is created from master, a new stable branch is created for
|
||||||
|
the prior MAJOR or MINOR release and the older stable branch is no longer maintained. End of
|
||||||
|
maintenance for a branch is announced on the Kata Containers mailing list. Users can determine
|
||||||
|
the version currently installed by running `kata-runtime kata-env`. It is recommended to use the
|
||||||
|
latest stable branch available.
|
||||||
|
|
||||||
|
A couple of examples follow to help clarify this process.
|
||||||
|
|
||||||
|
### New bug fix introduced
|
||||||
|
|
||||||
|
A bug fix is submitted against the runtime which does not introduce new inter-component dependencies.
|
||||||
|
This fix is applied to both the master and stable branches, and there is no need to create a new
|
||||||
|
stable branch.
|
||||||
|
|
||||||
|
| Branch | Original version | New version |
|
||||||
|
|--|--|--|
|
||||||
|
| `master` | `1.3.0-rc0` | `1.3.0-rc1` |
|
||||||
|
| `stable-1.2` | `1.2.0` | `1.2.1` |
|
||||||
|
| `stable-1.1` | `1.1.2` | `1.1.3` |
|
||||||
|
|
||||||
|
|
||||||
|
### New release made feature or change adding new inter-component dependency
|
||||||
|
|
||||||
|
A new feature is introduced, which adds a new inter-component dependency. In this case a new stable
|
||||||
|
branch is created (stable-1.3) starting from master and the older stable branch (stable-1.1)
|
||||||
|
is dropped from maintenance.
|
||||||
|
|
||||||
|
|
||||||
|
| Branch | Original version | New version |
|
||||||
|
|--|--|--|
|
||||||
|
| `master` | `1.3.0-rc1` | `1.3.0` |
|
||||||
|
| `stable-1.3` | N/A| `1.3.0` |
|
||||||
|
| `stable-1.2` | `1.2.1` | `1.2.2` |
|
||||||
|
| `stable-1.1` | `1.1.3` | (unmaintained) |
|
||||||
|
|
||||||
|
Note, the stable-1.1 branch will still exist with tag 1.1.3, but under current plans it is
|
||||||
|
not maintained further. The next tag applied to master will be 1.4.0-alpha0. We would then
|
||||||
|
create a couple of alpha releases gathering features targeted for that particular release (in
|
||||||
|
this case 1.4.0), followed by a release candidate. The release candidate marks a feature freeze.
|
||||||
|
A new stable branch is created for the release candidate. Only bug fixes and any security issues
|
||||||
|
are added to the branch going forward until release 1.4.0 is made.
|
||||||
|
|
||||||
|
## Backporting Process
|
||||||
|
|
||||||
|
Development that occurs against the master branch and applicable code commits should also be submitted
|
||||||
|
against the stable branches. Some guidelines for this process follow:
|
||||||
|
1. Only bug and security fixes which do not introduce inter-component dependencies are
|
||||||
|
candidates for stable branches. These PRs should be marked with "bug" in GitHub.
|
||||||
|
2. Once a PR is created against master which meets requirement of (1), a comparable one
|
||||||
|
should also be submitted against the stable branches. It is the responsibility of the submitter
|
||||||
|
to apply their pull request against stable, and it is the responsibility of the
|
||||||
|
reviewers to help identify stable-candidate pull requests.
|
||||||
|
|
||||||
|
## Continuous Integration Testing
|
||||||
|
|
||||||
|
The test repository is forked to create stable branches from master. Full CI
|
||||||
|
runs on each stable and master PR using its respective tests repository branch.
|
||||||
|
|
||||||
|
### An alternative method for CI testing:
|
||||||
|
|
||||||
|
Ideally, the continuous integration infrastructure will run the same test suite on both master
|
||||||
|
and the stable branches. When tests are modified or new feature tests are introduced, explicit
|
||||||
|
logic should exist within the testing CI to make sure only applicable tests are executed against
|
||||||
|
stable and master. While this is not in place currently, it should be considered in the long term.
|
||||||
|
|
||||||
|
## Release Management
|
||||||
|
|
||||||
|
### Patch releases
|
||||||
|
|
||||||
|
Releases are made every three weeks, which include a GitHub release as
|
||||||
|
well as binary packages. These patch releases are made for both stable branches, and a "release candidate"
|
||||||
|
for the next `MAJOR` or `MINOR` is created from master. If there are no changes across all the repositories, no
|
||||||
|
release is created and an announcement is made on the developer mailing list to highlight this.
|
||||||
|
If a release is being made, each repository is tagged for this release, regardless
|
||||||
|
of whether changes are introduced. The release schedule can be seen on the
|
||||||
|
[release rotation wiki page](https://github.com/kata-containers/community/wiki/Release-Team-Rota).
|
||||||
|
|
||||||
|
If there is urgent need for a fix, a patch release will be made outside of the planned schedule.
|
||||||
|
|
||||||
|
The process followed for making a release can be found at [Release Process](Release-Process.md).
|
||||||
|
|
||||||
|
## Minor releases
|
||||||
|
|
||||||
|
### Frequency
|
||||||
|
Minor releases are less frequent in order to provide a more stable baseline for users. They are currently
|
||||||
|
running on a twelve week cadence. As the Kata Containers code base has reached a certain level of
|
||||||
|
maturity, we have increased the cadence from six weeks to twelve weeks. The release schedule can be seen on the
|
||||||
|
[release rotation wiki page](https://github.com/kata-containers/community/wiki/Release-Team-Rota).
|
||||||
|
|
||||||
|
### Compatibility
|
||||||
|
Kata guarantees compatibility between components that are within one minor release of each other.
|
||||||
|
|
||||||
|
This is critical for dependencies which cross between host (runtime, shim, proxy) and
|
||||||
|
the guest (hypervisor, rootfs and agent). For example, consider a cluster with a long-running
|
||||||
|
deployment, workload-never-dies, all on Kata version 1.1.3 components. If the operator updates
|
||||||
|
the Kata components to the next new minor release (i.e. 1.2.0), we need to guarantee that the 1.2.0
|
||||||
|
runtime still communicates with 1.1.3 agent within workload-never-dies.
|
||||||
|
|
||||||
|
Handling live-update is out of the scope of this document. See this [`kata-runtime` issue](https://github.com/kata-containers/runtime/issues/492) for details.
|
185
docs/Upgrading.md
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
* [Introduction](#introduction)
|
||||||
|
* [Unsupported scenarios](#unsupported-scenarios)
|
||||||
|
* [Maintenance Warning](#maintenance-warning)
|
||||||
|
* [Upgrade from Clear Containers](#upgrade-from-clear-containers)
|
||||||
|
* [Stop all running Clear Container instances](#stop-all-running-clear-container-instances)
|
||||||
|
* [Configuration migration](#configuration-migration)
|
||||||
|
* [Remove Clear Containers packages](#remove-clear-containers-packages)
|
||||||
|
* [Fedora](#fedora)
|
||||||
|
* [Ubuntu](#ubuntu)
|
||||||
|
* [Disable old container manager configuration](#disable-old-container-manager-configuration)
|
||||||
|
* [Install Kata Containers](#install-kata-containers)
|
||||||
|
* [Create a Kata Container](#create-a-kata-container)
|
||||||
|
* [Upgrade from runV](#upgrade-from-runv)
|
||||||
|
* [Upgrade Kata Containers](#upgrade-kata-containers)
|
||||||
|
* [Appendices](#appendices)
|
||||||
|
* [Assets](#assets)
|
||||||
|
* [Guest kernel](#guest-kernel)
|
||||||
|
* [Image](#image)
|
||||||
|
* [Determining asset versions](#determining-asset-versions)
|
||||||
|
|
||||||
|
# Introduction
|
||||||
|
|
||||||
|
This document explains how to upgrade from
|
||||||
|
[Clear Containers](https://github.com/clearcontainers) and [runV](https://github.com/hyperhq/runv) to
|
||||||
|
[Kata Containers](https://github.com/kata-containers) and how to upgrade an existing
|
||||||
|
Kata Containers system to the latest version.
|
||||||
|
|
||||||
|
# Unsupported scenarios
|
||||||
|
|
||||||
|
Upgrading a Clear Containers system on the following distributions is **not**
|
||||||
|
supported since the installation process for these distributions makes use of
|
||||||
|
unpackaged components:
|
||||||
|
|
||||||
|
- [CentOS](https://github.com/clearcontainers/runtime/blob/master/docs/centos-installation-guide.md)
|
||||||
|
- [BCLinux](https://github.com/clearcontainers/runtime/blob/master/docs/bclinux-installation-guide.md)
|
||||||
|
- [RHEL](https://github.com/clearcontainers/runtime/blob/master/docs/rhel-installation-guide.md)
|
||||||
|
- [SLES](https://github.com/clearcontainers/runtime/blob/master/docs/sles-installation-guide.md)
|
||||||
|
|
||||||
|
Additionally, upgrading
|
||||||
|
[Clear Linux](https://github.com/clearcontainers/runtime/blob/master/docs/clearlinux-installation-guide.md)
|
||||||
|
is not supported as Kata Containers packages do not yet exist.
|
||||||
|
|
||||||
|
# Maintenance Warning
|
||||||
|
|
||||||
|
The Clear Containers codebase is no longer being developed. Only new releases
|
||||||
|
will be considered for significant bug fixes.
|
||||||
|
|
||||||
|
The main development focus is now on Kata Containers. All Clear Containers
|
||||||
|
users are encouraged to switch to Kata Containers.
|
||||||
|
|
||||||
|
# Upgrade from Clear Containers
|
||||||
|
|
||||||
|
Since Kata Containers can co-exist on the same system as Clear Containers, if
|
||||||
|
you already have Clear Containers installed, the upgrade process is simply to
|
||||||
|
install Kata Containers. However, since Clear Containers is
|
||||||
|
[no longer being actively developed](#maintenance-warning),
|
||||||
|
you are encouraged to remove Clear Containers from your systems.
|
||||||
|
|
||||||
|
## Stop all running Clear Container instances
|
||||||
|
|
||||||
|
Assuming a Docker\* system, to stop all currently running Clear Containers:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ for container in $(sudo docker ps -q); do sudo docker stop $container; done
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration migration
|
||||||
|
|
||||||
|
The automatic migration of
|
||||||
|
[Clear Containers configuration](https://github.com/clearcontainers/runtime#configuration) to
|
||||||
|
[Kata Containers configuration](https://github.com/kata-containers/runtime#configuration) is
|
||||||
|
not supported.
|
||||||
|
|
||||||
|
If you have made changes to your Clear Containers configuration, you should
|
||||||
|
review those changes and decide whether to manually apply those changes to the
|
||||||
|
Kata Containers configuration.
|
||||||
|
|
||||||
|
> **Note**: This step must be completed before continuing to
|
||||||
|
> [remove the Clear Containers packages](#remove-clear-containers-packages) since doing so will
|
||||||
|
> *delete the default Clear Containers configuration file from your system*.
|
||||||
|
|
||||||
|
## Remove Clear Containers packages
|
||||||
|
|
||||||
|
> **Warning**: If you have modified your
|
||||||
|
> [Clear Containers configuration](https://github.com/clearcontainers/runtime#configuration),
|
||||||
|
> you might want to make a safe copy of the configuration file before removing the
|
||||||
|
> packages since doing so will *delete the default configuration file*
|
||||||
|
|
||||||
|
### Fedora
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo -E dnf remove cc-runtime\* cc-proxy\* cc-shim\* linux-container clear-containers-image qemu-lite cc-ksm-throttler
|
||||||
|
$ sudo rm /etc/yum.repos.d/home:clearcontainers:clear-containers-3.repo
|
||||||
|
```
|
||||||
|
|
||||||
|
### Ubuntu
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo apt-get purge cc-runtime\* cc-proxy\* cc-shim\* linux-container clear-containers-image qemu-lite cc-ksm-throttler
|
||||||
|
$ sudo rm /etc/apt/sources.list.d/clear-containers.list
|
||||||
|
```
|
||||||
|
|
||||||
|
## Disable old container manager configuration
|
||||||
|
|
||||||
|
Assuming a Docker installation, remove the docker configuration for Clear
|
||||||
|
Containers:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo rm /etc/systemd/system/docker.service.d/clear-containers.conf
|
||||||
|
```
|
||||||
|
|
||||||
|
## Install Kata Containers
|
||||||
|
|
||||||
|
Follow one of the [installation guides](https://github.com/kata-containers/documentation/tree/master/install).
|
||||||
|
|
||||||
|
## Create a Kata Container
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo docker run -ti busybox sh
|
||||||
|
```
|
||||||
|
|
||||||
|
# Upgrade from runV
|
||||||
|
|
||||||
|
runV and Kata Containers can run together on the same system without affecting each other, as long as they are
|
||||||
|
not configured to use the same container root storage. Currently, runV defaults to `/run/runv` and Kata Containers
|
||||||
|
defaults to `/var/run/kata-containers`.
|
||||||
|
|
||||||
|
Now, to upgrade from runV you need to fresh install Kata Containers by following one of
|
||||||
|
the [installation guides](https://github.com/kata-containers/documentation/tree/master/install).
|
||||||
|
|
||||||
|
# Upgrade Kata Containers
|
||||||
|
|
||||||
|
As shown in the
|
||||||
|
[installation instructions](https://github.com/kata-containers/documentation/blob/master/install),
|
||||||
|
Kata Containers provide binaries for popular distributions in their native
|
||||||
|
packaging formats. This allows Kata Containers to be upgraded using the
|
||||||
|
standard package management tools for your distribution.
|
||||||
|
|
||||||
|
# Appendices
|
||||||
|
|
||||||
|
## Assets
|
||||||
|
|
||||||
|
Kata Containers requires additional resources to create a virtual machine
|
||||||
|
container. These resources are called
|
||||||
|
[Kata Containers assets](./design/architecture.md#assets),
|
||||||
|
which comprise a guest kernel and a root filesystem or initrd image. This
|
||||||
|
section describes when these components are updated.
|
||||||
|
|
||||||
|
Since the official assets are packaged, they are automatically upgraded when
|
||||||
|
new package versions are published.
|
||||||
|
|
||||||
|
> **Warning**: Note that if you use custom assets (by modifying the
|
||||||
|
> [Kata Runtime configuration > file](https://github.com/kata-containers/runtime/#configuration)),
|
||||||
|
> it is your responsibility to ensure they are updated as necessary.
|
||||||
|
|
||||||
|
### Guest kernel
|
||||||
|
|
||||||
|
The `kata-linux-container` package contains a Linux\* kernel based on the
|
||||||
|
latest vanilla version of the
|
||||||
|
[long-term kernel](https://www.kernel.org/)
|
||||||
|
plus a small number of
|
||||||
|
[patches](https://github.com/kata-containers/packaging/tree/master/kernel).
|
||||||
|
|
||||||
|
The `Longterm` branch is only updated with
|
||||||
|
[important bug fixes](https://www.kernel.org/category/releases.html)
|
||||||
|
meaning this package is only updated when necessary.
|
||||||
|
|
||||||
|
The guest kernel package is updated when a new long-term kernel is released
|
||||||
|
and when any patch updates are required.
|
||||||
|
|
||||||
|
### Image
|
||||||
|
|
||||||
|
The `kata-containers-image` package is updated only when critical updates are
|
||||||
|
available for the packages used to create it, such as:
|
||||||
|
|
||||||
|
- systemd
|
||||||
|
- [Kata Containers Agent](https://github.com/kata-containers/agent)
|
||||||
|
|
||||||
|
### Determining asset versions
|
||||||
|
|
||||||
|
To see which versions of the assets being used:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ kata-runtime kata-env
|
||||||
|
```
|
1
docs/VERSION
Normal file
@ -0,0 +1 @@
|
|||||||
|
1.11.0-rc0
|
10
docs/design/README.md
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# Design
|
||||||
|
|
||||||
|
Kata Containers design documents:
|
||||||
|
|
||||||
|
- [Kata Containers architecture](architecture.md)
|
||||||
|
- [API Design of Kata Containers](kata-api-design.md)
|
||||||
|
- [Design requirements for Kata Containers](kata-design-requirements.md)
|
||||||
|
- [VSocks](VSocks.md)
|
||||||
|
- [VCPU handling](vcpu-handling.md)
|
||||||
|
- [Host cgroups](host-cgroups.md)
|
134
docs/design/VSocks.md
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
# Kata Containers and VSOCKs
|
||||||
|
|
||||||
|
- [Introduction](#introduction)
|
||||||
|
- [proxy communication diagram](#proxy-communication-diagram)
|
||||||
|
- [VSOCK communication diagram](#vsock-communication-diagram)
|
||||||
|
- [System requirements](#system-requirements)
|
||||||
|
- [Advantages of using VSOCKs](#advantages-of-using-vsocks)
|
||||||
|
- [High density](#high-density)
|
||||||
|
- [Reliability](#reliability)
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
There are two different ways processes in the virtual machine can communicate
|
||||||
|
with processes in the host. The first one is by using serial ports, where the
|
||||||
|
processes in the virtual machine can read/write data from/to a serial port
|
||||||
|
device and the processes in the host can read/write data from/to a Unix socket.
|
||||||
|
Most GNU/Linux distributions have support for serial ports, making it the most
|
||||||
|
portable solution. However, the serial link limits read/write access to one
|
||||||
|
process at a time. To deal with this limitation the resources (serial port and
|
||||||
|
Unix socket) must be multiplexed. In Kata Containers those resources are
|
||||||
|
multiplexed by using [`kata-proxy`][2] and [Yamux][3], the following diagram shows
|
||||||
|
how it's implemented.
|
||||||
|
|
||||||
|
|
||||||
|
### proxy communication diagram
|
||||||
|
|
||||||
|
```
|
||||||
|
.----------------------.
|
||||||
|
| .------------------. |
|
||||||
|
| | .-----. .-----. | |
|
||||||
|
| | |cont1| |cont2| | |
|
||||||
|
| | `-----' `-----' | |
|
||||||
|
| | \ / | |
|
||||||
|
| | .---------. | |
|
||||||
|
| | | agent | | |
|
||||||
|
| | `---------' | |
|
||||||
|
| | | | |
|
||||||
|
| | .-----------. | |
|
||||||
|
| |POD |serial port| | |
|
||||||
|
| `----|-----------|-' |
|
||||||
|
| | socket | |
|
||||||
|
| `-----------' |
|
||||||
|
| | |
|
||||||
|
| .-------. |
|
||||||
|
| | proxy | |
|
||||||
|
| `-------' |
|
||||||
|
| | |
|
||||||
|
| .------./ \.------. |
|
||||||
|
| | shim | | shim | |
|
||||||
|
| `------' `------' |
|
||||||
|
| Host |
|
||||||
|
`----------------------'
|
||||||
|
```
|
||||||
|
|
||||||
|
A newer, simpler method is [VSOCKs][4], which can accept connections from
|
||||||
|
multiple clients and does not require multiplexers ([`kata-proxy`][2] and
|
||||||
|
[Yamux][3]). The following diagram shows how it's implemented in Kata Containers.
|
||||||
|
|
||||||
|
|
||||||
|
### VSOCK communication diagram
|
||||||
|
|
||||||
|
```
|
||||||
|
.----------------------.
|
||||||
|
| .------------------. |
|
||||||
|
| | .-----. .-----. | |
|
||||||
|
| | |cont1| |cont2| | |
|
||||||
|
| | `-----' `-----' | |
|
||||||
|
| | | | | |
|
||||||
|
| | .---------. | |
|
||||||
|
| | | agent | | |
|
||||||
|
| | `---------' | |
|
||||||
|
| | | | | |
|
||||||
|
| | POD .-------. | |
|
||||||
|
| `-----| vsock |----' |
|
||||||
|
| `-------' |
|
||||||
|
| | | |
|
||||||
|
| .------. .------. |
|
||||||
|
| | shim | | shim | |
|
||||||
|
| `------' `------' |
|
||||||
|
| Host |
|
||||||
|
`----------------------'
|
||||||
|
```
|
||||||
|
|
||||||
|
## System requirements
|
||||||
|
|
||||||
|
The host Linux kernel version must be greater than or equal to v4.8, and the
|
||||||
|
`vhost_vsock` module must be loaded or built-in (`CONFIG_VHOST_VSOCK=y`). To
|
||||||
|
load the module run the following command:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo modprobe -i vhost_vsock
|
||||||
|
```
|
||||||
|
|
||||||
|
The Kata Containers version must be greater than or equal to 1.2.0 and `use_vsock`
|
||||||
|
must be set to `true` in the runtime [configuration file][1].
|
||||||
|
|
||||||
|
### With VMWare guest
|
||||||
|
To use Kata Containers with VSOCKs in a VMWare guest environment, first stop the `vmware-tools` service and unload the VMWare Linux kernel module.
|
||||||
|
```
|
||||||
|
sudo systemctl stop vmware-tools
|
||||||
|
sudo modprobe -r vmw_vsock_vmci_transport
|
||||||
|
sudo modprobe -i vhost_vsock
|
||||||
|
```
|
||||||
|
|
||||||
|
## Advantages of using VSOCKs
|
||||||
|
|
||||||
|
### High density
|
||||||
|
|
||||||
|
Using a proxy for multiplexing the connections between the VM and the host uses
|
||||||
|
4.5MB per [POD][5]. In a high density deployment this could add up to GBs of
|
||||||
|
memory that could have been used to host more PODs. When we talk about density
|
||||||
|
each kilobyte matters and it might be the decisive factor between run another
|
||||||
|
POD or not. For example if you have 500 PODs running in a server, the same
|
||||||
|
amount of [`kata-proxy`][2] processes will be running and consuming for around
|
||||||
|
2250MB of RAM. Before making the decision not to use VSOCKs, you should ask
|
||||||
|
yourself, how many more containers can run with the memory RAM consumed by the
|
||||||
|
Kata proxies?
|
||||||
|
|
||||||
|
### Reliability
|
||||||
|
|
||||||
|
[`kata-proxy`][2] is in charge of multiplexing the connections between virtual
|
||||||
|
machine and host processes, if it dies all connections get broken. For example
|
||||||
|
if you have a [POD][5] with 10 containers running, if `kata-proxy` dies it would
|
||||||
|
be impossible to contact your containers, though they would still be running.
|
||||||
|
Since communication via VSOCKs is direct, the only way to lose communication
|
||||||
|
with the containers is if the VM itself or the [shim][6] dies, if this happens
|
||||||
|
the containers are removed automatically.
|
||||||
|
|
||||||
|
[1]: https://github.com/kata-containers/runtime#configuration
|
||||||
|
[2]: https://github.com/kata-containers/proxy
|
||||||
|
[3]: https://github.com/hashicorp/yamux
|
||||||
|
[4]: https://wiki.qemu.org/Features/VirtioVsock
|
||||||
|
[5]: ./vcpu-handling.md#virtual-cpus-and-kubernetes-pods
|
||||||
|
[6]: https://github.com/kata-containers/shim
|
BIN
docs/design/arch-images/CNM_detailed_diagram.png
Normal file
After Width: | Height: | Size: 23 KiB |
BIN
docs/design/arch-images/CNM_overall_diagram.png
Normal file
After Width: | Height: | Size: 25 KiB |
BIN
docs/design/arch-images/DAX.png
Normal file
After Width: | Height: | Size: 21 KiB |
BIN
docs/design/arch-images/api-to-construct.png
Normal file
After Width: | Height: | Size: 293 KiB |
BIN
docs/design/arch-images/construct-to-vm-concept.png
Normal file
After Width: | Height: | Size: 114 KiB |
BIN
docs/design/arch-images/docker-kata.png
Normal file
After Width: | Height: | Size: 46 KiB |
@ -0,0 +1,47 @@
|
|||||||
|
@startuml
|
||||||
|
|
||||||
|
User->CLI: network add-interface
|
||||||
|
CLI->virtcontainers: AddInterface
|
||||||
|
virtcontainers->QEMU:QMP-hot-add-network
|
||||||
|
virtcontainers->agent:UpdateInterface
|
||||||
|
note right
|
||||||
|
the agent's UpdateInterface code will need to be augmented
|
||||||
|
to have a timeout/wait associated with this for the network
|
||||||
|
device to appear (ie, wait for qmp to complete)
|
||||||
|
end note
|
||||||
|
agent->User: err, interface detail
|
||||||
|
|
||||||
|
User->CLI: network del-interface
|
||||||
|
CLI->virtcontainers: DeleteInterface
|
||||||
|
note right
|
||||||
|
There will be no call to the agent. We rely on guest kernel
|
||||||
|
to clean up any state associated with the interface.
|
||||||
|
end note
|
||||||
|
virtcontainers->QEMU:QMP-hot-delete-network
|
||||||
|
virtcontainers->User: err, interface detail
|
||||||
|
|
||||||
|
User->CLI: network list-interface
|
||||||
|
CLI->virtcontainers: ListInterfaces
|
||||||
|
virtcontainers->agent:ListInterfaces
|
||||||
|
agent->User: err, list of interface details
|
||||||
|
|
||||||
|
User->CLI: network update-routes
|
||||||
|
CLI->virtcontainers: UpdateRoutes
|
||||||
|
note right
|
||||||
|
routes are handled in a 'one shot' basis,
|
||||||
|
setting all of the routes for the network. This needs to
|
||||||
|
be called after interfaces are added, and should be called
|
||||||
|
after interfaces are removed. It should be fine to call once
|
||||||
|
after adding all of the expected interfaces. If you know all
|
||||||
|
the resulting routes, simply calling set routes with the
|
||||||
|
complete list should suffice.
|
||||||
|
end note
|
||||||
|
virtcontainers->agent:UpdateRoutes
|
||||||
|
agent->User: err, list of routes
|
||||||
|
|
||||||
|
User->CLI: network list-routes
|
||||||
|
CLI->virtcontainers: ListRoutes
|
||||||
|
virtcontainers->agent:ListRoutes
|
||||||
|
agent->User: err, list of routes
|
||||||
|
|
||||||
|
@enduml
|
BIN
docs/design/arch-images/kata-containers-network-hotplug.png
Normal file
After Width: | Height: | Size: 51 KiB |
BIN
docs/design/arch-images/kata-crio-uml.png
Normal file
After Width: | Height: | Size: 509 KiB |
174
docs/design/arch-images/kata-crio-uml.txt
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
Title: Kata Flow
|
||||||
|
participant CRI
|
||||||
|
participant CRIO
|
||||||
|
participant Kata Runtime
|
||||||
|
participant virtcontainers
|
||||||
|
participant hypervisor
|
||||||
|
participant agent
|
||||||
|
participant shim-pod
|
||||||
|
participant shim-ctr
|
||||||
|
participant proxy
|
||||||
|
|
||||||
|
# Run the sandbox
|
||||||
|
CRI->CRIO: RunPodSandbox()
|
||||||
|
CRIO->Kata Runtime: create
|
||||||
|
Kata Runtime->virtcontainers: CreateSandbox()
|
||||||
|
Note left of virtcontainers: Sandbox\nReady
|
||||||
|
virtcontainers->virtcontainers: createNetwork()
|
||||||
|
virtcontainers->virtcontainers: Execute PreStart Hooks
|
||||||
|
virtcontainers->+hypervisor: Start VM (inside the netns)
|
||||||
|
hypervisor-->-virtcontainers: VM started
|
||||||
|
virtcontainers->proxy: Start Proxy
|
||||||
|
proxy->hypervisor: Connect the VM
|
||||||
|
virtcontainers->+agent: CreateSandbox()
|
||||||
|
agent-->-virtcontainers: Sandbox Created
|
||||||
|
virtcontainers->+agent: CreateContainer()
|
||||||
|
agent-->-virtcontainers: Container Created
|
||||||
|
virtcontainers->shim-pod: Start Shim
|
||||||
|
shim-pod->agent: ReadStdout() (blocking call)
|
||||||
|
shim-pod->agent: ReadStderr() (blocking call)
|
||||||
|
shim-pod->agent: WaitProcess() (blocking call)
|
||||||
|
Note left of virtcontainers: Container-pod\nReady
|
||||||
|
virtcontainers-->Kata Runtime: End of CreateSandbox()
|
||||||
|
Kata Runtime-->CRIO: End of create
|
||||||
|
CRIO->Kata Runtime: start
|
||||||
|
Kata Runtime->virtcontainers: StartSandbox()
|
||||||
|
Note left of virtcontainers: Sandbox\nRunning
|
||||||
|
virtcontainers->+agent: StartContainer()
|
||||||
|
agent-->-virtcontainers: Container Started
|
||||||
|
Note left of virtcontainers: Container-pod\nRunning
|
||||||
|
virtcontainers->virtcontainers: Execute PostStart Hooks
|
||||||
|
virtcontainers-->Kata Runtime: End of StartSandbox()
|
||||||
|
Kata Runtime-->CRIO: End of start
|
||||||
|
CRIO-->CRI: End of RunPodSandbox()
|
||||||
|
|
||||||
|
# Create the container
|
||||||
|
CRI->CRIO: CreateContainer()
|
||||||
|
CRIO->Kata Runtime: create
|
||||||
|
Kata Runtime->virtcontainers: CreateContainer()
|
||||||
|
virtcontainers->+agent: CreateContainer()
|
||||||
|
agent-->-virtcontainers: Container Created
|
||||||
|
virtcontainers->shim-ctr: Start Shim
|
||||||
|
shim-ctr->agent: ReadStdout() (blocking call)
|
||||||
|
shim-ctr->agent: ReadStderr() (blocking call)
|
||||||
|
shim-ctr->agent: WaitProcess() (blocking call)
|
||||||
|
Note left of virtcontainers: Container-ctr\nReady
|
||||||
|
virtcontainers-->Kata Runtime: End of CreateContainer()
|
||||||
|
Kata Runtime-->CRIO: End of create
|
||||||
|
CRIO-->CRI: End of CreateContainer()
|
||||||
|
|
||||||
|
# Start the container
|
||||||
|
CRI->CRIO: StartContainer()
|
||||||
|
CRIO->Kata Runtime: start
|
||||||
|
Kata Runtime->virtcontainers: StartContainer()
|
||||||
|
virtcontainers->+agent: StartContainer()
|
||||||
|
agent-->-virtcontainers: Container Started
|
||||||
|
Note left of virtcontainers: Container-ctr\nRunning
|
||||||
|
virtcontainers-->Kata Runtime: End of StartContainer()
|
||||||
|
Kata Runtime-->CRIO: End of start
|
||||||
|
CRIO-->CRI: End of StartContainer()
|
||||||
|
|
||||||
|
# Stop the container
|
||||||
|
CRI->CRIO: StopContainer()
|
||||||
|
CRIO->Kata Runtime: kill
|
||||||
|
Kata Runtime->virtcontainers: KillContainer()
|
||||||
|
virtcontainers->+agent: SignalProcess()
|
||||||
|
alt SIGTERM OR SIGKILL
|
||||||
|
agent-->shim-ctr: WaitProcess() returns
|
||||||
|
end
|
||||||
|
agent-->-virtcontainers: Process Signalled
|
||||||
|
virtcontainers-->Kata Runtime: End of KillContainer()
|
||||||
|
alt SIGTERM OR SIGKILL
|
||||||
|
Kata Runtime->virtcontainers: StopContainer()
|
||||||
|
virtcontainers->+shim-ctr: waitForShim()
|
||||||
|
alt Timeout exceeded
|
||||||
|
virtcontainers->+agent: SignalProcess(SIGKILL)
|
||||||
|
agent-->shim-ctr: WaitProcess() returns
|
||||||
|
agent-->-virtcontainers: Process Signalled by SIGKILL
|
||||||
|
virtcontainers->shim-ctr: waitForShim()
|
||||||
|
end
|
||||||
|
shim-ctr-->-virtcontainers: Shim terminated
|
||||||
|
virtcontainers->+agent: SignalProcess(SIGKILL)
|
||||||
|
agent-->-virtcontainers: Process Signalled by SIGKILL
|
||||||
|
virtcontainers->+agent: RemoveContainer()
|
||||||
|
agent-->-virtcontainers: Container Removed
|
||||||
|
Note left of virtcontainers: Container-ctr\nStopped
|
||||||
|
virtcontainers-->Kata Runtime: End of StopContainer()
|
||||||
|
end
|
||||||
|
Kata Runtime-->CRIO: End of kill
|
||||||
|
CRIO-->CRI: End of StopContainer()
|
||||||
|
|
||||||
|
# Remove the container
|
||||||
|
CRI->CRIO: RemoveContainer()
|
||||||
|
CRIO->Kata Runtime: delete
|
||||||
|
Kata Runtime->virtcontainers: DeleteContainer()
|
||||||
|
virtcontainers->virtcontainers: Delete container resources
|
||||||
|
virtcontainers-->Kata Runtime: End of DeleteContainer()
|
||||||
|
Kata Runtime-->CRIO: End of delete
|
||||||
|
CRIO-->CRI: End of RemoveContainer()
|
||||||
|
|
||||||
|
# Stop the sandbox
|
||||||
|
CRI->CRIO: StopPodSandbox()
|
||||||
|
CRIO->Kata Runtime: kill
|
||||||
|
Kata Runtime->virtcontainers: KillContainer()
|
||||||
|
virtcontainers->+agent: SignalProcess()
|
||||||
|
alt SIGTERM OR SIGKILL
|
||||||
|
agent-->shim-pod: WaitProcess() returns
|
||||||
|
end
|
||||||
|
agent-->-virtcontainers: Process Signalled
|
||||||
|
virtcontainers-->Kata Runtime: End of KillContainer()
|
||||||
|
alt SIGTERM OR SIGKILL
|
||||||
|
Kata Runtime->virtcontainers: StopSandbox()
|
||||||
|
loop for each container
|
||||||
|
alt Container-ctr
|
||||||
|
virtcontainers->+shim-ctr: waitForShim()
|
||||||
|
alt Timeout exceeded
|
||||||
|
virtcontainers->+agent: SignalProcess(SIGKILL)
|
||||||
|
agent-->shim-ctr: WaitProcess() returns
|
||||||
|
agent-->-virtcontainers: Process Signalled by SIGKILL
|
||||||
|
virtcontainers->shim-ctr: waitForShim()
|
||||||
|
end
|
||||||
|
shim-ctr-->-virtcontainers: Shim terminated
|
||||||
|
virtcontainers->+agent: SignalProcess(SIGKILL)
|
||||||
|
agent-->-virtcontainers: Process Signalled by SIGKILL
|
||||||
|
virtcontainers->+agent: RemoveContainer()
|
||||||
|
agent-->-virtcontainers: Container Removed
|
||||||
|
Note left of virtcontainers: Container-ctr\nStopped
|
||||||
|
else Container-pod
|
||||||
|
virtcontainers->+shim-pod: waitForShim()
|
||||||
|
alt Timeout exceeded
|
||||||
|
virtcontainers->+agent: SignalProcess(SIGKILL)
|
||||||
|
agent-->shim-pod: WaitProcess() returns
|
||||||
|
agent-->-virtcontainers: Process Signalled by SIGKILL
|
||||||
|
virtcontainers->shim-pod: waitForShim()
|
||||||
|
end
|
||||||
|
shim-pod-->-virtcontainers: Shim terminated
|
||||||
|
virtcontainers->+agent: SignalProcess(SIGKILL)
|
||||||
|
agent-->-virtcontainers: Process Signalled by SIGKILL
|
||||||
|
virtcontainers->+agent: RemoveContainer()
|
||||||
|
agent-->-virtcontainers: Container Removed
|
||||||
|
Note left of virtcontainers: Container-pod\nStopped
|
||||||
|
end
|
||||||
|
end
|
||||||
|
virtcontainers->+agent: DestroySandbox()
|
||||||
|
agent-->-virtcontainers: Sandbox Destroyed
|
||||||
|
virtcontainers->hypervisor: Stop VM
|
||||||
|
Note left of virtcontainers: Sandbox\nStopped
|
||||||
|
virtcontainers->virtcontainers: removeNetwork()
|
||||||
|
virtcontainers->virtcontainers: Execute PostStop Hooks
|
||||||
|
virtcontainers-->Kata Runtime: End of StopSandbox()
|
||||||
|
end
|
||||||
|
Kata Runtime-->CRIO: End of kill
|
||||||
|
CRIO-->CRI: End of StopPodSandbox()
|
||||||
|
|
||||||
|
# Remove the sandbox
|
||||||
|
CRI->CRIO: RemovePodSandbox()
|
||||||
|
CRIO->Kata Runtime: delete
|
||||||
|
Kata Runtime->virtcontainers: DeleteSandbox()
|
||||||
|
loop for each container
|
||||||
|
virtcontainers->virtcontainers: Delete container resources
|
||||||
|
end
|
||||||
|
virtcontainers->virtcontainers: Delete sandbox resources
|
||||||
|
virtcontainers-->Kata Runtime: End of DeleteSandbox()
|
||||||
|
Kata Runtime-->CRIO: End of delete
|
||||||
|
CRIO-->CRI: End of RemovePodSandbox()
|
27
docs/design/arch-images/kata-oci-create.svg
Normal file
After Width: | Height: | Size: 14 KiB |
31
docs/design/arch-images/kata-oci-create.txt
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
Title: Kata Flow
|
||||||
|
participant Docker
|
||||||
|
participant Kata Runtime
|
||||||
|
participant virtcontainers
|
||||||
|
participant hypervisor
|
||||||
|
participant agent
|
||||||
|
participant shim-pod
|
||||||
|
participant shim-ctr
|
||||||
|
participant proxy
|
||||||
|
|
||||||
|
#Docker Create!
|
||||||
|
Docker->Kata Runtime: create
|
||||||
|
Kata Runtime->virtcontainers: CreateSandbox()
|
||||||
|
Note left of virtcontainers: Sandbox\nReady
|
||||||
|
virtcontainers->virtcontainers: createNetwork()
|
||||||
|
virtcontainers->virtcontainers: Execute PreStart Hooks
|
||||||
|
virtcontainers->+hypervisor: Start VM (inside the netns)
|
||||||
|
hypervisor-->-virtcontainers: VM started
|
||||||
|
virtcontainers->proxy: Start Proxy
|
||||||
|
proxy->hypervisor: Connect the VM
|
||||||
|
virtcontainers->+agent: CreateSandbox()
|
||||||
|
agent-->-virtcontainers: Sandbox Created
|
||||||
|
virtcontainers->+agent: CreateContainer()
|
||||||
|
agent-->-virtcontainers: Container Created
|
||||||
|
virtcontainers->shim-pod: Start Shim
|
||||||
|
shim->agent: ReadStdout() (blocking call)
|
||||||
|
shim->agent: ReadStderr() (blocking call)
|
||||||
|
shim->agent: WaitProcess() (blocking call)
|
||||||
|
Note left of virtcontainers: Container\nReady
|
||||||
|
virtcontainers-->Kata Runtime: End of CreateSandbox()
|
||||||
|
Kata Runtime-->Docker: End of create
|
11
docs/design/arch-images/kata-oci-exec.svg
Normal file
After Width: | Height: | Size: 7.8 KiB |
20
docs/design/arch-images/kata-oci-exec.txt
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
Title: Docker Exec
|
||||||
|
participant Docker
|
||||||
|
participant kata-runtime
|
||||||
|
participant virtcontainers
|
||||||
|
participant shim
|
||||||
|
participant hypervisor
|
||||||
|
participant agent
|
||||||
|
participant proxy
|
||||||
|
|
||||||
|
#Docker Exec
|
||||||
|
Docker->kata-runtime: exec
|
||||||
|
kata-runtime->virtcontainers: EnterContainer()
|
||||||
|
virtcontainers->agent: exec
|
||||||
|
agent->virtcontainers: Process started in the container
|
||||||
|
virtcontainers->shim: start shim
|
||||||
|
shim->agent: ReadStdout()
|
||||||
|
shim->agent: ReadStderr()
|
||||||
|
shim->agent: WaitProcess()
|
||||||
|
virtcontainers->kata-runtime: End of EnterContainer()
|
||||||
|
kata-runtime-->Docker: End of exec
|
9
docs/design/arch-images/kata-oci-start.svg
Normal file
After Width: | Height: | Size: 7.3 KiB |
20
docs/design/arch-images/kata-oci-start.txt
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
Title: Docker Start
|
||||||
|
participant Docker
|
||||||
|
participant Kata Runtime
|
||||||
|
participant virtcontainers
|
||||||
|
participant hypervisor
|
||||||
|
participant agent
|
||||||
|
participant shim-pod
|
||||||
|
participant shim-ctr
|
||||||
|
participant proxy
|
||||||
|
|
||||||
|
#Docker Start
|
||||||
|
Docker->Kata Runtime: start
|
||||||
|
Kata Runtime->virtcontainers: StartSandbox()
|
||||||
|
Note left of virtcontainers: Sandbox\nRunning
|
||||||
|
virtcontainers->+agent: StartContainer()
|
||||||
|
agent-->-virtcontainers: Container Started
|
||||||
|
Note left of virtcontainers: Container-pod\nRunning
|
||||||
|
virtcontainers->virtcontainers: Execute PostStart Hooks
|
||||||
|
virtcontainers-->Kata Runtime: End of StartSandbox()
|
||||||
|
Kata Runtime-->Docker: End of start
|
BIN
docs/design/arch-images/network.png
Normal file
After Width: | Height: | Size: 163 KiB |
1
docs/design/arch-images/shimv2.svg
Normal file
After Width: | Height: | Size: 190 KiB |
BIN
docs/design/arch-images/vm-concept-to-tech.png
Normal file
After Width: | Height: | Size: 102 KiB |
654
docs/design/architecture.md
Normal file
@ -0,0 +1,654 @@
|
|||||||
|
# Kata Containers Architecture
|
||||||
|
|
||||||
|
|
||||||
|
* [Overview](#overview)
|
||||||
|
* [Virtualization](#virtualization)
|
||||||
|
* [Guest assets](#guest-assets)
|
||||||
|
* [Guest kernel](#guest-kernel)
|
||||||
|
* [Guest Image](#guest-image)
|
||||||
|
* [Root filesystem image](#root-filesystem-image)
|
||||||
|
* [Initrd image](#initrd-image)
|
||||||
|
* [Agent](#agent)
|
||||||
|
* [Runtime](#runtime)
|
||||||
|
* [Configuration](#configuration)
|
||||||
|
* [Significant OCI commands](#significant-oci-commands)
|
||||||
|
* [create](#create)
|
||||||
|
* [start](#start)
|
||||||
|
* [exec](#exec)
|
||||||
|
* [kill](#kill)
|
||||||
|
* [delete](#delete)
|
||||||
|
* [Proxy](#proxy)
|
||||||
|
* [Shim](#shim)
|
||||||
|
* [Networking](#networking)
|
||||||
|
* [Storage](#storage)
|
||||||
|
* [Kubernetes Support](#kubernetes-support)
|
||||||
|
* [Problem Statement](#problem-statement)
|
||||||
|
* [Containerd](#containerd)
|
||||||
|
* [CRI-O](#cri-o)
|
||||||
|
* [OCI Annotations](#oci-annotations)
|
||||||
|
* [Mixing VM based and namespace based runtimes](#mixing-vm-based-and-namespace-based-runtimes)
|
||||||
|
* [Appendices](#appendices)
|
||||||
|
* [DAX](#dax)
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This is an architectural overview of Kata Containers, based on the 1.5.0 release.
|
||||||
|
|
||||||
|
The two primary deliverables of the Kata Containers project are a container runtime
|
||||||
|
and a CRI friendly shim. There is also a CRI friendly library API behind them.
|
||||||
|
|
||||||
|
The [Kata Containers runtime (`kata-runtime`)](https://github.com/kata-containers/runtime)
|
||||||
|
is compatible with the [OCI](https://github.com/opencontainers) [runtime specification](https://github.com/opencontainers/runtime-spec)
|
||||||
|
and therefore works seamlessly with the
|
||||||
|
[Docker\* Engine](https://www.docker.com/products/docker-engine) pluggable runtime
|
||||||
|
architecture. It also supports the [Kubernetes\* Container Runtime Interface (CRI)](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-node/container-runtime-interface.md)
|
||||||
|
through the [CRI-O\*](https://github.com/kubernetes-incubator/cri-o) and
|
||||||
|
[Containerd CRI Plugin\*](https://github.com/containerd/cri) implementation. In other words, you can transparently
|
||||||
|
select between the [default Docker and CRI shim runtime (runc)](https://github.com/opencontainers/runc)
|
||||||
|
and `kata-runtime`.
|
||||||
|
|
||||||
|
`kata-runtime` creates a QEMU\*/KVM virtual machine for each container or pod,
|
||||||
|
the Docker engine or `kubelet` (Kubernetes) creates respectively.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
The [`containerd-shim-kata-v2` (shown as `shimv2` from this point onwards)](https://github.com/kata-containers/runtime/tree/master/containerd-shim-v2)
|
||||||
|
is another Kata Containers entrypoint, which
|
||||||
|
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata.
|
||||||
|
With `shimv2`, Kubernetes can launch Pod and OCI compatible containers with one shim (the `shimv2`) per Pod instead
|
||||||
|
of `2N+1` shims (a `containerd-shim` and a `kata-shim` for each container and the Pod sandbox itself), and no standalone
|
||||||
|
`kata-proxy` process even if no VSOCK is available.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
The container process is then spawned by
|
||||||
|
[agent](https://github.com/kata-containers/agent), an agent process running
|
||||||
|
as a daemon inside the virtual machine. `kata-agent` runs a gRPC server in
|
||||||
|
the guest using a VIRTIO serial or VSOCK interface which QEMU exposes as a socket
|
||||||
|
file on the host. `kata-runtime` uses a gRPC protocol to communicate with
|
||||||
|
the agent. This protocol allows the runtime to send container management
|
||||||
|
commands to the agent. The protocol is also used to carry the I/O streams (stdout,
|
||||||
|
stderr, stdin) between the containers and the manage engines (e.g. Docker Engine).
|
||||||
|
|
||||||
|
For any given container, both the init process and all potentially executed
|
||||||
|
commands within that container, together with their related I/O streams, need
|
||||||
|
to go through the VIRTIO serial or VSOCK interface exported by QEMU.
|
||||||
|
In the VIRTIO serial case, a [Kata Containers
|
||||||
|
proxy (`kata-proxy`)](https://github.com/kata-containers/proxy) instance is
|
||||||
|
launched for each virtual machine to handle multiplexing and demultiplexing
|
||||||
|
those commands and streams.
|
||||||
|
|
||||||
|
On the host, each container process's removal is handled by a reaper in the higher
|
||||||
|
layers of the container stack. In the case of Docker or containerd it is handled by `containerd-shim`.
|
||||||
|
In the case of CRI-O it is handled by `conmon`. For clarity, for the remainder
|
||||||
|
of this document the term "container process reaper" will be used to refer to
|
||||||
|
either reaper. As Kata Containers processes run inside their own virtual machines,
|
||||||
|
the container process reaper cannot monitor, control
|
||||||
|
or reap them. `kata-runtime` fixes that issue by creating an [additional shim process
|
||||||
|
(`kata-shim`)](https://github.com/kata-containers/shim) between the container process
|
||||||
|
reaper and `kata-proxy`. A `kata-shim` instance will both forward signals and `stdin`
|
||||||
|
streams to the container process on the guest and pass the container `stdout`
|
||||||
|
and `stderr` streams back up the stack to the CRI shim or Docker via the container process
|
||||||
|
reaper. `kata-runtime` creates a `kata-shim` daemon for each container and for each
|
||||||
|
OCI command received to run within an already running container (example, `docker
|
||||||
|
exec`).
|
||||||
|
|
||||||
|
Since Kata Containers version 1.5, the new introduced `shimv2` has integrated the
|
||||||
|
functionalities of the reaper, the `kata-runtime`, the `kata-shim`, and the `kata-proxy`.
|
||||||
|
As a result, there will not be any of the additional processes previously listed.
|
||||||
|
|
||||||
|
The container workload, that is, the actual OCI bundle rootfs, is exported from the
|
||||||
|
host to the virtual machine. In the case where a block-based graph driver is
|
||||||
|
configured, `virtio-scsi` will be used. In all other cases a 9pfs VIRTIO mount point
|
||||||
|
will be used. `kata-agent` uses this mount point as the root filesystem for the
|
||||||
|
container processes.
|
||||||
|
|
||||||
|
## Virtualization
|
||||||
|
|
||||||
|
How Kata Containers maps container concepts to virtual machine technologies, and how this is realized in the multiple
|
||||||
|
hypervisors and VMMs that Kata supports is described within the [virtualization documentation](./virtualization.md)
|
||||||
|
|
||||||
|
## Guest assets
|
||||||
|
|
||||||
|
The hypervisor will launch a virtual machine which includes a minimal guest kernel
|
||||||
|
and a guest image.
|
||||||
|
|
||||||
|
### Guest kernel
|
||||||
|
|
||||||
|
The guest kernel is passed to the hypervisor and used to boot the virtual
|
||||||
|
machine. The default kernel provided in Kata Containers is highly optimized for
|
||||||
|
kernel boot time and minimal memory footprint, providing only those services
|
||||||
|
required by a container workload. This is based on a very current upstream Linux
|
||||||
|
kernel.
|
||||||
|
|
||||||
|
### Guest image
|
||||||
|
|
||||||
|
Kata Containers supports both an `initrd` and `rootfs` based minimal guest image.
|
||||||
|
|
||||||
|
#### Root filesystem image
|
||||||
|
|
||||||
|
The default packaged root filesystem image, sometimes referred to as the "mini O/S", is a
|
||||||
|
highly optimized container bootstrap system based on [Clear Linux](https://clearlinux.org/). It provides an extremely minimal environment and
|
||||||
|
has a highly optimized boot path.
|
||||||
|
|
||||||
|
The only services running in the context of the mini O/S are the init daemon
|
||||||
|
(`systemd`) and the [Agent](#agent). The real workload the user wishes to run
|
||||||
|
is created using libcontainer, creating a container in the same manner that is done
|
||||||
|
by `runc`.
|
||||||
|
|
||||||
|
For example, when `docker run -ti ubuntu date` is run:
|
||||||
|
|
||||||
|
- The hypervisor will boot the mini-OS image using the guest kernel.
|
||||||
|
- `systemd`, running inside the mini-OS context, will launch the `kata-agent` in
|
||||||
|
the same context.
|
||||||
|
- The agent will create a new confined context to run the specified command in
|
||||||
|
(`date` in this example).
|
||||||
|
- The agent will then execute the command (`date` in this example) inside this
|
||||||
|
new context, first setting the root filesystem to the expected Ubuntu\* root
|
||||||
|
filesystem.
|
||||||
|
|
||||||
|
#### Initrd image
|
||||||
|
|
||||||
|
A compressed `cpio(1)` archive, created from a rootfs which is loaded into memory and used as part of the Linux startup process. During startup, the kernel unpacks it into a special instance of a `tmpfs` that becomes the initial root filesystem.
|
||||||
|
|
||||||
|
The only service running in the context of the initrd is the [Agent](#agent) as the init daemon. The real workload the user wishes to run is created using libcontainer, creating a container in the same manner that is done by `runc`.
|
||||||
|
|
||||||
|
## Agent
|
||||||
|
|
||||||
|
[`kata-agent`](https://github.com/kata-containers/agent) is a process running in the
|
||||||
|
guest as a supervisor for managing containers and processes running within
|
||||||
|
those containers.
|
||||||
|
|
||||||
|
The `kata-agent` execution unit is the sandbox. A `kata-agent` sandbox is a container sandbox defined by a set of namespaces (NS, UTS, IPC and PID). `kata-runtime` can
|
||||||
|
run several containers per VM to support container engines that require multiple
|
||||||
|
containers running inside a pod. In the case of docker, `kata-runtime` creates a
|
||||||
|
single container per pod.
|
||||||
|
|
||||||
|
`kata-agent` communicates with the other Kata components over gRPC.
|
||||||
|
It also runs a [`yamux`](https://github.com/hashicorp/yamux) server on the same gRPC URL.
|
||||||
|
|
||||||
|
The `kata-agent` makes use of [`libcontainer`](https://github.com/opencontainers/runc/tree/master/libcontainer)
|
||||||
|
to manage the lifecycle of the container. This way the `kata-agent` reuses most
|
||||||
|
of the code used by [`runc`](https://github.com/opencontainers/runc).
|
||||||
|
|
||||||
|
### Agent gRPC protocol
|
||||||
|
|
||||||
|
placeholder
|
||||||
|
|
||||||
|
## Runtime
|
||||||
|
|
||||||
|
`kata-runtime` is an OCI compatible container runtime and is responsible for handling
|
||||||
|
all commands specified by
|
||||||
|
[the OCI runtime specification](https://github.com/opencontainers/runtime-spec)
|
||||||
|
and launching `kata-shim` instances.
|
||||||
|
|
||||||
|
`kata-runtime` heavily utilizes the
|
||||||
|
[virtcontainers project](https://github.com/containers/virtcontainers), which
|
||||||
|
provides a generic, runtime-specification agnostic, hardware-virtualized containers
|
||||||
|
library.
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
The runtime uses a TOML format configuration file called `configuration.toml`. By
|
||||||
|
default this file is installed in the `/usr/share/defaults/kata-containers`
|
||||||
|
directory and contains various settings such as the paths to the hypervisor,
|
||||||
|
the guest kernel and the mini-OS image.
|
||||||
|
|
||||||
|
Most users will not need to modify the configuration file.
|
||||||
|
|
||||||
|
The file is well commented and provides a few "knobs" that can be used to modify
|
||||||
|
the behavior of the runtime.
|
||||||
|
|
||||||
|
The configuration file is also used to enable runtime [debug output](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#enable-full-debug).
|
||||||
|
|
||||||
|
### Significant OCI commands
|
||||||
|
|
||||||
|
Here we describe how `kata-runtime` handles the most important OCI commands.
|
||||||
|
|
||||||
|
#### `create`
|
||||||
|
|
||||||
|
When handling the OCI
|
||||||
|
[`create`](https://github.com/kata-containers/runtime/blob/master/cli/create.go)
|
||||||
|
command, `kata-runtime` goes through the following steps:
|
||||||
|
|
||||||
|
1. Create the network namespace where we will spawn VM and shims processes.
|
||||||
|
2. Call into the pre-start hooks. One of them should be responsible for creating
|
||||||
|
the `veth` network pair between the host network namespace and the network namespace
|
||||||
|
freshly created.
|
||||||
|
3. Scan the network from the new network namespace, and create a MACVTAP connection
|
||||||
|
between the `veth` interface and a `tap` interface into the VM.
|
||||||
|
4. Start the VM inside the network namespace by providing the `tap` interface
|
||||||
|
previously created.
|
||||||
|
5. Wait for the VM to be ready.
|
||||||
|
6. Start `kata-proxy`, which will connect to the created VM. The `kata-proxy` process
|
||||||
|
will take care of proxying all communications with the VM. Kata has a single proxy
|
||||||
|
per VM.
|
||||||
|
7. Communicate with `kata-agent` (through the proxy) to configure the sandbox
|
||||||
|
inside the VM.
|
||||||
|
8. Communicate with `kata-agent` to create the container, relying on the OCI
|
||||||
|
configuration file `config.json` initially provided to `kata-runtime`. This
|
||||||
|
spawns the container process inside the VM, leveraging the `libcontainer` package.
|
||||||
|
9. Start `kata-shim`, which will connect to the gRPC server socket provided by the `kata-proxy`. `kata-shim` will spawn a few Go routines to parallelize blocking calls `ReadStdout()` , `ReadStderr()` and `WaitProcess()`. Both `ReadStdout()` and `ReadStderr()` are run through infinite loops since `kata-shim` wants the output of those until the container process terminates. `WaitProcess()` is a unique call which returns the exit code of the container process when it terminates inside the VM. Note that `kata-shim` is started inside the network namespace, to allow upper layers to determine which network namespace has been created and by checking the `kata-shim` process. It also creates a new PID namespace by entering into it. This ensures that all `kata-shim` processes belonging to the same container will get killed when the `kata-shim` representing the container process terminates.
|
||||||
|
|
||||||
|
At this point the container process is running inside of the VM, and it is represented
|
||||||
|
on the host system by the `kata-shim` process.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
#### `start`
|
||||||
|
|
||||||
|
With traditional containers, [`start`](https://github.com/kata-containers/runtime/blob/master/cli/start.go) launches a container process in its own set of namespaces. With Kata Containers, the main task of `kata-runtime` is to ask [`kata-agent`](#agent) to start the container workload inside the virtual machine. `kata-runtime` will run through the following steps:
|
||||||
|
|
||||||
|
1. Communicate with `kata-agent` (through the proxy) to start the container workload
|
||||||
|
inside the VM. If, for example, the command to execute inside of the container is `top`,
|
||||||
|
the `kata-shim`'s `ReadStdOut()` will start returning text output for top, and
|
||||||
|
`WaitProcess()` will continue to block as long as the `top` process runs.
|
||||||
|
2. Call into the post-start hooks. Usually, this is a no-op since nothing is provided
|
||||||
|
(this needs clarification)
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
#### `exec`
|
||||||
|
|
||||||
|
OCI [`exec`](https://github.com/kata-containers/runtime/blob/master/cli/exec.go) allows you to run an additional command within an already running
|
||||||
|
container. In Kata Containers, this is handled as follows:
|
||||||
|
|
||||||
|
1. A request is sent to the `kata agent` (through the proxy) to start a new process
|
||||||
|
inside an existing container running within the VM.
|
||||||
|
2. A new `kata-shim` is created within the same network and PID namespaces as the
|
||||||
|
original `kata-shim` representing the container process. This new `kata-shim` is
|
||||||
|
used for the new exec process.
|
||||||
|
|
||||||
|
Now the process started with `exec` is running within the VM, sharing `uts`, `pid`, `mnt` and `ipc` namespaces with the container process.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
#### `kill`
|
||||||
|
|
||||||
|
When sending the OCI [`kill`](https://github.com/kata-containers/runtime/blob/master/cli/kill.go) command, the container runtime should send a
|
||||||
|
[UNIX signal](https://en.wikipedia.org/wiki/Unix_signal) to the container process.
|
||||||
|
A `kill` sending a termination signal such as `SIGKILL` or `SIGTERM` is expected
|
||||||
|
to terminate the container process. In the context of a traditional container,
|
||||||
|
this means stopping the container. For `kata-runtime`, this translates to stopping
|
||||||
|
the container and the VM associated with it.
|
||||||
|
|
||||||
|
1. Send a request to kill the container process to the `kata-agent` (through the proxy).
|
||||||
|
2. Wait for `kata-shim` process to exit.
|
||||||
|
3. Force kill the container process if `kata-shim` process didn't return after a
|
||||||
|
timeout. This is done by communicating with `kata-agent` (connecting the proxy),
|
||||||
|
sending `SIGKILL` signal to the container process inside the VM.
|
||||||
|
4. Wait for `kata-shim` process to exit, and return an error if we reach the
|
||||||
|
timeout again.
|
||||||
|
5. Communicate with `kata-agent` (through the proxy) to remove the container
|
||||||
|
configuration from the VM.
|
||||||
|
6. Communicate with `kata-agent` (through the proxy) to destroy the sandbox
|
||||||
|
configuration from the VM.
|
||||||
|
7. Stop the VM.
|
||||||
|
8. Remove all network configurations inside the network namespace and delete the
|
||||||
|
namespace.
|
||||||
|
9. Execute post-stop hooks.
|
||||||
|
|
||||||
|
If `kill` was invoked with a non-termination signal, this simply signals the container process. Otherwise, everything has been torn down, and the VM has been removed.
|
||||||
|
|
||||||
|
#### `delete`
|
||||||
|
|
||||||
|
[`delete`](https://github.com/kata-containers/runtime/blob/master/cli/delete.go) removes all internal resources related to a container. A running container
|
||||||
|
cannot be deleted unless the OCI runtime is explicitly being asked to, by using
|
||||||
|
`--force` flag.
|
||||||
|
|
||||||
|
If the sandbox is not stopped, but the particular container process returned on
|
||||||
|
its own already, the `kata-runtime` will first go through most of the steps a `kill`
|
||||||
|
would go through for a termination signal. After this process, or if the `sandboxID` was already stopped to begin with, then `kata-runtime` will:
|
||||||
|
|
||||||
|
1. Remove container resources. Every file kept under `/var/{lib,run}/virtcontainers/sandboxes/<sandboxID>/<containerID>`.
|
||||||
|
2. Remove sandbox resources. Every file kept under `/var/{lib,run}/virtcontainers/sandboxes/<sandboxID>`.
|
||||||
|
|
||||||
|
At this point, everything related to the container should have been removed from the host system, and no related process should be running.
|
||||||
|
|
||||||
|
#### `state`
|
||||||
|
|
||||||
|
[`state`](https://github.com/kata-containers/runtime/blob/master/cli/state.go)
|
||||||
|
returns the status of the container. For `kata-runtime`, this means being
|
||||||
|
able to detect if the container is still running by looking at the state of `kata-shim`
|
||||||
|
process representing this container process.
|
||||||
|
|
||||||
|
1. Obtain the container status by checking information stored on disk. (clarification needed)
|
||||||
|
2. Check `kata-shim` process representing the container.
|
||||||
|
3. In case the container status on disk was supposed to be `ready` or `running`,
|
||||||
|
and the `kata-shim` process no longer exists, this involves the detection of a
|
||||||
|
stopped container. This means that before returning the container status,
|
||||||
|
the container has to be properly stopped. Here are the steps involved in this detection:
|
||||||
|
1. Wait for `kata-shim` process to exit.
|
||||||
|
2. Force kill the container process if `kata-shim` process didn't return after a timeout. This is done by communicating with `kata-agent` (connecting the proxy), sending `SIGKILL` signal to the container process inside the VM.
|
||||||
|
3. Wait for `kata-shim` process to exit, and return an error if we reach the timeout again.
|
||||||
|
4. Communicate with `kata-agent` (connecting the proxy) to remove the container configuration from the VM.
|
||||||
|
4. Return container status.
|
||||||
|
|
||||||
|
## Proxy
|
||||||
|
|
||||||
|
Communication with the VM can be achieved by either `virtio-serial` or, if the host
|
||||||
|
kernel is newer than v4.8, a virtual socket, `vsock` can be used. The default is `virtio-serial`.
|
||||||
|
|
||||||
|
The VM will likely be running multiple container processes. In the event `virtio-serial`
|
||||||
|
is used, the I/O streams associated with each process need to be multiplexed and demultiplexed on the host. On systems with `vsock` support, this component becomes optional.
|
||||||
|
|
||||||
|
`kata-proxy` is a process offering access to the VM [`kata-agent`](https://github.com/kata-containers/agent)
|
||||||
|
to multiple `kata-shim` and `kata-runtime` clients associated with the VM. Its
|
||||||
|
main role is to route the I/O streams and signals between each `kata-shim`
|
||||||
|
instance and the `kata-agent`.
|
||||||
|
`kata-proxy` connects to `kata-agent` on a Unix domain socket that `kata-runtime` provides
|
||||||
|
while spawning `kata-proxy`.
|
||||||
|
`kata-proxy` uses [`yamux`](https://github.com/hashicorp/yamux) to multiplex gRPC
|
||||||
|
requests on its connection to the `kata-agent`.
|
||||||
|
|
||||||
|
When proxy type is configured as `proxyBuiltIn`, we do not spawn a separate
|
||||||
|
process to proxy gRPC connections. Instead a built-in Yamux gRPC dialer is used to connect
|
||||||
|
directly to `kata-agent`. This is used by CRI container runtime server `frakti` which
|
||||||
|
calls directly into `kata-runtime`.
|
||||||
|
|
||||||
|
## Shim
|
||||||
|
|
||||||
|
A container process reaper, such as Docker's `containerd-shim` or CRI-O's `conmon`,
|
||||||
|
is designed around the assumption that it can monitor and reap the actual container
|
||||||
|
process. As the container process reaper runs on the host, it cannot directly
|
||||||
|
monitor a process running within a virtual machine. At most it can see the QEMU
|
||||||
|
process, but that is not enough. With Kata Containers, `kata-shim` acts as the
|
||||||
|
container process that the container process reaper can monitor. Therefore
|
||||||
|
`kata-shim` needs to handle all container I/O streams (`stdout`, `stdin` and `stderr`)
|
||||||
|
and forward all signals the container process reaper decides to send to the container
|
||||||
|
process.
|
||||||
|
|
||||||
|
`kata-shim` has an implicit knowledge about which VM agent will handle those streams
|
||||||
|
and signals and thus acts as an encapsulation layer between the container process
|
||||||
|
reaper and the `kata-agent`. `kata-shim`:
|
||||||
|
|
||||||
|
- Connects to `kata-proxy` on a Unix domain socket. The socket URL is passed from
|
||||||
|
`kata-runtime` to `kata-shim` when the former spawns the latter along with a
|
||||||
|
`containerID` and `execID`. The `containerID` and `execID` are used to identify
|
||||||
|
the true container process that the shim process will be shadowing or representing.
|
||||||
|
- Forwards the standard input stream from the container process reaper into
|
||||||
|
`kata-proxy` using gRPC `WriteStdin` gRPC API.
|
||||||
|
- Reads the standard output/error from the container process.
|
||||||
|
- Forwards signals it receives from the container process reaper to `kata-proxy`
|
||||||
|
using `SignalProcessRequest` API.
|
||||||
|
- Monitors terminal changes and forwards them to `kata-proxy` using gRPC `TtyWinResize`
|
||||||
|
API.
|
||||||
|
|
||||||
|
|
||||||
|
## Networking
|
||||||
|
|
||||||
|
Containers will typically live in their own, possibly shared, networking namespace.
|
||||||
|
At some point in a container lifecycle, container engines will set up that namespace
|
||||||
|
to add the container to a network which is isolated from the host network, but
|
||||||
|
which is shared between containers.
|
||||||
|
|
||||||
|
In order to do so, container engines will usually add one end of a virtual
|
||||||
|
ethernet (`veth`) pair into the container networking namespace. The other end of
|
||||||
|
the `veth` pair is added to the host networking namespace.
|
||||||
|
|
||||||
|
This is a very namespace-centric approach as many hypervisors (in particular QEMU)
|
||||||
|
cannot handle `veth` interfaces. Typically, `TAP` interfaces are created for VM
|
||||||
|
connectivity.
|
||||||
|
|
||||||
|
To overcome incompatibility between typical container engines expectations
|
||||||
|
and virtual machines, `kata-runtime` networking transparently connects `veth`
|
||||||
|
interfaces with `TAP` ones using MACVTAP:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Kata Containers supports both
|
||||||
|
[CNM](https://github.com/docker/libnetwork/blob/master/docs/design.md#the-container-network-model)
|
||||||
|
and [CNI](https://github.com/containernetworking/cni) for networking management.
|
||||||
|
|
||||||
|
### CNM
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
__CNM lifecycle__
|
||||||
|
|
||||||
|
1. `RequestPool`
|
||||||
|
|
||||||
|
2. `CreateNetwork`
|
||||||
|
|
||||||
|
3. `RequestAddress`
|
||||||
|
|
||||||
|
4. `CreateEndPoint`
|
||||||
|
|
||||||
|
5. `CreateContainer`
|
||||||
|
|
||||||
|
6. Create `config.json`
|
||||||
|
|
||||||
|
7. Create PID and network namespace
|
||||||
|
|
||||||
|
8. `ProcessExternalKey`
|
||||||
|
|
||||||
|
9. `JoinEndPoint`
|
||||||
|
|
||||||
|
10. `LaunchContainer`
|
||||||
|
|
||||||
|
11. Launch
|
||||||
|
|
||||||
|
12. Run container
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
__Runtime network setup with CNM__
|
||||||
|
|
||||||
|
1. Read `config.json`
|
||||||
|
|
||||||
|
2. Create the network namespace
|
||||||
|
|
||||||
|
3. Call the `prestart` hook (from inside the netns)
|
||||||
|
|
||||||
|
4. Scan network interfaces inside netns and get the name of the interface
|
||||||
|
created by the `prestart` hook
|
||||||
|
|
||||||
|
5. Create bridge, TAP, and link all together with network interface previously
|
||||||
|
created
|
||||||
|
|
||||||
|
### Network Hotplug
|
||||||
|
|
||||||
|
Kata Containers has developed a set of network sub-commands and APIs to add, list and
|
||||||
|
remove a guest network endpoint and to manipulate the guest route table.
|
||||||
|
|
||||||
|
The following diagram illustrates the Kata Containers network hotplug workflow.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
## Storage
|
||||||
|
Container workloads are shared with the virtualized environment through [9pfs](https://www.kernel.org/doc/Documentation/filesystems/9p.txt).
|
||||||
|
The devicemapper storage driver is a special case. The driver uses dedicated block
|
||||||
|
devices rather than formatted filesystems, and operates at the block level rather
|
||||||
|
than the file level. This knowledge is used to directly use the underlying block
|
||||||
|
device instead of the overlay file system for the container root file system. The
|
||||||
|
block device maps to the top read-write layer for the overlay. This approach gives
|
||||||
|
much better I/O performance compared to using 9pfs to share the container file system.
|
||||||
|
|
||||||
|
The approach above does introduce a limitation in terms of dynamic file copy
|
||||||
|
in/out of the container using the `docker cp` operations. The copy operation from
|
||||||
|
host to container accesses the mounted file system on the host-side. This is
|
||||||
|
not expected to work and may lead to inconsistencies as the block device will
|
||||||
|
be simultaneously written to from two different mounts. The copy operation from
|
||||||
|
container to host will work, provided the user calls `sync(1)` from within the
|
||||||
|
container prior to the copy to make sure any outstanding cached data is written
|
||||||
|
to the block device.
|
||||||
|
|
||||||
|
```
|
||||||
|
docker cp [OPTIONS] CONTAINER:SRC_PATH HOST:DEST_PATH
|
||||||
|
docker cp [OPTIONS] HOST:SRC_PATH CONTAINER:DEST_PATH
|
||||||
|
```
|
||||||
|
|
||||||
|
Kata Containers has the ability to hotplug and remove block devices, which makes it
|
||||||
|
possible to use block devices for containers started after the VM has been launched.
|
||||||
|
|
||||||
|
Users can check to see if the container uses the devicemapper block device as its
|
||||||
|
rootfs by calling `mount(8)` within the container. If the devicemapper block device
|
||||||
|
is used, `/` will be mounted on `/dev/vda`. Users can disable direct mounting
|
||||||
|
of the underlying block device through the runtime configuration.
|
||||||
|
|
||||||
|
## Kubernetes support
|
||||||
|
|
||||||
|
[Kubernetes\*](https://github.com/kubernetes/kubernetes/) is a popular open source
|
||||||
|
container orchestration engine. In Kubernetes, a set of containers sharing resources
|
||||||
|
such as networking, storage, mount, PID, etc. is called a
|
||||||
|
[Pod](https://kubernetes.io/docs/user-guide/pods/).
|
||||||
|
A node can have multiple pods, but at a minimum, a node within a Kubernetes cluster
|
||||||
|
only needs to run a container runtime and a container agent (called a
|
||||||
|
[Kubelet](https://kubernetes.io/docs/admin/kubelet/)).
|
||||||
|
|
||||||
|
A Kubernetes cluster runs a control plane where a scheduler (typically running on a
|
||||||
|
dedicated master node) calls into a compute Kubelet. This Kubelet instance is
|
||||||
|
responsible for managing the lifecycle of pods within the nodes and eventually relies
|
||||||
|
on a container runtime to handle execution. The Kubelet architecture decouples
|
||||||
|
lifecycle management from container execution through the dedicated
|
||||||
|
`gRPC` based [Container Runtime Interface (CRI)](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/container-runtime-interface-v1.md).
|
||||||
|
|
||||||
|
In other words, a Kubelet is a CRI client and expects a CRI implementation to
|
||||||
|
handle the server side of the interface.
|
||||||
|
[CRI-O\*](https://github.com/kubernetes-incubator/cri-o) and [Containerd CRI Plugin\*](https://github.com/containerd/cri) are CRI implementations that rely on [OCI](https://github.com/opencontainers/runtime-spec)
|
||||||
|
compatible runtimes for managing container instances.
|
||||||
|
|
||||||
|
Kata Containers is an officially supported CRI-O and Containerd CRI Plugin runtime. It is OCI compatible and therefore aligns with project's architecture and requirements.
|
||||||
|
However, due to the fact that Kubernetes execution units are sets of containers (also
|
||||||
|
known as pods) rather than single containers, the Kata Containers runtime needs to
|
||||||
|
get extra information to seamlessly integrate with Kubernetes.
|
||||||
|
|
||||||
|
### Problem statement
|
||||||
|
|
||||||
|
The Kubernetes\* execution unit is a pod that has specifications detailing constraints
|
||||||
|
such as namespaces, groups, hardware resources, security contexts, *etc* shared by all
|
||||||
|
the containers within that pod.
|
||||||
|
By default the Kubelet will send a container creation request to its CRI runtime for
|
||||||
|
each pod and container creation. Without additional metadata from the CRI runtime,
|
||||||
|
the Kata Containers runtime will thus create one virtual machine for each pod and for
|
||||||
|
each container within a pod. However, the task of providing the Kubernetes pod semantics
|
||||||
|
when creating one virtual machine for each container within the same pod is complex given
|
||||||
|
the resources of these virtual machines (such as networking or PID) need to be shared.
|
||||||
|
|
||||||
|
The challenge with Kata Containers when working as a Kubernetes\* runtime is thus to know
|
||||||
|
when to create a full virtual machine (for pods) and when to create a new container inside
|
||||||
|
a previously created virtual machine. In both cases it will get called with very similar
|
||||||
|
arguments, so it needs the help of the Kubernetes CRI runtime to be able to distinguish a
|
||||||
|
pod creation request from a container one.
|
||||||
|
|
||||||
|
### Containerd
|
||||||
|
|
||||||
|
As of Kata Containers 1.5, using `shimv2` with containerd 1.2.0 or above is the preferred
|
||||||
|
way to run Kata Containers with Kubernetes ([see the howto](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers)).
|
||||||
|
CRI-O will catch up soon ([`kubernetes-sigs/cri-o#2024`](https://github.com/kubernetes-sigs/cri-o/issues/2024)).
|
||||||
|
|
||||||
|
Refer to the following how-to guides:
|
||||||
|
|
||||||
|
- [How to use Kata Containers and Containerd](/how-to/containerd-kata.md)
|
||||||
|
- [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md)
|
||||||
|
|
||||||
|
### CRI-O
|
||||||
|
|
||||||
|
#### OCI annotations
|
||||||
|
|
||||||
|
In order for the Kata Containers runtime (or any virtual machine based OCI compatible
|
||||||
|
runtime) to be able to understand if it needs to create a full virtual machine or if it
|
||||||
|
has to create a new container inside an existing pod's virtual machine, CRI-O adds
|
||||||
|
specific annotations to the OCI configuration file (`config.json`) which is passed to
|
||||||
|
the OCI compatible runtime.
|
||||||
|
|
||||||
|
Before calling its runtime, CRI-O will always add a `io.kubernetes.cri-o.ContainerType`
|
||||||
|
annotation to the `config.json` configuration file it produces from the Kubelet CRI
|
||||||
|
request. The `io.kubernetes.cri-o.ContainerType` annotation can either be set to `sandbox`
|
||||||
|
or `container`. Kata Containers will then use this annotation to decide if it needs to
|
||||||
|
respectively create a virtual machine or a container inside a virtual machine associated
|
||||||
|
with a Kubernetes pod:
|
||||||
|
|
||||||
|
```Go
|
||||||
|
containerType, err := ociSpec.ContainerType()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
handleFactory(ctx, runtimeConfig)
|
||||||
|
|
||||||
|
disableOutput := noNeedForOutput(detach, ociSpec.Process.Terminal)
|
||||||
|
|
||||||
|
var process vc.Process
|
||||||
|
switch containerType {
|
||||||
|
case vc.PodSandbox:
|
||||||
|
process, err = createSandbox(ctx, ociSpec, runtimeConfig, containerID, bundlePath, console, disableOutput, systemdCgroup)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
case vc.PodContainer:
|
||||||
|
process, err = createContainer(ctx, ociSpec, containerID, bundlePath, console, disableOutput)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Mixing VM based and namespace based runtimes
|
||||||
|
|
||||||
|
> **Note:** Since Kubernetes 1.12, the [`Kubernetes RuntimeClass`](/how-to/containerd-kata.md#kubernetes-runtimeclass)
|
||||||
|
> has been supported and the user can specify runtime without the non-standardized annotations.
|
||||||
|
|
||||||
|
One interesting evolution of the CRI-O support for `kata-runtime` is the ability
|
||||||
|
to run virtual machine based pods alongside namespace ones. With CRI-O and Kata
|
||||||
|
Containers, one can introduce the concept of workload trust inside a Kubernetes
|
||||||
|
cluster.
|
||||||
|
|
||||||
|
A cluster operator can now tag (through Kubernetes annotations) container workloads
|
||||||
|
as `trusted` or `untrusted`. The former labels known to be safe workloads while
|
||||||
|
the latter describes potentially malicious or misbehaving workloads that need the
|
||||||
|
highest degree of isolation. In a software development context, an example of a `trusted` workload would be a containerized continuous integration engine whereas all
|
||||||
|
developers' applications would be `untrusted` by default. Developers' workloads can
|
||||||
|
be buggy, unstable or even include malicious code and thus from a security perspective
|
||||||
|
it makes sense to tag them as `untrusted`. A CRI-O and Kata Containers based
|
||||||
|
Kubernetes cluster handles this use case transparently as long as the deployed
|
||||||
|
containers are properly tagged. All `untrusted` containers will be handled by Kata Containers and thus run in a hardware virtualized secure sandbox while `runc`, for
|
||||||
|
example, could handle the `trusted` ones.
|
||||||
|
|
||||||
|
CRI-O's default behavior is to trust all pods, except when they're annotated with
|
||||||
|
`io.kubernetes.cri-o.TrustedSandbox` set to `false`. The default CRI-O trust level
|
||||||
|
is set through its `configuration.toml` configuration file. Generally speaking,
|
||||||
|
the CRI-O runtime selection between its trusted runtime (typically `runc`) and its untrusted one (`kata-runtime`) is a function of the pod `Privileged` setting, the `io.kubernetes.cri-o.TrustedSandbox` annotation value, and the default CRI-O trust
|
||||||
|
level. When a pod is `Privileged`, the runtime will always be `runc`. However, when
|
||||||
|
a pod is **not** `Privileged` the runtime selection is done as follows:
|
||||||
|
|
||||||
|
| | `io.kubernetes.cri-o.TrustedSandbox` not set | `io.kubernetes.cri-o.TrustedSandbox` = `true` | `io.kubernetes.cri-o.TrustedSandbox` = `false` |
|
||||||
|
| :--- | :---: | :---: | :---: |
|
||||||
|
| Default CRI-O trust level: `trusted` | runc | runc | Kata Containers |
|
||||||
|
| Default CRI-O trust level: `untrusted` | Kata Containers | Kata Containers | Kata Containers |
|
||||||
|
|
||||||
|
|
||||||
|
# Appendices
|
||||||
|
|
||||||
|
## DAX
|
||||||
|
|
||||||
|
Kata Containers utilizes the Linux kernel DAX [(Direct Access filesystem)](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/dax.txt)
|
||||||
|
feature to efficiently map some host-side files into the guest VM space.
|
||||||
|
In particular, Kata Containers uses the QEMU NVDIMM feature to provide a
|
||||||
|
memory-mapped virtual device that can be used to DAX map the virtual machine's
|
||||||
|
root filesystem into the guest memory address space.
|
||||||
|
|
||||||
|
Mapping files using DAX provides a number of benefits over more traditional VM
|
||||||
|
file and device mapping mechanisms:
|
||||||
|
|
||||||
|
- Mapping as a direct access devices allows the guest to directly access
|
||||||
|
the host memory pages (such as via Execute In Place (XIP)), bypassing the guest
|
||||||
|
page cache. This provides both time and space optimizations.
|
||||||
|
- Mapping as a direct access device inside the VM allows pages from the
|
||||||
|
host to be demand loaded using page faults, rather than having to make requests
|
||||||
|
via a virtualized device (causing expensive VM exits/hypercalls), thus providing
|
||||||
|
a speed optimization.
|
||||||
|
- Utilizing `MAP_SHARED` shared memory on the host allows the host to efficiently
|
||||||
|
share pages.
|
||||||
|
|
||||||
|
Kata Containers uses the following steps to set up the DAX mappings:
|
||||||
|
1. QEMU is configured with an NVDIMM memory device, with a memory file
|
||||||
|
backend to map in the host-side file into the virtual NVDIMM space.
|
||||||
|
2. The guest kernel command line mounts this NVDIMM device with the DAX
|
||||||
|
feature enabled, allowing direct page mapping and access, thus bypassing the
|
||||||
|
guest page cache.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Information on the use of NVDIMM via QEMU is available in the [QEMU source code](http://git.qemu-project.org/?p=qemu.git;a=blob;f=docs/nvdimm.txt;hb=HEAD)
|
335
docs/design/host-cgroups.md
Normal file
@ -0,0 +1,335 @@
|
|||||||
|
- [Host cgroup management](#host-cgroup-management)
|
||||||
|
- [Introduction](#introduction)
|
||||||
|
- [`SandboxCgroupOnly` enabled](#sandboxcgrouponly-enabled)
|
||||||
|
- [What does Kata do in this configuration?](#what-does-kata-do-in-this-configuration)
|
||||||
|
- [Why create a Kata-cgroup under the parent cgroup?](#why-create-a-kata-cgroup-under-the-parent-cgroup)
|
||||||
|
- [Improvements](#improvements)
|
||||||
|
- [`SandboxCgroupOnly` disabled (default, legacy)](#sandboxcgrouponly-disabled-default-legacy)
|
||||||
|
- [What does this method do?](#what-does-this-method-do)
|
||||||
|
- [Impact](#impact)
|
||||||
|
- [Supported cgroups](#supported-cgroups)
|
||||||
|
- [Cgroups V1](#cgroups-v1)
|
||||||
|
- [Cgroups V2](#cgroups-v2)
|
||||||
|
- [Distro Support](#distro-support)
|
||||||
|
- [Summary](#summary)
|
||||||
|
|
||||||
|
# Host cgroup management
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
In Kata Containers, workloads run in a virtual machine that is managed by a virtual
|
||||||
|
machine monitor (VMM) running on the host. As a result, Kata Containers run over two layers of cgroups. The
|
||||||
|
first layer is in the guest where the workload is placed, while the second layer is on the host where the
|
||||||
|
VMM and associated threads are running.
|
||||||
|
|
||||||
|
The OCI [runtime specification][linux-config] provides guidance on where the container cgroups should be placed:
|
||||||
|
|
||||||
|
> [`cgroupsPath`][cgroupspath]: (string, OPTIONAL) path to the cgroups. It can be used to either control the cgroups
|
||||||
|
> hierarchy for containers or to run a new process in an existing container
|
||||||
|
|
||||||
|
cgroups are hierarchical, and this can be seen with the following pod example:
|
||||||
|
|
||||||
|
- Pod 1: `cgroupsPath=/kubepods/pod1`
|
||||||
|
- Container 1:
|
||||||
|
`cgroupsPath=/kubepods/pod1/container1`
|
||||||
|
- Container 2:
|
||||||
|
`cgroupsPath=/kubepods/pod1/container2`
|
||||||
|
|
||||||
|
- Pod 2: `cgroupsPath=/kubepods/pod2`
|
||||||
|
- Container 1:
|
||||||
|
`cgroupsPath=/kubepods/pod2/container1`
|
||||||
|
- Container 2:
|
||||||
|
`cgroupsPath=/kubepods/pod2/container2`
|
||||||
|
|
||||||
|
Depending on the upper-level orchestrator, the cgroup under which the pod is placed is
|
||||||
|
managed by the orchestrator. In the case of Kubernetes, the pod-cgroup is created by Kubelet,
|
||||||
|
while the container cgroups are to be handled by the runtime. Kubelet will size the pod-cgroup
|
||||||
|
based on the container resource requirements.
|
||||||
|
|
||||||
|
Kata Containers introduces a non-negligible overhead for running a sandbox (pod). Based on this, two scenarios are possible:
|
||||||
|
1) The upper-layer orchestrator takes the overhead of running a sandbox into account when sizing the pod-cgroup, or
|
||||||
|
2) Kata Containers do not fully constrain the VMM and associated processes, instead placing a subset of them outside of the pod-cgroup.
|
||||||
|
|
||||||
|
Kata Containers provides two options for how cgroups are handled on the host. Selection of these options is done through
|
||||||
|
the `SandboxCgroupOnly` flag within the Kata Containers [configuration](https://github.com/kata-containers/runtime#configuration)
|
||||||
|
file.
|
||||||
|
|
||||||
|
## `SandboxCgroupOnly` enabled
|
||||||
|
|
||||||
|
With `SandboxCgroupOnly` enabled, it is expected that the parent cgroup is sized to take the overhead of running
|
||||||
|
a sandbox into account. This is ideal, as all the applicable Kata Containers components can be placed within the
|
||||||
|
given cgroup-path.
|
||||||
|
|
||||||
|
In the context of Kubernetes, Kubelet will size the pod-cgroup to take the overhead of running a Kata-based sandbox
|
||||||
|
into account. This will be feasible in the 1.16 Kubernetes release through the `PodOverhead` feature.
|
||||||
|
|
||||||
|
```
|
||||||
|
+----------------------------------------------------------+
|
||||||
|
| +---------------------------------------------------+ |
|
||||||
|
| | +---------------------------------------------+ | |
|
||||||
|
| | | +--------------------------------------+ | | |
|
||||||
|
| | | | kata-shimv2, VMM and threads: | | | |
|
||||||
|
| | | | (VMM, IO-threads, vCPU threads, etc)| | | |
|
||||||
|
| | | | | | | |
|
||||||
|
| | | | kata_<sandbox-id> | | | |
|
||||||
|
| | | +--------------------------------------+ | | |
|
||||||
|
| | | | | |
|
||||||
|
| | |Pod 1 | | |
|
||||||
|
| | +---------------------------------------------+ | |
|
||||||
|
| | | |
|
||||||
|
| | +---------------------------------------------+ | |
|
||||||
|
| | | +--------------------------------------+ | | |
|
||||||
|
| | | | kata-shimv2, VMM and threads: | | | |
|
||||||
|
| | | | (VMM, IO-threads, vCPU threads, etc)| | | |
|
||||||
|
| | | | | | | |
|
||||||
|
| | | | kata_<sandbox-id> | | | |
|
||||||
|
| | | +--------------------------------------+ | | |
|
||||||
|
| | |Pod 2 | | |
|
||||||
|
| | +---------------------------------------------+ | |
|
||||||
|
| |kubepods | |
|
||||||
|
| +---------------------------------------------------+ |
|
||||||
|
| |
|
||||||
|
|Node |
|
||||||
|
+----------------------------------------------------------+
|
||||||
|
```
|
||||||
|
|
||||||
|
### What does Kata do in this configuration?
|
||||||
|
1. Given a `PodSandbox` container creation, let:
|
||||||
|
|
||||||
|
```
|
||||||
|
podCgroup=Parent(container.CgroupsPath)
|
||||||
|
KataSandboxCgroup=<podCgroup>/kata_<PodSandboxID>
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Create the cgroup, `KataSandboxCgroup`
|
||||||
|
|
||||||
|
3. Join the `KataSandboxCgroup`
|
||||||
|
|
||||||
|
Any process created by the runtime will be created in `KataSandboxCgroup`.
|
||||||
|
The runtime will limit the cgroup in the host only if the sandbox doesn't have a
|
||||||
|
container type annotation, but the caller is free to set the proper limits for the `podCgroup`.
|
||||||
|
|
||||||
|
In the example above the pod cgroups are `/kubepods/pod1` and `/kubepods/pod2`.
|
||||||
|
Kata creates the unrestricted sandbox cgroup under the pod cgroup.
|
||||||
|
|
||||||
|
### Why create a Kata-cgroup under the parent cgroup?
|
||||||
|
|
||||||
|
`Docker` does not have a notion of pods, and will not create a cgroup directory
|
||||||
|
to place a particular container in (i.e., all containers would be in a path like
|
||||||
|
`/docker/container-id`). To simplify the implementation and continue to support `Docker`,
|
||||||
|
Kata Containers creates the sandbox-cgroup, in the case of Kubernetes, or a container cgroup, in the case
|
||||||
|
of docker.
|
||||||
|
|
||||||
|
### Improvements
|
||||||
|
|
||||||
|
- Get statistics about pod resources
|
||||||
|
|
||||||
|
If the Kata caller wants to know the resource usage on the host it can get
|
||||||
|
statistics from the pod cgroup. All cgroups stats in the hierarchy will include
|
||||||
|
the Kata overhead. This gives the possibility of gathering usage-statics at the
|
||||||
|
pod level and the container level.
|
||||||
|
|
||||||
|
- Better host resource isolation
|
||||||
|
|
||||||
|
Because the Kata runtime will place all the Kata processes in the pod cgroup,
|
||||||
|
the resource limits that the caller applies to the pod cgroup will affect all
|
||||||
|
processes that belong to the Kata sandbox in the host. This will improve the
|
||||||
|
isolation on the host, preventing Kata from becoming a noisy neighbor.
|
||||||
|
|
||||||
|
## `SandboxCgroupOnly` disabled (default, legacy)
|
||||||
|
|
||||||
|
If the cgroup provided to Kata is not sized appropriately, instability will be
|
||||||
|
introduced when fully constraining Kata components, and the user-workload will
|
||||||
|
see a subset of resources that were requested. Based on this, the default
|
||||||
|
handling for Kata Containers is to not fully constrain the VMM and Kata
|
||||||
|
components on the host.
|
||||||
|
|
||||||
|
```
|
||||||
|
+----------------------------------------------------------+
|
||||||
|
| +---------------------------------------------------+ |
|
||||||
|
| | +---------------------------------------------+ | |
|
||||||
|
| | | +--------------------------------------+ | | |
|
||||||
|
| | | |Container 1 |-|Container 2 | | | |
|
||||||
|
| | | | |-| | | | |
|
||||||
|
| | | | Shim+container1 |-| Shim+container2 | | | |
|
||||||
|
| | | +--------------------------------------+ | | |
|
||||||
|
| | | | | |
|
||||||
|
| | |Pod 1 | | |
|
||||||
|
| | +---------------------------------------------+ | |
|
||||||
|
| | | |
|
||||||
|
| | +---------------------------------------------+ | |
|
||||||
|
| | | +--------------------------------------+ | | |
|
||||||
|
| | | |Container 1 |-|Container 2 | | | |
|
||||||
|
| | | | |-| | | | |
|
||||||
|
| | | | Shim+container1 |-| Shim+container2 | | | |
|
||||||
|
| | | +--------------------------------------+ | | |
|
||||||
|
| | | | | |
|
||||||
|
| | |Pod 2 | | |
|
||||||
|
| | +---------------------------------------------+ | |
|
||||||
|
| |kubepods | |
|
||||||
|
| +---------------------------------------------------+ |
|
||||||
|
| +---------------------------------------------------+ |
|
||||||
|
| | Hypervisor | |
|
||||||
|
| |Kata | |
|
||||||
|
| +---------------------------------------------------+ |
|
||||||
|
| |
|
||||||
|
|Node |
|
||||||
|
+----------------------------------------------------------+
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### What does this method do?
|
||||||
|
|
||||||
|
1. Given a container creation let `containerCgroupHost=container.CgroupsPath`
|
||||||
|
1. Rename `containerCgroupHost` path to add `kata_`
|
||||||
|
1. Let `PodCgroupPath=PodSandboxContainerCgroup` where `PodSandboxContainerCgroup` is the cgroup of a container of type `PodSandbox`
|
||||||
|
1. Limit the `PodCgroupPath` with the sum of all the container limits in the Sandbox
|
||||||
|
1. Move only vCPU threads of hypervisor to `PodCgroupPath`
|
||||||
|
1. Per each container, move its `kata-shim` to its own `containerCgroupHost`
|
||||||
|
1. Move hypervisor and applicable threads to memory cgroup `/kata`
|
||||||
|
|
||||||
|
_Note_: the Kata Containers runtime will not add all the hypervisor threads to
|
||||||
|
the cgroup path requested, only vCPUs. These threads are run unconstrained.
|
||||||
|
|
||||||
|
This mitigates the risk of the VMM and other threads receiving an out of memory scenario (`OOM`).
|
||||||
|
|
||||||
|
|
||||||
|
#### Impact
|
||||||
|
|
||||||
|
If resources are reserved at a system level to account for the overheads of
|
||||||
|
running sandbox containers, this configuration can be utilized with adequate
|
||||||
|
stability. In this scenario, non-negligible amounts of CPU and memory will be
|
||||||
|
utilized unaccounted for on the host.
|
||||||
|
|
||||||
|
[linux-config]: https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md
|
||||||
|
[cgroupspath]: https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path
|
||||||
|
|
||||||
|
# Supported cgroups
|
||||||
|
|
||||||
|
Kata Containers supports cgroups `v1` and `v2`. In the following sections each cgroup is
|
||||||
|
described briefly and what changes are needed in Kata Containers to support it.
|
||||||
|
|
||||||
|
## Cgroups V1
|
||||||
|
|
||||||
|
`Cgroups V1` are under a [`tmpfs`][1] filesystem mounted at `/sys/fs/cgroup`, where each cgroup is
|
||||||
|
mounted under a separate cgroup filesystem. A `Cgroups v1` hierarchy may look like the following
|
||||||
|
diagram:
|
||||||
|
|
||||||
|
```
|
||||||
|
/sys/fs/cgroup/
|
||||||
|
├── blkio
|
||||||
|
│ ├── cgroup.procs
|
||||||
|
│ └── tasks
|
||||||
|
├── cpu -> cpu,cpuacct
|
||||||
|
├── cpuacct -> cpu,cpuacct
|
||||||
|
├── cpu,cpuacct
|
||||||
|
│ ├── cgroup.procs
|
||||||
|
│ └── tasks
|
||||||
|
├── cpuset
|
||||||
|
│ ├── cgroup.procs
|
||||||
|
│ └── tasks
|
||||||
|
├── devices
|
||||||
|
│ ├── cgroup.procs
|
||||||
|
│ └── tasks
|
||||||
|
├── freezer
|
||||||
|
│ ├── cgroup.procs
|
||||||
|
│ └── tasks
|
||||||
|
├── hugetlb
|
||||||
|
│ ├── cgroup.procs
|
||||||
|
│ └── tasks
|
||||||
|
├── memory
|
||||||
|
│ ├── cgroup.procs
|
||||||
|
│ └── tasks
|
||||||
|
├── net_cls -> net_cls,net_prio
|
||||||
|
├── net_cls,net_prio
|
||||||
|
│ ├── cgroup.procs
|
||||||
|
│ └── tasks
|
||||||
|
├── net_prio -> net_cls,net_prio
|
||||||
|
├── perf_event
|
||||||
|
│ ├── cgroup.procs
|
||||||
|
│ └── tasks
|
||||||
|
├── pids
|
||||||
|
│ ├── cgroup.procs
|
||||||
|
│ └── tasks
|
||||||
|
└── systemd
|
||||||
|
├── cgroup.procs
|
||||||
|
└── tasks
|
||||||
|
```
|
||||||
|
|
||||||
|
A process can join a cgroup by writing its process id (`pid`) to `cgroup.procs` file,
|
||||||
|
or join a cgroup partially by writing the task (thread) id (`tid`) to the `tasks` file.
|
||||||
|
|
||||||
|
Kata Containers supports `v1` by default and no change in the configuration file is needed.
|
||||||
|
To know more about `cgroups v1`, see [cgroupsv1(7)][2].
|
||||||
|
|
||||||
|
## Cgroups V2
|
||||||
|
|
||||||
|
`Cgroups v2` are also known as unified cgroups, unlike `cgroups v1`, the cgroups are
|
||||||
|
mounted under the same cgroup filesystem. A `Cgroups v2` hierarchy may look like the following
|
||||||
|
diagram:
|
||||||
|
|
||||||
|
```
|
||||||
|
/sys/fs/cgroup/system.slice
|
||||||
|
├── cgroup.controllers
|
||||||
|
├── cgroup.events
|
||||||
|
├── cgroup.freeze
|
||||||
|
├── cgroup.max.depth
|
||||||
|
├── cgroup.max.descendants
|
||||||
|
├── cgroup.procs
|
||||||
|
├── cgroup.stat
|
||||||
|
├── cgroup.subtree_control
|
||||||
|
├── cgroup.threads
|
||||||
|
├── cgroup.type
|
||||||
|
├── cpu.max
|
||||||
|
├── cpu.pressure
|
||||||
|
├── cpu.stat
|
||||||
|
├── cpu.weight
|
||||||
|
├── cpu.weight.nice
|
||||||
|
├── io.bfq.weight
|
||||||
|
├── io.latency
|
||||||
|
├── io.max
|
||||||
|
├── io.pressure
|
||||||
|
├── io.stat
|
||||||
|
├── memory.current
|
||||||
|
├── memory.events
|
||||||
|
├── memory.events.local
|
||||||
|
├── memory.high
|
||||||
|
├── memory.low
|
||||||
|
├── memory.max
|
||||||
|
├── memory.min
|
||||||
|
├── memory.oom.group
|
||||||
|
├── memory.pressure
|
||||||
|
├── memory.stat
|
||||||
|
├── memory.swap.current
|
||||||
|
├── memory.swap.events
|
||||||
|
├── memory.swap.max
|
||||||
|
├── pids.current
|
||||||
|
├── pids.events
|
||||||
|
└── pids.max
|
||||||
|
```
|
||||||
|
|
||||||
|
Same as `cgroups v1`, a process can join the cgroup by writing its process id (`pid`) to
|
||||||
|
`cgroup.procs` file, or join a cgroup partially by writing the task (thread) id (`tid`) to
|
||||||
|
`cgroup.threads` file.
|
||||||
|
|
||||||
|
For backwards compatibility, Kata Containers supports `cgroups v1` by default.
|
||||||
|
To change this to `v2`, set `sandbox_cgroup_only=true` in the `configuration.toml` file.
|
||||||
|
To know more about `cgroups v2`, see [cgroupsv2(7)][3].
|
||||||
|
|
||||||
|
### Distro Support
|
||||||
|
|
||||||
|
Many Linux distributions do not yet support `cgroups v2`, as it is quite a recent addition.
|
||||||
|
For more information about the status of this feature see [issue #2494][4].
|
||||||
|
|
||||||
|
# Summary
|
||||||
|
|
||||||
|
| cgroup option | default? | status | pros | cons | cgroups |
|
||||||
|
|-|-|-|-|-|-|
|
||||||
|
| `SandboxCgroupOnly=false` | yes | legacy | Easiest to make Kata work | Unaccounted for memory and resource utilization | v1
|
||||||
|
| `SandboxCgroupOnly=true` | no | recommended | Complete tracking of Kata memory and CPU utilization. In Kubernetes, the Kubelet can fully constrain Kata via the pod cgroup | Requires upper layer orchestrator which sizes sandbox cgroup appropriately | v1, v2
|
||||||
|
|
||||||
|
|
||||||
|
[1]: http://man7.org/linux/man-pages/man5/tmpfs.5.html
|
||||||
|
[2]: http://man7.org/linux/man-pages/man7/cgroups.7.html#CGROUPS_VERSION_1
|
||||||
|
[3]: http://man7.org/linux/man-pages/man7/cgroups.7.html#CGROUPS_VERSION_2
|
||||||
|
[4]: https://github.com/kata-containers/runtime/issues/2494
|
117
docs/design/kata-api-design.md
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
# Kata API Design
|
||||||
|
To fulfill the [Kata design requirements](kata-design-requirements.md), and based on the discussion on [Virtcontainers API extensions](https://docs.google.com/presentation/d/1dbGrD1h9cpuqAPooiEgtiwWDGCYhVPdatq7owsKHDEQ), the Kata runtime library features the following APIs:
|
||||||
|
- Sandbox based top API
|
||||||
|
- Storage and network hotplug API
|
||||||
|
- Plugin frameworks for external proprietary Kata runtime extensions
|
||||||
|
- Built-in shim and proxy types and capabilities
|
||||||
|
|
||||||
|
## Sandbox Based API
|
||||||
|
### Sandbox Management API
|
||||||
|
|
||||||
|
|Name|Description|
|
||||||
|
|---|---|
|
||||||
|
|`CreateSandbox(SandboxConfig)`| Create and start a sandbox, and return the sandbox structure.|
|
||||||
|
|`FetchSandbox(ID)`| Connect to an existing sandbox and return the sandbox structure.|
|
||||||
|
|`ListSandboxes()`| List all existing sandboxes with status. |
|
||||||
|
|
||||||
|
### Sandbox Operation API
|
||||||
|
|
||||||
|
|Name|Description|
|
||||||
|
|---|---|
|
||||||
|
|`sandbox.Pause()`| Pause the sandbox.|
|
||||||
|
|`sandbox.Resume()`| Resume the paused sandbox.|
|
||||||
|
|`sandbox.Release()`| Release a sandbox data structure, close connections to the agent, and quit any goroutines associated with the sandbox. Mostly used for daemon restart.|
|
||||||
|
|`sandbox.Delete()`| Destroy the sandbox and remove all persistent metadata.|
|
||||||
|
|`sandbox.Status()`| Get the status of the sandbox and containers.|
|
||||||
|
|`sandbox.Monitor()`| Return a context handler for caller to monitor sandbox callbacks such as error termination.|
|
||||||
|
|`sandbox.CreateContainer()`| Create new container in the sandbox.|
|
||||||
|
|`sandbox.DeleteContainer()`| Delete a container from the sandbox.|
|
||||||
|
|`sandbox.StartContainer()`| Start a container in the sandbox.|
|
||||||
|
|`sandbox.StatusContainer()`| Get the status of a container in the sandbox.|
|
||||||
|
|`sandbox.EnterContainer()`| Run a new process in a container.|
|
||||||
|
|`sandbox.WaitProcess()`| Wait on a process to terminate.|
|
||||||
|
### Sandbox Hotplug API
|
||||||
|
|Name|Description|
|
||||||
|
|---|---|
|
||||||
|
|`sandbox.AddDevice()`| Add new storage device to the sandbox.|
|
||||||
|
|`sandbox.AddInterface()`| Add new NIC to the sandbox.|
|
||||||
|
|`sandbox.RemoveInterface()`| Remove a NIC from the sandbox.|
|
||||||
|
|`sandbox.ListInterfaces()`| List all NICs and their configurations in the sandbox.|
|
||||||
|
|`sandbox.UpdateRoutes()`| Update the sandbox route table (e.g. for portmapping support).|
|
||||||
|
|`sandbox.ListRoutes()`| List the sandbox route table.|
|
||||||
|
|
||||||
|
### Sandbox Relay API
|
||||||
|
|Name|Description|
|
||||||
|
|---|---|
|
||||||
|
|`sandbox.WinsizeProcess(containerID, processID, Height, Width)`|Relay TTY resize request to a process.|
|
||||||
|
|`sandbox.SignalProcess(containerID, processID, signalID, signalALL)`| Relay a signal to a process or all processes in a container.|
|
||||||
|
|`sandbox.IOStream(containerID, processID)`| Relay a process stdio. Return stdin/stdout/stderr pipes to the process stdin/stdout/stderr streams.|
|
||||||
|
|
||||||
|
## Plugin framework for external proprietary Kata runtime extensions
|
||||||
|
### Hypervisor plugin
|
||||||
|
|
||||||
|
TBD.
|
||||||
|
### Metadata storage plugin
|
||||||
|
The metadata storage plugin controls where sandbox metadata is saved.
|
||||||
|
All metadata storage plugins must implement the following API:
|
||||||
|
|
||||||
|
|Name|Description|
|
||||||
|
|---|---|
|
||||||
|
|`storage.Save(key, value)`| Save a record.|
|
||||||
|
|`storage.Load(key)`| Load a record.|
|
||||||
|
|`storage.Delete(key)`| Delete a record.|
|
||||||
|
|
||||||
|
Built-in implementations include:
|
||||||
|
- Filesystem storage
|
||||||
|
- LevelDB storage
|
||||||
|
|
||||||
|
### VM Factory plugin
|
||||||
|
The VM factory plugin controls how a sandbox factory creates new VMs.
|
||||||
|
All VM factory plugins must implement the following API:
|
||||||
|
|
||||||
|
|Name|Description|
|
||||||
|
|---|---|
|
||||||
|
|`VMFactory.NewVM(HypervisorConfig)`|Create a new VM based on `HypervisorConfig`.|
|
||||||
|
|
||||||
|
Built-in implementations include:
|
||||||
|
|
||||||
|
|Name|Description|
|
||||||
|
|---|---|
|
||||||
|
|`CreateNew()`| Create brand new VM based on `HypervisorConfig`.|
|
||||||
|
|`CreateFromTemplate()`| Create new VM from template.|
|
||||||
|
|`CreateFromCache()`| Create new VM from VM caches.|
|
||||||
|
|
||||||
|
### Sandbox Creation Plugin Workflow
|
||||||
|

|
||||||
|
|
||||||
|
### Sandbox Connection Plugin Workflow
|
||||||
|

|
||||||
|
|
||||||
|
## Built-in Shim and Proxy Types and Capabilities
|
||||||
|
### Built-in shim/proxy sandbox configurations
|
||||||
|
- Supported shim configurations:
|
||||||
|
|
||||||
|
|Name|Description|
|
||||||
|
|---|---|
|
||||||
|
|`noopshim`|Do not start any shim process.|
|
||||||
|
|`ccshim`| Start the cc-shim binary.|
|
||||||
|
|`katashim`| Start the `kata-shim` binary.|
|
||||||
|
|`katashimbuiltin`|No standalone shim process but shim functionality APIs are exported.|
|
||||||
|
- Supported proxy configurations:
|
||||||
|
|
||||||
|
|Name|Description|
|
||||||
|
|---|---|
|
||||||
|
|`noopProxy`| a dummy proxy implementation of the proxy interface, only used for testing purpose.|
|
||||||
|
|`noProxy`|generic implementation for any case where no actual proxy is needed.|
|
||||||
|
|`ccProxy`|run `ccProxy` to proxy between runtime and agent.|
|
||||||
|
|`kataProxy`|run `kata-proxy` to translate Yamux connections between runtime and Kata agent. |
|
||||||
|
|`kataProxyBuiltin`| no standalone proxy process and connect to Kata agent with internal Yamux translation.|
|
||||||
|
|
||||||
|
### Built-in Shim Capability
|
||||||
|
Built-in shim capability is implemented by removing standalone shim process, and
|
||||||
|
supporting the shim related APIs.
|
||||||
|
|
||||||
|
### Built-in Proxy Capability
|
||||||
|
Built-in proxy capability is achieved by removing standalone proxy process, and
|
||||||
|
connecting to Kata agent with a custom gRPC dialer that is internal Yamux translation.
|
||||||
|
The behavior is enabled when proxy is configured as `kataProxyBuiltin`.
|
95
docs/design/kata-design-requirements.md
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
## Design requirements
|
||||||
|
|
||||||
|
The Kata Containers runtime **MUST** fulfill all of the following requirements:
|
||||||
|
|
||||||
|
### OCI compatibility
|
||||||
|
The Kata Containers runtime **MUST** implement the [OCI runtime specification](https://github.com/opencontainers/runtime-spec) and support all
|
||||||
|
the OCI runtime operations.
|
||||||
|
|
||||||
|
### [`runc`](https://github.com/opencontainers/runc) CLI compatibility
|
||||||
|
In theory, being OCI compatible should be enough. In practice, the Kata Containers runtime
|
||||||
|
should comply with the latest *stable* `runc` CLI. In particular, it **MUST** implement the
|
||||||
|
following `runc` commands:
|
||||||
|
|
||||||
|
* `create`
|
||||||
|
* `delete`
|
||||||
|
* `exec`
|
||||||
|
* `kill`
|
||||||
|
* `list`
|
||||||
|
* `pause`
|
||||||
|
* `ps`
|
||||||
|
* `start`
|
||||||
|
* `state`
|
||||||
|
* `version`
|
||||||
|
|
||||||
|
The Kata Containers runtime **MUST** implement the following command line options:
|
||||||
|
* `--console-socket`
|
||||||
|
* `--pid-file`
|
||||||
|
|
||||||
|
### [CRI](http://blog.kubernetes.io/2016/12/container-runtime-interface-cri-in-kubernetes.html) and [Kubernetes](https://kubernetes.io) support
|
||||||
|
The Kata Containers project **MUST** provide two interfaces for CRI shims to manage hardware
|
||||||
|
virtualization based Kubernetes pods and containers:
|
||||||
|
- An OCI and `runc` compatible command line interface, as described in the previous section.
|
||||||
|
This interface is used by implementations such as [`CRI-O`](http://cri-o.io) and [`cri-containerd`](https://github.com/containerd/cri-containerd), for example.
|
||||||
|
- A hardware virtualization runtime library API for CRI shims to consume and provide a more
|
||||||
|
CRI native implementation. The [`frakti`](https://github.com/kubernetes/frakti) CRI shim is an example of such a consumer.
|
||||||
|
|
||||||
|
### Multiple hardware architectures support
|
||||||
|
The Kata Containers runtime **MUST NOT** be architecture-specific. It should be able to support
|
||||||
|
multiple hardware architectures and provide a modular and flexible design for adding support
|
||||||
|
for additional ones.
|
||||||
|
|
||||||
|
### Multiple hypervisor support
|
||||||
|
The Kata Containers runtime **MUST NOT** be tied to any specific hardware virtualization technology,
|
||||||
|
hypervisor, or virtual machine monitor implementation.
|
||||||
|
It should support multiple hypervisors and provide a pluggable and flexible design to add support
|
||||||
|
for additional ones.
|
||||||
|
|
||||||
|
#### Nesting
|
||||||
|
The Kata Containers runtime **MUST** support nested virtualization environments.
|
||||||
|
|
||||||
|
### Networking
|
||||||
|
|
||||||
|
* The Kata Containers runtime **MUST** support CNI plugin.
|
||||||
|
* The Kata Containers runtime **MUST** support both legacy and IPv6 networks.
|
||||||
|
|
||||||
|
### I/O
|
||||||
|
|
||||||
|
#### Devices direct assignment
|
||||||
|
In order for containers to directly consume host hardware resources, the Kata Containers runtime
|
||||||
|
**MUST** provide containers with secure pass through for generic devices such as GPUs, SRIOV,
|
||||||
|
RDMA, QAT, by leveraging I/O virtualization technologies (IOMMU, interrupt remapping).
|
||||||
|
|
||||||
|
#### Acceleration
|
||||||
|
The Kata Containers runtime **MUST** support accelerated and user-space-based I/O operations
|
||||||
|
for networking (e.g. DPDK) as well as storage through `vhost-user` sockets.
|
||||||
|
|
||||||
|
#### Scalability
|
||||||
|
The Kata Containers runtime **MUST** support scalable I/O through the SRIOV technology.
|
||||||
|
|
||||||
|
|
||||||
|
### Virtualization overhead reduction
|
||||||
|
A compelling aspect of containers is their minimal overhead compared to bare metal applications.
|
||||||
|
A container runtime should keep the overhead to a minimum in order to provide the expected user
|
||||||
|
experience.
|
||||||
|
The Kata Containers runtime implementation **SHOULD** be optimized for:
|
||||||
|
|
||||||
|
* Minimal workload boot and shutdown times
|
||||||
|
* Minimal workload memory footprint
|
||||||
|
* Maximal networking throughput
|
||||||
|
* Minimal networking latency
|
||||||
|
|
||||||
|
### Testing and debugging
|
||||||
|
|
||||||
|
#### Continuous Integration
|
||||||
|
Each Kata Containers runtime pull request **MUST** pass at least the following set of container-related
|
||||||
|
tests:
|
||||||
|
|
||||||
|
* Unit tests: runtime unit tests coverage >75%
|
||||||
|
* Functional tests: the entire runtime CLI and APIs
|
||||||
|
* Integration tests: Docker and Kubernetes
|
||||||
|
|
||||||
|
#### Debugging
|
||||||
|
|
||||||
|
The Kata Containers runtime implementation **MUST** use structured logging in order to namespace
|
||||||
|
log messages to facilitate debugging.
|
175
docs/design/vcpu-handling.md
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
- [Virtual machine vCPU sizing in Kata Containers](#virtual-machine-vcpu-sizing-in-kata-containers)
|
||||||
|
* [Default number of virtual CPUs](#default-number-of-virtual-cpus)
|
||||||
|
* [Virtual CPUs and Kubernetes pods](#virtual-cpus-and-kubernetes-pods)
|
||||||
|
* [Container lifecycle](#container-lifecycle)
|
||||||
|
* [Container without CPU constraint](#container-without-cpu-constraint)
|
||||||
|
* [Container with CPU constraint](#container-with-cpu-constraint)
|
||||||
|
* [Do not waste resources](#do-not-waste-resources)
|
||||||
|
|
||||||
|
# Virtual machine vCPU sizing in Kata Containers
|
||||||
|
|
||||||
|
## Default number of virtual CPUs
|
||||||
|
|
||||||
|
Before starting a container, the [runtime][6] reads the `default_vcpus` option
|
||||||
|
from the [configuration file][7] to determine the number of virtual CPUs
|
||||||
|
(vCPUs) needed to start the virtual machine. By default, `default_vcpus` is
|
||||||
|
equal to 1 for fast boot time and a small memory footprint per virtual machine.
|
||||||
|
Be aware that increasing this value negatively impacts the virtual machine's
|
||||||
|
boot time and memory footprint.
|
||||||
|
In general, we recommend that you do not edit this variable, unless you know
|
||||||
|
what you are doing. If your container needs more than one vCPU, use
|
||||||
|
[docker `--cpus`][1], [docker update][4], or [Kubernetes `cpu` limits][2] to
|
||||||
|
assign more resources.
|
||||||
|
|
||||||
|
*Docker*
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ docker run --name foo -ti --cpus 2 debian bash
|
||||||
|
$ docker update --cpus 4 foo
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
*Kubernetes*
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# ~/cpu-demo.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Pod
|
||||||
|
metadata:
|
||||||
|
name: cpu-demo
|
||||||
|
namespace: sandbox
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: cpu0
|
||||||
|
image: vish/stress
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: "3"
|
||||||
|
args:
|
||||||
|
- -cpus
|
||||||
|
- "5"
|
||||||
|
```
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ sudo -E kubectl create -f ~/cpu-demo.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
## Virtual CPUs and Kubernetes pods
|
||||||
|
|
||||||
|
A Kubernetes pod is a group of one or more containers, with shared storage and
|
||||||
|
network, and a specification for how to run the containers [[specification][3]].
|
||||||
|
In Kata Containers this group of containers, which is called a sandbox, runs inside
|
||||||
|
the same virtual machine. If you do not specify a CPU constraint, the runtime does
|
||||||
|
not add more vCPUs and the container is not placed inside a CPU cgroup.
|
||||||
|
Instead, the container uses the number of vCPUs specified by `default_vcpus`
|
||||||
|
and shares these resources with other containers in the same situation
|
||||||
|
(without a CPU constraint).
|
||||||
|
|
||||||
|
## Container lifecycle
|
||||||
|
|
||||||
|
When you create a container with a CPU constraint, the runtime adds the
|
||||||
|
number of vCPUs required by the container. Similarly, when the container terminates,
|
||||||
|
the runtime removes these resources.
|
||||||
|
|
||||||
|
## Container without CPU constraint
|
||||||
|
|
||||||
|
A container without a CPU constraint uses the default number of vCPUs specified
|
||||||
|
in the configuration file. In the case of Kubernetes pods, containers without a
|
||||||
|
CPU constraint use and share between them the default number of vCPUs. For
|
||||||
|
example, if `default_vcpus` is equal to 1 and you have 2 containers without CPU
|
||||||
|
constraints with each container trying to consume 100% of vCPU, the resources
|
||||||
|
divide in two parts, 50% of vCPU for each container because your virtual
|
||||||
|
machine does not have enough resources to satisfy containers needs. If you want
|
||||||
|
to give access to a greater or lesser portion of vCPUs to a specific container,
|
||||||
|
use [`docker --cpu-shares`][1] or [Kubernetes `cpu` requests][2].
|
||||||
|
|
||||||
|
*Docker*
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ docker run -ti --cpu-shares=512 debian bash
|
||||||
|
```
|
||||||
|
|
||||||
|
*Kubernetes*
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# ~/cpu-demo.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Pod
|
||||||
|
metadata:
|
||||||
|
name: cpu-demo
|
||||||
|
namespace: sandbox
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: cpu0
|
||||||
|
image: vish/stress
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "0.7"
|
||||||
|
args:
|
||||||
|
- -cpus
|
||||||
|
- "3"
|
||||||
|
```
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ sudo -E kubectl create -f ~/cpu-demo.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
Before running containers without CPU constraint, consider that your containers
|
||||||
|
are not running alone. Since your containers run inside a virtual machine other
|
||||||
|
processes use the vCPUs as well (e.g. `systemd` and the Kata Containers
|
||||||
|
[agent][5]). In general, we recommend setting `default_vcpus` equal to 1 to
|
||||||
|
allow non-container processes to run on this vCPU and to specify a CPU
|
||||||
|
constraint for each container. If your container is already running and needs
|
||||||
|
more vCPUs, you can add more using [docker update][4].
|
||||||
|
|
||||||
|
## Container with CPU constraint
|
||||||
|
|
||||||
|
The runtime calculates the number of vCPUs required by a container with CPU
|
||||||
|
constraints using the following formula: `vCPUs = ceiling( quota / period )`, where
|
||||||
|
`quota` specifies the number of microseconds per CPU Period that the container is
|
||||||
|
guaranteed CPU access and `period` specifies the CPU CFS scheduler period of time
|
||||||
|
in microseconds. The result determines the number of vCPU to hot plug into the
|
||||||
|
virtual machine. Once the vCPUs have been added, the [agent][5] places the
|
||||||
|
container inside a CPU cgroup. This placement allows the container to use only
|
||||||
|
its assigned resources.
|
||||||
|
|
||||||
|
## Do not waste resources
|
||||||
|
|
||||||
|
If you already know the number of vCPUs needed for each container and pod, or
|
||||||
|
just want to run them with the same number of vCPUs, you can specify that
|
||||||
|
number using the `default_vcpus` option in the configuration file, each virtual
|
||||||
|
machine starts with that number of vCPUs. One limitation of this approach is
|
||||||
|
that these vCPUs cannot be removed later and you might be wasting
|
||||||
|
resources. For example, if you set `default_vcpus` to 8 and run only one
|
||||||
|
container with a CPU constraint of 1 vCPU, you might be wasting 7 vCPUs since
|
||||||
|
the virtual machine starts with 8 vCPUs and 1 vCPU is added and assigned
|
||||||
|
to the container. Non-container processes might be able to use 8 vCPUs but they
|
||||||
|
use a maximum 1 vCPU, hence 7 vCPUs might not be used.
|
||||||
|
|
||||||
|
|
||||||
|
*Container without CPU constraint*
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ docker run -ti debian bash -c "nproc; cat /sys/fs/cgroup/cpu,cpuacct/cpu.cfs_*"
|
||||||
|
1 # number of vCPUs
|
||||||
|
100000 # cfs period
|
||||||
|
-1 # cfs quota
|
||||||
|
```
|
||||||
|
|
||||||
|
*Container with CPU constraint*
|
||||||
|
|
||||||
|
```sh
|
||||||
|
docker run --cpus 4 -ti debian bash -c "nproc; cat /sys/fs/cgroup/cpu,cpuacct/cpu.cfs_*"
|
||||||
|
5 # number of vCPUs
|
||||||
|
100000 # cfs period
|
||||||
|
400000 # cfs quota
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
[1]: https://docs.docker.com/config/containers/resource_constraints/#cpu
|
||||||
|
[2]: https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource
|
||||||
|
[3]: https://kubernetes.io/docs/concepts/workloads/pods/pod/
|
||||||
|
[4]: https://docs.docker.com/engine/reference/commandline/update/
|
||||||
|
[5]: https://github.com/kata-containers/agent
|
||||||
|
[6]: https://github.com/kata-containers/runtime
|
||||||
|
[7]: https://github.com/kata-containers/runtime#configuration
|
129
docs/design/virtualization.md
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
# Virtualization in Kata Containers
|
||||||
|
|
||||||
|
- [Virtualization in Kata Containers](#virtualization-in-kata-containers)
|
||||||
|
- [Mapping container concepts to virtual machine technologies](#mapping-container-concepts-to-virtual-machine-technologies)
|
||||||
|
- [Kata Containers Hypervisor and VMM support](#kata-containers-hypervisor-and-vmm-support)
|
||||||
|
- [QEMU/KVM](#qemukvm)
|
||||||
|
- [Machine accelerators](#machine-accelerators)
|
||||||
|
- [Hotplug devices](#hotplug-devices)
|
||||||
|
- [Firecracker/KVM](#firecrackerkvm)
|
||||||
|
- [Cloud Hypervisor/KVM](#cloud-hypervisorkvm)
|
||||||
|
- [Summary](#summary)
|
||||||
|
|
||||||
|
|
||||||
|
In Kata Containers, a second layer of isolation is created on top of those provided by traditional namespace-containers. The
|
||||||
|
hardware virtualization interface is the basis of this additional layer. Kata will launch a lightweight virtual machine,
|
||||||
|
and use the guest’s Linux kernel to create a container workload, or workloads in the case of multi-container pods. In Kubernetes
|
||||||
|
and in the Kata implementation, the sandbox is carried out at the pod level. In Kata, this sandbox is created using a virtual machine.
|
||||||
|
|
||||||
|
This document describes how Kata Containers maps container technologies to virtual machines technologies, and how this is realized in
|
||||||
|
the multiple hypervisors and virtual machine monitors that Kata supports.
|
||||||
|
|
||||||
|
## Mapping container concepts to virtual machine technologies
|
||||||
|
|
||||||
|
A typical deployment of Kata Containers will be in Kubernetes by way of a Container Runtime Interface (CRI) implementation. On every node,
|
||||||
|
Kubelet will interact with a CRI implementor (such as containerd or CRI-O), which will in turn interface with Kata Containers (an OCI based runtime).
|
||||||
|
|
||||||
|
The CRI API, as defined at the [Kubernetes CRI-API repo](https://github.com/kubernetes/cri-api/), implies a few constructs being supported by the
|
||||||
|
CRI implementation, and ultimately in Kata Containers. In order to support the full [API](https://github.com/kubernetes/cri-api/blob/a6f63f369f6d50e9d0886f2eda63d585fbd1ab6a/pkg/apis/runtime/v1alpha2/api.proto#L34-L110) with the CRI-implementor, Kata must provide the following constructs:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
These constructs can then be further mapped to what devices are necessary for interfacing with the virtual machine:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Ultimately, these concepts map to specific para-virtualized devices or virtualization technologies.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Each hypervisor or VMM varies on how or if it handles each of these.
|
||||||
|
|
||||||
|
## Kata Containers Hypervisor and VMM support
|
||||||
|
|
||||||
|
Kata Containers is designed to support multiple virtual machine monitors (VMMs) and hypervisors.
|
||||||
|
Kata Containers supports:
|
||||||
|
- [ACRN hypervisor](https://projectacrn.org/)
|
||||||
|
- [Cloud Hypervisor](https://github.com/cloud-hypervisor/cloud-hypervisor)/[KVM](https://www.linux-kvm.org/page/Main_Page)
|
||||||
|
- [Firecracker](https://github.com/firecracker-microvm/firecracker)/KVM
|
||||||
|
- [QEMU](http://www.qemu-project.org/)/KVM
|
||||||
|
|
||||||
|
Which configuration to use will depend on the end user's requirements. Details of each solution and a summary are provided below.
|
||||||
|
|
||||||
|
### QEMU/KVM
|
||||||
|
|
||||||
|
Kata Containers with QEMU has complete compatibility with Kubernetes.
|
||||||
|
|
||||||
|
Depending on the host architecture, Kata Containers supports various machine types,
|
||||||
|
for example `pc` and `q35` on x86 systems, `virt` on ARM systems and `pseries` on IBM Power systems. The default Kata Containers
|
||||||
|
machine type is `pc`. The machine type and its [`Machine accelerators`](#machine-accelerators) can
|
||||||
|
be changed by editing the runtime [`configuration`](./architecture.md/#configuration) file.
|
||||||
|
|
||||||
|
Devices and features used:
|
||||||
|
- virtio VSOCK or virtio serial
|
||||||
|
- virtio block or virtio SCSI
|
||||||
|
- virtio net
|
||||||
|
- virtio fs or virtio 9p (recommend: virtio fs)
|
||||||
|
- VFIO
|
||||||
|
- hotplug
|
||||||
|
- machine accelerators
|
||||||
|
|
||||||
|
Machine accelerators and hotplug are used in Kata Containers to manage resource constraints, improve boot time and reduce memory footprint. These are documented below.
|
||||||
|
|
||||||
|
#### Machine accelerators
|
||||||
|
|
||||||
|
Machine accelerators are architecture specific and can be used to improve the performance
|
||||||
|
and enable specific features of the machine types. The following machine accelerators
|
||||||
|
are used in Kata Containers:
|
||||||
|
|
||||||
|
- NVDIMM: This machine accelerator is x86 specific and only supported by `pc` and
|
||||||
|
`q35` machine types. `nvdimm` is used to provide the root filesystem as a persistent
|
||||||
|
memory device to the Virtual Machine.
|
||||||
|
|
||||||
|
#### Hotplug devices
|
||||||
|
|
||||||
|
The Kata Containers VM starts with a minimum amount of resources, allowing for faster boot time and a reduction in memory footprint. As the container launch progresses,
|
||||||
|
devices are hotplugged to the VM. For example, when a CPU constraint is specified which includes additional CPUs, they can be hot added. Kata Containers has support
|
||||||
|
for hot-adding the following devices:
|
||||||
|
- Virtio block
|
||||||
|
- Virtio SCSI
|
||||||
|
- VFIO
|
||||||
|
- CPU
|
||||||
|
|
||||||
|
### Firecracker/KVM
|
||||||
|
|
||||||
|
Firecracker, built on many rust crates that are within [rust-VMM](https://github.com/rust-vmm), has a very limited device model, providing a lighter
|
||||||
|
footprint and attack surface, focusing on function-as-a-service like use cases. As a result, Kata Containers with Firecracker VMM supports a subset of the CRI API.
|
||||||
|
Firecracker does not support file-system sharing, and as a result only block-based storage drivers are supported. Firecracker does not support device
|
||||||
|
hotplug nor does it support VFIO. As a result, Kata Containers with Firecracker VMM does not support updating container resources after boot, nor
|
||||||
|
does it support device passthrough.
|
||||||
|
|
||||||
|
Devices used:
|
||||||
|
- virtio VSOCK
|
||||||
|
- virtio block
|
||||||
|
- virtio net
|
||||||
|
|
||||||
|
### Cloud Hypervisor/KVM
|
||||||
|
|
||||||
|
Cloud Hypervisor, based on [rust-VMM](https://github.com/rust-vmm), is designed to have a lighter footprint and attack surface. For Kata Containers,
|
||||||
|
relative to Firecracker, the Cloud Hypervisor configuration provides better compatibility at the expense of exposing additional devices: file system
|
||||||
|
sharing and direct device assignment. As of the 1.10 release of Kata Containers, Cloud Hypervisor does not support device hotplug, and as a result
|
||||||
|
does not support updating container resources after boot, or utilizing block based volumes. While Cloud Hypervisor does support VFIO, Kata is still adding
|
||||||
|
this support. As of 1.10, Kata does not support block based volumes or direct device assignment. See [Cloud Hypervisor device support documentation](https://github.com/cloud-hypervisor/cloud-hypervisor/blob/master/docs/device_model.md)
|
||||||
|
for more details on Cloud Hypervisor.
|
||||||
|
|
||||||
|
Devices used:
|
||||||
|
- virtio VSOCK
|
||||||
|
- virtio block
|
||||||
|
- virtio net
|
||||||
|
- virtio fs
|
||||||
|
|
||||||
|
### Summary
|
||||||
|
|
||||||
|
| Solution | release introduced | brief summary |
|
||||||
|
|-|-|-|
|
||||||
|
| QEMU | 1.0 | upstream QEMU, with support for hotplug and filesystem sharing |
|
||||||
|
| NEMU | 1.4 | Deprecated, removed as of 1.10 release. Slimmed down fork of QEMU, with experimental support of virtio-fs |
|
||||||
|
| Firecracker | 1.5 | upstream Firecracker, rust-VMM based, no VFIO, no FS sharing, no memory/CPU hotplug |
|
||||||
|
| QEMU-virtio-fs | 1.7 | upstream QEMU with support for virtio-fs. Will be removed once virtio-fs lands in upstream QEMU |
|
||||||
|
| Cloud Hypervisor | 1.10 | rust-VMM based, includes VFIO and FS sharing through virtio-fs, no hotplug |
|
28
docs/how-to/README.md
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
# Howto Guides
|
||||||
|
|
||||||
|
* [Howto Guides](#howto-guides)
|
||||||
|
* [Kubernetes Integration](#kubernetes-integration)
|
||||||
|
* [Hypervisors Integration](#hypervisors-integration)
|
||||||
|
* [Advanced Topics](#advanced-topics)
|
||||||
|
|
||||||
|
## Kubernetes Integration
|
||||||
|
- [Run Kata Containers with Kubernetes](run-kata-with-k8s.md)
|
||||||
|
- [How to use Kata Containers and Containerd](containerd-kata.md)
|
||||||
|
- [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](how-to-use-k8s-with-cri-containerd-and-kata.md)
|
||||||
|
- [Kata Containers and service mesh for Kubernetes](service-mesh.md)
|
||||||
|
- [How to import Kata Containers logs into Fluentd](how-to-import-kata-logs-with-fluentd.md)
|
||||||
|
|
||||||
|
## Hypervisors Integration
|
||||||
|
- [Kata Containers with Firecracker](https://github.com/kata-containers/documentation/wiki/Initial-release-of-Kata-Containers-with-Firecracker-support)
|
||||||
|
- [Kata Containers with NEMU](how-to-use-kata-containers-with-nemu.md)
|
||||||
|
- [Kata Containers with ACRN Hypervisor](how-to-use-kata-containers-with-acrn.md)
|
||||||
|
|
||||||
|
## Advanced Topics
|
||||||
|
- [How to use Kata Containers with virtio-fs](how-to-use-virtio-fs-with-kata.md)
|
||||||
|
- [Setting Sysctls with Kata](how-to-use-sysctls-with-kata.md)
|
||||||
|
- [What Is VMCache and How To Enable It](what-is-vm-cache-and-how-do-I-use-it.md)
|
||||||
|
- [What Is VM Templating and How To Enable It](what-is-vm-templating-and-how-do-I-use-it.md)
|
||||||
|
- [Privileged Kata Containers](privileged.md)
|
||||||
|
- [How to load kernel modules in Kata Containers](how-to-load-kernel-modules-with-kata.md)
|
||||||
|
- [How to use Kata Containers with `virtio-mem`](how-to-use-virtio-mem-with-kata.md)
|
||||||
|
- [How to set sandbox Kata Containers configurations with pod annotations](how-to-set-sandbox-config-kata.md)
|
368
docs/how-to/containerd-kata.md
Normal file
@ -0,0 +1,368 @@
|
|||||||
|
# How to use Kata Containers and Containerd
|
||||||
|
|
||||||
|
- [Concepts](#concepts)
|
||||||
|
- [Kubernetes `RuntimeClass`](#kubernetes-runtimeclass)
|
||||||
|
- [Containerd Runtime V2 API: Shim V2 API](#containerd-runtime-v2-api-shim-v2-api)
|
||||||
|
- [Install](#install)
|
||||||
|
- [Install Kata Containers](#install-kata-containers)
|
||||||
|
- [Install containerd with CRI plugin](#install-containerd-with-cri-plugin)
|
||||||
|
- [Install CNI plugins](#install-cni-plugins)
|
||||||
|
- [Install `cri-tools`](#install-cri-tools)
|
||||||
|
- [Configuration](#configuration)
|
||||||
|
- [Configure containerd to use Kata Containers](#configure-containerd-to-use-kata-containers)
|
||||||
|
- [Kata Containers as a `RuntimeClass`](#kata-containers-as-a-runtimeclass)
|
||||||
|
- [Kata Containers as the runtime for untrusted workload](#kata-containers-as-the-runtime-for-untrusted-workload)
|
||||||
|
- [Kata Containers as the default runtime](#kata-containers-as-the-default-runtime)
|
||||||
|
- [Configuration for `cri-tools`](#configuration-for-cri-tools)
|
||||||
|
- [Run](#run)
|
||||||
|
- [Launch containers with `ctr` command line](#launch-containers-with-ctr-command-line)
|
||||||
|
- [Launch Pods with `crictl` command line](#launch-pods-with-crictl-command-line)
|
||||||
|
|
||||||
|
This document covers the installation and configuration of [containerd](https://containerd.io/)
|
||||||
|
and [Kata Containers](https://katacontainers.io). The containerd provides not only the `ctr`
|
||||||
|
command line tool, but also the [CRI](https://kubernetes.io/blog/2016/12/container-runtime-interface-cri-in-kubernetes/)
|
||||||
|
interface for [Kubernetes](https://kubernetes.io) and other CRI clients.
|
||||||
|
|
||||||
|
This document is primarily written for Kata Containers v1.5.0-rc2 or above, and containerd v1.2.0 or above.
|
||||||
|
Previous versions are addressed here, but we suggest users upgrade to the newer versions for better support.
|
||||||
|
|
||||||
|
## Concepts
|
||||||
|
|
||||||
|
### Kubernetes `RuntimeClass`
|
||||||
|
|
||||||
|
[`RuntimeClass`](https://kubernetes.io/docs/concepts/containers/runtime-class/) is a Kubernetes feature first
|
||||||
|
introduced in Kubernetes 1.12 as alpha. It is the feature for selecting the container runtime configuration to
|
||||||
|
use to run a pod’s containers. This feature is supported in `containerd` since [v1.2.0](https://github.com/containerd/containerd/releases/tag/v1.2.0).
|
||||||
|
|
||||||
|
Before the `RuntimeClass` was introduced, Kubernetes was not aware of the difference of runtimes on the node. `kubelet`
|
||||||
|
creates Pod sandboxes and containers through CRI implementations, and treats all the Pods equally. However, there
|
||||||
|
are requirements to run trusted Pods (i.e. Kubernetes plugin) in a native container like runc, and to run untrusted
|
||||||
|
workloads with isolated sandboxes (i.e. Kata Containers).
|
||||||
|
|
||||||
|
As a result, the CRI implementations extended their semantics for the requirements:
|
||||||
|
|
||||||
|
- At the beginning, [Frakti](https://github.com/kubernetes/frakti) checks the network configuration of a Pod, and
|
||||||
|
treats Pods with `host` network as trusted, while others are treated as untrusted.
|
||||||
|
- The containerd introduced an annotation for untrusted Pods since [v1.0](https://github.com/containerd/cri/blob/v1.0.0-rc.0/docs/config.md):
|
||||||
|
```yaml
|
||||||
|
annotations:
|
||||||
|
io.kubernetes.cri.untrusted-workload: "true"
|
||||||
|
```
|
||||||
|
- Similarly, CRI-O introduced the annotation `io.kubernetes.cri-o.TrustedSandbox` for untrusted Pods.
|
||||||
|
|
||||||
|
To eliminate the complexity of user configuration introduced by the non-standardized annotations and provide
|
||||||
|
extensibility, `RuntimeClass` was introduced. This gives users the ability to affect the runtime behavior
|
||||||
|
through `RuntimeClass` without the knowledge of the CRI daemons. We suggest that users with multiple runtimes
|
||||||
|
use `RuntimeClass` instead of the deprecated annotations.
|
||||||
|
|
||||||
|
### Containerd Runtime V2 API: Shim V2 API
|
||||||
|
|
||||||
|
The [`containerd-shim-kata-v2` (short as `shimv2` in this documentation)](https://github.com/kata-containers/runtime/tree/master/containerd-shim-v2)
|
||||||
|
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata.
|
||||||
|
With `shimv2`, Kubernetes can launch Pod and OCI-compatible containers with one shim per Pod. Prior to `shimv2`, `2N+1`
|
||||||
|
shims (i.e. a `containerd-shim` and a `kata-shim` for each container and the Pod sandbox itself) and no standalone `kata-proxy`
|
||||||
|
process were used, even with VSOCK not available.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
The shim v2 is introduced in containerd [v1.2.0](https://github.com/containerd/containerd/releases/tag/v1.2.0) and Kata `shimv2`
|
||||||
|
is implemented in Kata Containers v1.5.0.
|
||||||
|
|
||||||
|
## Install
|
||||||
|
|
||||||
|
### Install Kata Containers
|
||||||
|
|
||||||
|
Follow the instructions to [install Kata Containers](https://github.com/kata-containers/documentation/blob/master/install/README.md).
|
||||||
|
|
||||||
|
### Install containerd with CRI plugin
|
||||||
|
|
||||||
|
> **Note:** `cri` is a native plugin of containerd 1.1 and above. It is built into containerd and enabled by default.
|
||||||
|
> You do not need to install `cri` if you have containerd 1.1 or above. Just remove the `cri` plugin from the list of
|
||||||
|
> `disabled_plugins` in the containerd configuration file (`/etc/containerd/config.toml`).
|
||||||
|
|
||||||
|
Follow the instructions from the [CRI installation guide](http://github.com/containerd/cri/blob/master/docs/installation.md).
|
||||||
|
|
||||||
|
Then, check if `containerd` is now available:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ command -v containerd
|
||||||
|
```
|
||||||
|
|
||||||
|
### Install CNI plugins
|
||||||
|
|
||||||
|
> **Note:** You do not need to install CNI plugins if you do not want to use containerd with Kubernetes.
|
||||||
|
> If you have installed Kubernetes with `kubeadm`, you might have already installed the CNI plugins.
|
||||||
|
|
||||||
|
You can manually install CNI plugins as follows:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ go get github.com/containernetworking/plugins
|
||||||
|
$ pushd $GOPATH/src/github.com/containernetworking/plugins
|
||||||
|
$ ./build_linux.sh
|
||||||
|
$ sudo mkdir /opt/cni
|
||||||
|
$ sudo cp -r bin /opt/cni/
|
||||||
|
$ popd
|
||||||
|
```
|
||||||
|
|
||||||
|
### Install `cri-tools`
|
||||||
|
|
||||||
|
> **Note:** `cri-tools` is a set of tools for CRI used for development and testing. Users who only want
|
||||||
|
> to use containerd with Kubernetes can skip the `cri-tools`.
|
||||||
|
|
||||||
|
You can install the `cri-tools` from source code:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ go get github.com/kubernetes-incubator/cri-tools
|
||||||
|
$ pushd $GOPATH/src/github.com/kubernetes-incubator/cri-tools
|
||||||
|
$ make
|
||||||
|
$ sudo -E make install
|
||||||
|
$ popd
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
### Configure containerd to use Kata Containers
|
||||||
|
|
||||||
|
By default, the configuration of containerd is located at `/etc/containerd/config.toml`, and the
|
||||||
|
`cri` plugins are placed in the following section:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[plugins]
|
||||||
|
[plugins.cri]
|
||||||
|
[plugins.cri.containerd]
|
||||||
|
[plugins.cri.containerd.default_runtime]
|
||||||
|
#runtime_type = "io.containerd.runtime.v1.linux"
|
||||||
|
|
||||||
|
[plugins.cri.cni]
|
||||||
|
# conf_dir is the directory in which the admin places a CNI conf.
|
||||||
|
conf_dir = "/etc/cni/net.d"
|
||||||
|
```
|
||||||
|
|
||||||
|
The following sections outline how to add Kata Containers to the configurations.
|
||||||
|
|
||||||
|
#### Kata Containers as a `RuntimeClass`
|
||||||
|
|
||||||
|
For
|
||||||
|
- Kata Containers v1.5.0 or above (including `1.5.0-rc`)
|
||||||
|
- Containerd v1.2.0 or above
|
||||||
|
- Kubernetes v1.12.0 or above
|
||||||
|
|
||||||
|
The `RuntimeClass` is suggested.
|
||||||
|
|
||||||
|
The following configuration includes three runtime classes:
|
||||||
|
- `plugins.cri.containerd.runtimes.runc`: the runc, and it is the default runtime.
|
||||||
|
- `plugins.cri.containerd.runtimes.kata`: The function in containerd (reference [the document here](https://github.com/containerd/containerd/tree/master/runtime/v2#binary-naming))
|
||||||
|
where the dot-connected string `io.containerd.kata.v2` is translated to `containerd-shim-kata-v2` (i.e. the
|
||||||
|
binary name of the Kata implementation of [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)).
|
||||||
|
- `plugins.cri.containerd.runtimes.katacli`: the `containerd-shim-runc-v1` calls `kata-runtime`, which is the legacy process.
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[plugins.cri.containerd]
|
||||||
|
no_pivot = false
|
||||||
|
[plugins.cri.containerd.runtimes]
|
||||||
|
[plugins.cri.containerd.runtimes.runc]
|
||||||
|
runtime_type = "io.containerd.runc.v1"
|
||||||
|
[plugins.cri.containerd.runtimes.runc.options]
|
||||||
|
NoPivotRoot = false
|
||||||
|
NoNewKeyring = false
|
||||||
|
ShimCgroup = ""
|
||||||
|
IoUid = 0
|
||||||
|
IoGid = 0
|
||||||
|
BinaryName = "runc"
|
||||||
|
Root = ""
|
||||||
|
CriuPath = ""
|
||||||
|
SystemdCgroup = false
|
||||||
|
[plugins.cri.containerd.runtimes.kata]
|
||||||
|
runtime_type = "io.containerd.kata.v2"
|
||||||
|
[plugins.cri.containerd.runtimes.katacli]
|
||||||
|
runtime_type = "io.containerd.runc.v1"
|
||||||
|
[plugins.cri.containerd.runtimes.katacli.options]
|
||||||
|
NoPivotRoot = false
|
||||||
|
NoNewKeyring = false
|
||||||
|
ShimCgroup = ""
|
||||||
|
IoUid = 0
|
||||||
|
IoGid = 0
|
||||||
|
BinaryName = "/usr/bin/kata-runtime"
|
||||||
|
Root = ""
|
||||||
|
CriuPath = ""
|
||||||
|
SystemdCgroup = false
|
||||||
|
```
|
||||||
|
|
||||||
|
From Containerd v1.2.4 and Kata v1.6.0, there is a new runtime option supported, which allows you to specify a specific Kata configuration file as follows:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[plugins.cri.containerd.runtimes.kata]
|
||||||
|
runtime_type = "io.containerd.kata.v2"
|
||||||
|
[plugins.cri.containerd.runtimes.kata.options]
|
||||||
|
ConfigPath = "/etc/kata-containers/config.toml"
|
||||||
|
```
|
||||||
|
|
||||||
|
This `ConfigPath` option is optional. If you do not specify it, shimv2 first tries to get the configuration file from the environment variable `KATA_CONF_FILE`. If neither are set, shimv2 will use the default Kata configuration file paths (`/etc/kata-containers/configuration.toml` and `/usr/share/defaults/kata-containers/configuration.toml`).
|
||||||
|
|
||||||
|
If you use Containerd older than v1.2.4 or a version of Kata older than v1.6.0 and also want to specify a configuration file, you can use the following workaround, since the shimv2 accepts an environment variable, `KATA_CONF_FILE` for the configuration file path. Then, you can create a
|
||||||
|
shell script with the following:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
#!/bin/bash
|
||||||
|
KATA_CONF_FILE=/etc/kata-containers/firecracker.toml containerd-shim-kata-v2 $@
|
||||||
|
```
|
||||||
|
|
||||||
|
Name it as `/usr/local/bin/containerd-shim-katafc-v2` and reference it in the configuration of containerd:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[plugins.cri.containerd.runtimes.kata-firecracker]
|
||||||
|
runtime_type = "io.containerd.katafc.v2"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Kata Containers as the runtime for untrusted workload
|
||||||
|
|
||||||
|
For cases without `RuntimeClass` support, we can use the legacy annotation method to support using Kata Containers
|
||||||
|
for an untrusted workload. With the following configuration, you can run trusted workloads with a runtime such as `runc`
|
||||||
|
and then, run an untrusted workload with Kata Containers:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[plugins.cri.containerd]
|
||||||
|
# "plugins.cri.containerd.default_runtime" is the runtime to use in containerd.
|
||||||
|
[plugins.cri.containerd.default_runtime]
|
||||||
|
# runtime_type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
|
||||||
|
runtime_type = "io.containerd.runtime.v1.linux"
|
||||||
|
|
||||||
|
# "plugins.cri.containerd.untrusted_workload_runtime" is a runtime to run untrusted workloads on it.
|
||||||
|
[plugins.cri.containerd.untrusted_workload_runtime]
|
||||||
|
# runtime_type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
|
||||||
|
runtime_type = "io.containerd.kata.v2"
|
||||||
|
```
|
||||||
|
|
||||||
|
For the earlier versions of Kata Containers and containerd that do not support Runtime V2 (Shim API), you can use the following alternative configuration:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[plugins.cri.containerd]
|
||||||
|
|
||||||
|
# "plugins.cri.containerd.default_runtime" is the runtime to use in containerd.
|
||||||
|
[plugins.cri.containerd.default_runtime]
|
||||||
|
# runtime_type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
|
||||||
|
runtime_type = "io.containerd.runtime.v1.linux"
|
||||||
|
|
||||||
|
# "plugins.cri.containerd.untrusted_workload_runtime" is a runtime to run untrusted workloads on it.
|
||||||
|
[plugins.cri.containerd.untrusted_workload_runtime]
|
||||||
|
# runtime_type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
|
||||||
|
runtime_type = "io.containerd.runtime.v1.linux"
|
||||||
|
|
||||||
|
# runtime_engine is the name of the runtime engine used by containerd.
|
||||||
|
runtime_engine = "/usr/bin/kata-runtime"
|
||||||
|
```
|
||||||
|
|
||||||
|
You can find more information on the [Containerd config documentation](https://github.com/containerd/cri/blob/master/docs/config.md)
|
||||||
|
|
||||||
|
|
||||||
|
#### Kata Containers as the default runtime
|
||||||
|
|
||||||
|
If you want to set Kata Containers as the only runtime in the deployment, you can simply configure as follows:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[plugins.cri.containerd]
|
||||||
|
[plugins.cri.containerd.default_runtime]
|
||||||
|
runtime_type = "io.containerd.kata.v2"
|
||||||
|
```
|
||||||
|
|
||||||
|
Alternatively, for the earlier versions of Kata Containers and containerd that do not support Runtime V2 (Shim API), you can use the following alternative configuration:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[plugins.cri.containerd]
|
||||||
|
[plugins.cri.containerd.default_runtime]
|
||||||
|
runtime_type = "io.containerd.runtime.v1.linux"
|
||||||
|
runtime_engine = "/usr/bin/kata-runtime"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configuration for `cri-tools`
|
||||||
|
|
||||||
|
> **Note:** If you skipped the [Install `cri-tools`](#install-cri-tools) section, you can skip this section too.
|
||||||
|
|
||||||
|
First, add the CNI configuration in the containerd configuration.
|
||||||
|
|
||||||
|
The following is the configuration if you installed CNI as the *[Install CNI plugins](#install-cni-plugins)* section outlined.
|
||||||
|
|
||||||
|
Put the CNI configuration as `/etc/cni/net.d/10-mynet.conf`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"cniVersion": "0.2.0",
|
||||||
|
"name": "mynet",
|
||||||
|
"type": "bridge",
|
||||||
|
"bridge": "cni0",
|
||||||
|
"isGateway": true,
|
||||||
|
"ipMasq": true,
|
||||||
|
"ipam": {
|
||||||
|
"type": "host-local",
|
||||||
|
"subnet": "172.19.0.0/24",
|
||||||
|
"routes": [
|
||||||
|
{ "dst": "0.0.0.0/0" }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, reference the configuration directory through containerd `config.toml`:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[plugins.cri.cni]
|
||||||
|
# conf_dir is the directory in which the admin places a CNI conf.
|
||||||
|
conf_dir = "/etc/cni/net.d"
|
||||||
|
```
|
||||||
|
|
||||||
|
The configuration file of the `crictl` command line tool in `cri-tools` is located at `/etc/crictl.yaml`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
runtime-endpoint: unix:///var/run/containerd/containerd.sock
|
||||||
|
image-endpoint: unix:///var/run/containerd/containerd.sock
|
||||||
|
timeout: 10
|
||||||
|
debug: true
|
||||||
|
```
|
||||||
|
|
||||||
|
## Run
|
||||||
|
|
||||||
|
### Launch containers with `ctr` command line
|
||||||
|
|
||||||
|
To run a container with Kata Containers through the containerd command line, you can run the following:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo ctr image pull docker.io/library/busybox:latest
|
||||||
|
$ sudo ctr run --runtime io.containerd.kata.v2 -t --rm docker.io/library/busybox:latest hello sh
|
||||||
|
```
|
||||||
|
|
||||||
|
This launches a BusyBox container named `hello`, and it will be removed by `--rm` after it quits.
|
||||||
|
|
||||||
|
### Launch Pods with `crictl` command line
|
||||||
|
|
||||||
|
With the `crictl` command line of `cri-tools`, you can specify runtime class with `-r` or `--runtime` flag.
|
||||||
|
Use the following to launch Pod with `kata` runtime class with the pod in [the example](https://github.com/kubernetes-sigs/cri-tools/tree/master/docs/examples)
|
||||||
|
of `cri-tools`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo crictl runp -r kata podsandbox-config.yaml
|
||||||
|
36e23521e8f89fabd9044924c9aeb34890c60e85e1748e8daca7e2e673f8653e
|
||||||
|
```
|
||||||
|
|
||||||
|
You can add a container to the launched Pod with the following:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo crictl create 36e23521e8f89 container-config.yaml podsandbox-config.yaml
|
||||||
|
1aab7585530e62c446734f12f6899f095ce53422dafcf5a80055ba11b95f2da7
|
||||||
|
```
|
||||||
|
|
||||||
|
Now, start it with the following:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo crictl start 1aab7585530e6
|
||||||
|
1aab7585530e6
|
||||||
|
```
|
||||||
|
|
||||||
|
In Kubernetes, you need to create a `RuntimeClass` resource and add the `RuntimeClass` field in the Pod Spec
|
||||||
|
(see this [document](https://kubernetes.io/docs/concepts/containers/runtime-class/) for more information).
|
||||||
|
|
||||||
|
If `RuntimeClass` is not supported, you can use the following annotation in a Kubernetes pod to identify as an untrusted workload:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
annotations:
|
||||||
|
io.kubernetes.cri.untrusted-workload: "true"
|
||||||
|
```
|
463
docs/how-to/how-to-import-kata-logs-with-fluentd.md
Normal file
@ -0,0 +1,463 @@
|
|||||||
|
# Importing Kata Containers logs with Fluentd
|
||||||
|
|
||||||
|
* [Introduction](#introduction)
|
||||||
|
* [Overview](#overview)
|
||||||
|
* [Test stack](#test-stack)
|
||||||
|
* [Importing the logs](#importing-the-logs)
|
||||||
|
* [Direct import `logfmt` from `systemd`](#direct-import-logfmt-from-systemd)
|
||||||
|
* [Configuring `minikube`](#configuring-minikube)
|
||||||
|
* [Pull from `systemd`](#pull-from-systemd)
|
||||||
|
* [Systemd Summary](#systemd-summary)
|
||||||
|
* [Directly importing JSON](#directly-importing-json)
|
||||||
|
* [JSON in files](#json-in-files)
|
||||||
|
* [Prefixing all keys](#prefixing-all-keys)
|
||||||
|
* [Kata `shimv2`](#kata-shimv2)
|
||||||
|
* [Caveats](#caveats)
|
||||||
|
* [Summary](#summary)
|
||||||
|
|
||||||
|
# Introduction
|
||||||
|
|
||||||
|
This document describes how to import Kata Containers logs into [Fluentd](https://www.fluentd.org/),
|
||||||
|
typically for importing into an
|
||||||
|
Elastic/Fluentd/Kibana([EFK](https://github.com/kubernetes/kubernetes/tree/master/cluster/addons/fluentd-elasticsearch#running-efk-stack-in-production))
|
||||||
|
or Elastic/Logstash/Kibana([ELK](https://www.elastic.co/elastic-stack)) stack.
|
||||||
|
|
||||||
|
The majority of this document focusses on CRI-O based (classic) Kata runtime. Much of that information
|
||||||
|
also applies to the Kata `shimv2` runtime. Differences pertaining to Kata `shimv2` can be found in their
|
||||||
|
[own section](#kata-shimv2).
|
||||||
|
|
||||||
|
> **Note:** This document does not cover any aspect of "log rotation". It is expected that any production
|
||||||
|
> stack already has a method in place to control node log growth.
|
||||||
|
|
||||||
|
# Overview
|
||||||
|
|
||||||
|
Kata generates logs. The logs can come from numerous parts of the Kata stack (the runtime, proxy, shim
|
||||||
|
and even the agent). By default the logs
|
||||||
|
[go to the system journal](https://github.com/kata-containers/runtime#logging),
|
||||||
|
but they can also be configured to be stored in files.
|
||||||
|
|
||||||
|
The logs default format is in [`logfmt` structured logging](https://brandur.org/logfmt), but can be switched to
|
||||||
|
be JSON with a command line option.
|
||||||
|
|
||||||
|
Provided below are some examples of Kata log import and processing using
|
||||||
|
[Fluentd](https://www.fluentd.org/).
|
||||||
|
|
||||||
|
## Test stack
|
||||||
|
|
||||||
|
Some of the testing we can perform locally, but other times we really need a live stack for testing.
|
||||||
|
We will use a [`minikube`](https://github.com/kubernetes/minikube/) stack with EFK enabled and Kata
|
||||||
|
installed to do our tests. Some details such as specific paths and versions of components may need
|
||||||
|
to be adapted to your specific installation.
|
||||||
|
|
||||||
|
The [Kata minikube installation guide](../install/minikube-installation-guide.md) was used to install
|
||||||
|
`minikube` with Kata Containers enabled.
|
||||||
|
|
||||||
|
The minikube EFK stack `addon` is then enabled:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ minikube addons enable efk
|
||||||
|
```
|
||||||
|
|
||||||
|
> *Note*: Installing and booting EFK can take a little while - check progress with
|
||||||
|
> `kubectl get pods -n=kube-system` and wait for all the pods to get to the `Running` state.
|
||||||
|
|
||||||
|
## Importing the logs
|
||||||
|
|
||||||
|
Kata offers us two choices to make when storing the logs:
|
||||||
|
- Do we store them to the system log, or to separate files?
|
||||||
|
- Do we store them in `logfmt` format, or `JSON`?
|
||||||
|
|
||||||
|
We will start by examining the Kata default setup (`logfmt` stored in the system log), and then look
|
||||||
|
at other options.
|
||||||
|
|
||||||
|
## Direct import `logfmt` from `systemd`
|
||||||
|
|
||||||
|
Fluentd contains both a component that can read the `systemd` system journals, and a component
|
||||||
|
that can parse `logfmt` entries. We will utilise these in two separate steps to evaluate how well
|
||||||
|
the Kata logs import to the EFK stack.
|
||||||
|
|
||||||
|
### Configuring `minikube`
|
||||||
|
|
||||||
|
> **Note:** Setting up, configuration and deployment of `minikube` is not covered in exacting
|
||||||
|
> detail in this guide. It is presumed the user has the abilities and their own Kubernetes/Fluentd
|
||||||
|
> stack they are able to utilise in order to modify and test as necessary.
|
||||||
|
|
||||||
|
Minikube by default
|
||||||
|
[configures](https://github.com/kubernetes/minikube/blob/master/deploy/iso/minikube-iso/board/coreos/minikube/rootfs-overlay/etc/systemd/journald.conf)
|
||||||
|
the `systemd-journald` with the
|
||||||
|
[`Storage=volatile`](https://www.freedesktop.org/software/systemd/man/journald.conf.html) option,
|
||||||
|
which results in the journal being stored in `/run/log/journal`. Unfortunately, the Minikube EFK
|
||||||
|
Fluentd install extracts most of its logs in `/var/log`, and therefore does not mount `/run/log`
|
||||||
|
into the Fluentd pod by default. This prevents us from reading the system journal by default.
|
||||||
|
|
||||||
|
This can be worked around by patching the Minikube EFK `addon` YAML to mount `/run/log` into the
|
||||||
|
Fluentd container:
|
||||||
|
|
||||||
|
```patch
|
||||||
|
diff --git a/deploy/addons/efk/fluentd-es-rc.yaml.tmpl b/deploy/addons/efk/fluentd-es-rc.yaml.tmpl
|
||||||
|
index 75e386984..83bea48b9 100644
|
||||||
|
--- a/deploy/addons/efk/fluentd-es-rc.yaml.tmpl
|
||||||
|
+++ b/deploy/addons/efk/fluentd-es-rc.yaml.tmpl
|
||||||
|
@@ -44,6 +44,8 @@ spec:
|
||||||
|
volumeMounts:
|
||||||
|
- name: varlog
|
||||||
|
mountPath: /var/log
|
||||||
|
+ - name: runlog
|
||||||
|
+ mountPath: /run/log
|
||||||
|
- name: varlibdockercontainers
|
||||||
|
mountPath: /var/lib/docker/containers
|
||||||
|
readOnly: true
|
||||||
|
@@ -57,6 +59,9 @@ spec:
|
||||||
|
- name: varlog
|
||||||
|
hostPath:
|
||||||
|
path: /var/log
|
||||||
|
+ - name: runlog
|
||||||
|
+ hostPath:
|
||||||
|
+ path: /run/log
|
||||||
|
- name: varlibdockercontainers
|
||||||
|
hostPath:
|
||||||
|
path: /var/lib/docker/containers
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Note:** After making this change you will need to build your own `minikube` to encapsulate
|
||||||
|
> and use this change, or find another method to (re-)launch the Fluentd containers for the change
|
||||||
|
> to take effect.
|
||||||
|
|
||||||
|
### Pull from `systemd`
|
||||||
|
|
||||||
|
We will start with testing Fluentd pulling the Kata logs directly from the system journal with the
|
||||||
|
Fluentd [systemd plugin](https://github.com/fluent-plugin-systemd/fluent-plugin-systemd).
|
||||||
|
|
||||||
|
We modify the Fluentd config file with the following fragment. For reference, the Minikube
|
||||||
|
YAML can be found
|
||||||
|
[on GitHub](https://github.com/kubernetes/minikube/blob/master/deploy/addons/efk/fluentd-es-configmap.yaml.tmpl):
|
||||||
|
|
||||||
|
> **Note:** The below Fluentd config fragment is in the "older style" to match the Minikube version of
|
||||||
|
> Fluentd. If using a more up to date version of Fluentd, you may need to update some parameters, such as
|
||||||
|
> using `matches` rather than `filters` and placing `@` before `type`. Your Fluentd should warn you in its
|
||||||
|
> logs if such updates are necessary.
|
||||||
|
|
||||||
|
```
|
||||||
|
<source>
|
||||||
|
type systemd
|
||||||
|
tag kata-containers
|
||||||
|
path /run/log/journal
|
||||||
|
pos_file /run/log/journal/kata-journald.pos
|
||||||
|
filters [{"SYSLOG_IDENTIFIER": "kata-runtime"}, {"SYSLOG_IDENTIFIER": "kata-proxy"}, {"SYSLOG_IDENTIFIER": "kata-shim"}]
|
||||||
|
read_from_head true
|
||||||
|
</source>
|
||||||
|
```
|
||||||
|
|
||||||
|
We then apply the new YAML, and restart the Fluentd pod (by killing it, and letting the `ReplicationController`
|
||||||
|
start a new instance, which will pick up the new `ConfigurationMap`):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ kubectl apply -f new-fluentd-cm.yaml
|
||||||
|
$ kubectl delete pod -n=kube-system fluentd-es-XXXXX
|
||||||
|
```
|
||||||
|
|
||||||
|
Now open the Kibana UI to the Minikube EFK `addon`, and launch a Kata QEMU based test pod in order to
|
||||||
|
generate some Kata specific log entries:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ minikube addons open efk
|
||||||
|
$ cd $GOPATH/src/github.com/kata-containers/packaging/kata-deploy
|
||||||
|
$ kubectl apply -f examples/nginx-deployment-qemu.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
Looking at the Kibana UI, we can now see that some `kata-runtime` tagged records have appeared:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
If we now filter on that tag, we can see just the Kata related entries
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
If we expand one of those entries, we can see we have imported useful information. You can then
|
||||||
|
sub-filter on, for instance, the `SYSLOG_IDENTIFIER` to differentiate the Kata components, and
|
||||||
|
on the `PRIORITY` to filter out critical issues etc.
|
||||||
|
|
||||||
|
Kata generates a significant amount of Kata specific information, which can be seen as
|
||||||
|
[`logfmt`](https://github.com/kata-containers/tests/tree/master/cmd/log-parser#logfile-requirements)
|
||||||
|
data contained in the `MESSAGE` field. Imported as-is, there is no easy way to filter on that data
|
||||||
|
in Kibana:
|
||||||
|
|
||||||
|
.
|
||||||
|
|
||||||
|
We can however further sub-parse the Kata entries using the
|
||||||
|
[Fluentd plugins](https://docs.fluentbit.io/manual/parser/logfmt) that will parse
|
||||||
|
`logfmt` formatted data. We can utilise these to parse the sub-fields using a Fluentd filter
|
||||||
|
section. At the same time, we will prefix the new fields with `kata_` to make it clear where
|
||||||
|
they have come from:
|
||||||
|
|
||||||
|
```
|
||||||
|
<filter kata-containers>
|
||||||
|
@type parser
|
||||||
|
key_name MESSAGE
|
||||||
|
format logfmt
|
||||||
|
reserve_data true
|
||||||
|
inject_key_prefix kata_
|
||||||
|
</filter>
|
||||||
|
```
|
||||||
|
|
||||||
|
The Minikube Fluentd version does not come with the `logfmt` parser installed, so we will run a local
|
||||||
|
test to check the parsing works. The resulting output from Fluentd is:
|
||||||
|
|
||||||
|
```
|
||||||
|
2020-02-21 10:31:27.810781647 +0000 kata-containers:
|
||||||
|
{"_BOOT_ID":"590edceeef5545a784ec8c6181a10400",
|
||||||
|
"_MACHINE_ID":"3dd49df65a1b467bac8d51f2eaa17e92",
|
||||||
|
"_HOSTNAME":"minikube",
|
||||||
|
"PRIORITY":"6",
|
||||||
|
"_UID":"0",
|
||||||
|
"_GID":"0",
|
||||||
|
"_SYSTEMD_SLICE":"system.slice",
|
||||||
|
"_SELINUX_CONTEXT":"kernel",
|
||||||
|
"_CAP_EFFECTIVE":"3fffffffff",
|
||||||
|
"_TRANSPORT":"syslog",
|
||||||
|
"_SYSTEMD_CGROUP":"/system.slice/crio.service",
|
||||||
|
"_SYSTEMD_UNIT":"crio.service",
|
||||||
|
"_SYSTEMD_INVOCATION_ID":"f2d99c784e6f406c87742f4bca16a4f6",
|
||||||
|
"SYSLOG_IDENTIFIER":"kata-runtime",
|
||||||
|
"_COMM":"kata-runtime",
|
||||||
|
"_EXE":"/opt/kata/bin/kata-runtime",
|
||||||
|
"SYSLOG_TIMESTAMP":"Feb 21 10:31:27 ",
|
||||||
|
"_CMDLINE":"/opt/kata/bin/kata-runtime --kata-config /opt/kata/share/defaults/kata-containers/configuration-qemu.toml --root /run/runc state 7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997",
|
||||||
|
"SYSLOG_PID":"14314",
|
||||||
|
"_PID":"14314",
|
||||||
|
"MESSAGE":"time=\"2020-02-21T10:31:27.810781647Z\" level=info msg=\"release sandbox\" arch=amd64 command=state container=7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997 name=kata-runtime pid=14314 sandbox=1c3e77cad66aa2b6d8cc846f818370f79cb0104c0b840f67d0f502fd6562b68c source=virtcontainers subsystem=sandbox",
|
||||||
|
"SYSLOG_RAW":"<6>Feb 21 10:31:27 kata-runtime[14314]: time=\"2020-02-21T10:31:27.810781647Z\" level=info msg=\"release sandbox\" arch=amd64 command=state container=7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997 name=kata-runtime pid=14314 sandbox=1c3e77cad66aa2b6d8cc846f818370f79cb0104c0b840f67d0f502fd6562b68c source=virtcontainers subsystem=sandbox\n",
|
||||||
|
"_SOURCE_REALTIME_TIMESTAMP":"1582281087810805",
|
||||||
|
"kata_level":"info",
|
||||||
|
"kata_msg":"release sandbox",
|
||||||
|
"kata_arch":"amd64",
|
||||||
|
"kata_command":"state",
|
||||||
|
"kata_container":"7cdd31660d8705facdadeb8598d2c0bd008e8142c54e3b3069abd392c8d58997",
|
||||||
|
"kata_name":"kata-runtime",
|
||||||
|
"kata_pid":14314,
|
||||||
|
"kata_sandbox":"1c3e77cad66aa2b6d8cc846f818370f79cb0104c0b840f67d0f502fd6562b68c",
|
||||||
|
"kata_source":"virtcontainers",
|
||||||
|
"kata_subsystem":"sandbox"}
|
||||||
|
```
|
||||||
|
|
||||||
|
Here we can see that the `MESSAGE` field has been parsed out and pre-pended into the `kata_*` fields,
|
||||||
|
which contain usefully filterable fields such as `kata_level`, `kata_command` and `kata_subsystem` etc.
|
||||||
|
|
||||||
|
### Systemd Summary
|
||||||
|
|
||||||
|
We have managed to configure Fluentd to capture the Kata logs entries from the system
|
||||||
|
journal, and further managed to then parse out the `logfmt` message into JSON to allow further analysis
|
||||||
|
inside Elastic/Kibana.
|
||||||
|
|
||||||
|
## Directly importing JSON
|
||||||
|
|
||||||
|
The underlying basic data format used by Fluentd and Elastic is JSON. If we output JSON
|
||||||
|
directly from Kata, that should make overall import and processing of the log entries more efficient.
|
||||||
|
|
||||||
|
There are potentially two things we can do with Kata here:
|
||||||
|
|
||||||
|
- Get Kata to [output its logs in `JSON` format](https://github.com/kata-containers/runtime#logging) rather
|
||||||
|
than `logfmt`.
|
||||||
|
- Get Kata to log directly into a file, rather than via the system journal. This would allow us to not need
|
||||||
|
to parse the systemd format files, and capture the Kata log lines directly. It would also avoid Fluentd
|
||||||
|
having to potentially parse or skip over many non-Kata related systemd journal entries that it is not at all
|
||||||
|
interested in.
|
||||||
|
|
||||||
|
In theory we could get Kata to post its messages in JSON format to the systemd journal by adding the
|
||||||
|
`--log-format=json` option to the Kata runtime, and then swapping the `logfmt` parser for the `json`
|
||||||
|
parser, but we would still need to parse the systemd files. We will skip this setup in this document, and
|
||||||
|
go directly to a full Kata specific JSON format logfile test.
|
||||||
|
|
||||||
|
### JSON in files
|
||||||
|
|
||||||
|
Kata runtime has the ability to generate JSON logs directly, rather than its default `logfmt` format. Passing
|
||||||
|
the `--log-format=json` argument to the Kata runtime enables this. The easiest way to pass in this extra
|
||||||
|
parameter from a [Kata deploy](https://github.com/kata-containers/packaging/tree/master/kata-deploy) installation
|
||||||
|
is to edit the `/opt/kata/bin/kata-qemu` shell script (generated by the
|
||||||
|
[Kata packaging release scripts](https://github.com/kata-containers/packaging/blob/master/release/kata-deploy-binaries.sh)).
|
||||||
|
|
||||||
|
At the same time, we will add the `--log=/var/log/kata-runtime.log` argument to store the Kata logs in their
|
||||||
|
own file (rather than into the system journal).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
#!/bin/bash
|
||||||
|
/opt/kata/bin/kata-runtime --kata-config "/opt/kata/share/defaults/kata-containers/configuration-qemu.toml" --log-format=json --log=/var/log/kata-runtime.log $@
|
||||||
|
```
|
||||||
|
|
||||||
|
And then we'll add the Fluentd config section to parse that file. Note, we inform the parser that Kata is
|
||||||
|
generating timestamps in `iso8601` format. Kata places these timestamps into a field called `time`, which
|
||||||
|
is the default field the Fluentd parser looks for:
|
||||||
|
|
||||||
|
```
|
||||||
|
<source>
|
||||||
|
type tail
|
||||||
|
tag kata-containers
|
||||||
|
path /var/log/kata-runtime.log
|
||||||
|
pos_file /var/log/kata-runtime.pos
|
||||||
|
format json
|
||||||
|
time_format %iso8601
|
||||||
|
read_from_head true
|
||||||
|
</source>
|
||||||
|
```
|
||||||
|
|
||||||
|
This imports the `kata-runtime` logs, with the resulting records looking like:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Something to note here is that we seem to have gained an awful lot of fairly identical looking fields in the
|
||||||
|
elastic database:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
In reality, they are not all identical, but do come out of one of the Kata log entries - from the
|
||||||
|
`kill` command. A JSON fragment showing an example is below:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"EndpointProperties": {
|
||||||
|
"Iface": {
|
||||||
|
"Index": 4,
|
||||||
|
"MTU": 1460,
|
||||||
|
"TxQLen": 0,
|
||||||
|
"Name": "eth0",
|
||||||
|
"HardwareAddr": "ClgKAQAL",
|
||||||
|
"Flags": 19,
|
||||||
|
"RawFlags": 69699,
|
||||||
|
"ParentIndex": 15,
|
||||||
|
"MasterIndex": 0,
|
||||||
|
"Namespace": null,
|
||||||
|
"Alias": "",
|
||||||
|
"Statistics": {
|
||||||
|
"RxPackets": 1,
|
||||||
|
"TxPackets": 5,
|
||||||
|
"RxBytes": 42,
|
||||||
|
"TxBytes": 426,
|
||||||
|
"RxErrors": 0,
|
||||||
|
"TxErrors": 0,
|
||||||
|
"RxDropped": 0,
|
||||||
|
"TxDropped": 0,
|
||||||
|
"Multicast": 0,
|
||||||
|
"Collisions": 0,
|
||||||
|
"RxLengthErrors": 0,
|
||||||
|
"RxOverErrors": 0,
|
||||||
|
"RxCrcErrors": 0,
|
||||||
|
"RxFrameErrors": 0,
|
||||||
|
"RxFifoErrors": 0,
|
||||||
|
"RxMissedErrors": 0,
|
||||||
|
"TxAbortedErrors": 0,
|
||||||
|
"TxCarrierErrors": 0,
|
||||||
|
"TxFifoErrors": 0,
|
||||||
|
"TxHeartbeatErrors": 0,
|
||||||
|
"TxWindowErrors": 0,
|
||||||
|
"RxCompressed": 0,
|
||||||
|
"TxCompressed": 0
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
If these new fields are not required, then a Fluentd
|
||||||
|
[`record_transformer` filter](https://docs.fluentd.org/filter/record_transformer#remove_keys)
|
||||||
|
could be used to delete them before they are injected into Elastic.
|
||||||
|
|
||||||
|
#### Prefixing all keys
|
||||||
|
|
||||||
|
It may be noted above that all the fields are imported with their base native name, such as
|
||||||
|
`arch` and `level`. It may be better for data storage and processing if all the fields were
|
||||||
|
identifiable as having come from Kata, and avoid namespace clashes with other imports.
|
||||||
|
This can be achieved by prefixing all the keys with, say, `kata_`. It appears `fluentd` cannot
|
||||||
|
do this directly in the input or match phases, but can in the filter/parse phase (as was done
|
||||||
|
when processing `logfmt` data for instance). To achieve this, we can first input the Kata
|
||||||
|
JSON data as a single line, and then add the prefix using a JSON filter section:
|
||||||
|
|
||||||
|
```
|
||||||
|
# Pull in as a single line...
|
||||||
|
<source>
|
||||||
|
@type tail
|
||||||
|
path /var/log/kata-runtime.log
|
||||||
|
pos_file /var/log/kata-runtime.pos
|
||||||
|
read_from_head true
|
||||||
|
tag kata-runtime
|
||||||
|
<parse>
|
||||||
|
@type none
|
||||||
|
</parse>
|
||||||
|
</source>
|
||||||
|
|
||||||
|
<filter kata-runtime>
|
||||||
|
@type parser
|
||||||
|
key_name message
|
||||||
|
# drop the original single line `message` entry
|
||||||
|
reserve_data false
|
||||||
|
inject_key_prefix kata_
|
||||||
|
<parse>
|
||||||
|
@type json
|
||||||
|
</parse>
|
||||||
|
</filter>
|
||||||
|
```
|
||||||
|
|
||||||
|
# Kata `shimv2`
|
||||||
|
|
||||||
|
When using the Kata `shimv2` runtime with `containerd`, as described in this
|
||||||
|
[how-to guide](containerd-kata.md#containerd-runtime-v2-api-shim-v2-api), the Kata logs are routed
|
||||||
|
differently, and some adjustments to the above methods will be necessary to filter them in Fluentd.
|
||||||
|
|
||||||
|
The Kata `shimv2` logs are different in two primary ways:
|
||||||
|
|
||||||
|
- The Kata logs are directed via `containerd`, and will be captured along with the `containerd` logs,
|
||||||
|
such as on the containerd stdout or in the system journal.
|
||||||
|
- In parallel, Kata `shimv2` places its logs into the system journal under the systemd name of `kata`.
|
||||||
|
|
||||||
|
Below is an example Fluentd configuration fragment showing one possible method of extracting and separating
|
||||||
|
the `containerd` and Kata logs from the system journal by filtering on the Kata `SYSLOG_IDENTIFIER` field,
|
||||||
|
using the [Fluentd v0.12 rewrite_tag_filter](https://docs.fluentd.org/v/0.12/output/rewrite_tag_filter):
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
<source>
|
||||||
|
type systemd
|
||||||
|
path /path/to/journal
|
||||||
|
# capture the containerd logs
|
||||||
|
filters [{ "_SYSTEMD_UNIT": "containerd.service" }]
|
||||||
|
pos_file /tmp/systemd-containerd.pos
|
||||||
|
read_from_head true
|
||||||
|
# tag those temporarily, as we will filter them and rewrite the tags
|
||||||
|
tag containerd_tmp_tag
|
||||||
|
</source>
|
||||||
|
|
||||||
|
# filter out and split between kata entries and containerd entries
|
||||||
|
<match containerd_tmp_tag>
|
||||||
|
@type rewrite_tag_filter
|
||||||
|
# Tag Kata entries
|
||||||
|
<rule>
|
||||||
|
key SYSLOG_IDENTIFIER
|
||||||
|
pattern kata
|
||||||
|
tag kata_tag
|
||||||
|
</rule>
|
||||||
|
# Anything that was not matched so far, tag as containerd
|
||||||
|
<rule>
|
||||||
|
key MESSAGE
|
||||||
|
pattern /.+/
|
||||||
|
tag containerd_tag
|
||||||
|
</rule>
|
||||||
|
</match>
|
||||||
|
```
|
||||||
|
|
||||||
|
# Caveats
|
||||||
|
|
||||||
|
> **Warning:** You should be aware of the following caveats, which may disrupt or change what and how
|
||||||
|
> you capture and process the Kata Containers logs.
|
||||||
|
|
||||||
|
The following caveats should be noted:
|
||||||
|
|
||||||
|
- There is a [known issue](https://github.com/kata-containers/runtime/issues/985) whereby enabling
|
||||||
|
full debug in Kata, particularly enabling agent kernel log messages, can result in corrupt log lines
|
||||||
|
being generated by Kata (due to overlapping multiple output streams).
|
||||||
|
- Presently only the `kata-runtime` can generate JSON logs, and direct them to files. Other components
|
||||||
|
such as the `proxy` and `shim` can only presently report to the system journal. Hopefully these
|
||||||
|
components will be extended with extra functionality.
|
||||||
|
|
||||||
|
# Summary
|
||||||
|
|
||||||
|
We have shown how native Kata logs using the systemd journal and `logfmt` data can be imported, and also
|
||||||
|
how Kata can be instructed to generate JSON logs directly, and import those into Fluentd.
|
||||||
|
|
||||||
|
We have detailed a few known caveats, and leave it to the implementer to choose the best method for their
|
||||||
|
system.
|
106
docs/how-to/how-to-load-kernel-modules-with-kata.md
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
# Loading kernel modules
|
||||||
|
|
||||||
|
A new feature for loading kernel modules was introduced in Kata Containers 1.9.
|
||||||
|
The list of kernel modules and their parameters can be provided using the
|
||||||
|
configuration file or OCI annotations. The [Kata runtime][1] gives that
|
||||||
|
information to the [Kata Agent][2] through gRPC when the sandbox is created.
|
||||||
|
The [Kata Agent][2] will insert the kernel modules using `modprobe(8)`, hence
|
||||||
|
modules dependencies are resolved automatically.
|
||||||
|
|
||||||
|
The sandbox will not be started when:
|
||||||
|
|
||||||
|
* A kernel module is specified and the `modprobe(8)` command is not installed in
|
||||||
|
the guest or it fails loading the module.
|
||||||
|
* The module is not available in the guest or it doesn't meet the guest kernel
|
||||||
|
requirements, like architecture and version.
|
||||||
|
|
||||||
|
The following sections document the different ways that exist for
|
||||||
|
loading kernel modules in Kata Containers.
|
||||||
|
|
||||||
|
- [Using Kata Configuration file](#using-kata-configuration-file)
|
||||||
|
- [Using annotations](#using-annotations)
|
||||||
|
|
||||||
|
# Using Kata Configuration file
|
||||||
|
|
||||||
|
```
|
||||||
|
NOTE: Use this method, only if you need to pass the kernel modules to all
|
||||||
|
containers. Please use annotations described below to set per pod annotations.
|
||||||
|
```
|
||||||
|
|
||||||
|
The list of kernel modules and parameters can be set in the `kernel_modules`
|
||||||
|
option as a comma separated list, where each entry in the list specifies a kernel
|
||||||
|
module and its parameters. Each list element comprises one or more space separated
|
||||||
|
fields. The first field specifies the module name and subsequent fields specify
|
||||||
|
individual parameters for the module.
|
||||||
|
|
||||||
|
The following example specifies two modules to load: `e1000e` and `i915`. Two parameters
|
||||||
|
are specified for the `e1000e` module: `InterruptThrottleRate` (which takes an array
|
||||||
|
of integer values) and `EEE` (which requires a single integer value).
|
||||||
|
|
||||||
|
```toml
|
||||||
|
kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915"]
|
||||||
|
```
|
||||||
|
|
||||||
|
Not all the container managers allow users to provide custom annotations, hence
|
||||||
|
this is the only way that Kata Containers provide for loading modules when
|
||||||
|
custom annotations are not supported.
|
||||||
|
|
||||||
|
There are some limitations with this approach:
|
||||||
|
|
||||||
|
* Write access to the Kata configuration file is required.
|
||||||
|
* The configuration file must be updated when a new container is created,
|
||||||
|
otherwise the same list of modules is used, even if they are not needed in the
|
||||||
|
container.
|
||||||
|
|
||||||
|
# Using annotations
|
||||||
|
|
||||||
|
As was mentioned above, not all containers need the same modules, therefore using
|
||||||
|
the configuration file for specifying the list of kernel modules per [POD][3] can
|
||||||
|
be a pain. Unlike the configuration file, annotations provide a way to specify
|
||||||
|
custom configurations per POD.
|
||||||
|
|
||||||
|
The list of kernel modules and parameters can be set using the annotation
|
||||||
|
`io.katacontainers.config.agent.kernel_modules` as a semicolon separated
|
||||||
|
list, where the first word of each element is considered as the module name and
|
||||||
|
the rest as its parameters.
|
||||||
|
|
||||||
|
In the following example two PODs are created, but the kernel modules `e1000e`
|
||||||
|
and `i915` are inserted only in the POD `pod1`.
|
||||||
|
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Pod
|
||||||
|
metadata:
|
||||||
|
name: pod1
|
||||||
|
annotations:
|
||||||
|
io.katacontainers.config.agent.kernel_modules: "e1000e EEE=1; i915"
|
||||||
|
spec:
|
||||||
|
runtimeClassName: kata
|
||||||
|
containers:
|
||||||
|
- name: c1
|
||||||
|
image: busybox
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
stdin: true
|
||||||
|
tty: true
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Pod
|
||||||
|
metadata:
|
||||||
|
name: pod2
|
||||||
|
spec:
|
||||||
|
runtimeClassName: kata
|
||||||
|
containers:
|
||||||
|
- name: c2
|
||||||
|
image: busybox
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
stdin: true
|
||||||
|
tty: true
|
||||||
|
```
|
||||||
|
|
||||||
|
[1]: https://github.com/kata-containers/runtime
|
||||||
|
[2]: https://github.com/kata-containers/agent
|
||||||
|
[3]: https://kubernetes.io/docs/concepts/workloads/pods/pod/
|
160
docs/how-to/how-to-set-sandbox-config-kata.md
Normal file
@ -0,0 +1,160 @@
|
|||||||
|
# Per-Pod Kata Configurations
|
||||||
|
|
||||||
|
Kata Containers gives users freedom to customize at per-pod level, by setting
|
||||||
|
a wide range of Kata specific annotations in the pod specification.
|
||||||
|
|
||||||
|
# Kata Configuration Annotations
|
||||||
|
There are several kinds of Kata configurations and they are listed below.
|
||||||
|
|
||||||
|
## Global Options
|
||||||
|
| Key | Value Type | Comments |
|
||||||
|
|-------| ----- | ----- |
|
||||||
|
| `io.katacontainers.config_path` | string | Kata config file location that overrides the default config paths |
|
||||||
|
| `io.katacontainers.pkg.oci.bundle_path` | string | OCI bundle path |
|
||||||
|
| `io.katacontainers.pkg.oci.container_type`| string | OCI container type. Only accepts `pod_container` and `pod_sandbox` |
|
||||||
|
|
||||||
|
## Runtime Options
|
||||||
|
| Key | Value Type | Comments |
|
||||||
|
|-------| ----- | ----- |
|
||||||
|
| `io.katacontainers.config.runtime.experimental` | `boolean` | determines if experimental features are enabled |
|
||||||
|
| `io.katacontainers.config.runtime.disable_guest_seccomp`| `boolean` | determines if `seccomp` should be applied inside guest |
|
||||||
|
| `io.katacontainers.config.runtime.disable_new_netns` | `boolean` | determines if a new netns is created for the hypervisor process |
|
||||||
|
| `io.katacontainers.config.runtime.internetworking_model` | string| determines how the VM should be connected to the container network interface. Valid values are `macvtap`, `tcfilter` and `none` |
|
||||||
|
| `io.katacontainers.config.runtime.sandbox_cgroup_only`| `boolean` | determines if Kata processes are managed only in sandbox cgroup |
|
||||||
|
|
||||||
|
## Agent Options
|
||||||
|
| Key | Value Type | Comments |
|
||||||
|
|-------| ----- | ----- |
|
||||||
|
| `io.katacontainers.config.agent.enable_tracing` | `boolean` | enable tracing for the agent |
|
||||||
|
| `io.katacontainers.config.agent.kernel_modules` | string | the list of kernel modules and their parameters that will be loaded in the guest kernel. Semicolon separated list of kernel modules and their parameters. These modules will be loaded in the guest kernel using `modprobe`(8). E.g., `e1000e InterruptThrottleRate=3000,3000,3000 EEE=1; i915 enable_ppgtt=0` |
|
||||||
|
| `io.katacontainers.config.agent.trace_mode` | string | the trace mode for the agent |
|
||||||
|
| `io.katacontainers.config.agent.trace_type` | string | the trace type for the agent |
|
||||||
|
|
||||||
|
## Hypervisor Options
|
||||||
|
| Key | Value Type | Comments |
|
||||||
|
|-------| ----- | ----- |
|
||||||
|
| `io.katacontainers.config.hypervisor.asset_hash_type` | string | the hash type used for assets verification, default is `sha512` |
|
||||||
|
| `io.katacontainers.config.hypervisor.block_device_cache_direct` | `boolean` | Denotes whether use of `O_DIRECT` (bypass the host page cache) is enabled |
|
||||||
|
| `io.katacontainers.config.hypervisor.block_device_cache_noflush` | `boolean` | Denotes whether flush requests for the device are ignored |
|
||||||
|
| `io.katacontainers.config.hypervisor.block_device_cache_set` | `boolean` | cache-related options will be set to block devices or not |
|
||||||
|
| `io.katacontainers.config.hypervisor.block_device_driver` | string | the driver to be used for block device, valid values are `virtio-blk`, `virtio-scsi`, `nvdimm`|
|
||||||
|
| `io.katacontainers.config.hypervisor.default_max_vcpus` | uint32| the maximum number of vCPUs allocated for the VM by the hypervisor |
|
||||||
|
| `io.katacontainers.config.hypervisor.default_memory` | uint32| the memory assigned for a VM by the hypervisor in `MiB` |
|
||||||
|
| `io.katacontainers.config.hypervisor.default_vcpus` | uint32| the default vCPUs assigned for a VM by the hypervisor |
|
||||||
|
| `io.katacontainers.config.hypervisor.disable_block_device_use` | `boolean` | disallow a block device from being used |
|
||||||
|
| `io.katacontainers.config.hypervisor.disable_vhost_net` | `boolean` | specify if `vhost-net` is not available on the host |
|
||||||
|
| `io.katacontainers.config.hypervisor.enable_hugepages` | `boolean` | if the memory should be `pre-allocated` from huge pages |
|
||||||
|
| `io.katacontainers.config.hypervisor.enable_iothreads` | `boolean`| enable IO to be processed in a separate thread. Supported currently for the `virtio-scsi` driver |
|
||||||
|
| `io.katacontainers.config.hypervisor.enable_mem_prealloc` | `boolean` | determines if the VM memory should be pre-allocated by the hypervisor |
|
||||||
|
| `io.katacontainers.config.hypervisor.enable_swap` | `boolean` | enable swap of VM memory |
|
||||||
|
| `io.katacontainers.config.hypervisor.entropy_source` | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) |
|
||||||
|
| `io.katacontainers.config.hypervisor.file_mem_backend` | string | file based memory backend root directory |
|
||||||
|
| `io.katacontainers.config.hypervisor.firmware_hash` | string | container firmware SHA-512 hash value |
|
||||||
|
| `io.katacontainers.config.hypervisor.firmware` | string | the guest firmware that will run the container VM |
|
||||||
|
| `io.katacontainers.config.hypervisor.guest_hook_path` | string | the path within the VM that will be used for drop in hooks |
|
||||||
|
| `io.katacontainers.config.hypervisor.hotplug_vfio_on_root_bus` | `boolean` | indicate if devices need to be hotplugged on the root bus instead of a bridge|
|
||||||
|
| `io.katacontainers.config.hypervisor.hypervisor_hash` | string | container hypervisor binary SHA-512 hash value |
|
||||||
|
| `io.katacontainers.config.hypervisor.image_hash` | string | container guest image SHA-512 hash value |
|
||||||
|
| `io.katacontainers.config.hypervisor.image` | string | the guest image that will run in the container VM |
|
||||||
|
| `io.katacontainers.config.hypervisor.initrd_hash` | string | container guest initrd SHA-512 hash value |
|
||||||
|
| `io.katacontainers.config.hypervisor.initrd` | string | the guest initrd image that will run in the container VM |
|
||||||
|
| `io.katacontainers.config.hypervisor.jailer_hash` | string | container jailer SHA-512 hash value |
|
||||||
|
| `io.katacontainers.config.hypervisor.jailer_path` | string | the jailer that will constrain the container VM |
|
||||||
|
| `io.katacontainers.config.hypervisor.kernel_hash` | string | container kernel image SHA-512 hash value |
|
||||||
|
| `io.katacontainers.config.hypervisor.kernel_params` | string | additional guest kernel parameters |
|
||||||
|
| `io.katacontainers.config.hypervisor.kernel` | string | the kernel used to boot the container VM |
|
||||||
|
| `io.katacontainers.config.hypervisor.machine_accelerators` | string | machine specific accelerators for the hypervisor |
|
||||||
|
| `io.katacontainers.config.hypervisor.machine_type` | string | the type of machine being emulated by the hypervisor |
|
||||||
|
| `io.katacontainers.config.hypervisor.memory_offset` | uint32| the memory space used for `nvdimm` device by the hypervisor |
|
||||||
|
| `io.katacontainers.config.hypervisor.memory_slots` | uint32| the memory slots assigned to the VM by the hypervisor |
|
||||||
|
| `io.katacontainers.config.hypervisor.msize_9p` | uint32 | the `msize` for 9p shares |
|
||||||
|
| `io.katacontainers.config.hypervisor.path` | string | the hypervisor that will run the container VM |
|
||||||
|
| `io.katacontainers.config.hypervisor.shared_fs` | string | the shared file system type, either `virtio-9p` or `virtio-fs` |
|
||||||
|
| `io.katacontainers.config.hypervisor.use_vsock` | `boolean` | specify use of `vsock` for agent communication |
|
||||||
|
| `io.katacontainers.config.hypervisor.virtio_fs_cache_size` | uint32 | virtio-fs DAX cache size in `MiB` |
|
||||||
|
| `io.katacontainers.config.hypervisor.virtio_fs_cache` | string | the cache mode for virtio-fs, valid values are `always`, `auto` and `none` |
|
||||||
|
| `io.katacontainers.config.hypervisor.virtio_fs_daemon` | string | virtio-fs `vhost-user` daemon path |
|
||||||
|
| `io.katacontainers.config.hypervisor.virtio_fs_extra_args` | string | extra options passed to `virtiofs` daemon |
|
||||||
|
|
||||||
|
# CRI Configuration
|
||||||
|
|
||||||
|
In case of CRI-O, all annotations specified in the pod spec are passed down to Kata.
|
||||||
|
For containerd, annotations specified in the pod spec are passed down to Kata
|
||||||
|
starting with version `1.3.0`. Additionally, extra configuration is needed for containerd,
|
||||||
|
by providing a `pod_annotations` field in the containerd config file. The `pod_annotations`
|
||||||
|
field is a list of annotations that can be passed down to Kata as OCI annotations.
|
||||||
|
It supports golang match patterns. Since annotations supported by Kata follow the pattern
|
||||||
|
`io.katacontainers.*`, the following configuration would work for passing annotations to
|
||||||
|
Kata from containerd:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ cat /etc/containerd/config
|
||||||
|
....
|
||||||
|
|
||||||
|
[plugins.cri.containerd.runtimes.kata]
|
||||||
|
runtime_type = "io.containerd.runc.v1"
|
||||||
|
pod_annotations = ["io.katacontainers.*"]
|
||||||
|
[plugins.cri.containerd.runtimes.kata.options]
|
||||||
|
BinaryName = "/usr/bin/kata-runtime"
|
||||||
|
....
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Additional documentation on the above configuration can be found in the
|
||||||
|
[containerd docs](https://github.com/containerd/cri/blob/8d5a8355d07783ba2f8f451209f6bdcc7c412346/docs/config.md).
|
||||||
|
|
||||||
|
# Example - Using annotations
|
||||||
|
|
||||||
|
As mentioned above, not all containers need the same modules, therefore using
|
||||||
|
the configuration file for specifying the list of kernel modules per POD can
|
||||||
|
be a pain. Unlike the configuration file, annotations provide a way to specify
|
||||||
|
custom configurations per POD.
|
||||||
|
|
||||||
|
The list of kernel modules and parameters can be set using the annotation
|
||||||
|
`io.katacontainers.config.agent.kernel_modules` as a semicolon separated
|
||||||
|
list, where the first word of each element is considered as the module name and
|
||||||
|
the rest as its parameters.
|
||||||
|
|
||||||
|
Also users might want to enable guest `seccomp` to provide better isolation with a
|
||||||
|
little performance sacrifice. The annotation
|
||||||
|
`io.katacontainers.config.runtime.disable_guest_seccomp` can be used for this purpose.
|
||||||
|
|
||||||
|
In the following example two PODs are created, but the kernel modules `e1000e`
|
||||||
|
and `i915` are inserted only in the POD `pod1`. Also guest `seccomp` is only enabled
|
||||||
|
in the POD `pod2`.
|
||||||
|
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Pod
|
||||||
|
metadata:
|
||||||
|
name: pod1
|
||||||
|
annotations:
|
||||||
|
io.katacontainers.config.agent.kernel_modules: "e1000e EEE=1; i915"
|
||||||
|
spec:
|
||||||
|
runtimeClassName: kata
|
||||||
|
containers:
|
||||||
|
- name: c1
|
||||||
|
image: busybox
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
stdin: true
|
||||||
|
tty: true
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Pod
|
||||||
|
metadata:
|
||||||
|
name: pod2
|
||||||
|
annotations:
|
||||||
|
    io.katacontainers.config.runtime.disable_guest_seccomp: "false"
|
||||||
|
spec:
|
||||||
|
runtimeClassName: kata
|
||||||
|
containers:
|
||||||
|
- name: c2
|
||||||
|
image: busybox
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
stdin: true
|
||||||
|
tty: true
|
||||||
|
```
|
220
docs/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md
Normal file
@ -0,0 +1,220 @@
|
|||||||
|
# How to use Kata Containers and CRI (containerd plugin) with Kubernetes
|
||||||
|
|
||||||
|
* [Requirements](#requirements)
|
||||||
|
* [Install and configure containerd](#install-and-configure-containerd)
|
||||||
|
* [Install and configure Kubernetes](#install-and-configure-kubernetes)
|
||||||
|
* [Install Kubernetes](#install-kubernetes)
|
||||||
|
* [Configure Kubelet to use containerd](#configure-kubelet-to-use-containerd)
|
||||||
|
* [Configure HTTP proxy - OPTIONAL](#configure-http-proxy---optional)
|
||||||
|
* [Start Kubernetes](#start-kubernetes)
|
||||||
|
* [Install a Pod Network](#install-a-pod-network)
|
||||||
|
* [Allow pods to run in the master node](#allow-pods-to-run-in-the-master-node)
|
||||||
|
* [Create an untrusted pod using Kata Containers](#create-an-untrusted-pod-using-kata-containers)
|
||||||
|
* [Delete created pod](#delete-created-pod)
|
||||||
|
|
||||||
|
This document describes how to set up a single-machine Kubernetes (k8s) cluster.
|
||||||
|
|
||||||
|
The Kubernetes cluster will use the
|
||||||
|
[CRI containerd plugin](https://github.com/containerd/cri) and
|
||||||
|
[Kata Containers](https://katacontainers.io) to launch untrusted workloads.
|
||||||
|
|
||||||
|
For Kata Containers 1.5.0-rc2 and above, we will use `containerd-shim-kata-v2` (short as `shimv2` in this documentation)
|
||||||
|
to launch Kata Containers. For the previous version of Kata Containers, the Pods are launched with `kata-runtime`.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Kubernetes, Kubelet, `kubeadm`
|
||||||
|
- containerd with `cri` plug-in
|
||||||
|
- Kata Containers
|
||||||
|
|
||||||
|
> **Note:** For information about the supported versions of these components,
|
||||||
|
> see the Kata Containers
|
||||||
|
> [`versions.yaml`](https://github.com/kata-containers/runtime/blob/master/versions.yaml)
|
||||||
|
> file.
|
||||||
|
|
||||||
|
## Install and configure containerd
|
||||||
|
|
||||||
|
First, follow the [How to use Kata Containers and Containerd](containerd-kata.md) to install and configure containerd.
|
||||||
|
Then, make sure containerd works with the [examples in it](containerd-kata.md#run).
|
||||||
|
|
||||||
|
## Install and configure Kubernetes
|
||||||
|
|
||||||
|
### Install Kubernetes
|
||||||
|
|
||||||
|
- Follow the instructions for
|
||||||
|
[`kubeadm` installation](https://kubernetes.io/docs/setup/independent/install-kubeadm/).
|
||||||
|
|
||||||
|
- Check `kubeadm` is now available
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ command -v kubeadm
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configure Kubelet to use containerd
|
||||||
|
|
||||||
|
In order to allow Kubelet to use containerd (using the CRI interface), configure the service to point to the `containerd` socket.
|
||||||
|
|
||||||
|
- Configure Kubernetes to use `containerd`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo mkdir -p /etc/systemd/system/kubelet.service.d/
|
||||||
|
$ cat << EOF | sudo tee /etc/systemd/system/kubelet.service.d/0-containerd.conf
|
||||||
|
[Service]
|
||||||
|
Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock"
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
- Inform systemd about the new configuration
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo systemctl daemon-reload
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configure HTTP proxy - OPTIONAL
|
||||||
|
|
||||||
|
If you are behind a proxy, use the following script to configure your proxy for docker, Kubelet, and containerd:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ services="
|
||||||
|
kubelet
|
||||||
|
containerd
|
||||||
|
docker
|
||||||
|
"
|
||||||
|
|
||||||
|
$ for service in ${services}; do
|
||||||
|
|
||||||
|
service_dir="/etc/systemd/system/${service}.service.d/"
|
||||||
|
sudo mkdir -p ${service_dir}
|
||||||
|
|
||||||
|
cat << EOT | sudo tee "${service_dir}/proxy.conf"
|
||||||
|
[Service]
|
||||||
|
Environment="HTTP_PROXY=${http_proxy}"
|
||||||
|
Environment="HTTPS_PROXY=${https_proxy}"
|
||||||
|
Environment="NO_PROXY=${no_proxy}"
|
||||||
|
EOT
|
||||||
|
done
|
||||||
|
|
||||||
|
$ sudo systemctl daemon-reload
|
||||||
|
```
|
||||||
|
|
||||||
|
## Start Kubernetes
|
||||||
|
|
||||||
|
- Make sure `containerd` is up and running
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo systemctl restart containerd
|
||||||
|
$ sudo systemctl status containerd
|
||||||
|
```
|
||||||
|
|
||||||
|
- Prevent conflicts between `docker` iptables (packet filtering) rules and k8s pod communication
|
||||||
|
|
||||||
|
If Docker is installed on the node, it is necessary to modify the rule
|
||||||
|
below. See https://github.com/kubernetes/kubernetes/issues/40182 for further
|
||||||
|
details.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo iptables -P FORWARD ACCEPT
|
||||||
|
```
|
||||||
|
|
||||||
|
- Start cluster using `kubeadm`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo kubeadm init --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16
|
||||||
|
$ export KUBECONFIG=/etc/kubernetes/admin.conf
|
||||||
|
$ sudo -E kubectl get nodes
|
||||||
|
$ sudo -E kubectl get pods
|
||||||
|
```
|
||||||
|
|
||||||
|
## Install a Pod Network
|
||||||
|
|
||||||
|
A pod network plugin is needed to allow pods to communicate with each other.
|
||||||
|
|
||||||
|
- Install the `flannel` plugin by following the
|
||||||
|
[Using `kubeadm` to Create a Cluster](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/#instructions)
|
||||||
|
guide, starting from the **Installing a pod network** section.
|
||||||
|
|
||||||
|
- Create a pod network using flannel
|
||||||
|
|
||||||
|
> **Note:** There is no known way to determine programmatically the best version (commit) to use.
|
||||||
|
> See https://github.com/coreos/flannel/issues/995.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo -E kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
- Wait for the pod network to become available
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# number of seconds to wait for pod network to become available
|
||||||
|
$ timeout_dns=420
|
||||||
|
|
||||||
|
$ while [ "$timeout_dns" -gt 0 ]; do
|
||||||
|
if sudo -E kubectl get pods --all-namespaces | grep dns | grep Running; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
|
||||||
|
sleep 1s
|
||||||
|
((timeout_dns--))
|
||||||
|
done
|
||||||
|
```
|
||||||
|
|
||||||
|
- Check the pod network is running
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo -E kubectl get pods --all-namespaces | grep dns | grep Running && echo "OK" || ( echo "FAIL" && false )
|
||||||
|
```
|
||||||
|
|
||||||
|
## Allow pods to run in the master node
|
||||||
|
|
||||||
|
By default, the cluster will not schedule pods in the master node. To enable master node scheduling:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo -E kubectl taint nodes --all node-role.kubernetes.io/master-
|
||||||
|
```
|
||||||
|
|
||||||
|
## Create an untrusted pod using Kata Containers
|
||||||
|
|
||||||
|
By default, all pods are created with the default runtime configured in CRI containerd plugin.
|
||||||
|
|
||||||
|
If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true"`, the CRI plugin runs the pod with the
|
||||||
|
[Kata Containers runtime](https://github.com/kata-containers/runtime/blob/master/README.md).
|
||||||
|
|
||||||
|
- Create an untrusted pod configuration
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ cat << EOT | tee nginx-untrusted.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Pod
|
||||||
|
metadata:
|
||||||
|
name: nginx-untrusted
|
||||||
|
annotations:
|
||||||
|
io.kubernetes.cri.untrusted-workload: "true"
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: nginx
|
||||||
|
image: nginx
|
||||||
|
|
||||||
|
EOT
|
||||||
|
```
|
||||||
|
|
||||||
|
- Create an untrusted pod
|
||||||
|
```bash
|
||||||
|
$ sudo -E kubectl apply -f nginx-untrusted.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
- Check pod is running
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo -E kubectl get pods
|
||||||
|
```
|
||||||
|
|
||||||
|
- Check hypervisor is running
|
||||||
|
```bash
|
||||||
|
$ ps aux | grep qemu
|
||||||
|
```
|
||||||
|
|
||||||
|
## Delete created pod
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo -E kubectl delete -f nginx-untrusted.yaml
|
||||||
|
```
|
130
docs/how-to/how-to-use-kata-containers-with-acrn.md
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
# Kata Containers with ACRN
|
||||||
|
|
||||||
|
This document provides an overview on how to run Kata containers with ACRN hypervisor and device model.
|
||||||
|
|
||||||
|
- [Introduction](#introduction)
|
||||||
|
- [Pre-requisites](#pre-requisites)
|
||||||
|
- [Configure Docker](#configure-docker)
|
||||||
|
- [Configure Kata Containers with ACRN](#configure-kata-containers-with-acrn)
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
ACRN is a flexible, lightweight Type-1 reference hypervisor built with real-time and safety-criticality in mind. ACRN uses an open source platform making it optimized to streamline embedded development.
|
||||||
|
|
||||||
|
Some of the key features being:
|
||||||
|
|
||||||
|
- Small footprint - Approx. 25K lines of code (LOC).
|
||||||
|
- Real Time - Low latency, faster boot time, improves overall responsiveness with hardware.
|
||||||
|
- Adaptability - Multi-OS support for guest operating systems like Linux, Android, RTOSes.
|
||||||
|
- Rich I/O mediators - Allows sharing of various I/O devices across VMs.
|
||||||
|
- Optimized for a variety of IoT (Internet of Things) and embedded device solutions.
|
||||||
|
|
||||||
|
Please refer to ACRN [documentation](https://projectacrn.github.io/latest/index.html) for more details on ACRN hypervisor and device model.
|
||||||
|
|
||||||
|
## Pre-requisites
|
||||||
|
|
||||||
|
This document requires the presence of the ACRN hypervisor and Kata Containers on your system. Install using the instructions available through the following links:
|
||||||
|
|
||||||
|
- ACRN supported [Hardware](https://projectacrn.github.io/latest/hardware.html#supported-hardware).
|
||||||
|
> **Note:** Please make sure to have a minimum of 4 logical processors (HT) or cores.
|
||||||
|
- ACRN [software](https://projectacrn.github.io/latest/tutorials/kbl-nuc-sdc.html#use-the-script-to-set-up-acrn-automatically) setup.
|
||||||
|
- For networking, ACRN supports either MACVTAP or TAP. If MACVTAP is not enabled in the Service OS, please follow the below steps to update the kernel:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ git clone https://github.com/projectacrn/acrn-kernel.git
|
||||||
|
$ cd acrn-kernel
|
||||||
|
$ cp kernel_config_sos .config
|
||||||
|
$ sed -i "s/# CONFIG_MACVLAN is not set/CONFIG_MACVLAN=y/" .config
|
||||||
|
$ sed -i '$ i CONFIG_MACVTAP=y' .config
|
||||||
|
$ make clean && make olddefconfig && make && sudo make modules_install INSTALL_MOD_PATH=out/
|
||||||
|
```
|
||||||
|
Log in to the Service OS and update the kernel with MACVTAP support:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ sudo mount /dev/sda1 /mnt
|
||||||
|
$ sudo scp -r <user name>@<host address>:<your workspace>/acrn-kernel/arch/x86/boot/bzImage /mnt/EFI/org.clearlinux/
|
||||||
|
$ sudo scp -r <user name>@<host address>:<your workspace>/acrn-kernel/out/lib/modules/* /lib/modules/
|
||||||
|
$ conf_file=$(sed -n '$ s/default //p' /mnt/loader/loader.conf).conf
|
||||||
|
$ kernel_img=$(sed -n 2p /mnt/loader/entries/$conf_file | cut -d'/' -f4)
|
||||||
|
$ sudo sed -i "s/$kernel_img/bzImage/g" /mnt/loader/entries/$conf_file
|
||||||
|
$ sync && sudo umount /mnt && sudo reboot
|
||||||
|
```
|
||||||
|
- Kata Containers installation: Automated installation does not seem to be supported for Clear Linux, so please use [manual installation](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md) steps.
|
||||||
|
|
||||||
|
> **Note:** Create rootfs image and not initrd image.
|
||||||
|
|
||||||
|
In order to run Kata with ACRN, your container stack must provide block-based storage, such as device-mapper.
|
||||||
|
|
||||||
|
> **Note:** Currently, by design you can only launch one VM from Kata Containers using ACRN hypervisor (SDC scenario). Based on feedback from community we can increase number of VMs.
|
||||||
|
|
||||||
|
## Configure Docker
|
||||||
|
|
||||||
|
To configure Docker for device-mapper and Kata,
|
||||||
|
|
||||||
|
1. Stop Docker daemon if it is already running.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo systemctl stop docker
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Set `/etc/docker/daemon.json` with the following contents.
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"storage-driver": "devicemapper"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Restart docker.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo systemctl daemon-reload
|
||||||
|
$ sudo systemctl restart docker
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Configure [Docker](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#update-the-docker-systemd-unit-file) to use `kata-runtime`.
|
||||||
|
|
||||||
|
## Configure Kata Containers with ACRN
|
||||||
|
|
||||||
|
To configure Kata Containers with ACRN, copy the generated `configuration-acrn.toml` file when building the `kata-runtime` to either `/etc/kata-containers/configuration.toml` or `/usr/share/defaults/kata-containers/configuration.toml`.
|
||||||
|
|
||||||
|
The following command shows full paths to the `configuration.toml` files that the runtime loads. It will use the first path that exists. (Please make sure the kernel and image paths are set correctly in the `configuration.toml` file)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo kata-runtime --kata-show-default-config-paths
|
||||||
|
```
|
||||||
|
|
||||||
|
>**Warning:** Please offline CPUs using [this](offline_cpu.sh) script, else VM launches will fail.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo ./offline_cpu.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Start an ACRN based Kata Container,
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo docker run -ti --runtime=kata-runtime busybox sh
|
||||||
|
```
|
||||||
|
|
||||||
|
You will see ACRN(`acrn-dm`) is now running on your system, as well as a `kata-shim`, `kata-proxy`. You should obtain an interactive shell prompt. Verify that all the Kata processes terminate once you exit the container.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ ps -ef | grep -E "kata|acrn"
|
||||||
|
```
|
||||||
|
|
||||||
|
Validate ACRN hypervisor by using `kata-runtime kata-env`,
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ kata-runtime kata-env | awk -v RS= '/\[Hypervisor\]/'
|
||||||
|
[Hypervisor]
|
||||||
|
MachineType = ""
|
||||||
|
Version = "DM version is: 1.2-unstable-254577a6-dirty (daily tag:acrn-2019w27.4-140000p)
|
||||||
|
Path = "/usr/bin/acrn-dm"
|
||||||
|
BlockDeviceDriver = "virtio-blk"
|
||||||
|
EntropySource = "/dev/urandom"
|
||||||
|
Msize9p = 0
|
||||||
|
MemorySlots = 10
|
||||||
|
Debug = false
|
||||||
|
UseVSock = false
|
||||||
|
SharedFS = ""
|
||||||
|
```
|
115
docs/how-to/how-to-use-kata-containers-with-nemu.md
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
|
||||||
|
# Kata Containers with NEMU
|
||||||
|
|
||||||
|
* [Introduction](#introduction)
|
||||||
|
* [Pre-requisites](#pre-requisites)
|
||||||
|
* [NEMU](#nemu)
|
||||||
|
* [Download and build](#download-and-build)
|
||||||
|
* [x86_64](#x86_64)
|
||||||
|
* [aarch64](#aarch64)
|
||||||
|
* [Configure Kata Containers](#configure-kata-containers)
|
||||||
|
|
||||||
|
Kata Containers relies by default on the QEMU hypervisor in order to spawn the virtual machines running containers. [NEMU](https://github.com/intel/nemu) is a fork of QEMU that:
|
||||||
|
- Reduces the number of lines of code.
|
||||||
|
- Removes all legacy devices.
|
||||||
|
- Reduces the emulation as far as possible.
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
This document describes how to run Kata Containers with NEMU, first by explaining how to download, build and install it. Then it walks through the steps needed to update your Kata Containers configuration in order to run with NEMU.
|
||||||
|
|
||||||
|
## Pre-requisites
|
||||||
|
This document requires Kata Containers to be [installed](https://github.com/kata-containers/documentation/blob/master/install/README.md) on your system.
|
||||||
|
|
||||||
|
Also, it's worth noting that NEMU only supports the `x86_64` and `aarch64` architectures.
|
||||||
|
|
||||||
|
## NEMU
|
||||||
|
|
||||||
|
### Download and build
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ git clone https://github.com/intel/nemu.git
|
||||||
|
$ cd nemu
|
||||||
|
$ git fetch origin
|
||||||
|
$ git checkout origin/experiment/automatic-removal
|
||||||
|
```
|
||||||
|
#### x86_64
|
||||||
|
```
|
||||||
|
$ SRCDIR=$PWD ./tools/build_x86_64_virt.sh
|
||||||
|
```
|
||||||
|
#### aarch64
|
||||||
|
```
|
||||||
|
$ SRCDIR=$PWD ./tools/build_aarch64.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Note:** The branch `experiment/automatic-removal` is a branch published by Jenkins after it has applied the automatic removal script to the `topic/virt-x86` branch. The purpose of this code removal being to reduce the source tree by removing files not being used by NEMU.
|
||||||
|
|
||||||
|
After those commands have successfully returned, you will find the NEMU binary at `$HOME/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt` (__x86__), or `$HOME/build-aarch64/aarch64-softmmu/qemu-system-aarch64` (__ARM__).
|
||||||
|
|
||||||
|
You also need the `OVMF` firmware in order to boot the virtual machine's kernel. It can currently be found at this [location](https://github.com/intel/ovmf-virt/releases).
|
||||||
|
```bash
|
||||||
|
$ sudo mkdir -p /usr/share/nemu
|
||||||
|
$ OVMF_URL=$(curl -sL https://api.github.com/repos/intel/ovmf-virt/releases/latest | jq -S '.assets[0].browser_download_url')
|
||||||
|
$ curl -o OVMF.fd -L $(sed -e 's/^"//' -e 's/"$//' <<<"$OVMF_URL")
|
||||||
|
$ sudo install -o root -g root -m 0640 OVMF.fd /usr/share/nemu/
|
||||||
|
```
|
||||||
|
> **Note:** The OVMF firmware will be located at this temporary location until the changes can be pushed upstream.
|
||||||
|
|
||||||
|
|
||||||
|
## Configure Kata Containers
|
||||||
|
All you need from this section is to modify the configuration file `/usr/share/defaults/kata-containers/configuration.toml` to specify the options related to the hypervisor.
|
||||||
|
|
||||||
|
|
||||||
|
```diff
|
||||||
|
[hypervisor.qemu]
|
||||||
|
-path = "/usr/bin/qemu-lite-system-x86_64"
|
||||||
|
+path = "/home/foo/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt"
|
||||||
|
kernel = "/usr/share/kata-containers/vmlinuz.container"
|
||||||
|
initrd = "/usr/share/kata-containers/kata-containers-initrd.img"
|
||||||
|
image = "/usr/share/kata-containers/kata-containers.img"
|
||||||
|
-machine_type = "pc"
|
||||||
|
+machine_type = "virt"
|
||||||
|
|
||||||
|
# Optional space-separated list of options to pass to the guest kernel.
|
||||||
|
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
|
||||||
|
@@ -31,7 +31,7 @@
|
||||||
|
|
||||||
|
# Path to the firmware.
|
||||||
|
# If you want that qemu uses the default firmware leave this option empty
|
||||||
|
-firmware = ""
|
||||||
|
+firmware = "/usr/share/nemu/OVMF.fd"
|
||||||
|
|
||||||
|
# Machine accelerators
|
||||||
|
# comma-separated list of machine accelerators to pass to the hypervisor.
|
||||||
|
```
|
||||||
|
|
||||||
|
As you can see from this snippet above, all you need to change is:
|
||||||
|
- The path to the hypervisor binary, `/home/foo/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt` in this example.
|
||||||
|
- The machine type from `pc` to `virt`.
|
||||||
|
- The path to the firmware binary, `/usr/share/nemu/OVMF.fd` in this example.
|
||||||
|
|
||||||
|
Once you have saved those modifications, you can start a new container:
|
||||||
|
```bash
|
||||||
|
$ docker run --runtime=kata-runtime -it busybox
|
||||||
|
```
|
||||||
|
And you will be able to verify this new container is running with the NEMU hypervisor by looking for the hypervisor path and the machine type from the `qemu` process running on your system:
|
||||||
|
```bash
|
||||||
|
$ ps -aux | grep qemu
|
||||||
|
root ... /home/foo/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt
|
||||||
|
... -machine virt,accel=kvm,kernel_irqchip,nvdimm ...
|
||||||
|
```
|
||||||
|
|
||||||
|
Also relying on `kata-runtime kata-env` is a reliable way to validate you are using the expected hypervisor:
|
||||||
|
```bash
|
||||||
|
$ kata-runtime kata-env | awk -v RS= '/\[Hypervisor\]/'
|
||||||
|
[Hypervisor]
|
||||||
|
MachineType = "virt"
|
||||||
|
Version = "NEMU (like QEMU) version 3.0.0 (v3.0.0-179-gaf9a791)\nCopyright (c) 2003-2017 Fabrice Bellard and the QEMU Project developers"
|
||||||
|
Path = "/home/foo/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt"
|
||||||
|
BlockDeviceDriver = "virtio-scsi"
|
||||||
|
EntropySource = "/dev/urandom"
|
||||||
|
Msize9p = 8192
|
||||||
|
MemorySlots = 10
|
||||||
|
Debug = true
|
||||||
|
UseVSock = false
|
||||||
|
```
|
143
docs/how-to/how-to-use-sysctls-with-kata.md
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
# Setting Sysctls with Kata
|
||||||
|
|
||||||
|
## Sysctls
|
||||||
|
In Linux, the sysctl interface allows an administrator to modify kernel
|
||||||
|
parameters at runtime. Parameters are available via the `/proc/sys/` virtual
|
||||||
|
process file system.
|
||||||
|
|
||||||
|
The parameters include the following subsystems among others:
|
||||||
|
- `fs` (file systems)
|
||||||
|
- `kernel` (kernel)
|
||||||
|
- `net` (networking)
|
||||||
|
- `vm` (virtual memory)
|
||||||
|
|
||||||
|
To get a complete list of kernel parameters, run:
|
||||||
|
```
|
||||||
|
$ sudo sysctl -a
|
||||||
|
```
|
||||||
|
|
||||||
|
Both Docker and Kubernetes provide mechanisms for setting namespaced sysctls.
|
||||||
|
Namespaced sysctls can be set per pod in the case of Kubernetes or per container
|
||||||
|
in case of Docker.
|
||||||
|
The following sysctls are known to be namespaced and can be set with
|
||||||
|
Docker and Kubernetes:
|
||||||
|
|
||||||
|
- `kernel.shm*`
|
||||||
|
- `kernel.msg*`
|
||||||
|
- `kernel.sem`
|
||||||
|
- `fs.mqueue.*`
|
||||||
|
- `net.*`
|
||||||
|
|
||||||
|
### Namespaced Sysctls:
|
||||||
|
|
||||||
|
Kata Containers supports setting namespaced sysctls with Docker and Kubernetes.
|
||||||
|
All namespaced sysctls can be set in the same way as regular Linux based
|
||||||
|
containers, the difference being, in the case of Kata they are set inside the guest.
|
||||||
|
|
||||||
|
#### Setting Namespaced Sysctls with Docker:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo docker run --runtime=kata-runtime -it alpine cat /proc/sys/fs/mqueue/queues_max
|
||||||
|
256
|
||||||
|
$ sudo docker run --runtime=kata-runtime --sysctl fs.mqueue.queues_max=512 -it alpine cat /proc/sys/fs/mqueue/queues_max
|
||||||
|
512
|
||||||
|
```
|
||||||
|
|
||||||
|
... and:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo docker run --runtime=kata-runtime -it alpine cat /proc/sys/kernel/shmmax
|
||||||
|
18446744073692774399
|
||||||
|
$ sudo docker run --runtime=kata-runtime --sysctl kernel.shmmax=1024 -it alpine cat /proc/sys/kernel/shmmax
|
||||||
|
1024
|
||||||
|
```
|
||||||
|
|
||||||
|
For additional documentation on setting sysctls with Docker please refer to [Docker-sysctl-doc](https://docs.docker.com/engine/reference/commandline/run/#configure-namespaced-kernel-parameters-sysctls-at-runtime).
|
||||||
|
|
||||||
|
|
||||||
|
#### Setting Namespaced Sysctls with Kubernetes:
|
||||||
|
|
||||||
|
Kubernetes considers certain sysctls as safe and others as unsafe. For detailed
|
||||||
|
information about what sysctls are considered unsafe, please refer to the [Kubernetes sysctl docs](https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/).
|
||||||
|
For using unsafe sysctls, the cluster admin would need to allow these as:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ kubelet --allowed-unsafe-sysctls 'kernel.msg*,net.ipv4.route.min_pmtu' ...
|
||||||
|
```
|
||||||
|
|
||||||
|
or using the declarative approach as:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ cat kubeadm.yaml
|
||||||
|
apiVersion: kubeadm.k8s.io/v1alpha3
|
||||||
|
kind: InitConfiguration
|
||||||
|
nodeRegistration:
|
||||||
|
kubeletExtraArgs:
|
||||||
|
allowed-unsafe-sysctls: "kernel.msg*,kernel.shm.*,net.*"
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
The above YAML can then be passed to `kubeadm init` as:
|
||||||
|
```
|
||||||
|
$ sudo -E kubeadm init --config=kubeadm.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
Both safe and unsafe sysctls can be enabled in the same way in the Pod YAML:
|
||||||
|
|
||||||
|
```
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Pod
|
||||||
|
metadata:
|
||||||
|
name: sysctl-example
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
sysctls:
|
||||||
|
- name: kernel.shm_rmid_forced
|
||||||
|
value: "0"
|
||||||
|
- name: net.ipv4.route.min_pmtu
|
||||||
|
value: "1024"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Non-Namespaced Sysctls:
|
||||||
|
|
||||||
|
Docker and Kubernetes disallow sysctls without a namespace.
|
||||||
|
The recommendation is to set them directly on the host or use a privileged
|
||||||
|
container in the case of Kubernetes.
|
||||||
|
|
||||||
|
In the case of Kata, the approach of setting sysctls on the host does not
|
||||||
|
work since the host sysctls have no effect on a Kata Container running
|
||||||
|
inside a guest. Kata gives you the ability to set non-namespaced sysctls using a privileged container.
|
||||||
|
This has the advantage that the non-namespaced sysctls are set inside the guest
|
||||||
|
without having any effect on the `/proc/sys` values of any other pod or the
|
||||||
|
host itself.
|
||||||
|
|
||||||
|
The recommended approach to do this would be to set the sysctl value in a
|
||||||
|
privileged init container. In this way, the application containers do not need any elevated
|
||||||
|
privileges.
|
||||||
|
|
||||||
|
```
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Pod
|
||||||
|
metadata:
|
||||||
|
name: busybox-kata
|
||||||
|
spec:
|
||||||
|
runtimeClassName: kata-qemu
|
||||||
|
securityContext:
|
||||||
|
sysctls:
|
||||||
|
- name: kernel.shm_rmid_forced
|
||||||
|
value: "0"
|
||||||
|
containers:
|
||||||
|
- name: busybox-container
|
||||||
|
securityContext:
|
||||||
|
privileged: true
|
||||||
|
image: debian
|
||||||
|
command:
|
||||||
|
- sleep
|
||||||
|
- "3000"
|
||||||
|
initContainers:
|
||||||
|
- name: init-sys
|
||||||
|
securityContext:
|
||||||
|
privileged: true
|
||||||
|
image: busybox
|
||||||
|
command: ['sh', '-c', 'echo "64000" > /proc/sys/vm/max_map_count']
|
||||||
|
```
|
51
docs/how-to/how-to-use-virtio-fs-with-kata.md
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
# Kata Containers with virtio-fs
|
||||||
|
|
||||||
|
- [Introduction](#introduction)
|
||||||
|
- [Pre-requisites](#pre-requisites)
|
||||||
|
- [Install Kata Containers with virtio-fs support](#install-kata-containers-with-virtio-fs-support)
|
||||||
|
- [Run a Kata Container utilizing virtio-fs](#run-a-kata-container-utilizing-virtio-fs)
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
Container deployments utilize explicit or implicit file sharing between host filesystem and containers. From a trust perspective, avoiding a shared file-system between the trusted host and untrusted container is recommended. This is not always feasible. In Kata Containers, block-based volumes are preferred as they allow usage of either device pass through or `virtio-blk` for access within the virtual machine.
|
||||||
|
|
||||||
|
As of the 1.7 release of Kata Containers, [9pfs](https://www.kernel.org/doc/Documentation/filesystems/9p.txt) is the default filesystem sharing mechanism. While this does allow for workload compatibility, it does so with degraded performance and potential for POSIX compliance limitations.
|
||||||
|
|
||||||
|
To help address these limitations, [virtio-fs](https://virtio-fs.gitlab.io/) has been developed. virtio-fs is a shared file system that lets virtual machines access a directory tree on the host. In Kata Containers, virtio-fs can be used to share container volumes, secrets, config-maps, configuration files (hostname, hosts, `resolv.conf`) and the container rootfs on the host with the guest. virtio-fs provides significant performance and POSIX compliance improvements compared to 9pfs.
|
||||||
|
|
||||||
|
Enabling of virtio-fs requires changes in the guest kernel as well as the VMM. For Kata Containers, experimental virtio-fs support is enabled through the [NEMU VMM](https://github.com/intel/nemu).
|
||||||
|
|
||||||
|
**Note: virtio-fs support is experimental in the 1.7 release of Kata Containers. Work is underway to improve stability, performance and upstream integration. This is available for early preview - use at your own risk**
|
||||||
|
|
||||||
|
This document describes how to get Kata Containers to work with virtio-fs.
|
||||||
|
|
||||||
|
## Pre-requisites
|
||||||
|
|
||||||
|
* Before Kata 1.8 this feature required the host to have hugepages support enabled. Enable this with the `sysctl vm.nr_hugepages=1024` command on the host.
|
||||||
|
|
||||||
|
## Install Kata Containers with virtio-fs support
|
||||||
|
|
||||||
|
The Kata Containers NEMU configuration, the NEMU VMM and the `virtiofs` daemon are available in the [Kata Container release](https://github.com/kata-containers/runtime/releases) artifacts starting with the 1.7 release. While the feature is experimental, distribution packages are not supported, but installation is available through [`kata-deploy`](https://github.com/kata-containers/packaging/tree/master/kata-deploy).
|
||||||
|
|
||||||
|
Install the latest release of Kata as follows:
|
||||||
|
```
|
||||||
|
docker run --runtime=runc -v /opt/kata:/opt/kata -v /var/run/dbus:/var/run/dbus -v /run/systemd:/run/systemd -v /etc/docker:/etc/docker -it katadocker/kata-deploy kata-deploy-docker install
|
||||||
|
```
|
||||||
|
|
||||||
|
This will place the Kata release artifacts in `/opt/kata`, and update Docker's configuration to include a runtime target, `kata-nemu`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](https://github.com/kata-containers/packaging/tree/master/kata-deploy#kubernetes-quick-start).
|
||||||
|
|
||||||
|
|
||||||
|
## Run a Kata Container utilizing virtio-fs
|
||||||
|
|
||||||
|
Once installed, start a new container, utilizing NEMU + `virtiofs`:
|
||||||
|
```bash
|
||||||
|
$ docker run --runtime=kata-nemu -it busybox
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify the new container is running with the NEMU hypervisor as well as using `virtiofsd`. To do this look for the hypervisor path and the `virtiofs` daemon process on the host:
|
||||||
|
```bash
|
||||||
|
$ ps -aux | grep virtiofs
|
||||||
|
root ... /home/foo/build-x86_64_virt/x86_64_virt-softmmu/qemu-system-x86_64_virt
|
||||||
|
... -machine virt,accel=kvm,kernel_irqchip,nvdimm ...
|
||||||
|
root ... /home/foo/build-x86_64_virt/virtiofsd-x86_64 ...
|
||||||
|
```
|
53
docs/how-to/how-to-use-virtio-mem-with-kata.md
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
# Kata Containers with `virtio-mem`
|
||||||
|
|
||||||
|
- [Introduction](#introduction)
|
||||||
|
- [Requisites](#requisites)
|
||||||
|
- [Run a Kata Container utilizing `virtio-mem`](#run-a-kata-container-utilizing-virtio-mem)
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
The basic idea of `virtio-mem` is to provide a flexible, cross-architecture memory hot plug and hot unplug solution that avoids many limitations imposed by existing technologies, architectures, and interfaces.
|
||||||
|
More details can be found in https://lkml.org/lkml/2019/12/12/681.
|
||||||
|
|
||||||
|
Kata Containers with `virtio-mem` supports memory resize.
|
||||||
|
|
||||||
|
## Requisites
|
||||||
|
|
||||||
|
Kata Containers with `virtio-mem` requires Linux and the QEMU that support `virtio-mem`.
|
||||||
|
The upstream Linux kernel and QEMU do not yet support `virtio-mem`. @davidhildenbrand is working on them.
|
||||||
|
Please use the following unofficial versions of the Linux kernel and QEMU that support `virtio-mem` with Kata Containers.
|
||||||
|
|
||||||
|
The Linux kernel is at https://github.com/davidhildenbrand/linux/tree/virtio-mem-rfc-v4.
|
||||||
|
The Linux kernel config that can work with Kata Containers is at https://gist.github.com/teawater/016194ee84748c768745a163d08b0fb9.
|
||||||
|
|
||||||
|
The QEMU is at https://github.com/teawater/qemu/tree/kata-virtio-mem. (The original source is at https://github.com/davidhildenbrand/qemu/tree/virtio-mem. Its base version of QEMU cannot work with Kata Containers. So merge the commit of `virtio-mem` to upstream QEMU.)
|
||||||
|
|
||||||
|
Set Linux and the QEMU that support `virtio-mem` with following line in the Kata Containers QEMU configuration `configuration-qemu.toml`:
|
||||||
|
```toml
|
||||||
|
[hypervisor.qemu]
|
||||||
|
path = "qemu-dir"
|
||||||
|
kernel = "vmlinux-dir"
|
||||||
|
```
|
||||||
|
|
||||||
|
Enable `virtio-mem` with following line in the Kata Containers configuration:
|
||||||
|
```toml
|
||||||
|
enable_virtio_mem = true
|
||||||
|
```
|
||||||
|
|
||||||
|
## Run a Kata Container utilizing `virtio-mem`
|
||||||
|
|
||||||
|
Use the following command to enable memory overcommitment in the Linux kernel, because the QEMU `virtio-mem` device needs to allocate a large amount of memory.
|
||||||
|
```
|
||||||
|
$ echo 1 | sudo tee /proc/sys/vm/overcommit_memory
|
||||||
|
```
|
||||||
|
|
||||||
|
Use the following command to start a Kata Container.
|
||||||
|
```
|
||||||
|
$ docker run --rm -it --runtime=kata --name test busybox
|
||||||
|
```
|
||||||
|
|
||||||
|
Use the following command to set the memory size of the `test` container to `default_memory` + 512m.
|
||||||
|
```
|
||||||
|
$ docker update -m 512m --memory-swap -1 test
|
||||||
|
```
|
||||||
|
|
BIN
docs/how-to/images/efk_direct_from_json.png
Normal file
After Width: | Height: | Size: 130 KiB |
BIN
docs/how-to/images/efk_direct_json_fields.png
Normal file
After Width: | Height: | Size: 38 KiB |
BIN
docs/how-to/images/efk_filter_on_tag.png
Normal file
After Width: | Height: | Size: 136 KiB |
BIN
docs/how-to/images/efk_kata_tag.png
Normal file
After Width: | Height: | Size: 51 KiB |
BIN
docs/how-to/images/efk_syslog_entry_detail.png
Normal file
After Width: | Height: | Size: 144 KiB |
24
docs/how-to/offline_cpu.sh
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#!/bin/bash
# Copyright (c) 2019 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Description: Offline SOS CPUs except BSP before launching a UOS.
#
# Walks every non-boot CPU exposed in sysfs, takes it offline, and then
# hands it over to the ACRN hypervisor via the acrn_vhm offline_cpu node.

# Writing to sysfs requires root privileges.
[ "$(id -u)" -eq 0 ] || { echo >&2 "ERROR: run as root"; exit 1; }

# Iterate over cpu1, cpu2, ... directly with a glob (never parse `ls`).
# cpu0 (the BSP) is deliberately skipped.
for cpu_path in /sys/devices/system/cpu/cpu[1-9]*; do
  online=$(cat "${cpu_path}/online")
  # CPU index is the numeric suffix of the sysfs directory name.
  idx=$(basename "${cpu_path}" | tr -cd '0-9')
  echo "INFO:$0: cpu${idx} online=${online}"
  if [ "${online}" = "1" ]; then
    echo 0 > "${cpu_path}/online"
    # Offlining may not take effect immediately; retry until the CPU
    # actually reports itself offline.
    while [ "${online}" = "1" ]; do
      sleep 1
      echo 0 > "${cpu_path}/online"
      online=$(cat "${cpu_path}/online")
    done
    # Hand the now-offline CPU over to the ACRN hypervisor.
    echo "${idx}" > /sys/class/vhm/acrn_vhm/offline_cpu
  fi
done
|
||||||
|
|
79
docs/how-to/privileged.md
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
# Privileged Kata Containers
|
||||||
|
|
||||||
|
Kata Containers supports creation of containers that are "privileged" (i.e. have additional capabilities and access
|
||||||
|
that is not normally granted).
|
||||||
|
|
||||||
|
* [Warnings](#warnings)
|
||||||
|
* [Host Devices](#host-devices)
|
||||||
|
* [Containerd and CRI](#containerd-and-cri)
|
||||||
|
* [CRI-O](#cri-o)
|
||||||
|
|
||||||
|
## Warnings
|
||||||
|
|
||||||
|
**Warning:** Whilst this functionality is supported, it can decrease the security of Kata Containers if not configured
|
||||||
|
correctly.
|
||||||
|
|
||||||
|
### Host Devices
|
||||||
|
|
||||||
|
By default, when privileged is enabled for a container, all the `/dev/*` block devices from the host are mounted
|
||||||
|
into the guest. This will allow the privileged container inside the Kata guest to gain access to mount any block device
|
||||||
|
from the host, a potentially undesirable side-effect that decreases the security of Kata.
|
||||||
|
|
||||||
|
The following sections document how to configure this behavior in different container runtimes.
|
||||||
|
|
||||||
|
#### Containerd and CRI
|
||||||
|
|
||||||
|
The Containerd CRI allows configuring the privileged host devices behavior for each runtime in the CRI config. This is
|
||||||
|
done with the `privileged_without_host_devices` option. Setting this to `true` will disable hot plugging of the host
|
||||||
|
devices into the guest, even when privileged is enabled.
|
||||||
|
|
||||||
|
Support for configuring privileged host devices behaviour was added in containerd `1.3.0` version.
|
||||||
|
|
||||||
|
See below example config:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[plugins]
|
||||||
|
[plugins.cri]
|
||||||
|
[plugins.cri.containerd]
|
||||||
|
[plugins.cri.containerd.runtimes.runc]
|
||||||
|
runtime_type = "io.containerd.runc.v1"
|
||||||
|
privileged_without_host_devices = false
|
||||||
|
[plugins.cri.containerd.runtimes.kata]
|
||||||
|
runtime_type = "io.containerd.kata.v2"
|
||||||
|
privileged_without_host_devices = true
|
||||||
|
[plugins.cri.containerd.runtimes.kata.options]
|
||||||
|
ConfigPath = "/opt/kata/share/defaults/kata-containers/configuration.toml"
|
||||||
|
```
|
||||||
|
|
||||||
|
- [Kata Containers with Containerd and CRI documentation](how-to-use-k8s-with-cri-containerd-and-kata.md)
|
||||||
|
- [Containerd CRI config documentation](https://github.com/containerd/cri/blob/master/docs/config.md)
|
||||||
|
|
||||||
|
#### CRI-O
|
||||||
|
|
||||||
|
Similar to containerd, CRI-O allows configuring the privileged host devices
|
||||||
|
behavior for each runtime in the CRI config. This is done with the
|
||||||
|
`privileged_without_host_devices` option. Setting this to `true` will disable
|
||||||
|
hot plugging of the host devices into the guest, even when privileged is enabled.
|
||||||
|
|
||||||
|
Support for configuring privileged host devices behaviour was added in CRI-O `1.16.0` version.
|
||||||
|
|
||||||
|
See below example config:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[crio.runtime.runtimes.runc]
|
||||||
|
runtime_path = "/usr/local/bin/crio-runc"
|
||||||
|
runtime_type = "oci"
|
||||||
|
runtime_root = "/run/runc"
|
||||||
|
privileged_without_host_devices = false
|
||||||
|
[crio.runtime.runtimes.kata]
|
||||||
|
runtime_path = "/usr/bin/kata-runtime"
|
||||||
|
runtime_type = "oci"
|
||||||
|
privileged_without_host_devices = true
|
||||||
|
[crio.runtime.runtimes.kata-shim2]
|
||||||
|
runtime_path = "/usr/local/bin/containerd-shim-kata-v2"
|
||||||
|
runtime_type = "vm"
|
||||||
|
privileged_without_host_devices = true
|
||||||
|
```
|
||||||
|
|
||||||
|
- [Kata Containers with CRI-O](https://github.com/kata-containers/documentation/blob/master/how-to/run-kata-with-k8s.md#cri-o)
|
||||||
|
|
204
docs/how-to/run-kata-with-k8s.md
Normal file
@ -0,0 +1,204 @@
|
|||||||
|
# Run Kata Containers with Kubernetes
|
||||||
|
|
||||||
|
* [Run Kata Containers with Kubernetes](#run-kata-containers-with-kubernetes)
|
||||||
|
* [Prerequisites](#prerequisites)
|
||||||
|
* [Install a CRI implementation](#install-a-cri-implementation)
|
||||||
|
* [CRI-O](#cri-o)
|
||||||
|
* [Kubernetes Runtime Class (CRI-O v1.12 )](#kubernetes-runtime-class-cri-o-v112)
|
||||||
|
* [Untrusted annotation (until CRI-O v1.12)](#untrusted-annotation-until-cri-o-v112)
|
||||||
|
* [Network namespace management](#network-namespace-management)
|
||||||
|
* [containerd with CRI plugin](#containerd-with-cri-plugin)
|
||||||
|
* [Install Kubernetes](#install-kubernetes)
|
||||||
|
* [Configure for CRI-O](#configure-for-cri-o)
|
||||||
|
* [Configure for containerd](#configure-for-containerd)
|
||||||
|
* [Run a Kubernetes pod with Kata Containers](#run-a-kubernetes-pod-with-kata-containers)
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
This guide requires Kata Containers available on your system, install-able by following [this guide](https://github.com/kata-containers/documentation/blob/master/install/README.md).
|
||||||
|
|
||||||
|
## Install a CRI implementation
|
||||||
|
|
||||||
|
Kubernetes CRI (Container Runtime Interface) implementations allow using any
|
||||||
|
OCI-compatible runtime with Kubernetes, such as the Kata Containers runtime.
|
||||||
|
|
||||||
|
Kata Containers support both the [CRI-O](https://github.com/kubernetes-incubator/cri-o) and
|
||||||
|
[CRI-containerd](https://github.com/containerd/cri) CRI implementations.
|
||||||
|
|
||||||
|
After choosing one CRI implementation, you must make the appropriate configuration
|
||||||
|
to ensure it integrates with Kata Containers.
|
||||||
|
|
||||||
|
Kata Containers 1.5 introduced the `shimv2` for containerd 1.2.0, reducing the components
|
||||||
|
required to spawn pods and containers, and this is the preferred way to run Kata Containers with Kubernetes ([as documented here](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers)).
|
||||||
|
|
||||||
|
An equivalent shim implementation for CRI-O is planned.
|
||||||
|
|
||||||
|
### CRI-O
|
||||||
|
For CRI-O installation instructions, refer to the [CRI-O Tutorial](https://github.com/kubernetes-incubator/cri-o/blob/master/tutorial.md) page.
|
||||||
|
|
||||||
|
The following sections show how to set up the CRI-O configuration file (default path: `/etc/crio/crio.conf`) for Kata.
|
||||||
|
|
||||||
|
Unless otherwise stated, all the following settings are specific to the `crio.runtime` table:
|
||||||
|
```toml
|
||||||
|
# The "crio.runtime" table contains settings pertaining to the OCI
|
||||||
|
# runtime used and options for how to set up and manage the OCI runtime.
|
||||||
|
[crio.runtime]
|
||||||
|
```
|
||||||
|
A comprehensive documentation of the configuration file can be found [here](https://github.com/cri-o/cri-o/blob/master/docs/crio.conf.5.md).
|
||||||
|
|
||||||
|
> **Note**: After any change to this file, the CRI-O daemon has to be restarted with:
|
||||||
|
>````
|
||||||
|
>$ sudo systemctl restart crio
|
||||||
|
>````
|
||||||
|
|
||||||
|
#### Kubernetes Runtime Class (CRI-O v1.12+)
|
||||||
|
The [Kubernetes Runtime Class](https://kubernetes.io/docs/concepts/containers/runtime-class/)
|
||||||
|
is the preferred way of specifying the container runtime configuration to run a Pod's containers.
|
||||||
|
To use this feature, Kata must be added as a runtime handler with:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[crio.runtime.runtimes.kata-runtime]
|
||||||
|
runtime_path = "/usr/bin/kata-runtime"
|
||||||
|
runtime_type = "oci"
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also add multiple entries to specify alternatives hypervisors, e.g.:
|
||||||
|
```toml
|
||||||
|
[crio.runtime.runtimes.kata-qemu]
|
||||||
|
runtime_path = "/usr/bin/kata-runtime"
|
||||||
|
runtime_type = "oci"
|
||||||
|
|
||||||
|
[crio.runtime.runtimes.kata-fc]
|
||||||
|
runtime_path = "/usr/bin/kata-runtime"
|
||||||
|
runtime_type = "oci"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Untrusted annotation (until CRI-O v1.12)
|
||||||
|
The untrusted annotation is used to specify a runtime for __untrusted__ workloads, i.e.
|
||||||
|
a runtime to be used when the workload cannot be trusted and a higher level of security
|
||||||
|
is required. An additional flag can be used to let CRI-O know if a workload
|
||||||
|
should be considered _trusted_ or _untrusted_ by default.
|
||||||
|
For further details, see the documentation
|
||||||
|
[here](https://github.com/kata-containers/documentation/blob/master/design/architecture.md#mixing-vm-based-and-namespace-based-runtimes).
|
||||||
|
|
||||||
|
```toml
|
||||||
|
# runtime is the OCI compatible runtime used for trusted container workloads.
|
||||||
|
# This is a mandatory setting as this runtime will be the default one
|
||||||
|
# and will also be used for untrusted container workloads if
|
||||||
|
# runtime_untrusted_workload is not set.
|
||||||
|
runtime = "/usr/bin/runc"
|
||||||
|
|
||||||
|
# runtime_untrusted_workload is the OCI compatible runtime used for untrusted
|
||||||
|
# container workloads. This is an optional setting, except if
|
||||||
|
# default_container_trust is set to "untrusted".
|
||||||
|
runtime_untrusted_workload = "/usr/bin/kata-runtime"
|
||||||
|
|
||||||
|
# default_workload_trust is the default level of trust crio puts in container
|
||||||
|
# workloads. It can either be "trusted" or "untrusted", and the default
|
||||||
|
# is "trusted".
|
||||||
|
# Containers can be run through different container runtimes, depending on
|
||||||
|
# the trust hints we receive from kubelet:
|
||||||
|
# - If kubelet tags a container workload as untrusted, crio will try first to
|
||||||
|
# run it through the untrusted container workload runtime. If it is not set,
|
||||||
|
# crio will use the trusted runtime.
|
||||||
|
# - If kubelet does not provide any information about the container workload trust
|
||||||
|
# level, the selected runtime will depend on the default_container_trust setting.
|
||||||
|
# If it is set to "untrusted", then all containers except for the host privileged
|
||||||
|
# ones, will be run by the runtime_untrusted_workload runtime. Host privileged
|
||||||
|
# containers are by definition trusted and will always use the trusted container
|
||||||
|
# runtime. If default_container_trust is set to "trusted", crio will use the trusted
|
||||||
|
# container runtime for all containers.
|
||||||
|
default_workload_trust = "untrusted"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Network namespace management
|
||||||
|
To enable networking for the workloads run by Kata, CRI-O needs to be configured to
|
||||||
|
manage network namespaces, by setting the following key to `true`.
|
||||||
|
|
||||||
|
In CRI-O v1.16:
|
||||||
|
```toml
|
||||||
|
manage_network_ns_lifecycle = true
|
||||||
|
```
|
||||||
|
In CRI-O v1.17+:
|
||||||
|
```toml
|
||||||
|
manage_ns_lifecycle = true
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### containerd with CRI plugin
|
||||||
|
|
||||||
|
If you select containerd with `cri` plugin, follow the "Getting Started for Developers"
|
||||||
|
instructions [here](https://github.com/containerd/cri#getting-started-for-developers)
|
||||||
|
to properly install it.
|
||||||
|
|
||||||
|
To customize containerd to select Kata Containers runtime, follow our
|
||||||
|
"Configure containerd to use Kata Containers" internal documentation
|
||||||
|
[here](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers).
|
||||||
|
|
||||||
|
## Install Kubernetes
|
||||||
|
|
||||||
|
Depending on what your needs are and what you expect to do with Kubernetes,
|
||||||
|
please refer to the following
|
||||||
|
[documentation](https://kubernetes.io/docs/setup/) to install it correctly.
|
||||||
|
|
||||||
|
Kubernetes talks with CRI implementations through a `container-runtime-endpoint`,
|
||||||
|
also called CRI socket. This socket path is different depending on which CRI
|
||||||
|
implementation you chose, and the Kubelet service has to be updated accordingly.
|
||||||
|
|
||||||
|
### Configure for CRI-O
|
||||||
|
|
||||||
|
`/etc/systemd/system/kubelet.service.d/0-crio.conf`
|
||||||
|
```
|
||||||
|
[Service]
|
||||||
|
Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///var/run/crio/crio.sock"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configure for containerd
|
||||||
|
|
||||||
|
`/etc/systemd/system/kubelet.service.d/0-cri-containerd.conf`
|
||||||
|
```
|
||||||
|
[Service]
|
||||||
|
Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock"
|
||||||
|
```
|
||||||
|
For more information about containerd see the "Configure Kubelet to use containerd"
|
||||||
|
documentation [here](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-kubelet-to-use-containerd).
|
||||||
|
|
||||||
|
## Run a Kubernetes pod with Kata Containers
|
||||||
|
|
||||||
|
After you update your Kubelet service based on the CRI implementation you
|
||||||
|
are using, reload and restart Kubelet. Then, start your cluster:
|
||||||
|
```bash
|
||||||
|
$ sudo systemctl daemon-reload
|
||||||
|
$ sudo systemctl restart kubelet
|
||||||
|
|
||||||
|
# If using CRI-O
|
||||||
|
$ sudo kubeadm init --skip-preflight-checks --cri-socket /var/run/crio/crio.sock --pod-network-cidr=10.244.0.0/16
|
||||||
|
|
||||||
|
# If using CRI-containerd
|
||||||
|
$ sudo kubeadm init --skip-preflight-checks --cri-socket /run/containerd/containerd.sock --pod-network-cidr=10.244.0.0/16
|
||||||
|
|
||||||
|
$ export KUBECONFIG=/etc/kubernetes/admin.conf
|
||||||
|
```
|
||||||
|
|
||||||
|
You can force Kubelet to use Kata Containers by adding some `untrusted`
|
||||||
|
annotation to your pod configuration. In our case, this ensures Kata
|
||||||
|
Containers is the selected runtime to run the described workload.
|
||||||
|
|
||||||
|
`nginx-untrusted.yaml`
|
||||||
|
```yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Pod
|
||||||
|
metadata:
|
||||||
|
name: nginx-untrusted
|
||||||
|
annotations:
|
||||||
|
io.kubernetes.cri.untrusted-workload: "true"
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: nginx
|
||||||
|
image: nginx
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, you run your pod:
|
||||||
|
```
|
||||||
|
$ sudo -E kubectl apply -f nginx-untrusted.yaml
|
||||||
|
```
|
||||||
|
|
246
docs/how-to/service-mesh.md
Normal file
@ -0,0 +1,246 @@
|
|||||||
|
# Kata Containers and service mesh for Kubernetes
|
||||||
|
|
||||||
|
* [Assumptions](#assumptions)
|
||||||
|
* [How they work](#how-they-work)
|
||||||
|
* [Prerequisites](#prerequisites)
|
||||||
|
* [Kata and Kubernetes](#kata-and-kubernetes)
|
||||||
|
* [Restrictions](#restrictions)
|
||||||
|
* [Install and deploy your service mesh](#install-and-deploy-your-service-mesh)
|
||||||
|
* [Service Mesh Istio](#service-mesh-istio)
|
||||||
|
* [Service Mesh Linkerd](#service-mesh-linkerd)
|
||||||
|
* [Inject your services with sidecars](#inject-your-services-with-sidecars)
|
||||||
|
* [Sidecar Istio](#sidecar-istio)
|
||||||
|
* [Sidecar Linkerd](#sidecar-linkerd)
|
||||||
|
* [Run your services with Kata](#run-your-services-with-kata)
|
||||||
|
* [Lower privileges](#lower-privileges)
|
||||||
|
* [Add annotations](#add-annotations)
|
||||||
|
* [Deploy](#deploy)
|
||||||
|
|
||||||
|
A service mesh is a way to monitor and control the traffic between
|
||||||
|
micro-services running in your Kubernetes cluster. It is a powerful
|
||||||
|
tool that you might want to use in combination with the security
|
||||||
|
brought by Kata Containers.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
You are expected to be familiar with concepts such as __pods__,
|
||||||
|
__containers__, __control plane__, __data plane__, and __sidecar__.
|
||||||
|
|
||||||
|
## How they work
|
||||||
|
|
||||||
|
Istio and Linkerd both rely on the same model, where they run controller
|
||||||
|
applications in the control plane, and inject a proxy as a sidecar inside
|
||||||
|
the pod running the service. The proxy registers in the control plane as
|
||||||
|
a first step, and it constantly sends different sorts of information about
|
||||||
|
the service running inside the pod. That information comes from the
|
||||||
|
filtering performed when receiving all the traffic initially intended for
|
||||||
|
the service. That is how the interaction between the control plane and the
|
||||||
|
proxy allows the user to apply load balancing and authentication rules to
|
||||||
|
the incoming and outgoing traffic, inside the cluster, and between multiple
|
||||||
|
micro-services.
|
||||||
|
|
||||||
|
This cannot happen without a good number of `iptables` rules ensuring
|
||||||
|
the packets reach the proxy instead of the expected service. Rules are
|
||||||
|
setup through an __init__ container because they have to be there as soon
|
||||||
|
as the proxy starts.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
### Kata and Kubernetes
|
||||||
|
|
||||||
|
Follow the [instructions](https://github.com/kata-containers/documentation/blob/master/install/README.md)
|
||||||
|
to get Kata Containers properly installed and configured with Kubernetes.
|
||||||
|
You can choose between CRI-O and CRI-containerd, both are supported
|
||||||
|
through this document.
|
||||||
|
|
||||||
|
For both cases, select the workloads as _trusted_ by default. This way,
|
||||||
|
your cluster and your service mesh run with `runc`, and only the containers
|
||||||
|
you choose to annotate run with Kata Containers.
|
||||||
|
|
||||||
|
### Restrictions
|
||||||
|
|
||||||
|
As documented [here](https://github.com/linkerd/linkerd2/issues/982),
|
||||||
|
a kernel version between 4.14.22 and 4.14.40 causes a deadlock when
|
||||||
|
`getsockopt()` gets called with the `SO_ORIGINAL_DST` option. Unfortunately,
|
||||||
|
both service meshes use this system call with this same option from the
|
||||||
|
proxy container running inside the VM. This means that you cannot run
|
||||||
|
this kernel version range as the guest kernel for Kata if you want your
|
||||||
|
service mesh to work.
|
||||||
|
|
||||||
|
As mentioned when explaining the basic functioning of those service meshes,
|
||||||
|
`iptables` are heavily used, and they need to be properly enabled through
|
||||||
|
the guest kernel config. If they are not properly enabled, the init container
|
||||||
|
is not able to perform a proper setup of the rules.
|
||||||
|
|
||||||
|
## Install and deploy your service mesh
|
||||||
|
|
||||||
|
### Service Mesh Istio
|
||||||
|
|
||||||
|
As a reference, you can follow Istio [instructions](https://istio.io/docs/setup/kubernetes/quick-start/#download-and-prepare-for-the-installation).
|
||||||
|
|
||||||
|
The following is a summary of what you need to install Istio on your system:
|
||||||
|
```
|
||||||
|
$ curl -L https://git.io/getLatestIstio | sh -
|
||||||
|
$ cd istio-*
|
||||||
|
$ export PATH=$PWD/bin:$PATH
|
||||||
|
```
|
||||||
|
|
||||||
|
Now deploy Istio in the control plane of your cluster with the following:
|
||||||
|
```
|
||||||
|
$ kubectl apply -f install/kubernetes/istio-demo.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
To verify that the control plane is properly deployed, you can use both of
|
||||||
|
the following commands:
|
||||||
|
```
|
||||||
|
$ kubectl get svc -n istio-system
|
||||||
|
$ kubectl get pods -n istio-system -o wide
|
||||||
|
```
|
||||||
|
|
||||||
|
### Service Mesh Linkerd
|
||||||
|
|
||||||
|
As a reference, follow the Linkerd [instructions](https://linkerd.io/2/getting-started/index.html).
|
||||||
|
|
||||||
|
The following is a summary of what you need to install Linkerd on your system:
|
||||||
|
```
|
||||||
|
$ curl https://run.linkerd.io/install | sh
|
||||||
|
$ export PATH=$PATH:$HOME/.linkerd/bin
|
||||||
|
```
|
||||||
|
|
||||||
|
Now deploy Linkerd in the control plane of your cluster with the following:
|
||||||
|
```
|
||||||
|
$ linkerd install | kubectl apply -f -
|
||||||
|
```
|
||||||
|
|
||||||
|
To verify that the control plane is properly deployed, you can use both of
|
||||||
|
the following commands:
|
||||||
|
```
|
||||||
|
$ kubectl get svc -n linkerd
|
||||||
|
$ kubectl get pods -n linkerd -o wide
|
||||||
|
```
|
||||||
|
|
||||||
|
## Inject your services with sidecars
|
||||||
|
|
||||||
|
Once the control plane is running, you need a deployment to define a few
|
||||||
|
services that rely on each other. Then, you inject the YAML file with the
|
||||||
|
sidecar proxy using the tools provided by each service mesh.
|
||||||
|
|
||||||
|
If you do not have such a deployment ready, refer to the samples provided
|
||||||
|
by each project.
|
||||||
|
|
||||||
|
### Sidecar Istio
|
||||||
|
|
||||||
|
Istio provides a [`bookinfo`](https://istio.io/docs/examples/bookinfo/)
|
||||||
|
sample, which you can rely on to inject their `envoy` proxy as a
|
||||||
|
sidecar.
|
||||||
|
|
||||||
|
You need to use their tool called `istioctl kube-inject` to inject
|
||||||
|
your YAML file. We use their `bookinfo` sample as an example:
|
||||||
|
```
|
||||||
|
$ istioctl kube-inject -f samples/bookinfo/kube/bookinfo.yaml -o bookinfo-injected.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### Sidecar Linkerd
|
||||||
|
|
||||||
|
Linkerd provides an [`emojivoto`](https://linkerd.io/2/getting-started/index.html)
|
||||||
|
sample, which you can rely on to inject their `linkerd` proxy as a
|
||||||
|
sidecar.
|
||||||
|
|
||||||
|
You need to use their tool called `linkerd inject` to inject your YAML
|
||||||
|
file. We use their `emojivoto` sample as example:
|
||||||
|
```
|
||||||
|
$ wget https://raw.githubusercontent.com/runconduit/conduit-examples/master/emojivoto/emojivoto.yml
|
||||||
|
$ linkerd inject emojivoto.yml > emojivoto-injected.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
## Run your services with Kata
|
||||||
|
|
||||||
|
Now that your service deployment is injected with the appropriate sidecar
|
||||||
|
containers, manually edit your deployment to make it work with Kata.
|
||||||
|
|
||||||
|
### Lower privileges
|
||||||
|
|
||||||
|
In Kubernetes, the __init__ container is often `privileged` as it needs to
|
||||||
|
setup the environment, which often needs some root privileges. In the case
|
||||||
|
of those services meshes, all they need is the `NET_ADMIN` capability to
|
||||||
|
modify the underlying network rules. Linkerd, by default, does not use
|
||||||
|
`privileged` container, but Istio does.
|
||||||
|
|
||||||
|
Because of the previous reason, if you use Istio you need to switch all
|
||||||
|
containers with `privileged: true` to `privileged: false`.
|
||||||
|
|
||||||
|
### Add annotations
|
||||||
|
|
||||||
|
There is no difference between Istio and Linkerd in this section. It is
|
||||||
|
about which CRI implementation you use.
|
||||||
|
|
||||||
|
For both CRI-O and CRI-containerd, you have to add an annotation indicating
|
||||||
|
the workload for this deployment is not _trusted_, which will trigger
|
||||||
|
`kata-runtime` to be called instead of `runc`.
|
||||||
|
|
||||||
|
__CRI-O:__
|
||||||
|
|
||||||
|
Add the following annotation for CRI-O
|
||||||
|
```yaml
|
||||||
|
io.kubernetes.cri-o.TrustedSandbox: "false"
|
||||||
|
```
|
||||||
|
The following is an example of what your YAML can look like:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
...
|
||||||
|
apiVersion: extensions/v1beta1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
creationTimestamp: null
|
||||||
|
name: details-v1
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
strategy: {}
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
io.kubernetes.cri-o.TrustedSandbox: "false"
|
||||||
|
sidecar.istio.io/status: '{"version":"55c9e544b52e1d4e45d18a58d0b34ba4b72531e45fb6d1572c77191422556ffc","initContainers":["istio-init"],"containers":["istio-proxy"],"volumes":["istio-envoy","istio-certs"],"imagePullSecrets":null}'
|
||||||
|
creationTimestamp: null
|
||||||
|
labels:
|
||||||
|
app: details
|
||||||
|
version: v1
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
__CRI-containerd:__
|
||||||
|
|
||||||
|
Add the following annotation for CRI-containerd
|
||||||
|
```yaml
|
||||||
|
io.kubernetes.cri.untrusted-workload: "true"
|
||||||
|
```
|
||||||
|
The following is an example of what your YAML can look like:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
...
|
||||||
|
apiVersion: extensions/v1beta1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
creationTimestamp: null
|
||||||
|
name: details-v1
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
strategy: {}
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
io.kubernetes.cri.untrusted-workload: "true"
|
||||||
|
sidecar.istio.io/status: '{"version":"55c9e544b52e1d4e45d18a58d0b34ba4b72531e45fb6d1572c77191422556ffc","initContainers":["istio-init"],"containers":["istio-proxy"],"volumes":["istio-envoy","istio-certs"],"imagePullSecrets":null}'
|
||||||
|
creationTimestamp: null
|
||||||
|
labels:
|
||||||
|
app: details
|
||||||
|
version: v1
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Deploy
|
||||||
|
|
||||||
|
Deploy your application by using the following:
|
||||||
|
```
|
||||||
|
$ kubectl apply -f myapp-injected.yaml
|
||||||
|
```
|
47
docs/how-to/what-is-vm-cache-and-how-do-I-use-it.md
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# What Is VMCache and How To Enable It
|
||||||
|
|
||||||
|
* [What is VMCache](#what-is-vmcache)
|
||||||
|
* [How is this different to VM templating](#how-is-this-different-to-vm-templating)
|
||||||
|
* [How to enable VMCache](#how-to-enable-vmcache)
|
||||||
|
* [Limitations](#limitations)
|
||||||
|
|
||||||
|
### What is VMCache
|
||||||
|
|
||||||
|
VMCache is a new function that creates VMs as caches before using it.
|
||||||
|
It helps speed up new container creation.
|
||||||
|
The function consists of a server and some clients communicating
|
||||||
|
through Unix socket. The protocol is gRPC in [`protocols/cache/cache.proto`](https://github.com/kata-containers/runtime/blob/master/protocols/cache/cache.proto).
|
||||||
|
The VMCache server will create some VMs and cache them by factory cache.
|
||||||
|
It will convert the VM to gRPC format and transport it when gets
|
||||||
|
requested from clients.
|
||||||
|
Factory `grpccache` is the VMCache client. It will request gRPC format
|
||||||
|
VM and convert it back to a VM. If VMCache function is enabled,
|
||||||
|
`kata-runtime` will request VM from factory `grpccache` when it creates
|
||||||
|
a new sandbox.
|
||||||
|
|
||||||
|
### How is this different to VM templating
|
||||||
|
|
||||||
|
Both [VM templating](https://github.com/kata-containers/documentation/blob/master/how-to/what-is-vm-templating-and-how-do-I-use-it.md) and VMCache help speed up new container creation.
|
||||||
|
When VM templating enabled, new VMs are created by cloning from a pre-created template VM, and they will share the same initramfs, kernel and agent memory in readonly mode. So it saves a lot of memory if there are many Kata Containers running on the same host.
|
||||||
|
VMCache is not vulnerable to [share memory CVE](https://github.com/kata-containers/documentation/blob/master/how-to/what-is-vm-templating-and-how-do-I-use-it.md#what-are-the-cons) because each VM doesn't share the memory.
|
||||||
|
|
||||||
|
### How to enable VMCache
|
||||||
|
|
||||||
|
VMCache can be enabled by changing your Kata Containers config file (`/usr/share/defaults/kata-containers/configuration.toml`,
|
||||||
|
overridden by `/etc/kata-containers/configuration.toml` if provided) such that:
|
||||||
|
* `vm_cache_number` specifies the number of caches of VMCache:
|
||||||
|
* unspecified or == 0
|
||||||
|
VMCache is disabled
|
||||||
|
* `> 0`
|
||||||
|
will be set to the specified number
|
||||||
|
* `vm_cache_endpoint` specifies the address of the Unix socket.
|
||||||
|
|
||||||
|
Then you can create a VM templating for later usage by calling:
|
||||||
|
```
|
||||||
|
$ sudo kata-runtime factory init
|
||||||
|
```
|
||||||
|
and purge it by `ctrl-c` it.
|
||||||
|
|
||||||
|
### Limitations
|
||||||
|
* Cannot work with VM templating.
|
||||||
|
* Only supports the QEMU hypervisor.
|
60
docs/how-to/what-is-vm-templating-and-how-do-I-use-it.md
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# What Is VM Templating and How To Enable It
|
||||||
|
|
||||||
|
### What is VM templating
|
||||||
|
VM templating is a Kata Containers feature that enables new VM
|
||||||
|
creation using a cloning technique. When enabled, new VMs are created
|
||||||
|
by cloning from a pre-created template VM, and they will share the
|
||||||
|
same initramfs, kernel and agent memory in readonly mode. It is very
|
||||||
|
much like a process fork done by the kernel but here we *fork* VMs.
|
||||||
|
|
||||||
|
### How is this different from VMCache
|
||||||
|
Both [VMCache](https://github.com/kata-containers/documentation/blob/master/how-to/what-is-vm-cache-and-how-do-I-use-it.md) and VM templating help speed up new container creation.
|
||||||
|
When VMCache is enabled, new VMs are created by the VMCache server. So it is not vulnerable to share memory CVE because each VM doesn't share the memory.
|
||||||
|
VM templating saves a lot of memory if there are many Kata Containers running on the same host.
|
||||||
|
|
||||||
|
### What are the Pros
|
||||||
|
VM templating helps speed up new container creation and saves a lot
|
||||||
|
of memory if there are many Kata Containers running on the same host.
|
||||||
|
If you are running a density workload, or care a lot about container
|
||||||
|
startup speed, VM templating can be very useful.
|
||||||
|
|
||||||
|
In one example, we created 100 Kata Containers each claiming 128MB
|
||||||
|
guest memory and ended up saving 9GB of memory in total when VM templating
|
||||||
|
is enabled, which is about 72% of the total guest memory. See [full results
|
||||||
|
here](https://github.com/kata-containers/runtime/pull/303#issuecomment-395846767).
|
||||||
|
|
||||||
|
In another example, we created ten Kata Containers with containerd shimv2
|
||||||
|
and calculated the average boot up speed for each of them. The result
|
||||||
|
showed that VM templating speeds up Kata Containers creation by as much as
|
||||||
|
38.68%. See [full results here](https://gist.github.com/bergwolf/06974a3c5981494a40e2c408681c085d).
|
||||||
|
|
||||||
|
### What are the Cons
|
||||||
|
One drawback of VM templating is that it cannot avoid cross-VM side-channel
|
||||||
|
attack such as [CVE-2015-2877](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-2877)
|
||||||
|
that originally targeted the Linux KSM feature.
|
||||||
|
It was concluded that "Share-until-written approaches for memory conservation among
|
||||||
|
mutually untrusting tenants are inherently detectable for information disclosure,
|
||||||
|
and can be classified as potentially misunderstood behaviors rather than vulnerabilities."
|
||||||
|
|
||||||
|
**Warning**: If you care about such attack vector, do not use VM templating or KSM.
|
||||||
|
|
||||||
|
### How to enable VM templating
|
||||||
|
VM templating can be enabled by changing your Kata Containers config file (`/usr/share/defaults/kata-containers/configuration.toml`,
|
||||||
|
overridden by `/etc/kata-containers/configuration.toml` if provided) such that:
|
||||||
|
|
||||||
|
- `qemu-lite` is specified in `hypervisor.qemu`->`path` section
|
||||||
|
- `enable_template = true`
|
||||||
|
- `initrd =` is set
|
||||||
|
- `image =` option is commented out or removed
|
||||||
|
|
||||||
|
Then you can create a VM template for later use by calling
|
||||||
|
```
|
||||||
|
$ sudo kata-runtime factory init
|
||||||
|
```
|
||||||
|
and purge it by calling
|
||||||
|
```
|
||||||
|
$ sudo kata-runtime factory destroy
|
||||||
|
```
|
||||||
|
|
||||||
|
If you do not want to call `kata-runtime factory init` by hand,
|
||||||
|
the very first Kata container you create will automatically create a VM template.
|
127
docs/install/README.md
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
# Kata Containers installation user guides
|
||||||
|
|
||||||
|
* [Prerequisites](#prerequisites)
|
||||||
|
* [Packaged installation methods](#packaged-installation-methods)
|
||||||
|
* [Supported Distributions](#supported-distributions)
|
||||||
|
* [Official packages](#official-packages)
|
||||||
|
* [Automatic Installation](#automatic-installation)
|
||||||
|
* [Snap Installation](#snap-installation)
|
||||||
|
* [Scripted Installation](#scripted-installation)
|
||||||
|
* [Manual Installation](#manual-installation)
|
||||||
|
* [Build from source installation](#build-from-source-installation)
|
||||||
|
* [Installing on a Cloud Service Platform](#installing-on-a-cloud-service-platform)
|
||||||
|
* [Further information](#further-information)
|
||||||
|
|
||||||
|
The following is an overview of the different installation methods available. All of these methods equally result
|
||||||
|
in a system configured to run Kata Containers.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
Kata Containers requires nested virtualization or bare metal.
|
||||||
|
See the
|
||||||
|
[hardware requirements](https://github.com/kata-containers/runtime/blob/master/README.md#hardware-requirements)
|
||||||
|
to see if your system is capable of running Kata Containers.
|
||||||
|
|
||||||
|
## Packaged installation methods
|
||||||
|
|
||||||
|
> **Notes:**
|
||||||
|
>
|
||||||
|
> - Packaged installation methods use your distribution's native package format (such as RPM or DEB).
|
||||||
|
|
||||||
|
| Installation method | Description | Distributions supported |
|
||||||
|
|------------------------------------------------------|-----------------------------------------------------------------------------------------|--------------------------------------|
|
||||||
|
| [Automatic](#automatic-installation) |Run a single command to install a full system |[see table](#supported-distributions) |
|
||||||
|
| [Using snap](#snap-installation) |Easy to install and automatic updates |any distro that supports snapd |
|
||||||
|
| [Using official distro packages](#official-packages) |Kata packages provided by Linux distributions official repositories |[see table](#supported-distributions) |
|
||||||
|
| [Scripted](#scripted-installation) |Generates an installation script which will result in a working system when executed |[see table](#supported-distributions) |
|
||||||
|
| [Manual](#manual-installation) |Allows the user to read a brief document and execute the specified commands step-by-step |[see table](#supported-distributions) |
|
||||||
|
|
||||||
|
### Supported Distributions
|
||||||
|
|
||||||
|
Kata is packaged by the Kata community for:
|
||||||
|
|
||||||
|
|Distribution (link to installation guide) | Versions |
|
||||||
|
|-----------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------|
|
||||||
|
|[CentOS](centos-installation-guide.md) | 7 |
|
||||||
|
|[Debian](debian-installation-guide.md) | 9, 10 |
|
||||||
|
|[Fedora](fedora-installation-guide.md) | 28, 29, 30 |
|
||||||
|
|[openSUSE](opensuse-installation-guide.md) | [Leap](opensuse-leap-installation-guide.md) (15, 15.1)<br>[Tumbleweed](opensuse-tumbleweed-installation-guide.md) |
|
||||||
|
|[Red Hat Enterprise Linux (RHEL)](rhel-installation-guide.md) | 7 |
|
||||||
|
|[SUSE Linux Enterprise Server (SLES)](sles-installation-guide.md)| SLES 12 SP3 |
|
||||||
|
|[Ubuntu](ubuntu-installation-guide.md) | 16.04, 18.04 |
|
||||||
|
|
||||||
|
#### Official packages
|
||||||
|
|
||||||
|
Kata packages are provided by official distribution repositories for:
|
||||||
|
|
||||||
|
|Distribution (link to packages) | Versions |
|
||||||
|
|-----------------------------------------------------------------|------------|
|
||||||
|
|[openSUSE](https://software.opensuse.org/package/katacontainers) | Tumbleweed |
|
||||||
|
|
||||||
|
|
||||||
|
### Automatic Installation
|
||||||
|
|
||||||
|
[Use `kata-manager`](installing-with-kata-manager.md) to automatically install Kata packages.
|
||||||
|
|
||||||
|
### Snap Installation
|
||||||
|
|
||||||
|
[](https://snapcraft.io/kata-containers)
|
||||||
|
|
||||||
|
[Use snap](snap-installation-guide.md) to install Kata Containers from https://snapcraft.io.
|
||||||
|
|
||||||
|
### Scripted Installation
|
||||||
|
[Use `kata-doc-to-script`](installing-with-kata-doc-to-script.md) to generate installation scripts that can be reviewed before they are executed.
|
||||||
|
|
||||||
|
### Manual Installation
|
||||||
|
Manual installation instructions are available for [these distributions](#supported-distributions) and document how to:
|
||||||
|
1. Add the Kata Containers repository to your distro package manager, and import the packages signing key.
|
||||||
|
2. Install the Kata Containers packages.
|
||||||
|
3. Install a supported container manager.
|
||||||
|
4. Configure the container manager to use `kata-runtime` as the default OCI runtime. Or, for Kata Containers 1.5.0 or above, configure the
|
||||||
|
`io.containerd.kata.v2` to be the runtime shim (see [containerd runtime v2 (shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)
|
||||||
|
and [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md)).
|
||||||
|
|
||||||
|
> **Notes on upgrading**:
|
||||||
|
> - If you are installing Kata Containers on a system that already has Clear Containers or `runv` installed,
|
||||||
|
> first read [the upgrading document](../Upgrading.md).
|
||||||
|
|
||||||
|
> **Notes on releases**:
|
||||||
|
> - [This download server](http://download.opensuse.org/repositories/home:/katacontainers:/releases:/)
|
||||||
|
> hosts the Kata Containers packages built by OBS for all the supported architectures.
|
||||||
|
> Packages are available for the latest and stable releases (more info [here](https://github.com/kata-containers/documentation/blob/master/Stable-Branch-Strategy.md)).
|
||||||
|
>
|
||||||
|
> - The following guides apply to the latest Kata Containers release
|
||||||
|
> (a.k.a. `master` release).
|
||||||
|
>
|
||||||
|
> - When choosing a stable release, replace all `master` occurrences in the URLs
|
||||||
|
> with a `stable-x.y` version available on the [download server](http://download.opensuse.org/repositories/home:/katacontainers:/releases:/).
|
||||||
|
|
||||||
|
> **Notes on packages source verification**:
|
||||||
|
> - The Kata packages hosted on the download server are signed with GPG to ensure integrity and authenticity.
|
||||||
|
>
|
||||||
|
> - The public key used to sign packages is available [at this link](https://raw.githubusercontent.com/kata-containers/tests/master/data/rpm-signkey.pub); the fingerprint is `9FDC0CB6 3708CF80 3696E2DC D0B37B82 6063F3ED`.
|
||||||
|
>
|
||||||
|
> - Only trust the signing key and fingerprint listed in the previous bullet point. Do not disable GPG checks,
|
||||||
|
> otherwise packages source and authenticity is not guaranteed.
|
||||||
|
|
||||||
|
## Build from source installation
|
||||||
|
> **Notes:**
|
||||||
|
>
|
||||||
|
> - Power users who decide to build from sources should be aware of the
|
||||||
|
> implications of using an unpackaged system which will not be automatically
|
||||||
|
> updated as new [releases](../Stable-Branch-Strategy.md) are made available.
|
||||||
|
|
||||||
|
[Building from sources](../Developer-Guide.md#initial-setup) allows power users
|
||||||
|
who are comfortable building software from source to use the latest component
|
||||||
|
versions. This is not recommended for normal users.
|
||||||
|
|
||||||
|
## Installing on a Cloud Service Platform
|
||||||
|
* [Amazon Web Services (AWS)](aws-installation-guide.md)
|
||||||
|
* [Google Compute Engine (GCE)](gce-installation-guide.md)
|
||||||
|
* [Microsoft Azure](azure-installation-guide.md)
|
||||||
|
* [Minikube](minikube-installation-guide.md)
|
||||||
|
* [VEXXHOST OpenStack Cloud](vexxhost-installation-guide.md)
|
||||||
|
|
||||||
|
## Further information
|
||||||
|
* The [upgrading document](../Upgrading.md).
|
||||||
|
* The [developer guide](../Developer-Guide.md).
|
||||||
|
* The [runtime documentation](https://github.com/kata-containers/runtime/blob/master/README.md).
|
140
docs/install/aws-installation-guide.md
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
# Install Kata Containers on Amazon Web Services
|
||||||
|
|
||||||
|
* [Install and Configure AWS CLI](#install-and-configure-aws-cli)
|
||||||
|
* [Create or Import an EC2 SSH key pair](#create-or-import-an-ec2-ssh-key-pair)
|
||||||
|
* [Launch i3.metal instance](#launch-i3metal-instance)
|
||||||
|
* [Install Kata](#install-kata)
|
||||||
|
|
||||||
|
Kata Containers on Amazon Web Services (AWS) makes use of [i3.metal](https://aws.amazon.com/ec2/instance-types/i3/) instances. Most of the installation procedure is identical to that for Kata on your preferred distribution, except that you have to run it on bare metal instances since AWS doesn't support nested virtualization yet. This guide walks you through creating an i3.metal instance.
|
||||||
|
|
||||||
|
## Install and Configure AWS CLI
|
||||||
|
|
||||||
|
### Requirements
|
||||||
|
|
||||||
|
* Python:
|
||||||
|
* Python 2 version 2.6.5+
|
||||||
|
* Python 3 version 3.3+
|
||||||
|
|
||||||
|
### Install
|
||||||
|
|
||||||
|
Install with this command:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ pip install awscli --upgrade --user
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configure
|
||||||
|
|
||||||
|
First, verify it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ aws --version
|
||||||
|
```
|
||||||
|
|
||||||
|
Then configure it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ aws configure
|
||||||
|
```
|
||||||
|
|
||||||
|
Specify the required parameters:
|
||||||
|
|
||||||
|
```
|
||||||
|
AWS Access Key ID []: <your-key-id-from-iam>
|
||||||
|
AWS Secret Access Key []: <your-secret-access-key-from-iam>
|
||||||
|
Default region name []: <your-aws-region-for-your-i3-metal-instance>
|
||||||
|
Default output format [None]: <yaml-or-json-or-empty>
|
||||||
|
```
|
||||||
|
|
||||||
|
Alternatively, you can create the files: `~/.aws/credentials` and `~/.aws/config`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ cat <<EOF > ~/.aws/credentials
|
||||||
|
[default]
|
||||||
|
aws_access_key_id = <your-key-id-from-iam>
|
||||||
|
aws_secret_access_key = <your-secret-access-key-from-iam>
|
||||||
|
EOF
|
||||||
|
$ cat <<EOF > ~/.aws/config
|
||||||
|
[default]
|
||||||
|
region = <your-aws-region-for-your-i3-metal-instance>
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
For more information on how to get AWS credentials please refer to [this guide](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html). Alternatively, you can ask the administrator of your AWS account to issue one with the AWS CLI:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ aws_username="myusername"
|
||||||
|
$ aws iam create-access-key --user-name="$aws_username"
|
||||||
|
```
|
||||||
|
|
||||||
|
More general AWS CLI guidelines can be found [here](https://docs.aws.amazon.com/cli/latest/userguide/installing.html).
|
||||||
|
|
||||||
|
## Create or Import an EC2 SSH key pair
|
||||||
|
|
||||||
|
You will need this to access your instance.
|
||||||
|
|
||||||
|
To create:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ aws ec2 create-key-pair --key-name MyKeyPair | grep KeyMaterial | cut -d: -f2- | tr -d ' \n\"\,' > MyKeyPair.pem
|
||||||
|
$ chmod 400 MyKeyPair.pem
|
||||||
|
```
|
||||||
|
|
||||||
|
Alternatively to import using your public SSH key:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ aws ec2 import-key-pair --key-name "MyKeyPair" --public-key-material file://MyKeyPair.pub
|
||||||
|
```
|
||||||
|
|
||||||
|
## Launch i3.metal instance
|
||||||
|
|
||||||
|
Get the latest Bionic Ubuntu AMI (Amazon Image) or the latest AMI for the Linux distribution you would like to use. For example:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ aws ec2 describe-images --owners 099720109477 --filters "Name=name,Values=ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server*" --query 'sort_by(Images, &CreationDate)[].ImageId '
|
||||||
|
```
|
||||||
|
|
||||||
|
This command will produce output similar to the following:
|
||||||
|
|
||||||
|
```
|
||||||
|
[
|
||||||
|
...
|
||||||
|
"ami-063aa838bd7631e0b",
|
||||||
|
"ami-03d5270fcb641f79b"
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
Launch the EC2 instance and pick up the `INSTANCEID`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ aws ec2 run-instances --image-id ami-03d5270fcb641f79b --count 1 --instance-type i3.metal --key-name MyKeyPair --associate-public-ip-address > /tmp/aws.json
|
||||||
|
$ export INSTANCEID=$(grep InstanceId /tmp/aws.json | cut -d: -f2- | tr -d ' \n\"\,')
|
||||||
|
```
|
||||||
|
|
||||||
|
Wait for the instance to come up; the output of the following command should be `running`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ aws ec2 describe-instances --instance-id=${INSTANCEID} | grep running | cut -d: -f2- | tr -d ' \"\,'
|
||||||
|
```
|
||||||
|
|
||||||
|
Get the public IP address for the instances:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ export IP=$(aws ec2 describe-instances --instance-id=${INSTANCEID} | grep PublicIpAddress | cut -d: -f2- | tr -d ' \n\"\,')
|
||||||
|
```
|
||||||
|
|
||||||
|
Refer to [this guide](https://docs.aws.amazon.com/cli/latest/userguide/cli-ec2-launch.html) for more details on how to launch instances with the AWS CLI.
|
||||||
|
|
||||||
|
SSH into the machine
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ ssh -i MyKeyPair.pem ubuntu@${IP}
|
||||||
|
```
|
||||||
|
|
||||||
|
Go onto the next step.
|
||||||
|
|
||||||
|
## Install Kata
|
||||||
|
|
||||||
|
The process for installing Kata itself on bare metal is identical to that of a virtualization-enabled VM.
|
||||||
|
|
||||||
|
For detailed information to install Kata on your distribution of choice, see the [Kata Containers installation user guides](https://github.com/kata-containers/documentation/blob/master/install/README.md).
|
18
docs/install/azure-installation-guide.md
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# Install Kata Containers on Microsoft Azure
|
||||||
|
|
||||||
|
Kata Containers on Azure use nested virtualization to provide an identical installation
|
||||||
|
experience to Kata on your preferred Linux distribution.
|
||||||
|
|
||||||
|
This guide assumes you have an Azure account set up and tools to remotely login to your virtual
|
||||||
|
machine (SSH). Instructions will use [Azure Portal](https://portal.azure.com/) to avoid
|
||||||
|
local dependencies and setup.
|
||||||
|
|
||||||
|
## Create a new virtual machine with nesting support
|
||||||
|
|
||||||
|
Create a new virtual machine with:
|
||||||
|
* Nesting support (v3 series)
|
||||||
|
* your distro of choice
|
||||||
|
|
||||||
|
## Set up with distribution specific quick start
|
||||||
|
|
||||||
|
Follow distribution specific [install guides](https://github.com/kata-containers/documentation/tree/master/install#supported-distributions).
|
17
docs/install/centos-installation-guide.md
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Install Kata Containers on CentOS
|
||||||
|
|
||||||
|
1. Install the Kata Containers components with the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ source /etc/os-release
|
||||||
|
$ sudo yum -y install yum-utils
|
||||||
|
$ ARCH=$(arch)
|
||||||
|
$ BRANCH="${BRANCH:-master}"
|
||||||
|
$ sudo -E yum-config-manager --add-repo "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/CentOS_${VERSION_ID}/home:katacontainers:releases:${ARCH}:${BRANCH}.repo"
|
||||||
|
$ sudo -E yum -y install kata-runtime kata-proxy kata-shim
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Decide which container manager to use and select the corresponding link that follows:
|
||||||
|
|
||||||
|
- [Docker](docker/centos-docker-install.md)
|
||||||
|
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
|
22
docs/install/debian-installation-guide.md
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
# Install Kata Containers on Debian
|
||||||
|
|
||||||
|
1. Install the Kata Containers components with the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ export DEBIAN_FRONTEND=noninteractive
|
||||||
|
$ ARCH=$(arch)
|
||||||
|
$ BRANCH="${BRANCH:-master}"
|
||||||
|
$ source /etc/os-release
|
||||||
|
$ [ "$ID" = debian ] && [ -z "$VERSION_ID" ] && echo >&2 "ERROR: Debian unstable not supported.
|
||||||
|
You can try stable packages here:
|
||||||
|
http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}" && exit 1
|
||||||
|
$ sudo sh -c "echo 'deb http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/Debian_${VERSION_ID}/ /' > /etc/apt/sources.list.d/kata-containers.list"
|
||||||
|
$ curl -sL http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/Debian_${VERSION_ID}/Release.key | sudo apt-key add -
|
||||||
|
$ sudo -E apt-get update
|
||||||
|
$ sudo -E apt-get -y install kata-runtime kata-proxy kata-shim
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Decide which container manager to use and select the corresponding link that follows:
|
||||||
|
|
||||||
|
- [Docker](docker/debian-docker-install.md)
|
||||||
|
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
|
75
docs/install/docker/centos-docker-install.md
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
# Install Docker for Kata Containers on CentOS
|
||||||
|
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> - This guide assumes you have
|
||||||
|
> [already installed the Kata Containers packages](../centos-installation-guide.md).
|
||||||
|
|
||||||
|
1. Install the latest version of Docker with the following commands:
|
||||||
|
|
||||||
|
> **Notes:**
|
||||||
|
>
|
||||||
|
> - This step is only required if Docker is not installed on the system.
|
||||||
|
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
|
||||||
|
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
|
||||||
|
$ sudo yum -y install docker-ce
|
||||||
|
```
|
||||||
|
|
||||||
|
For more information on installing Docker please refer to the
|
||||||
|
[Docker Guide](https://docs.docker.com/engine/installation/linux/centos).
|
||||||
|
|
||||||
|
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
|
||||||
|
|
||||||
|
1. systemd (this is the default and is applied automatically if you select the
|
||||||
|
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
|
||||||
|
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
|
||||||
|
[Service]
|
||||||
|
ExecStart=
|
||||||
|
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Docker `daemon.json`
|
||||||
|
|
||||||
|
Create docker configuration folder.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo mkdir -p /etc/docker
|
||||||
|
```
|
||||||
|
|
||||||
|
Add the following definitions to `/etc/docker/daemon.json`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"default-runtime": "kata-runtime",
|
||||||
|
"runtimes": {
|
||||||
|
"kata-runtime": {
|
||||||
|
"path": "/usr/bin/kata-runtime"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Restart the Docker systemd service with the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo systemctl daemon-reload
|
||||||
|
$ sudo systemctl restart docker
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Run Kata Containers
|
||||||
|
|
||||||
|
You are now ready to run Kata Containers:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo docker run busybox uname -a
|
||||||
|
```
|
||||||
|
|
||||||
|
The previous command shows details of the kernel version running inside the
|
||||||
|
container, which is different to the host kernel version.
|
103
docs/install/docker/debian-docker-install.md
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
# Install Docker for Kata Containers on Debian
|
||||||
|
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> - This guide assumes you have
|
||||||
|
> [already installed the Kata Containers packages](../debian-installation-guide.md).
|
||||||
|
> - This guide allows for installation with `systemd` or `sysVinit` init systems.
|
||||||
|
|
||||||
|
1. Install the latest version of Docker with the following commands:
|
||||||
|
|
||||||
|
> **Notes:**
|
||||||
|
>
|
||||||
|
> - This step is only required if Docker is not installed on the system.
|
||||||
|
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
|
||||||
|
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo apt-get -y install apt-transport-https ca-certificates curl gnupg2 software-properties-common
|
||||||
|
$ curl -fsSL https://download.docker.com/linux/$(. /etc/os-release; echo "$ID")/gpg | sudo apt-key add -
|
||||||
|
$ sudo add-apt-repository "deb https://download.docker.com/linux/$(. /etc/os-release; echo "$ID") $(lsb_release -cs) stable"
|
||||||
|
$ sudo apt-get update
|
||||||
|
$ sudo -E apt-get -y install docker-ce
|
||||||
|
```
|
||||||
|
|
||||||
|
For more information on installing Docker please refer to the
|
||||||
|
[Docker Guide](https://docs.docker.com/engine/installation/linux/debian).
|
||||||
|
|
||||||
|
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
|
||||||
|
|
||||||
|
a. `sysVinit`
|
||||||
|
|
||||||
|
- with `sysVinit`, docker config is stored in `/etc/default/docker`, edit the options similar to the following:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ sudo sh -c "echo '# specify docker runtime for kata-containers
|
||||||
|
DOCKER_OPTS=\"-D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime\"' >> /etc/default/docker"
|
||||||
|
```
|
||||||
|
|
||||||
|
b. systemd (this is the default and is applied automatically if you select the
|
||||||
|
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
|
||||||
|
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
|
||||||
|
[Service]
|
||||||
|
ExecStart=
|
||||||
|
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
c. Docker `daemon.json`
|
||||||
|
|
||||||
|
Create docker configuration folder.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo mkdir -p /etc/docker
|
||||||
|
```
|
||||||
|
|
||||||
|
Add the following definitions to `/etc/docker/daemon.json`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"default-runtime": "kata-runtime",
|
||||||
|
"runtimes": {
|
||||||
|
"kata-runtime": {
|
||||||
|
"path": "/usr/bin/kata-runtime"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Restart the Docker systemd service with one of the following (depending on init choice):
|
||||||
|
|
||||||
|
a. `sysVinit`
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ sudo /etc/init.d/docker stop
|
||||||
|
$ sudo /etc/init.d/docker start
|
||||||
|
```
|
||||||
|
|
||||||
|
To watch for errors:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ tail -f /var/log/docker.log
|
||||||
|
```
|
||||||
|
|
||||||
|
b. systemd
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo systemctl daemon-reload
|
||||||
|
$ sudo systemctl restart docker
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Run Kata Containers
|
||||||
|
|
||||||
|
You are now ready to run Kata Containers:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo docker run busybox uname -a
|
||||||
|
```
|
||||||
|
|
||||||
|
The previous command shows details of the kernel version running inside the
|
||||||
|
container, which is different to the host kernel version.
|
77
docs/install/docker/fedora-docker-install.md
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
# Install Docker for Kata Containers on Fedora
|
||||||
|
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> - This guide assumes you have
|
||||||
|
> [already installed the Kata Containers packages](../fedora-installation-guide.md).
|
||||||
|
|
||||||
|
1. Install the latest version of Docker with the following commands:
|
||||||
|
|
||||||
|
> **Notes:**
|
||||||
|
>
|
||||||
|
> - This step is only required if Docker is not installed on the system.
|
||||||
|
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
|
||||||
|
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ source /etc/os-release
|
||||||
|
$ sudo dnf config-manager --add-repo https://download.docker.com/linux/fedora/docker-ce.repo
|
||||||
|
$ sudo dnf makecache
|
||||||
|
$ sudo dnf -y install docker-ce
|
||||||
|
```
|
||||||
|
|
||||||
|
For more information on installing Docker please refer to the
|
||||||
|
[Docker Guide](https://docs.docker.com/engine/installation/linux/fedora).
|
||||||
|
|
||||||
|
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
|
||||||
|
|
||||||
|
1. systemd (this is the default and is applied automatically if you select the
|
||||||
|
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
|
||||||
|
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
|
||||||
|
[Service]
|
||||||
|
ExecStart=
|
||||||
|
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Docker `daemon.json`
|
||||||
|
|
||||||
|
Create docker configuration folder.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo mkdir -p /etc/docker
|
||||||
|
```
|
||||||
|
|
||||||
|
Add the following definitions to `/etc/docker/daemon.json`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"default-runtime": "kata-runtime",
|
||||||
|
"runtimes": {
|
||||||
|
"kata-runtime": {
|
||||||
|
"path": "/usr/bin/kata-runtime"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Restart the Docker systemd service with the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo systemctl daemon-reload
|
||||||
|
$ sudo systemctl restart docker
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Run Kata Containers
|
||||||
|
|
||||||
|
You are now ready to run Kata Containers:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo docker run busybox uname -a
|
||||||
|
```
|
||||||
|
|
||||||
|
The previous command shows details of the kernel version running inside the
|
||||||
|
container, which is different to the host kernel version.
|
75
docs/install/docker/opensuse-docker-install.md
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
# Install Docker for Kata Containers on openSUSE
|
||||||
|
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> - This guide assumes you have
|
||||||
|
> [already installed the Kata Containers packages](../opensuse-installation-guide.md).
|
||||||
|
|
||||||
|
1. Install the latest version of Docker with the following commands:
|
||||||
|
|
||||||
|
> **Notes:**
|
||||||
|
>
|
||||||
|
> - This step is only required if Docker is not installed on the system.
|
||||||
|
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
|
||||||
|
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo zypper -n install docker
|
||||||
|
```
|
||||||
|
|
||||||
|
For more information on installing Docker please refer to the
|
||||||
|
[Docker Guide](https://software.opensuse.org/package/docker).
|
||||||
|
|
||||||
|
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
|
||||||
|
|
||||||
|
1. Specify the runtime options in `/etc/sysconfig/docker` (this is the default and is applied automatically if you select the
|
||||||
|
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ DOCKER_SYSCONFIG=/etc/sysconfig/docker
|
||||||
|
# Add kata-runtime to the list of available runtimes, if not already listed
|
||||||
|
$ grep -qE "^ *DOCKER_OPTS=.+--add-runtime[= ] *kata-runtime" $DOCKER_SYSCONFIG || sudo -E sed -i -E "s|^( *DOCKER_OPTS=.+)\" *$|\1 --add-runtime kata-runtime=/usr/bin/kata-runtime\"|g" $DOCKER_SYSCONFIG
|
||||||
|
# If a current default runtime is specified, overwrite it with kata-runtime
|
||||||
|
$ sudo -E sed -i -E "s|^( *DOCKER_OPTS=.+--default-runtime[= ] *)[^ \"]+(.*\"$)|\1kata-runtime\2|g" $DOCKER_SYSCONFIG
|
||||||
|
# Add kata-runtime as default runtime, if no default runtime is specified
|
||||||
|
$ grep -qE "^ *DOCKER_OPTS=.+--default-runtime" $DOCKER_SYSCONFIG || sudo -E sed -i -E "s|^( *DOCKER_OPTS=.+)(\"$)|\1 --default-runtime=kata-runtime\2|g" $DOCKER_SYSCONFIG
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Docker `daemon.json`
|
||||||
|
|
||||||
|
Create the Docker configuration folder.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo mkdir -p /etc/docker
|
||||||
|
```
|
||||||
|
|
||||||
|
Add the following definitions to `/etc/docker/daemon.json`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"default-runtime": "kata-runtime",
|
||||||
|
"runtimes": {
|
||||||
|
"kata-runtime": {
|
||||||
|
"path": "/usr/bin/kata-runtime"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Restart the Docker systemd service with the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo systemctl daemon-reload
|
||||||
|
$ sudo systemctl restart docker
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Run Kata Containers
|
||||||
|
|
||||||
|
You are now ready to run Kata Containers:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo docker run busybox uname -a
|
||||||
|
```
|
||||||
|
|
||||||
|
The previous command shows details of the kernel version running inside the
|
||||||
|
container, which is different to the host kernel version.
|
14
docs/install/docker/opensuse-leap-docker-install.md
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
# Install Docker for Kata Containers on openSUSE Leap
|
||||||
|
|
||||||
|
Follow the instructions in the generic [openSUSE Docker install guide](opensuse-docker-install.md).
|
||||||
|
<!--
|
||||||
|
You can ignore the content of this comment.
|
||||||
|
(test code run by test-install-docs.sh to validate code blocks in this document)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ echo "NOTE: this document is just a link to the generic openSUSE install guide located at:
|
||||||
|
https://raw.githubusercontent.com/kata-containers/documentation/master/install/docker/opensuse-docker-install.md
|
||||||
|
|
||||||
|
Please download this file and run kata-doc-to-script.sh again."
|
||||||
|
```
|
||||||
|
-->
|
14
docs/install/docker/opensuse-tumbleweed-docker-install.md
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
# Install Docker for Kata Containers on openSUSE Tumbleweed
|
||||||
|
|
||||||
|
Follow the instructions in the generic [openSUSE Docker install guide](opensuse-docker-install.md).
|
||||||
|
<!--
|
||||||
|
You can ignore the content of this comment.
|
||||||
|
(test code run by test-install-docs.sh to validate code blocks in this document)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ echo "NOTE: this document is just a link to the generic openSUSE install guide located at:
|
||||||
|
https://raw.githubusercontent.com/kata-containers/documentation/master/install/docker/opensuse-docker-install.md
|
||||||
|
|
||||||
|
Please download this file and run kata-doc-to-script.sh again."
|
||||||
|
```
|
||||||
|
-->
|
76
docs/install/docker/rhel-docker-install.md
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
# Install Docker for Kata Containers on RHEL
|
||||||
|
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> - This guide assumes you have
|
||||||
|
> [already installed the Kata Containers packages](../rhel-installation-guide.md).
|
||||||
|
|
||||||
|
1. Install the latest version of Docker with the following commands:
|
||||||
|
|
||||||
|
> **Notes:**
|
||||||
|
>
|
||||||
|
> - This step is only required if Docker is not installed on the system.
|
||||||
|
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
|
||||||
|
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ export rhel_devtoolset_version="7"
|
||||||
|
$ sudo subscription-manager repos --enable=rhel-${rhel_devtoolset_version}-server-extras-rpms
|
||||||
|
$ sudo yum -y install docker && systemctl enable --now docker
|
||||||
|
```
|
||||||
|
|
||||||
|
For more information on installing Docker please refer to the
|
||||||
|
[Docker Guide](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux_atomic_host/7/html-single/getting_started_with_containers/#getting_docker_in_rhel_7).
|
||||||
|
|
||||||
|
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
|
||||||
|
|
||||||
|
1. systemd (this is the default and is applied automatically if you select the
|
||||||
|
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
|
||||||
|
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
|
||||||
|
[Service]
|
||||||
|
ExecStart=
|
||||||
|
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Docker `daemon.json`
|
||||||
|
|
||||||
|
Create the Docker configuration folder.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo mkdir -p /etc/docker
|
||||||
|
```
|
||||||
|
|
||||||
|
Add the following definitions to `/etc/docker/daemon.json`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"default-runtime": "kata-runtime",
|
||||||
|
"runtimes": {
|
||||||
|
"kata-runtime": {
|
||||||
|
"path": "/usr/bin/kata-runtime"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Restart the Docker systemd service with the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo systemctl daemon-reload
|
||||||
|
$ sudo systemctl restart docker
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Run Kata Containers
|
||||||
|
|
||||||
|
You are now ready to run Kata Containers:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo docker run busybox uname -a
|
||||||
|
```
|
||||||
|
|
||||||
|
The previous command shows details of the kernel version running inside the
|
||||||
|
container, which is different to the host kernel version.
|
74
docs/install/docker/sles-docker-install.md
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
# Install Docker for Kata Containers on SLES
|
||||||
|
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> - This guide assumes you have
|
||||||
|
> [already installed the Kata Containers packages](../sles-installation-guide.md).
|
||||||
|
|
||||||
|
1. Install the latest version of Docker with the following commands:
|
||||||
|
|
||||||
|
> **Notes:**
|
||||||
|
>
|
||||||
|
> - This step is only required if Docker is not installed on the system.
|
||||||
|
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
|
||||||
|
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo zypper -n install docker
|
||||||
|
```
|
||||||
|
|
||||||
|
For more information on installing Docker please refer to the
|
||||||
|
[Docker Guide](https://www.suse.com/documentation/sles-12/singlehtml/book_sles_docker/book_sles_docker.html).
|
||||||
|
|
||||||
|
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
|
||||||
|
|
||||||
|
1. systemd (this is the default and is applied automatically if you select the
|
||||||
|
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
|
||||||
|
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
|
||||||
|
[Service]
|
||||||
|
ExecStart=
|
||||||
|
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Docker `daemon.json`
|
||||||
|
|
||||||
|
Create the Docker configuration folder.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo mkdir -p /etc/docker
|
||||||
|
```
|
||||||
|
|
||||||
|
Add the following definitions to `/etc/docker/daemon.json`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"default-runtime": "kata-runtime",
|
||||||
|
"runtimes": {
|
||||||
|
"kata-runtime": {
|
||||||
|
"path": "/usr/bin/kata-runtime"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Restart the Docker systemd service with the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo systemctl daemon-reload
|
||||||
|
$ sudo systemctl restart docker
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Run Kata Containers
|
||||||
|
|
||||||
|
You are now ready to run Kata Containers:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo docker run busybox uname -a
|
||||||
|
```
|
||||||
|
|
||||||
|
The previous command shows details of the kernel version running inside the
|
||||||
|
container, which is different to the host kernel version.
|
79
docs/install/docker/ubuntu-docker-install.md
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
# Install Docker for Kata Containers on Ubuntu
|
||||||
|
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> - This guide assumes you have
|
||||||
|
> [already installed the Kata Containers packages](../ubuntu-installation-guide.md).
|
||||||
|
|
||||||
|
1. Install the latest version of Docker with the following commands:
|
||||||
|
|
||||||
|
> **Notes:**
|
||||||
|
>
|
||||||
|
> - This step is only required if Docker is not installed on the system.
|
||||||
|
> - Docker version 18.09 [removed devicemapper support](https://github.com/kata-containers/documentation/issues/373).
|
||||||
|
> If you wish to use a block based backend, see the options listed on https://github.com/kata-containers/documentation/issues/407.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo -E apt-get -y install apt-transport-https ca-certificates software-properties-common
|
||||||
|
$ curl -sL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
|
||||||
|
$ arch=$(dpkg --print-architecture)
|
||||||
|
$ sudo -E add-apt-repository "deb [arch=${arch}] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
|
||||||
|
$ sudo -E apt-get update
|
||||||
|
$ sudo -E apt-get -y install docker-ce
|
||||||
|
```
|
||||||
|
|
||||||
|
For more information on installing Docker please refer to the
|
||||||
|
[Docker Guide](https://docs.docker.com/engine/installation/linux/ubuntu).
|
||||||
|
|
||||||
|
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
|
||||||
|
|
||||||
|
1. systemd (this is the default and is applied automatically if you select the
|
||||||
|
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo mkdir -p /etc/systemd/system/docker.service.d/
|
||||||
|
$ cat <<EOF | sudo tee /etc/systemd/system/docker.service.d/kata-containers.conf
|
||||||
|
[Service]
|
||||||
|
ExecStart=
|
||||||
|
ExecStart=/usr/bin/dockerd -D --add-runtime kata-runtime=/usr/bin/kata-runtime --default-runtime=kata-runtime
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Docker `daemon.json`
|
||||||
|
|
||||||
|
Create the Docker configuration folder.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo mkdir -p /etc/docker
|
||||||
|
```
|
||||||
|
|
||||||
|
Add the following definitions to `/etc/docker/daemon.json`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"default-runtime": "kata-runtime",
|
||||||
|
"runtimes": {
|
||||||
|
"kata-runtime": {
|
||||||
|
"path": "/usr/bin/kata-runtime"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Restart the Docker systemd service with the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo systemctl daemon-reload
|
||||||
|
$ sudo systemctl restart docker
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Run Kata Containers
|
||||||
|
|
||||||
|
You are now ready to run Kata Containers:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo docker run busybox uname -a
|
||||||
|
```
|
||||||
|
|
||||||
|
The previous command shows details of the kernel version running inside the
|
||||||
|
container, which is different to the host kernel version.
|
17
docs/install/fedora-installation-guide.md
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Install Kata Containers on Fedora
|
||||||
|
|
||||||
|
1. Install the Kata Containers components with the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ source /etc/os-release
|
||||||
|
$ ARCH=$(arch)
|
||||||
|
$ BRANCH="${BRANCH:-master}"
|
||||||
|
$ sudo dnf -y install dnf-plugins-core
|
||||||
|
$ sudo -E dnf config-manager --add-repo "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/Fedora_${VERSION_ID}/home:katacontainers:releases:${ARCH}:${BRANCH}.repo"
|
||||||
|
$ sudo -E dnf -y install kata-runtime kata-proxy kata-shim
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Decide which container manager to use and select the corresponding link that follows:
|
||||||
|
|
||||||
|
- [Docker](docker/fedora-docker-install.md)
|
||||||
|
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
|
133
docs/install/gce-installation-guide.md
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
# Install Kata Containers on Google Compute Engine
|
||||||
|
|
||||||
|
* [Create an Image with Nested Virtualization Enabled](#create-an-image-with-nested-virtualization-enabled)
|
||||||
|
* [Create the Image](#create-the-image)
|
||||||
|
* [Verify VMX is Available](#verify-vmx-is-available)
|
||||||
|
* [Install Kata](#install-kata)
|
||||||
|
* [Create a Kata-enabled Image](#create-a-kata-enabled-image)
|
||||||
|
|
||||||
|
Kata Containers on Google Compute Engine (GCE) makes use of [nested virtualization](https://cloud.google.com/compute/docs/instances/enable-nested-virtualization-vm-instances). Most of the installation procedure is identical to that for Kata on your preferred distribution, but enabling nested virtualization currently requires extra steps on GCE. This guide walks you through creating an image and instance with nested virtualization enabled. Note that `kata-runtime kata-check` checks for nested virtualization, but does not fail if support is not found.
|
||||||
|
|
||||||
|
As a pre-requisite this guide assumes an installed and configured instance of the [Google Cloud SDK](https://cloud.google.com/sdk/downloads). For a zero-configuration option, all of the commands below have been tested under [Google Cloud Shell](https://cloud.google.com/shell/) (as of Jun 2018). Verify your `gcloud` installation and configuration:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ gcloud info || { echo "ERROR: no Google Cloud SDK"; exit 1; }
|
||||||
|
```
|
||||||
|
|
||||||
|
## Create an Image with Nested Virtualization Enabled
|
||||||
|
|
||||||
|
VM images on GCE are grouped into families under projects. Officially supported images are automatically discoverable with `gcloud compute images list`. That command produces a list similar to the following (likely with different image names):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ gcloud compute images list
|
||||||
|
NAME PROJECT FAMILY DEPRECATED STATUS
|
||||||
|
centos-7-v20180523 centos-cloud centos-7 READY
|
||||||
|
coreos-stable-1745-5-0-v20180531 coreos-cloud coreos-stable READY
|
||||||
|
cos-beta-67-10575-45-0 cos-cloud cos-beta READY
|
||||||
|
cos-stable-66-10452-89-0 cos-cloud cos-stable READY
|
||||||
|
debian-9-stretch-v20180510 debian-cloud debian-9 READY
|
||||||
|
rhel-7-v20180522 rhel-cloud rhel-7 READY
|
||||||
|
sles-11-sp4-v20180523 suse-cloud sles-11 READY
|
||||||
|
ubuntu-1604-xenial-v20180522 ubuntu-os-cloud ubuntu-1604-lts READY
|
||||||
|
ubuntu-1804-bionic-v20180522 ubuntu-os-cloud ubuntu-1804-lts READY
|
||||||
|
```
|
||||||
|
|
||||||
|
Each distribution has its own project, and each project can host images for multiple versions of the distribution, typically grouped into families. We recommend you select images by project and family, rather than by name. This ensures any scripts or other automation always works with a non-deprecated image, including security updates, updates to GCE-specific scripts, etc.
|
||||||
|
|
||||||
|
### Create the Image
|
||||||
|
|
||||||
|
The following example (substitute your preferred distribution project and image family) produces an image with nested virtualization enabled in your currently active GCE project:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ SOURCE_IMAGE_PROJECT=ubuntu-os-cloud
|
||||||
|
$ SOURCE_IMAGE_FAMILY=ubuntu-1804-lts
|
||||||
|
$ IMAGE_NAME=${SOURCE_IMAGE_FAMILY}-nested
|
||||||
|
|
||||||
|
$ gcloud compute images create \
|
||||||
|
--source-image-project $SOURCE_IMAGE_PROJECT \
|
||||||
|
--source-image-family $SOURCE_IMAGE_FAMILY \
|
||||||
|
--licenses=https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx \
|
||||||
|
$IMAGE_NAME
|
||||||
|
```
|
||||||
|
|
||||||
|
If successful, `gcloud` reports that the image was created. Verify that the image has the nested virtualization license with `gcloud compute images describe $IMAGE_NAME`. This produces output like the following (some fields have been removed for clarity and to redact personal info):
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
diskSizeGb: '10'
|
||||||
|
kind: compute#image
|
||||||
|
licenseCodes:
|
||||||
|
- '1002001'
|
||||||
|
- '5926592092274602096'
|
||||||
|
licenses:
|
||||||
|
- https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx
|
||||||
|
- https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/licenses/ubuntu-1804-lts
|
||||||
|
name: ubuntu-1804-lts-nested
|
||||||
|
sourceImage: https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20180522
|
||||||
|
sourceImageId: '3280575157699667619'
|
||||||
|
sourceType: RAW
|
||||||
|
status: READY
|
||||||
|
```
|
||||||
|
|
||||||
|
The primary criterion of interest here is the presence of the `enable-vmx` license. Without that license Kata does not work. The presence of that license instructs the Google Compute Engine hypervisor to enable Intel's VT-x instructions in virtual machines created from the image. Note that nested virtualization is only available in VMs running on Intel Haswell or later CPU micro-architectures.
|
||||||
|
|
||||||
|
### Verify VMX is Available
|
||||||
|
|
||||||
|
Assuming you created a nested-enabled image using the previous instructions, verify that VMs created from this image are VMX-enabled with the following:
|
||||||
|
|
||||||
|
1. Create a VM from the image created previously:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ gcloud compute instances create \
|
||||||
|
--image $IMAGE_NAME \
|
||||||
|
--machine-type n1-standard-2 \
|
||||||
|
--min-cpu-platform "Intel Broadwell" \
|
||||||
|
kata-testing
|
||||||
|
```
|
||||||
|
|
||||||
|
> **NOTE**: In most zones the `--min-cpu-platform` argument can be omitted. It is only necessary in GCE Zones that include hosts based on Intel's Ivybridge platform.
|
||||||
|
|
||||||
|
2. Verify that the VMX CPUID flag is set:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ gcloud compute ssh kata-testing
|
||||||
|
|
||||||
|
# While ssh'd into the VM:
|
||||||
|
$ [ -z "$(lscpu|grep GenuineIntel)" ] && { echo "ERROR: Need an Intel CPU"; exit 1; }
$ grep -qw vmx /proc/cpuinfo || { echo "ERROR: VMX CPUID flag not set"; exit 1; }
|
||||||
|
```
|
||||||
|
|
||||||
|
If this fails, ensure you created your instance from the correct image and that the previously listed `enable-vmx` license is included.
|
||||||
|
|
||||||
|
## Install Kata
|
||||||
|
|
||||||
|
The process for installing Kata itself on a virtualization-enabled VM is identical to that for bare metal.
|
||||||
|
|
||||||
|
For detailed information to install Kata on your distribution of choice, see the [Kata Containers installation user guides](https://github.com/kata-containers/documentation/blob/master/install/README.md).
|
||||||
|
|
||||||
|
## Create a Kata-enabled Image
|
||||||
|
|
||||||
|
Optionally, after installing Kata, create an image to preserve the fruits of your labor:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ gcloud compute instances stop kata-testing
|
||||||
|
$ gcloud compute images create \
|
||||||
|
--source-disk kata-testing \
|
||||||
|
kata-base
|
||||||
|
```
|
||||||
|
|
||||||
|
The result is an image that includes any changes made to the `kata-testing` instance as well as the `enable-vmx` flag. Verify this with `gcloud compute images describe kata-base`. The result, which omits some fields for clarity, should be similar to the following:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
diskSizeGb: '10'
|
||||||
|
kind: compute#image
|
||||||
|
licenseCodes:
|
||||||
|
- '1002001'
|
||||||
|
- '5926592092274602096'
|
||||||
|
licenses:
|
||||||
|
- https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx
|
||||||
|
- https://www.googleapis.com/compute/v1/projects/ubuntu-os-cloud/global/licenses/ubuntu-1804-lts
|
||||||
|
name: kata-base
|
||||||
|
selfLink: https://www.googleapis.com/compute/v1/projects/my-kata-project/global/images/kata-base
|
||||||
|
sourceDisk: https://www.googleapis.com/compute/v1/projects/my-kata-project/zones/us-west1-a/disks/kata-testing
|
||||||
|
sourceType: RAW
|
||||||
|
status: READY
|
||||||
|
```
|
47
docs/install/installing-with-kata-doc-to-script.md
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# Installing with `kata-doc-to-script`
|
||||||
|
|
||||||
|
* [Introduction](#introduction)
|
||||||
|
* [Packages Installation](#packages-installation)
|
||||||
|
* [Docker Installation and Setup](#docker-installation-and-setup)
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
Use [these installation instructions](README.md#supported-distributions) together with
|
||||||
|
[`kata-doc-to-script`](https://github.com/kata-containers/tests/blob/master/.ci/kata-doc-to-script.sh)
|
||||||
|
to generate installation bash scripts.
|
||||||
|
|
||||||
|
> Note:
|
||||||
|
> - Only the Docker container manager installation can be scripted. For other setups you must
|
||||||
|
> install and configure the container manager manually.
|
||||||
|
|
||||||
|
## Packages Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ source /etc/os-release
|
||||||
|
$ curl -fsSL -O https://raw.githubusercontent.com/kata-containers/documentation/master/install/${ID}-installation-guide.md
|
||||||
|
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/.ci/kata-doc-to-script.sh) ${ID}-installation-guide.md ${ID}-install.sh"
|
||||||
|
```
|
||||||
|
|
||||||
|
For example, if your distribution is CentOS, the previous example will generate a runnable shell script called `centos-install.sh`.
|
||||||
|
To proceed with the installation, run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ source /etc/os-release
|
||||||
|
$ bash "./${ID}-install.sh"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Docker Installation and Setup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ source /etc/os-release
|
||||||
|
$ curl -fsSL -O https://raw.githubusercontent.com/kata-containers/documentation/master/install/docker/${ID}-docker-install.md
|
||||||
|
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/.ci/kata-doc-to-script.sh) ${ID}-docker-install.md ${ID}-docker-install.sh"
|
||||||
|
```
|
||||||
|
|
||||||
|
For example, if your distribution is CentOS, this will generate a runnable shell script called `centos-docker-install.sh`.
|
||||||
|
|
||||||
|
To proceed with the Docker installation, run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ source /etc/os-release
|
||||||
|
$ bash "./${ID}-docker-install.sh"
|
||||||
|
```
|
47
docs/install/installing-with-kata-manager.md
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# Installing with `kata-manager`
|
||||||
|
|
||||||
|
* [Introduction](#introduction)
|
||||||
|
* [Full Installation](#full-installation)
|
||||||
|
* [Install the Kata packages only](#install-the-kata-packages-only)
|
||||||
|
* [Further Information](#further-information)
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
`kata-manager` automates the Kata Containers installation procedure documented for [these Linux distributions](README.md#supported-distributions).
|
||||||
|
|
||||||
|
> **Note**:
|
||||||
|
> - `kata-manager` requires `curl` and `sudo` installed on your system.
|
||||||
|
>
|
||||||
|
> - Full installation mode is only available for the Docker container manager. For other setups, you
|
||||||
|
> can still use `kata-manager` to [install Kata package](#install-the-kata-packages-only), and then setup your container manager manually.
|
||||||
|
>
|
||||||
|
> - You can run `kata-manager` in dry run mode by passing the `-n` flag. Dry run mode allows you to review the
|
||||||
|
> commands that `kata-manager` would run, without doing any change to your system.
|
||||||
|
|
||||||
|
|
||||||
|
## Full Installation
|
||||||
|
This command does the following:
|
||||||
|
1. Installs Kata Containers packages
|
||||||
|
2. Installs Docker
|
||||||
|
3. Configures Docker to use the Kata OCI runtime by default
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/cmd/kata-manager/kata-manager.sh) install-docker-system"
|
||||||
|
```
|
||||||
|
|
||||||
|
<!--
|
||||||
|
You can ignore the content of this comment.
|
||||||
|
(test code run by test-install-docs.sh to validate code blocks in this document)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/cmd/kata-manager/kata-manager.sh) remove-packages"
|
||||||
|
```
|
||||||
|
-->
|
||||||
|
## Install the Kata packages only
|
||||||
|
Use the following command to only install Kata Containers packages.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ bash -c "$(curl -fsSL https://raw.githubusercontent.com/kata-containers/tests/master/cmd/kata-manager/kata-manager.sh) install-packages"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Further Information
|
||||||
|
For more information on what `kata-manager` can do, refer to the [`kata-manager` page](https://github.com/kata-containers/tests/blob/master/cmd/kata-manager).
|
238
docs/install/minikube-installation-guide.md
Normal file
@ -0,0 +1,238 @@
|
|||||||
|
# Installing Kata Containers in Minikube
|
||||||
|
|
||||||
|
* [Installing Kata Containers in Minikube](#installing-kata-containers-in-minikube)
|
||||||
|
* [Introduction](#introduction)
|
||||||
|
* [Prerequisites](#prerequisites)
|
||||||
|
* [Setting up Minikube](#setting-up-minikube)
|
||||||
|
* [Checking for nested virtualization](#checking-for-nested-virtualization)
|
||||||
|
* [Check Minikube is running](#check-minikube-is-running)
|
||||||
|
* [Installing Kata Containers](#installing-kata-containers)
|
||||||
|
* [Enabling Kata Containers](#enabling-kata-containers)
|
||||||
|
* [Register the runtime](#register-the-runtime)
|
||||||
|
* [Testing Kata Containers](#testing-kata-containers)
|
||||||
|
* [Wrapping up](#wrapping-up)
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
[Minikube](https://kubernetes.io/docs/setup/minikube/) is an easy way to try out a Kubernetes (k8s)
|
||||||
|
cluster locally. It creates a single node Kubernetes stack in a local VM.
|
||||||
|
|
||||||
|
[Kata Containers](https://github.com/kata-containers) can be installed into a Minikube cluster using
|
||||||
|
[`kata-deploy`](https://github.com/kata-containers/packaging/tree/master/kata-deploy).
|
||||||
|
|
||||||
|
This document details the prerequisites, installation steps, and how to check
|
||||||
|
the installation has been successful.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
This installation guide has only been verified under a Minikube Linux installation, using the
|
||||||
|
[`kvm2`](https://minikube.sigs.k8s.io/docs/drivers/kvm2/) driver.
|
||||||
|
|
||||||
|
> **Notes:**
|
||||||
|
> - This installation guide may not work for macOS installations of Minikube, due to the lack of
|
||||||
|
nested virtualization support on that platform.
|
||||||
|
> - This installation guide has not been tested on a Windows installation.
|
||||||
|
> - Kata under Minikube does not currently support Kata Firecracker (`kata-fc`).
|
||||||
|
> Although the `kata-fc` binary will be installed as part of these instructions,
|
||||||
|
> via `kata-deploy`, pods cannot be launched with `kata-fc`, and will fail to start.
|
||||||
|
|
||||||
|
Before commencing installation, it is strongly recommended you read the
|
||||||
|
[Minikube installation guide](https://kubernetes.io/docs/tasks/tools/install-minikube/).
|
||||||
|
|
||||||
|
## Checking for nested virtualization
|
||||||
|
|
||||||
|
For Kata Containers to work under a Minikube VM, your host system must support
|
||||||
|
nested virtualization. If you are using a Linux system utilizing Intel VT-x
|
||||||
|
and the `kvm_intel` driver, you can perform the following check:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ cat /sys/module/kvm_intel/parameters/nested
|
||||||
|
```
|
||||||
|
|
||||||
|
If your system does not report `Y` from the `nested` parameter, then details on how
|
||||||
|
to enable nested virtualization can be found on the
|
||||||
|
[KVM Nested Guests page](https://www.linux-kvm.org/page/Nested_Guests)
|
||||||
|
|
||||||
|
Alternatively, and for other architectures, the Kata Containers built in
|
||||||
|
[`kata-check`](https://github.com/kata-containers/runtime#hardware-requirements)
|
||||||
|
command can be used *inside Minikube* once Kata has been installed, to check for compatibility.
|
||||||
|
|
||||||
|
## Setting up Minikube
|
||||||
|
|
||||||
|
To enable Kata Containers under Minikube, you need to add a few configuration options to the
|
||||||
|
default Minikube setup. You can easily accomplish this as Minikube supports them on the setup command line.
|
||||||
|
Minikube can be set up to use either CRI-O or containerd.
|
||||||
|
|
||||||
|
Here are the features to set up a CRI-O based Minikube, and why you need them:
|
||||||
|
|
||||||
|
| what | why |
|
||||||
|
| ---- | --- |
|
||||||
|
| `--bootstrapper=kubeadm` | As recommended for [minikube CRI-o](https://kubernetes.io/docs/setup/minikube/#cri-o) |
|
||||||
|
| `--container-runtime=cri-o` | Using CRI-O for Kata |
|
||||||
|
| `--enable-default-cni` | As recommended for [minikube CRI-o](https://kubernetes.io/docs/setup/minikube/#cri-o) |
|
||||||
|
| `--memory 6144` | Allocate sufficient memory, as Kata Containers default to 1 or 2Gb |
|
||||||
|
| `--network-plugin=cni` | As recommended for [minikube CRI-o](https://kubernetes.io/docs/setup/minikube/#cri-o) |
|
||||||
|
| `--vm-driver kvm2` | The host VM driver |
|
||||||
|
|
||||||
|
To use containerd, modify the `--container-runtime` argument:
|
||||||
|
|
||||||
|
| what | why |
|
||||||
|
| ---- | --- |
|
||||||
|
| `--container-runtime=containerd` | Using containerd for Kata |
|
||||||
|
|
||||||
|
> **Notes:**
|
||||||
|
> - Adjust the `--memory 6144` line to suit your environment and requirements. Kata Containers default to
|
||||||
|
> requesting 2048MB per container. We recommend you supply more than that to the Minikube node.
|
||||||
|
> - Prior to Minikube/Kubernetes v1.14, the beta `RuntimeClass` feature also needed enabling with
|
||||||
|
> the following.
|
||||||
|
>
|
||||||
|
> | what | why |
|
||||||
|
> | ---- | --- |
|
||||||
|
> | `--feature-gates=RuntimeClass=true` | Kata needs to use the `RuntimeClass` Kubernetes feature |
|
||||||
|
|
||||||
|
The full command is therefore:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ minikube start --vm-driver kvm2 --memory 6144 --network-plugin=cni --enable-default-cni --container-runtime=cri-o --bootstrapper=kubeadm
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Note:** For Kata Containers later than v1.6.1, the now default `tcfilter` networking of Kata Containers
|
||||||
|
> does not work for Minikube versions less than v1.1.1. Please ensure you use Minikube version v1.1.1
|
||||||
|
> or above.
|
||||||
|
|
||||||
|
## Check Minikube is running
|
||||||
|
|
||||||
|
Before you install Kata Containers, check that your Minikube is operating. On your guest:
|
||||||
|
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ kubectl get nodes
|
||||||
|
```
|
||||||
|
|
||||||
|
You should see your `master` node listed as being `Ready`.
|
||||||
|
|
||||||
|
Check you have virtualization enabled inside your Minikube. The following should return
|
||||||
|
a number larger than `0` if you have either of the `vmx` or `svm` nested virtualization features
|
||||||
|
available:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ minikube ssh "egrep -c 'vmx|svm' /proc/cpuinfo"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Installing Kata Containers
|
||||||
|
|
||||||
|
You can now install the Kata Containers runtime components. You will need a local copy of some Kata
|
||||||
|
Containers components to help with this, and then use `kubectl` on the host (that Minikube has already
|
||||||
|
configured for you) to deploy them:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ git clone https://github.com/kata-containers/packaging.git
|
||||||
|
$ cd packaging/kata-deploy
|
||||||
|
$ kubectl apply -f kata-rbac.yaml
|
||||||
|
$ kubectl apply -f kata-deploy.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
This installs the Kata Containers components into `/opt/kata` inside the Minikube node. It can take
|
||||||
|
a few minutes for the operation to complete. You can check the installation has worked by checking
|
||||||
|
the status of the `kata-deploy` pod, which will be executing
|
||||||
|
[this script](https://github.com/kata-containers/packaging/blob/master/kata-deploy/scripts/kata-deploy.sh),
|
||||||
|
and will be executing a `sleep infinity` once it has successfully completed its work.
|
||||||
|
You can accomplish this by running the following:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ podname=$(kubectl -n kube-system get pods -o=name | fgrep kata-deploy | sed 's?pod/??')
|
||||||
|
$ kubectl -n kube-system exec ${podname} -- ps -ef | fgrep infinity
|
||||||
|
```
|
||||||
|
|
||||||
|
> *NOTE:* This check only works for single node clusters, which is the default for Minikube.
|
||||||
|
> For multi-node clusters, the check would need to be adapted to check `kata-deploy` had
|
||||||
|
> completed on all nodes.
|
||||||
|
|
||||||
|
## Enabling Kata Containers
|
||||||
|
|
||||||
|
> **Note:** Only Minikube/Kubernetes versions <= 1.13 require this step. Since version
|
||||||
|
> v1.14, the `RuntimeClass` is enabled by default. Performing this step on Kubernetes > v1.14 is
|
||||||
|
> however benign.
|
||||||
|
|
||||||
|
Now you have installed the Kata Containers components in the Minikube node. Next, you need to configure
|
||||||
|
Kubernetes `RuntimeClass` to know when to use Kata Containers to run a pod.
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ kubectl apply -f https://raw.githubusercontent.com/kubernetes/node-api/master/manifests/runtimeclass_crd.yaml > runtimeclass_crd.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### Register the runtime
|
||||||
|
|
||||||
|
Now register the `kata qemu` runtime with that class. This should result in no errors:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ cd packaging/kata-deploy/k8s-1.14
|
||||||
|
$ kubectl apply -f kata-qemu-runtimeClass.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
The Kata Containers installation process should be complete and enabled in the Minikube cluster.
|
||||||
|
|
||||||
|
## Testing Kata Containers
|
||||||
|
|
||||||
|
Launch a container that has been defined to run on Kata Containers. The enabling is configured by
|
||||||
|
the following lines in the YAML file. See the Kubernetes
|
||||||
|
[Runtime Class Documentation](https://kubernetes.io/docs/concepts/containers/runtime-class/#usage)
|
||||||
|
for more details.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
spec:
|
||||||
|
runtimeClassName: kata-qemu
|
||||||
|
```
|
||||||
|
|
||||||
|
Perform the following action to launch a Kata Containers based Apache PHP pod:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ cd packaging/kata-deploy/examples
|
||||||
|
$ kubectl apply -f test-deploy-kata-qemu.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
This may take a few moments if the container image needs to be pulled down into the cluster.
|
||||||
|
Check progress using:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ kubectl rollout status deployment php-apache-kata-qemu
|
||||||
|
```
|
||||||
|
|
||||||
|
There are a couple of ways to verify it is running with Kata Containers.
|
||||||
|
In theory, you should not be able to tell your pod is running as a Kata Containers container.
|
||||||
|
Careful examination can verify your pod is in fact a Kata Containers pod.
|
||||||
|
|
||||||
|
First, look on the node for a `qemu` running. You should see a QEMU command line output here,
|
||||||
|
indicating that your pod is running inside a Kata Containers VM:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ minikube ssh -- pgrep -a qemu
|
||||||
|
```
|
||||||
|
|
||||||
|
Another way to verify Kata Containers is running is to look in the container itself and check
|
||||||
|
which kernel is running there. For a normal software container you will be running
|
||||||
|
the same kernel as the node. For a Kata Container you will be running a Kata Containers kernel
|
||||||
|
inside the Kata Containers VM.
|
||||||
|
|
||||||
|
First, examine which kernel is running inside the Minikube node itself:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ minikube ssh -- uname -a
|
||||||
|
```
|
||||||
|
|
||||||
|
And then compare that against the kernel that is running inside the container:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ podname=$(kubectl get pods -o=name | fgrep php-apache-kata-qemu | sed 's?pod/??')
|
||||||
|
$ kubectl exec ${podname} -- uname -a
|
||||||
|
```
|
||||||
|
|
||||||
|
You should see the node and pod are running different kernel versions.
|
||||||
|
|
||||||
|
## Wrapping up
|
||||||
|
|
||||||
|
This guide has shown an easy way to set up Minikube with Kata Containers.
|
||||||
|
Be aware, this is only a small single node Kubernetes cluster running under a nested virtualization setup.
|
||||||
|
As such, it has limitations, but as a first introduction to Kata Containers, and how to install it under Kubernetes,
|
||||||
|
it should suffice for initial learning and experimentation.
|
||||||
|
|
23
docs/install/opensuse-installation-guide.md
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Install Kata Containers on openSUSE
|
||||||
|
|
||||||
|
1. Install the Kata Containers components with the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ source /etc/os-release
|
||||||
|
$ DISTRO_REPO=$(sed "s/ /_/g" <<< "$NAME")
|
||||||
|
$ [ -n "$VERSION" ] && DISTRO_REPO+="_${VERSION}"
|
||||||
|
$ DISTRO_REPO=$(echo $DISTRO_REPO | tr -d ' ')
|
||||||
|
$ ARCH=$(arch)
|
||||||
|
$ BRANCH="${BRANCH:-master}"
|
||||||
|
$ REPO_ALIAS="kata-${BRANCH}"
|
||||||
|
$ PUBKEY="/tmp/rpm-signkey.pub"
|
||||||
|
$ curl -SsL -o "$PUBKEY" "https://raw.githubusercontent.com/kata-containers/tests/master/data/rpm-signkey.pub"
|
||||||
|
$ sudo -E rpm --import "$PUBKEY"
|
||||||
|
$ zypper lr "$REPO_ALIAS" && sudo -E zypper -n removerepo "$REPO_ALIAS"
|
||||||
|
$ sudo -E zypper addrepo --refresh "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/${DISTRO_REPO}/" "$REPO_ALIAS"
|
||||||
|
$ sudo -E zypper -n install kata-runtime
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Decide which container manager to use and select the corresponding link that follows:
|
||||||
|
- [Docker](docker/opensuse-docker-install.md)
|
||||||
|
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
|
19
docs/install/opensuse-leap-installation-guide.md
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# Install Kata Containers on openSUSE Leap
|
||||||
|
|
||||||
|
1. Install Kata Containers on openSUSE by following the instructions in the
|
||||||
|
[openSUSE install guide](opensuse-installation-guide.md).
|
||||||
|
<!--
|
||||||
|
You can ignore the content of this comment.
|
||||||
|
(test code run by test-install-docs.sh to validate code blocks this document)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ echo "NOTE: this document is just a link to the generic openSUSE install guide located at:
|
||||||
|
https://raw.githubusercontent.com/kata-containers/documentation/master/install/opensuse-installation-guide.md
|
||||||
|
|
||||||
|
Please download this file and run kata-doc-to-script.sh again."
|
||||||
|
```
|
||||||
|
-->
|
||||||
|
|
||||||
|
2. Decide which container manager to use and select the corresponding link that follows:
|
||||||
|
- [Docker](docker/opensuse-leap-docker-install.md)
|
||||||
|
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
|
19
docs/install/opensuse-tumbleweed-installation-guide.md
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# Install Kata Containers on openSUSE Tumbleweed
|
||||||
|
|
||||||
|
1. Install Kata Containers on openSUSE by following the instructions in the
|
||||||
|
[openSUSE install guide](opensuse-installation-guide.md).
|
||||||
|
<!--
|
||||||
|
You can ignore the content of this comment.
|
||||||
|
(test code run by test-install-docs.sh to validate code blocks this document)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ echo "NOTE: this document is just a link to the generic openSUSE install guide located at:
|
||||||
|
https://raw.githubusercontent.com/kata-containers/documentation/master/install/opensuse-installation-guide.md
|
||||||
|
|
||||||
|
Please download this file and run kata-doc-to-script.sh again."
|
||||||
|
```
|
||||||
|
-->
|
||||||
|
|
||||||
|
2. Decide which container manager to use and select the corresponding link that follows:
|
||||||
|
- [Docker](docker/opensuse-tumbleweed-docker-install.md)
|
||||||
|
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)
|
16
docs/install/rhel-installation-guide.md
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
# Install Kata Containers on RHEL
|
||||||
|
|
||||||
|
1. Install the Kata Containers components with the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ source /etc/os-release
|
||||||
|
$ ARCH=$(arch)
|
||||||
|
$ BRANCH="${BRANCH:-master}"
|
||||||
|
$ sudo -E yum-config-manager --add-repo "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/RHEL_${VERSION_ID}/home:katacontainers:releases:${ARCH}:${BRANCH}.repo"
|
||||||
|
$ sudo -E yum -y install kata-runtime kata-proxy kata-shim
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Decide which container manager to use and select the corresponding link that follows:
|
||||||
|
|
||||||
|
- [Docker](docker/rhel-docker-install.md)
|
||||||
|
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
|
15
docs/install/sles-installation-guide.md
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
# Install Kata Containers on SLES
|
||||||
|
|
||||||
|
1. Install the Kata Containers components with the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ ARCH=$(arch)
|
||||||
|
$ BRANCH="${BRANCH:-master}"
|
||||||
|
$ sudo -E zypper addrepo "http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/SLE_15_SP1/home:katacontainers:releases:${ARCH}:${BRANCH}.repo"
|
||||||
|
$ sudo -E zypper -n --no-gpg-checks install kata-runtime kata-proxy kata-shim
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Decide which container manager to use and select the corresponding link that follows:
|
||||||
|
|
||||||
|
- [Docker](docker/sles-docker-install.md)
|
||||||
|
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
|
13
docs/install/snap-installation-guide.md
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
# Install Kata Containers from `snapcraft.io`
|
||||||
|
|
||||||
|
Kata Containers can be installed in any Linux distribution that supports
|
||||||
|
[snapd](https://docs.snapcraft.io/installing-snapd).
|
||||||
|
|
||||||
|
Run the following command to install Kata Containers:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ sudo snap install kata-containers --classic
|
||||||
|
```
|
||||||
|
|
||||||
|
For further information on integrating and configuring the `snap` Kata Containers install,
|
||||||
|
refer to the [Kata Containers packaging `snap` documentation](https://github.com/kata-containers/packaging/blob/master/snap/README.md#configure-kata-containers).
|
17
docs/install/ubuntu-installation-guide.md
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Install Kata Containers on Ubuntu
|
||||||
|
|
||||||
|
1. Install the Kata Containers components with the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ ARCH=$(arch)
|
||||||
|
$ BRANCH="${BRANCH:-master}"
|
||||||
|
$ sudo sh -c "echo 'deb http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/ /' > /etc/apt/sources.list.d/kata-containers.list"
|
||||||
|
$ curl -sL http://download.opensuse.org/repositories/home:/katacontainers:/releases:/${ARCH}:/${BRANCH}/xUbuntu_$(lsb_release -rs)/Release.key | sudo apt-key add -
|
||||||
|
$ sudo -E apt-get update
|
||||||
|
$ sudo -E apt-get -y install kata-runtime kata-proxy kata-shim
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Decide which container manager to use and select the corresponding link that follows:
|
||||||
|
|
||||||
|
- [Docker](docker/ubuntu-docker-install.md)
|
||||||
|
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
|
16
docs/install/vexxhost-installation-guide.md
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
# Install Kata Containers on VEXXHOST
|
||||||
|
|
||||||
|
Kata Containers on VEXXHOST use nested virtualization to provide an identical
|
||||||
|
installation experience to Kata on your preferred Linux distribution.
|
||||||
|
|
||||||
|
This guide assumes you have an OpenStack public cloud account set up and tools
|
||||||
|
to remotely connect to your virtual machine (SSH).
|
||||||
|
|
||||||
|
## Create a new virtual machine with nesting support
|
||||||
|
|
||||||
|
All regions support nested virtualization using the V2 flavors (those prefixed
|
||||||
|
with v2). The recommended machine type for container workloads is the `v2-highcpu` range.
|
||||||
|
|
||||||
|
## Set up with distribution specific quick start
|
||||||
|
|
||||||
|
Follow distribution specific [install guides](https://github.com/kata-containers/documentation/tree/master/install#supported-distributions).
|
6
docs/use-cases/GPU-passthrough-and-Kata.md
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# Using GPUs with Kata Containers
|
||||||
|
|
||||||
|
Kata Containers supports passing certain GPUs from the host into the container. Select the GPU vendor for detailed information:
|
||||||
|
|
||||||
|
- [Intel](Intel-GPU-passthrough-and-Kata.md)
|
||||||
|
- [Nvidia](Nvidia-GPU-passthrough-and-Kata.md)
|
295
docs/use-cases/Intel-GPU-passthrough-and-Kata.md
Normal file
@ -0,0 +1,295 @@
|
|||||||
|
# Using Intel GPU device with Kata Containers
|
||||||
|
|
||||||
|
- [Using Intel GPU device with Kata Containers](#using-intel-gpu-device-with-kata-containers)
|
||||||
|
- [Hardware Requirements](#hardware-requirements)
|
||||||
|
- [Host Kernel Requirements](#host-kernel-requirements)
|
||||||
|
- [Install and configure Kata Containers](#install-and-configure-kata-containers)
|
||||||
|
- [Build Kata Containers kernel with GPU support](#build-kata-containers-kernel-with-gpu-support)
|
||||||
|
- [GVT-d with Kata Containers](#gvt-d-with-kata-containers)
|
||||||
|
- [GVT-g with Kata Containers](#gvt-g-with-kata-containers)
|
||||||
|
|
||||||
|
An Intel Graphics device can be passed to a Kata Containers container using GPU
|
||||||
|
passthrough (Intel GVT-d) as well as GPU mediated passthrough (Intel GVT-g).
|
||||||
|
|
||||||
|
Intel GVT-d (one VM to one physical GPU), also known as the Intel-Graphics-Device
|
||||||
|
passthrough feature is one flavor of graphics virtualization approach.
|
||||||
|
This flavor allows direct assignment of an entire GPU to a single user,
|
||||||
|
passing the native driver capabilities through the hypervisor without any limitations.
|
||||||
|
|
||||||
|
Intel GVT-g (multiple VMs to one physical GPU) is a full GPU virtualization solution
|
||||||
|
with mediated pass-through.<br/>
|
||||||
|
A virtual GPU instance is maintained for each VM, with part of performance critical
|
||||||
|
resources, directly assigned. The ability to run a native graphics driver inside a
|
||||||
|
VM without hypervisor intervention in performance critical paths, achieves a good
|
||||||
|
balance among performance, feature, and sharing capability.
|
||||||
|
|
||||||
|
| Technology | Description | Behaviour | Detail |
|
||||||
|
|-|-|-|-|
|
||||||
|
| Intel GVT-d | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
|
||||||
|
| Intel GVT-g | GPU sharing | Physical GPU shared by multiple VMs | Mediated passthrough |
|
||||||
|
|
||||||
|
## Hardware Requirements
|
||||||
|
|
||||||
|
- For client platforms, 5th generation Intel® Core Processor Graphics or higher are required.
|
||||||
|
- For server platforms, E3_v4 or higher Xeon Processor Graphics are required.
|
||||||
|
|
||||||
|
The following steps outline the workflow for using an Intel Graphics device with Kata.
|
||||||
|
|
||||||
|
## Host Kernel Requirements
|
||||||
|
|
||||||
|
The following configurations need to be enabled on your host kernel:
|
||||||
|
|
||||||
|
```
|
||||||
|
CONFIG_VFIO_IOMMU_TYPE1=m
|
||||||
|
CONFIG_VFIO=m
|
||||||
|
CONFIG_VFIO_PCI=m
|
||||||
|
CONFIG_VFIO_MDEV=m
|
||||||
|
CONFIG_VFIO_MDEV_DEVICE=m
|
||||||
|
CONFIG_DRM_I915_GVT=m
|
||||||
|
CONFIG_DRM_I915_GVT_KVMGT=m
|
||||||
|
```
|
||||||
|
|
||||||
|
Your host kernel needs to be booted with `intel_iommu=on` on the kernel command
|
||||||
|
line.
|
||||||
|
|
||||||
|
## Install and configure Kata Containers
|
||||||
|
|
||||||
|
To use this feature, you need Kata version 1.3.0 or above.
|
||||||
|
Follow the [Kata Containers setup instructions](https://github.com/kata-containers/documentation/blob/master/install/README.md)
|
||||||
|
to install the latest version of Kata.
|
||||||
|
|
||||||
|
In order to pass a GPU to a Kata Container, you need to enable the `hotplug_vfio_on_root_bus`
|
||||||
|
configuration in the Kata `configuration.toml` file as shown below.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo sed -i -e 's/^# *\(hotplug_vfio_on_root_bus\).*=.*$/\1 = true/g' /usr/share/defaults/kata-containers/configuration.toml
|
||||||
|
```
|
||||||
|
|
||||||
|
Make sure you are using the `pc` machine type by verifying `machine_type = "pc"` is
|
||||||
|
set in the `configuration.toml`.
|
||||||
|
|
||||||
|
## Build Kata Containers kernel with GPU support
|
||||||
|
|
||||||
|
The default guest kernel installed with Kata Containers does not provide GPU support.
|
||||||
|
To use an Intel GPU with Kata Containers, you need to build a kernel with the necessary
|
||||||
|
GPU support.
|
||||||
|
|
||||||
|
The following i915 kernel config options need to be enabled:
|
||||||
|
```
|
||||||
|
CONFIG_DRM=y
|
||||||
|
CONFIG_DRM_I915=y
|
||||||
|
CONFIG_DRM_I915_USERPTR=y
|
||||||
|
```
|
||||||
|
|
||||||
|
Build the Kata Containers kernel with the previous config options, using the instructions
|
||||||
|
described in [Building Kata Containers kernel](https://github.com/kata-containers/packaging/tree/master/kernel).
|
||||||
|
For further details on building and installing guest kernels, see [the developer guide](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#install-guest-kernel-images).
|
||||||
|
|
||||||
|
There is an easy way to build a guest kernel that supports Intel GPU:
|
||||||
|
```
|
||||||
|
## Build guest kernel with https://github.com/kata-containers/packaging/tree/master/kernel
|
||||||
|
|
||||||
|
# Prepare (download guest kernel source, generate .config)
|
||||||
|
$ ./build-kernel.sh -g intel -f setup
|
||||||
|
|
||||||
|
# Build guest kernel
|
||||||
|
$ ./build-kernel.sh -g intel build
|
||||||
|
|
||||||
|
# Install guest kernel
|
||||||
|
$ sudo -E ./build-kernel.sh -g intel install
|
||||||
|
/usr/share/kata-containers/vmlinux-intel-gpu.container -> vmlinux-5.4.15-70-intel-gpu
|
||||||
|
/usr/share/kata-containers/vmlinuz-intel-gpu.container -> vmlinuz-5.4.15-70-intel-gpu
|
||||||
|
```
|
||||||
|
|
||||||
|
Before using the new guest kernel, please update the `kernel` parameters in `configuration.toml`.
|
||||||
|
```
|
||||||
|
kernel = "/usr/share/kata-containers/vmlinuz-intel-gpu.container"
|
||||||
|
```
|
||||||
|
|
||||||
|
## GVT-d with Kata Containers
|
||||||
|
|
||||||
|
Use the following steps to pass an Intel Graphics device in GVT-d mode with Kata:
|
||||||
|
|
||||||
|
1. Find the Bus-Device-Function (BDF) for GPU device:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo lspci -nn -D | grep Graphics
|
||||||
|
0000:00:02.0 VGA compatible controller [0300]: Intel Corporation Broadwell-U Integrated Graphics [8086:1616] (rev 09)
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the previous command to determine the BDF for the GPU device on host.<br/>
|
||||||
|
From the previous output, PCI address `0000:00:02.0` is assigned to the hardware GPU device.<br/>
|
||||||
|
This BDF is used later to unbind the GPU device from the host.<br/>
|
||||||
|
"8086 1616" is the device ID of the hardware GPU device. It is used later to
|
||||||
|
rebind the GPU device to `vfio-pci` driver.
|
||||||
|
|
||||||
|
2. Find the IOMMU group for the GPU device:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ BDF="0000:00:02.0"
|
||||||
|
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
|
||||||
|
/sys/kernel/iommu_groups/1
|
||||||
|
```
|
||||||
|
|
||||||
|
The previous output shows that the GPU belongs to IOMMU group 1.
|
||||||
|
|
||||||
|
3. Unbind the GPU:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ echo $BDF | sudo tee /sys/bus/pci/devices/$BDF/driver/unbind
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Bind the GPU to the `vfio-pci` device driver:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo modprobe vfio-pci
|
||||||
|
$ echo 8086 1616 | sudo tee /sys/bus/pci/drivers/vfio-pci/new_id
|
||||||
|
$ echo $BDF | sudo tee --append /sys/bus/pci/drivers/vfio-pci/bind
|
||||||
|
```
|
||||||
|
|
||||||
|
After you run the previous commands, the GPU is bound to `vfio-pci` driver.<br/>
|
||||||
|
A new directory with the IOMMU group number is created under `/dev/vfio`:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ ls -l /dev/vfio
|
||||||
|
total 0
|
||||||
|
crw------- 1 root root 241, 0 May 18 15:38 1
|
||||||
|
crw-rw-rw- 1 root root 10, 196 May 18 15:37 vfio
|
||||||
|
```
|
||||||
|
|
||||||
|
5. Start a Kata container with GPU device:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo docker run -it --runtime=kata-runtime --rm --device /dev/vfio/1 -v /dev:/dev debian /bin/bash
|
||||||
|
```
|
||||||
|
|
||||||
|
Run `lspci` within the container to verify the GPU device is seen in the list of
|
||||||
|
the PCI devices. Note the vendor-device id of the GPU ("8086:1616") in the `lspci` output.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ lspci -nn -D
|
||||||
|
0000:00:00.0 Class [0600]: Device [8086:1237] (rev 02)
|
||||||
|
0000:00:01.0 Class [0601]: Device [8086:7000]
|
||||||
|
0000:00:01.1 Class [0101]: Device [8086:7010]
|
||||||
|
0000:00:01.3 Class [0680]: Device [8086:7113] (rev 03)
|
||||||
|
0000:00:02.0 Class [0604]: Device [1b36:0001]
|
||||||
|
0000:00:03.0 Class [0780]: Device [1af4:1003]
|
||||||
|
0000:00:04.0 Class [0100]: Device [1af4:1004]
|
||||||
|
0000:00:05.0 Class [0002]: Device [1af4:1009]
|
||||||
|
0000:00:06.0 Class [0200]: Device [1af4:1000]
|
||||||
|
0000:00:0f.0 Class [0300]: Device [8086:1616] (rev 09)
|
||||||
|
```
|
||||||
|
|
||||||
|
Additionally, you can access the device node for the graphics device:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ ls /dev/dri
|
||||||
|
card0 renderD128
|
||||||
|
```
|
||||||
|
|
||||||
|
## GVT-g with Kata Containers
|
||||||
|
|
||||||
|
For GVT-g, you append `i915.enable_gvt=1` in addition to `intel_iommu=on`
|
||||||
|
on your host kernel command line and then reboot your host.
|
||||||
|
|
||||||
|
Use the following steps to pass an Intel Graphics device in GVT-g mode to a Kata Container:
|
||||||
|
|
||||||
|
1. Find the BDF for GPU device:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo lspci -nn -D | grep Graphics
|
||||||
|
0000:00:02.0 VGA compatible controller [0300]: Intel Corporation Broadwell-U Integrated Graphics [8086:1616] (rev 09)
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the previous command to find out the BDF for the GPU device on host.
|
||||||
|
The previous output shows PCI address "0000:00:02.0" is assigned to the GPU device.
|
||||||
|
|
||||||
|
2. Choose the MDEV (Mediated Device) type for VGPU (Virtual GPU):
|
||||||
|
|
||||||
|
For background on `mdev` types, please follow this [kernel documentation](https://github.com/torvalds/linux/blob/master/Documentation/driver-api/vfio-mediated-device.rst).
|
||||||
|
|
||||||
|
* List out the `mdev` types for the VGPU:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ BDF="0000:00:02.0"
|
||||||
|
|
||||||
|
$ ls /sys/devices/pci0000:00/$BDF/mdev_supported_types
|
||||||
|
i915-GVTg_V4_1 i915-GVTg_V4_2 i915-GVTg_V4_4 i915-GVTg_V4_8
|
||||||
|
```
|
||||||
|
|
||||||
|
* Inspect the `mdev` types and choose one that fits your requirement:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ cd /sys/devices/pci0000:00/0000:00:02.0/mdev_supported_types/i915-GVTg_V4_8 && ls
|
||||||
|
available_instances create description device_api devices
|
||||||
|
|
||||||
|
$ cat description
|
||||||
|
low_gm_size: 64MB
|
||||||
|
high_gm_size: 384MB
|
||||||
|
fence: 4
|
||||||
|
resolution: 1024x768
|
||||||
|
weight: 2
|
||||||
|
|
||||||
|
$ cat available_instances
|
||||||
|
7
|
||||||
|
```
|
||||||
|
|
||||||
|
The output of file `description` represents the GPU resources that are
|
||||||
|
assigned to the VGPU with specified MDEV type. The output of file `available_instances`
|
||||||
|
represents the remaining amount of VGPUs you can create with specified MDEV type.
|
||||||
|
|
||||||
|
3. Create a VGPU:
|
||||||
|
|
||||||
|
* Generate a UUID:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ gpu_uuid=$(uuid)
|
||||||
|
```
|
||||||
|
|
||||||
|
* Write the UUID to the `create` file under the chosen `mdev` type:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ echo ${gpu_uuid} | sudo tee /sys/devices/pci0000:00/0000:00:02.0/mdev_supported_types/i915-GVTg_V4_8/create
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Find the IOMMU group for the VGPU:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ ls -la /sys/devices/pci0000:00/0000:00:02.0/mdev_supported_types/i915-GVTg_V4_8/devices/${gpu_uuid}/iommu_group
|
||||||
|
lrwxrwxrwx 1 root root 0 May 18 14:35 devices/bbc4aafe-5807-11e8-a43e-03533cceae7d/iommu_group -> ../../../../kernel/iommu_groups/0
|
||||||
|
|
||||||
|
$ ls -l /dev/vfio
|
||||||
|
total 0
|
||||||
|
crw------- 1 root root 241, 0 May 18 11:30 0
|
||||||
|
crw-rw-rw- 1 root root 10, 196 May 18 11:29 vfio
|
||||||
|
```
|
||||||
|
|
||||||
|
The IOMMU group "0" is created from the previous output.<br/>
|
||||||
|
Now you can use the device node `/dev/vfio/0` in docker command line to pass
|
||||||
|
the VGPU to a Kata Container.
|
||||||
|
|
||||||
|
5. Start Kata container with GPU device enabled:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo docker run -it --runtime=kata-runtime --rm --device /dev/vfio/0 -v /dev:/dev debian /bin/bash
|
||||||
|
$ lspci -nn -D
|
||||||
|
0000:00:00.0 Class [0600]: Device [8086:1237] (rev 02)
|
||||||
|
0000:00:01.0 Class [0601]: Device [8086:7000]
|
||||||
|
0000:00:01.1 Class [0101]: Device [8086:7010]
|
||||||
|
0000:00:01.3 Class [0680]: Device [8086:7113] (rev 03)
|
||||||
|
0000:00:02.0 Class [0604]: Device [1b36:0001]
|
||||||
|
0000:00:03.0 Class [0780]: Device [1af4:1003]
|
||||||
|
0000:00:04.0 Class [0100]: Device [1af4:1004]
|
||||||
|
0000:00:05.0 Class [0002]: Device [1af4:1009]
|
||||||
|
0000:00:06.0 Class [0200]: Device [1af4:1000]
|
||||||
|
0000:00:0f.0 Class [0300]: Device [8086:1616] (rev 09)
|
||||||
|
```
|
||||||
|
|
||||||
|
BDF "0000:00:0f.0" is assigned to the VGPU device.
|
||||||
|
|
||||||
|
Additionally, you can access the device node for the graphics device:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ ls /dev/dri
|
||||||
|
card0 renderD128
|
||||||
|
```
|
313
docs/use-cases/Nvidia-GPU-passthrough-and-Kata.md
Normal file
@ -0,0 +1,313 @@
|
|||||||
|
# Using Nvidia GPU device with Kata Containers
|
||||||
|
|
||||||
|
- [Using Nvidia GPU device with Kata Containers](#using-nvidia-gpu-device-with-kata-containers)
|
||||||
|
- [Hardware Requirements](#hardware-requirements)
|
||||||
|
- [Host BIOS Requirements](#host-bios-requirements)
|
||||||
|
- [Host Kernel Requirements](#host-kernel-requirements)
|
||||||
|
- [Install and configure Kata Containers](#install-and-configure-kata-containers)
|
||||||
|
- [Build Kata Containers kernel with GPU support](#build-kata-containers-kernel-with-gpu-support)
|
||||||
|
- [Nvidia GPU pass-through mode with Kata Containers](#nvidia-gpu-pass-through-mode-with-kata-containers)
|
||||||
|
- [Nvidia vGPU mode with Kata Containers](#nvidia-vgpu-mode-with-kata-containers)
|
||||||
|
- [Install Nvidia Driver in Kata Containers](#install-nvidia-driver-in-kata-containers)
|
||||||
|
- [References](#references)
|
||||||
|
|
||||||
|
|
||||||
|
An Nvidia GPU device can be passed to a Kata Containers container using GPU passthrough
|
||||||
|
(Nvidia GPU pass-through mode) as well as GPU mediated passthrough (Nvidia vGPU mode).
|
||||||
|
|
||||||
|
In Nvidia GPU pass-through mode, an entire physical GPU is directly assigned to one VM,
|
||||||
|
bypassing the Nvidia Virtual GPU Manager. In this mode of operation, the GPU is accessed
|
||||||
|
exclusively by the Nvidia driver running in the VM to which it is assigned.
|
||||||
|
The GPU is not shared among VMs.
|
||||||
|
|
||||||
|
Nvidia Virtual GPU (vGPU) enables multiple virtual machines (VMs) to have simultaneous,
|
||||||
|
direct access to a single physical GPU, using the same Nvidia graphics drivers that are
|
||||||
|
deployed on non-virtualized operating systems. By doing this, Nvidia vGPU provides VMs
|
||||||
|
with unparalleled graphics performance, compute performance, and application compatibility,
|
||||||
|
together with the cost-effectiveness and scalability brought about by sharing a GPU
|
||||||
|
among multiple workloads.
|
||||||
|
|
||||||
|
| Technology | Description | Behaviour | Detail |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| Nvidia GPU pass-through mode | GPU passthrough | Physical GPU assigned to a single VM | Direct GPU assignment to VM without limitation |
|
||||||
|
| Nvidia vGPU mode | GPU sharing | Physical GPU shared by multiple VMs | Mediated passthrough |
|
||||||
|
|
||||||
|
## Hardware Requirements
|
||||||
|
Nvidia GPUs Recommended for Virtualization:
|
||||||
|
|
||||||
|
- Nvidia Tesla (T4, M10, P6, V100 or newer)
|
||||||
|
- Nvidia Quadro RTX 6000/8000
|
||||||
|
|
||||||
|
## Host BIOS Requirements
|
||||||
|
|
||||||
|
Some hardware requires a larger PCI BARs window, for example, the Nvidia Tesla P100 or K40m:
|
||||||
|
```
|
||||||
|
$ lspci -s 04:00.0 -vv | grep Region
|
||||||
|
Region 0: Memory at c6000000 (32-bit, non-prefetchable) [size=16M]
|
||||||
|
Region 1: Memory at 383800000000 (64-bit, prefetchable) [size=16G] #above 4G
|
||||||
|
Region 3: Memory at 383c00000000 (64-bit, prefetchable) [size=32M]
|
||||||
|
```
|
||||||
|
|
||||||
|
For large BARs devices, MMIO mapping above 4G address space should be `enabled`
|
||||||
|
in the PCI configuration of the BIOS.
|
||||||
|
|
||||||
|
Some hardware vendors use different names for this setting in the BIOS, such as:
|
||||||
|
|
||||||
|
- Above 4G Decoding
|
||||||
|
- Memory Hole for PCI MMIO
|
||||||
|
- Memory Mapped I/O above 4GB
|
||||||
|
|
||||||
|
The following steps outline the workflow for using an Nvidia GPU with Kata.
|
||||||
|
|
||||||
|
## Host Kernel Requirements
|
||||||
|
The following configurations need to be enabled on your host kernel:
|
||||||
|
|
||||||
|
- `CONFIG_VFIO`
|
||||||
|
- `CONFIG_VFIO_IOMMU_TYPE1`
|
||||||
|
- `CONFIG_VFIO_MDEV`
|
||||||
|
- `CONFIG_VFIO_MDEV_DEVICE`
|
||||||
|
- `CONFIG_VFIO_PCI`
|
||||||
|
|
||||||
|
Your host kernel needs to be booted with `intel_iommu=on` on the kernel command line.
|
||||||
|
|
||||||
|
## Install and configure Kata Containers
|
||||||
|
To use non-large BARs devices (for example, Nvidia Tesla T4), you need Kata version 1.3.0 or above.
|
||||||
|
Follow the [Kata Containers setup instructions](https://github.com/kata-containers/documentation/blob/master/install/README.md)
|
||||||
|
to install the latest version of Kata.
|
||||||
|
|
||||||
|
The following configuration in the Kata `configuration.toml` file as shown below can work:
|
||||||
|
```
|
||||||
|
machine_type = "pc"
|
||||||
|
|
||||||
|
hotplug_vfio_on_root_bus = true
|
||||||
|
```
|
||||||
|
|
||||||
|
To use large BARs devices (for example, Nvidia Tesla P100), you need Kata version 1.11.0 or above.
|
||||||
|
|
||||||
|
The following configuration in the Kata `configuration.toml` file as shown below can work:
|
||||||
|
|
||||||
|
Hotplug for PCI devices by `shpchp` (Linux's SHPC PCI Hotplug driver):
|
||||||
|
```
|
||||||
|
machine_type = "q35"
|
||||||
|
|
||||||
|
hotplug_vfio_on_root_bus = false
|
||||||
|
```
|
||||||
|
|
||||||
|
Hotplug for PCIe devices by `pciehp` (Linux's PCIe Hotplug driver):
|
||||||
|
```
|
||||||
|
machine_type = "q35"
|
||||||
|
|
||||||
|
hotplug_vfio_on_root_bus = true
|
||||||
|
pcie_root_port = 1
|
||||||
|
```
|
||||||
|
|
||||||
|
## Build Kata Containers kernel with GPU support
|
||||||
|
The default guest kernel installed with Kata Containers does not provide GPU support.
|
||||||
|
To use an Nvidia GPU with Kata Containers, you need to build a kernel with the
|
||||||
|
necessary GPU support.
|
||||||
|
|
||||||
|
The following kernel config options need to be enabled:
|
||||||
|
```
|
||||||
|
# Support PCI/PCIe device hotplug (Required for large BARs device)
|
||||||
|
CONFIG_HOTPLUG_PCI_PCIE=y
|
||||||
|
CONFIG_HOTPLUG_PCI_SHPC=y
|
||||||
|
|
||||||
|
# Support for loading modules (Required for load Nvidia drivers)
|
||||||
|
CONFIG_MODULES=y
|
||||||
|
CONFIG_MODULE_UNLOAD=y
|
||||||
|
|
||||||
|
# Enable the MMIO access method for PCIe devices (Required for large BARs device)
|
||||||
|
CONFIG_PCI_MMCONFIG=y
|
||||||
|
```
|
||||||
|
|
||||||
|
The following kernel config options need to be disabled:
|
||||||
|
```
|
||||||
|
# Disable Open Source Nvidia driver nouveau
|
||||||
|
# It conflicts with Nvidia official driver
|
||||||
|
CONFIG_DRM_NOUVEAU=n
|
||||||
|
```
|
||||||
|
> **Note**: `CONFIG_DRM_NOUVEAU` is normally disabled by default.
|
||||||
|
It is worth checking that it is not enabled in your kernel configuration to prevent any conflicts.
|
||||||
|
|
||||||
|
|
||||||
|
Build the Kata Containers kernel with the previous config options,
|
||||||
|
using the instructions described in [Building Kata Containers kernel](https://github.com/kata-containers/packaging/tree/master/kernel).
|
||||||
|
For further details on building and installing guest kernels,
|
||||||
|
see [the developer guide](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#install-guest-kernel-images).
|
||||||
|
|
||||||
|
There is an easy way to build a guest kernel that supports Nvidia GPU:
|
||||||
|
```
|
||||||
|
## Build guest kernel with https://github.com/kata-containers/packaging/tree/master/kernel
|
||||||
|
|
||||||
|
# Prepare (download guest kernel source, generate .config)
|
||||||
|
$ ./build-kernel.sh -v 4.19.86 -g nvidia -f setup
|
||||||
|
|
||||||
|
# Build guest kernel
|
||||||
|
$ ./build-kernel.sh -v 4.19.86 -g nvidia build
|
||||||
|
|
||||||
|
# Install guest kernel
|
||||||
|
$ sudo -E ./build-kernel.sh -v 4.19.86 -g nvidia install
|
||||||
|
/usr/share/kata-containers/vmlinux-nvidia-gpu.container -> vmlinux-4.19.86-70-nvidia-gpu
|
||||||
|
/usr/share/kata-containers/vmlinuz-nvidia-gpu.container -> vmlinuz-4.19.86-70-nvidia-gpu
|
||||||
|
```
|
||||||
|
|
||||||
|
To build the Nvidia driver in a Kata Container, `kernel-devel` is required.
|
||||||
|
This is a way to generate rpm packages for `kernel-devel`:
|
||||||
|
```
|
||||||
|
$ cd kata-linux-4.19.86-68
|
||||||
|
$ make rpm-pkg
|
||||||
|
Output RPMs:
|
||||||
|
~/rpmbuild/RPMS/x86_64/kernel-devel-4.19.86_nvidia_gpu-1.x86_64.rpm
|
||||||
|
```
|
||||||
|
> **Note**:
|
||||||
|
> - `kernel-devel` should be installed in the Kata Container before running the Nvidia driver installer.
|
||||||
|
> - Run `make deb-pkg` to build the deb package.
|
||||||
|
|
||||||
|
Before using the new guest kernel, please update the `kernel` parameters in `configuration.toml`.
|
||||||
|
```
|
||||||
|
kernel = "/usr/share/kata-containers/vmlinuz-nvidia-gpu.container"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Nvidia GPU pass-through mode with Kata Containers
|
||||||
|
Use the following steps to pass an Nvidia GPU device in pass-through mode with Kata:
|
||||||
|
|
||||||
|
1. Find the Bus-Device-Function (BDF) for GPU device on host:
|
||||||
|
```
|
||||||
|
$ sudo lspci -nn -D | grep -i nvidia
|
||||||
|
0000:04:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:15f8] (rev a1)
|
||||||
|
0000:84:00.0 3D controller [0302]: NVIDIA Corporation Device [10de:15f8] (rev a1)
|
||||||
|
```
|
||||||
|
> PCI address `0000:04:00.0` is assigned to the hardware GPU device.
|
||||||
|
> `10de:15f8` is the device ID of the hardware GPU device.
|
||||||
|
|
||||||
|
2. Find the IOMMU group for the GPU device:
|
||||||
|
```
|
||||||
|
$ BDF="0000:04:00.0"
|
||||||
|
$ readlink -e /sys/bus/pci/devices/$BDF/iommu_group
|
||||||
|
/sys/kernel/iommu_groups/45
|
||||||
|
```
|
||||||
|
The previous output shows that the GPU belongs to IOMMU group 45.
|
||||||
|
|
||||||
|
3. Check the IOMMU group number under `/dev/vfio`:
|
||||||
|
```
|
||||||
|
$ ls -l /dev/vfio
|
||||||
|
total 0
|
||||||
|
crw------- 1 root root 248, 0 Feb 28 09:57 45
|
||||||
|
crw------- 1 root root 248, 1 Feb 28 09:57 54
|
||||||
|
crw-rw-rw- 1 root root 10, 196 Feb 28 09:57 vfio
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Start a Kata container with GPU device:
|
||||||
|
```
|
||||||
|
$ sudo docker run -it --runtime=kata-runtime --cap-add=ALL --device /dev/vfio/45 centos /bin/bash
|
||||||
|
```
|
||||||
|
|
||||||
|
5. Run `lspci` within the container to verify the GPU device is seen in the list
|
||||||
|
of the PCI devices. Note the vendor-device id of the GPU (`10de:15f8`) in the `lspci` output.
|
||||||
|
```
|
||||||
|
$ lspci -nn -D | grep '10de:15f8'
|
||||||
|
0000:01:01.0 3D controller [0302]: NVIDIA Corporation GP100GL [Tesla P100 PCIe 16GB] [10de:15f8] (rev a1)
|
||||||
|
```
|
||||||
|
|
||||||
|
6. Additionally, you can check the PCI BARs space of the Nvidia GPU device in the container:
|
||||||
|
```
|
||||||
|
$ lspci -s 01:01.0 -vv | grep Region
|
||||||
|
Region 0: Memory at c0000000 (32-bit, non-prefetchable) [disabled] [size=16M]
|
||||||
|
Region 1: Memory at 4400000000 (64-bit, prefetchable) [disabled] [size=16G]
|
||||||
|
Region 3: Memory at 4800000000 (64-bit, prefetchable) [disabled] [size=32M]
|
||||||
|
```
|
||||||
|
> **Note**: If you see a message similar to the above, the BAR space of the Nvidia
|
||||||
|
> GPU has been successfully allocated.
|
||||||
|
|
||||||
|
## Nvidia vGPU mode with Kata Containers
|
||||||
|
|
||||||
|
Nvidia vGPU is a licensed product on all supported GPU boards. A software license
|
||||||
|
is required to enable all vGPU features within the guest VM.
|
||||||
|
|
||||||
|
> **Note**: There is no suitable test environment, so it is not written here.
|
||||||
|
|
||||||
|
|
||||||
|
## Install Nvidia Driver in Kata Containers
|
||||||
|
Download the official Nvidia driver from
|
||||||
|
[https://www.nvidia.com/Download/index.aspx](https://www.nvidia.com/Download/index.aspx),
|
||||||
|
for example `NVIDIA-Linux-x86_64-418.87.01.run`.
|
||||||
|
|
||||||
|
Install the `kernel-devel`(generated in the previous steps) for guest kernel:
|
||||||
|
```
|
||||||
|
$ sudo rpm -ivh kernel-devel-4.19.86_gpu-1.x86_64.rpm
|
||||||
|
```
|
||||||
|
|
||||||
|
Here is an example to extract, compile and install Nvidia driver:
|
||||||
|
```
|
||||||
|
## Extract
|
||||||
|
$ sh ./NVIDIA-Linux-x86_64-418.87.01.run -x
|
||||||
|
|
||||||
|
## Compile and install (It will take some time)
|
||||||
|
$ cd NVIDIA-Linux-x86_64-418.87.01
|
||||||
|
$ sudo ./nvidia-installer -a -q --ui=none \
|
||||||
|
--no-cc-version-check \
|
||||||
|
--no-opengl-files --no-install-libglvnd \
|
||||||
|
--kernel-source-path=/usr/src/kernels/`uname -r`
|
||||||
|
```
|
||||||
|
|
||||||
|
Or just run one command line:
|
||||||
|
```
|
||||||
|
$ sudo sh ./NVIDIA-Linux-x86_64-418.87.01.run -a -q --ui=none \
|
||||||
|
--no-cc-version-check \
|
||||||
|
--no-opengl-files --no-install-libglvnd \
|
||||||
|
--kernel-source-path=/usr/src/kernels/`uname -r`
|
||||||
|
```
|
||||||
|
|
||||||
|
To view detailed logs of the installer:
|
||||||
|
```
|
||||||
|
$ tail -f /var/log/nvidia-installer.log
|
||||||
|
```
|
||||||
|
|
||||||
|
Load the Nvidia driver module manually:
|
||||||
|
```
|
||||||
|
# Optional(generate modules.dep and map files for Nvidia driver)
|
||||||
|
$ sudo depmod
|
||||||
|
|
||||||
|
# Load module
|
||||||
|
$ sudo modprobe nvidia-drm
|
||||||
|
|
||||||
|
# Check module
|
||||||
|
$ lsmod | grep nvidia
|
||||||
|
nvidia_drm 45056 0
|
||||||
|
nvidia_modeset 1093632 1 nvidia_drm
|
||||||
|
nvidia 18202624 1 nvidia_modeset
|
||||||
|
drm_kms_helper 159744 1 nvidia_drm
|
||||||
|
drm 364544 3 nvidia_drm,drm_kms_helper
|
||||||
|
i2c_core 65536 3 nvidia,drm_kms_helper,drm
|
||||||
|
ipmi_msghandler 49152 1 nvidia
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Check Nvidia device status with `nvidia-smi`
|
||||||
|
```
|
||||||
|
$ nvidia-smi
|
||||||
|
Tue Mar 3 00:03:49 2020
|
||||||
|
+-----------------------------------------------------------------------------+
|
||||||
|
| NVIDIA-SMI 418.87.01 Driver Version: 418.87.01 CUDA Version: 10.1 |
|
||||||
|
|-------------------------------+----------------------+----------------------+
|
||||||
|
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
|
||||||
|
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|
||||||
|
|===============================+======================+======================|
|
||||||
|
| 0 Tesla P100-PCIE... Off | 00000000:01:01.0 Off | 0 |
|
||||||
|
| N/A 27C P0 25W / 250W | 0MiB / 16280MiB | 0% Default |
|
||||||
|
+-------------------------------+----------------------+----------------------+
|
||||||
|
|
||||||
|
+-----------------------------------------------------------------------------+
|
||||||
|
| Processes: GPU Memory |
|
||||||
|
| GPU PID Type Process name Usage |
|
||||||
|
|=============================================================================|
|
||||||
|
| No running processes found |
|
||||||
|
+-----------------------------------------------------------------------------+
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
- [Configuring a VM for GPU Pass-Through by Using the QEMU Command Line](https://docs.nvidia.com/grid/latest/grid-vgpu-user-guide/index.html#using-gpu-pass-through-red-hat-el-qemu-cli)
|
||||||
|
- https://gitlab.com/nvidia/container-images/driver/-/tree/master
|
||||||
|
- https://github.com/NVIDIA/nvidia-docker/wiki/Driver-containers-(Beta)
|
BIN
docs/use-cases/images/kata-zun1.png
Normal file
After Width: | Height: | Size: 113 KiB |