diff --git a/README.md b/README.md index 7577246d77..82db01dffe 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,8 @@ # Kata Containers packaging -This repository is used to generate packages for Kata Containers components. +Kata Containers currently supports packages for many distributions. Tooling to aid in creating these +packages is contained within this repository. + +In addition, Kata build artifacts are available within a container image, created by a +[Dockerfile](kata-deploy/Dockerfile). Reference daemonsets are provided in [kata-deploy](kata-deploy), +which make installation of Kata Containers in a running Kubernetes Cluster very straightforward. diff --git a/kata-deploy/Dockerfile b/kata-deploy/Dockerfile new file mode 100644 index 0000000000..548dd8facc --- /dev/null +++ b/kata-deploy/Dockerfile @@ -0,0 +1,25 @@ +# Image bundling the Kata Containers artifacts that the kata-deploy install scripts copy onto the host. +FROM centos/systemd +ARG KATA_VER=1.0.0 +ARG KATA_URL=https://github.com/kata-containers/runtime/releases/download/${KATA_VER} + +# wget is only used to fetch the artifacts below; clean the yum cache in the same layer to keep it out of the image. +RUN yum install -y wget && yum clean all +WORKDIR /tmp/kata/ +RUN wget -q ${KATA_URL}/{vmlinuz.container,kata-containers.img} + +WORKDIR /tmp/kata/bin/ +RUN wget -q ${KATA_URL}/{kata-runtime,kata-proxy,kata-shim} + +# kubectl is invoked by the kata-deploy daemonsets to label nodes. +ARG KUBECTL_VER=v1.10.2 +RUN wget -qO /bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VER}/bin/linux/amd64/kubectl && \ + chmod +x /bin/kubectl + +# Locally provided QEMU binary and firmware files, runtime configuration and install/remove scripts. +COPY bin /tmp/kata/bin +COPY qemu-artifacts /tmp/kata/share/qemu + +COPY configuration.toml /tmp/kata/ +COPY scripts /tmp/kata/scripts + diff --git a/kata-deploy/README.md b/kata-deploy/README.md new file mode 100644 index 0000000000..e0a4e1fc41 --- /dev/null +++ b/kata-deploy/README.md @@ -0,0 +1,131 @@ +# kata-deploy + + +- [kata-deploy](#kata-deploy) + * [Quick start](#quick-start-) + + [Install Kata on a running Kubernetes cluster](#install-kata-on-a-running-kubernetes-cluster) + + [Run a sample workload](#run-a-sample-workload-) + + [Remove Kata from the Kubernetes cluster](#remove-kata-from-the-kubernetes-cluster-) + * [kata-deploy 
details](#kata-deploy-details) + + [Dockerfile](#dockerfile) + + [Daemonsets and RBAC](#daemonsets-and-rbac-) + - [runtime-labeler](#runtime-labeler-) + - [CRI-O and containerd kata installer](#cri-o-and-containerd-kata-installer-) + + [Kata cleanup](#kata-cleanup-) + + +[kata-deploy](kata-deploy) provides a Dockerfile which contains all of the binaries +and artifacts required to run Kata Containers, as well as reference daemonsets which can be utilized to install Kata Containers on a running Kubernetes cluster. + +Note, installation through daemonsets only successfully installs `kata-runtime` on +a node if it uses either containerd or CRI-O CRI-shims. + +## Quick start: + +### Install Kata on a running Kubernetes cluster + +``` +kubectl apply -f kata-rbac.yaml +kubectl apply -f kata-deploy.yaml +``` + +### Run a sample workload + +Untrusted workloads can node-select based on ```kata-containers.io/kata-runtime=true```, and are +run through ```kata-runtime``` if they are marked with the appropriate CRIO or containerd +annotation: +``` +CRIO: io.kubernetes.cri-o.TrustedSandbox: "false" +containerd: io.kubernetes.cri.untrusted-workload: "true" +``` + +The following is a sample workload for running untrusted on a kata-enabled node: +``` +apiVersion: v1 +kind: Pod +metadata: + name: nginx + annotations: + io.kubernetes.cri-o.TrustedSandbox: "false" + io.kubernetes.cri.untrusted-workload: "true" + labels: + env: test +spec: + containers: + - name: nginx + image: nginx + imagePullPolicy: IfNotPresent + nodeSelector: + kata-containers.io/kata-runtime: "true" +``` + +To run: +``` +kubectl apply -f example/nginx-untrusted.yaml +``` + +Now, you should see the pod start. 
You can verify that the pod is making use of +```kata-runtime``` by comparing the container ID observed with the following: +``` +/opt/kata/bin/kata-runtime list +kubectl describe pod nginx-untrusted +``` + +The following removes the test pod: +``` +kubectl delete -f example/nginx-untrusted.yaml +``` + +### Remove Kata from the Kubernetes cluster + +``` +kubectl delete -f kata-deploy.yaml +kubectl apply -f kata-cleanup.yaml +kubectl delete -f kata-cleanup.yaml +kubectl delete -f kata-rbac.yaml +``` + +## kata-deploy Details + +### Dockerfile + +The Dockerfile used to create the container image deployed in the DaemonSet is provided here. +This image contains all the necessary artifacts for running Kata Containers. + +Host artifacts: +* kata-runtime: pulled from Kata GitHub releases page +* kata-proxy: pulled from Kata GitHub releases page +* kata-shim: pulled from Kata GitHub releases page +* qemu-system-x86_64: statically built and included in this repo, based on Kata's QEMU repo +* qemu/* : supporting binaries required for qemu-system-x86_64 + +Virtual Machine artifacts: +* kata-containers.img: pulled from Kata GitHub releases page +* vmlinuz.container: pulled from Kata GitHub releases page + +### Daemonsets and RBAC: + +A few daemonsets are introduced for kata-deploy, as well as an RBAC to facilitate +applying labels to the nodes. + +#### runtime-labeler: + +This daemonset creates a label on each node in +the cluster identifying the CRI shim in use. For example, +`kata-containers.io/container-runtime=cri-o` or `kata-containers.io/container-runtime=containerd`. + +#### CRI-O and containerd kata installer + +Depending on the value of the `kata-containers.io/container-runtime` label on the node, either the CRI-O or +containerd kata installation daemonset executes. These daemonsets install +the necessary kata binaries, configuration files and virtual machine artifacts on +the node. 
Once installed, the daemonset adds a node label `kata-containers.io/kata-runtime=true` and reconfigures +either CRI-O or containerd to make use of Kata for untrusted workloads. As a final step the daemonset +restarts either CRI-O or containerd and kubelet. Upon deletion, the daemonset removes the kata binaries +and VM artifacts and updates the node label to `kata-containers.io/kata-runtime=cleanup`. + +### Kata cleanup: +This daemonset runs if the node has the label `kata-containers.io/kata-runtime=cleanup`. This daemonset removes +the `kata-containers.io/container-runtime` and `kata-containers.io/kata-runtime` labels as well as restarts either CRI-O or containerd systemctl +daemon and kubelet. You cannot execute these restarts during the preStopHook of the Kata installer daemonset, +which necessitated this final cleanup daemonset. diff --git a/kata-deploy/bin/qemu-system-x86_64 b/kata-deploy/bin/qemu-system-x86_64 new file mode 100755 index 0000000000..00f7d97a44 Binary files /dev/null and b/kata-deploy/bin/qemu-system-x86_64 differ diff --git a/kata-deploy/configuration.toml b/kata-deploy/configuration.toml new file mode 100755 index 0000000000..a8d9f5e46f --- /dev/null +++ b/kata-deploy/configuration.toml @@ -0,0 +1,144 @@ +# XXX: WARNING: this file is auto-generated. +# XXX: +# XXX: Source file: "cli/config/configuration.toml.in" +# XXX: Project: +# XXX: Name: Kata Containers +# XXX: Type: kata + +[hypervisor.qemu] +path = "/opt/kata/bin/qemu-system-x86_64" +kernel = "/opt/kata/vmlinuz.container" +# initrd = "/opt/kata/vm-artifacts/kata-containers-initrd.img" +image = "/opt/kata/kata-containers.img" +machine_type = "pc" + +# Optional space-separated list of options to pass to the guest kernel. +# For example, use `kernel_params = "vsyscall=emulate"` if you are having +# trouble running pre-2.15 glibc. +# +# WARNING: - any parameter specified here will take priority over the default +# parameter value of the same name used to start the virtual machine. 
+# Do not set values here unless you understand the impact of doing so as you +# may stop the virtual machine from booting. +# To see the list of default parameters, enable hypervisor debug, create a +# container and look for 'default-kernel-parameters' log entries. +kernel_params = "" +#kernel_params = " agent.log=debug" + +# Path to the firmware. +# If you want qemu to use the default firmware, leave this option empty +firmware = "" + +# Machine accelerators +# comma-separated list of machine accelerators to pass to the hypervisor. +# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"` +machine_accelerators="" + +# Default number of vCPUs per POD/VM: +# unspecified or 0 --> will be set to 1 +# < 0 --> will be set to the actual number of physical cores +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores +default_vcpus = 1 + + +# Bridges can be used to hot plug devices. +# Limitations: +# * Currently only pci bridges are supported +# * Up to 30 devices per bridge can be hot plugged. +# * Up to 5 PCI bridges can be cold plugged per VM. +# This limitation could be a bug in qemu or in the kernel +# Default number of bridges per POD/VM: +# unspecified or 0 --> will be set to 1 +# > 1 <= 5 --> will be set to the specified number +# > 5 --> will be set to 5 +default_bridges = 1 + +# Default memory size in MiB for POD/VM. +# If unspecified then it will be set to 2048 MiB. +#default_memory = 2048 + +# Disable block device from being used for a container's rootfs. +# In case of a storage driver like devicemapper where a container's +# root file system is backed by a block device, the block device is passed +# directly to the hypervisor for performance reasons. +# This flag prevents the block device from being passed to the hypervisor, +# 9pfs is used instead to pass the rootfs. 
+disable_block_device_use = false + +# Block storage driver to be used for the hypervisor in case the container +# rootfs is backed by a block device. This is either virtio-scsi or +# virtio-blk. +block_device_driver = "virtio-scsi" + +# Enable pre allocation of VM RAM, default false +# Enabling this will result in lower container density +# as all of the memory will be allocated and locked +# This is useful when you want to reserve all the memory +# upfront or in the cases where you want memory latencies +# to be very predictable +# Default false +#enable_mem_prealloc = true + +# Enable huge pages for VM RAM, default false +# Enabling this will result in the VM memory +# being allocated using huge pages. +# This is useful when you want to use vhost-user network +# stacks within the container. This will automatically +# result in memory pre allocation +#enable_hugepages = true + +# Enable swap of vm memory. Default false. +# The behaviour is undefined if mem_prealloc is also set to true +#enable_swap = true + +# This option changes the default hypervisor and kernel parameters +# to enable debug output where available. This extra output is added +# to the proxy logs, but only when proxy debug is also enabled. +# +# Default false +#enable_debug = true + +# Disable the customizations done in the runtime when it detects +# that it is running on top of a VMM. This will result in the runtime +# behaving as it would when running on bare metal. +# +#disable_nesting_checks = true + +[proxy.kata] +path = "/opt/kata/bin/kata-proxy" + +# If enabled, proxy messages will be sent to the system log +# (default: disabled) +#enable_debug = true + +[shim.kata] +path = "/opt/kata/bin/kata-shim" + +# If enabled, shim messages will be sent to the system log +# (default: disabled) +#enable_debug = true + +[agent.kata] +# There is no field for this section. The goal is only to be able to +# specify which type of agent the user wants to use. 
+ +[runtime] +# If enabled, the runtime will log additional debug messages to the +# system log +# (default: disabled) +#enable_debug = true +# +# Internetworking model +# Determines how the VM should be connected to +# the container network interface +# Options: +# +# - bridged +# Uses a linux bridge to interconnect the container interface to +# the VM. Works for most cases except macvlan and ipvlan. +# +# - macvtap +# Used when the Container network interface can be bridged using +# macvtap. +internetworking_model="macvtap" diff --git a/kata-deploy/example/nginx-untrusted.yaml b/kata-deploy/example/nginx-untrusted.yaml new file mode 100644 index 0000000000..714f0ebc1c --- /dev/null +++ b/kata-deploy/example/nginx-untrusted.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: v1 +kind: Pod +metadata: + annotations: + io.kubernetes.cri-o.TrustedSandbox: "false" + io.kubernetes.cri.untrusted-workload: "true" + name: nginx-untrusted +spec: + containers: + - name: nginx + image: nginx + nodeSelector: + kata-containers.io/kata-runtime: "true" diff --git a/kata-deploy/kata-cleanup.yaml b/kata-deploy/kata-cleanup.yaml new file mode 100644 index 0000000000..722389e567 --- /dev/null +++ b/kata-deploy/kata-cleanup.yaml @@ -0,0 +1,50 @@ +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kubelet-kata-cleanup + namespace: kube-system +spec: + selector: + matchLabels: + name: kubelet-kata-cleanup + template: + metadata: + labels: + name: kubelet-kata-cleanup + spec: + serviceAccountName: kata-label-node + nodeSelector: + kata-containers.io/kata-runtime: cleanup + containers: + - name: kube-kata-cleanup + image: egernst/kata-deploy + imagePullPolicy: Always + command: [ "sh", "-c" ] + args: + - kubectl label node $NODE_NAME kata-containers.io/container-runtime- kata-containers.io/kata-runtime-; + systemctl daemon-reload; systemctl restart containerd; systemctl restart crio; systemctl restart kubelet; + tail -f /dev/null; + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: 
spec.nodeName + securityContext: + privileged: false + volumeMounts: + - name: dbus + mountPath: /var/run/dbus + - name: systemd + mountPath: /run/systemd + volumes: + - name: dbus + hostPath: + path: /var/run/dbus + - name: systemd + hostPath: + path: /run/systemd + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + type: RollingUpdate diff --git a/kata-deploy/kata-deploy.yaml b/kata-deploy/kata-deploy.yaml new file mode 100644 index 0000000000..aa078326f5 --- /dev/null +++ b/kata-deploy/kata-deploy.yaml @@ -0,0 +1,181 @@ +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kubelet-runtime-labeler + namespace: kube-system +spec: + selector: + matchLabels: + name: kubelet-runtime-labeler + template: + metadata: + labels: + name: kubelet-runtime-labeler + spec: + serviceAccountName: kata-label-node + containers: + - name: kubelet-runtime-labeler-pod + image: egernst/kata-deploy + imagePullPolicy: Always + command: [ "sh", "-c" ] + args: + - printenv NODE_NAME; + kubectl get node $NODE_NAME --show-labels; + kubectl label node $NODE_NAME --overwrite kata-containers.io/container-runtime=$(kubectl describe node $NODE_NAME | awk -F'[:]' '/Container Runtime Version/ {print $2}' | tr -d ' '); + kubectl get node $NODE_NAME --show-labels; + tail -f /dev/null; + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + securityContext: + privileged: false + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kubelet-cri-o-kata + namespace: kube-system +spec: + selector: + matchLabels: + name: kubelet-cri-o-kata + template: + metadata: + labels: + name: kubelet-cri-o-kata + spec: + serviceAccountName: kata-label-node + nodeSelector: + kata-containers.io/container-runtime: cri-o + containers: + - name: kube-kata + image: egernst/kata-deploy + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: ["sh", "-c", "/tmp/kata/scripts/remove-kata-crio.sh && kubectl 
label node $NODE_NAME --overwrite kata-containers.io/kata-runtime=cleanup"] + command: [ "sh", "-c" ] + args: + - /tmp/kata/scripts/install-kata-crio.sh && kubectl label node $NODE_NAME --overwrite kata-containers.io/kata-runtime=true; + kubectl get node $NODE_NAME --show-labels; + tail -f /dev/null; + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + securityContext: + privileged: false + volumeMounts: + - name: crio-conf + mountPath: /etc/crio/ + - name: kata-conf + mountPath: /usr/share/defaults/kata-containers/ + - name: kata-artifacts + mountPath: /opt/kata/ + - name: dbus + mountPath: /var/run/dbus + - name: systemd + mountPath: /run/systemd + volumes: + - name: crio-conf + hostPath: + path: /etc/crio/ + - name: kata-conf + hostPath: + path: /usr/share/defaults/kata-containers/ + type: DirectoryOrCreate + - name: kata-artifacts + hostPath: + path: /opt/kata/ + type: DirectoryOrCreate + - name: dbus + hostPath: + path: /var/run/dbus + - name: systemd + hostPath: + path: /run/systemd + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kubelet-cri-containerd-kata + namespace: kube-system +spec: + selector: + matchLabels: + name: kubelet-cri-containerd-kata + template: + metadata: + labels: + name: kubelet-cri-containerd-kata + spec: + serviceAccountName: kata-label-node + nodeSelector: + kata-containers.io/container-runtime: containerd + containers: + - name: kube-kata + image: egernst/kata-deploy + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: ["sh", "-c", "/tmp/kata/scripts/remove-kata-containerd.sh && kubectl label node $NODE_NAME --overwrite kata-containers.io/kata-runtime=cleanup"] + command: [ "sh", "-c" ] + args: + - /tmp/kata/scripts/install-kata-containerd.sh && kubectl label node $NODE_NAME --overwrite kata-containers.io/kata-runtime=true; + kubectl get node $NODE_NAME --show-labels; + tail -f /dev/null; + env: + - name: NODE_NAME + 
valueFrom: + fieldRef: + fieldPath: spec.nodeName + securityContext: + privileged: false + volumeMounts: + - name: containerd-conf + mountPath: /etc/containerd/ + - name: kata-conf + mountPath: /usr/share/defaults/kata-containers/ + - name: kata-artifacts + mountPath: /opt/kata/ + - name: dbus + mountPath: /var/run/dbus + - name: systemd + mountPath: /run/systemd + volumes: + - name: containerd-conf + hostPath: + path: /etc/containerd/ + type: DirectoryOrCreate + - name: kata-conf + hostPath: + path: /usr/share/defaults/kata-containers/ + type: DirectoryOrCreate + - name: kata-artifacts + hostPath: + path: /opt/kata/ + type: DirectoryOrCreate + - name: dbus + hostPath: + path: /var/run/dbus + - name: systemd + hostPath: + path: /run/systemd + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + type: RollingUpdate diff --git a/kata-deploy/kata-rbac.yaml b/kata-deploy/kata-rbac.yaml new file mode 100644 index 0000000000..408b5be907 --- /dev/null +++ b/kata-deploy/kata-rbac.yaml @@ -0,0 +1,29 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kata-label-node + namespace: kube-system +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: node-labeler +rules: +- apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "patch"] # presumably what the daemonsets' `kubectl get/describe/label node` calls need - TODO confirm +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: kata-label-node-rb +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: node-labeler +subjects: +- kind: ServiceAccount + name: kata-label-node + namespace: kube-system + diff --git a/kata-deploy/qemu-artifacts/bios-256k.bin b/kata-deploy/qemu-artifacts/bios-256k.bin new file mode 100644 index 0000000000..18666c9f2f Binary files /dev/null and b/kata-deploy/qemu-artifacts/bios-256k.bin differ diff --git a/kata-deploy/qemu-artifacts/bios.bin b/kata-deploy/qemu-artifacts/bios.bin new file mode 100644 index 0000000000..a394411fe5 Binary files /dev/null and b/kata-deploy/qemu-artifacts/bios.bin 
differ diff --git a/kata-deploy/qemu-artifacts/efi-virtio.rom b/kata-deploy/qemu-artifacts/efi-virtio.rom new file mode 100644 index 0000000000..3563776dbd Binary files /dev/null and b/kata-deploy/qemu-artifacts/efi-virtio.rom differ diff --git a/kata-deploy/qemu-artifacts/linuxboot.bin b/kata-deploy/qemu-artifacts/linuxboot.bin new file mode 100644 index 0000000000..923d1796fb Binary files /dev/null and b/kata-deploy/qemu-artifacts/linuxboot.bin differ diff --git a/kata-deploy/qemu-artifacts/linuxboot_dma.bin b/kata-deploy/qemu-artifacts/linuxboot_dma.bin new file mode 100644 index 0000000000..218d3ab4a2 Binary files /dev/null and b/kata-deploy/qemu-artifacts/linuxboot_dma.bin differ diff --git a/kata-deploy/scripts/install-kata-containerd.sh b/kata-deploy/scripts/install-kata-containerd.sh new file mode 100755 index 0000000000..e88f7585e2 --- /dev/null +++ b/kata-deploy/scripts/install-kata-containerd.sh @@ -0,0 +1,23 @@ +#!/bin/sh +echo "copying kata artifacts onto host" +cp -R /tmp/kata/* /opt/kata/ +chmod +x /opt/kata/bin/* +cp /opt/kata/configuration.toml /usr/share/defaults/kata-containers/configuration.toml + +## Configure containerd to use Kata as its untrusted workload runtime; note that tee replaces any existing /etc/containerd/config.toml: +echo "create containerd configuration for Kata" +mkdir -p /etc/containerd/ + +cat << EOT | tee /etc/containerd/config.toml +[plugins] + [plugins.cri.containerd] + snapshotter = "overlayfs" + [plugins.cri.containerd.untrusted_workload_runtime] + runtime_type = "io.containerd.runtime.v1.linux" + runtime_engine = "/opt/kata/bin/kata-runtime" + runtime_root = "" +EOT + +echo "Reload systemd services" +systemctl daemon-reload +systemctl restart containerd diff --git a/kata-deploy/scripts/install-kata-crio.sh b/kata-deploy/scripts/install-kata-crio.sh new file mode 100755 index 0000000000..0aef0df98f --- /dev/null +++ b/kata-deploy/scripts/install-kata-crio.sh @@ -0,0 +1,16 @@ +#!/bin/sh +echo "copying kata artifacts from /tmp to /opt" +cp -R /tmp/kata/* /opt/kata/ + +chmod +x /opt/kata/bin/* + +cp 
/opt/kata/configuration.toml /usr/share/defaults/kata-containers/configuration.toml + +cp -n /etc/crio/crio.conf /etc/crio/crio.conf.bak # -n: never clobber an existing backup, so a re-run keeps the pristine config + +echo "Set Kata containers as default runtime in CRI-O for untrusted workloads" +sed -i '/runtime_untrusted_workload = /c\runtime_untrusted_workload = "/opt/kata/bin/kata-runtime"' /etc/crio/crio.conf + +echo "Reload systemd services" +systemctl daemon-reload +systemctl restart crio diff --git a/kata-deploy/scripts/remove-kata-containerd.sh b/kata-deploy/scripts/remove-kata-containerd.sh new file mode 100755 index 0000000000..dbff017505 --- /dev/null +++ b/kata-deploy/scripts/remove-kata-containerd.sh @@ -0,0 +1,5 @@ +#!/bin/sh +echo "delete kata artifacts" +rm -rf /opt/kata +rm -rf /usr/share/defaults/kata-containers +rm -f /etc/containerd/config.toml diff --git a/kata-deploy/scripts/remove-kata-crio.sh b/kata-deploy/scripts/remove-kata-crio.sh new file mode 100755 index 0000000000..2c8839884b --- /dev/null +++ b/kata-deploy/scripts/remove-kata-crio.sh @@ -0,0 +1,5 @@ +#!/bin/sh +echo "deleting kata artifacts" +rm -rf /opt/kata/ +rm -rf /usr/share/defaults/kata-containers +mv /etc/crio/crio.conf.bak /etc/crio/crio.conf # restore the backup taken by install-kata-crio.sh