kata-containers/tests/functional/vfio/run.sh
Jeremi Piotrowski faee59b520 tests/vfio: Accept single device in vfio group for CLH
Cloud Hypervisor does not emulate PCIe switches or PCI bridges, so we need to
accept a single, lone device in the VFIO group.

Signed-off-by: Jeremi Piotrowski <jpiotrowski@microsoft.com>
2023-09-14 14:23:28 +02:00

351 lines
10 KiB
Bash
Executable File

#!/bin/bash
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Trace every command and run in strict mode: abort on command failure,
# on expansion of unset variables and on failure of any pipeline stage;
# errtrace lets an ERR trap (if one is set) be inherited by functions.
set -x
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
# Directory containing this script; used to locate common.bash and the
# config.json templates.
script_path=$(dirname "$0")
# Shared test helpers. NOTE(review): die, warn, clean_env_ctr and
# restart_containerd_service are used below but not defined in this file,
# so they presumably come from here - confirm.
source "${script_path}/../../common.bash"
# NOTE(review): this global is not read anywhere in this file;
# get_vfio_path declares its own local 'addr'.
addr=
# Scratch directory for the exported image and the bundles; removed by
# cleanup().
tmp_data_dir="$(mktemp -d)"
rootfs_tar="${tmp_data_dir}/rootfs.tar"
# cleanup is defined further down; the name is only resolved when the trap
# fires, so registering it here is fine.
trap cleanup EXIT
# kata-runtime options, filled in from the command line by main()
# (-s, -p, -m and -i respectively).
SANDBOX_CGROUP_ONLY=""
HYPERVISOR=
MACHINE_TYPE=
IMAGE_TYPE=
# EXIT trap handler: releases the resources the test run acquired.
#
# Globals:
#   tmp_data_dir (read) - scratch directory that is removed
#   host_pci     (read) - PCI address whose driver override is reset; may
#                         be unset or empty when the script exits before
#                         main() has selected a device (e.g. with -h)
# Calls clean_env_ctr, which is not defined in this file.
cleanup() {
	clean_env_ctr
	sudo rm -rf "${tmp_data_dir}"
	# host_pci is only assigned part-way through main(), so under
	# 'set -o nounset' it has to be expanded with a default here. Using an
	# explicit 'if' instead of '[ ... ] && ...' also keeps the handler's
	# status at zero when there is nothing to unbind.
	if [ -n "${host_pci:-}" ]; then
		sudo driverctl unset-override "${host_pci}"
	fi
}
# Print the PCI address (with domain, as shown by 'lspci -D') of the last
# listed device whose description contains both "Ethernet controller" and
# "Virtio.*network device".
#
# Outputs: the address on stdout, or nothing when no device matches.
# Returns: 0 even when nothing matches, so that a caller running with
#          errexit/pipefail can test for an empty result and report it
#          instead of being aborted by a failing grep stage.
host_pci_addr() {
	lspci -D | awk '
		/Ethernet controller/ && /Virtio.*network device/ { addr = $1 }
		END { if (addr != "") print addr }
	'
}
# Print the /dev/vfio/<group> path for a PCI device bound to vfio-pci.
#
# Arguments: $1 - PCI address of the device (as listed under
#                 /sys/bus/pci/drivers/vfio-pci/)
# Outputs:   "/dev/vfio/<name of the device's iommu_group link target>"
# Returns:   non-zero (with a message on stderr) if the iommu_group link
#            cannot be resolved.
get_vfio_path() {
	local addr="$1"
	# Use the address that was passed in rather than the global host_pci,
	# so the function honours its argument.
	local group_link="/sys/bus/pci/drivers/vfio-pci/${addr}/iommu_group"
	local group_path

	if ! group_path="$(realpath "${group_link}")"; then
		echo "Cannot resolve ${group_link}" >&2
		return 1
	fi
	echo "/dev/vfio/$(basename "${group_path}")"
}
# Fetch the busybox image with ctr and export it as a tar archive.
#
# Globals: rootfs_tar (read) - destination of the exported archive
#          USER       (read) - user and group that get ownership of it
pull_rootfs() {
	local busybox_ref="quay.io/prometheus/busybox:latest"

	sudo -E ctr i pull "${busybox_ref}"
	sudo -E ctr i export "${rootfs_tar}" "${busybox_ref}"
	# The export ran under sudo; hand the archive back to the invoking user.
	sudo chown "${USER}:${USER}" "${rootfs_tar}"
	sync
}
# Build a bundle directory: unpack the exported image's layers into
# <bundle_dir>/rootfs and copy the generated config.json next to it.
#
# Arguments: $1 - bundle directory to create
# Globals:   rootfs_tar  (read) - image archive to unpack
#            script_path (read) - directory holding the config.json to copy
create_bundle() {
	local bundle_dir="$1"
	local rootfs_dir="${bundle_dir}/rootfs"
	local layers_dir
	local layers
	local layer

	mkdir -p "${bundle_dir}"
	mkdir -p "${rootfs_dir}"

	# extract busybox rootfs
	layers_dir="$(mktemp -d)"
	tar -C "${layers_dir}" -pxf "${rootfs_tar}"

	# Query the layer list once, in manifest order, instead of running jq
	# on every loop iteration.
	layers="$(jq -r '.[].Layers[]' "${layers_dir}/manifest.json")"
	while IFS= read -r layer; do
		[ -n "${layer}" ] || continue
		tar -C "${rootfs_dir}" -xf "${layers_dir}/${layer}"
	done <<< "${layers}"

	# The unpacked archive is only needed while the layers are extracted;
	# remove it so repeated calls do not pile up temporary directories.
	rm -rf -- "${layers_dir}"
	sync

	# Copy config.json
	cp -a "${script_path}/config.json" "${bundle_dir}/config.json"
}
# Start a detached container with the kata v2 runtime via ctr.
#
# Arguments: $1 - container id
#            $2 - bundle directory whose config.json is handed to ctr
run_container() {
	local container_id="$1"
	local bundle_dir="$2"
	local -a ctr_args=(
		run -d
		--runtime io.containerd.kata.v2
		--config "${bundle_dir}/config.json"
		"${container_id}"
	)

	sudo -E ctr "${ctr_args[@]}"
}
# Run a command inside a container through 'ctr t exec', giving up after
# 30 seconds.
#
# Arguments: $1  - container id
#            $2+ - command (and its arguments) to execute
# Outputs:   whatever the executed command writes
get_ctr_cmd_output() {
	local container_id="$1"
	shift
	local -a exec_prefix=(timeout 30s sudo -E ctr t exec --exec-id 2 "${container_id}")

	"${exec_prefix[@]}" "$@"
}
# Verify the guest-kernel mode container.
#
# With vfio_mode=guest-kernel the passed-through device is expected to be
# driven by the guest kernel's native driver, so the check looks for an
# 'eth*' interface in the output of 'ip a'.
#
# Arguments: $1 - container id
check_guest_kernel() {
	local container_id="$1"

	if ! get_ctr_cmd_output "${container_id}" ip a | grep "eth"; then
		die "Missing VFIO network interface"
	fi
}
# Verify the vfio mode container: the device must be visible through VFIO
# inside the guest, must not show up as a network interface, and the
# PCIDEVICE_VIRTIO_NET* environment variable must name its guest address.
#
# Arguments: $1 - container id
# Globals:   HYPERVISOR (read) - selects the expected IOMMU group size
# Calls die with a diagnostic on the first failed expectation.
check_vfio() {
	local cid="$1"
	local group devs num_devices guest_pci guest_env

	# For vfio_mode=vfio, the device should be bound to the guest
	# vfio-pci driver.

	# Check the control device is visible
	get_ctr_cmd_output "${cid}" ls /dev/vfio/vfio || die "Couldn't find VFIO control device in container"

	# The device should *not* cause an ethernet interface to appear
	if get_ctr_cmd_output "${cid}" ip a | grep "eth"; then
		die "Unexpected network interface"
	fi

	# There should be exactly one VFIO group device (there might
	# be multiple IOMMU groups in the VM, but only one device
	# should be bound to the VFIO driver, so there should still
	# only be one VFIO device).
	# '|| true': with pipefail an empty grep result would otherwise abort
	# the script before the diagnostic below can be printed.
	group="$(get_ctr_cmd_output "${cid}" ls /dev/vfio | grep -v vfio || true)"
	if [ "$(echo "${group}" | wc -w)" -ne 1 ]; then
		die "Expected exactly one VFIO group got: ${group}"
	fi

	# With qemu there should be two devices in the IOMMU group: the
	# ethernet device we care about, plus the PCIe to PCI bridge device.
	# With clh the ethernet device is expected to be alone in its group.
	devs="$(get_ctr_cmd_output "${cid}" ls /sys/kernel/iommu_groups/"${group}"/devices)"
	num_devices="$(echo "${devs}" | wc -w)"
	if [ "${HYPERVISOR}" = "qemu" ] && [ "${num_devices}" -ne 2 ]; then
		die "Expected exactly two devices got: ${devs}"
	fi
	if [ "${HYPERVISOR}" = "clh" ] && [ "${num_devices}" -ne 1 ]; then
		die "Expected exactly one device got: ${devs}"
	fi

	# The bridge device will always sort first, because it is on
	# bus zero, whereas the NIC will be on a non-zero bus
	guest_pci="$(echo "${devs}" | tail -1)"

	# This is a roundabout way of getting the environment
	# variable, but to use the more obvious "echo $PCIDEVICE_..."
	# we would have to escape the '$' enough to not be expanded
	# before it's injected into the container, but not so much
	# that it *is* expanded by the shell within the container.
	# Doing that with another shell function in between is very
	# fragile, so do it this way instead.
	# The patterns are quoted so the shell cannot glob-expand them, and
	# '|| true' keeps a missing variable from aborting before the
	# comparison below reports it.
	guest_env="$(get_ctr_cmd_output "${cid}" env | grep '^PCIDEVICE_VIRTIO_NET' | sed 's/^[^=]*=//' || true)"
	if [ "${guest_env}" != "${guest_pci}" ]; then
		die "PCIDEVICE variable was \"${guest_env}\" instead of \"${guest_pci}\""
	fi
}
# Print the output of 'dmesg' run inside the given container.
#
# Arguments: $1 - container id
get_dmesg() {
	local cid="$1"

	get_ctr_cmd_output "${cid}" dmesg
}
# Print the usage text for this script on stdout.
help() {
	printf '%s\n' \
		"Usage: $0 [-h] [options]" \
		"Description:" \
		"This script runs a kata container and passthrough a vfio device" \
		"Options:" \
		"-h, Help" \
		"-i <string>, Specify initrd or image" \
		"-m <string>, Specify kata-runtime machine type for qemu hypervisor" \
		"-p <string>, Specify kata-runtime hypervisor" \
		"-s <value>, Set sandbox_cgroup_only in the configuration file"
}
# Select the kata configuration file to use and patch it for the VFIO test.
#
# Walks the paths printed by 'kata-runtime --kata-show-default-config-paths';
# the last existing one visited becomes the active configuration file. If a
# hypervisor-specific configuration sits next to it, that file is copied
# over it and the search stops. The active file is then edited in place:
# machine type, hot_plug_vfio, sandbox_cgroup_only, image/initrd selection,
# debug settings and the enable_iommu annotation.
#
# Globals (read): HYPERVISOR, MACHINE_TYPE, SANDBOX_CGROUP_ONLY, IMAGE_TYPE
# Calls die if none of the default configuration paths exists.
setup_configuration_file() {
	local qemu_config_file="configuration-qemu.toml"
	local clh_config_file="configuration-clh.toml"
	local image_file="/opt/kata/share/kata-containers/kata-containers.img"
	local initrd_file="/opt/kata/share/kata-containers/kata-containers-initrd.img"
	local kata_config_file=""
	local default_paths file config_dir config_filename config_file

	default_paths="$(kata-runtime --kata-show-default-config-paths)"
	# Word splitting of the path list is intentional here.
	# shellcheck disable=SC2086
	for file in ${default_paths}; do
		if [ ! -f "${file}" ]; then
			continue
		fi

		kata_config_file="${file}"
		config_dir="$(dirname "${file}")"
		config_filename=""

		case "${HYPERVISOR}" in
			qemu) config_filename="${qemu_config_file}" ;;
			clh) config_filename="${clh_config_file}" ;;
		esac

		config_file="${config_dir}/${config_filename}"
		if [ -f "${config_file}" ]; then
			rm -f "${kata_config_file}"
			cp -a "$(realpath "${config_file}")" "${kata_config_file}"
			break
		fi
	done

	# Without this guard every sed below would be handed an empty file
	# name and fail with a message that does not point at the real cause.
	[ -n "${kata_config_file}" ] || die "No kata configuration file found"

	# machine type applies to configuration.toml and configuration-qemu.toml
	if [ -n "${MACHINE_TYPE}" ]; then
		if [ "${HYPERVISOR}" = "qemu" ]; then
			sed -i 's|^machine_type.*|machine_type = "'"${MACHINE_TYPE}"'"|g' "${kata_config_file}"
		else
			warn "Variable machine_type only applies to qemu. It will be ignored"
		fi
	fi

	# Make sure we have set hot_plug_vfio to a reasonable value
	if [ "${HYPERVISOR}" = "qemu" ]; then
		sed -i -e 's|^#*.*hot_plug_vfio.*|hot_plug_vfio = "bridge-port"|' "${kata_config_file}"
	elif [ "${HYPERVISOR}" = "clh" ]; then
		sed -i -e 's|^#*.*hot_plug_vfio.*|hot_plug_vfio = "root-port"|' "${kata_config_file}"
	fi

	if [ -n "${SANDBOX_CGROUP_ONLY}" ]; then
		sed -i 's|^sandbox_cgroup_only.*|sandbox_cgroup_only='"${SANDBOX_CGROUP_ONLY}"'|g' "${kata_config_file}"
	fi

	# Change to initrd or image depending on user input.
	# Non-default configs must be changed to specify either initrd or image, image is default.
	if [ "${IMAGE_TYPE}" = "initrd" ]; then
		if grep -q "^image.*" "${kata_config_file}"; then
			if grep -q "^initrd.*" "${kata_config_file}"; then
				# Both are present: drop the unwanted one.
				sed -i '/^image.*/d' "${kata_config_file}"
			else
				sed -i 's|^image.*|initrd = "'"${initrd_file}"'"|g' "${kata_config_file}"
			fi
		fi
	else
		if grep -q "^initrd.*" "${kata_config_file}"; then
			if grep -q "^image.*" "${kata_config_file}"; then
				# Both are present: drop the unwanted one.
				sed -i '/^initrd.*/d' "${kata_config_file}"
			else
				sed -i 's|^initrd.*|image = "'"${image_file}"'"|g' "${kata_config_file}"
			fi
		fi
	fi

	# enable debug
	sed -i -e 's/^#\(enable_debug\).*=.*$/\1 = true/g' \
		-e 's/^#\(debug_console_enabled\).*=.*$/\1 = true/g' \
		-e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 mitigations=off agent.log=debug"/g' \
		"${kata_config_file}"

	# enable VFIO relevant hypervisor annotations
	sed -i -e 's/^\(enable_annotations\).*=.*$/\1 = ["enable_iommu"]/' \
		"${kata_config_file}"
}
# Render a config.json from a template, build the bundle, start the
# container and print the guest's dmesg.
#
# Arguments: $1 - container id
#            $2 - bundle directory
#            $3 - config.json template containing @...@ placeholders
#            $4 - host PCI address substituted for @HOST_PCI@
# Globals (read): vfio_device, vfio_major, vfio_minor, vfio_ctl_major,
#                 vfio_ctl_minor, script_path
# The rendered file is written to ${script_path}/config.json.
run_test_container() {
	local container_id="$1"
	local bundle_dir="$2"
	local config_json_in="$3"
	local host_pci="$4"

	# generate final config.json: strip '#' comment lines and fill in every
	# placeholder
	local -a render_script=(
		-e '/^#.*/d'
		-e "s|@VFIO_PATH@|${vfio_device}|g"
		-e "s|@VFIO_MAJOR@|${vfio_major}|g"
		-e "s|@VFIO_MINOR@|${vfio_minor}|g"
		-e "s|@VFIO_CTL_MAJOR@|${vfio_ctl_major}|g"
		-e "s|@VFIO_CTL_MINOR@|${vfio_ctl_minor}|g"
		-e "s|@ROOTFS@|${bundle_dir}/rootfs|g"
		-e "s|@HOST_PCI@|${host_pci}|g"
	)
	sed "${render_script[@]}" "${config_json_in}" > "${script_path}/config.json"

	create_bundle "${bundle_dir}"

	# run container
	run_container "${container_id}" "${bundle_dir}"

	# output VM dmesg
	get_dmesg "${container_id}"
}
# Entry point: parse the options, bind a host virtio NIC to vfio-pci and run
# the guest-kernel and vfio mode container tests against it.
#
# Options: -h help, -i image type, -m machine type, -p hypervisor,
#          -s sandbox_cgroup_only value (see help()).
# Globals written: IMAGE_TYPE, MACHINE_TYPE, HYPERVISOR, SANDBOX_CGROUP_ONLY,
#   host_pci (read by cleanup), vfio_device, vfio_major, vfio_minor,
#   vfio_ctl_major, vfio_ctl_minor (read by run_test_container). These are
#   deliberately not declared local.
main() {
	local OPTIND
	local opt
	while getopts "hi:m:p:s:" opt; do
		case "${opt}" in
			h)
				help
				exit 0
				;;
			i)
				IMAGE_TYPE="${OPTARG}"
				;;
			m)
				MACHINE_TYPE="${OPTARG}"
				;;
			p)
				HYPERVISOR="${OPTARG}"
				;;
			s)
				SANDBOX_CGROUP_ONLY="${OPTARG}"
				;;
			*)
				# parse failure
				help
				die "Failed to parse arguments"
				;;
		esac
	done
	shift $((OPTIND - 1))

	#
	# Get the device ready on the host
	#
	setup_configuration_file

	restart_containerd_service
	sudo modprobe vfio
	sudo modprobe vfio-pci

	# '|| true' so that a failed lookup reaches the diagnostic below
	# instead of being cut short by errexit.
	host_pci="$(host_pci_addr)" || true
	[ -n "${host_pci}" ] || die "virtio ethernet controller PCI address not found"

	grep -q "intel_iommu=on" /proc/cmdline || \
		die "intel_iommu=on not found in kernel cmdline"

	sudo driverctl set-override "${host_pci}" vfio-pci

	# Check that the group node really exists as a character device; a
	# plain non-empty test cannot fail because the path always starts
	# with "/dev/vfio/".
	vfio_device="$(get_vfio_path "${host_pci}")" || true
	[ -c "${vfio_device}" ] || die "vfio device not found"
	# stat prints the device numbers in hex; printf converts to decimal.
	vfio_major="$(printf '%d' "$(stat -c '0x%t' "${vfio_device}")")"
	vfio_minor="$(printf '%d' "$(stat -c '0x%T' "${vfio_device}")")"

	# Test for the device node itself; '-n' on a literal string is
	# always true.
	[ -c "/dev/vfio/vfio" ] || die "vfio control device not found"
	vfio_ctl_major="$(printf '%d' "$(stat -c '0x%t' /dev/vfio/vfio)")"
	vfio_ctl_minor="$(printf '%d' "$(stat -c '0x%T' /dev/vfio/vfio)")"

	# Get the rootfs we'll use for all tests
	pull_rootfs

	#
	# Run the tests
	#

	# test for guest-kernel mode
	guest_kernel_cid="vfio-guest-kernel-${RANDOM}"
	run_test_container "${guest_kernel_cid}" \
		"${tmp_data_dir}/vfio-guest-kernel" \
		"${script_path}/guest-kernel.json.in" \
		"${host_pci}"
	check_guest_kernel "${guest_kernel_cid}"

	# Remove the container so we can re-use the device for the next test
	clean_env_ctr

	# test for vfio mode
	vfio_cid="vfio-vfio-${RANDOM}"
	run_test_container "${vfio_cid}" \
		"${tmp_data_dir}/vfio-vfio" \
		"${script_path}/vfio.json.in" \
		"${host_pci}"
	check_vfio "${vfio_cid}"
}
# Quote "$@" so every command-line argument reaches main() as a single
# word, without being re-split or glob-expanded.
main "$@"