Merge pull request #7704 from jepio/vfio-part-1

gha: vfio: Import test script
This commit is contained in:
Jeremi Piotrowski
2023-09-14 16:45:31 +02:00
committed by GitHub
15 changed files with 1140 additions and 8 deletions

View File

@@ -45,4 +45,5 @@ jobs:
path: kata-artifacts
- name: Run vfio tests
timeout-minutes: 15
run: bash tests/functional/vfio/gha-run.sh run

View File

@@ -184,12 +184,22 @@ block_device_driver = "virtio-blk"
# Disable the 'seccomp' feature from Cloud Hypervisor, default false
# disable_seccomp = true
# Enable vIOMMU, default false
# Enabling this will result in the VM having a vIOMMU device
# This will also add the following options to the kernel's
# command line: iommu=pt
#enable_iommu = true
# This option changes the default hypervisor and kernel parameters
# to enable debug output where available.
#
# Default false
#enable_debug = true
# Enable hot-plugging of VFIO devices to a root-port.
# The default setting is "no-port"
#hot_plug_vfio = "root-port"
# Path to OCI hook binaries in the *guest rootfs*.
# This does not affect host-side hooks which must instead be added to
# the OCI spec passed to the runtime.

View File

@@ -1680,8 +1680,8 @@ func checkConfig(config oci.RuntimeConfig) error {
// Only allow one of the following settings for cold-plug:
// no-port, root-port, switch-port
func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineType string, hypervisorType virtcontainers.HypervisorType) error {
if hypervisorType != virtcontainers.QemuHypervisor {
kataUtilsLogger.Warn("Advanced PCIe Topology only available for QEMU hypervisor, ignoring hot(cold)_vfio_port setting")
if hypervisorType != virtcontainers.QemuHypervisor && hypervisorType != virtcontainers.ClhHypervisor {
kataUtilsLogger.Warn("Advanced PCIe Topology only available for QEMU/CLH hypervisor, ignoring hot(cold)_vfio_port setting")
return nil
}
@@ -1696,6 +1696,14 @@ func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineT
if machineType != "q35" && machineType != "virt" {
return nil
}
if hypervisorType == virtcontainers.ClhHypervisor {
if coldPlug != config.NoPort {
return fmt.Errorf("cold-plug not supported on CLH")
}
if hotPlug != config.RootPort {
return fmt.Errorf("only hot-plug=%s supported on CLH", config.RootPort)
}
}
var port config.PCIePort
if coldPlug != config.NoPort {
@@ -1704,10 +1712,6 @@ func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineT
if hotPlug != config.NoPort {
port = hotPlug
}
if port == config.NoPort {
return fmt.Errorf("invalid vfio_port=%s setting, use on of %s, %s, %s",
port, config.BridgePort, config.RootPort, config.SwitchPort)
}
if port == config.BridgePort || port == config.RootPort || port == config.SwitchPort {
return nil
}

View File

@@ -18,8 +18,10 @@ import (
"syscall"
"testing"
config "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/vcmock"
@@ -419,3 +421,32 @@ func TestCreateContainer(t *testing.T) {
assert.NoError(err)
}
}
// TestVfioChecksClh runs checkPCIeConfig with the Cloud Hypervisor type and
// the default machine type, and asserts which cold-plug/hot-plug port
// combinations are accepted and which are rejected.
func TestVfioChecksClh(t *testing.T) {
	assert := assert.New(t)

	validate := func(coldPlug, hotPlug config.PCIePort) error {
		return checkPCIeConfig(coldPlug, hotPlug, defaultMachineType, virtcontainers.ClhHypervisor)
	}

	cases := []struct {
		coldPlug config.PCIePort
		hotPlug  config.PCIePort
		wantErr  bool
	}{
		{config.NoPort, config.NoPort, false},
		{config.NoPort, config.RootPort, false},
		{config.RootPort, config.RootPort, true},
		{config.RootPort, config.NoPort, true},
		{config.NoPort, config.SwitchPort, true},
	}

	for _, tc := range cases {
		err := validate(tc.coldPlug, tc.hotPlug)
		if tc.wantErr {
			assert.Error(err)
			continue
		}
		assert.NoError(err)
	}
}
// TestVfioCheckQemu runs checkPCIeConfig with the QEMU hypervisor type and the
// default machine type, and asserts which cold-plug/hot-plug port combinations
// are accepted and which are rejected.
func TestVfioCheckQemu(t *testing.T) {
	assert := assert.New(t)

	validate := func(coldPlug, hotPlug config.PCIePort) error {
		return checkPCIeConfig(coldPlug, hotPlug, defaultMachineType, virtcontainers.QemuHypervisor)
	}

	cases := []struct {
		coldPlug config.PCIePort
		hotPlug  config.PCIePort
		wantErr  bool
	}{
		{config.NoPort, config.NoPort, false},
		{config.RootPort, config.NoPort, false},
		{config.NoPort, config.RootPort, false},
		{config.RootPort, config.RootPort, true},
		{config.SwitchPort, config.RootPort, true},
	}

	for _, tc := range cases {
		err := validate(tc.coldPlug, tc.hotPlug)
		if tc.wantErr {
			assert.Error(err)
			continue
		}
		assert.NoError(err)
	}
}

View File

@@ -490,6 +490,13 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
}
clh.vmconfig.Payload.SetKernel(kernelPath)
clh.vmconfig.Platform = chclient.NewPlatformConfig()
platform := clh.vmconfig.Platform
platform.SetNumPciSegments(2)
if clh.config.IOMMU {
platform.SetIommuSegments([]int32{0})
}
if clh.config.ConfidentialGuest {
if err := clh.enableProtection(); err != nil {
return err
@@ -528,6 +535,9 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
// start the guest kernel with 'quiet' in non-debug mode
params = append(params, Param{"quiet", ""})
}
if clh.config.IOMMU {
params = append(params, Param{"iommu", "pt"})
}
// Followed by extra kernel parameters defined in the configuration file
params = append(params, clh.config.KernelParams...)
@@ -536,6 +546,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
// set random device generator to hypervisor
clh.vmconfig.Rng = chclient.NewRngConfig(clh.config.EntropySource)
clh.vmconfig.Rng.SetIommu(clh.config.IOMMU)
// set the initial root/boot disk of hypervisor
assetPath, assetType, err := clh.config.ImageOrInitrdAssetPath()
@@ -561,6 +572,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
} else {
pmem := chclient.NewPmemConfig(assetPath)
*pmem.DiscardWrites = true
pmem.SetIommu(clh.config.IOMMU)
if clh.vmconfig.Pmem != nil {
*clh.vmconfig.Pmem = append(*clh.vmconfig.Pmem, *pmem)
@@ -594,6 +606,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
clh.vmconfig.Console = chclient.NewConsoleConfig(cctOFF)
}
clh.vmconfig.Console.SetIommu(clh.config.IOMMU)
cpu_topology := chclient.NewCpuTopology()
cpu_topology.ThreadsPerCore = func(i int32) *int32 { return &i }(1)
@@ -836,6 +849,7 @@ func (clh *cloudHypervisor) hotplugAddBlockDevice(drive *config.BlockDrive) erro
queueSize := int32(1024)
clhDisk.NumQueues = &queues
clhDisk.QueueSize = &queueSize
clhDisk.SetIommu(clh.config.IOMMU)
diskRateLimiterConfig := clh.getDiskRateLimiterConfig()
if diskRateLimiterConfig != nil {
@@ -861,6 +875,7 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
// Create the clh device config via the constructor to ensure default values are properly assigned
clhDevice := *chclient.NewDeviceConfig(device.SysfsDev)
clhDevice.SetIommu(clh.config.IOMMU)
pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice)
if err != nil {
return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err))
@@ -1535,6 +1550,7 @@ func (clh *cloudHypervisor) addVSock(cid int64, path string) {
}).Info("Adding HybridVSock")
clh.vmconfig.Vsock = chclient.NewVsockConfig(cid, path)
clh.vmconfig.Vsock.SetIommu(clh.config.IOMMU)
}
func (clh *cloudHypervisor) getRateLimiterConfig(bwSize, bwOneTimeBurst, opsSize, opsOneTimeBurst int64) *chclient.RateLimiterConfig {
@@ -1604,6 +1620,7 @@ func (clh *cloudHypervisor) addNet(e Endpoint) error {
if netRateLimiterConfig != nil {
net.SetRateLimiterConfig(*netRateLimiterConfig)
}
net.SetIommu(clh.config.IOMMU)
if clh.netDevices != nil {
*clh.netDevices = append(*clh.netDevices, *net)
@@ -1636,6 +1653,7 @@ func (clh *cloudHypervisor) addVolume(volume types.Volume) error {
}
fs := chclient.NewFsConfig(volume.MountTag, vfsdSockPath, numQueues, queueSize)
fs.SetPciSegment(1)
clh.vmconfig.Fs = &[]chclient.FsConfig{*fs}
clh.Logger().Debug("Adding share volume to hypervisor: ", volume.MountTag)

View File

@@ -68,6 +68,7 @@ func newClhConfig() (HypervisorConfig, error) {
NetRateLimiterBwOneTimeBurst: int64(0),
NetRateLimiterOpsMaxRate: int64(0),
NetRateLimiterOpsOneTimeBurst: int64(0),
HotPlugVFIO: config.NoPort,
}, nil
}

View File

@@ -158,7 +158,7 @@ function clean_env_ctr()
info "Wait until the containers gets removed"
for task_id in "${running_tasks[@]}"; do
sudo ctr t kill -a -s SIGTERM ${task_id} >/dev/null 2>&1
sudo timeout -s SIGKILL 30s ctr t kill -a -s SIGTERM ${task_id} >/dev/null 2>&1 || true
sleep 0.5
done

View File

@@ -15,10 +15,33 @@ source "${vfio_dir}/../../common.bash"
# Install the distro packages required by the vfio tests.
#
# The distro is detected from os-release inside a subshell so the variables
# it defines (ID, ...) do not leak into the caller. Unsupported distros abort
# through die.
function install_dependencies() {
	info "Installing the dependencies needed for running the vfio tests"
	(
		source /etc/os-release || source /usr/lib/os-release
		case "${ID}" in
			ubuntu)
				# cloud image dependencies
				deps=(xorriso curl qemu-utils openssh-client)
				sudo apt-get update
				sudo apt-get install -y "${deps[@]}" qemu-system-x86
				;;
			fedora)
				# cloud image dependencies
				deps=(xorriso curl qemu-img openssh)
				sudo dnf install -y "${deps[@]}" qemu-system-x86-core
				;;
			# The catch-all pattern must be unquoted: a quoted "*" only
			# matches an ID that is literally an asterisk.
			*)
				die "Unsupported distro: ${ID}"
				;;
		esac
	)
}
# Run the vfio functional tests through the Fedora VM wrapper script that
# lives next to this file.
function run() {
	info "Running vfio tests using ${KATA_HYPERVISOR} hypervisor"
	"${vfio_dir}"/vfio_fedora_vm_wrapper.sh
}
function main() {

View File

@@ -0,0 +1,176 @@
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
{
"ociVersion": "1.0.0-rc2-dev",
"platform": {
"os": "linux",
"arch": "amd64"
},
"annotations": {
"io.katacontainers.config.hypervisor.enable_iommu": "false",
"io.katacontainers.config.runtime.vfio_mode": "guest-kernel"
},
"process": {
"terminal": false,
"consoleSize": {
"height": 0,
"width": 0
},
"user": {
"uid": 0,
"gid": 0
},
"args": [ "/bin/tail", "-f", "/dev/null" ],
"env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm"
],
"cwd": "/",
"rlimits": [{
"type": "RLIMIT_NOFILE",
"hard": 1024,
"soft": 1024
}],
"noNewPrivileges": true
},
"root": {
"path": "@ROOTFS@",
"readonly": false
},
"hostname": "vfio-test",
"mounts": [{
"destination": "/proc",
"type": "proc",
"source": "proc"
},
{
"destination": "/dev",
"type": "tmpfs",
"source": "tmpfs",
"options": [
"nosuid",
"strictatime",
"mode=755",
"size=65536k"
]
},
{
"destination": "/dev/pts",
"type": "devpts",
"source": "devpts",
"options": [
"nosuid",
"noexec",
"newinstance",
"ptmxmode=0666",
"mode=0620",
"gid=5"
]
},
{
"destination": "/dev/shm",
"type": "tmpfs",
"source": "shm",
"options": [
"nosuid",
"noexec",
"nodev",
"mode=1777",
"size=65536k"
]
},
{
"destination": "/dev/mqueue",
"type": "mqueue",
"source": "mqueue",
"options": [
"nosuid",
"noexec",
"nodev"
]
},
{
"destination": "/sys",
"type": "sysfs",
"source": "sysfs",
"options": [
"nosuid",
"noexec",
"nodev",
"ro"
]
},
{
"destination": "/sys/fs/cgroup",
"type": "cgroup",
"source": "cgroup",
"options": [
"nosuid",
"noexec",
"nodev",
"relatime",
"ro"
]
}
],
"hooks": {},
"linux": {
"devices": [{
"path": "@VFIO_PATH@",
"type": "c",
"major": @VFIO_MAJOR@,
"minor": @VFIO_MINOR@,
"fileMode": 384,
"uid": 0,
"gid": 0
}],
"cgroupsPath": "kata/vfiotest",
"resources": {
"devices": [
{"allow":false,"access":"rwm"},
{"allow":true,"type":"c","major":1,"minor":3,"access":"rwm"},
{"allow":true,"type":"c","major":1,"minor":5,"access":"rwm"},
{"allow":true,"type":"c","major":1,"minor":8,"access":"rwm"},
{"allow":true,"type":"c","major":1,"minor":9,"access":"rwm"},
{"allow":true,"type":"c","major":5,"minor":0,"access":"rwm"},
{"allow":true,"type":"c","major":5,"minor":1,"access":"rwm"},
{"allow": true,"access": "rwm","major": @VFIO_MAJOR@,"minor": @VFIO_MINOR@,"type": "c"}
]
},
"namespaces": [{
"type": "pid"
},
{
"type": "network"
},
{
"type": "ipc"
},
{
"type": "uts"
},
{
"type": "mount"
}
],
"maskedPaths": [
"/proc/kcore",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/sys/firmware"
],
"readonlyPaths": [
"/proc/asound",
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
]
}
}

350
tests/functional/vfio/run.sh Executable file
View File

@@ -0,0 +1,350 @@
#!/bin/bash
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Trace every command and abort on errors, unset variables and failed pipelines.
set -x
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
script_path=$(dirname "$0")
# Shared test helpers; this script calls info/warn/die, clean_env_ctr and
# restart_containerd_service without defining them, so they are presumably
# provided by this file.
source "${script_path}/../../common.bash"
addr=
# Scratch directory holding the exported rootfs tarball and the test bundles;
# removed again by cleanup().
tmp_data_dir="$(mktemp -d)"
rootfs_tar="${tmp_data_dir}/rootfs.tar"
# cleanup is defined further down; the trap only resolves the name at exit time.
trap cleanup EXIT
# kata-runtime options
# (filled in from the command line by main)
SANDBOX_CGROUP_ONLY=""
HYPERVISOR=
MACHINE_TYPE=
IMAGE_TYPE=
# EXIT handler: remove the test containers and the scratch directory, and
# give the PCI device back to its original driver.
cleanup() {
	clean_env_ctr
	sudo rm -rf "${tmp_data_dir}"
	# host_pci is only assigned once main has found the device; default it
	# so an early exit does not trip 'nounset', and use a full 'if' so the
	# function does not return non-zero when there is nothing to unbind.
	if [ -n "${host_pci:-}" ]; then
		sudo driverctl unset-override "${host_pci}"
	fi
}
# Print the PCI address (with domain) of the last virtio network device
# reported by lspci as an Ethernet controller. Prints nothing when no such
# device exists.
host_pci_addr() {
	lspci -D \
		| grep "Ethernet controller" \
		| grep "Virtio.*network device" \
		| tail -1 \
		| cut -d' ' -f1
}
# Print the /dev/vfio/<group> path of the IOMMU group that the PCI device
# bound to vfio-pci belongs to.
#
# $1: PCI address of the device, as found under /sys/bus/pci/drivers/vfio-pci
get_vfio_path() {
	local addr="$1"
	# Resolve the device passed by the caller rather than the global host_pci.
	echo "/dev/vfio/$(basename "$(realpath "/sys/bus/pci/drivers/vfio-pci/${addr}/iommu_group")")"
}
# Pull the busybox image with ctr and export it as a tarball to ${rootfs_tar},
# owned by the invoking user.
pull_rootfs() {
	local busybox_image="quay.io/prometheus/busybox:latest"

	sudo -E ctr i pull ${busybox_image}
	sudo -E ctr i export "${rootfs_tar}" "${busybox_image}"
	sudo chown ${USER}:${USER} "${rootfs_tar}"

	# flush the exported tarball to disk
	sync
}
# Create an OCI bundle: unpack every layer of the exported image tarball
# (${rootfs_tar}) into <bundle>/rootfs and copy the generated config.json
# next to it.
#
# $1: bundle directory (created if missing)
create_bundle() {
	local bundle_dir="$1"
	local rootfs_dir="${bundle_dir}/rootfs"
	local layers_dir
	local layers
	local layer

	mkdir -p "${bundle_dir}" "${rootfs_dir}"

	# extract busybox rootfs
	# Assign separately from 'local' so a failing command is not masked.
	layers_dir="$(mktemp -d)"
	tar -C "${layers_dir}" -pxf "${rootfs_tar}"

	# Read the layer list once, in manifest order.
	layers="$(jq -r '.[].Layers[]' "${layers_dir}/manifest.json")"
	while IFS= read -r layer; do
		[ -n "${layer}" ] || continue
		tar -C "${rootfs_dir}" -xf "${layers_dir}/${layer}"
	done <<< "${layers}"

	# The unpacked layers are no longer needed once applied to the rootfs.
	rm -rf "${layers_dir}"
	sync

	# Copy config.json
	cp -a "${script_path}/config.json" "${bundle_dir}/config.json"
}
# Start a detached container with the kata v2 runtime from the bundle's
# config.json.
#
# $1: container id
# $2: bundle directory
run_container() {
	local cid="$1"
	local bundle="$2"

	sudo -E ctr run -d \
		--runtime io.containerd.kata.v2 \
		--config "${bundle}/config.json" \
		"${cid}"
}
# Execute a command inside a running container, bounded by a 30s timeout,
# and pass its output through.
#
# $1: container id
# remaining arguments: command to execute
get_ctr_cmd_output() {
	local cid="$1"
	shift

	timeout 30s sudo -E ctr t exec --exec-id 2 "${cid}" "$@"
}
# Verify the vfio_mode=guest-kernel result.
#
# In this mode the device should be bound to the guest kernel's native
# driver, so an ethernet interface named 'eth*' is expected in the container.
#
# $1: container id
check_guest_kernel() {
	local cid="$1"

	if ! get_ctr_cmd_output "${cid}" ip a | grep "eth"; then
		die "Missing VFIO network interface"
	fi
}
# Verify the vfio_mode=vfio result: the device must be bound to the guest
# vfio-pci driver and exposed to the container as a VFIO group device.
#
# $1: container id
check_vfio() {
	local cid="$1"
	local group
	local devs
	local num_devices
	local guest_pci
	local guest_env

	# Check the control device is visible
	get_ctr_cmd_output "${cid}" ls /dev/vfio/vfio || die "Couldn't find VFIO control device in container"

	# The device should *not* cause an ethernet interface to appear
	! get_ctr_cmd_output "${cid}" ip a | grep "eth" || die "Unexpected network interface"

	# There should be exactly one VFIO group device (there might
	# be multiple IOMMU groups in the VM, but only one device
	# should be bound to the VFIO driver, so there should still
	# only be one VFIO device
	#
	# '|| true': a grep without matches must reach the explicit check
	# below instead of aborting silently through errexit/pipefail.
	group="$(get_ctr_cmd_output "${cid}" ls /dev/vfio | grep -v vfio || true)"
	if [ "$(echo "${group}" | wc -w)" != "1" ] ; then
		die "Expected exactly one VFIO group got: ${group}"
	fi

	# Number of devices expected in the IOMMU group:
	# - qemu: two, the ethernet device we care about plus the PCIe to
	#   PCI bridge device
	# - clh: one
	devs="$(get_ctr_cmd_output "${cid}" ls /sys/kernel/iommu_groups/"${group}"/devices)"
	num_devices=$(echo "${devs}" | wc -w)
	if [ "${HYPERVISOR}" = "qemu" ] && [ "${num_devices}" != "2" ] ; then
		die "Expected exactly two devices got: ${devs}"
	fi
	if [ "${HYPERVISOR}" = "clh" ] && [ "${num_devices}" != "1" ] ; then
		die "Expected exactly one device got: ${devs}"
	fi

	# The bridge device will always sort first, because it is on
	# bus zero, whereas the NIC will be on a non-zero bus
	guest_pci=$(echo "${devs}" | tail -1)

	# This is a roundabout way of getting the environment
	# variable, but to use the more obvious "echo $PCIDEVICE_..."
	# we would have to escape the '$' enough to not be expanded
	# before it's injected into the container, but not so much
	# that it *is* expanded by the shell within the container.
	# Doing that with another shell function in between is very
	# fragile, so do it this way instead.
	#
	# The patterns are quoted so the shell never treats them as globs, and
	# a missing variable falls through to the comparison below.
	guest_env="$(get_ctr_cmd_output "${cid}" env | grep '^PCIDEVICE_VIRTIO_NET' | sed 's/^[^=]*=//' || true)"
	if [ "${guest_env}" != "${guest_pci}" ]; then
		die "PCIDEVICE variable was \"${guest_env}\" instead of \"${guest_pci}\""
	fi
}
# Print the kernel log as seen from inside the container.
#
# $1: container id
get_dmesg() {
	local cid="$1"

	get_ctr_cmd_output "${cid}" dmesg
}
# Show help about this script
# Prints the usage text below to stdout; the options listed match the
# getopts string parsed in main.
help(){
cat << EOF
Usage: $0 [-h] [options]
Description:
This script runs a kata container and passthrough a vfio device
Options:
-h, Help
-i <string>, Specify initrd or image
-m <string>, Specify kata-runtime machine type for qemu hypervisor
-p <string>, Specify kata-runtime hypervisor
-s <value>, Set sandbox_cgroup_only in the configuration file
EOF
}
# Select and patch the kata configuration file used by the tests.
#
# The first existing default configuration path is replaced by the
# hypervisor-specific configuration found in the same directory, then
# adjusted in place according to HYPERVISOR, MACHINE_TYPE,
# SANDBOX_CGROUP_ONLY and IMAGE_TYPE. Debug output and the enable_iommu
# annotation are always enabled.
setup_configuration_file() {
	local qemu_config_file="configuration-qemu.toml"
	local clh_config_file="configuration-clh.toml"
	local image_file="/opt/kata/share/kata-containers/kata-containers.img"
	local initrd_file="/opt/kata/share/kata-containers/kata-containers-initrd.img"
	local kata_config_file=""
	local file
	local config_dir
	local config_filename
	local config_file

	for file in $(kata-runtime --kata-show-default-config-paths); do
		if [ ! -f "${file}" ]; then
			continue
		fi

		kata_config_file="${file}"
		config_dir=$(dirname "${file}")
		config_filename=""

		if [ "$HYPERVISOR" = "qemu" ]; then
			config_filename="${qemu_config_file}"
		elif [ "$HYPERVISOR" = "clh" ]; then
			config_filename="${clh_config_file}"
		fi

		config_file="${config_dir}/${config_filename}"
		if [ -f "${config_file}" ]; then
			# Replace the default file (possibly a symlink) by a copy of
			# the hypervisor-specific configuration.
			rm -f "${kata_config_file}"
			cp -a "$(realpath "${config_file}")" "${kata_config_file}"
			break
		fi
	done

	# Fail with a clear message instead of letting the sed calls below
	# run against an empty file name.
	[ -n "${kata_config_file}" ] || die "No kata configuration file found in the default config paths"

	# machine type applies to configuration.toml and configuration-qemu.toml
	if [ -n "$MACHINE_TYPE" ]; then
		if [ "$HYPERVISOR" = "qemu" ]; then
			sed -i 's|^machine_type.*|machine_type = "'${MACHINE_TYPE}'"|g' "${kata_config_file}"
		else
			warn "Variable machine_type only applies to qemu. It will be ignored"
		fi
	fi

	# Make sure we have set hot_plug_vfio to a reasonable value
	if [ "$HYPERVISOR" = "qemu" ]; then
		sed -i -e 's|^#*.*hot_plug_vfio.*|hot_plug_vfio = "bridge-port"|' "${kata_config_file}"
	elif [ "$HYPERVISOR" = "clh" ]; then
		sed -i -e 's|^#*.*hot_plug_vfio.*|hot_plug_vfio = "root-port"|' "${kata_config_file}"
	fi

	if [ -n "${SANDBOX_CGROUP_ONLY}" ]; then
		sed -i 's|^sandbox_cgroup_only.*|sandbox_cgroup_only='${SANDBOX_CGROUP_ONLY}'|g' "${kata_config_file}"
	fi

	# Change to initrd or image depending on user input.
	# Non-default configs must be changed to specify either initrd or image, image is default.
	if [ "$IMAGE_TYPE" = "initrd" ]; then
		if grep -q "^image.*" "${kata_config_file}"; then
			if grep -q "^initrd.*" "${kata_config_file}"; then
				# both present: keep the initrd entry only
				sed -i '/^image.*/d' "${kata_config_file}"
			else
				sed -i 's|^image.*|initrd = "'${initrd_file}'"|g' "${kata_config_file}"
			fi
		fi
	else
		if grep -q "^initrd.*" "${kata_config_file}"; then
			if grep -q "^image.*" "${kata_config_file}"; then
				# both present: keep the image entry only
				sed -i '/^initrd.*/d' "${kata_config_file}"
			else
				sed -i 's|^initrd.*|image = "'${image_file}'"|g' "${kata_config_file}"
			fi
		fi
	fi

	# enable debug
	sed -i -e 's/^#\(enable_debug\).*=.*$/\1 = true/g' \
		-e 's/^#\(debug_console_enabled\).*=.*$/\1 = true/g' \
		-e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 mitigations=off agent.log=debug"/g' \
		"${kata_config_file}"

	# enable VFIO relevant hypervisor annotations
	sed -i -e 's/^\(enable_annotations\).*=.*$/\1 = ["enable_iommu"]/' \
		"${kata_config_file}"
}
# Render an OCI config from a template, build the bundle, start the container
# and print the guest kernel log.
#
# $1: container id
# $2: bundle directory
# $3: config.json template containing @PLACEHOLDER@ markers
# $4: host PCI address, substituted for @HOST_PCI@
#
# The VFIO device path and the major/minor numbers are read from the
# variables set by main (vfio_device, vfio_major, vfio_minor, vfio_ctl_major,
# vfio_ctl_minor).
run_test_container() {
local container_id="$1"
local bundle_dir="$2"
local config_json_in="$3"
local host_pci="$4"
# generate final config.json
# Lines starting with '#' (the template's license header) are dropped, and
# the result is written next to this script, where create_bundle picks it up.
sed -e '/^#.*/d' \
-e 's|@VFIO_PATH@|'"${vfio_device}"'|g' \
-e 's|@VFIO_MAJOR@|'"${vfio_major}"'|g' \
-e 's|@VFIO_MINOR@|'"${vfio_minor}"'|g' \
-e 's|@VFIO_CTL_MAJOR@|'"${vfio_ctl_major}"'|g' \
-e 's|@VFIO_CTL_MINOR@|'"${vfio_ctl_minor}"'|g' \
-e 's|@ROOTFS@|'"${bundle_dir}/rootfs"'|g' \
-e 's|@HOST_PCI@|'"${host_pci}"'|g' \
"${config_json_in}" > "${script_path}/config.json"
create_bundle "${bundle_dir}"
# run container
run_container "${container_id}" "${bundle_dir}"
# output VM dmesg
get_dmesg "${container_id}"
}
# Entry point: parse the options, bind the host virtio NIC to vfio-pci and
# run the guest-kernel and vfio passthrough tests one after the other.
#
# Options: see help().
main() {
	local OPTIND
	local opt

	while getopts "hi:m:p:s:" opt;do
		case ${opt} in
		h)
			help
			exit 0;
			;;
		i)
			IMAGE_TYPE="${OPTARG}"
			;;
		m)
			MACHINE_TYPE="${OPTARG}"
			;;
		p)
			HYPERVISOR="${OPTARG}"
			;;
		s)
			SANDBOX_CGROUP_ONLY="${OPTARG}"
			;;
		?)
			# parse failure
			help
			die "Failed to parse arguments"
			;;
		esac
	done
	shift $((OPTIND-1))

	#
	# Get the device ready on the host
	#
	setup_configuration_file

	restart_containerd_service
	sudo modprobe vfio
	sudo modprobe vfio-pci

	# host_pci and the vfio_* variables are deliberately global: cleanup
	# and run_test_container read them.
	host_pci=$(host_pci_addr)
	[ -n "${host_pci}" ] || die "virtio ethernet controller PCI address not found"

	grep -q "intel_iommu=on" /proc/cmdline || \
		die "intel_iommu=on not found in kernel cmdline"

	sudo driverctl set-override "${host_pci}" vfio-pci

	vfio_device="$(get_vfio_path "${host_pci}")"
	[ -n "${vfio_device}" ] || die "vfio device not found"
	vfio_major="$(printf '%d' "$(stat -c '0x%t' "${vfio_device}")")"
	vfio_minor="$(printf '%d' "$(stat -c '0x%T' "${vfio_device}")")"

	# Check that the control device node exists; testing a literal
	# string with -n is always true.
	[ -e /dev/vfio/vfio ] || die "vfio control device not found"
	vfio_ctl_major="$(printf '%d' "$(stat -c '0x%t' /dev/vfio/vfio)")"
	vfio_ctl_minor="$(printf '%d' "$(stat -c '0x%T' /dev/vfio/vfio)")"

	# Get the rootfs we'll use for all tests
	pull_rootfs

	#
	# Run the tests
	#

	# test for guest-kernel mode
	guest_kernel_cid="vfio-guest-kernel-${RANDOM}"
	run_test_container "${guest_kernel_cid}" \
		"${tmp_data_dir}/vfio-guest-kernel" \
		"${script_path}/guest-kernel.json.in" \
		"${host_pci}"
	check_guest_kernel "${guest_kernel_cid}"

	# Remove the container so we can re-use the device for the next test
	clean_env_ctr

	# test for vfio mode
	vfio_cid="vfio-vfio-${RANDOM}"
	run_test_container "${vfio_cid}" \
		"${tmp_data_dir}/vfio-vfio" \
		"${script_path}/vfio.json.in" \
		"${host_pci}"
	check_vfio "${vfio_cid}"
}

# Quote the argument list so arguments containing whitespace stay intact.
main "$@"

View File

@@ -0,0 +1,187 @@
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
{
"ociVersion": "1.0.0-rc2-dev",
"platform": {
"os": "linux",
"arch": "amd64"
},
"annotations": {
"io.katacontainers.config.hypervisor.enable_iommu": "true",
"io.katacontainers.config.runtime.vfio_mode": "vfio"
},
"process": {
"terminal": false,
"consoleSize": {
"height": 0,
"width": 0
},
"user": {
"uid": 0,
"gid": 0
},
"args": [ "/bin/tail", "-f", "/dev/null" ],
"env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm",
"PCIDEVICE_VIRTIO_NET=@HOST_PCI@"
],
"cwd": "/",
"rlimits": [{
"type": "RLIMIT_NOFILE",
"hard": 1024,
"soft": 1024
}],
"noNewPrivileges": true
},
"root": {
"path": "@ROOTFS@",
"readonly": false
},
"hostname": "vfio-test",
"mounts": [{
"destination": "/proc",
"type": "proc",
"source": "proc"
},
{
"destination": "/dev",
"type": "tmpfs",
"source": "tmpfs",
"options": [
"nosuid",
"strictatime",
"mode=755",
"size=65536k"
]
},
{
"destination": "/dev/pts",
"type": "devpts",
"source": "devpts",
"options": [
"nosuid",
"noexec",
"newinstance",
"ptmxmode=0666",
"mode=0620",
"gid=5"
]
},
{
"destination": "/dev/shm",
"type": "tmpfs",
"source": "shm",
"options": [
"nosuid",
"noexec",
"nodev",
"mode=1777",
"size=65536k"
]
},
{
"destination": "/dev/mqueue",
"type": "mqueue",
"source": "mqueue",
"options": [
"nosuid",
"noexec",
"nodev"
]
},
{
"destination": "/sys",
"type": "sysfs",
"source": "sysfs",
"options": [
"nosuid",
"noexec",
"nodev",
"ro"
]
},
{
"destination": "/sys/fs/cgroup",
"type": "cgroup",
"source": "cgroup",
"options": [
"nosuid",
"noexec",
"nodev",
"relatime",
"ro"
]
}
],
"hooks": {},
"linux": {
"devices": [{
"path": "/dev/vfio/vfio",
"type": "c",
"major": @VFIO_CTL_MAJOR@,
"minor": @VFIO_CTL_MINOR@,
"fileMode": 438,
"uid": 0,
"gid": 0
},
{
"path": "@VFIO_PATH@",
"type": "c",
"major": @VFIO_MAJOR@,
"minor": @VFIO_MINOR@,
"fileMode": 384,
"uid": 0,
"gid": 0
}],
"cgroupsPath": "kata/vfiotest",
"resources": {
"devices": [
{"allow":false,"access":"rwm"},
{"allow":true,"type":"c","major":1,"minor":3,"access":"rwm"},
{"allow":true,"type":"c","major":1,"minor":5,"access":"rwm"},
{"allow":true,"type":"c","major":1,"minor":8,"access":"rwm"},
{"allow":true,"type":"c","major":1,"minor":9,"access":"rwm"},
{"allow":true,"type":"c","major":5,"minor":0,"access":"rwm"},
{"allow":true,"type":"c","major":5,"minor":1,"access":"rwm"},
{"allow": true,"access": "rwm","major": @VFIO_CTL_MAJOR@,"minor": @VFIO_CTL_MINOR@,"type": "c"},
{"allow": true,"access": "rwm","major": @VFIO_MAJOR@,"minor": @VFIO_MINOR@,"type": "c"}
]
},
"namespaces": [{
"type": "pid"
},
{
"type": "network"
},
{
"type": "ipc"
},
{
"type": "uts"
},
{
"type": "mount"
}
],
"maskedPaths": [
"/proc/kcore",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/sys/firmware"
],
"readonlyPaths": [
"/proc/asound",
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
]
}
}

View File

@@ -0,0 +1,329 @@
#!/bin/bash
#
# Copyright (c) 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Boot a Fedora cloud image in a QEMU VM that has an emulated Intel IOMMU,
# then run the VFIO functional tests (tests/functional/vfio/run.sh) inside
# that VM over SSH.
set -o xtrace
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
cidir=$(readlink -f $(dirname "$0"))
source /etc/os-release || source /usr/lib/os-release
# <CHANGES HERE>
source "${cidir}/../../common.bash"
export WORKSPACE="${WORKSPACE:-${HOME}}"
export GIT_URL="https://github.com/kata-containers/kata-containers.git"
export KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}"
# </CHANGES>
# Default the proxy settings to empty so they can be tested under 'nounset'.
http_proxy=${http_proxy:-}
https_proxy=${https_proxy:-}
# Host address and port that QEMU forwards to the VM's SSH port (see run_vm).
vm_ip="127.0.15.1"
vm_port="10022"
# Don't save data in /tmp, we need it after rebooting the system
data_dir="${HOME}/functional-vfio-test"
ssh_key_file="${data_dir}/key"
arch=$(uname -m)
# Logs collected from the VM are copied here by cleanup().
artifacts_dir="${WORKSPACE}/artifacts"
# Forcefully stop every qemu-system-<arch> process.
#
# killall exits non-zero when nothing matched; that is not an error for the
# callers (the retry loop in main and the EXIT handler), so it is tolerated
# instead of tripping errexit.
kill_vms() {
	sudo killall -9 "qemu-system-${arch}" || true
}
# EXIT handler: fetch the artifacts from the VM, stop the VM and print the
# collected logs as GitHub Actions log groups.
#
# Every step is best effort so that one failing step does not hide the
# remaining diagnostics.
cleanup() {
	mkdir -p "${artifacts_dir}"
	sudo chown -R "${USER}" "${artifacts_dir}"
	# Quote the pattern so it is handed to scp_vm as one argument rather
	# than being expanded against the host's artifacts directory first.
	scp_vm "${artifacts_dir}/*" "${artifacts_dir}" || true
	kill_vms || true

	echo "::group::L2 journal"
	# The journal is missing when the VM never got far enough to write it.
	cat "${artifacts_dir}/journal.log" || true
	echo "::endgroup::"

	echo "::group::L1 dmesg"
	sudo dmesg || true
	echo "::endgroup::"
}
# Generate a fresh passphrase-less RSA key pair at ${ssh_key_file},
# discarding any previous private key file first.
create_ssh_key() {
	rm -f "${ssh_key_file}"

	ssh-keygen -t rsa -N "" -f "${ssh_key_file}"
}
# Write the instance meta data JSON (a fixed uuid and hostname) to a file.
#
# $1: destination file
create_meta_data() {
file="$1"
cat <<EOF > "${file}"
{
"uuid": "d1b4aafa-5d75-4f9c-87eb-2ceabe110c39",
"hostname": "test"
}
EOF
}
# Write the cloud-config user data for the test VM.
#
# The generated file creates the current user with the given SSH key, exports
# the matching host environment variables through /etc/environment, writes
# the proxy drop-ins, and installs /home/${USER}/run.sh, which sets up kata
# inside the VM and runs the VFIO tests twice (sandbox_cgroup_only false and
# true).
#
# $1: destination file
# $2: SSH public key file to authorize for ${USER}
create_user_data() {
	file="$1"
	ssh_pub_key_file="$2"
	ssh_pub_key="$(cat "${ssh_pub_key_file}")"
	dnf_proxy=""
	service_proxy=""
	docker_user_proxy="{}"

	# Turn the selected host variables into single-quoted 'export' lines.
	environment=$(env | grep -E "ghprb|WORKSPACE|KATA|GIT|JENKINS|_PROXY|_proxy" | \
		sed -e "s/'/'\"'\"'/g" \
		-e "s/\(^[[:alnum:]_]\+\)=/\1='/" \
		-e "s/$/'/" \
		-e 's/^/ export /')

	# no_proxy is optional: default it to empty so 'nounset' does not abort
	# when only the http/https proxies are configured.
	if [ -n "${http_proxy}" ] && [ -n "${https_proxy}" ]; then
		dnf_proxy="proxy=${http_proxy}"
		service_proxy='[Service]
Environment="HTTP_PROXY='${http_proxy}'" "HTTPS_PROXY='${https_proxy}'" "NO_PROXY='${no_proxy:-}'"'
		docker_user_proxy='{"proxies": { "default": {
"httpProxy": "'${http_proxy}'",
"httpsProxy": "'${https_proxy}'",
"noProxy": "'${no_proxy:-}'"
} } }'
	fi

	# Unescaped variables below are expanded now, on the host; escaped ones
	# (\$) are left for the script that runs inside the VM.
	cat <<EOF > "${file}"
#cloud-config
package_upgrade: false
runcmd:
- chown -R ${USER}:${USER} /home/${USER}
- touch /.done
users:
- gecos: User
gid: "1000"
lock-passwd: true
name: ${USER}
shell: /bin/bash
ssh-authorized-keys:
- ${ssh_pub_key}
sudo: ALL=(ALL) NOPASSWD:ALL
uid: "1000"
write_files:
- content: |
[main]
fastestmirror=True
gpgcheck=1
max_parallel_downloads=10
installonly_limit=2
clean_requirements_on_remove=True
keepcache=True
ip_resolve=4
path: /etc/dnf/dnf.conf
- content: |
${environment}
path: /etc/environment
- content: |
${service_proxy}
path: /etc/systemd/system/docker.service.d/http-proxy.conf
- content: |
${service_proxy}
path: /etc/systemd/system/containerd.service.d/http-proxy.conf
- content: |
${docker_user_proxy}
path: ${HOME}/.docker/config.json
- content: |
${docker_user_proxy}
path: /root/.docker/config.json
- content: |
set -x
set -o errexit
set -o nounset
set -o pipefail
set -o errtrace
. /etc/environment
. /etc/os-release
[ "\$ID" = "fedora" ] || (echo >&2 "$0 only supports Fedora"; exit 1)
echo "${dnf_proxy}" | sudo tee -a /etc/dnf/dnf.conf
for i in \$(seq 1 50); do
[ -f /.done ] && break
echo "waiting for cloud-init to finish"
sleep 5;
done
export DEBUG=true
export GOPATH=\${WORKSPACE}/go
export PATH=\${GOPATH}/bin:/usr/local/go/bin:/usr/sbin:\${PATH}
export GOROOT="/usr/local/go"
# Make sure the packages were installed
# Sometimes cloud-init is unable to install them
sudo dnf install -y git wget pciutils driverctl
git config --global user.email "foo@bar"
git config --global user.name "Foo Bar"
sudo mkdir -p /workspace
sudo mount -t 9p -o access=any,trans=virtio,version=9p2000.L workspace /workspace
mkdir -p ${artifacts_dir}
trap "cd /workspace; sudo journalctl -b0 > ${artifacts_dir}/journal.log || true; sudo chown -R \${USER} ${artifacts_dir}" EXIT
pushd /workspace
source tests/common.bash
ensure_yq
cri_containerd=\$(get_from_kata_deps "externals.containerd.lts")
cri_tools=\$(get_from_kata_deps "externals.critools.latest")
install_cri_containerd \${cri_containerd}
install_cri_tools \${cri_tools}
kata_tarball_dir="kata-artifacts"
install_kata
sudo /workspace/tests/functional/vfio/run.sh -s false -p \${KATA_HYPERVISOR} -m q35 -i image
sudo /workspace/tests/functional/vfio/run.sh -s true -p \${KATA_HYPERVISOR} -m q35 -i image
path: /home/${USER}/run.sh
permissions: '0755'
EOF
}
# Build the config-drive ISO (volume label "config-2") that carries the
# meta data and user data files under openstack/latest/.
#
# $1: path of the ISO to (re)create
create_config_iso() {
	iso_file="$1"
	ssh_pub_key_file="${ssh_key_file}.pub"
	iso_data_dir="${data_dir}/d"
	meta_data_file="${iso_data_dir}/openstack/latest/meta_data.json"
	user_data_file="${iso_data_dir}/openstack/latest/user_data"

	# Populate the directory tree that becomes the ISO content.
	mkdir -p $(dirname "${user_data_file}")
	create_meta_data "${meta_data_file}"
	create_user_data "${user_data_file}" "${ssh_pub_key_file}"

	# Start from a clean ISO file.
	if [ -f "${iso_file}" ]; then
		rm -f "${iso_file}"
	fi

	xorriso -as mkisofs -R -V config-2 -o "${iso_file}" "${iso_data_dir}"
}
# Provide a fresh copy of the Fedora cloud image with the IOMMU kernel
# options added to its boot configuration, grown by 20G.
#
# The pristine image is downloaded once and cached next to the target file;
# each call starts again from that cached copy.
#
# $1: path of the raw image to create
pull_fedora_cloud_image() {
	fedora_img="$1"
	fedora_version=38

	# Add a version to the image cache, otherwise the tests would always
	# use the same image without rebuilding it, regardless of the version
	# set in fedora_version
	fedora_img_cache="${fedora_img}.cache.${fedora_version}"
	fedora_img_url="https://download.fedoraproject.org/pub/fedora/linux/releases/${fedora_version}/Cloud/${arch}/images/Fedora-Cloud-Base-${fedora_version}-1.6.${arch}.raw.xz"

	if [ ! -f "${fedora_img_cache}" ]; then
		# -f: fail on HTTP errors instead of saving the error page as the
		# image archive; -S: still report the error despite -s.
		curl -fsSL "${fedora_img_url}" -o "${fedora_img_cache}.xz"
		xz -f -d "${fedora_img_cache}.xz"
	fi

	cp -a "${fedora_img_cache}" "${fedora_img}"

	# setup cloud image
	sudo losetup -D
	loop=$(sudo losetup --show -Pf "${fedora_img}")
	sudo mount "${loop}p2" /mnt

	# add intel_iommu=on to the guest kernel command line
	kernelopts="intel_iommu=on iommu=pt selinux=0 mitigations=off idle=poll kvm.tdp_mmu=0"
	entries=$(sudo ls /mnt/loader/entries/)
	for entry in ${entries}; do
		sudo sed -i '/^options / s/$/ '"${kernelopts}"' /g' /mnt/loader/entries/"${entry}"
	done
	# The trailing space keeps the last injected option separate from the
	# options that were already present.
	sudo sed -i 's|kernelopts="|kernelopts="'"${kernelopts}"' |g' /mnt/grub2/grub.cfg
	sudo sed -i 's|kernelopts=|kernelopts='"${kernelopts}"' |g' /mnt/grub2/grubenv

	# cleanup
	sudo umount -R /mnt/
	sudo losetup -d "${loop}"

	qemu-img resize -f raw "${fedora_img}" +20G
}
# Reload the KVM modules with the TDP MMU disabled.
#
# Unloading and the vendor-specific loads are best effort ('|| true'); only
# loading the core kvm module with tdp_mmu=0 must succeed.
reload_kvm() {
# TDP_MMU is buggy on Hyper-V until v6.3/v6.4
sudo rmmod kvm-intel kvm-amd kvm || true
sudo modprobe kvm tdp_mmu=0
sudo modprobe kvm-intel || true
sudo modprobe kvm-amd || true
}
# Start the test VM in the background (QEMU, q35, KVM) with an emulated Intel
# IOMMU, two virtio NICs, SSH forwarded from ${vm_ip}:${vm_port}, and the
# repository shared over 9p under the mount tag "workspace".
#
# $1: raw disk image to boot
# $2: config-drive ISO attached as cdrom
run_vm() {
	local image="$1"
	local config_iso="$2"
	local disable_modern="off"
	local hostname
	local memory="8192M"
	local cpus=2
	local machine_type="q35"

	hostname="$(hostname)"

	reload_kvm

	# The ISO comes from the second argument rather than from the
	# config_iso_file global set in main.
	sudo /usr/bin/qemu-system-${arch} -m "${memory}" -smp cpus="${cpus}" \
		-cpu host,host-phys-bits \
		-machine ${machine_type},accel=kvm,kernel_irqchip=split \
		-device intel-iommu,intremap=on,caching-mode=on,device-iotlb=on \
		-drive file=${image},if=virtio,aio=threads,format=raw \
		-drive file=${config_iso},if=virtio,media=cdrom \
		-daemonize -enable-kvm -device virtio-rng-pci -display none -vga none \
		-netdev user,hostfwd=tcp:${vm_ip}:${vm_port}-:22,hostname="${hostname}",id=net0 \
		-device virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern="${disable_modern}",iommu_platform=on,ats=on \
		-netdev user,id=net1 \
		-device virtio-net-pci,netdev=net1,disable-legacy=on,disable-modern="${disable_modern}",iommu_platform=on,ats=on \
		-fsdev local,path=${repo_root_dir},security_model=passthrough,id=fs0 \
		-device virtio-9p-pci,fsdev=fs0,mount_tag=workspace
}
# Run a command inside the VM over SSH as ${USER}, using the generated key
# and skipping host key verification.
#
# All arguments are joined into a single remote command string.
ssh_vm() {
cmd=$@
ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentitiesOnly=yes -i "${ssh_key_file}" -p "${vm_port}" "${USER}@${vm_ip}" "${cmd}"
}
# Copy files from the VM to the host with scp, using the same SSH options
# as ssh_vm.
#
# $1: source path inside the VM
# $2: destination path on the host
scp_vm() {
guest_src=$1
host_dest=$2
scp -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentitiesOnly=yes -i "${ssh_key_file}" -P "${vm_port}" ${USER}@${vm_ip}:${guest_src} ${host_dest}
}
# Poll the VM over SSH until it answers.
#
# Makes up to 30 attempts, 5 seconds apart. Returns 0 as soon as a trivial
# remote command succeeds, 1 when every attempt failed.
wait_for_vm() {
	for i in {1..30}; do
		ssh_vm true && return 0

		info "waiting for VM to start"
		sleep 5
	done

	return 1
}
# Entry point: prepare the SSH key, the config ISO and the cloud image, boot
# the VM (up to five attempts) and run the generated test script inside it.
main() {
	local vm_ready="false"

	trap cleanup EXIT

	# Deliberately global: run_vm and the helpers read these.
	config_iso_file="${data_dir}/config.iso"
	fedora_img="${data_dir}/image.img"

	mkdir -p "${data_dir}"

	create_ssh_key

	create_config_iso "${config_iso_file}"

	for i in $(seq 1 5); do
		pull_fedora_cloud_image "${fedora_img}"
		run_vm "${fedora_img}" "${config_iso_file}"
		if wait_for_vm; then
			vm_ready="true"
			break
		fi

		info "Couldn't connect to the VM. Stopping VM and starting a new one."
		# The VM may already be gone; that must not abort the retry loop.
		kill_vms || true
	done

	# Report the real problem instead of failing later in ssh_vm.
	if [ "${vm_ready}" != "true" ]; then
		info "Couldn't connect to the VM after 5 attempts"
		return 1
	fi

	ssh_vm "/home/${USER}/run.sh"
}

# Quote the argument list so arguments containing whitespace stay intact.
main "$@"

View File

@@ -22,3 +22,4 @@ CONFIG_ARM64_UAO
CONFIG_VFIO_MDEV_DEVICE
CONFIG_SPECULATION_MITIGATIONS
CONFIG_X86_SGX
CONFIG_VIRTIO_IOMMU

View File

@@ -1,3 +1,4 @@
# x86 specific items we need in order to handle vfio_mode=vfio devices
CONFIG_INTEL_IOMMU=y
CONFIG_IRQ_REMAP=y
CONFIG_VIRTIO_IOMMU=y

View File

@@ -1 +1 @@
114
115