From 419b5ed71577962d83878f126289685ded0f7a93 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Tue, 21 Jan 2025 12:58:34 +0100 Subject: [PATCH 1/5] runtime: Add DeviceInfo to Container for VFIO coldplug configuration Even though ociSpec.Linux.Devices is preserved when vfio_mode is VFIO, it has not been updated correctly for coldplug scenarios. This happens because the device info passed to the agent via CreateContainerRequest is dropped by the Kata runtime. This commit ensures that the device info is added to the sandbox's device manager when vfio_mode is VFIO and coldPlugVFIO is true (e.g., vfio-ap-cold), allowing ociSpec.Linux.Devices to be properly updated with the device information before the container is created on the guest. Signed-off-by: Hyounggyu Choi --- src/runtime/virtcontainers/container.go | 10 +- src/runtime/virtcontainers/kata_agent_test.go | 134 ++++++++++++++++++ src/runtime/virtcontainers/sandbox_test.go | 1 + 3 files changed, 144 insertions(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index 2276cb5bc5..536fbcff13 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -864,7 +864,15 @@ func (c *Container) createDevices(contConfig *ContainerConfig) error { // device /dev/vfio/vfio an 2nd the actuall device(s) afterwards. // Sort the devices starting with device #1 being the VFIO control group // device and the next the actuall device(s) /dev/vfio/ - deviceInfos = sortContainerVFIODevices(hotPlugDevices) + if coldPlugVFIO && c.sandbox.config.VfioMode == config.VFIOModeVFIO { + // DeviceInfo should still be added to the sandbox's device manager + // if vfio_mode is VFIO and coldPlugVFIO is true (e.g. vfio-ap-cold). + // This ensures that ociSpec.Linux.Devices is updated with + // this information before the container is created on the guest. + deviceInfos = sortContainerVFIODevices(coldPlugDevices) + } else { + deviceInfos = sortContainerVFIODevices(hotPlugDevices) + } for _, info := range deviceInfos { dev, err := c.sandbox.devManager.NewDevice(info) diff --git a/src/runtime/virtcontainers/kata_agent_test.go b/src/runtime/virtcontainers/kata_agent_test.go index 67afc516d1..2c44164552 100644 --- a/src/runtime/virtcontainers/kata_agent_test.go +++ b/src/runtime/virtcontainers/kata_agent_test.go @@ -1218,3 +1218,137 @@ func TestIsNydusRootFSType(t *testing.T) { }) } } + +func TestKataAgentCreateContainerVFIODevices(t *testing.T) { + assert := assert.New(t) + + // Create temporary directory to mock IOMMU filesystem + tmpDir := t.TempDir() + iommuPath := filepath.Join(tmpDir, "sys", "kernel", "iommu_groups", "2", "devices") + err := os.MkdirAll(iommuPath, 0755) + assert.NoError(err) + + // Create a dummy device file to satisfy the IOMMU group check + dummyDev := filepath.Join(iommuPath, "0000:00:02.0") + err = os.WriteFile(dummyDev, []byte(""), 0644) + assert.NoError(err) + + // Save original paths and restore after test + origConfigSysIOMMUPath := config.SysIOMMUGroupPath + config.SysIOMMUGroupPath = filepath.Join(tmpDir, "sys", "kernel", "iommu_groups") + defer func() { + config.SysIOMMUGroupPath = origConfigSysIOMMUPath + }() + + tests := []struct { + name string + hotPlugVFIO config.PCIePort + coldPlugVFIO config.PCIePort + vfioMode config.VFIOModeType + expectVFIODev bool + }{ + { + name: "VFIO device with cold plug enabled", + hotPlugVFIO: config.NoPort, + coldPlugVFIO: config.BridgePort, + vfioMode: config.VFIOModeVFIO, + expectVFIODev: true, + }, + { + name: "VFIO device with hot plug enabled", + hotPlugVFIO: config.BridgePort, + coldPlugVFIO: config.NoPort, + vfioMode: config.VFIOModeVFIO, + expectVFIODev: true, + }, + { + name: "VFIO device with cold plug enabled but guest kernel mode", + hotPlugVFIO: config.NoPort, + coldPlugVFIO: config.BridgePort, + vfioMode: config.VFIOModeGuestKernel, + expectVFIODev: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + vfioGroup := "2" + vfioPath := filepath.Join("/dev/vfio", vfioGroup) + + vfioDevice := config.DeviceInfo{ + ContainerPath: vfioPath, + HostPath: vfioPath, + DevType: "c", + Major: 10, + Minor: 196, + ColdPlug: tt.coldPlugVFIO != config.NoPort, + Port: tt.coldPlugVFIO, + } + + // Setup container config with the VFIO device + contConfig := &ContainerConfig{ + DeviceInfos: []config.DeviceInfo{vfioDevice}, + } + + // Create mock URL for kata agent + url, err := mock.GenerateKataMockHybridVSock() + assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) + + hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} + err = hybridVSockTTRPCMock.Start(url) + assert.NoError(err) + defer hybridVSockTTRPCMock.Stop() + + k := &kataAgent{ + ctx: context.Background(), + state: KataAgentState{ + URL: url, + }, + } + + mockReceiver := &api.MockDeviceReceiver{} + + sandbox := &Sandbox{ + config: &SandboxConfig{ + VfioMode: tt.vfioMode, + HypervisorConfig: HypervisorConfig{ + HotPlugVFIO: tt.hotPlugVFIO, + ColdPlugVFIO: tt.coldPlugVFIO, + }, + }, + devManager: manager.NewDeviceManager("virtio-scsi", false, "", 0, nil), + agent: k, + } + + container := &Container{ + sandbox: sandbox, + id: "test-container", + config: contConfig, + } + + // Call createDevices which should trigger the full flow + err = container.createDevices(contConfig) + assert.NoError(err) + + // Find the device in device manager using the original device info + dev := sandbox.devManager.FindDevice(&vfioDevice) + + if tt.expectVFIODev { + // For cases where VFIO device should be handled + assert.NotNil(dev, "VFIO device should be found in device manager") + if dev != nil { + // Manually attach the device to increase attach count + err = dev.Attach(context.Background(), mockReceiver) + assert.NoError(err, "Device attachment should succeed") + + assert.True(sandbox.devManager.IsDeviceAttached(dev.DeviceID()), + "Device should be marked as attached") + } + } else { + // For cases where VFIO device should be skipped + assert.Nil(dev, "VFIO device should not be found in device manager") + } + }) + } +} diff --git a/src/runtime/virtcontainers/sandbox_test.go b/src/runtime/virtcontainers/sandbox_test.go index cd3b175a7b..c09e297f5b 100644 --- a/src/runtime/virtcontainers/sandbox_test.go +++ b/src/runtime/virtcontainers/sandbox_test.go @@ -62,6 +62,7 @@ func testCreateSandbox(t *testing.T, id string, Volumes: volumes, Containers: containers, Annotations: sandboxAnnotations, + VfioMode: config.VFIOModeGuestKernel, } ctx := WithNewAgentFunc(context.Background(), newMockAgent) From 4a6ba534f1c8886fc513c3b85d93cf187192e5f8 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Tue, 21 Jan 2025 13:23:06 +0100 Subject: [PATCH 2/5] runtime: Introduce new gRPC device type for VFIO-AP coldplug This commit introduces a new gRPC device type, `vfio-ap-cold`, to support VFIO-AP coldplug. This enables the VM guest to handle passthrough devices differently from VFIO-AP hotplug. With this new type, the guest no longer needs to wait for events (e.g., device addition) because the device already exists at the time the device type is checked. Signed-off-by: Hyounggyu Choi --- src/runtime/virtcontainers/kata_agent.go | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 9a794392b9..3e94798670 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -108,9 +108,10 @@ var ( kataVirtioFSDevType = "virtio-fs" kataOverlayDevType = "overlayfs" kataWatchableBindDevType = "watchable-bind" - kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container - kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel - kataVfioApDevType = "vfio-ap" + kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container + kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel + kataVfioApDevType = "vfio-ap" // VFIO AP device for hot-plugging + kataVfioApColdDevType = "vfio-ap-cold" // VFIO AP device for cold-plugging sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} sharedDirVirtioFSOptions = []string{} sharedDirVirtioFSDaxOptions = "dax" @@ -1211,6 +1212,15 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c * for i, dev := range devList { if dev.Type == config.VFIOAPDeviceMediatedType { kataDevice.Type = kataVfioApDevType + coldPlugVFIO := (c.sandbox.config.HypervisorConfig.ColdPlugVFIO != config.NoPort) + if coldPlugVFIO && c.sandbox.config.VfioMode == config.VFIOModeVFIO { + // A new device type is required for cold-plugging VFIO-AP. + // The VM guest should handle this differently from hot-plugging VFIO-AP + // (e.g., wait_for_ap_device). + // Note that a device already exists for cold-plugging VFIO-AP + // at the time the device type is checked. + kataDevice.Type = kataVfioApColdDevType + } kataDevice.Options = dev.APDevices } else { From 200cbfd0b0cfa0ebb3ab89697100b49a2ba03fc6 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Tue, 21 Jan 2025 14:03:43 +0100 Subject: [PATCH 3/5] kata-types: Introduce new type `vfio-ap-cold` for VFIO-AP coldplug This newly introduced type will be used by the VFIO-AP device handler on the agent. Signed-off-by: Hyounggyu Choi --- src/libs/kata-types/src/device.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/libs/kata-types/src/device.rs b/src/libs/kata-types/src/device.rs index 1938ef38d3..d9f28d5506 100644 --- a/src/libs/kata-types/src/device.rs +++ b/src/libs/kata-types/src/device.rs @@ -22,8 +22,10 @@ pub const DRIVER_VFIO_PCI_GK_TYPE: &str = "vfio-pci-gk"; /// VFIO PCI device to be bound to vfio-pci and made available inside the /// container as a VFIO device node pub const DRIVER_VFIO_PCI_TYPE: &str = "vfio-pci"; -/// DRIVER_VFIO_AP_TYPE is the device driver for vfio-ap. +/// DRIVER_VFIO_AP_TYPE is the device driver for vfio-ap hotplug. pub const DRIVER_VFIO_AP_TYPE: &str = "vfio-ap"; +/// DRIVER_VFIO_AP_COLD_TYPE is the device driver for vfio-ap coldplug. +pub const DRIVER_VFIO_AP_COLD_TYPE: &str = "vfio-ap-cold"; /// DRIVER_9P_TYPE is the driver for 9pfs volume. pub const DRIVER_9P_TYPE: &str = "9p"; From 47db9b3773144cd7aade62a450fc63020845e7de Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Tue, 21 Jan 2025 13:54:00 +0100 Subject: [PATCH 4/5] agent: Run check_ap_device() for VFIO-AP coldplug This commit updates the device handler to call check_ap_device() instead of wait_for_ap_device() for VFIO-AP coldplug. The handler now returns a SpecUpdate for passthrough devices if the device is online (e.g., `/sys/devices/ap/card05/05.001f/online` is set to 1). Signed-off-by: Hyounggyu Choi --- src/agent/src/device/vfio_device_handler.rs | 48 +++++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/src/agent/src/device/vfio_device_handler.rs b/src/agent/src/device/vfio_device_handler.rs index 607a7f77ea..59f89e8cb1 100644 --- a/src/agent/src/device/vfio_device_handler.rs +++ b/src/agent/src/device/vfio_device_handler.rs @@ -12,7 +12,9 @@ use crate::pci; use crate::sandbox::Sandbox; use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher}; use anyhow::{anyhow, Context, Result}; -use kata_types::device::{DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_PCI_GK_TYPE, DRIVER_VFIO_PCI_TYPE}; +use kata_types::device::{ + DRIVER_VFIO_AP_COLD_TYPE, DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_PCI_GK_TYPE, DRIVER_VFIO_PCI_TYPE, +}; use protocols::agent::Device; use slog::Logger; use std::ffi::OsStr; @@ -94,7 +96,7 @@ impl DeviceHandler for VfioPciDeviceHandler { impl DeviceHandler for VfioApDeviceHandler { #[instrument] fn driver_types(&self) -> &[&str] { - &[DRIVER_VFIO_AP_TYPE] + &[DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_AP_COLD_TYPE] } #[cfg(target_arch = "s390x")] @@ -103,7 +105,16 @@ impl DeviceHandler for VfioApDeviceHandler { // Force AP bus rescan fs::write(AP_SCANS_PATH, "1")?; for apqn in device.options.iter() { - wait_for_ap_device(ctx.sandbox, ap::Address::from_str(apqn)?).await?; + let ap_address = ap::Address::from_str(apqn).context("Failed to parse AP address")?; + match device.type_.as_str() { + DRIVER_VFIO_AP_TYPE => { + wait_for_ap_device(ctx.sandbox, ap_address).await?; + } + DRIVER_VFIO_AP_COLD_TYPE => { + check_ap_device(ctx.sandbox, ap_address).await?; + } + _ => return Err(anyhow!("Unsupported AP device type: {}", device.type_)), + } } let dev_update = Some(DevUpdate::new(Z9_CRYPT_DEV_PATH, Z9_CRYPT_DEV_PATH)?); Ok(SpecUpdate { @@ -201,6 +212,37 @@ async fn wait_for_ap_device(sandbox: &Arc>, address: ap::Address) Ok(()) } +#[cfg(target_arch = "s390x")] +#[instrument] +async fn check_ap_device(sandbox: &Arc>, address: ap::Address) -> Result<()> { + let ap_path = format!( + "/sys/{}/card{:02x}/{}/online", + AP_ROOT_BUS_PATH, address.adapter_id, address + ); + if !Path::new(&ap_path).is_file() { + return Err(anyhow!( + "AP device online file not found or not accessible: {}", + ap_path + )); + } + match fs::read_to_string(&ap_path) { + Ok(content) => { + let is_online = content.trim() == "1"; + if !is_online { + return Err(anyhow!("AP device {} exists but is not online", address)); + } + } + Err(e) => { + return Err(anyhow!( + "Failed to read online status for AP device {}: {}", + address, + e + )); + } + } + Ok(()) +} + pub async fn wait_for_pci_device( sandbox: &Arc>, pcipath: &pci::Path, From dde627cef473f896db3716d4e5e9324ec0ad5534 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Tue, 21 Jan 2025 14:09:30 +0100 Subject: [PATCH 5/5] test: Run full set of zcrypttest for VFIO-AP coldplug Previously, the test for VFIO-AP coldplug only checked whether a passthrough device was attached to the VM guest. This commit expands the test to include a full set of zcrypttest to verify that the device functions properly within a container. Additionally, since containerd has been upgraded to v1.7.25 on the test machine, it is no longer necessary to run the test via crictl. The commit removes all related codes/files. Signed-off-by: Hyounggyu Choi --- .../vfio-ap/container-config.json.in | 33 ------------ tests/functional/vfio-ap/run.sh | 50 +++++-------------- 2 files changed, 13 insertions(+), 70 deletions(-) delete mode 100644 tests/functional/vfio-ap/container-config.json.in diff --git a/tests/functional/vfio-ap/container-config.json.in b/tests/functional/vfio-ap/container-config.json.in deleted file mode 100644 index cd78005d6b..0000000000 --- a/tests/functional/vfio-ap/container-config.json.in +++ /dev/null @@ -1,33 +0,0 @@ -# -# Copyright (c) 2024 IBM Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -{ - "metadata": { - "name": "test-container", - "namespace": "default" - }, - "image": { - "image": "$IMAGE_NAME" - }, - "command": [ - "sh", - "-c", - "sleep 3600" - ], - "mounts": [], - "log_path": "test-container.log", - "linux": { - "security_context": { - "privileged": true - } - }, - "devices": [ - { - "container_path": "/dev/vfio/$DEVICE_INDEX", - "host_path": "/dev/vfio/$DEVICE_INDEX", - "permissions": "rwm" - } - ] - } diff --git a/tests/functional/vfio-ap/run.sh b/tests/functional/vfio-ap/run.sh index c3bd41eff1..0154d97a77 100755 --- a/tests/functional/vfio-ap/run.sh +++ b/tests/functional/vfio-ap/run.sh @@ -58,7 +58,7 @@ setup_hotplug() { setup_coldplug() { echo "Set up the configuration file for Coldplug" - setup_config_file "vfio_mode" "replace" "guest-kernel" + setup_config_file "vfio_mode" "replace" "vfio" setup_config_file "hot_plug_vfio" "comment_out" setup_config_file "cold_plug_vfio" "replace" "bridge-port" show_config_file @@ -91,7 +91,7 @@ cleanup() { echo 0x$(printf -- 'f%.0s' {1..64}) | sudo tee /sys/bus/ap/aqmask > /dev/null # Remove files used for testing - rm -f ${script_path}/zcrypttest ${script_path}/container-config.json + rm -f ${script_path}/zcrypttest } validate_env() { @@ -202,46 +202,22 @@ create_mediated_device() { run_test() { local run_index=$1 - local container_cli=$2 - local test_message=$3 - local extra_cmd=${4:-} + local test_message=$2 + local extra_cmd=${3:-} local start_time=$(date +"%Y-%m-%d %H:%M:%S") [ -n "${dev_index}" ] || { echo "No dev_index" >&2; exit 1; } # Set time granularity to a second for capturing the log sleep 1 - if [ "${container_cli}" == "crictl" ]; then - sudo crictl pull ${test_image_name} - # Prepare container-config.json - IMAGE_NAME="${test_image_name}" DEVICE_INDEX="${dev_index}" \ - envsubst < ${script_path}/container-config.json.in > ${script_path}/container-config.json - # Create a container and run the test - POD_ID=$(sudo crictl runp --runtime=kata ${script_path}/sandbox-config.json) - sudo crictl pods - CONTAINER_ID=$(sudo crictl create $POD_ID ${script_path}/container-config.json ${script_path}/sandbox-config.json) - sudo crictl start $CONTAINER_ID - sudo crictl ps - # Give enough time for the container to start - sleep 5 - sudo crictl exec $CONTAINER_ID \ - bash -c "lszcrypt ${_APID}.${_APQI} | grep ${APQN} ${extra_cmd}" + sudo ctr image pull --plain-http ${test_image_name} + # Create a container and run the test + sudo ctr run --runtime io.containerd.run.kata.v2 --rm \ + --privileged --privileged-without-host-devices \ + --device ${dev_base}/${dev_index} ${test_image_name} test \ + bash -c "lszcrypt ${_APID}.${_APQI} | grep ${APQN} ${extra_cmd}" - [ $? -eq 0 ] && result=0 || result=1 - - # Clean up the container - echo "Clean up the container" - sudo crictl stopp $POD_ID - sudo crictl rmp $POD_ID - elif [ "${container_cli}" == "ctr" ]; then - sudo ctr image pull --plain-http ${test_image_name} - # Create a container and run the test - sudo ctr run --runtime io.containerd.run.kata.v2 --rm \ - --device ${dev_base}/${dev_index} ${test_image_name} test \ - bash -c "lszcrypt ${_APID}.${_APQI} | grep ${APQN}" - - [ $? -eq 0 ] && result=0 || result=1 - fi + [ $? -eq 0 ] && result=0 || result=1 if [ $result -eq 0 ]; then echo "ok ${run_index} ${test_category} ${test_message}" @@ -254,10 +230,10 @@ run_test() { run_tests() { setup_hotplug - run_test "1" "crictl" "Test can assign a CEX device inside the guest via VFIO-AP Hotplug" "&& zcrypttest -a -v" + run_test "1" "Test can assign a CEX device inside the guest via VFIO-AP Hotplug" "&& zcrypttest -a -v" setup_coldplug - run_test "2" "ctr" "Test can assign a CEX device inside the guest via VFIO-AP Coldplug" + run_test "2" "Test can assign a CEX device inside the guest via VFIO-AP Coldplug" "&& zcrypttest -a -v" } main() {