diff --git a/src/agent/src/device/vfio_device_handler.rs b/src/agent/src/device/vfio_device_handler.rs index 607a7f77ea..59f89e8cb1 100644 --- a/src/agent/src/device/vfio_device_handler.rs +++ b/src/agent/src/device/vfio_device_handler.rs @@ -12,7 +12,9 @@ use crate::pci; use crate::sandbox::Sandbox; use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher}; use anyhow::{anyhow, Context, Result}; -use kata_types::device::{DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_PCI_GK_TYPE, DRIVER_VFIO_PCI_TYPE}; +use kata_types::device::{ + DRIVER_VFIO_AP_COLD_TYPE, DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_PCI_GK_TYPE, DRIVER_VFIO_PCI_TYPE, +}; use protocols::agent::Device; use slog::Logger; use std::ffi::OsStr; @@ -94,7 +96,7 @@ impl DeviceHandler for VfioPciDeviceHandler { impl DeviceHandler for VfioApDeviceHandler { #[instrument] fn driver_types(&self) -> &[&str] { - &[DRIVER_VFIO_AP_TYPE] + &[DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_AP_COLD_TYPE] } #[cfg(target_arch = "s390x")] @@ -103,7 +105,16 @@ impl DeviceHandler for VfioApDeviceHandler { // Force AP bus rescan fs::write(AP_SCANS_PATH, "1")?; for apqn in device.options.iter() { - wait_for_ap_device(ctx.sandbox, ap::Address::from_str(apqn)?).await?; + let ap_address = ap::Address::from_str(apqn).context("Failed to parse AP address")?; + match device.type_.as_str() { + DRIVER_VFIO_AP_TYPE => { + wait_for_ap_device(ctx.sandbox, ap_address).await?; + } + DRIVER_VFIO_AP_COLD_TYPE => { + check_ap_device(ctx.sandbox, ap_address).await?; + } + _ => return Err(anyhow!("Unsupported AP device type: {}", device.type_)), + } } let dev_update = Some(DevUpdate::new(Z9_CRYPT_DEV_PATH, Z9_CRYPT_DEV_PATH)?); Ok(SpecUpdate { @@ -201,6 +212,37 @@ async fn wait_for_ap_device(sandbox: &Arc>, address: ap::Address) Ok(()) } +#[cfg(target_arch = "s390x")] +#[instrument] +async fn check_ap_device(sandbox: &Arc>, address: ap::Address) -> Result<()> { + let ap_path = format!( + "/sys/{}/card{:02x}/{}/online", + AP_ROOT_BUS_PATH, address.adapter_id, address + ); + if !Path::new(&ap_path).is_file() { + return Err(anyhow!( + "AP device online file not found or not accessible: {}", + ap_path + )); + } + match fs::read_to_string(&ap_path) { + Ok(content) => { + let is_online = content.trim() == "1"; + if !is_online { + return Err(anyhow!("AP device {} exists but is not online", address)); + } + } + Err(e) => { + return Err(anyhow!( + "Failed to read online status for AP device {}: {}", + address, + e + )); + } + } + Ok(()) +} + pub async fn wait_for_pci_device( sandbox: &Arc>, pcipath: &pci::Path, diff --git a/src/libs/kata-types/src/device.rs b/src/libs/kata-types/src/device.rs index 1938ef38d3..d9f28d5506 100644 --- a/src/libs/kata-types/src/device.rs +++ b/src/libs/kata-types/src/device.rs @@ -22,8 +22,10 @@ pub const DRIVER_VFIO_PCI_GK_TYPE: &str = "vfio-pci-gk"; /// VFIO PCI device to be bound to vfio-pci and made available inside the /// container as a VFIO device node pub const DRIVER_VFIO_PCI_TYPE: &str = "vfio-pci"; -/// DRIVER_VFIO_AP_TYPE is the device driver for vfio-ap. +/// DRIVER_VFIO_AP_TYPE is the device driver for vfio-ap hotplug. pub const DRIVER_VFIO_AP_TYPE: &str = "vfio-ap"; +/// DRIVER_VFIO_AP_COLD_TYPE is the device driver for vfio-ap coldplug. +pub const DRIVER_VFIO_AP_COLD_TYPE: &str = "vfio-ap-cold"; /// DRIVER_9P_TYPE is the driver for 9pfs volume. pub const DRIVER_9P_TYPE: &str = "9p"; diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index 2276cb5bc5..536fbcff13 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -864,7 +864,15 @@ func (c *Container) createDevices(contConfig *ContainerConfig) error { // device /dev/vfio/vfio an 2nd the actuall device(s) afterwards. // Sort the devices starting with device #1 being the VFIO control group // device and the next the actuall device(s) /dev/vfio/ - deviceInfos = sortContainerVFIODevices(hotPlugDevices) + if coldPlugVFIO && c.sandbox.config.VfioMode == config.VFIOModeVFIO { + // DeviceInfo should still be added to the sandbox's device manager + // if vfio_mode is VFIO and coldPlugVFIO is true (e.g. vfio-ap-cold). + // This ensures that ociSpec.Linux.Devices is updated with + // this information before the container is created on the guest. + deviceInfos = sortContainerVFIODevices(coldPlugDevices) + } else { + deviceInfos = sortContainerVFIODevices(hotPlugDevices) + } for _, info := range deviceInfos { dev, err := c.sandbox.devManager.NewDevice(info) diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 9a794392b9..3e94798670 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -108,9 +108,10 @@ var ( kataVirtioFSDevType = "virtio-fs" kataOverlayDevType = "overlayfs" kataWatchableBindDevType = "watchable-bind" - kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container - kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel - kataVfioApDevType = "vfio-ap" + kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container + kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel + kataVfioApDevType = "vfio-ap" // VFIO AP device for hot-plugging + kataVfioApColdDevType = "vfio-ap-cold" // VFIO AP device for cold-plugging sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} sharedDirVirtioFSOptions = []string{} sharedDirVirtioFSDaxOptions = "dax" @@ -1211,6 +1212,15 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c * for i, dev := range devList { if dev.Type == config.VFIOAPDeviceMediatedType { kataDevice.Type = kataVfioApDevType + coldPlugVFIO := (c.sandbox.config.HypervisorConfig.ColdPlugVFIO != config.NoPort) + if coldPlugVFIO && c.sandbox.config.VfioMode == config.VFIOModeVFIO { + // A new device type is required for cold-plugging VFIO-AP. + // The VM guest should handle this differently from hot-plugging VFIO-AP + // (e.g., wait_for_ap_device). + // Note that a device already exists for cold-plugging VFIO-AP + // at the time the device type is checked. + kataDevice.Type = kataVfioApColdDevType + } kataDevice.Options = dev.APDevices } else { diff --git a/src/runtime/virtcontainers/kata_agent_test.go b/src/runtime/virtcontainers/kata_agent_test.go index 67afc516d1..2c44164552 100644 --- a/src/runtime/virtcontainers/kata_agent_test.go +++ b/src/runtime/virtcontainers/kata_agent_test.go @@ -1218,3 +1218,137 @@ func TestIsNydusRootFSType(t *testing.T) { }) } } + +func TestKataAgentCreateContainerVFIODevices(t *testing.T) { + assert := assert.New(t) + + // Create temporary directory to mock IOMMU filesystem + tmpDir := t.TempDir() + iommuPath := filepath.Join(tmpDir, "sys", "kernel", "iommu_groups", "2", "devices") + err := os.MkdirAll(iommuPath, 0755) + assert.NoError(err) + + // Create a dummy device file to satisfy the IOMMU group check + dummyDev := filepath.Join(iommuPath, "0000:00:02.0") + err = os.WriteFile(dummyDev, []byte(""), 0644) + assert.NoError(err) + + // Save original paths and restore after test + origConfigSysIOMMUPath := config.SysIOMMUGroupPath + config.SysIOMMUGroupPath = filepath.Join(tmpDir, "sys", "kernel", "iommu_groups") + defer func() { + config.SysIOMMUGroupPath = origConfigSysIOMMUPath + }() + + tests := []struct { + name string + hotPlugVFIO config.PCIePort + coldPlugVFIO config.PCIePort + vfioMode config.VFIOModeType + expectVFIODev bool + }{ + { + name: "VFIO device with cold plug enabled", + hotPlugVFIO: config.NoPort, + coldPlugVFIO: config.BridgePort, + vfioMode: config.VFIOModeVFIO, + expectVFIODev: true, + }, + { + name: "VFIO device with hot plug enabled", + hotPlugVFIO: config.BridgePort, + coldPlugVFIO: config.NoPort, + vfioMode: config.VFIOModeVFIO, + expectVFIODev: true, + }, + { + name: "VFIO device with cold plug enabled but guest kernel mode", + hotPlugVFIO: config.NoPort, + coldPlugVFIO: config.BridgePort, + vfioMode: config.VFIOModeGuestKernel, + expectVFIODev: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + vfioGroup := "2" + vfioPath := filepath.Join("/dev/vfio", vfioGroup) + + vfioDevice := config.DeviceInfo{ + ContainerPath: vfioPath, + HostPath: vfioPath, + DevType: "c", + Major: 10, + Minor: 196, + ColdPlug: tt.coldPlugVFIO != config.NoPort, + Port: tt.coldPlugVFIO, + } + + // Setup container config with the VFIO device + contConfig := &ContainerConfig{ + DeviceInfos: []config.DeviceInfo{vfioDevice}, + } + + // Create mock URL for kata agent + url, err := mock.GenerateKataMockHybridVSock() + assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) + + hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} + err = hybridVSockTTRPCMock.Start(url) + assert.NoError(err) + defer hybridVSockTTRPCMock.Stop() + + k := &kataAgent{ + ctx: context.Background(), + state: KataAgentState{ + URL: url, + }, + } + + mockReceiver := &api.MockDeviceReceiver{} + + sandbox := &Sandbox{ + config: &SandboxConfig{ + VfioMode: tt.vfioMode, + HypervisorConfig: HypervisorConfig{ + HotPlugVFIO: tt.hotPlugVFIO, + ColdPlugVFIO: tt.coldPlugVFIO, + }, + }, + devManager: manager.NewDeviceManager("virtio-scsi", false, "", 0, nil), + agent: k, + } + + container := &Container{ + sandbox: sandbox, + id: "test-container", + config: contConfig, + } + + // Call createDevices which should trigger the full flow + err = container.createDevices(contConfig) + assert.NoError(err) + + // Find the device in device manager using the original device info + dev := sandbox.devManager.FindDevice(&vfioDevice) + + if tt.expectVFIODev { + // For cases where VFIO device should be handled + assert.NotNil(dev, "VFIO device should be found in device manager") + if dev != nil { + // Manually attach the device to increase attach count + err = dev.Attach(context.Background(), mockReceiver) + assert.NoError(err, "Device attachment should succeed") + + assert.True(sandbox.devManager.IsDeviceAttached(dev.DeviceID()), + "Device should be marked as attached") + } + } else { + // For cases where VFIO device should be skipped + assert.Nil(dev, "VFIO device should not be found in device manager") + } + }) + } +} diff --git a/src/runtime/virtcontainers/sandbox_test.go b/src/runtime/virtcontainers/sandbox_test.go index cd3b175a7b..c09e297f5b 100644 --- a/src/runtime/virtcontainers/sandbox_test.go +++ b/src/runtime/virtcontainers/sandbox_test.go @@ -62,6 +62,7 @@ func testCreateSandbox(t *testing.T, id string, Volumes: volumes, Containers: containers, Annotations: sandboxAnnotations, + VfioMode: config.VFIOModeGuestKernel, } ctx := WithNewAgentFunc(context.Background(), newMockAgent) diff --git a/tests/functional/vfio-ap/container-config.json.in b/tests/functional/vfio-ap/container-config.json.in deleted file mode 100644 index cd78005d6b..0000000000 --- a/tests/functional/vfio-ap/container-config.json.in +++ /dev/null @@ -1,33 +0,0 @@ -# -# Copyright (c) 2024 IBM Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -{ - "metadata": { - "name": "test-container", - "namespace": "default" - }, - "image": { - "image": "$IMAGE_NAME" - }, - "command": [ - "sh", - "-c", - "sleep 3600" - ], - "mounts": [], - "log_path": "test-container.log", - "linux": { - "security_context": { - "privileged": true - } - }, - "devices": [ - { - "container_path": "/dev/vfio/$DEVICE_INDEX", - "host_path": "/dev/vfio/$DEVICE_INDEX", - "permissions": "rwm" - } - ] - } diff --git a/tests/functional/vfio-ap/run.sh b/tests/functional/vfio-ap/run.sh index c3bd41eff1..0154d97a77 100755 --- a/tests/functional/vfio-ap/run.sh +++ b/tests/functional/vfio-ap/run.sh @@ -58,7 +58,7 @@ setup_hotplug() { setup_coldplug() { echo "Set up the configuration file for Coldplug" - setup_config_file "vfio_mode" "replace" "guest-kernel" + setup_config_file "vfio_mode" "replace" "vfio" setup_config_file "hot_plug_vfio" "comment_out" setup_config_file "cold_plug_vfio" "replace" "bridge-port" show_config_file @@ -91,7 +91,7 @@ cleanup() { echo 0x$(printf -- 'f%.0s' {1..64}) | sudo tee /sys/bus/ap/aqmask > /dev/null # Remove files used for testing - rm -f ${script_path}/zcrypttest ${script_path}/container-config.json + rm -f ${script_path}/zcrypttest } validate_env() { @@ -202,46 +202,22 @@ create_mediated_device() { run_test() { local run_index=$1 - local container_cli=$2 - local test_message=$3 - local extra_cmd=${4:-} + local test_message=$2 + local extra_cmd=${3:-} local start_time=$(date +"%Y-%m-%d %H:%M:%S") [ -n "${dev_index}" ] || { echo "No dev_index" >&2; exit 1; } # Set time granularity to a second for capturing the log sleep 1 - if [ "${container_cli}" == "crictl" ]; then - sudo crictl pull ${test_image_name} - # Prepare container-config.json - IMAGE_NAME="${test_image_name}" DEVICE_INDEX="${dev_index}" \ - envsubst < ${script_path}/container-config.json.in > ${script_path}/container-config.json - # Create a container and run the test - POD_ID=$(sudo crictl runp --runtime=kata ${script_path}/sandbox-config.json) - sudo crictl pods - CONTAINER_ID=$(sudo crictl create $POD_ID ${script_path}/container-config.json ${script_path}/sandbox-config.json) - sudo crictl start $CONTAINER_ID - sudo crictl ps - # Give enough time for the container to start - sleep 5 - sudo crictl exec $CONTAINER_ID \ - bash -c "lszcrypt ${_APID}.${_APQI} | grep ${APQN} ${extra_cmd}" + sudo ctr image pull --plain-http ${test_image_name} + # Create a container and run the test + sudo ctr run --runtime io.containerd.run.kata.v2 --rm \ + --privileged --privileged-without-host-devices \ + --device ${dev_base}/${dev_index} ${test_image_name} test \ + bash -c "lszcrypt ${_APID}.${_APQI} | grep ${APQN} ${extra_cmd}" - [ $? -eq 0 ] && result=0 || result=1 - - # Clean up the container - echo "Clean up the container" - sudo crictl stopp $POD_ID - sudo crictl rmp $POD_ID - elif [ "${container_cli}" == "ctr" ]; then - sudo ctr image pull --plain-http ${test_image_name} - # Create a container and run the test - sudo ctr run --runtime io.containerd.run.kata.v2 --rm \ - --device ${dev_base}/${dev_index} ${test_image_name} test \ - bash -c "lszcrypt ${_APID}.${_APQI} | grep ${APQN}" - - [ $? -eq 0 ] && result=0 || result=1 - fi + [ $? -eq 0 ] && result=0 || result=1 if [ $result -eq 0 ]; then echo "ok ${run_index} ${test_category} ${test_message}" @@ -254,10 +230,10 @@ run_test() { run_tests() { setup_hotplug - run_test "1" "crictl" "Test can assign a CEX device inside the guest via VFIO-AP Hotplug" "&& zcrypttest -a -v" + run_test "1" "Test can assign a CEX device inside the guest via VFIO-AP Hotplug" "&& zcrypttest -a -v" setup_coldplug - run_test "2" "ctr" "Test can assign a CEX device inside the guest via VFIO-AP Coldplug" + run_test "2" "Test can assign a CEX device inside the guest via VFIO-AP Coldplug" "&& zcrypttest -a -v" } main() {