Merge pull request #10767 from BbolroC/enable-coldplug-vfio-ap-s390x

Enable VFIO-AP coldplug for s390x
This commit is contained in:
Hyounggyu Choi 2025-01-30 12:11:00 +01:00 committed by GitHub
commit 530fedd188
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 218 additions and 78 deletions

View File

@ -12,7 +12,9 @@ use crate::pci;
use crate::sandbox::Sandbox;
use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
use anyhow::{anyhow, Context, Result};
use kata_types::device::{DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_PCI_GK_TYPE, DRIVER_VFIO_PCI_TYPE};
use kata_types::device::{
DRIVER_VFIO_AP_COLD_TYPE, DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_PCI_GK_TYPE, DRIVER_VFIO_PCI_TYPE,
};
use protocols::agent::Device;
use slog::Logger;
use std::ffi::OsStr;
@ -94,7 +96,7 @@ impl DeviceHandler for VfioPciDeviceHandler {
impl DeviceHandler for VfioApDeviceHandler {
#[instrument]
fn driver_types(&self) -> &[&str] {
&[DRIVER_VFIO_AP_TYPE]
&[DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_AP_COLD_TYPE]
}
#[cfg(target_arch = "s390x")]
@ -103,7 +105,16 @@ impl DeviceHandler for VfioApDeviceHandler {
// Force AP bus rescan
fs::write(AP_SCANS_PATH, "1")?;
for apqn in device.options.iter() {
wait_for_ap_device(ctx.sandbox, ap::Address::from_str(apqn)?).await?;
let ap_address = ap::Address::from_str(apqn).context("Failed to parse AP address")?;
match device.type_.as_str() {
DRIVER_VFIO_AP_TYPE => {
wait_for_ap_device(ctx.sandbox, ap_address).await?;
}
DRIVER_VFIO_AP_COLD_TYPE => {
check_ap_device(ctx.sandbox, ap_address).await?;
}
_ => return Err(anyhow!("Unsupported AP device type: {}", device.type_)),
}
}
let dev_update = Some(DevUpdate::new(Z9_CRYPT_DEV_PATH, Z9_CRYPT_DEV_PATH)?);
Ok(SpecUpdate {
@ -201,6 +212,37 @@ async fn wait_for_ap_device(sandbox: &Arc<Mutex<Sandbox>>, address: ap::Address)
Ok(())
}
#[cfg(target_arch = "s390x")]
#[instrument]
async fn check_ap_device(sandbox: &Arc<Mutex<Sandbox>>, address: ap::Address) -> Result<()> {
let ap_path = format!(
"/sys/{}/card{:02x}/{}/online",
AP_ROOT_BUS_PATH, address.adapter_id, address
);
if !Path::new(&ap_path).is_file() {
return Err(anyhow!(
"AP device online file not found or not accessible: {}",
ap_path
));
}
match fs::read_to_string(&ap_path) {
Ok(content) => {
let is_online = content.trim() == "1";
if !is_online {
return Err(anyhow!("AP device {} exists but is not online", address));
}
}
Err(e) => {
return Err(anyhow!(
"Failed to read online status for AP device {}: {}",
address,
e
));
}
}
Ok(())
}
pub async fn wait_for_pci_device(
sandbox: &Arc<Mutex<Sandbox>>,
pcipath: &pci::Path,

View File

@ -22,8 +22,10 @@ pub const DRIVER_VFIO_PCI_GK_TYPE: &str = "vfio-pci-gk";
/// VFIO PCI device to be bound to vfio-pci and made available inside the
/// container as a VFIO device node
pub const DRIVER_VFIO_PCI_TYPE: &str = "vfio-pci";
/// DRIVER_VFIO_AP_TYPE is the device driver for vfio-ap.
/// DRIVER_VFIO_AP_TYPE is the device driver for vfio-ap hotplug.
pub const DRIVER_VFIO_AP_TYPE: &str = "vfio-ap";
/// DRIVER_VFIO_AP_COLD_TYPE is the device driver for vfio-ap coldplug.
pub const DRIVER_VFIO_AP_COLD_TYPE: &str = "vfio-ap-cold";
/// DRIVER_9P_TYPE is the driver for 9pfs volume.
pub const DRIVER_9P_TYPE: &str = "9p";

View File

@ -864,7 +864,15 @@ func (c *Container) createDevices(contConfig *ContainerConfig) error {
// device /dev/vfio/vfio an 2nd the actuall device(s) afterwards.
// Sort the devices starting with device #1 being the VFIO control group
// device and the next the actuall device(s) /dev/vfio/<group>
deviceInfos = sortContainerVFIODevices(hotPlugDevices)
if coldPlugVFIO && c.sandbox.config.VfioMode == config.VFIOModeVFIO {
// DeviceInfo should still be added to the sandbox's device manager
// if vfio_mode is VFIO and coldPlugVFIO is true (e.g. vfio-ap-cold).
// This ensures that ociSpec.Linux.Devices is updated with
// this information before the container is created on the guest.
deviceInfos = sortContainerVFIODevices(coldPlugDevices)
} else {
deviceInfos = sortContainerVFIODevices(hotPlugDevices)
}
for _, info := range deviceInfos {
dev, err := c.sandbox.devManager.NewDevice(info)

View File

@ -108,9 +108,10 @@ var (
kataVirtioFSDevType = "virtio-fs"
kataOverlayDevType = "overlayfs"
kataWatchableBindDevType = "watchable-bind"
kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container
kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel
kataVfioApDevType = "vfio-ap"
kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container
kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel
kataVfioApDevType = "vfio-ap" // VFIO AP device for hot-plugging
kataVfioApColdDevType = "vfio-ap-cold" // VFIO AP device for cold-plugging
sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
sharedDirVirtioFSOptions = []string{}
sharedDirVirtioFSDaxOptions = "dax"
@ -1211,6 +1212,15 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
for i, dev := range devList {
if dev.Type == config.VFIOAPDeviceMediatedType {
kataDevice.Type = kataVfioApDevType
coldPlugVFIO := (c.sandbox.config.HypervisorConfig.ColdPlugVFIO != config.NoPort)
if coldPlugVFIO && c.sandbox.config.VfioMode == config.VFIOModeVFIO {
// A new device type is required for cold-plugging VFIO-AP.
// The VM guest should handle this differently from hot-plugging VFIO-AP
// (e.g., wait_for_ap_device).
// Note that a device already exists for cold-plugging VFIO-AP
// at the time the device type is checked.
kataDevice.Type = kataVfioApColdDevType
}
kataDevice.Options = dev.APDevices
} else {

View File

@ -1218,3 +1218,137 @@ func TestIsNydusRootFSType(t *testing.T) {
})
}
}
func TestKataAgentCreateContainerVFIODevices(t *testing.T) {
assert := assert.New(t)
// Create temporary directory to mock IOMMU filesystem
tmpDir := t.TempDir()
iommuPath := filepath.Join(tmpDir, "sys", "kernel", "iommu_groups", "2", "devices")
err := os.MkdirAll(iommuPath, 0755)
assert.NoError(err)
// Create a dummy device file to satisfy the IOMMU group check
dummyDev := filepath.Join(iommuPath, "0000:00:02.0")
err = os.WriteFile(dummyDev, []byte(""), 0644)
assert.NoError(err)
// Save original paths and restore after test
origConfigSysIOMMUPath := config.SysIOMMUGroupPath
config.SysIOMMUGroupPath = filepath.Join(tmpDir, "sys", "kernel", "iommu_groups")
defer func() {
config.SysIOMMUGroupPath = origConfigSysIOMMUPath
}()
tests := []struct {
name string
hotPlugVFIO config.PCIePort
coldPlugVFIO config.PCIePort
vfioMode config.VFIOModeType
expectVFIODev bool
}{
{
name: "VFIO device with cold plug enabled",
hotPlugVFIO: config.NoPort,
coldPlugVFIO: config.BridgePort,
vfioMode: config.VFIOModeVFIO,
expectVFIODev: true,
},
{
name: "VFIO device with hot plug enabled",
hotPlugVFIO: config.BridgePort,
coldPlugVFIO: config.NoPort,
vfioMode: config.VFIOModeVFIO,
expectVFIODev: true,
},
{
name: "VFIO device with cold plug enabled but guest kernel mode",
hotPlugVFIO: config.NoPort,
coldPlugVFIO: config.BridgePort,
vfioMode: config.VFIOModeGuestKernel,
expectVFIODev: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
vfioGroup := "2"
vfioPath := filepath.Join("/dev/vfio", vfioGroup)
vfioDevice := config.DeviceInfo{
ContainerPath: vfioPath,
HostPath: vfioPath,
DevType: "c",
Major: 10,
Minor: 196,
ColdPlug: tt.coldPlugVFIO != config.NoPort,
Port: tt.coldPlugVFIO,
}
// Setup container config with the VFIO device
contConfig := &ContainerConfig{
DeviceInfos: []config.DeviceInfo{vfioDevice},
}
// Create mock URL for kata agent
url, err := mock.GenerateKataMockHybridVSock()
assert.NoError(err)
defer mock.RemoveKataMockHybridVSock(url)
hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{}
err = hybridVSockTTRPCMock.Start(url)
assert.NoError(err)
defer hybridVSockTTRPCMock.Stop()
k := &kataAgent{
ctx: context.Background(),
state: KataAgentState{
URL: url,
},
}
mockReceiver := &api.MockDeviceReceiver{}
sandbox := &Sandbox{
config: &SandboxConfig{
VfioMode: tt.vfioMode,
HypervisorConfig: HypervisorConfig{
HotPlugVFIO: tt.hotPlugVFIO,
ColdPlugVFIO: tt.coldPlugVFIO,
},
},
devManager: manager.NewDeviceManager("virtio-scsi", false, "", 0, nil),
agent: k,
}
container := &Container{
sandbox: sandbox,
id: "test-container",
config: contConfig,
}
// Call createDevices which should trigger the full flow
err = container.createDevices(contConfig)
assert.NoError(err)
// Find the device in device manager using the original device info
dev := sandbox.devManager.FindDevice(&vfioDevice)
if tt.expectVFIODev {
// For cases where VFIO device should be handled
assert.NotNil(dev, "VFIO device should be found in device manager")
if dev != nil {
// Manually attach the device to increase attach count
err = dev.Attach(context.Background(), mockReceiver)
assert.NoError(err, "Device attachment should succeed")
assert.True(sandbox.devManager.IsDeviceAttached(dev.DeviceID()),
"Device should be marked as attached")
}
} else {
// For cases where VFIO device should be skipped
assert.Nil(dev, "VFIO device should not be found in device manager")
}
})
}
}

View File

@ -62,6 +62,7 @@ func testCreateSandbox(t *testing.T, id string,
Volumes: volumes,
Containers: containers,
Annotations: sandboxAnnotations,
VfioMode: config.VFIOModeGuestKernel,
}
ctx := WithNewAgentFunc(context.Background(), newMockAgent)

View File

@ -1,33 +0,0 @@
#
# Copyright (c) 2024 IBM Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
{
"metadata": {
"name": "test-container",
"namespace": "default"
},
"image": {
"image": "$IMAGE_NAME"
},
"command": [
"sh",
"-c",
"sleep 3600"
],
"mounts": [],
"log_path": "test-container.log",
"linux": {
"security_context": {
"privileged": true
}
},
"devices": [
{
"container_path": "/dev/vfio/$DEVICE_INDEX",
"host_path": "/dev/vfio/$DEVICE_INDEX",
"permissions": "rwm"
}
]
}

View File

@ -58,7 +58,7 @@ setup_hotplug() {
setup_coldplug() {
echo "Set up the configuration file for Coldplug"
setup_config_file "vfio_mode" "replace" "guest-kernel"
setup_config_file "vfio_mode" "replace" "vfio"
setup_config_file "hot_plug_vfio" "comment_out"
setup_config_file "cold_plug_vfio" "replace" "bridge-port"
show_config_file
@ -91,7 +91,7 @@ cleanup() {
echo 0x$(printf -- 'f%.0s' {1..64}) | sudo tee /sys/bus/ap/aqmask > /dev/null
# Remove files used for testing
rm -f ${script_path}/zcrypttest ${script_path}/container-config.json
rm -f ${script_path}/zcrypttest
}
validate_env() {
@ -202,46 +202,22 @@ create_mediated_device() {
run_test() {
local run_index=$1
local container_cli=$2
local test_message=$3
local extra_cmd=${4:-}
local test_message=$2
local extra_cmd=${3:-}
local start_time=$(date +"%Y-%m-%d %H:%M:%S")
[ -n "${dev_index}" ] || { echo "No dev_index" >&2; exit 1; }
# Set time granularity to a second for capturing the log
sleep 1
if [ "${container_cli}" == "crictl" ]; then
sudo crictl pull ${test_image_name}
# Prepare container-config.json
IMAGE_NAME="${test_image_name}" DEVICE_INDEX="${dev_index}" \
envsubst < ${script_path}/container-config.json.in > ${script_path}/container-config.json
# Create a container and run the test
POD_ID=$(sudo crictl runp --runtime=kata ${script_path}/sandbox-config.json)
sudo crictl pods
CONTAINER_ID=$(sudo crictl create $POD_ID ${script_path}/container-config.json ${script_path}/sandbox-config.json)
sudo crictl start $CONTAINER_ID
sudo crictl ps
# Give enough time for the container to start
sleep 5
sudo crictl exec $CONTAINER_ID \
bash -c "lszcrypt ${_APID}.${_APQI} | grep ${APQN} ${extra_cmd}"
sudo ctr image pull --plain-http ${test_image_name}
# Create a container and run the test
sudo ctr run --runtime io.containerd.run.kata.v2 --rm \
--privileged --privileged-without-host-devices \
--device ${dev_base}/${dev_index} ${test_image_name} test \
bash -c "lszcrypt ${_APID}.${_APQI} | grep ${APQN} ${extra_cmd}"
[ $? -eq 0 ] && result=0 || result=1
# Clean up the container
echo "Clean up the container"
sudo crictl stopp $POD_ID
sudo crictl rmp $POD_ID
elif [ "${container_cli}" == "ctr" ]; then
sudo ctr image pull --plain-http ${test_image_name}
# Create a container and run the test
sudo ctr run --runtime io.containerd.run.kata.v2 --rm \
--device ${dev_base}/${dev_index} ${test_image_name} test \
bash -c "lszcrypt ${_APID}.${_APQI} | grep ${APQN}"
[ $? -eq 0 ] && result=0 || result=1
fi
[ $? -eq 0 ] && result=0 || result=1
if [ $result -eq 0 ]; then
echo "ok ${run_index} ${test_category} ${test_message}"
@ -254,10 +230,10 @@ run_test() {
run_tests() {
setup_hotplug
run_test "1" "crictl" "Test can assign a CEX device inside the guest via VFIO-AP Hotplug" "&& zcrypttest -a -v"
run_test "1" "Test can assign a CEX device inside the guest via VFIO-AP Hotplug" "&& zcrypttest -a -v"
setup_coldplug
run_test "2" "ctr" "Test can assign a CEX device inside the guest via VFIO-AP Coldplug"
run_test "2" "Test can assign a CEX device inside the guest via VFIO-AP Coldplug" "&& zcrypttest -a -v"
}
main() {