mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-21 13:08:00 +00:00
Merge pull request #10767 from BbolroC/enable-coldplug-vfio-ap-s390x
Enable VFIO-AP coldplug for s390x
This commit is contained in:
commit
530fedd188
@ -12,7 +12,9 @@ use crate::pci;
|
||||
use crate::sandbox::Sandbox;
|
||||
use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use kata_types::device::{DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_PCI_GK_TYPE, DRIVER_VFIO_PCI_TYPE};
|
||||
use kata_types::device::{
|
||||
DRIVER_VFIO_AP_COLD_TYPE, DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_PCI_GK_TYPE, DRIVER_VFIO_PCI_TYPE,
|
||||
};
|
||||
use protocols::agent::Device;
|
||||
use slog::Logger;
|
||||
use std::ffi::OsStr;
|
||||
@ -94,7 +96,7 @@ impl DeviceHandler for VfioPciDeviceHandler {
|
||||
impl DeviceHandler for VfioApDeviceHandler {
|
||||
#[instrument]
|
||||
fn driver_types(&self) -> &[&str] {
|
||||
&[DRIVER_VFIO_AP_TYPE]
|
||||
&[DRIVER_VFIO_AP_TYPE, DRIVER_VFIO_AP_COLD_TYPE]
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "s390x")]
|
||||
@ -103,7 +105,16 @@ impl DeviceHandler for VfioApDeviceHandler {
|
||||
// Force AP bus rescan
|
||||
fs::write(AP_SCANS_PATH, "1")?;
|
||||
for apqn in device.options.iter() {
|
||||
wait_for_ap_device(ctx.sandbox, ap::Address::from_str(apqn)?).await?;
|
||||
let ap_address = ap::Address::from_str(apqn).context("Failed to parse AP address")?;
|
||||
match device.type_.as_str() {
|
||||
DRIVER_VFIO_AP_TYPE => {
|
||||
wait_for_ap_device(ctx.sandbox, ap_address).await?;
|
||||
}
|
||||
DRIVER_VFIO_AP_COLD_TYPE => {
|
||||
check_ap_device(ctx.sandbox, ap_address).await?;
|
||||
}
|
||||
_ => return Err(anyhow!("Unsupported AP device type: {}", device.type_)),
|
||||
}
|
||||
}
|
||||
let dev_update = Some(DevUpdate::new(Z9_CRYPT_DEV_PATH, Z9_CRYPT_DEV_PATH)?);
|
||||
Ok(SpecUpdate {
|
||||
@ -201,6 +212,37 @@ async fn wait_for_ap_device(sandbox: &Arc<Mutex<Sandbox>>, address: ap::Address)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "s390x")]
|
||||
#[instrument]
|
||||
async fn check_ap_device(sandbox: &Arc<Mutex<Sandbox>>, address: ap::Address) -> Result<()> {
|
||||
let ap_path = format!(
|
||||
"/sys/{}/card{:02x}/{}/online",
|
||||
AP_ROOT_BUS_PATH, address.adapter_id, address
|
||||
);
|
||||
if !Path::new(&ap_path).is_file() {
|
||||
return Err(anyhow!(
|
||||
"AP device online file not found or not accessible: {}",
|
||||
ap_path
|
||||
));
|
||||
}
|
||||
match fs::read_to_string(&ap_path) {
|
||||
Ok(content) => {
|
||||
let is_online = content.trim() == "1";
|
||||
if !is_online {
|
||||
return Err(anyhow!("AP device {} exists but is not online", address));
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(anyhow!(
|
||||
"Failed to read online status for AP device {}: {}",
|
||||
address,
|
||||
e
|
||||
));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn wait_for_pci_device(
|
||||
sandbox: &Arc<Mutex<Sandbox>>,
|
||||
pcipath: &pci::Path,
|
||||
|
@ -22,8 +22,10 @@ pub const DRIVER_VFIO_PCI_GK_TYPE: &str = "vfio-pci-gk";
|
||||
/// VFIO PCI device to be bound to vfio-pci and made available inside the
|
||||
/// container as a VFIO device node
|
||||
pub const DRIVER_VFIO_PCI_TYPE: &str = "vfio-pci";
|
||||
/// DRIVER_VFIO_AP_TYPE is the device driver for vfio-ap.
|
||||
/// DRIVER_VFIO_AP_TYPE is the device driver for vfio-ap hotplug.
|
||||
pub const DRIVER_VFIO_AP_TYPE: &str = "vfio-ap";
|
||||
/// DRIVER_VFIO_AP_COLD_TYPE is the device driver for vfio-ap coldplug.
|
||||
pub const DRIVER_VFIO_AP_COLD_TYPE: &str = "vfio-ap-cold";
|
||||
|
||||
/// DRIVER_9P_TYPE is the driver for 9pfs volume.
|
||||
pub const DRIVER_9P_TYPE: &str = "9p";
|
||||
|
@ -864,7 +864,15 @@ func (c *Container) createDevices(contConfig *ContainerConfig) error {
|
||||
// device /dev/vfio/vfio an 2nd the actuall device(s) afterwards.
|
||||
// Sort the devices starting with device #1 being the VFIO control group
|
||||
// device and the next the actuall device(s) /dev/vfio/<group>
|
||||
deviceInfos = sortContainerVFIODevices(hotPlugDevices)
|
||||
if coldPlugVFIO && c.sandbox.config.VfioMode == config.VFIOModeVFIO {
|
||||
// DeviceInfo should still be added to the sandbox's device manager
|
||||
// if vfio_mode is VFIO and coldPlugVFIO is true (e.g. vfio-ap-cold).
|
||||
// This ensures that ociSpec.Linux.Devices is updated with
|
||||
// this information before the container is created on the guest.
|
||||
deviceInfos = sortContainerVFIODevices(coldPlugDevices)
|
||||
} else {
|
||||
deviceInfos = sortContainerVFIODevices(hotPlugDevices)
|
||||
}
|
||||
|
||||
for _, info := range deviceInfos {
|
||||
dev, err := c.sandbox.devManager.NewDevice(info)
|
||||
|
@ -108,9 +108,10 @@ var (
|
||||
kataVirtioFSDevType = "virtio-fs"
|
||||
kataOverlayDevType = "overlayfs"
|
||||
kataWatchableBindDevType = "watchable-bind"
|
||||
kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container
|
||||
kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel
|
||||
kataVfioApDevType = "vfio-ap"
|
||||
kataVfioPciDevType = "vfio-pci" // VFIO PCI device to used as VFIO in the container
|
||||
kataVfioPciGuestKernelDevType = "vfio-pci-gk" // VFIO PCI device for consumption by the guest kernel
|
||||
kataVfioApDevType = "vfio-ap" // VFIO AP device for hot-plugging
|
||||
kataVfioApColdDevType = "vfio-ap-cold" // VFIO AP device for cold-plugging
|
||||
sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
|
||||
sharedDirVirtioFSOptions = []string{}
|
||||
sharedDirVirtioFSDaxOptions = "dax"
|
||||
@ -1211,6 +1212,15 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
|
||||
for i, dev := range devList {
|
||||
if dev.Type == config.VFIOAPDeviceMediatedType {
|
||||
kataDevice.Type = kataVfioApDevType
|
||||
coldPlugVFIO := (c.sandbox.config.HypervisorConfig.ColdPlugVFIO != config.NoPort)
|
||||
if coldPlugVFIO && c.sandbox.config.VfioMode == config.VFIOModeVFIO {
|
||||
// A new device type is required for cold-plugging VFIO-AP.
|
||||
// The VM guest should handle this differently from hot-plugging VFIO-AP
|
||||
// (e.g., wait_for_ap_device).
|
||||
// Note that a device already exists for cold-plugging VFIO-AP
|
||||
// at the time the device type is checked.
|
||||
kataDevice.Type = kataVfioApColdDevType
|
||||
}
|
||||
kataDevice.Options = dev.APDevices
|
||||
} else {
|
||||
|
||||
|
@ -1218,3 +1218,137 @@ func TestIsNydusRootFSType(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestKataAgentCreateContainerVFIODevices(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
// Create temporary directory to mock IOMMU filesystem
|
||||
tmpDir := t.TempDir()
|
||||
iommuPath := filepath.Join(tmpDir, "sys", "kernel", "iommu_groups", "2", "devices")
|
||||
err := os.MkdirAll(iommuPath, 0755)
|
||||
assert.NoError(err)
|
||||
|
||||
// Create a dummy device file to satisfy the IOMMU group check
|
||||
dummyDev := filepath.Join(iommuPath, "0000:00:02.0")
|
||||
err = os.WriteFile(dummyDev, []byte(""), 0644)
|
||||
assert.NoError(err)
|
||||
|
||||
// Save original paths and restore after test
|
||||
origConfigSysIOMMUPath := config.SysIOMMUGroupPath
|
||||
config.SysIOMMUGroupPath = filepath.Join(tmpDir, "sys", "kernel", "iommu_groups")
|
||||
defer func() {
|
||||
config.SysIOMMUGroupPath = origConfigSysIOMMUPath
|
||||
}()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
hotPlugVFIO config.PCIePort
|
||||
coldPlugVFIO config.PCIePort
|
||||
vfioMode config.VFIOModeType
|
||||
expectVFIODev bool
|
||||
}{
|
||||
{
|
||||
name: "VFIO device with cold plug enabled",
|
||||
hotPlugVFIO: config.NoPort,
|
||||
coldPlugVFIO: config.BridgePort,
|
||||
vfioMode: config.VFIOModeVFIO,
|
||||
expectVFIODev: true,
|
||||
},
|
||||
{
|
||||
name: "VFIO device with hot plug enabled",
|
||||
hotPlugVFIO: config.BridgePort,
|
||||
coldPlugVFIO: config.NoPort,
|
||||
vfioMode: config.VFIOModeVFIO,
|
||||
expectVFIODev: true,
|
||||
},
|
||||
{
|
||||
name: "VFIO device with cold plug enabled but guest kernel mode",
|
||||
hotPlugVFIO: config.NoPort,
|
||||
coldPlugVFIO: config.BridgePort,
|
||||
vfioMode: config.VFIOModeGuestKernel,
|
||||
expectVFIODev: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
vfioGroup := "2"
|
||||
vfioPath := filepath.Join("/dev/vfio", vfioGroup)
|
||||
|
||||
vfioDevice := config.DeviceInfo{
|
||||
ContainerPath: vfioPath,
|
||||
HostPath: vfioPath,
|
||||
DevType: "c",
|
||||
Major: 10,
|
||||
Minor: 196,
|
||||
ColdPlug: tt.coldPlugVFIO != config.NoPort,
|
||||
Port: tt.coldPlugVFIO,
|
||||
}
|
||||
|
||||
// Setup container config with the VFIO device
|
||||
contConfig := &ContainerConfig{
|
||||
DeviceInfos: []config.DeviceInfo{vfioDevice},
|
||||
}
|
||||
|
||||
// Create mock URL for kata agent
|
||||
url, err := mock.GenerateKataMockHybridVSock()
|
||||
assert.NoError(err)
|
||||
defer mock.RemoveKataMockHybridVSock(url)
|
||||
|
||||
hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{}
|
||||
err = hybridVSockTTRPCMock.Start(url)
|
||||
assert.NoError(err)
|
||||
defer hybridVSockTTRPCMock.Stop()
|
||||
|
||||
k := &kataAgent{
|
||||
ctx: context.Background(),
|
||||
state: KataAgentState{
|
||||
URL: url,
|
||||
},
|
||||
}
|
||||
|
||||
mockReceiver := &api.MockDeviceReceiver{}
|
||||
|
||||
sandbox := &Sandbox{
|
||||
config: &SandboxConfig{
|
||||
VfioMode: tt.vfioMode,
|
||||
HypervisorConfig: HypervisorConfig{
|
||||
HotPlugVFIO: tt.hotPlugVFIO,
|
||||
ColdPlugVFIO: tt.coldPlugVFIO,
|
||||
},
|
||||
},
|
||||
devManager: manager.NewDeviceManager("virtio-scsi", false, "", 0, nil),
|
||||
agent: k,
|
||||
}
|
||||
|
||||
container := &Container{
|
||||
sandbox: sandbox,
|
||||
id: "test-container",
|
||||
config: contConfig,
|
||||
}
|
||||
|
||||
// Call createDevices which should trigger the full flow
|
||||
err = container.createDevices(contConfig)
|
||||
assert.NoError(err)
|
||||
|
||||
// Find the device in device manager using the original device info
|
||||
dev := sandbox.devManager.FindDevice(&vfioDevice)
|
||||
|
||||
if tt.expectVFIODev {
|
||||
// For cases where VFIO device should be handled
|
||||
assert.NotNil(dev, "VFIO device should be found in device manager")
|
||||
if dev != nil {
|
||||
// Manually attach the device to increase attach count
|
||||
err = dev.Attach(context.Background(), mockReceiver)
|
||||
assert.NoError(err, "Device attachment should succeed")
|
||||
|
||||
assert.True(sandbox.devManager.IsDeviceAttached(dev.DeviceID()),
|
||||
"Device should be marked as attached")
|
||||
}
|
||||
} else {
|
||||
// For cases where VFIO device should be skipped
|
||||
assert.Nil(dev, "VFIO device should not be found in device manager")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -62,6 +62,7 @@ func testCreateSandbox(t *testing.T, id string,
|
||||
Volumes: volumes,
|
||||
Containers: containers,
|
||||
Annotations: sandboxAnnotations,
|
||||
VfioMode: config.VFIOModeGuestKernel,
|
||||
}
|
||||
|
||||
ctx := WithNewAgentFunc(context.Background(), newMockAgent)
|
||||
|
@ -1,33 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024 IBM Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
{
|
||||
"metadata": {
|
||||
"name": "test-container",
|
||||
"namespace": "default"
|
||||
},
|
||||
"image": {
|
||||
"image": "$IMAGE_NAME"
|
||||
},
|
||||
"command": [
|
||||
"sh",
|
||||
"-c",
|
||||
"sleep 3600"
|
||||
],
|
||||
"mounts": [],
|
||||
"log_path": "test-container.log",
|
||||
"linux": {
|
||||
"security_context": {
|
||||
"privileged": true
|
||||
}
|
||||
},
|
||||
"devices": [
|
||||
{
|
||||
"container_path": "/dev/vfio/$DEVICE_INDEX",
|
||||
"host_path": "/dev/vfio/$DEVICE_INDEX",
|
||||
"permissions": "rwm"
|
||||
}
|
||||
]
|
||||
}
|
@ -58,7 +58,7 @@ setup_hotplug() {
|
||||
|
||||
setup_coldplug() {
|
||||
echo "Set up the configuration file for Coldplug"
|
||||
setup_config_file "vfio_mode" "replace" "guest-kernel"
|
||||
setup_config_file "vfio_mode" "replace" "vfio"
|
||||
setup_config_file "hot_plug_vfio" "comment_out"
|
||||
setup_config_file "cold_plug_vfio" "replace" "bridge-port"
|
||||
show_config_file
|
||||
@ -91,7 +91,7 @@ cleanup() {
|
||||
echo 0x$(printf -- 'f%.0s' {1..64}) | sudo tee /sys/bus/ap/aqmask > /dev/null
|
||||
|
||||
# Remove files used for testing
|
||||
rm -f ${script_path}/zcrypttest ${script_path}/container-config.json
|
||||
rm -f ${script_path}/zcrypttest
|
||||
}
|
||||
|
||||
validate_env() {
|
||||
@ -202,46 +202,22 @@ create_mediated_device() {
|
||||
|
||||
run_test() {
|
||||
local run_index=$1
|
||||
local container_cli=$2
|
||||
local test_message=$3
|
||||
local extra_cmd=${4:-}
|
||||
local test_message=$2
|
||||
local extra_cmd=${3:-}
|
||||
local start_time=$(date +"%Y-%m-%d %H:%M:%S")
|
||||
[ -n "${dev_index}" ] || { echo "No dev_index" >&2; exit 1; }
|
||||
|
||||
# Set time granularity to a second for capturing the log
|
||||
sleep 1
|
||||
|
||||
if [ "${container_cli}" == "crictl" ]; then
|
||||
sudo crictl pull ${test_image_name}
|
||||
# Prepare container-config.json
|
||||
IMAGE_NAME="${test_image_name}" DEVICE_INDEX="${dev_index}" \
|
||||
envsubst < ${script_path}/container-config.json.in > ${script_path}/container-config.json
|
||||
# Create a container and run the test
|
||||
POD_ID=$(sudo crictl runp --runtime=kata ${script_path}/sandbox-config.json)
|
||||
sudo crictl pods
|
||||
CONTAINER_ID=$(sudo crictl create $POD_ID ${script_path}/container-config.json ${script_path}/sandbox-config.json)
|
||||
sudo crictl start $CONTAINER_ID
|
||||
sudo crictl ps
|
||||
# Give enough time for the container to start
|
||||
sleep 5
|
||||
sudo crictl exec $CONTAINER_ID \
|
||||
bash -c "lszcrypt ${_APID}.${_APQI} | grep ${APQN} ${extra_cmd}"
|
||||
sudo ctr image pull --plain-http ${test_image_name}
|
||||
# Create a container and run the test
|
||||
sudo ctr run --runtime io.containerd.run.kata.v2 --rm \
|
||||
--privileged --privileged-without-host-devices \
|
||||
--device ${dev_base}/${dev_index} ${test_image_name} test \
|
||||
bash -c "lszcrypt ${_APID}.${_APQI} | grep ${APQN} ${extra_cmd}"
|
||||
|
||||
[ $? -eq 0 ] && result=0 || result=1
|
||||
|
||||
# Clean up the container
|
||||
echo "Clean up the container"
|
||||
sudo crictl stopp $POD_ID
|
||||
sudo crictl rmp $POD_ID
|
||||
elif [ "${container_cli}" == "ctr" ]; then
|
||||
sudo ctr image pull --plain-http ${test_image_name}
|
||||
# Create a container and run the test
|
||||
sudo ctr run --runtime io.containerd.run.kata.v2 --rm \
|
||||
--device ${dev_base}/${dev_index} ${test_image_name} test \
|
||||
bash -c "lszcrypt ${_APID}.${_APQI} | grep ${APQN}"
|
||||
|
||||
[ $? -eq 0 ] && result=0 || result=1
|
||||
fi
|
||||
[ $? -eq 0 ] && result=0 || result=1
|
||||
|
||||
if [ $result -eq 0 ]; then
|
||||
echo "ok ${run_index} ${test_category} ${test_message}"
|
||||
@ -254,10 +230,10 @@ run_test() {
|
||||
|
||||
run_tests() {
|
||||
setup_hotplug
|
||||
run_test "1" "crictl" "Test can assign a CEX device inside the guest via VFIO-AP Hotplug" "&& zcrypttest -a -v"
|
||||
run_test "1" "Test can assign a CEX device inside the guest via VFIO-AP Hotplug" "&& zcrypttest -a -v"
|
||||
|
||||
setup_coldplug
|
||||
run_test "2" "ctr" "Test can assign a CEX device inside the guest via VFIO-AP Coldplug"
|
||||
run_test "2" "Test can assign a CEX device inside the guest via VFIO-AP Coldplug" "&& zcrypttest -a -v"
|
||||
}
|
||||
|
||||
main() {
|
||||
|
Loading…
Reference in New Issue
Block a user