Merge pull request #7382 from zvonkok/vfio-ap-debug

s390x: Fixing device.Bus assignment
This commit is contained in:
Fabiano Fidêncio 2023-07-25 08:26:25 +02:00 committed by GitHub
commit 5ce0b4743f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 490 additions and 316 deletions

View File

@ -103,20 +103,19 @@ type RuntimeVersionInfo struct {
// HypervisorInfo stores hypervisor details // HypervisorInfo stores hypervisor details
type HypervisorInfo struct { type HypervisorInfo struct {
MachineType string MachineType string
Version string Version string
Path string Path string
BlockDeviceDriver string BlockDeviceDriver string
EntropySource string EntropySource string
SharedFS string SharedFS string
VirtioFSDaemon string VirtioFSDaemon string
SocketPath string SocketPath string
Msize9p uint32 Msize9p uint32
MemorySlots uint32 MemorySlots uint32
HotPlugVFIO config.PCIePort HotPlugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort ColdPlugVFIO config.PCIePort
HotplugVFIOOnRootBus bool Debug bool
Debug bool
} }
// AgentInfo stores agent details // AgentInfo stores agent details
@ -307,20 +306,19 @@ func getHypervisorInfo(config oci.RuntimeConfig) (HypervisorInfo, error) {
} }
return HypervisorInfo{ return HypervisorInfo{
Debug: config.HypervisorConfig.Debug, Debug: config.HypervisorConfig.Debug,
MachineType: config.HypervisorConfig.HypervisorMachineType, MachineType: config.HypervisorConfig.HypervisorMachineType,
Version: version, Version: version,
Path: hypervisorPath, Path: hypervisorPath,
BlockDeviceDriver: config.HypervisorConfig.BlockDeviceDriver, BlockDeviceDriver: config.HypervisorConfig.BlockDeviceDriver,
Msize9p: config.HypervisorConfig.Msize9p, Msize9p: config.HypervisorConfig.Msize9p,
MemorySlots: config.HypervisorConfig.MemSlots, MemorySlots: config.HypervisorConfig.MemSlots,
EntropySource: config.HypervisorConfig.EntropySource, EntropySource: config.HypervisorConfig.EntropySource,
SharedFS: config.HypervisorConfig.SharedFS, SharedFS: config.HypervisorConfig.SharedFS,
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon, VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO, HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO,
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO, ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus, SocketPath: socketPath,
SocketPath: socketPath,
}, nil }, nil
} }

View File

@ -87,7 +87,6 @@ func makeRuntimeConfig(prefixDir string) (configFile string, ociConfig oci.Runti
disableBlock := true disableBlock := true
blockStorageDriver := "virtio-scsi" blockStorageDriver := "virtio-scsi"
enableIOThreads := true enableIOThreads := true
hotplugVFIOOnRootBus := true
hotPlugVFIO = config.BridgePort hotPlugVFIO = config.BridgePort
coldPlugVFIO = config.NoPort coldPlugVFIO = config.NoPort
disableNewNetNs := false disableNewNetNs := false
@ -132,7 +131,6 @@ func makeRuntimeConfig(prefixDir string) (configFile string, ociConfig oci.Runti
DisableBlock: disableBlock, DisableBlock: disableBlock,
BlockDeviceDriver: blockStorageDriver, BlockDeviceDriver: blockStorageDriver,
EnableIOThreads: enableIOThreads, EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
HotPlugVFIO: hotPlugVFIO, HotPlugVFIO: hotPlugVFIO,
ColdPlugVFIO: coldPlugVFIO, ColdPlugVFIO: coldPlugVFIO,
DisableNewNetNs: disableNewNetNs, DisableNewNetNs: disableNewNetNs,
@ -276,10 +274,8 @@ func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo {
EntropySource: config.HypervisorConfig.EntropySource, EntropySource: config.HypervisorConfig.EntropySource,
SharedFS: config.HypervisorConfig.SharedFS, SharedFS: config.HypervisorConfig.SharedFS,
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon, VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO,
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus, ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO,
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
} }
if os.Geteuid() == 0 { if os.Geteuid() == 0 {

View File

@ -330,31 +330,29 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (runtimeConfig string,
disableBlockDevice := true disableBlockDevice := true
blockDeviceDriver := "virtio-scsi" blockDeviceDriver := "virtio-scsi"
enableIOThreads := true enableIOThreads := true
hotplugVFIOOnRootBus := true
disableNewNetNs := false disableNewNetNs := false
sharedFS := "virtio-9p" sharedFS := "virtio-9p"
virtioFSdaemon := path.Join(dir, "virtiofsd") virtioFSdaemon := path.Join(dir, "virtiofsd")
hotPlugVFIO = config.BridgePort hotPlugVFIO = config.BridgePort
coldPlugVFIO = config.RootPort coldPlugVFIO = config.NoPort
configFileOptions := ktu.RuntimeConfigOptions{ configFileOptions := ktu.RuntimeConfigOptions{
Hypervisor: "qemu", Hypervisor: "qemu",
HypervisorPath: hypervisorPath, HypervisorPath: hypervisorPath,
KernelPath: kernelPath, KernelPath: kernelPath,
ImagePath: imagePath, ImagePath: imagePath,
RootfsType: rootfsType, RootfsType: rootfsType,
KernelParams: kernelParams, KernelParams: kernelParams,
MachineType: machineType, MachineType: machineType,
LogPath: logPath, LogPath: logPath,
DisableBlock: disableBlockDevice, DisableBlock: disableBlockDevice,
BlockDeviceDriver: blockDeviceDriver, BlockDeviceDriver: blockDeviceDriver,
EnableIOThreads: enableIOThreads, EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus, DisableNewNetNs: disableNewNetNs,
DisableNewNetNs: disableNewNetNs, SharedFS: sharedFS,
SharedFS: sharedFS, VirtioFSDaemon: virtioFSdaemon,
VirtioFSDaemon: virtioFSdaemon, HotPlugVFIO: hotPlugVFIO,
HotPlugVFIO: hotPlugVFIO, ColdPlugVFIO: coldPlugVFIO,
ColdPlugVFIO: coldPlugVFIO,
} }
runtimeConfigFileData := ktu.MakeRuntimeConfigFileData(configFileOptions) runtimeConfigFileData := ktu.MakeRuntimeConfigFileData(configFileOptions)

View File

@ -111,7 +111,7 @@ func GetVFIODeviceType(deviceFilePath string) (config.VFIODeviceType, error) {
return config.VFIODeviceErrorType, err return config.VFIODeviceErrorType, err
} }
if strings.HasPrefix(deviceSysfsDev, vfioAPSysfsDir) { if strings.Contains(deviceSysfsDev, vfioAPSysfsDir) {
return config.VFIOAPDeviceMediatedType, nil return config.VFIOAPDeviceMediatedType, nil
} }
@ -178,22 +178,22 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODe
} }
id := utils.MakeNameID("vfio", device.ID+strconv.Itoa(i), maxDevIDSize) id := utils.MakeNameID("vfio", device.ID+strconv.Itoa(i), maxDevIDSize)
pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
// We need to ignore Host or PCI Bridges that are in the same IOMMU group as the
// passed-through devices. One CANNOT pass-through a PCI bridge or Host bridge.
// Class 0x0604 is PCI bridge, 0x0600 is Host bridge
ignorePCIDevice, err := checkIgnorePCIClass(pciClass, deviceBDF, 0x0600)
if err != nil {
return nil, err
}
if ignorePCIDevice {
continue
}
var vfio config.VFIODev var vfio config.VFIODev
switch vfioDeviceType { switch vfioDeviceType {
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType: case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
// This is vfio-pci and vfio-mdev specific
pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
// We need to ignore Host or PCI Bridges that are in the same IOMMU group as the
// passed-through devices. One CANNOT pass-through a PCI bridge or Host bridge.
// Class 0x0604 is PCI bridge, 0x0600 is Host bridge
ignorePCIDevice, err := checkIgnorePCIClass(pciClass, deviceBDF, 0x0600)
if err != nil {
return nil, err
}
if ignorePCIDevice {
continue
}
// Do not directly assign to `vfio` -- need to access field still // Do not directly assign to `vfio` -- need to access field still
vfio = config.VFIODev{ vfio = config.VFIODev{
ID: id, ID: id,
@ -216,6 +216,7 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODe
SysfsDev: deviceSysfsDev, SysfsDev: deviceSysfsDev,
Type: config.VFIOAPDeviceMediatedType, Type: config.VFIOAPDeviceMediatedType,
APDevices: devices, APDevices: devices,
Port: device.Port,
} }
default: default:
return nil, fmt.Errorf("Failed to append device: VFIO device type unrecognized") return nil, fmt.Errorf("Failed to append device: VFIO device type unrecognized")

View File

@ -69,7 +69,14 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
if err != nil { if err != nil {
return err return err
} }
for _, vfio := range device.VfioDevs { for _, vfio := range device.VfioDevs {
// If vfio.Port is not set we bail out, users should set
// explicitly the port in the config file
if vfio.Port == "" {
return fmt.Errorf("cold_plug_vfio= or hot_plug_vfio= port is not set for device %s (BridgePort | RootPort | SwitchPort)", vfio.BDF)
}
if vfio.IsPCIe { if vfio.IsPCIe {
busIndex := len(config.PCIeDevices[vfio.Port]) busIndex := len(config.PCIeDevices[vfio.Port])
vfio.Bus = fmt.Sprintf("%s%d", config.PCIePortPrefixMapping[vfio.Port], busIndex) vfio.Bus = fmt.Sprintf("%s%d", config.PCIePortPrefixMapping[vfio.Port], busIndex)

View File

@ -120,7 +120,7 @@ func (dm *deviceManager) createDevice(devInfo config.DeviceInfo) (dev api.Device
if devInfo.ID, err = dm.newDeviceID(); err != nil { if devInfo.ID, err = dm.newDeviceID(); err != nil {
return nil, err return nil, err
} }
if IsVFIO(devInfo.HostPath) { if IsVFIODevice(devInfo.HostPath) {
return drivers.NewVFIODevice(&devInfo), nil return drivers.NewVFIODevice(&devInfo), nil
} else if IsVhostUserBlk(devInfo) { } else if IsVhostUserBlk(devInfo) {
if devInfo.DriverOptions == nil { if devInfo.DriverOptions == nil {
@ -191,12 +191,12 @@ func (dm *deviceManager) AttachDevice(ctx context.Context, id string, dr api.Dev
dm.Lock() dm.Lock()
defer dm.Unlock() defer dm.Unlock()
d, ok := dm.devices[id] dev, ok := dm.devices[id]
if !ok { if !ok {
return ErrDeviceNotExist return ErrDeviceNotExist
} }
if err := d.Attach(ctx, dr); err != nil { if err := dev.Attach(ctx, dr); err != nil {
return err return err
} }
return nil return nil

View File

@ -90,6 +90,100 @@ func TestNewDevice(t *testing.T) {
assert.Equal(t, vfioDev.DeviceInfo.GID, uint32(2)) assert.Equal(t, vfioDev.DeviceInfo.GID, uint32(2))
} }
// TestAttachVFIOAPDevice builds a fake vfio_ap sysfs tree under a temporary
// directory and checks two things: a VFIO device whose DeviceInfo carries a
// port can be attached and detached without error, and Attach returns an
// error when the port is left unset.
func TestAttachVFIOAPDevice(t *testing.T) {
	var err error
	var ok bool

	dm := &deviceManager{
		devices: make(map[string]api.Device),
	}

	tmpDir := t.TempDir()
	// sys/devices/vfio_ap/matrix/f94290f8-78ac-45fb-bb22-e55e519fa64f
	testSysfsAP := "/sys/devices/vfio_ap/"
	testDeviceAP := "f94290f8-78ac-45fb-bb22-e55e519fa64f"
	testVFIOGroup := "42"

	matrixDir := filepath.Join(tmpDir, testSysfsAP, "matrix")
	err = os.MkdirAll(matrixDir, dirMode)
	assert.Nil(t, err)

	deviceAPFile := filepath.Join(matrixDir, testDeviceAP)
	err = os.MkdirAll(deviceAPFile, dirMode)
	assert.Nil(t, err)

	// Create the matrix file and fill it with the AP devices in one step.
	// os.WriteFile creates the file itself, so no separate os.Create is
	// needed (its returned handle would otherwise be left open).
	matrixDeviceAPFile := filepath.Join(deviceAPFile, "matrix")
	apDevices := []byte("05.001f\n")
	err = os.WriteFile(matrixDeviceAPFile, apDevices, 0644)
	assert.Nil(t, err)

	devicesVFIOGroupDir := filepath.Join(tmpDir, testVFIOGroup, "devices")
	err = os.MkdirAll(devicesVFIOGroupDir, dirMode)
	assert.Nil(t, err)

	// Link the mediated device directory into the group's devices directory.
	deviceAPSymlink := filepath.Join(devicesVFIOGroupDir, testDeviceAP)
	err = os.Symlink(deviceAPFile, deviceAPSymlink)
	assert.Nil(t, err)

	// Point the package-level sysfs paths at the fake tree and restore them
	// when the test returns.
	savedIOMMUPath := config.SysIOMMUGroupPath
	config.SysIOMMUGroupPath = tmpDir

	savedSysBusPciDevicesPath := config.SysBusPciDevicesPath
	config.SysBusPciDevicesPath = devicesVFIOGroupDir

	defer func() {
		config.SysIOMMUGroupPath = savedIOMMUPath
		config.SysBusPciDevicesPath = savedSysBusPciDevicesPath
	}()

	path := filepath.Join(vfioPath, testVFIOGroup)
	deviceInfo := config.DeviceInfo{
		HostPath:      path,
		ContainerPath: path,
		DevType:       "c",
		ColdPlug:      false,
		Port:          config.RootPort,
	}

	device, err := dm.NewDevice(deviceInfo)
	assert.Nil(t, err)
	_, ok = device.(*drivers.VFIODevice)
	assert.True(t, ok)

	devReceiver := &api.MockDeviceReceiver{}
	err = device.Attach(context.Background(), devReceiver)
	assert.Nil(t, err)

	err = device.Detach(context.Background(), devReceiver)
	assert.Nil(t, err)

	// If we omit the port setting we should fail
	failDm := &deviceManager{
		devices: make(map[string]api.Device),
	}

	failDeviceInfo := config.DeviceInfo{
		HostPath:      path,
		ContainerPath: path,
		DevType:       "c",
		ColdPlug:      false,
	}

	failDevice, err := failDm.NewDevice(failDeviceInfo)
	assert.Nil(t, err)
	_, ok = failDevice.(*drivers.VFIODevice)
	assert.True(t, ok)

	failDevReceiver := &api.MockDeviceReceiver{}
	err = failDevice.Attach(context.Background(), failDevReceiver)
	assert.Error(t, err)
}
func TestAttachVFIODevice(t *testing.T) { func TestAttachVFIODevice(t *testing.T) {
dm := &deviceManager{ dm := &deviceManager{
blockDriver: config.VirtioBlock, blockDriver: config.VirtioBlock,
@ -132,6 +226,8 @@ func TestAttachVFIODevice(t *testing.T) {
HostPath: path, HostPath: path,
ContainerPath: path, ContainerPath: path,
DevType: "c", DevType: "c",
ColdPlug: false,
Port: config.RootPort,
} }
device, err := dm.NewDevice(deviceInfo) device, err := dm.NewDevice(deviceInfo)

View File

@ -17,8 +17,15 @@ const (
vfioPath = "/dev/vfio/" vfioPath = "/dev/vfio/"
) )
// IsVFIOControlDevice reports whether path is the VFIO control device, i.e.
// exactly the "vfio" entry directly under vfioPath. Depending on the
// vfio_mode, callers need to tell a VFIO device apart from the VFIO control
// device.
func IsVFIOControlDevice(path string) bool {
	controlDevice := filepath.Join(vfioPath, "vfio")
	return path == controlDevice
}
// IsVFIO checks if the device provided is a vfio group. // IsVFIODevice checks if the device provided is a vfio group.
func IsVFIO(hostPath string) bool { func IsVFIODevice(hostPath string) bool {
// Ignore /dev/vfio/vfio character device // Ignore /dev/vfio/vfio character device
if strings.HasPrefix(hostPath, filepath.Join(vfioPath, "vfio")) { if strings.HasPrefix(hostPath, filepath.Join(vfioPath, "vfio")) {
return false return false

View File

@ -31,7 +31,7 @@ func TestIsVFIO(t *testing.T) {
} }
for _, d := range data { for _, d := range data {
isVFIO := IsVFIO(d.path) isVFIO := IsVFIODevice(d.path)
assert.Equal(t, d.expected, isVFIO) assert.Equal(t, d.expected, isVFIO)
} }
} }

View File

@ -163,6 +163,9 @@ const (
// TransportMMIO is the MMIO transport for virtio devices. // TransportMMIO is the MMIO transport for virtio devices.
TransportMMIO VirtioTransport = "mmio" TransportMMIO VirtioTransport = "mmio"
// TransportAP is the AP transport for virtio devices.
TransportAP VirtioTransport = "ap"
) )
// defaultTransport returns the default transport for the current combination // defaultTransport returns the default transport for the current combination
@ -199,6 +202,14 @@ func (transport VirtioTransport) isVirtioCCW(config *Config) bool {
return transport == TransportCCW return transport == TransportCCW
} }
// isVirtioAP reports whether the effective transport is TransportAP. An empty
// transport is first resolved through defaultTransport(config).
func (transport VirtioTransport) isVirtioAP(config *Config) bool {
	effective := transport
	if effective == "" {
		effective = transport.defaultTransport(config)
	}

	return effective == TransportAP
}
// getName returns the name of the current transport. // getName returns the name of the current transport.
func (transport VirtioTransport) getName(config *Config) string { func (transport VirtioTransport) getName(config *Config) string {
if transport == "" { if transport == "" {
@ -1811,6 +1822,9 @@ type VFIODevice struct {
// Transport is the virtio transport for this device. // Transport is the virtio transport for this device.
Transport VirtioTransport Transport VirtioTransport
// SysfsDev specifies the sysfs matrix entry for the AP device
SysfsDev string
} }
// VFIODeviceTransport is a map of the vfio device name that corresponds to // VFIODeviceTransport is a map of the vfio device name that corresponds to
@ -1819,11 +1833,13 @@ var VFIODeviceTransport = map[VirtioTransport]string{
TransportPCI: "vfio-pci", TransportPCI: "vfio-pci",
TransportCCW: "vfio-ccw", TransportCCW: "vfio-ccw",
TransportMMIO: "vfio-device", TransportMMIO: "vfio-device",
TransportAP: "vfio-ap",
} }
// Valid returns true if the VFIODevice structure is valid and complete. // Valid returns true if the VFIODevice structure is valid and complete.
// s390x architecture requires SysfsDev to be set.
func (vfioDev VFIODevice) Valid() bool { func (vfioDev VFIODevice) Valid() bool {
return vfioDev.BDF != "" return vfioDev.BDF != "" || vfioDev.SysfsDev != ""
} }
// QemuParams returns the qemu parameters built out of this vfio device. // QemuParams returns the qemu parameters built out of this vfio device.
@ -1833,6 +1849,15 @@ func (vfioDev VFIODevice) QemuParams(config *Config) []string {
driver := vfioDev.deviceName(config) driver := vfioDev.deviceName(config)
if vfioDev.Transport.isVirtioAP(config) {
deviceParams = append(deviceParams, fmt.Sprintf("%s,sysfsdev=%s", driver, vfioDev.SysfsDev))
qemuParams = append(qemuParams, "-device")
qemuParams = append(qemuParams, strings.Join(deviceParams, ","))
return qemuParams
}
deviceParams = append(deviceParams, fmt.Sprintf("%s,host=%s", driver, vfioDev.BDF)) deviceParams = append(deviceParams, fmt.Sprintf("%s,host=%s", driver, vfioDev.BDF))
if vfioDev.Transport.isVirtioPCI(config) { if vfioDev.Transport.isVirtioPCI(config) {
if vfioDev.VendorID != "" { if vfioDev.VendorID != "" {
@ -2837,10 +2862,9 @@ func (config *Config) appendDevices(logger QMPLog) {
for _, d := range config.Devices { for _, d := range config.Devices {
if !d.Valid() { if !d.Valid() {
logger.Errorf("vm device is not valid: %+v", config.Devices) logger.Errorf("vm device is not valid: %+v", d)
continue continue
} }
config.qemuParams = append(config.qemuParams, d.QemuParams(config)...) config.qemuParams = append(config.qemuParams, d.QemuParams(config)...)
} }
} }

View File

@ -1217,10 +1217,11 @@ func (q *QMP) ExecutePCIVFIOMediatedDeviceAdd(ctx context.Context, devID, sysfsd
} }
// ExecuteAPVFIOMediatedDeviceAdd adds a VFIO mediated AP device to a QEMU instance using the device_add command. // ExecuteAPVFIOMediatedDeviceAdd adds a VFIO mediated AP device to a QEMU instance using the device_add command.
func (q *QMP) ExecuteAPVFIOMediatedDeviceAdd(ctx context.Context, sysfsdev string) error { func (q *QMP) ExecuteAPVFIOMediatedDeviceAdd(ctx context.Context, sysfsdev string, devID string) error {
args := map[string]interface{}{ args := map[string]interface{}{
"driver": VfioAP, "driver": VfioAP,
"sysfsdev": sysfsdev, "sysfsdev": sysfsdev,
"id": devID,
} }
return q.executeCommand(ctx, "device_add", args, nil) return q.executeCommand(ctx, "device_add", args, nil)
} }

View File

@ -1128,7 +1128,7 @@ func TestQMPAPVFIOMediatedDeviceAdd(t *testing.T) {
q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh) q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh)
checkVersion(t, connectedCh) checkVersion(t, connectedCh)
sysfsDev := "/sys/devices/vfio_ap/matrix/a297db4a-f4c2-11e6-90f6-d3b88d6c9525" sysfsDev := "/sys/devices/vfio_ap/matrix/a297db4a-f4c2-11e6-90f6-d3b88d6c9525"
err := q.ExecuteAPVFIOMediatedDeviceAdd(context.Background(), sysfsDev) err := q.ExecuteAPVFIOMediatedDeviceAdd(context.Background(), sysfsDev, "test-id")
if err != nil { if err != nil {
t.Fatalf("Unexpected error %v", err) t.Fatalf("Unexpected error %v", err)
} }

View File

@ -42,10 +42,9 @@ type HypervisorState struct {
// HotpluggedVCPUs is the list of CPUs that were hot-added // HotpluggedVCPUs is the list of CPUs that were hot-added
HotpluggedVCPUs []CPUDevice HotpluggedVCPUs []CPUDevice
HotpluggedMemory int HotpluggedMemory int
VirtiofsDaemonPid int VirtiofsDaemonPid int
Pid int Pid int
HotPlugVFIO config.PCIePort HotPlugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort ColdPlugVFIO config.PCIePort
HotplugVFIOOnRootBus bool
} }

View File

@ -233,7 +233,6 @@ type RuntimeConfigOptions struct {
DefaultMsize9p uint32 DefaultMsize9p uint32
DisableBlock bool DisableBlock bool
EnableIOThreads bool EnableIOThreads bool
HotplugVFIOOnRootBus bool
DisableNewNetNs bool DisableNewNetNs bool
HypervisorDebug bool HypervisorDebug bool
RuntimeDebug bool RuntimeDebug bool
@ -317,8 +316,8 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
default_memory = ` + strconv.FormatUint(uint64(config.DefaultMemSize), 10) + ` default_memory = ` + strconv.FormatUint(uint64(config.DefaultMemSize), 10) + `
disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + ` disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + `
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + ` enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
hotplug_vfio_on_root_bus = ` + strconv.FormatBool(config.HotplugVFIOOnRootBus) + `
cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `" cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `"
hot_plug_vfio = "` + config.HotPlugVFIO.String() + `"
msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + ` msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + `
enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + ` enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + `
guest_hook_path = "` + config.DefaultGuestHookPath + `" guest_hook_path = "` + config.DefaultGuestHookPath + `"

View File

@ -81,7 +81,6 @@ const defaultFileBackedMemRootDir string = ""
const defaultEnableDebug bool = false const defaultEnableDebug bool = false
const defaultDisableNestingChecks bool = false const defaultDisableNestingChecks bool = false
const defaultMsize9p uint32 = 8192 const defaultMsize9p uint32 = 8192
const defaultHotplugVFIOOnRootBus bool = false
const defaultEntropySource = "/dev/urandom" const defaultEntropySource = "/dev/urandom"
const defaultGuestHookPath string = "" const defaultGuestHookPath string = ""
const defaultVirtioFSCacheMode = "never" const defaultVirtioFSCacheMode = "never"

View File

@ -22,6 +22,7 @@ import (
govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental" exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
@ -146,7 +147,6 @@ type hypervisor struct {
DisableNestingChecks bool `toml:"disable_nesting_checks"` DisableNestingChecks bool `toml:"disable_nesting_checks"`
EnableIOThreads bool `toml:"enable_iothreads"` EnableIOThreads bool `toml:"enable_iothreads"`
DisableImageNvdimm bool `toml:"disable_image_nvdimm"` DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"` HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"`
ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"` ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"`
DisableVhostNet bool `toml:"disable_vhost_net"` DisableVhostNet bool `toml:"disable_vhost_net"`
@ -867,7 +867,6 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
EnableIOThreads: h.EnableIOThreads, EnableIOThreads: h.EnableIOThreads,
Msize9p: h.msize9p(), Msize9p: h.msize9p(),
DisableImageNvdimm: h.DisableImageNvdimm, DisableImageNvdimm: h.DisableImageNvdimm,
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
HotPlugVFIO: h.hotPlugVFIO(), HotPlugVFIO: h.hotPlugVFIO(),
ColdPlugVFIO: h.coldPlugVFIO(), ColdPlugVFIO: h.coldPlugVFIO(),
DisableVhostNet: h.DisableVhostNet, DisableVhostNet: h.DisableVhostNet,
@ -1063,7 +1062,6 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush, BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush,
EnableIOThreads: h.EnableIOThreads, EnableIOThreads: h.EnableIOThreads,
Msize9p: h.msize9p(), Msize9p: h.msize9p(),
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
ColdPlugVFIO: h.coldPlugVFIO(), ColdPlugVFIO: h.coldPlugVFIO(),
HotPlugVFIO: h.hotPlugVFIO(), HotPlugVFIO: h.hotPlugVFIO(),
DisableVhostNet: true, DisableVhostNet: true,
@ -1294,7 +1292,6 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush, BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush,
EnableIOThreads: defaultEnableIOThreads, EnableIOThreads: defaultEnableIOThreads,
Msize9p: defaultMsize9p, Msize9p: defaultMsize9p,
HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus,
ColdPlugVFIO: defaultColdPlugVFIO, ColdPlugVFIO: defaultColdPlugVFIO,
HotPlugVFIO: defaultHotPlugVFIO, HotPlugVFIO: defaultHotPlugVFIO,
GuestHookPath: defaultGuestHookPath, GuestHookPath: defaultGuestHookPath,
@ -1671,7 +1668,8 @@ func checkConfig(config oci.RuntimeConfig) error {
hotPlugVFIO := config.HypervisorConfig.HotPlugVFIO hotPlugVFIO := config.HypervisorConfig.HotPlugVFIO
coldPlugVFIO := config.HypervisorConfig.ColdPlugVFIO coldPlugVFIO := config.HypervisorConfig.ColdPlugVFIO
machineType := config.HypervisorConfig.HypervisorMachineType machineType := config.HypervisorConfig.HypervisorMachineType
if err := checkPCIeConfig(coldPlugVFIO, hotPlugVFIO, machineType); err != nil { hypervisorType := config.HypervisorType
if err := checkPCIeConfig(coldPlugVFIO, hotPlugVFIO, machineType, hypervisorType); err != nil {
return err return err
} }
@ -1681,10 +1679,9 @@ func checkConfig(config oci.RuntimeConfig) error {
// checkPCIeConfig ensures the PCIe configuration is valid. // checkPCIeConfig ensures the PCIe configuration is valid.
// Only allow one of the following settings for cold-plug: // Only allow one of the following settings for cold-plug:
// no-port, root-port, switch-port // no-port, root-port, switch-port
func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineType string) error { func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineType string, hypervisorType virtcontainers.HypervisorType) error {
// Currently only QEMU q35 supports advanced PCIe topologies if hypervisorType != virtcontainers.QemuHypervisor {
// firecracker, dragonball do not have right now any PCIe support kataUtilsLogger.Warn("Advanced PCIe Topology only available for QEMU hypervisor, ignoring hot(cold)_vfio_port setting")
if machineType != "q35" {
return nil return nil
} }
@ -1694,6 +1691,12 @@ func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineT
if coldPlug == config.NoPort && hotPlug == config.NoPort { if coldPlug == config.NoPort && hotPlug == config.NoPort {
return nil return nil
} }
// Currently only QEMU q35,virt support advanced PCIe topologies
// firecracker, dragonball do not have right now any PCIe support
if machineType != "q35" && machineType != "virt" {
return nil
}
var port config.PCIePort var port config.PCIePort
if coldPlug != config.NoPort { if coldPlug != config.NoPort {
port = coldPlug port = coldPlug
@ -1701,10 +1704,13 @@ func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineT
if hotPlug != config.NoPort { if hotPlug != config.NoPort {
port = hotPlug port = hotPlug
} }
if port == config.NoPort || port == config.BridgePort || port == config.RootPort || port == config.SwitchPort { if port == config.NoPort {
return fmt.Errorf("invalid vfio_port=%s setting, use on of %s, %s, %s",
port, config.BridgePort, config.RootPort, config.SwitchPort)
}
if port == config.BridgePort || port == config.RootPort || port == config.SwitchPort {
return nil return nil
} }
return fmt.Errorf("invalid vfio_port=%s setting, allowed values %s, %s, %s, %s", return fmt.Errorf("invalid vfio_port=%s setting, allowed values %s, %s, %s, %s",
coldPlug, config.NoPort, config.BridgePort, config.RootPort, config.SwitchPort) coldPlug, config.NoPort, config.BridgePort, config.RootPort, config.SwitchPort)
} }

View File

@ -85,7 +85,6 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntime
blockDeviceDriver := "virtio-scsi" blockDeviceDriver := "virtio-scsi"
blockDeviceAIO := "io_uring" blockDeviceAIO := "io_uring"
enableIOThreads := true enableIOThreads := true
hotplugVFIOOnRootBus := true
hotPlugVFIO = config.NoPort hotPlugVFIO = config.NoPort
coldPlugVFIO = config.BridgePort coldPlugVFIO = config.BridgePort
disableNewNetNs := false disableNewNetNs := false
@ -108,7 +107,6 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntime
BlockDeviceDriver: blockDeviceDriver, BlockDeviceDriver: blockDeviceDriver,
BlockDeviceAIO: blockDeviceAIO, BlockDeviceAIO: blockDeviceAIO,
EnableIOThreads: enableIOThreads, EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
HotPlugVFIO: hotPlugVFIO, HotPlugVFIO: hotPlugVFIO,
ColdPlugVFIO: coldPlugVFIO, ColdPlugVFIO: coldPlugVFIO,
DisableNewNetNs: disableNewNetNs, DisableNewNetNs: disableNewNetNs,
@ -172,7 +170,6 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntime
BlockDeviceAIO: defaultBlockDeviceAIO, BlockDeviceAIO: defaultBlockDeviceAIO,
DefaultBridges: defaultBridgesCount, DefaultBridges: defaultBridgesCount,
EnableIOThreads: enableIOThreads, EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
HotPlugVFIO: hotPlugVFIO, HotPlugVFIO: hotPlugVFIO,
ColdPlugVFIO: coldPlugVFIO, ColdPlugVFIO: coldPlugVFIO,
Msize9p: defaultMsize9p, Msize9p: defaultMsize9p,
@ -611,7 +608,6 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
machineType := "machineType" machineType := "machineType"
disableBlock := true disableBlock := true
enableIOThreads := true enableIOThreads := true
hotplugVFIOOnRootBus := true
coldPlugVFIO = config.BridgePort coldPlugVFIO = config.BridgePort
orgVHostVSockDevicePath := utils.VHostVSockDevicePath orgVHostVSockDevicePath := utils.VHostVSockDevicePath
blockDeviceAIO := "io_uring" blockDeviceAIO := "io_uring"
@ -630,7 +626,6 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
MachineType: machineType, MachineType: machineType,
DisableBlockDeviceUse: disableBlock, DisableBlockDeviceUse: disableBlock,
EnableIOThreads: enableIOThreads, EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
ColdPlugVFIO: coldPlugVFIO, ColdPlugVFIO: coldPlugVFIO,
RxRateLimiterMaxRate: rxRateLimiterMaxRate, RxRateLimiterMaxRate: rxRateLimiterMaxRate,
TxRateLimiterMaxRate: txRateLimiterMaxRate, TxRateLimiterMaxRate: txRateLimiterMaxRate,
@ -682,10 +677,6 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
t.Errorf("Expected value for enable IOThreads %v, got %v", enableIOThreads, config.EnableIOThreads) t.Errorf("Expected value for enable IOThreads %v, got %v", enableIOThreads, config.EnableIOThreads)
} }
if config.HotplugVFIOOnRootBus != hotplugVFIOOnRootBus {
t.Errorf("Expected value for HotplugVFIOOnRootBus %v, got %v", hotplugVFIOOnRootBus, config.HotplugVFIOOnRootBus)
}
if config.RxRateLimiterMaxRate != rxRateLimiterMaxRate { if config.RxRateLimiterMaxRate != rxRateLimiterMaxRate {
t.Errorf("Expected value for rx rate limiter %v, got %v", rxRateLimiterMaxRate, config.RxRateLimiterMaxRate) t.Errorf("Expected value for rx rate limiter %v, got %v", rxRateLimiterMaxRate, config.RxRateLimiterMaxRate)
} }
@ -807,7 +798,6 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
machineType := "machineType" machineType := "machineType"
disableBlock := true disableBlock := true
enableIOThreads := true enableIOThreads := true
hotplugVFIOOnRootBus := true
hypervisor := hypervisor{ hypervisor := hypervisor{
Path: hypervisorPath, Path: hypervisorPath,
@ -817,7 +807,6 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
MachineType: machineType, MachineType: machineType,
DisableBlockDeviceUse: disableBlock, DisableBlockDeviceUse: disableBlock,
EnableIOThreads: enableIOThreads, EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
} }
_, err := newQemuHypervisorConfig(hypervisor) _, err := newQemuHypervisorConfig(hypervisor)

View File

@ -500,12 +500,6 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig,
return err return err
} }
if err := newAnnotationConfiguration(ocispec, vcAnnotations.HotplugVFIOOnRootBus).setBool(func(hotplugVFIOOnRootBus bool) {
config.HypervisorConfig.HotplugVFIOOnRootBus = hotplugVFIOOnRootBus
}); err != nil {
return err
}
if err := newAnnotationConfiguration(ocispec, vcAnnotations.UseLegacySerial).setBool(func(useLegacySerial bool) { if err := newAnnotationConfiguration(ocispec, vcAnnotations.UseLegacySerial).setBool(func(useLegacySerial bool) {
config.HypervisorConfig.LegacySerial = useLegacySerial config.HypervisorConfig.LegacySerial = useLegacySerial
}); err != nil { }); err != nil {

View File

@ -659,7 +659,6 @@ func TestAddHypervisorAnnotations(t *testing.T) {
ocispec.Annotations[vcAnnotations.DisableVhostNet] = "true" ocispec.Annotations[vcAnnotations.DisableVhostNet] = "true"
ocispec.Annotations[vcAnnotations.GuestHookPath] = "/usr/bin/" ocispec.Annotations[vcAnnotations.GuestHookPath] = "/usr/bin/"
ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true" ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true"
ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus] = "true"
ocispec.Annotations[vcAnnotations.ColdPlugVFIO] = config.BridgePort ocispec.Annotations[vcAnnotations.ColdPlugVFIO] = config.BridgePort
ocispec.Annotations[vcAnnotations.HotPlugVFIO] = config.NoPort ocispec.Annotations[vcAnnotations.HotPlugVFIO] = config.NoPort
ocispec.Annotations[vcAnnotations.IOMMUPlatform] = "true" ocispec.Annotations[vcAnnotations.IOMMUPlatform] = "true"
@ -700,7 +699,6 @@ func TestAddHypervisorAnnotations(t *testing.T) {
assert.Equal(sbConfig.HypervisorConfig.DisableVhostNet, true) assert.Equal(sbConfig.HypervisorConfig.DisableVhostNet, true)
assert.Equal(sbConfig.HypervisorConfig.GuestHookPath, "/usr/bin/") assert.Equal(sbConfig.HypervisorConfig.GuestHookPath, "/usr/bin/")
assert.Equal(sbConfig.HypervisorConfig.DisableImageNvdimm, true) assert.Equal(sbConfig.HypervisorConfig.DisableImageNvdimm, true)
assert.Equal(sbConfig.HypervisorConfig.HotplugVFIOOnRootBus, true)
assert.Equal(string(sbConfig.HypervisorConfig.ColdPlugVFIO), string(config.BridgePort)) assert.Equal(string(sbConfig.HypervisorConfig.ColdPlugVFIO), string(config.BridgePort))
assert.Equal(string(sbConfig.HypervisorConfig.HotPlugVFIO), string(config.NoPort)) assert.Equal(string(sbConfig.HypervisorConfig.HotPlugVFIO), string(config.NoPort))
assert.Equal(sbConfig.HypervisorConfig.IOMMUPlatform, true) assert.Equal(sbConfig.HypervisorConfig.IOMMUPlatform, true)

View File

@ -18,6 +18,7 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/manager" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/manager"
deviceManager "github.com/kata-containers/kata-containers/src/runtime/pkg/device/manager"
volume "github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume" volume "github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc"
@ -869,6 +870,30 @@ func (c *Container) create(ctx context.Context) (err error) {
} }
} }
// If cold-plug we've attached the devices already, do not try to
// attach them a second time.
coldPlugVFIO := (c.sandbox.config.HypervisorConfig.ColdPlugVFIO != config.NoPort)
modeVFIO := (c.sandbox.config.VfioMode == config.VFIOModeVFIO)
if coldPlugVFIO {
var cntDevices []ContainerDevice
for _, dev := range c.devices {
isVFIOControlDevice := deviceManager.IsVFIOControlDevice(dev.ContainerPath)
if isVFIOControlDevice && modeVFIO {
cntDevices = append(cntDevices, dev)
}
if strings.HasPrefix(dev.ContainerPath, vfioPath) {
c.Logger().WithFields(logrus.Fields{
"device": dev,
}).Info("Remvoing device since we're cold-plugging no Attach needed")
continue
}
cntDevices = append(cntDevices, dev)
}
c.devices = cntDevices
}
c.Logger().WithFields(logrus.Fields{ c.Logger().WithFields(logrus.Fields{
"devices": c.devices, "devices": c.devices,
}).Info("Attach devices") }).Info("Attach devices")

View File

@ -284,10 +284,6 @@ type HypervisorConfig struct {
// DisableImageNvdimm is used to disable guest rootfs image nvdimm devices // DisableImageNvdimm is used to disable guest rootfs image nvdimm devices
DisableImageNvdimm bool DisableImageNvdimm bool
// HotplugVFIOOnRootBus is used to indicate if devices need to be hotplugged on the
// root bus instead of a bridge.
HotplugVFIOOnRootBus bool
// HotPlugVFIO is used to indicate if devices need to be hotplugged on the // HotPlugVFIO is used to indicate if devices need to be hotplugged on the
// root port, switch, bridge or no port // root port, switch, bridge or no port
HotPlugVFIO hv.PCIePort HotPlugVFIO hv.PCIePort

View File

@ -599,10 +599,6 @@ type HypervisorConfig struct {
// DisableImageNvdimm is used to disable guest rootfs image nvdimm devices // DisableImageNvdimm is used to disable guest rootfs image nvdimm devices
DisableImageNvdimm bool DisableImageNvdimm bool
// HotplugVFIOOnRootBus is used to indicate if devices need to be hotplugged on the
// root bus instead of a bridge.
HotplugVFIOOnRootBus bool
// GuestMemoryDumpPaging is used to indicate if enable paging // GuestMemoryDumpPaging is used to indicate if enable paging
// for QEMU dump-guest-memory command // for QEMU dump-guest-memory command
GuestMemoryDumpPaging bool GuestMemoryDumpPaging bool

View File

@ -244,7 +244,6 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
FileBackedMemRootList: sconfig.HypervisorConfig.FileBackedMemRootList, FileBackedMemRootList: sconfig.HypervisorConfig.FileBackedMemRootList,
DisableNestingChecks: sconfig.HypervisorConfig.DisableNestingChecks, DisableNestingChecks: sconfig.HypervisorConfig.DisableNestingChecks,
DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm, DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm,
HotplugVFIOOnRootBus: sconfig.HypervisorConfig.HotplugVFIOOnRootBus,
BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate, BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate,
BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate, BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate,
DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet, DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet,
@ -485,7 +484,6 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
FileBackedMemRootList: hconf.FileBackedMemRootList, FileBackedMemRootList: hconf.FileBackedMemRootList,
DisableNestingChecks: hconf.DisableNestingChecks, DisableNestingChecks: hconf.DisableNestingChecks,
DisableImageNvdimm: hconf.DisableImageNvdimm, DisableImageNvdimm: hconf.DisableImageNvdimm,
HotplugVFIOOnRootBus: hconf.HotplugVFIOOnRootBus,
HotPlugVFIO: hconf.HotPlugVFIO, HotPlugVFIO: hconf.HotPlugVFIO,
ColdPlugVFIO: hconf.ColdPlugVFIO, ColdPlugVFIO: hconf.ColdPlugVFIO,
BootToBeTemplate: hconf.BootToBeTemplate, BootToBeTemplate: hconf.BootToBeTemplate,

View File

@ -191,10 +191,6 @@ type HypervisorConfig struct {
// DisableImageNvdimm disables nvdimm for guest rootfs image // DisableImageNvdimm disables nvdimm for guest rootfs image
DisableImageNvdimm bool DisableImageNvdimm bool
// HotplugVFIOOnRootBus is used to indicate if devices need to be hotplugged on the
// root bus instead of a bridge.
HotplugVFIOOnRootBus bool
// HotPlugVFIO is used to indicate if devices need to be hotplugged on the // HotPlugVFIO is used to indicate if devices need to be hotplugged on the
// root, switch, bridge or no-port // root, switch, bridge or no-port
HotPlugVFIO config.PCIePort HotPlugVFIO config.PCIePort

View File

@ -122,10 +122,6 @@ const (
// DisableImageNvdimm is a sandbox annotation to specify use of nvdimm device for guest rootfs image. // DisableImageNvdimm is a sandbox annotation to specify use of nvdimm device for guest rootfs image.
DisableImageNvdimm = kataAnnotHypervisorPrefix + "disable_image_nvdimm" DisableImageNvdimm = kataAnnotHypervisorPrefix + "disable_image_nvdimm"
// HotplugVFIOOnRootBus is a sandbox annotation used to indicate if devices need to be hotplugged on the
// root bus instead of a bridge.
HotplugVFIOOnRootBus = kataAnnotHypervisorPrefix + "hotplug_vfio_on_root_bus"
// ColdPlugVFIO is a sandbox annotation used to indicate if devices need to be coldplugged. // ColdPlugVFIO is a sandbox annotation used to indicate if devices need to be coldplugged.
ColdPlugVFIO = kataAnnotHypervisorPrefix + "cold_plug_vfio" ColdPlugVFIO = kataAnnotHypervisorPrefix + "cold_plug_vfio"

View File

@ -65,11 +65,6 @@ const romFile = ""
// Default value is false. // Default value is false.
const defaultDisableModern = false const defaultDisableModern = false
// maxPCIeTopoDepth bounds how many parent_bus hops are followed when
// walking up from a device towards the root bus. A PCIe topology deeper
// than 5 is already inadvisable; 10 leaves some headroom, and the walk
// exits early once the root bus is reached.
const maxPCIeTopoDepth = 10
type qmpChannel struct { type qmpChannel struct {
qmp *govmmQemu.QMP qmp *govmmQemu.QMP
ctx context.Context ctx context.Context
@ -80,15 +75,14 @@ type qmpChannel struct {
// QemuState keeps Qemu's state // QemuState keeps Qemu's state
type QemuState struct { type QemuState struct {
UUID string UUID string
HotPlugVFIO config.PCIePort HotPlugVFIO config.PCIePort
Bridges []types.Bridge Bridges []types.Bridge
HotpluggedVCPUs []hv.CPUDevice HotpluggedVCPUs []hv.CPUDevice
HotpluggedMemory int HotpluggedMemory int
VirtiofsDaemonPid int VirtiofsDaemonPid int
HotplugVFIOOnRootBus bool HotplugVFIO config.PCIePort
HotplugVFIO config.PCIePort ColdPlugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort
} }
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor. // qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
@ -289,7 +283,6 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
q.state.UUID = uuid.Generate().String() q.state.UUID = uuid.Generate().String()
q.state.HotPlugVFIO = q.config.HotPlugVFIO q.state.HotPlugVFIO = q.config.HotPlugVFIO
q.state.ColdPlugVFIO = q.config.ColdPlugVFIO q.state.ColdPlugVFIO = q.config.ColdPlugVFIO
q.state.HotplugVFIOOnRootBus = q.config.HotplugVFIOOnRootBus
q.state.HotPlugVFIO = q.config.HotPlugVFIO q.state.HotPlugVFIO = q.config.HotPlugVFIO
// The path might already exist, but in case of VM templating, // The path might already exist, but in case of VM templating,
@ -782,6 +775,7 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
if err != nil { if err != nil {
return fmt.Errorf("Cannot get host path for device: %v err: %v", dev, err) return fmt.Errorf("Cannot get host path for device: %v err: %v", dev, err)
} }
devicesPerIOMMUGroup, err := drivers.GetAllVFIODevicesFromIOMMUGroup(dev) devicesPerIOMMUGroup, err := drivers.GetAllVFIODevicesFromIOMMUGroup(dev)
if err != nil { if err != nil {
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err) return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
@ -792,7 +786,7 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
} }
} }
} }
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus) vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort)
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort) vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
numOfVhostUserBlockDevices := len(hypervisorConfig.VhostUserBlkDevices) numOfVhostUserBlockDevices := len(hypervisorConfig.VhostUserBlkDevices)
@ -1638,7 +1632,7 @@ func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.V
config.PCIeDevices[config.RootPort][devID] = true config.PCIeDevices[config.RootPort][devID] = true
bridgeQomPath := fmt.Sprintf("%s%s", qomPathPrefix, bridgeID) bridgeQomPath := fmt.Sprintf("%s%s", qomPathPrefix, bridgeID)
bridgeSlot, err := q.qomGetSlot(bridgeQomPath) bridgeSlot, err := q.arch.qomGetSlot(bridgeQomPath, &q.qmpMonitorCh)
if err != nil { if err != nil {
return err return err
} }
@ -1741,88 +1735,6 @@ func (q *qemu) hotplugVhostUserDevice(ctx context.Context, vAttr *config.VhostUs
} }
} }
// qomGetSlot asks QEMU, over QMP, for the "addr" property of the object at
// qomPath (a QOM path or device ID) and converts it into a PCI slot.
//
// The slot number is taken from the value shifted right by three bits and
// the function number from its low three bits. Only function 0 is
// accepted; anything else is reported as an error.
func (q *qemu) qomGetSlot(qomPath string) (types.PciSlot, error) {
	var noSlot types.PciSlot

	raw, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, qomPath, "addr")
	if err != nil {
		return noSlot, err
	}

	// XXX going via float makes no real sense, but that's how JSON
	// works, and we'll get away with it for the small values we have
	// here.
	asFloat, isNumber := raw.(float64)
	if !isNumber {
		return noSlot, fmt.Errorf("addr QOM property of %q is %T not a number", qomPath, raw)
	}

	encoded := int(asFloat)
	if fn := encoded & 0x7; fn != 0 {
		return noSlot, fmt.Errorf("Unexpected non-zero PCI function (%02x.%1x) on %q",
			encoded>>3, fn, qomPath)
	}

	return types.PciSlotFromInt(encoded >> 3)
}
// qomGetPciPath queries QMP for the PCI path of a device, given its QOM
// path or ID.
//
// It starts from the device's own slot and repeatedly follows the
// "parent_bus" property upwards, prepending the slot of each parent found,
// until a bus matching the root-bus pattern is seen or maxPCIeTopoDepth
// hops have been made. Any QMP failure, or a property of an unexpected
// type or shape, is returned as an error.
func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) {
	devSlot, err := q.qomGetSlot(qemuID)
	if err != nil {
		return types.PciPath{}, err
	}
	slots := []types.PciSlot{devSlot}

	// This only works for Q35 and Virt. The pattern is a constant that
	// is known to compile, so MustCompile replaces the previously
	// ignored Compile error.
	rootBus := regexp.MustCompile(`^/machine/.*/pcie.0`)

	parentPath := qemuID
	// We do not want to use a forever loop here, a deeper PCIe topology
	// than 5 is already not advisable just for the sake of having enough
	// buffer we limit ourselves to 10 and leave the loop early if we hit
	// the root bus.
	for i := 1; i <= maxPCIeTopoDepth; i++ {
		parentBusQOM, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, parentPath, "parent_bus")
		if err != nil {
			return types.PciPath{}, err
		}

		busQOM, ok := parentBusQOM.(string)
		if !ok {
			// %T prints the dynamic type of the unexpected value; the
			// object named is the one that was actually queried.
			return types.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %T not a string", parentPath, parentBusQOM)
		}

		// If we hit /machine/q35/pcie.0 we're done this is the root bus
		// we climbed the complete hierarchy
		if rootBus.MatchString(busQOM) {
			break
		}

		// `bus` is the QOM path of the QOM bus object, but we need
		// the PCI parent_bus which manages that bus. There doesn't seem
		// to be a way to get that other than to simply drop the last
		// path component.
		idx := strings.LastIndex(busQOM, "/")
		if idx == -1 {
			return types.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", busQOM)
		}
		parentBus := busQOM[:idx]

		parentSlot, err := q.qomGetSlot(parentBus)
		if err != nil {
			return types.PciPath{}, err
		}

		// Prepend the slots, since we're climbing the hierarchy
		slots = append([]types.PciSlot{parentSlot}, slots...)
		parentPath = parentBus
	}

	return types.PciPathFromSlots(slots...)
}
func (q *qemu) hotplugVFIODeviceRootPort(ctx context.Context, device *config.VFIODev) (err error) { func (q *qemu) hotplugVFIODeviceRootPort(ctx context.Context, device *config.VFIODev) (err error) {
return q.executeVFIODeviceAdd(device) return q.executeVFIODeviceAdd(device)
} }
@ -1852,7 +1764,7 @@ func (q *qemu) executePCIVFIODeviceAdd(device *config.VFIODev, addr string, brid
case config.VFIOPCIDeviceMediatedType: case config.VFIOPCIDeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, addr, bridgeID, romFile) return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, addr, bridgeID, romFile)
case config.VFIOAPDeviceMediatedType: case config.VFIOAPDeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev) return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev, device.ID)
default: default:
return fmt.Errorf("Incorrect VFIO device type found") return fmt.Errorf("Incorrect VFIO device type found")
} }
@ -1865,7 +1777,7 @@ func (q *qemu) executeVFIODeviceAdd(device *config.VFIODev) error {
case config.VFIOPCIDeviceMediatedType: case config.VFIOPCIDeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, "", device.Bus, romFile) return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, "", device.Bus, romFile)
case config.VFIOAPDeviceMediatedType: case config.VFIOAPDeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev) return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev, device.ID)
default: default:
return fmt.Errorf("Incorrect VFIO device type found") return fmt.Errorf("Incorrect VFIO device type found")
} }
@ -1883,46 +1795,43 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
"hot-plug-vfio": q.state.HotPlugVFIO, "hot-plug-vfio": q.state.HotPlugVFIO,
"device-info": string(buf), "device-info": string(buf),
}).Info("Start hot-plug VFIO device") }).Info("Start hot-plug VFIO device")
// In case MachineType is q35, a PCIe device is hotplugged on
// a PCIe Root Port or alternatively on a PCIe Switch Port err = fmt.Errorf("Incorrect hot plug configuration %v for device %v found", q.state.HotPlugVFIO, device)
if q.HypervisorConfig().HypervisorMachineType != QemuQ35 && q.HypervisorConfig().HypervisorMachineType != QemuVirt { // In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
device.Bus = "" // for pc machine type instead of bridge. This is useful for devices that require
} else { // a large PCI BAR which is a currently a limitation with PCI bridges.
var err error if q.state.HotPlugVFIO == config.RootPort {
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus err = q.hotplugVFIODeviceRootPort(ctx, device)
// for pc machine type instead of bridge. This is useful for devices that require } else if q.state.HotPlugVFIO == config.SwitchPort {
// a large PCI BAR which is a currently a limitation with PCI bridges. err = q.hotplugVFIODeviceSwitchPort(ctx, device)
if q.state.HotPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus { } else if q.state.HotPlugVFIO == config.BridgePort {
err = q.hotplugVFIODeviceRootPort(ctx, device) err = q.hotplugVFIODeviceBridgePort(ctx, device)
} else if q.state.HotPlugVFIO == config.SwitchPort {
err = q.hotplugVFIODeviceSwitchPort(ctx, device)
} else {
err = q.hotplugVFIODeviceBridgePort(ctx, device)
}
if err != nil {
return err
}
} }
// XXX: Depending on whether we're doing root port or if err != nil {
return err
}
// Depending on whether we're doing root port or
// bridge hotplug, and how the bridge is set up in // bridge hotplug, and how the bridge is set up in
// other parts of the code, we may or may not already // other parts of the code, we may or may not already
// have information about the slot number of the // have information about the slot number of the
// bridge and or the device. For simplicity, just // bridge and or the device. For simplicity, just
// query both of them back from qemu // query both of them back from qemu based on the arch
device.GuestPciPath, err = q.qomGetPciPath(device.ID) device.GuestPciPath, err = q.arch.qomGetPciPath(device.ID, &q.qmpMonitorCh)
return err return err
} } else {
q.Logger().WithField("dev-id", device.ID).Info("Start hot-unplug VFIO device") q.Logger().WithField("dev-id", device.ID).Info("Start hot-unplug VFIO device")
if !q.state.HotplugVFIOOnRootBus { if q.state.HotPlugVFIO == config.BridgePort {
if err := q.arch.removeDeviceFromBridge(device.ID); err != nil { if err := q.arch.removeDeviceFromBridge(device.ID); err != nil {
return err return err
}
} }
return q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, device.ID)
} }
return q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, device.ID)
} }
func (q *qemu) hotAddNetDevice(name, hardAddr string, VMFds, VhostFds []*os.File) error { func (q *qemu) hotAddNetDevice(name, hardAddr string, VMFds, VhostFds []*os.File) error {
@ -2881,7 +2790,6 @@ func (q *qemu) Save() (s hv.HypervisorState) {
s.Type = string(QemuHypervisor) s.Type = string(QemuHypervisor)
s.UUID = q.state.UUID s.UUID = q.state.UUID
s.HotpluggedMemory = q.state.HotpluggedMemory s.HotpluggedMemory = q.state.HotpluggedMemory
s.HotplugVFIOOnRootBus = q.state.HotplugVFIOOnRootBus
for _, bridge := range q.arch.getBridges() { for _, bridge := range q.arch.getBridges() {
s.Bridges = append(s.Bridges, hv.Bridge{ s.Bridges = append(s.Bridges, hv.Bridge{
@ -2903,7 +2811,6 @@ func (q *qemu) Save() (s hv.HypervisorState) {
func (q *qemu) Load(s hv.HypervisorState) { func (q *qemu) Load(s hv.HypervisorState) {
q.state.UUID = s.UUID q.state.UUID = s.UUID
q.state.HotpluggedMemory = s.HotpluggedMemory q.state.HotpluggedMemory = s.HotpluggedMemory
q.state.HotplugVFIOOnRootBus = s.HotplugVFIOOnRootBus
q.state.VirtiofsDaemonPid = s.VirtiofsDaemonPid q.state.VirtiofsDaemonPid = s.VirtiofsDaemonPid
for _, bridge := range s.Bridges { for _, bridge := range s.Bridges {

View File

@ -13,6 +13,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"os" "os"
"regexp"
"runtime" "runtime"
"strings" "strings"
@ -24,6 +25,11 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
) )
// maxPCIeTopoDepth bounds how many parent_bus hops are followed when
// walking up from a device towards the root bus. A PCIe topology deeper
// than 5 is already inadvisable; 10 leaves some headroom, and the walk
// exits early once the root bus is reached.
const maxPCIeTopoDepth = 10
type qemuArch interface { type qemuArch interface {
// enableNestingChecks nesting checks will be honoured // enableNestingChecks nesting checks will be honoured
enableNestingChecks() enableNestingChecks()
@ -158,6 +164,12 @@ type qemuArch interface {
// scans the PCIe space and returns the biggest BAR sizes for 32-bit // scans the PCIe space and returns the biggest BAR sizes for 32-bit
// and 64-bit addressable memory // and 64-bit addressable memory
getBARsMaxAddressableMemory() (uint64, uint64) getBARsMaxAddressableMemory() (uint64, uint64)
// Query QMP to find a device's PCI path given its QOM path or ID
qomGetPciPath(qemuID string, qmpCh *qmpChannel) (types.PciPath, error)
// Query QMP to find the PCI slot of a device, given its QOM path or ID
qomGetSlot(qomPath string, qmpCh *qmpChannel) (types.PciSlot, error)
} }
type qemuArchBase struct { type qemuArchBase struct {
@ -881,3 +893,85 @@ func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmwa
hvLogger.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture") hvLogger.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture")
return devices, firmware, nil return devices, firmware, nil
} }
// qomGetSlot asks QEMU, over the given QMP channel, for the "addr"
// property of the object at qomPath (a QOM path or device ID) and converts
// it into a PCI slot.
//
// The slot number is taken from the value shifted right by three bits and
// the function number from its low three bits. Only function 0 is
// accepted; anything else is reported as an error.
func (q *qemuArchBase) qomGetSlot(qomPath string, qmpCh *qmpChannel) (types.PciSlot, error) {
	var noSlot types.PciSlot

	raw, err := qmpCh.qmp.ExecQomGet(qmpCh.ctx, qomPath, "addr")
	if err != nil {
		return noSlot, err
	}

	// XXX going via float makes no real sense, but that's how JSON
	// works, and we'll get away with it for the small values we have
	// here.
	asFloat, isNumber := raw.(float64)
	if !isNumber {
		return noSlot, fmt.Errorf("addr QOM property of %q is %T not a number", qomPath, raw)
	}

	encoded := int(asFloat)
	if fn := encoded & 0x7; fn != 0 {
		return noSlot, fmt.Errorf("Unexpected non-zero PCI function (%02x.%1x) on %q",
			encoded>>3, fn, qomPath)
	}

	return types.PciSlotFromInt(encoded >> 3)
}
// qomGetPciPath queries QMP, over the given channel, for the PCI path of a
// device, given its QOM path or ID.
//
// It starts from the device's own slot and repeatedly follows the
// "parent_bus" property upwards, prepending the slot of each parent found,
// until a bus matching the root-bus pattern is seen or maxPCIeTopoDepth
// hops have been made. Any QMP failure, or a property of an unexpected
// type or shape, is returned as an error.
func (q *qemuArchBase) qomGetPciPath(qemuID string, qmpCh *qmpChannel) (types.PciPath, error) {
	devSlot, err := q.qomGetSlot(qemuID, qmpCh)
	if err != nil {
		return types.PciPath{}, err
	}
	slots := []types.PciSlot{devSlot}

	// This only works for Q35 and Virt. The pattern is a constant that
	// is known to compile, so MustCompile replaces the previously
	// ignored Compile error.
	rootBus := regexp.MustCompile(`^/machine/.*/pcie.0`)

	parentPath := qemuID
	// We do not want to use a forever loop here, a deeper PCIe topology
	// than 5 is already not advisable just for the sake of having enough
	// buffer we limit ourselves to 10 and leave the loop early if we hit
	// the root bus.
	for i := 1; i <= maxPCIeTopoDepth; i++ {
		parentBusQOM, err := qmpCh.qmp.ExecQomGet(qmpCh.ctx, parentPath, "parent_bus")
		if err != nil {
			return types.PciPath{}, err
		}

		busQOM, ok := parentBusQOM.(string)
		if !ok {
			// %T prints the dynamic type of the unexpected value; the
			// object named is the one that was actually queried.
			return types.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %T not a string", parentPath, parentBusQOM)
		}

		// If we hit /machine/q35/pcie.0 we're done this is the root bus
		// we climbed the complete hierarchy
		if rootBus.MatchString(busQOM) {
			break
		}

		// `bus` is the QOM path of the QOM bus object, but we need
		// the PCI parent_bus which manages that bus. There doesn't seem
		// to be a way to get that other than to simply drop the last
		// path component.
		idx := strings.LastIndex(busQOM, "/")
		if idx == -1 {
			return types.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", busQOM)
		}
		parentBus := busQOM[:idx]

		parentSlot, err := q.qomGetSlot(parentBus, qmpCh)
		if err != nil {
			return types.PciPath{}, err
		}

		// Prepend the slots, since we're climbing the hierarchy
		slots = append([]types.PciSlot{parentSlot}, slots...)
		parentPath = parentBus
	}

	return types.PciPathFromSlots(slots...)
}

View File

@ -351,3 +351,32 @@ func (q *qemuS390x) appendProtectionDevice(devices []govmmQemu.Device, firmware,
return devices, firmware, fmt.Errorf("Unsupported guest protection technology: %v", q.protection) return devices, firmware, fmt.Errorf("Unsupported guest protection technology: %v", q.protection)
} }
} }
// appendVFIODevice appends a VFIO device built from vfioDev to devices and
// returns the resulting slice. Nothing is added when vfioDev has no sysfs
// device. When vfioDev lists AP devices the AP transport is set on the new
// device; otherwise the transport is left at its zero value.
func (q *qemuS390x) appendVFIODevice(devices []govmmQemu.Device, vfioDev config.VFIODev) []govmmQemu.Device {
	if vfioDev.SysfsDev == "" {
		return devices
	}

	vfio := govmmQemu.VFIODevice{SysfsDev: vfioDev.SysfsDev}
	if len(vfioDev.APDevices) > 0 {
		vfio.Transport = govmmQemu.TransportAP
	}

	return append(devices, vfio)
}
// qomGetPciPath is not implemented for s390x: it logs a warning and
// returns an empty PCI path together with a nil error. Neither qemuID nor
// qmpCh is used.
func (q *qemuS390x) qomGetPciPath(qemuID string, qmpCh *qmpChannel) (types.PciPath, error) {
	var emptyPath types.PciPath

	hvLogger.Warnf("qomGetPciPath not implemented for s390x")

	return emptyPath, nil
}

View File

@ -613,51 +613,10 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
return nil, err return nil, err
} }
// If we have a confidential guest we need to cold-plug the PCIe VFIO devices coldPlugVFIO, err := s.coldOrHotPlugVFIO(&sandboxConfig)
// until we have TDISP/IDE PCIe support. if err != nil {
coldPlugVFIO := (sandboxConfig.HypervisorConfig.ColdPlugVFIO != config.NoPort) return nil, err
// Aggregate all the container devices for hot-plug and use them to deduce
// the correct amount of ports to reserve for the hypervisor.
hotPlugVFIO := (sandboxConfig.HypervisorConfig.HotPlugVFIO != config.NoPort)
var vfioDevices []config.DeviceInfo
// vhost-user-block device is a PCIe device in Virt, keep track of it
// for correct number of PCIe root ports.
var vhostUserBlkDevices []config.DeviceInfo
for cnt, containers := range sandboxConfig.Containers {
for dev, device := range containers.DeviceInfos {
if deviceManager.IsVhostUserBlk(device) {
vhostUserBlkDevices = append(vhostUserBlkDevices, device)
continue
}
isVFIO := deviceManager.IsVFIO(device.ContainerPath)
if hotPlugVFIO && isVFIO {
vfioDevices = append(vfioDevices, device)
sandboxConfig.Containers[cnt].DeviceInfos[dev].Port = sandboxConfig.HypervisorConfig.HotPlugVFIO
}
if coldPlugVFIO && isVFIO {
device.ColdPlug = true
device.Port = sandboxConfig.HypervisorConfig.ColdPlugVFIO
vfioDevices = append(vfioDevices, device)
// We need to remove the devices marked for cold-plug
// otherwise at the container level the kata-agent
// will try to hot-plug them.
sandboxConfig.Containers[cnt].DeviceInfos[dev].ID = "remove-we-are-cold-plugging"
}
}
var filteredDevices []config.DeviceInfo
for _, device := range containers.DeviceInfos {
if device.ID != "remove-we-are-cold-plugging" {
filteredDevices = append(filteredDevices, device)
}
}
sandboxConfig.Containers[cnt].DeviceInfos = filteredDevices
} }
sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices
sandboxConfig.HypervisorConfig.VhostUserBlkDevices = vhostUserBlkDevices
// store doesn't require hypervisor to be stored immediately // store doesn't require hypervisor to be stored immediately
if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil { if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil {
@ -672,7 +631,8 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
return s, nil return s, nil
} }
for _, dev := range vfioDevices { for _, dev := range sandboxConfig.HypervisorConfig.VFIODevices {
s.Logger().Info("cold-plug device: ", dev)
_, err := s.AddDevice(ctx, dev) _, err := s.AddDevice(ctx, dev)
if err != nil { if err != nil {
s.Logger().WithError(err).Debug("Cannot cold-plug add device") s.Logger().WithError(err).Debug("Cannot cold-plug add device")
@ -682,6 +642,70 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
return s, nil return s, nil
} }
// coldOrHotPlugVFIO walks the device list of every container in
// sandboxConfig and records, in sandboxConfig.HypervisorConfig, the VFIO
// devices (VFIODevices) and vhost-user-blk devices (VhostUserBlkDevices)
// found there.
//
// Hot-plug candidates get the configured hot-plug port, both on the copy
// recorded for the hypervisor and on the container's own entry. Cold-plug
// candidates are recorded with ColdPlug set and the cold-plug port; in
// guest-kernel VFIO mode they are additionally dropped from the
// container's device list.
//
// It returns whether cold-plug is configured (ColdPlugVFIO is not NoPort).
// The returned error is always nil.
func (s *Sandbox) coldOrHotPlugVFIO(sandboxConfig *SandboxConfig) (bool, error) {
	// Marker written into the ID of entries that must be dropped from a
	// container's device list once the scan of that container is done.
	const dropMarker = "remove-we-are-cold-plugging"

	hvConfig := &sandboxConfig.HypervisorConfig

	// If we have a confidential guest we need to cold-plug the PCIe VFIO
	// devices until we have TDISP/IDE PCIe support.
	coldPlugVFIO := hvConfig.ColdPlugVFIO != config.NoPort
	// Aggregate all the container devices for hot-plug and use them to
	// deduce the correct amount of ports to reserve for the hypervisor.
	hotPlugVFIO := hvConfig.HotPlugVFIO != config.NoPort

	guestKernelMode := sandboxConfig.VfioMode == config.VFIOModeGuestKernel
	vfioMode := sandboxConfig.VfioMode == config.VFIOModeVFIO

	var (
		vfioDevices []config.DeviceInfo
		// vhost-user-block device is a PCIe device in Virt, keep track
		// of it for correct number of PCIe root ports.
		vhostUserBlkDevices []config.DeviceInfo
	)

	for ctrIdx, ctr := range sandboxConfig.Containers {
		for devIdx, info := range ctr.DeviceInfos {
			if deviceManager.IsVhostUserBlk(info) {
				vhostUserBlkDevices = append(vhostUserBlkDevices, info)
				continue
			}

			isDevice := deviceManager.IsVFIODevice(info.ContainerPath)
			isControl := deviceManager.IsVFIOControlDevice(info.ContainerPath)

			// vfio_mode=vfio needs the VFIO control device, add it to
			// the list of devices to be added to the VM.
			if vfioMode && isControl && !hotPlugVFIO {
				vfioDevices = append(vfioDevices, info)
			}

			// Everything below only concerns actual VFIO devices.
			if !isDevice {
				continue
			}

			if hotPlugVFIO {
				info.ColdPlug = false
				info.Port = hvConfig.HotPlugVFIO
				vfioDevices = append(vfioDevices, info)
				sandboxConfig.Containers[ctrIdx].DeviceInfos[devIdx].Port = hvConfig.HotPlugVFIO
			}

			if coldPlugVFIO {
				info.ColdPlug = true
				info.Port = hvConfig.ColdPlugVFIO
				vfioDevices = append(vfioDevices, info)
				// We need to remove the devices marked for cold-plug
				// otherwise at the container level the kata-agent
				// will try to hot-plug them.
				if guestKernelMode {
					sandboxConfig.Containers[ctrIdx].DeviceInfos[devIdx].ID = dropMarker
				}
			}
		}

		// Rebuild the container's device list without the marked entries.
		var kept []config.DeviceInfo
		for _, info := range ctr.DeviceInfos {
			if info.ID == dropMarker {
				continue
			}
			kept = append(kept, info)
		}
		sandboxConfig.Containers[ctrIdx].DeviceInfos = kept
	}

	hvConfig.VFIODevices = vfioDevices
	hvConfig.VhostUserBlkDevices = vhostUserBlkDevices

	return coldPlugVFIO, nil
}
func (s *Sandbox) createResourceController() error { func (s *Sandbox) createResourceController() error {
var err error var err error
cgroupPath := "" cgroupPath := ""
@ -2049,26 +2073,26 @@ func (s *Sandbox) AddDevice(ctx context.Context, info config.DeviceInfo) (api.De
} }
var err error var err error
b, err := s.devManager.NewDevice(info) add, err := s.devManager.NewDevice(info)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer func() { defer func() {
if err != nil { if err != nil {
s.devManager.RemoveDevice(b.DeviceID()) s.devManager.RemoveDevice(add.DeviceID())
} }
}() }()
if err = s.devManager.AttachDevice(ctx, b.DeviceID(), s); err != nil { if err = s.devManager.AttachDevice(ctx, add.DeviceID(), s); err != nil {
return nil, err return nil, err
} }
defer func() { defer func() {
if err != nil { if err != nil {
s.devManager.DetachDevice(ctx, b.DeviceID(), s) s.devManager.DetachDevice(ctx, add.DeviceID(), s)
} }
}() }()
return b, nil return add, nil
} }
// updateResources will: // updateResources will:

View File

@ -606,6 +606,7 @@ func TestSandboxAttachDevicesVFIO(t *testing.T) {
HostPath: path, HostPath: path,
ContainerPath: path, ContainerPath: path,
DevType: "c", DevType: "c",
Port: config.RootPort,
} }
dev, err := dm.NewDevice(deviceInfo) dev, err := dm.NewDevice(deviceInfo)
assert.Nil(t, err, "deviceManager.NewDevice return error: %v", err) assert.Nil(t, err, "deviceManager.NewDevice return error: %v", err)