Merge pull request #7382 from zvonkok/vfio-ap-debug

s390x: Fixing device.Bus assignment
This commit is contained in:
Fabiano Fidêncio 2023-07-25 08:26:25 +02:00 committed by GitHub
commit 5ce0b4743f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 490 additions and 316 deletions

View File

@ -103,20 +103,19 @@ type RuntimeVersionInfo struct {
// HypervisorInfo stores hypervisor details
type HypervisorInfo struct {
MachineType string
Version string
Path string
BlockDeviceDriver string
EntropySource string
SharedFS string
VirtioFSDaemon string
SocketPath string
Msize9p uint32
MemorySlots uint32
HotPlugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort
HotplugVFIOOnRootBus bool
Debug bool
MachineType string
Version string
Path string
BlockDeviceDriver string
EntropySource string
SharedFS string
VirtioFSDaemon string
SocketPath string
Msize9p uint32
MemorySlots uint32
HotPlugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort
Debug bool
}
// AgentInfo stores agent details
@ -307,20 +306,19 @@ func getHypervisorInfo(config oci.RuntimeConfig) (HypervisorInfo, error) {
}
return HypervisorInfo{
Debug: config.HypervisorConfig.Debug,
MachineType: config.HypervisorConfig.HypervisorMachineType,
Version: version,
Path: hypervisorPath,
BlockDeviceDriver: config.HypervisorConfig.BlockDeviceDriver,
Msize9p: config.HypervisorConfig.Msize9p,
MemorySlots: config.HypervisorConfig.MemSlots,
EntropySource: config.HypervisorConfig.EntropySource,
SharedFS: config.HypervisorConfig.SharedFS,
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO,
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
SocketPath: socketPath,
Debug: config.HypervisorConfig.Debug,
MachineType: config.HypervisorConfig.HypervisorMachineType,
Version: version,
Path: hypervisorPath,
BlockDeviceDriver: config.HypervisorConfig.BlockDeviceDriver,
Msize9p: config.HypervisorConfig.Msize9p,
MemorySlots: config.HypervisorConfig.MemSlots,
EntropySource: config.HypervisorConfig.EntropySource,
SharedFS: config.HypervisorConfig.SharedFS,
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO,
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
SocketPath: socketPath,
}, nil
}

View File

@ -87,7 +87,6 @@ func makeRuntimeConfig(prefixDir string) (configFile string, ociConfig oci.Runti
disableBlock := true
blockStorageDriver := "virtio-scsi"
enableIOThreads := true
hotplugVFIOOnRootBus := true
hotPlugVFIO = config.BridgePort
coldPlugVFIO = config.NoPort
disableNewNetNs := false
@ -132,7 +131,6 @@ func makeRuntimeConfig(prefixDir string) (configFile string, ociConfig oci.Runti
DisableBlock: disableBlock,
BlockDeviceDriver: blockStorageDriver,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
HotPlugVFIO: hotPlugVFIO,
ColdPlugVFIO: coldPlugVFIO,
DisableNewNetNs: disableNewNetNs,
@ -276,10 +274,8 @@ func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo {
EntropySource: config.HypervisorConfig.EntropySource,
SharedFS: config.HypervisorConfig.SharedFS,
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO,
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO,
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
}
if os.Geteuid() == 0 {

View File

@ -330,31 +330,29 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (runtimeConfig string,
disableBlockDevice := true
blockDeviceDriver := "virtio-scsi"
enableIOThreads := true
hotplugVFIOOnRootBus := true
disableNewNetNs := false
sharedFS := "virtio-9p"
virtioFSdaemon := path.Join(dir, "virtiofsd")
hotPlugVFIO = config.BridgePort
coldPlugVFIO = config.RootPort
coldPlugVFIO = config.NoPort
configFileOptions := ktu.RuntimeConfigOptions{
Hypervisor: "qemu",
HypervisorPath: hypervisorPath,
KernelPath: kernelPath,
ImagePath: imagePath,
RootfsType: rootfsType,
KernelParams: kernelParams,
MachineType: machineType,
LogPath: logPath,
DisableBlock: disableBlockDevice,
BlockDeviceDriver: blockDeviceDriver,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
DisableNewNetNs: disableNewNetNs,
SharedFS: sharedFS,
VirtioFSDaemon: virtioFSdaemon,
HotPlugVFIO: hotPlugVFIO,
ColdPlugVFIO: coldPlugVFIO,
Hypervisor: "qemu",
HypervisorPath: hypervisorPath,
KernelPath: kernelPath,
ImagePath: imagePath,
RootfsType: rootfsType,
KernelParams: kernelParams,
MachineType: machineType,
LogPath: logPath,
DisableBlock: disableBlockDevice,
BlockDeviceDriver: blockDeviceDriver,
EnableIOThreads: enableIOThreads,
DisableNewNetNs: disableNewNetNs,
SharedFS: sharedFS,
VirtioFSDaemon: virtioFSdaemon,
HotPlugVFIO: hotPlugVFIO,
ColdPlugVFIO: coldPlugVFIO,
}
runtimeConfigFileData := ktu.MakeRuntimeConfigFileData(configFileOptions)

View File

@ -111,7 +111,7 @@ func GetVFIODeviceType(deviceFilePath string) (config.VFIODeviceType, error) {
return config.VFIODeviceErrorType, err
}
if strings.HasPrefix(deviceSysfsDev, vfioAPSysfsDir) {
if strings.Contains(deviceSysfsDev, vfioAPSysfsDir) {
return config.VFIOAPDeviceMediatedType, nil
}
@ -178,22 +178,22 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODe
}
id := utils.MakeNameID("vfio", device.ID+strconv.Itoa(i), maxDevIDSize)
pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
// We need to ignore Host or PCI Bridges that are in the same IOMMU group as the
// passed-through devices. One CANNOT pass-through a PCI bridge or Host bridge.
// Class 0x0604 is PCI bridge, 0x0600 is Host bridge
ignorePCIDevice, err := checkIgnorePCIClass(pciClass, deviceBDF, 0x0600)
if err != nil {
return nil, err
}
if ignorePCIDevice {
continue
}
var vfio config.VFIODev
switch vfioDeviceType {
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
// This is vfio-pci and vfio-mdev specific
pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
// We need to ignore Host or PCI Bridges that are in the same IOMMU group as the
// passed-through devices. One CANNOT pass-through a PCI bridge or Host bridge.
// Class 0x0604 is PCI bridge, 0x0600 is Host bridge
ignorePCIDevice, err := checkIgnorePCIClass(pciClass, deviceBDF, 0x0600)
if err != nil {
return nil, err
}
if ignorePCIDevice {
continue
}
// Do not directly assign to `vfio` -- need to access field still
vfio = config.VFIODev{
ID: id,
@ -216,6 +216,7 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODe
SysfsDev: deviceSysfsDev,
Type: config.VFIOAPDeviceMediatedType,
APDevices: devices,
Port: device.Port,
}
default:
return nil, fmt.Errorf("Failed to append device: VFIO device type unrecognized")

View File

@ -69,7 +69,14 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
if err != nil {
return err
}
for _, vfio := range device.VfioDevs {
// If vfio.Port is not set we bail out, users should set
// explicitly the port in the config file
if vfio.Port == "" {
return fmt.Errorf("cold_plug_vfio= or hot_plug_vfio= port is not set for device %s (BridgePort | RootPort | SwitchPort)", vfio.BDF)
}
if vfio.IsPCIe {
busIndex := len(config.PCIeDevices[vfio.Port])
vfio.Bus = fmt.Sprintf("%s%d", config.PCIePortPrefixMapping[vfio.Port], busIndex)

View File

@ -120,7 +120,7 @@ func (dm *deviceManager) createDevice(devInfo config.DeviceInfo) (dev api.Device
if devInfo.ID, err = dm.newDeviceID(); err != nil {
return nil, err
}
if IsVFIO(devInfo.HostPath) {
if IsVFIODevice(devInfo.HostPath) {
return drivers.NewVFIODevice(&devInfo), nil
} else if IsVhostUserBlk(devInfo) {
if devInfo.DriverOptions == nil {
@ -191,12 +191,12 @@ func (dm *deviceManager) AttachDevice(ctx context.Context, id string, dr api.Dev
dm.Lock()
defer dm.Unlock()
d, ok := dm.devices[id]
dev, ok := dm.devices[id]
if !ok {
return ErrDeviceNotExist
}
if err := d.Attach(ctx, dr); err != nil {
if err := dev.Attach(ctx, dr); err != nil {
return err
}
return nil

View File

@ -90,6 +90,100 @@ func TestNewDevice(t *testing.T) {
assert.Equal(t, vfioDev.DeviceInfo.GID, uint32(2))
}
func TestAttachVFIOAPDevice(t *testing.T) {
var err error
var ok bool
dm := &deviceManager{
devices: make(map[string]api.Device),
}
tmpDir := t.TempDir()
// sys/devices/vfio_ap/matrix/f94290f8-78ac-45fb-bb22-e55e519fa64f
testSysfsAP := "/sys/devices/vfio_ap/"
testDeviceAP := "f94290f8-78ac-45fb-bb22-e55e519fa64f"
testVFIOGroup := "42"
matrixDir := filepath.Join(tmpDir, testSysfsAP, "matrix")
err = os.MkdirAll(matrixDir, dirMode)
assert.Nil(t, err)
deviceAPFile := filepath.Join(matrixDir, testDeviceAP)
err = os.MkdirAll(deviceAPFile, dirMode)
assert.Nil(t, err)
matrixDeviceAPFile := filepath.Join(deviceAPFile, "matrix")
_, err = os.Create(matrixDeviceAPFile)
assert.Nil(t, err)
// create AP devices in the matrix file
APDevices := []byte("05.001f\n")
err = os.WriteFile(matrixDeviceAPFile, APDevices, 0644)
assert.Nil(t, err)
devicesVFIOGroupDir := filepath.Join(tmpDir, testVFIOGroup, "devices")
err = os.MkdirAll(devicesVFIOGroupDir, dirMode)
assert.Nil(t, err)
deviceAPSymlink := filepath.Join(devicesVFIOGroupDir, testDeviceAP)
err = os.Symlink(deviceAPFile, deviceAPSymlink)
assert.Nil(t, err)
savedIOMMUPath := config.SysIOMMUGroupPath
config.SysIOMMUGroupPath = tmpDir
savedSysBusPciDevicesPath := config.SysBusPciDevicesPath
config.SysBusPciDevicesPath = devicesVFIOGroupDir
defer func() {
config.SysIOMMUGroupPath = savedIOMMUPath
config.SysBusPciDevicesPath = savedSysBusPciDevicesPath
}()
path := filepath.Join(vfioPath, testVFIOGroup)
deviceInfo := config.DeviceInfo{
HostPath: path,
ContainerPath: path,
DevType: "c",
ColdPlug: false,
Port: config.RootPort,
}
device, err := dm.NewDevice(deviceInfo)
assert.Nil(t, err)
_, ok = device.(*drivers.VFIODevice)
assert.True(t, ok)
devReceiver := &api.MockDeviceReceiver{}
err = device.Attach(context.Background(), devReceiver)
assert.Nil(t, err)
err = device.Detach(context.Background(), devReceiver)
assert.Nil(t, err)
// If we omit the port setting we should fail
failDm := &deviceManager{
devices: make(map[string]api.Device),
}
failDeviceInfo := config.DeviceInfo{
HostPath: path,
ContainerPath: path,
DevType: "c",
ColdPlug: false,
}
failDevice, err := failDm.NewDevice(failDeviceInfo)
assert.Nil(t, err)
_, ok = failDevice.(*drivers.VFIODevice)
assert.True(t, ok)
failDevReceiver := &api.MockDeviceReceiver{}
err = failDevice.Attach(context.Background(), failDevReceiver)
assert.Error(t, err)
}
func TestAttachVFIODevice(t *testing.T) {
dm := &deviceManager{
blockDriver: config.VirtioBlock,
@ -132,6 +226,8 @@ func TestAttachVFIODevice(t *testing.T) {
HostPath: path,
ContainerPath: path,
DevType: "c",
ColdPlug: false,
Port: config.RootPort,
}
device, err := dm.NewDevice(deviceInfo)

View File

@ -17,8 +17,15 @@ const (
vfioPath = "/dev/vfio/"
)
// IsVFIOControlDevice checks if the device provided is a vfio control device.
// Depending no the vfio_mode we need to know if a device is a VFIO device
// or the VFIO control device
func IsVFIOControlDevice(path string) bool {
return path == filepath.Join(vfioPath, "vfio")
}
// IsVFIO checks if the device provided is a vfio group.
func IsVFIO(hostPath string) bool {
func IsVFIODevice(hostPath string) bool {
// Ignore /dev/vfio/vfio character device
if strings.HasPrefix(hostPath, filepath.Join(vfioPath, "vfio")) {
return false

View File

@ -31,7 +31,7 @@ func TestIsVFIO(t *testing.T) {
}
for _, d := range data {
isVFIO := IsVFIO(d.path)
isVFIO := IsVFIODevice(d.path)
assert.Equal(t, d.expected, isVFIO)
}
}

View File

@ -163,6 +163,9 @@ const (
// TransportMMIO is the MMIO transport for virtio devices.
TransportMMIO VirtioTransport = "mmio"
// TransportAP is the AP transport for virtio devices.
TransportAP VirtioTransport = "ap"
)
// defaultTransport returns the default transport for the current combination
@ -199,6 +202,14 @@ func (transport VirtioTransport) isVirtioCCW(config *Config) bool {
return transport == TransportCCW
}
func (transport VirtioTransport) isVirtioAP(config *Config) bool {
if transport == "" {
transport = transport.defaultTransport(config)
}
return transport == TransportAP
}
// getName returns the name of the current transport.
func (transport VirtioTransport) getName(config *Config) string {
if transport == "" {
@ -1811,6 +1822,9 @@ type VFIODevice struct {
// Transport is the virtio transport for this device.
Transport VirtioTransport
// SysfsDev specifies the sysfs matrix entry for the AP device
SysfsDev string
}
// VFIODeviceTransport is a map of the vfio device name that corresponds to
@ -1819,11 +1833,13 @@ var VFIODeviceTransport = map[VirtioTransport]string{
TransportPCI: "vfio-pci",
TransportCCW: "vfio-ccw",
TransportMMIO: "vfio-device",
TransportAP: "vfio-ap",
}
// Valid returns true if the VFIODevice structure is valid and complete.
// s390x architecture requires SysfsDev to be set.
func (vfioDev VFIODevice) Valid() bool {
return vfioDev.BDF != ""
return vfioDev.BDF != "" || vfioDev.SysfsDev != ""
}
// QemuParams returns the qemu parameters built out of this vfio device.
@ -1833,6 +1849,15 @@ func (vfioDev VFIODevice) QemuParams(config *Config) []string {
driver := vfioDev.deviceName(config)
if vfioDev.Transport.isVirtioAP(config) {
deviceParams = append(deviceParams, fmt.Sprintf("%s,sysfsdev=%s", driver, vfioDev.SysfsDev))
qemuParams = append(qemuParams, "-device")
qemuParams = append(qemuParams, strings.Join(deviceParams, ","))
return qemuParams
}
deviceParams = append(deviceParams, fmt.Sprintf("%s,host=%s", driver, vfioDev.BDF))
if vfioDev.Transport.isVirtioPCI(config) {
if vfioDev.VendorID != "" {
@ -2837,10 +2862,9 @@ func (config *Config) appendDevices(logger QMPLog) {
for _, d := range config.Devices {
if !d.Valid() {
logger.Errorf("vm device is not valid: %+v", config.Devices)
logger.Errorf("vm device is not valid: %+v", d)
continue
}
config.qemuParams = append(config.qemuParams, d.QemuParams(config)...)
}
}

View File

@ -1217,10 +1217,11 @@ func (q *QMP) ExecutePCIVFIOMediatedDeviceAdd(ctx context.Context, devID, sysfsd
}
// ExecuteAPVFIOMediatedDeviceAdd adds a VFIO mediated AP device to a QEMU instance using the device_add command.
func (q *QMP) ExecuteAPVFIOMediatedDeviceAdd(ctx context.Context, sysfsdev string) error {
func (q *QMP) ExecuteAPVFIOMediatedDeviceAdd(ctx context.Context, sysfsdev string, devID string) error {
args := map[string]interface{}{
"driver": VfioAP,
"sysfsdev": sysfsdev,
"id": devID,
}
return q.executeCommand(ctx, "device_add", args, nil)
}

View File

@ -1128,7 +1128,7 @@ func TestQMPAPVFIOMediatedDeviceAdd(t *testing.T) {
q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh)
checkVersion(t, connectedCh)
sysfsDev := "/sys/devices/vfio_ap/matrix/a297db4a-f4c2-11e6-90f6-d3b88d6c9525"
err := q.ExecuteAPVFIOMediatedDeviceAdd(context.Background(), sysfsDev)
err := q.ExecuteAPVFIOMediatedDeviceAdd(context.Background(), sysfsDev, "test-id")
if err != nil {
t.Fatalf("Unexpected error %v", err)
}

View File

@ -42,10 +42,9 @@ type HypervisorState struct {
// HotpluggedCPUs is the list of CPUs that were hot-added
HotpluggedVCPUs []CPUDevice
HotpluggedMemory int
VirtiofsDaemonPid int
Pid int
HotPlugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort
HotplugVFIOOnRootBus bool
HotpluggedMemory int
VirtiofsDaemonPid int
Pid int
HotPlugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort
}

View File

@ -233,7 +233,6 @@ type RuntimeConfigOptions struct {
DefaultMsize9p uint32
DisableBlock bool
EnableIOThreads bool
HotplugVFIOOnRootBus bool
DisableNewNetNs bool
HypervisorDebug bool
RuntimeDebug bool
@ -317,8 +316,8 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
default_memory = ` + strconv.FormatUint(uint64(config.DefaultMemSize), 10) + `
disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + `
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
hotplug_vfio_on_root_bus = ` + strconv.FormatBool(config.HotplugVFIOOnRootBus) + `
cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `"
hot_plug_vfio = "` + config.HotPlugVFIO.String() + `"
msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + `
enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + `
guest_hook_path = "` + config.DefaultGuestHookPath + `"

View File

@ -81,7 +81,6 @@ const defaultFileBackedMemRootDir string = ""
const defaultEnableDebug bool = false
const defaultDisableNestingChecks bool = false
const defaultMsize9p uint32 = 8192
const defaultHotplugVFIOOnRootBus bool = false
const defaultEntropySource = "/dev/urandom"
const defaultGuestHookPath string = ""
const defaultVirtioFSCacheMode = "never"

View File

@ -22,6 +22,7 @@ import (
govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
@ -146,7 +147,6 @@ type hypervisor struct {
DisableNestingChecks bool `toml:"disable_nesting_checks"`
EnableIOThreads bool `toml:"enable_iothreads"`
DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"`
ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"`
DisableVhostNet bool `toml:"disable_vhost_net"`
@ -867,7 +867,6 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
EnableIOThreads: h.EnableIOThreads,
Msize9p: h.msize9p(),
DisableImageNvdimm: h.DisableImageNvdimm,
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
HotPlugVFIO: h.hotPlugVFIO(),
ColdPlugVFIO: h.coldPlugVFIO(),
DisableVhostNet: h.DisableVhostNet,
@ -1063,7 +1062,6 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush,
EnableIOThreads: h.EnableIOThreads,
Msize9p: h.msize9p(),
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
ColdPlugVFIO: h.coldPlugVFIO(),
HotPlugVFIO: h.hotPlugVFIO(),
DisableVhostNet: true,
@ -1294,7 +1292,6 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush,
EnableIOThreads: defaultEnableIOThreads,
Msize9p: defaultMsize9p,
HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus,
ColdPlugVFIO: defaultColdPlugVFIO,
HotPlugVFIO: defaultHotPlugVFIO,
GuestHookPath: defaultGuestHookPath,
@ -1671,7 +1668,8 @@ func checkConfig(config oci.RuntimeConfig) error {
hotPlugVFIO := config.HypervisorConfig.HotPlugVFIO
coldPlugVFIO := config.HypervisorConfig.ColdPlugVFIO
machineType := config.HypervisorConfig.HypervisorMachineType
if err := checkPCIeConfig(coldPlugVFIO, hotPlugVFIO, machineType); err != nil {
hypervisorType := config.HypervisorType
if err := checkPCIeConfig(coldPlugVFIO, hotPlugVFIO, machineType, hypervisorType); err != nil {
return err
}
@ -1681,10 +1679,9 @@ func checkConfig(config oci.RuntimeConfig) error {
// checkPCIeConfig ensures the PCIe configuration is valid.
// Only allow one of the following settings for cold-plug:
// no-port, root-port, switch-port
func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineType string) error {
// Currently only QEMU q35 supports advanced PCIe topologies
// firecracker, dragonball do not have right now any PCIe support
if machineType != "q35" {
func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineType string, hypervisorType virtcontainers.HypervisorType) error {
if hypervisorType != virtcontainers.QemuHypervisor {
kataUtilsLogger.Warn("Advanced PCIe Topology only available for QEMU hypervisor, ignoring hot(cold)_vfio_port setting")
return nil
}
@ -1694,6 +1691,12 @@ func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineT
if coldPlug == config.NoPort && hotPlug == config.NoPort {
return nil
}
// Currently only QEMU q35,virt support advanced PCIe topologies
// firecracker, dragonball do not have right now any PCIe support
if machineType != "q35" && machineType != "virt" {
return nil
}
var port config.PCIePort
if coldPlug != config.NoPort {
port = coldPlug
@ -1701,10 +1704,13 @@ func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineT
if hotPlug != config.NoPort {
port = hotPlug
}
if port == config.NoPort || port == config.BridgePort || port == config.RootPort || port == config.SwitchPort {
if port == config.NoPort {
return fmt.Errorf("invalid vfio_port=%s setting, use on of %s, %s, %s",
port, config.BridgePort, config.RootPort, config.SwitchPort)
}
if port == config.BridgePort || port == config.RootPort || port == config.SwitchPort {
return nil
}
return fmt.Errorf("invalid vfio_port=%s setting, allowed values %s, %s, %s, %s",
coldPlug, config.NoPort, config.BridgePort, config.RootPort, config.SwitchPort)
}

View File

@ -85,7 +85,6 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntime
blockDeviceDriver := "virtio-scsi"
blockDeviceAIO := "io_uring"
enableIOThreads := true
hotplugVFIOOnRootBus := true
hotPlugVFIO = config.NoPort
coldPlugVFIO = config.BridgePort
disableNewNetNs := false
@ -108,7 +107,6 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntime
BlockDeviceDriver: blockDeviceDriver,
BlockDeviceAIO: blockDeviceAIO,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
HotPlugVFIO: hotPlugVFIO,
ColdPlugVFIO: coldPlugVFIO,
DisableNewNetNs: disableNewNetNs,
@ -172,7 +170,6 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntime
BlockDeviceAIO: defaultBlockDeviceAIO,
DefaultBridges: defaultBridgesCount,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
HotPlugVFIO: hotPlugVFIO,
ColdPlugVFIO: coldPlugVFIO,
Msize9p: defaultMsize9p,
@ -611,7 +608,6 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
machineType := "machineType"
disableBlock := true
enableIOThreads := true
hotplugVFIOOnRootBus := true
coldPlugVFIO = config.BridgePort
orgVHostVSockDevicePath := utils.VHostVSockDevicePath
blockDeviceAIO := "io_uring"
@ -630,7 +626,6 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
MachineType: machineType,
DisableBlockDeviceUse: disableBlock,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
ColdPlugVFIO: coldPlugVFIO,
RxRateLimiterMaxRate: rxRateLimiterMaxRate,
TxRateLimiterMaxRate: txRateLimiterMaxRate,
@ -682,10 +677,6 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
t.Errorf("Expected value for enable IOThreads %v, got %v", enableIOThreads, config.EnableIOThreads)
}
if config.HotplugVFIOOnRootBus != hotplugVFIOOnRootBus {
t.Errorf("Expected value for HotplugVFIOOnRootBus %v, got %v", hotplugVFIOOnRootBus, config.HotplugVFIOOnRootBus)
}
if config.RxRateLimiterMaxRate != rxRateLimiterMaxRate {
t.Errorf("Expected value for rx rate limiter %v, got %v", rxRateLimiterMaxRate, config.RxRateLimiterMaxRate)
}
@ -807,7 +798,6 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
machineType := "machineType"
disableBlock := true
enableIOThreads := true
hotplugVFIOOnRootBus := true
hypervisor := hypervisor{
Path: hypervisorPath,
@ -817,7 +807,6 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
MachineType: machineType,
DisableBlockDeviceUse: disableBlock,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
}
_, err := newQemuHypervisorConfig(hypervisor)

View File

@ -500,12 +500,6 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig,
return err
}
if err := newAnnotationConfiguration(ocispec, vcAnnotations.HotplugVFIOOnRootBus).setBool(func(hotplugVFIOOnRootBus bool) {
config.HypervisorConfig.HotplugVFIOOnRootBus = hotplugVFIOOnRootBus
}); err != nil {
return err
}
if err := newAnnotationConfiguration(ocispec, vcAnnotations.UseLegacySerial).setBool(func(useLegacySerial bool) {
config.HypervisorConfig.LegacySerial = useLegacySerial
}); err != nil {

View File

@ -659,7 +659,6 @@ func TestAddHypervisorAnnotations(t *testing.T) {
ocispec.Annotations[vcAnnotations.DisableVhostNet] = "true"
ocispec.Annotations[vcAnnotations.GuestHookPath] = "/usr/bin/"
ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true"
ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus] = "true"
ocispec.Annotations[vcAnnotations.ColdPlugVFIO] = config.BridgePort
ocispec.Annotations[vcAnnotations.HotPlugVFIO] = config.NoPort
ocispec.Annotations[vcAnnotations.IOMMUPlatform] = "true"
@ -700,7 +699,6 @@ func TestAddHypervisorAnnotations(t *testing.T) {
assert.Equal(sbConfig.HypervisorConfig.DisableVhostNet, true)
assert.Equal(sbConfig.HypervisorConfig.GuestHookPath, "/usr/bin/")
assert.Equal(sbConfig.HypervisorConfig.DisableImageNvdimm, true)
assert.Equal(sbConfig.HypervisorConfig.HotplugVFIOOnRootBus, true)
assert.Equal(string(sbConfig.HypervisorConfig.ColdPlugVFIO), string(config.BridgePort))
assert.Equal(string(sbConfig.HypervisorConfig.HotPlugVFIO), string(config.NoPort))
assert.Equal(sbConfig.HypervisorConfig.IOMMUPlatform, true)

View File

@ -18,6 +18,7 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/manager"
deviceManager "github.com/kata-containers/kata-containers/src/runtime/pkg/device/manager"
volume "github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc"
@ -869,6 +870,30 @@ func (c *Container) create(ctx context.Context) (err error) {
}
}
// If cold-plug we've attached the devices already, do not try to
// attach them a second time.
coldPlugVFIO := (c.sandbox.config.HypervisorConfig.ColdPlugVFIO != config.NoPort)
modeVFIO := (c.sandbox.config.VfioMode == config.VFIOModeVFIO)
if coldPlugVFIO {
var cntDevices []ContainerDevice
for _, dev := range c.devices {
isVFIOControlDevice := deviceManager.IsVFIOControlDevice(dev.ContainerPath)
if isVFIOControlDevice && modeVFIO {
cntDevices = append(cntDevices, dev)
}
if strings.HasPrefix(dev.ContainerPath, vfioPath) {
c.Logger().WithFields(logrus.Fields{
"device": dev,
}).Info("Remvoing device since we're cold-plugging no Attach needed")
continue
}
cntDevices = append(cntDevices, dev)
}
c.devices = cntDevices
}
c.Logger().WithFields(logrus.Fields{
"devices": c.devices,
}).Info("Attach devices")

View File

@ -284,10 +284,6 @@ type HypervisorConfig struct {
// DisableImageNvdimm is used to disable guest rootfs image nvdimm devices
DisableImageNvdimm bool
// HotplugVFIOOnRootBus is used to indicate if devices need to be hotplugged on the
// root bus instead of a bridge.
HotplugVFIOOnRootBus bool
// HotPlugVFIO is used to indicate if devices need to be hotplugged on the
// root port, switch, bridge or no port
HotPlugVFIO hv.PCIePort

View File

@ -599,10 +599,6 @@ type HypervisorConfig struct {
// DisableImageNvdimm is used to disable guest rootfs image nvdimm devices
DisableImageNvdimm bool
// HotplugVFIOOnRootBus is used to indicate if devices need to be hotplugged on the
// root bus instead of a bridge.
HotplugVFIOOnRootBus bool
// GuestMemoryDumpPaging is used to indicate if enable paging
// for QEMU dump-guest-memory command
GuestMemoryDumpPaging bool

View File

@ -244,7 +244,6 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
FileBackedMemRootList: sconfig.HypervisorConfig.FileBackedMemRootList,
DisableNestingChecks: sconfig.HypervisorConfig.DisableNestingChecks,
DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm,
HotplugVFIOOnRootBus: sconfig.HypervisorConfig.HotplugVFIOOnRootBus,
BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate,
BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate,
DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet,
@ -485,7 +484,6 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
FileBackedMemRootList: hconf.FileBackedMemRootList,
DisableNestingChecks: hconf.DisableNestingChecks,
DisableImageNvdimm: hconf.DisableImageNvdimm,
HotplugVFIOOnRootBus: hconf.HotplugVFIOOnRootBus,
HotPlugVFIO: hconf.HotPlugVFIO,
ColdPlugVFIO: hconf.ColdPlugVFIO,
BootToBeTemplate: hconf.BootToBeTemplate,

View File

@ -191,10 +191,6 @@ type HypervisorConfig struct {
// DisableImageNvdimm disables nvdimm for guest rootfs image
DisableImageNvdimm bool
// HotplugVFIOOnRootBus is used to indicate if devices need to be hotplugged on the
// root bus instead of a bridge.
HotplugVFIOOnRootBus bool
// HotPlugVFIO is used to indicate if devices need to be hotplugged on the
// root, switch, bridge or no-port
HotPlugVFIO config.PCIePort

View File

@ -122,10 +122,6 @@ const (
// DisableImageNvdimm is a sandbox annotation to specify use of nvdimm device for guest rootfs image.
DisableImageNvdimm = kataAnnotHypervisorPrefix + "disable_image_nvdimm"
// HotplugVFIOOnRootBus is a sandbox annotation used to indicate if devices need to be hotplugged on the
// root bus instead of a bridge.
HotplugVFIOOnRootBus = kataAnnotHypervisorPrefix + "hotplug_vfio_on_root_bus"
// ColdPlugVFIO is a sandbox annotation used to indicate if devices need to be coldplugged.
ColdPlugVFIO = kataAnnotHypervisorPrefix + "cold_plug_vfio"

View File

@ -65,11 +65,6 @@ const romFile = ""
// Default value is false.
const defaultDisableModern = false
// A deeper PCIe topology than 5 is already not advisable just for the sake
// of having enough buffer we limit ourselves to 10 and exit if we reach
// the root bus
const maxPCIeTopoDepth = 10
type qmpChannel struct {
qmp *govmmQemu.QMP
ctx context.Context
@ -80,15 +75,14 @@ type qmpChannel struct {
// QemuState keeps Qemu's state
type QemuState struct {
UUID string
HotPlugVFIO config.PCIePort
Bridges []types.Bridge
HotpluggedVCPUs []hv.CPUDevice
HotpluggedMemory int
VirtiofsDaemonPid int
HotplugVFIOOnRootBus bool
HotplugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort
UUID string
HotPlugVFIO config.PCIePort
Bridges []types.Bridge
HotpluggedVCPUs []hv.CPUDevice
HotpluggedMemory int
VirtiofsDaemonPid int
HotplugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort
}
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
@ -289,7 +283,6 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
q.state.UUID = uuid.Generate().String()
q.state.HotPlugVFIO = q.config.HotPlugVFIO
q.state.ColdPlugVFIO = q.config.ColdPlugVFIO
q.state.HotplugVFIOOnRootBus = q.config.HotplugVFIOOnRootBus
q.state.HotPlugVFIO = q.config.HotPlugVFIO
// The path might already exist, but in case of VM templating,
@ -782,6 +775,7 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
if err != nil {
return fmt.Errorf("Cannot get host path for device: %v err: %v", dev, err)
}
devicesPerIOMMUGroup, err := drivers.GetAllVFIODevicesFromIOMMUGroup(dev)
if err != nil {
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
@ -792,7 +786,7 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
}
}
}
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus)
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort)
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
numOfVhostUserBlockDevices := len(hypervisorConfig.VhostUserBlkDevices)
@ -1638,7 +1632,7 @@ func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.V
config.PCIeDevices[config.RootPort][devID] = true
bridgeQomPath := fmt.Sprintf("%s%s", qomPathPrefix, bridgeID)
bridgeSlot, err := q.qomGetSlot(bridgeQomPath)
bridgeSlot, err := q.arch.qomGetSlot(bridgeQomPath, &q.qmpMonitorCh)
if err != nil {
return err
}
@ -1741,88 +1735,6 @@ func (q *qemu) hotplugVhostUserDevice(ctx context.Context, vAttr *config.VhostUs
}
}
// Query QMP to find the PCI slot of a device, given its QOM path or ID
func (q *qemu) qomGetSlot(qomPath string) (types.PciSlot, error) {
addr, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, qomPath, "addr")
if err != nil {
return types.PciSlot{}, err
}
addrf, ok := addr.(float64)
// XXX going via float makes no real sense, but that's how
// JSON works, and we'll get away with it for the small values
// we have here
if !ok {
return types.PciSlot{}, fmt.Errorf("addr QOM property of %q is %T not a number", qomPath, addr)
}
addri := int(addrf)
slotNum, funcNum := addri>>3, addri&0x7
if funcNum != 0 {
return types.PciSlot{}, fmt.Errorf("Unexpected non-zero PCI function (%02x.%1x) on %q",
slotNum, funcNum, qomPath)
}
return types.PciSlotFromInt(slotNum)
}
// Query QMP to find a device's PCI path given its QOM path or ID
func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) {
var slots []types.PciSlot
devSlot, err := q.qomGetSlot(qemuID)
if err != nil {
return types.PciPath{}, err
}
slots = append(slots, devSlot)
// This only works for Q35 and Virt
r, _ := regexp.Compile(`^/machine/.*/pcie.0`)
var parentPath = qemuID
// We do not want to use a forever loop here, a deeper PCIe topology
// than 5 is already not advisable just for the sake of having enough
// buffer we limit ourselves to 10 and leave the loop early if we hit
// the root bus.
for i := 1; i <= maxPCIeTopoDepth; i++ {
parenBusQOM, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, parentPath, "parent_bus")
if err != nil {
return types.PciPath{}, err
}
busQOM, ok := parenBusQOM.(string)
if !ok {
return types.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %t not a string", qemuID, parenBusQOM)
}
// If we hit /machine/q35/pcie.0 we're done this is the root bus
// we climbed the complete hierarchy
if r.Match([]byte(busQOM)) {
break
}
// `bus` is the QOM path of the QOM bus object, but we need
// the PCI parent_bus which manages that bus. There doesn't seem
// to be a way to get that other than to simply drop the last
// path component.
idx := strings.LastIndex(busQOM, "/")
if idx == -1 {
return types.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", busQOM)
}
parentBus := busQOM[:idx]
parentSlot, err := q.qomGetSlot(parentBus)
if err != nil {
return types.PciPath{}, err
}
// Prepend the slots, since we're climbing the hierarchy
slots = append([]types.PciSlot{parentSlot}, slots...)
parentPath = parentBus
}
return types.PciPathFromSlots(slots...)
}
func (q *qemu) hotplugVFIODeviceRootPort(ctx context.Context, device *config.VFIODev) (err error) {
return q.executeVFIODeviceAdd(device)
}
@ -1852,7 +1764,7 @@ func (q *qemu) executePCIVFIODeviceAdd(device *config.VFIODev, addr string, brid
case config.VFIOPCIDeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, addr, bridgeID, romFile)
case config.VFIOAPDeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev, device.ID)
default:
return fmt.Errorf("Incorrect VFIO device type found")
}
@ -1865,7 +1777,7 @@ func (q *qemu) executeVFIODeviceAdd(device *config.VFIODev) error {
case config.VFIOPCIDeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, "", device.Bus, romFile)
case config.VFIOAPDeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev, device.ID)
default:
return fmt.Errorf("Incorrect VFIO device type found")
}
@ -1883,46 +1795,43 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
"hot-plug-vfio": q.state.HotPlugVFIO,
"device-info": string(buf),
}).Info("Start hot-plug VFIO device")
// In case MachineType is q35, a PCIe device is hotplugged on
// a PCIe Root Port or alternatively on a PCIe Switch Port
if q.HypervisorConfig().HypervisorMachineType != QemuQ35 && q.HypervisorConfig().HypervisorMachineType != QemuVirt {
device.Bus = ""
} else {
var err error
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
// for pc machine type instead of bridge. This is useful for devices that require
// a large PCI BAR which is a currently a limitation with PCI bridges.
if q.state.HotPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus {
err = q.hotplugVFIODeviceRootPort(ctx, device)
} else if q.state.HotPlugVFIO == config.SwitchPort {
err = q.hotplugVFIODeviceSwitchPort(ctx, device)
} else {
err = q.hotplugVFIODeviceBridgePort(ctx, device)
}
if err != nil {
return err
}
err = fmt.Errorf("Incorrect hot plug configuration %v for device %v found", q.state.HotPlugVFIO, device)
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
// for pc machine type instead of bridge. This is useful for devices that require
// a large PCI BAR which is a currently a limitation with PCI bridges.
if q.state.HotPlugVFIO == config.RootPort {
err = q.hotplugVFIODeviceRootPort(ctx, device)
} else if q.state.HotPlugVFIO == config.SwitchPort {
err = q.hotplugVFIODeviceSwitchPort(ctx, device)
} else if q.state.HotPlugVFIO == config.BridgePort {
err = q.hotplugVFIODeviceBridgePort(ctx, device)
}
// XXX: Depending on whether we're doing root port or
if err != nil {
return err
}
// Depending on whether we're doing root port or
// bridge hotplug, and how the bridge is set up in
// other parts of the code, we may or may not already
// have information about the slot number of the
// bridge and or the device. For simplicity, just
// query both of them back from qemu
device.GuestPciPath, err = q.qomGetPciPath(device.ID)
// query both of them back from qemu based on the arch
device.GuestPciPath, err = q.arch.qomGetPciPath(device.ID, &q.qmpMonitorCh)
return err
}
} else {
q.Logger().WithField("dev-id", device.ID).Info("Start hot-unplug VFIO device")
q.Logger().WithField("dev-id", device.ID).Info("Start hot-unplug VFIO device")
if !q.state.HotplugVFIOOnRootBus {
if err := q.arch.removeDeviceFromBridge(device.ID); err != nil {
return err
if q.state.HotPlugVFIO == config.BridgePort {
if err := q.arch.removeDeviceFromBridge(device.ID); err != nil {
return err
}
}
return q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, device.ID)
}
return q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, device.ID)
}
func (q *qemu) hotAddNetDevice(name, hardAddr string, VMFds, VhostFds []*os.File) error {
@ -2881,7 +2790,6 @@ func (q *qemu) Save() (s hv.HypervisorState) {
s.Type = string(QemuHypervisor)
s.UUID = q.state.UUID
s.HotpluggedMemory = q.state.HotpluggedMemory
s.HotplugVFIOOnRootBus = q.state.HotplugVFIOOnRootBus
for _, bridge := range q.arch.getBridges() {
s.Bridges = append(s.Bridges, hv.Bridge{
@ -2903,7 +2811,6 @@ func (q *qemu) Save() (s hv.HypervisorState) {
func (q *qemu) Load(s hv.HypervisorState) {
q.state.UUID = s.UUID
q.state.HotpluggedMemory = s.HotpluggedMemory
q.state.HotplugVFIOOnRootBus = s.HotplugVFIOOnRootBus
q.state.VirtiofsDaemonPid = s.VirtiofsDaemonPid
for _, bridge := range s.Bridges {

View File

@ -13,6 +13,7 @@ import (
"errors"
"fmt"
"os"
"regexp"
"runtime"
"strings"
@ -24,6 +25,11 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
)
// A deeper PCIe topology than 5 is already not advisable just for the sake
// of having enough buffer we limit ourselves to 10 and exit if we reach
// the root bus
const maxPCIeTopoDepth = 10
type qemuArch interface {
// enableNestingChecks nesting checks will be honoured
enableNestingChecks()
@ -158,6 +164,12 @@ type qemuArch interface {
// scans the PCIe space and returns the biggest BAR sizes for 32-bit
// and 64-bit addressable memory
getBARsMaxAddressableMemory() (uint64, uint64)
// Query QMP to find a device's PCI path given its QOM path or ID
qomGetPciPath(qemuID string, qmpCh *qmpChannel) (types.PciPath, error)
// Query QMP to find the PCI slot of a device, given its QOM path or ID
qomGetSlot(qomPath string, qmpCh *qmpChannel) (types.PciSlot, error)
}
type qemuArchBase struct {
@ -881,3 +893,85 @@ func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmwa
hvLogger.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture")
return devices, firmware, nil
}
// Query QMP to find the PCI slot of a device, given its QOM path or ID
func (q *qemuArchBase) qomGetSlot(qomPath string, qmpCh *qmpChannel) (types.PciSlot, error) {
addr, err := qmpCh.qmp.ExecQomGet(qmpCh.ctx, qomPath, "addr")
if err != nil {
return types.PciSlot{}, err
}
addrf, ok := addr.(float64)
// XXX going via float makes no real sense, but that's how
// JSON works, and we'll get away with it for the small values
// we have here
if !ok {
return types.PciSlot{}, fmt.Errorf("addr QOM property of %q is %T not a number", qomPath, addr)
}
addri := int(addrf)
slotNum, funcNum := addri>>3, addri&0x7
if funcNum != 0 {
return types.PciSlot{}, fmt.Errorf("Unexpected non-zero PCI function (%02x.%1x) on %q",
slotNum, funcNum, qomPath)
}
return types.PciSlotFromInt(slotNum)
}
// Query QMP to find a device's PCI path given its QOM path or ID
func (q *qemuArchBase) qomGetPciPath(qemuID string, qmpCh *qmpChannel) (types.PciPath, error) {
var slots []types.PciSlot
devSlot, err := q.qomGetSlot(qemuID, qmpCh)
if err != nil {
return types.PciPath{}, err
}
slots = append(slots, devSlot)
// This only works for Q35 and Virt
r, _ := regexp.Compile(`^/machine/.*/pcie.0`)
var parentPath = qemuID
// We do not want to use a forever loop here, a deeper PCIe topology
// than 5 is already not advisable just for the sake of having enough
// buffer we limit ourselves to 10 and leave the loop early if we hit
// the root bus.
for i := 1; i <= maxPCIeTopoDepth; i++ {
parenBusQOM, err := qmpCh.qmp.ExecQomGet(qmpCh.ctx, parentPath, "parent_bus")
if err != nil {
return types.PciPath{}, err
}
busQOM, ok := parenBusQOM.(string)
if !ok {
return types.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %t not a string", qemuID, parenBusQOM)
}
// If we hit /machine/q35/pcie.0 we're done this is the root bus
// we climbed the complete hierarchy
if r.Match([]byte(busQOM)) {
break
}
// `bus` is the QOM path of the QOM bus object, but we need
// the PCI parent_bus which manages that bus. There doesn't seem
// to be a way to get that other than to simply drop the last
// path component.
idx := strings.LastIndex(busQOM, "/")
if idx == -1 {
return types.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", busQOM)
}
parentBus := busQOM[:idx]
parentSlot, err := q.qomGetSlot(parentBus, qmpCh)
if err != nil {
return types.PciPath{}, err
}
// Prepend the slots, since we're climbing the hierarchy
slots = append([]types.PciSlot{parentSlot}, slots...)
parentPath = parentBus
}
return types.PciPathFromSlots(slots...)
}

View File

@ -351,3 +351,32 @@ func (q *qemuS390x) appendProtectionDevice(devices []govmmQemu.Device, firmware,
return devices, firmware, fmt.Errorf("Unsupported guest protection technology: %v", q.protection)
}
}
func (q *qemuS390x) appendVFIODevice(devices []govmmQemu.Device, vfioDev config.VFIODev) []govmmQemu.Device {
if vfioDev.SysfsDev == "" {
return devices
}
if len(vfioDev.APDevices) > 0 {
devices = append(devices,
govmmQemu.VFIODevice{
SysfsDev: vfioDev.SysfsDev,
Transport: govmmQemu.TransportAP,
},
)
return devices
}
devices = append(devices,
govmmQemu.VFIODevice{
SysfsDev: vfioDev.SysfsDev,
},
)
return devices
}
// Query QMP to find a device's PCI path given its QOM path or ID
func (q *qemuS390x) qomGetPciPath(qemuID string, qmpCh *qmpChannel) (types.PciPath, error) {
hvLogger.Warnf("qomGetPciPath not implemented for s390x")
return types.PciPath{}, nil
}

View File

@ -613,51 +613,10 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
return nil, err
}
// If we have a confidential guest we need to cold-plug the PCIe VFIO devices
// until we have TDISP/IDE PCIe support.
coldPlugVFIO := (sandboxConfig.HypervisorConfig.ColdPlugVFIO != config.NoPort)
// Aggregate all the containner devices for hot-plug and use them to dedcue
// the correct amount of ports to reserve for the hypervisor.
hotPlugVFIO := (sandboxConfig.HypervisorConfig.HotPlugVFIO != config.NoPort)
var vfioDevices []config.DeviceInfo
// vhost-user-block device is a PCIe device in Virt, keep track of it
// for correct number of PCIe root ports.
var vhostUserBlkDevices []config.DeviceInfo
for cnt, containers := range sandboxConfig.Containers {
for dev, device := range containers.DeviceInfos {
if deviceManager.IsVhostUserBlk(device) {
vhostUserBlkDevices = append(vhostUserBlkDevices, device)
continue
}
isVFIO := deviceManager.IsVFIO(device.ContainerPath)
if hotPlugVFIO && isVFIO {
vfioDevices = append(vfioDevices, device)
sandboxConfig.Containers[cnt].DeviceInfos[dev].Port = sandboxConfig.HypervisorConfig.HotPlugVFIO
}
if coldPlugVFIO && isVFIO {
device.ColdPlug = true
device.Port = sandboxConfig.HypervisorConfig.ColdPlugVFIO
vfioDevices = append(vfioDevices, device)
// We need to remove the devices marked for cold-plug
// otherwise at the container level the kata-agent
// will try to hot-plug them.
sandboxConfig.Containers[cnt].DeviceInfos[dev].ID = "remove-we-are-cold-plugging"
}
}
var filteredDevices []config.DeviceInfo
for _, device := range containers.DeviceInfos {
if device.ID != "remove-we-are-cold-plugging" {
filteredDevices = append(filteredDevices, device)
}
}
sandboxConfig.Containers[cnt].DeviceInfos = filteredDevices
coldPlugVFIO, err := s.coldOrHotPlugVFIO(&sandboxConfig)
if err != nil {
return nil, err
}
sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices
sandboxConfig.HypervisorConfig.VhostUserBlkDevices = vhostUserBlkDevices
// store doesn't require hypervisor to be stored immediately
if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil {
@ -672,7 +631,8 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
return s, nil
}
for _, dev := range vfioDevices {
for _, dev := range sandboxConfig.HypervisorConfig.VFIODevices {
s.Logger().Info("cold-plug device: ", dev)
_, err := s.AddDevice(ctx, dev)
if err != nil {
s.Logger().WithError(err).Debug("Cannot cold-plug add device")
@ -682,6 +642,70 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
return s, nil
}
func (s *Sandbox) coldOrHotPlugVFIO(sandboxConfig *SandboxConfig) (bool, error) {
// If we have a confidential guest we need to cold-plug the PCIe VFIO devices
// until we have TDISP/IDE PCIe support.
coldPlugVFIO := (sandboxConfig.HypervisorConfig.ColdPlugVFIO != config.NoPort)
// Aggregate all the containner devices for hot-plug and use them to dedcue
// the correct amount of ports to reserve for the hypervisor.
hotPlugVFIO := (sandboxConfig.HypervisorConfig.HotPlugVFIO != config.NoPort)
modeIsGK := (sandboxConfig.VfioMode == config.VFIOModeGuestKernel)
modeIsVFIO := (sandboxConfig.VfioMode == config.VFIOModeVFIO)
var vfioDevices []config.DeviceInfo
// vhost-user-block device is a PCIe device in Virt, keep track of it
// for correct number of PCIe root ports.
var vhostUserBlkDevices []config.DeviceInfo
for cnt, containers := range sandboxConfig.Containers {
for dev, device := range containers.DeviceInfos {
if deviceManager.IsVhostUserBlk(device) {
vhostUserBlkDevices = append(vhostUserBlkDevices, device)
continue
}
isVFIODevice := deviceManager.IsVFIODevice(device.ContainerPath)
isVFIOControlDevice := deviceManager.IsVFIOControlDevice(device.ContainerPath)
// vfio_mode=vfio needs the VFIO control device add it to the list
// of devices to be added to the VM.
if modeIsVFIO && isVFIOControlDevice && !hotPlugVFIO {
vfioDevices = append(vfioDevices, device)
}
if hotPlugVFIO && isVFIODevice {
device.ColdPlug = false
device.Port = sandboxConfig.HypervisorConfig.HotPlugVFIO
vfioDevices = append(vfioDevices, device)
sandboxConfig.Containers[cnt].DeviceInfos[dev].Port = sandboxConfig.HypervisorConfig.HotPlugVFIO
}
if coldPlugVFIO && isVFIODevice {
device.ColdPlug = true
device.Port = sandboxConfig.HypervisorConfig.ColdPlugVFIO
vfioDevices = append(vfioDevices, device)
// We need to remove the devices marked for cold-plug
// otherwise at the container level the kata-agent
// will try to hot-plug them.
if modeIsGK {
sandboxConfig.Containers[cnt].DeviceInfos[dev].ID = "remove-we-are-cold-plugging"
}
}
}
var filteredDevices []config.DeviceInfo
for _, device := range containers.DeviceInfos {
if device.ID != "remove-we-are-cold-plugging" {
filteredDevices = append(filteredDevices, device)
}
}
sandboxConfig.Containers[cnt].DeviceInfos = filteredDevices
}
sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices
sandboxConfig.HypervisorConfig.VhostUserBlkDevices = vhostUserBlkDevices
return coldPlugVFIO, nil
}
func (s *Sandbox) createResourceController() error {
var err error
cgroupPath := ""
@ -2049,26 +2073,26 @@ func (s *Sandbox) AddDevice(ctx context.Context, info config.DeviceInfo) (api.De
}
var err error
b, err := s.devManager.NewDevice(info)
add, err := s.devManager.NewDevice(info)
if err != nil {
return nil, err
}
defer func() {
if err != nil {
s.devManager.RemoveDevice(b.DeviceID())
s.devManager.RemoveDevice(add.DeviceID())
}
}()
if err = s.devManager.AttachDevice(ctx, b.DeviceID(), s); err != nil {
if err = s.devManager.AttachDevice(ctx, add.DeviceID(), s); err != nil {
return nil, err
}
defer func() {
if err != nil {
s.devManager.DetachDevice(ctx, b.DeviceID(), s)
s.devManager.DetachDevice(ctx, add.DeviceID(), s)
}
}()
return b, nil
return add, nil
}
// updateResources will:

View File

@ -606,6 +606,7 @@ func TestSandboxAttachDevicesVFIO(t *testing.T) {
HostPath: path,
ContainerPath: path,
DevType: "c",
Port: config.RootPort,
}
dev, err := dm.NewDevice(deviceInfo)
assert.Nil(t, err, "deviceManager.NewDevice return error: %v", err)