qemu: Support PCIe device hotplug for q35

- add pcie-root-port device to qemu command line for q35
- hotplug a PCIe device into a PCIe Root Port

Fixes: #2432

Signed-off-by: Jimmy Xu <junming.xjm@antfin.com>
This commit is contained in:
Jimmy Xu 2020-02-05 14:35:14 +08:00
parent fa7d00ec25
commit bb41b7248a
22 changed files with 281 additions and 21 deletions

View File

@ -186,6 +186,7 @@ DEFENABLEDEBUG := false
DEFDISABLENESTINGCHECKS := false
DEFMSIZE9P := 8192
DEFHOTPLUGVFIOONROOTBUS := false
DEFPCIEROOTPORT := 0
# Default cgroup model
DEFSANDBOXCGROUPONLY ?= false
@ -444,6 +445,7 @@ USER_VARS += DEFENABLEDEBUG
USER_VARS += DEFDISABLENESTINGCHECKS
USER_VARS += DEFMSIZE9P
USER_VARS += DEFHOTPLUGVFIOONROOTBUS
USER_VARS += DEFPCIEROOTPORT
USER_VARS += DEFENTROPYSOURCE
USER_VARS += DEFSANDBOXCGROUPONLY
USER_VARS += BUILDFLAGS
@ -619,7 +621,8 @@ $(GENERATED_FILES): %: %.in $(MAKEFILE_LIST) VERSION .git-commit
-e "s|@DEFENABLEDEBUG@|$(DEFENABLEDEBUG)|g" \
-e "s|@DEFDISABLENESTINGCHECKS@|$(DEFDISABLENESTINGCHECKS)|g" \
-e "s|@DEFMSIZE9P@|$(DEFMSIZE9P)|g" \
-e "s|@DEFHOTPLUGONROOTBUS@|$(DEFHOTPLUGVFIOONROOTBUS)|g" \
-e "s|@DEFHOTPLUGVFIOONROOTBUS@|$(DEFHOTPLUGVFIOONROOTBUS)|g" \
-e "s|@DEFPCIEROOTPORT@|$(DEFPCIEROOTPORT)|g" \
-e "s|@DEFENTROPYSOURCE@|$(DEFENTROPYSOURCE)|g" \
-e "s|@DEFSANDBOXCGROUPONLY@|$(DEFSANDBOXCGROUPONLY)|g" \
$< > $@

View File

@ -224,6 +224,13 @@ enable_iothreads = @DEFENABLEIOTHREADS@
# Default false
#hotplug_vfio_on_root_bus = true
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU
# The value means the number of pcie_root_port
# This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35"
# Default 0
#pcie_root_port = 2
# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
# security (vhost-net runs ring0) for network I/O performance.
#disable_vhost_net = true

View File

@ -88,11 +88,13 @@ type HypervisorInfo struct {
Path string
BlockDeviceDriver string
EntropySource string
SharedFS string
Msize9p uint32
MemorySlots uint32
PCIeRootPort uint32
HotplugVFIOOnRootBus bool
Debug bool
UseVSock bool
SharedFS string
}
// ProxyInfo stores proxy details
@ -355,6 +357,9 @@ func getHypervisorInfo(config oci.RuntimeConfig) HypervisorInfo {
MemorySlots: config.HypervisorConfig.MemSlots,
EntropySource: config.HypervisorConfig.EntropySource,
SharedFS: config.HypervisorConfig.SharedFS,
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
}
}

View File

@ -91,6 +91,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
blockStorageDriver := "virtio-scsi"
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
disableNewNetNs := false
sharedFS := "virtio-9p"
@ -150,6 +151,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
BlockDeviceDriver: blockStorageDriver,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
DisableNewNetNs: disableNewNetNs,
DefaultVCPUCount: hypConfig.NumVCPUs,
DefaultMaxVCPUCount: hypConfig.DefaultMaxVCPUs,
@ -329,6 +331,9 @@ func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo {
Debug: config.HypervisorConfig.Debug,
EntropySource: config.HypervisorConfig.EntropySource,
SharedFS: config.HypervisorConfig.SharedFS,
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
}
}

View File

@ -398,6 +398,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
blockDeviceDriver := "virtio-scsi"
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
disableNewNetNs := false
sharedFS := "virtio-9p"
@ -416,6 +417,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
BlockDeviceDriver: blockDeviceDriver,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
DisableNewNetNs: disableNewNetNs,
SharedFS: sharedFS,
}

View File

@ -28,6 +28,7 @@ type RuntimeConfigOptions struct {
AgentTraceMode string
AgentTraceType string
SharedFS string
PCIeRootPort uint32
DisableBlock bool
EnableIOThreads bool
HotplugVFIOOnRootBus bool
@ -59,6 +60,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + `
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
hotplug_vfio_on_root_bus = ` + strconv.FormatBool(config.HotplugVFIOOnRootBus) + `
pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + `
msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + `
enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + `
guest_hook_path = "` + config.DefaultGuestHookPath + `"

View File

@ -44,6 +44,7 @@ const defaultEnableDebug bool = false
const defaultDisableNestingChecks bool = false
const defaultMsize9p uint32 = 8192
const defaultHotplugVFIOOnRootBus bool = false
const defaultPCIeRootPort = 0
const defaultEntropySource = "/dev/urandom"
const defaultGuestHookPath string = ""
const defaultVirtioFSCacheMode = "none"

View File

@ -111,6 +111,7 @@ type hypervisor struct {
MemOffset uint32 `toml:"memory_offset"`
DefaultBridges uint32 `toml:"default_bridges"`
Msize9p uint32 `toml:"msize_9p"`
PCIeRootPort uint32 `toml:"pcie_root_port"`
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
MemPrealloc bool `toml:"enable_mem_prealloc"`
HugePages bool `toml:"enable_hugepages"`
@ -648,6 +649,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
UseVSock: useVSock,
DisableImageNvdimm: h.DisableImageNvdimm,
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
PCIeRootPort: h.PCIeRootPort,
DisableVhostNet: h.DisableVhostNet,
GuestHookPath: h.guestHookPath(),
}, nil
@ -796,6 +798,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
EnableIOThreads: h.EnableIOThreads,
Msize9p: h.msize9p(),
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
PCIeRootPort: h.PCIeRootPort,
DisableVhostNet: true,
UseVSock: true,
}, nil
@ -1073,6 +1076,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
EnableIOThreads: defaultEnableIOThreads,
Msize9p: defaultMsize9p,
HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus,
PCIeRootPort: defaultPCIeRootPort,
GuestHookPath: defaultGuestHookPath,
VirtioFSCache: defaultVirtioFSCacheMode,
DisableImageNvdimm: defaultDisableImageNvdimm,

View File

@ -82,6 +82,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
blockDeviceDriver := "virtio-scsi"
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
disableNewNetNs := false
sharedFS := "virtio-9p"
@ -101,6 +102,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
BlockDeviceDriver: blockDeviceDriver,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
DisableNewNetNs: disableNewNetNs,
DefaultVCPUCount: defaultVCPUCount,
DefaultMaxVCPUCount: defaultMaxVCPUCount,
@ -158,6 +160,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
Mlock: !defaultEnableSwap,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
Msize9p: defaultMsize9p,
MemSlots: defaultMemSlots,
EntropySource: defaultEntropySource,
@ -775,6 +778,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
disableBlock := true
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
orgVHostVSockDevicePath := utils.VHostVSockDevicePath
defer func() {
utils.VHostVSockDevicePath = orgVHostVSockDevicePath
@ -789,6 +793,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
DisableBlockDeviceUse: disableBlock,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
UseVSock: true,
}
@ -846,6 +851,10 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
if config.HotplugVFIOOnRootBus != hotplugVFIOOnRootBus {
t.Errorf("Expected value for HotplugVFIOOnRootBus %v, got %v", hotplugVFIOOnRootBus, config.HotplugVFIOOnRootBus)
}
if config.PCIeRootPort != pcieRootPort {
t.Errorf("Expected value for PCIeRootPort %v, got %v", pcieRootPort, config.PCIeRootPort)
}
}
func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
@ -869,6 +878,7 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
disableBlock := true
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
hypervisor := hypervisor{
Path: hypervisorPath,
@ -879,6 +889,7 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
DisableBlockDeviceUse: disableBlock,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
}
_, err = newQemuHypervisorConfig(hypervisor)

View File

@ -75,6 +75,12 @@ var SysDevPrefix = "/sys/dev"
// SysIOMMUPath is static string of /sys/kernel/iommu_groups
var SysIOMMUPath = "/sys/kernel/iommu_groups"
// SysBusPciDevicesPath is static string of /sys/bus/pci/devices
var SysBusPciDevicesPath = "/sys/bus/pci/devices"
// SysBusPciSlotsPath is static string of /sys/bus/pci/slots
var SysBusPciSlotsPath = "/sys/bus/pci/slots"
// DeviceInfo is an embedded type that contains device data common to all types of devices.
type DeviceInfo struct {
// Hostpath is device path on host
@ -165,12 +171,15 @@ const (
// VFIODev represents a VFIO drive used for hotplugging
type VFIODev struct {
// ID is used to identify this drive in the hypervisor options.
ID string
// IsPCIe specifies device is PCIe or PCI
IsPCIe bool
// Type of VFIO device
Type VFIODeviceType
// ID is used to identify this drive in the hypervisor options.
ID string
// BDF (Bus:Device.Function) of the PCI address
BDF string
@ -182,6 +191,12 @@ type VFIODev struct {
// DeviceID specifies device id
DeviceID string
// PCI Class Code
Class string
// Bus of VFIO PCIe device
Bus string
}
// RNGDev represents a random number generator device

View File

@ -7,13 +7,102 @@
package drivers
import (
"github.com/sirupsen/logrus"
"fmt"
"io/ioutil"
"path/filepath"
"strings"
"github.com/kata-containers/runtime/virtcontainers/device/api"
"github.com/kata-containers/runtime/virtcontainers/device/config"
"github.com/sirupsen/logrus"
)
const intMax uint = ^uint(0)
const (
intMax = ^uint(0)
PCIDomain = "0000"
PCIeKeyword = "PCIe"
)
type PCISysFsType string
var (
PCISysFsDevices PCISysFsType = "devices" // /sys/bus/pci/devices
PCISysFsSlots PCISysFsType = "slots" // /sys/bus/pci/slots
)
type PCISysFsProperty string
var (
PCISysFsDevicesClass PCISysFsProperty = "class" // /sys/bus/pci/devices/xxx/class
PCISysFsSlotsAddress PCISysFsProperty = "address" // /sys/bus/pci/slots/xxx/address
PCISysFsSlotsMaxBusSpeed PCISysFsProperty = "max_bus_speed" // /sys/bus/pci/slots/xxx/max_bus_speed
)
func deviceLogger() *logrus.Entry {
return api.DeviceLogger()
}
/*
Identify PCIe device by /sys/bus/pci/slots/xx/max_bus_speed, sample content "8.0 GT/s PCIe"
The /sys/bus/pci/slots/xx/address contains bdf, sample content "0000:04:00"
bdf format: bus:slot.function
*/
func isPCIeDevice(bdf string) bool {
if len(strings.Split(bdf, ":")) == 2 {
bdf = PCIDomain + ":" + bdf
}
slots, err := ioutil.ReadDir(config.SysBusPciSlotsPath)
if err != nil {
deviceLogger().WithError(err).WithField("path", config.SysBusPciSlotsPath).Warn("failed to list pci slots")
return false
}
b := strings.Split(bdf, ".")[0]
for _, slot := range slots {
address := getPCISlotProperty(slot.Name(), PCISysFsSlotsAddress)
if b == address {
maxBusSpeed := getPCISlotProperty(slot.Name(), PCISysFsSlotsMaxBusSpeed)
if strings.Contains(maxBusSpeed, PCIeKeyword) {
return true
}
}
}
deviceLogger().WithField("dev-bdf", bdf).Debug("can not find slot for bdf of pci device")
return false
}
// read from /sys/bus/pci/devices/xxx/property
func getPCIDeviceProperty(bdf string, property PCISysFsProperty) string {
if len(strings.Split(bdf, ":")) == 2 {
bdf = PCIDomain + ":" + bdf
}
propertyPath := filepath.Join(config.SysBusPciDevicesPath, bdf, string(property))
rlt, err := readPCIProperty(propertyPath)
if err != nil {
deviceLogger().WithError(err).WithField("path", propertyPath).Warn("failed to read pci device property")
return ""
}
return rlt
}
// read from /sys/bus/pci/slots/xxx/property
func getPCISlotProperty(slot string, property PCISysFsProperty) string {
propertyPath := filepath.Join(config.SysBusPciSlotsPath, slot, string(property))
rlt, err := readPCIProperty(propertyPath)
if err != nil {
deviceLogger().WithError(err).WithField("path", propertyPath).Warn("failed to read pci slot property")
return ""
}
return rlt
}
func readPCIProperty(propertyPath string) (string, error) {
var (
buf []byte
err error
)
if buf, err = ioutil.ReadFile(propertyPath); err != nil {
return "", fmt.Errorf("failed to read pci sysfs %v, error:%v", propertyPath, err)
}
return strings.Split(string(buf), "\n")[0], nil
}

View File

@ -27,6 +27,11 @@ const (
pciDriverBindPath = "/sys/bus/pci/drivers/%s/bind"
vfioNewIDPath = "/sys/bus/pci/drivers/vfio-pci/new_id"
vfioRemoveIDPath = "/sys/bus/pci/drivers/vfio-pci/remove_id"
pcieRootPortPrefix = "rp"
)
var (
AllPCIeDevs = map[string]bool{}
)
// VFIODevice is a vfio device meant to be passed to the hypervisor
@ -83,8 +88,14 @@ func (device *VFIODevice) Attach(devReceiver api.DeviceReceiver) (retErr error)
Type: vfioDeviceType,
BDF: deviceBDF,
SysfsDev: deviceSysfsDev,
IsPCIe: isPCIeDevice(deviceBDF),
Class: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass),
}
device.VfioDevs = append(device.VfioDevs, vfio)
if vfio.IsPCIe {
vfio.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs))
AllPCIeDevs[vfio.BDF] = true
}
}
// hotplug a VFIO device is actually hotplugging a group of iommu devices

View File

@ -369,6 +369,10 @@ type HypervisorConfig struct {
// root bus instead of a bridge.
HotplugVFIOOnRootBus bool
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
// The PCIe Root Port device is used to hot-plug the PCIe device
PCIeRootPort uint32
// BootToBeTemplate used to indicate if the VM is created to be a template VM
BootToBeTemplate bool

View File

@ -248,6 +248,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
UseVSock: sconfig.HypervisorConfig.UseVSock,
DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm,
HotplugVFIOOnRootBus: sconfig.HypervisorConfig.HotplugVFIOOnRootBus,
PCIeRootPort: sconfig.HypervisorConfig.PCIeRootPort,
BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate,
BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate,
DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet,
@ -534,6 +535,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
UseVSock: hconf.UseVSock,
DisableImageNvdimm: hconf.DisableImageNvdimm,
HotplugVFIOOnRootBus: hconf.HotplugVFIOOnRootBus,
PCIeRootPort: hconf.PCIeRootPort,
BootToBeTemplate: hconf.BootToBeTemplate,
BootFromTemplate: hconf.BootFromTemplate,
DisableVhostNet: hconf.DisableVhostNet,

View File

@ -153,6 +153,10 @@ type HypervisorConfig struct {
// root bus instead of a bridge.
HotplugVFIOOnRootBus bool
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
// The PCIe Root Port device is used to hot-plug the PCIe device
PCIeRootPort uint32
// BootToBeTemplate used to indicate if the VM is created to be a template VM
BootToBeTemplate bool

View File

@ -41,4 +41,5 @@ type HypervisorState struct {
HotpluggedMemory int
VirtiofsdPid int
HotplugVFIOOnRootBus bool
PCIeRootPort int
}

View File

@ -97,6 +97,10 @@ const (
// root bus instead of a bridge.
HotplugVFIOOnRootBus = kataAnnotHypervisorPrefix + "hotplug_vfio_on_root_bus"
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
// The PCIe Root Port device is used to hot-plug the PCIe device
PCIeRootPort = kataAnnotHypervisorPrefix + "pcie_root_port"
// EntropySource is a sandbox annotation to specify the path to a host source of
// entropy (/dev/random, /dev/urandom or real hardware RNG device)
EntropySource = kataAnnotHypervisorPrefix + "entropy_source"

View File

@ -447,6 +447,14 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig)
config.HypervisorConfig.HotplugVFIOOnRootBus = hotplugVFIOOnRootBus
}
if value, ok := ocispec.Annotations[vcAnnotations.PCIeRootPort]; ok {
pcieRootPort, err := strconv.ParseUint(value, 10, 32)
if err != nil {
return fmt.Errorf("Error parsing annotation for pcie_root_port: %v, Please specify an integer greater than or equal to 0", err)
}
config.HypervisorConfig.PCIeRootPort = uint32(pcieRootPort)
}
if value, ok := ocispec.Annotations[vcAnnotations.EntropySource]; ok {
if value != "" {
config.HypervisorConfig.EntropySource = value

View File

@ -763,6 +763,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
ocispec.Annotations[vcAnnotations.UseVSock] = "true"
ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true"
ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus] = "true"
ocispec.Annotations[vcAnnotations.PCIeRootPort] = "2"
ocispec.Annotations[vcAnnotations.EntropySource] = "/dev/urandom"
addAnnotations(ocispec, &config)
@ -793,6 +794,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
assert.Equal(config.HypervisorConfig.UseVSock, true)
assert.Equal(config.HypervisorConfig.DisableImageNvdimm, true)
assert.Equal(config.HypervisorConfig.HotplugVFIOOnRootBus, true)
assert.Equal(config.HypervisorConfig.PCIeRootPort, uint32(2))
assert.Equal(config.HypervisorConfig.EntropySource, "/dev/urandom")
// In case an absurd large value is provided, the config value if not over-ridden

View File

@ -71,6 +71,7 @@ type QemuState struct {
UUID string
HotplugVFIOOnRootBus bool
VirtiofsdPid int
PCIeRootPort int
}
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
@ -266,6 +267,7 @@ func (q *qemu) setup(id string, hypervisorConfig *HypervisorConfig) error {
q.state.UUID = uuid.Generate().String()
q.state.HotplugVFIOOnRootBus = q.config.HotplugVFIOOnRootBus
q.state.PCIeRootPort = int(q.config.PCIeRootPort)
// The path might already exist, but in case of VM templating,
// we have to create it since the sandbox has not created it yet.
@ -584,6 +586,13 @@ func (q *qemu) createSandbox(ctx context.Context, id string, networkNS NetworkNa
return err
}
// Add PCIe Root Port devices to hypervisor
// The pcie.0 bus do not support hot-plug, but PCIe device can be hot-plugged into PCIe Root Port.
// For more details, please see https://github.com/qemu/qemu/blob/master/docs/pcie.txt
if hypervisorConfig.PCIeRootPort > 0 {
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, hypervisorConfig.PCIeRootPort)
}
q.qemuConfig = qemuConfig
return nil
@ -1149,17 +1158,39 @@ func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) (err erro
}
devID := device.ID
machinneType := q.hypervisorConfig().HypervisorMachineType
if op == addDevice {
buf, _ := json.Marshal(device)
q.Logger().WithFields(logrus.Fields{
"machine-type": machinneType,
"hotplug-vfio-on-root-bus": q.state.HotplugVFIOOnRootBus,
"pcie-root-port": q.state.PCIeRootPort,
"device-info": string(buf),
}).Info("Start hot-plug VFIO device")
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
// for pc machine type instead of bridge. This is useful for devices that require
// a large PCI BAR which is a currently a limitation with PCI bridges.
if q.state.HotplugVFIOOnRootBus {
// In case MachineType is q35, a PCIe device is hotplugged on a PCIe Root Port.
switch machinneType {
case QemuQ35:
if device.IsPCIe && q.state.PCIeRootPort <= 0 {
q.Logger().WithField("dev-id", device.ID).Warn("VFIO device is a PCIe device. It's recommended to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for q35")
device.Bus = ""
}
default:
device.Bus = ""
}
switch device.Type {
case config.VFIODeviceNormalType:
return q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, "", romFile)
return q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile)
case config.VFIODeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", "", romFile)
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", device.Bus, romFile)
default:
return fmt.Errorf("Incorrect VFIO device type found")
}
@ -1185,6 +1216,8 @@ func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) (err erro
return fmt.Errorf("Incorrect VFIO device type found")
}
} else {
q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device")
if !q.state.HotplugVFIOOnRootBus {
if err := q.arch.removeDeviceFromBridge(devID); err != nil {
return err
@ -1848,6 +1881,39 @@ func genericMemoryTopology(memoryMb, hostMemoryMb uint64, slots uint8, memoryOff
return memory
}
// genericAppendPCIeRootPort appends to devices the given pcie-root-port
func genericAppendPCIeRootPort(devices []govmmQemu.Device, number uint32, machineType string) []govmmQemu.Device {
var (
bus string
chassis string
multiFunction bool
addr string
)
switch machineType {
case QemuQ35:
bus = defaultBridgeBus
chassis = "0"
multiFunction = false
addr = "0"
default:
return devices
}
for i := uint32(0); i < number; i++ {
devices = append(devices,
govmmQemu.PCIeRootPortDevice{
ID: fmt.Sprintf("%s%d", pcieRootPortPrefix, i),
Bus: bus,
Chassis: chassis,
Slot: strconv.FormatUint(uint64(i), 10),
Multifunction: multiFunction,
Addr: addr,
},
)
}
return devices
}
func (q *qemu) getThreadIDs() (vcpuThreadIDs, error) {
span, _ := q.trace("getThreadIDs")
defer span.Finish()
@ -2013,6 +2079,7 @@ func (q *qemu) save() (s persistapi.HypervisorState) {
s.UUID = q.state.UUID
s.HotpluggedMemory = q.state.HotpluggedMemory
s.HotplugVFIOOnRootBus = q.state.HotplugVFIOOnRootBus
s.PCIeRootPort = q.state.PCIeRootPort
for _, bridge := range q.arch.getBridges() {
s.Bridges = append(s.Bridges, persistapi.Bridge{
@ -2036,6 +2103,7 @@ func (q *qemu) load(s persistapi.HypervisorState) {
q.state.HotpluggedMemory = s.HotpluggedMemory
q.state.HotplugVFIOOnRootBus = s.HotplugVFIOOnRootBus
q.state.VirtiofsdPid = s.VirtiofsdPid
q.state.PCIeRootPort = s.PCIeRootPort
for _, bridge := range s.Bridges {
q.state.Bridges = append(q.state.Bridges, types.NewBridge(types.Type(bridge.Type), bridge.ID, bridge.DeviceAddr, bridge.Addr))

View File

@ -20,13 +20,15 @@ type qemuAmd64 struct {
vmFactory bool
}
const defaultQemuPath = "/usr/bin/qemu-system-x86_64"
const (
defaultQemuPath = "/usr/bin/qemu-system-x86_64"
const defaultQemuMachineType = QemuPC
defaultQemuMachineType = QemuPC
const defaultQemuMachineOptions = "accel=kvm,kernel_irqchip"
defaultQemuMachineOptions = "accel=kvm,kernel_irqchip"
const qmpMigrationWaitTimeout = 5 * time.Second
qmpMigrationWaitTimeout = 5 * time.Second
)
var qemuPaths = map[string]string{
QemuPCLite: "/usr/bin/qemu-lite-system-x86_64",

View File

@ -127,6 +127,9 @@ type qemuArch interface {
// setIgnoreSharedMemoryMigrationCaps set bypass-shared-memory capability for migration
setIgnoreSharedMemoryMigrationCaps(context.Context, *govmmQemu.QMP) error
// appendPCIeRootPortDevice appends a pcie-root-port device to pcie.0 bus
appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32) []govmmQemu.Device
}
type qemuArchBase struct {
@ -153,6 +156,7 @@ const (
defaultPCBridgeBus = "pci.0"
maxDevIDSize = 31
defaultMsize9p = 8192
pcieRootPortPrefix = "rp"
)
// This is the PCI start address assigned to the first bridge that
@ -646,6 +650,7 @@ func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDev conf
BDF: vfioDev.BDF,
VendorID: vfioDev.VendorID,
DeviceID: vfioDev.DeviceID,
Bus: vfioDev.Bus,
},
)
@ -750,3 +755,8 @@ func (q *qemuArchBase) setBridges(bridges []types.Bridge) {
func (q *qemuArchBase) addBridge(b types.Bridge) {
q.Bridges = append(q.Bridges, b)
}
// appendPCIeRootPortDevice appends to devices the given pcie-root-port
func (q *qemuArchBase) appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32) []govmmQemu.Device {
return genericAppendPCIeRootPort(devices, number, q.machineType)
}