qemu: Support PCIe device hotplug for q35

- add pcie-root-port device to qemu command line for q35
- hotplug a PCIe device into a PCIe Root Port

Fixes: #2432

Signed-off-by: Jimmy Xu <junming.xjm@antfin.com>
This commit is contained in:
Jimmy Xu 2020-02-05 14:35:14 +08:00
parent fa7d00ec25
commit bb41b7248a
22 changed files with 281 additions and 21 deletions

View File

@ -186,6 +186,7 @@ DEFENABLEDEBUG := false
DEFDISABLENESTINGCHECKS := false
DEFMSIZE9P := 8192
DEFHOTPLUGVFIOONROOTBUS := false
DEFPCIEROOTPORT := 0
# Default cgroup model
DEFSANDBOXCGROUPONLY ?= false
@ -444,6 +445,7 @@ USER_VARS += DEFENABLEDEBUG
USER_VARS += DEFDISABLENESTINGCHECKS
USER_VARS += DEFMSIZE9P
USER_VARS += DEFHOTPLUGVFIOONROOTBUS
USER_VARS += DEFPCIEROOTPORT
USER_VARS += DEFENTROPYSOURCE
USER_VARS += DEFSANDBOXCGROUPONLY
USER_VARS += BUILDFLAGS
@ -619,7 +621,8 @@ $(GENERATED_FILES): %: %.in $(MAKEFILE_LIST) VERSION .git-commit
-e "s|@DEFENABLEDEBUG@|$(DEFENABLEDEBUG)|g" \
-e "s|@DEFDISABLENESTINGCHECKS@|$(DEFDISABLENESTINGCHECKS)|g" \
-e "s|@DEFMSIZE9P@|$(DEFMSIZE9P)|g" \
-e "s|@DEFHOTPLUGONROOTBUS@|$(DEFHOTPLUGVFIOONROOTBUS)|g" \
-e "s|@DEFHOTPLUGVFIOONROOTBUS@|$(DEFHOTPLUGVFIOONROOTBUS)|g" \
-e "s|@DEFPCIEROOTPORT@|$(DEFPCIEROOTPORT)|g" \
-e "s|@DEFENTROPYSOURCE@|$(DEFENTROPYSOURCE)|g" \
-e "s|@DEFSANDBOXCGROUPONLY@|$(DEFSANDBOXCGROUPONLY)|g" \
$< > $@

View File

@ -224,6 +224,13 @@ enable_iothreads = @DEFENABLEIOTHREADS@
# Default false
#hotplug_vfio_on_root_bus = true
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
# Use this parameter when using devices with a large PCI BAR, such as Nvidia GPUs.
# The value is the number of pcie_root_port devices to add.
# This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35"
# Default 0
#pcie_root_port = 2
# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
# security (vhost-net runs ring0) for network I/O performance.
#disable_vhost_net = true

View File

@ -83,16 +83,18 @@ type RuntimeVersionInfo struct {
// HypervisorInfo stores hypervisor details
type HypervisorInfo struct {
MachineType string
Version string
Path string
BlockDeviceDriver string
EntropySource string
Msize9p uint32
MemorySlots uint32
Debug bool
UseVSock bool
SharedFS string
MachineType string
Version string
Path string
BlockDeviceDriver string
EntropySource string
SharedFS string
Msize9p uint32
MemorySlots uint32
PCIeRootPort uint32
HotplugVFIOOnRootBus bool
Debug bool
UseVSock bool
}
// ProxyInfo stores proxy details
@ -355,6 +357,9 @@ func getHypervisorInfo(config oci.RuntimeConfig) HypervisorInfo {
MemorySlots: config.HypervisorConfig.MemSlots,
EntropySource: config.HypervisorConfig.EntropySource,
SharedFS: config.HypervisorConfig.SharedFS,
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
}
}

View File

@ -91,6 +91,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
blockStorageDriver := "virtio-scsi"
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
disableNewNetNs := false
sharedFS := "virtio-9p"
@ -150,6 +151,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
BlockDeviceDriver: blockStorageDriver,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
DisableNewNetNs: disableNewNetNs,
DefaultVCPUCount: hypConfig.NumVCPUs,
DefaultMaxVCPUCount: hypConfig.DefaultMaxVCPUs,
@ -329,6 +331,9 @@ func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo {
Debug: config.HypervisorConfig.Debug,
EntropySource: config.HypervisorConfig.EntropySource,
SharedFS: config.HypervisorConfig.SharedFS,
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
}
}

View File

@ -398,6 +398,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
blockDeviceDriver := "virtio-scsi"
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
disableNewNetNs := false
sharedFS := "virtio-9p"
@ -416,6 +417,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
BlockDeviceDriver: blockDeviceDriver,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
DisableNewNetNs: disableNewNetNs,
SharedFS: sharedFS,
}

View File

@ -28,6 +28,7 @@ type RuntimeConfigOptions struct {
AgentTraceMode string
AgentTraceType string
SharedFS string
PCIeRootPort uint32
DisableBlock bool
EnableIOThreads bool
HotplugVFIOOnRootBus bool
@ -59,6 +60,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + `
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
hotplug_vfio_on_root_bus = ` + strconv.FormatBool(config.HotplugVFIOOnRootBus) + `
pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + `
msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + `
enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + `
guest_hook_path = "` + config.DefaultGuestHookPath + `"

View File

@ -44,6 +44,7 @@ const defaultEnableDebug bool = false
const defaultDisableNestingChecks bool = false
const defaultMsize9p uint32 = 8192
const defaultHotplugVFIOOnRootBus bool = false
const defaultPCIeRootPort = 0
const defaultEntropySource = "/dev/urandom"
const defaultGuestHookPath string = ""
const defaultVirtioFSCacheMode = "none"

View File

@ -111,6 +111,7 @@ type hypervisor struct {
MemOffset uint32 `toml:"memory_offset"`
DefaultBridges uint32 `toml:"default_bridges"`
Msize9p uint32 `toml:"msize_9p"`
PCIeRootPort uint32 `toml:"pcie_root_port"`
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
MemPrealloc bool `toml:"enable_mem_prealloc"`
HugePages bool `toml:"enable_hugepages"`
@ -648,6 +649,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
UseVSock: useVSock,
DisableImageNvdimm: h.DisableImageNvdimm,
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
PCIeRootPort: h.PCIeRootPort,
DisableVhostNet: h.DisableVhostNet,
GuestHookPath: h.guestHookPath(),
}, nil
@ -796,6 +798,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
EnableIOThreads: h.EnableIOThreads,
Msize9p: h.msize9p(),
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
PCIeRootPort: h.PCIeRootPort,
DisableVhostNet: true,
UseVSock: true,
}, nil
@ -1073,6 +1076,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
EnableIOThreads: defaultEnableIOThreads,
Msize9p: defaultMsize9p,
HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus,
PCIeRootPort: defaultPCIeRootPort,
GuestHookPath: defaultGuestHookPath,
VirtioFSCache: defaultVirtioFSCacheMode,
DisableImageNvdimm: defaultDisableImageNvdimm,

View File

@ -82,6 +82,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
blockDeviceDriver := "virtio-scsi"
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
disableNewNetNs := false
sharedFS := "virtio-9p"
@ -101,6 +102,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
BlockDeviceDriver: blockDeviceDriver,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
DisableNewNetNs: disableNewNetNs,
DefaultVCPUCount: defaultVCPUCount,
DefaultMaxVCPUCount: defaultMaxVCPUCount,
@ -158,6 +160,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
Mlock: !defaultEnableSwap,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
Msize9p: defaultMsize9p,
MemSlots: defaultMemSlots,
EntropySource: defaultEntropySource,
@ -775,6 +778,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
disableBlock := true
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
orgVHostVSockDevicePath := utils.VHostVSockDevicePath
defer func() {
utils.VHostVSockDevicePath = orgVHostVSockDevicePath
@ -789,6 +793,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
DisableBlockDeviceUse: disableBlock,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
UseVSock: true,
}
@ -846,6 +851,10 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
if config.HotplugVFIOOnRootBus != hotplugVFIOOnRootBus {
t.Errorf("Expected value for HotplugVFIOOnRootBus %v, got %v", hotplugVFIOOnRootBus, config.HotplugVFIOOnRootBus)
}
if config.PCIeRootPort != pcieRootPort {
t.Errorf("Expected value for PCIeRootPort %v, got %v", pcieRootPort, config.PCIeRootPort)
}
}
func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
@ -869,6 +878,7 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
disableBlock := true
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
hypervisor := hypervisor{
Path: hypervisorPath,
@ -879,6 +889,7 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
DisableBlockDeviceUse: disableBlock,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
}
_, err = newQemuHypervisorConfig(hypervisor)

View File

@ -75,6 +75,12 @@ var SysDevPrefix = "/sys/dev"
// SysIOMMUPath is static string of /sys/kernel/iommu_groups
var SysIOMMUPath = "/sys/kernel/iommu_groups"
// SysBusPciDevicesPath is static string of /sys/bus/pci/devices
var SysBusPciDevicesPath = "/sys/bus/pci/devices"
// SysBusPciSlotsPath is static string of /sys/bus/pci/slots
var SysBusPciSlotsPath = "/sys/bus/pci/slots"
// DeviceInfo is an embedded type that contains device data common to all types of devices.
type DeviceInfo struct {
// Hostpath is device path on host
@ -165,12 +171,15 @@ const (
// VFIODev represents a VFIO drive used for hotplugging
type VFIODev struct {
// ID is used to identify this drive in the hypervisor options.
ID string
// IsPCIe specifies device is PCIe or PCI
IsPCIe bool
// Type of VFIO device
Type VFIODeviceType
// ID is used to identify this drive in the hypervisor options.
ID string
// BDF (Bus:Device.Function) of the PCI address
BDF string
@ -182,6 +191,12 @@ type VFIODev struct {
// DeviceID specifies device id
DeviceID string
// PCI Class Code
Class string
// Bus of VFIO PCIe device
Bus string
}
// RNGDev represents a random number generator device

View File

@ -7,13 +7,102 @@
package drivers
import (
"github.com/sirupsen/logrus"
"fmt"
"io/ioutil"
"path/filepath"
"strings"
"github.com/kata-containers/runtime/virtcontainers/device/api"
"github.com/kata-containers/runtime/virtcontainers/device/config"
"github.com/sirupsen/logrus"
)
const intMax uint = ^uint(0)
const (
// intMax is the largest value a uint can hold (every bit set).
intMax = ^uint(0)
// PCIDomain is the PCI domain prepended to a bus:slot.function address
// that was given without a domain part.
PCIDomain = "0000"
// PCIeKeyword is the substring searched for in a slot's max_bus_speed
// property to decide whether the slot is PCIe.
PCIeKeyword = "PCIe"
)
// PCISysFsType is the name of a directory under the PCI bus sysfs tree.
type PCISysFsType string
// Known PCI sysfs directories.
var (
PCISysFsDevices PCISysFsType = "devices" // /sys/bus/pci/devices
PCISysFsSlots PCISysFsType = "slots" // /sys/bus/pci/slots
)
// PCISysFsProperty is the name of a property file found inside a PCI sysfs
// device or slot directory.
type PCISysFsProperty string
// Known PCI sysfs property files.
var (
PCISysFsDevicesClass PCISysFsProperty = "class" // /sys/bus/pci/devices/xxx/class
PCISysFsSlotsAddress PCISysFsProperty = "address" // /sys/bus/pci/slots/xxx/address
PCISysFsSlotsMaxBusSpeed PCISysFsProperty = "max_bus_speed" // /sys/bus/pci/slots/xxx/max_bus_speed
)
// deviceLogger returns the log entry obtained from api.DeviceLogger; the
// helpers in this file use it for their warnings and debug messages.
func deviceLogger() *logrus.Entry {
return api.DeviceLogger()
}
// isPCIeDevice reports whether the PCI device identified by bdf is plugged
// into a PCIe slot on the host.
//
// bdf has the form bus:slot.function; when it carries no domain (exactly one
// ":"), PCIDomain is prepended. Each entry under config.SysBusPciSlotsPath is
// inspected: a slot whose "address" property (sample content "0000:04:00")
// equals bdf without its ".function" suffix and whose "max_bus_speed"
// property (sample content "8.0 GT/s PCIe") contains PCIeKeyword marks the
// device as PCIe.
//
// false is returned when the slots directory cannot be listed or when no such
// slot is found.
func isPCIeDevice(bdf string) bool {
	if strings.Count(bdf, ":") == 1 {
		bdf = PCIDomain + ":" + bdf
	}

	entries, err := ioutil.ReadDir(config.SysBusPciSlotsPath)
	if err != nil {
		deviceLogger().WithError(err).WithField("path", config.SysBusPciSlotsPath).Warn("failed to list pci slots")
		return false
	}

	// Slot addresses have no function number, so only the part of bdf before
	// the first "." takes part in the comparison.
	slotAddr := bdf
	if dot := strings.Index(bdf, "."); dot >= 0 {
		slotAddr = bdf[:dot]
	}

	for _, entry := range entries {
		name := entry.Name()
		if getPCISlotProperty(name, PCISysFsSlotsAddress) != slotAddr {
			continue
		}
		if speed := getPCISlotProperty(name, PCISysFsSlotsMaxBusSpeed); strings.Contains(speed, PCIeKeyword) {
			return true
		}
	}

	deviceLogger().WithField("dev-bdf", bdf).Debug("can not find slot for bdf of pci device")
	return false
}
// getPCIDeviceProperty returns the first line of the file named property in
// the sysfs directory of the PCI device bdf, i.e.
// <config.SysBusPciDevicesPath>/<bdf>/<property>.
//
// A bdf with exactly one ":" is taken to lack a domain and gets PCIDomain
// prepended. If the file cannot be read, a warning is logged and the empty
// string is returned.
func getPCIDeviceProperty(bdf string, property PCISysFsProperty) string {
	if strings.Count(bdf, ":") == 1 {
		bdf = PCIDomain + ":" + bdf
	}

	path := filepath.Join(config.SysBusPciDevicesPath, bdf, string(property))
	value, err := readPCIProperty(path)
	if err == nil {
		return value
	}

	deviceLogger().WithError(err).WithField("path", path).Warn("failed to read pci device property")
	return ""
}
// getPCISlotProperty returns the first line of the file named property in the
// sysfs directory of the PCI slot, i.e.
// <config.SysBusPciSlotsPath>/<slot>/<property>.
//
// If the file cannot be read, a warning is logged and the empty string is
// returned.
func getPCISlotProperty(slot string, property PCISysFsProperty) string {
	path := filepath.Join(config.SysBusPciSlotsPath, slot, string(property))
	value, err := readPCIProperty(path)
	if err == nil {
		return value
	}

	deviceLogger().WithError(err).WithField("path", path).Warn("failed to read pci slot property")
	return ""
}
// readPCIProperty reads the file at propertyPath and returns its first line,
// i.e. everything before the first "\n" (the whole content when there is no
// newline, the empty string for an empty file).
//
// A read failure is returned as an error that names propertyPath and the
// underlying cause; the returned string is then empty.
func readPCIProperty(propertyPath string) (string, error) {
	content, err := ioutil.ReadFile(propertyPath)
	if err != nil {
		return "", fmt.Errorf("failed to read pci sysfs %v, error:%v", propertyPath, err)
	}

	text := string(content)
	if eol := strings.IndexByte(text, '\n'); eol >= 0 {
		text = text[:eol]
	}
	return text, nil
}

View File

@ -27,6 +27,11 @@ const (
pciDriverBindPath = "/sys/bus/pci/drivers/%s/bind"
vfioNewIDPath = "/sys/bus/pci/drivers/vfio-pci/new_id"
vfioRemoveIDPath = "/sys/bus/pci/drivers/vfio-pci/remove_id"
pcieRootPortPrefix = "rp"
)
var (
AllPCIeDevs = map[string]bool{}
)
// VFIODevice is a vfio device meant to be passed to the hypervisor
@ -83,8 +88,14 @@ func (device *VFIODevice) Attach(devReceiver api.DeviceReceiver) (retErr error)
Type: vfioDeviceType,
BDF: deviceBDF,
SysfsDev: deviceSysfsDev,
IsPCIe: isPCIeDevice(deviceBDF),
Class: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass),
}
device.VfioDevs = append(device.VfioDevs, vfio)
if vfio.IsPCIe {
vfio.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs))
AllPCIeDevs[vfio.BDF] = true
}
}
// hotplug a VFIO device is actually hotplugging a group of iommu devices

View File

@ -369,6 +369,10 @@ type HypervisorConfig struct {
// root bus instead of a bridge.
HotplugVFIOOnRootBus bool
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
// The PCIe Root Port device is used to hot-plug the PCIe device
PCIeRootPort uint32
// BootToBeTemplate used to indicate if the VM is created to be a template VM
BootToBeTemplate bool

View File

@ -248,6 +248,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
UseVSock: sconfig.HypervisorConfig.UseVSock,
DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm,
HotplugVFIOOnRootBus: sconfig.HypervisorConfig.HotplugVFIOOnRootBus,
PCIeRootPort: sconfig.HypervisorConfig.PCIeRootPort,
BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate,
BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate,
DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet,
@ -534,6 +535,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
UseVSock: hconf.UseVSock,
DisableImageNvdimm: hconf.DisableImageNvdimm,
HotplugVFIOOnRootBus: hconf.HotplugVFIOOnRootBus,
PCIeRootPort: hconf.PCIeRootPort,
BootToBeTemplate: hconf.BootToBeTemplate,
BootFromTemplate: hconf.BootFromTemplate,
DisableVhostNet: hconf.DisableVhostNet,

View File

@ -153,6 +153,10 @@ type HypervisorConfig struct {
// root bus instead of a bridge.
HotplugVFIOOnRootBus bool
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
// The PCIe Root Port device is used to hot-plug the PCIe device
PCIeRootPort uint32
// BootToBeTemplate used to indicate if the VM is created to be a template VM
BootToBeTemplate bool

View File

@ -41,4 +41,5 @@ type HypervisorState struct {
HotpluggedMemory int
VirtiofsdPid int
HotplugVFIOOnRootBus bool
PCIeRootPort int
}

View File

@ -97,6 +97,10 @@ const (
// root bus instead of a bridge.
HotplugVFIOOnRootBus = kataAnnotHypervisorPrefix + "hotplug_vfio_on_root_bus"
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
// The PCIe Root Port device is used to hot-plug the PCIe device
PCIeRootPort = kataAnnotHypervisorPrefix + "pcie_root_port"
// EntropySource is a sandbox annotation to specify the path to a host source of
// entropy (/dev/random, /dev/urandom or real hardware RNG device)
EntropySource = kataAnnotHypervisorPrefix + "entropy_source"

View File

@ -447,6 +447,14 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig)
config.HypervisorConfig.HotplugVFIOOnRootBus = hotplugVFIOOnRootBus
}
if value, ok := ocispec.Annotations[vcAnnotations.PCIeRootPort]; ok {
pcieRootPort, err := strconv.ParseUint(value, 10, 32)
if err != nil {
return fmt.Errorf("Error parsing annotation for pcie_root_port: %v, Please specify an integer greater than or equal to 0", err)
}
config.HypervisorConfig.PCIeRootPort = uint32(pcieRootPort)
}
if value, ok := ocispec.Annotations[vcAnnotations.EntropySource]; ok {
if value != "" {
config.HypervisorConfig.EntropySource = value

View File

@ -763,6 +763,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
ocispec.Annotations[vcAnnotations.UseVSock] = "true"
ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true"
ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus] = "true"
ocispec.Annotations[vcAnnotations.PCIeRootPort] = "2"
ocispec.Annotations[vcAnnotations.EntropySource] = "/dev/urandom"
addAnnotations(ocispec, &config)
@ -793,6 +794,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
assert.Equal(config.HypervisorConfig.UseVSock, true)
assert.Equal(config.HypervisorConfig.DisableImageNvdimm, true)
assert.Equal(config.HypervisorConfig.HotplugVFIOOnRootBus, true)
assert.Equal(config.HypervisorConfig.PCIeRootPort, uint32(2))
assert.Equal(config.HypervisorConfig.EntropySource, "/dev/urandom")
// In case an absurdly large value is provided, the config value is not overridden

View File

@ -71,6 +71,7 @@ type QemuState struct {
UUID string
HotplugVFIOOnRootBus bool
VirtiofsdPid int
PCIeRootPort int
}
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
@ -266,6 +267,7 @@ func (q *qemu) setup(id string, hypervisorConfig *HypervisorConfig) error {
q.state.UUID = uuid.Generate().String()
q.state.HotplugVFIOOnRootBus = q.config.HotplugVFIOOnRootBus
q.state.PCIeRootPort = int(q.config.PCIeRootPort)
// The path might already exist, but in case of VM templating,
// we have to create it since the sandbox has not created it yet.
@ -584,6 +586,13 @@ func (q *qemu) createSandbox(ctx context.Context, id string, networkNS NetworkNa
return err
}
// Add PCIe Root Port devices to hypervisor
// The pcie.0 bus does not support hot-plug, but a PCIe device can be hot-plugged into a PCIe Root Port.
// For more details, please see https://github.com/qemu/qemu/blob/master/docs/pcie.txt
if hypervisorConfig.PCIeRootPort > 0 {
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, hypervisorConfig.PCIeRootPort)
}
q.qemuConfig = qemuConfig
return nil
@ -1149,17 +1158,39 @@ func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) (err erro
}
devID := device.ID
machinneType := q.hypervisorConfig().HypervisorMachineType
if op == addDevice {
buf, _ := json.Marshal(device)
q.Logger().WithFields(logrus.Fields{
"machine-type": machinneType,
"hotplug-vfio-on-root-bus": q.state.HotplugVFIOOnRootBus,
"pcie-root-port": q.state.PCIeRootPort,
"device-info": string(buf),
}).Info("Start hot-plug VFIO device")
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
// for pc machine type instead of bridge. This is useful for devices that require
// a large PCI BAR which is currently a limitation with PCI bridges.
if q.state.HotplugVFIOOnRootBus {
// In case MachineType is q35, a PCIe device is hotplugged on a PCIe Root Port.
switch machinneType {
case QemuQ35:
if device.IsPCIe && q.state.PCIeRootPort <= 0 {
q.Logger().WithField("dev-id", device.ID).Warn("VFIO device is a PCIe device. It's recommended to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for q35")
device.Bus = ""
}
default:
device.Bus = ""
}
switch device.Type {
case config.VFIODeviceNormalType:
return q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, "", romFile)
return q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile)
case config.VFIODeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", "", romFile)
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", device.Bus, romFile)
default:
return fmt.Errorf("Incorrect VFIO device type found")
}
@ -1185,6 +1216,8 @@ func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) (err erro
return fmt.Errorf("Incorrect VFIO device type found")
}
} else {
q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device")
if !q.state.HotplugVFIOOnRootBus {
if err := q.arch.removeDeviceFromBridge(devID); err != nil {
return err
@ -1848,6 +1881,39 @@ func genericMemoryTopology(memoryMb, hostMemoryMb uint64, slots uint8, memoryOff
return memory
}
// genericAppendPCIeRootPort appends number pcie-root-port devices to devices
// and returns the resulting slice.
//
// Only the q35 machine type is handled; for any other machineType devices is
// returned untouched. Each root port is attached to defaultBridgeBus with
// chassis "0", address "0" and multifunction disabled. Its ID is
// pcieRootPortPrefix followed by its zero-based index, and the same index is
// used as its slot.
func genericAppendPCIeRootPort(devices []govmmQemu.Device, number uint32, machineType string) []govmmQemu.Device {
	if machineType != QemuQ35 {
		return devices
	}

	for idx := uint32(0); idx < number; idx++ {
		rootPort := govmmQemu.PCIeRootPortDevice{
			ID:            fmt.Sprintf("%s%d", pcieRootPortPrefix, idx),
			Bus:           defaultBridgeBus,
			Chassis:       "0",
			Slot:          strconv.FormatUint(uint64(idx), 10),
			Multifunction: false,
			Addr:          "0",
		}
		devices = append(devices, rootPort)
	}

	return devices
}
func (q *qemu) getThreadIDs() (vcpuThreadIDs, error) {
span, _ := q.trace("getThreadIDs")
defer span.Finish()
@ -2013,6 +2079,7 @@ func (q *qemu) save() (s persistapi.HypervisorState) {
s.UUID = q.state.UUID
s.HotpluggedMemory = q.state.HotpluggedMemory
s.HotplugVFIOOnRootBus = q.state.HotplugVFIOOnRootBus
s.PCIeRootPort = q.state.PCIeRootPort
for _, bridge := range q.arch.getBridges() {
s.Bridges = append(s.Bridges, persistapi.Bridge{
@ -2036,6 +2103,7 @@ func (q *qemu) load(s persistapi.HypervisorState) {
q.state.HotpluggedMemory = s.HotpluggedMemory
q.state.HotplugVFIOOnRootBus = s.HotplugVFIOOnRootBus
q.state.VirtiofsdPid = s.VirtiofsdPid
q.state.PCIeRootPort = s.PCIeRootPort
for _, bridge := range s.Bridges {
q.state.Bridges = append(q.state.Bridges, types.NewBridge(types.Type(bridge.Type), bridge.ID, bridge.DeviceAddr, bridge.Addr))

View File

@ -20,13 +20,15 @@ type qemuAmd64 struct {
vmFactory bool
}
const defaultQemuPath = "/usr/bin/qemu-system-x86_64"
const (
defaultQemuPath = "/usr/bin/qemu-system-x86_64"
const defaultQemuMachineType = QemuPC
defaultQemuMachineType = QemuPC
const defaultQemuMachineOptions = "accel=kvm,kernel_irqchip"
defaultQemuMachineOptions = "accel=kvm,kernel_irqchip"
const qmpMigrationWaitTimeout = 5 * time.Second
qmpMigrationWaitTimeout = 5 * time.Second
)
var qemuPaths = map[string]string{
QemuPCLite: "/usr/bin/qemu-lite-system-x86_64",

View File

@ -127,6 +127,9 @@ type qemuArch interface {
// setIgnoreSharedMemoryMigrationCaps set bypass-shared-memory capability for migration
setIgnoreSharedMemoryMigrationCaps(context.Context, *govmmQemu.QMP) error
// appendPCIeRootPortDevice appends a pcie-root-port device to pcie.0 bus
appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32) []govmmQemu.Device
}
type qemuArchBase struct {
@ -153,6 +156,7 @@ const (
defaultPCBridgeBus = "pci.0"
maxDevIDSize = 31
defaultMsize9p = 8192
pcieRootPortPrefix = "rp"
)
// This is the PCI start address assigned to the first bridge that
@ -646,6 +650,7 @@ func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDev conf
BDF: vfioDev.BDF,
VendorID: vfioDev.VendorID,
DeviceID: vfioDev.DeviceID,
Bus: vfioDev.Bus,
},
)
@ -750,3 +755,8 @@ func (q *qemuArchBase) setBridges(bridges []types.Bridge) {
func (q *qemuArchBase) addBridge(b types.Bridge) {
q.Bridges = append(q.Bridges, b)
}
// appendPCIeRootPortDevice appends number pcie-root-port devices to devices
// and returns the result. It delegates to genericAppendPCIeRootPort, passing
// the machine type stored in q.
func (q *qemuArchBase) appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32) []govmmQemu.Device {
return genericAppendPCIeRootPort(devices, number, q.machineType)
}