mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-25 06:52:13 +00:00
qemu: Support PCIe device hotplug for q35
- add pcie-root-port device to qemu command line for q35 - hotplug a PCIe device into a PCIe Root Port Fixes: #2432 Signed-off-by: Jimmy Xu <junming.xjm@antfin.com>
This commit is contained in:
parent
fa7d00ec25
commit
bb41b7248a
5
Makefile
5
Makefile
@ -186,6 +186,7 @@ DEFENABLEDEBUG := false
|
||||
DEFDISABLENESTINGCHECKS := false
|
||||
DEFMSIZE9P := 8192
|
||||
DEFHOTPLUGVFIOONROOTBUS := false
|
||||
DEFPCIEROOTPORT := 0
|
||||
|
||||
# Default cgroup model
|
||||
DEFSANDBOXCGROUPONLY ?= false
|
||||
@ -444,6 +445,7 @@ USER_VARS += DEFENABLEDEBUG
|
||||
USER_VARS += DEFDISABLENESTINGCHECKS
|
||||
USER_VARS += DEFMSIZE9P
|
||||
USER_VARS += DEFHOTPLUGVFIOONROOTBUS
|
||||
USER_VARS += DEFPCIEROOTPORT
|
||||
USER_VARS += DEFENTROPYSOURCE
|
||||
USER_VARS += DEFSANDBOXCGROUPONLY
|
||||
USER_VARS += BUILDFLAGS
|
||||
@ -619,7 +621,8 @@ $(GENERATED_FILES): %: %.in $(MAKEFILE_LIST) VERSION .git-commit
|
||||
-e "s|@DEFENABLEDEBUG@|$(DEFENABLEDEBUG)|g" \
|
||||
-e "s|@DEFDISABLENESTINGCHECKS@|$(DEFDISABLENESTINGCHECKS)|g" \
|
||||
-e "s|@DEFMSIZE9P@|$(DEFMSIZE9P)|g" \
|
||||
-e "s|@DEFHOTPLUGONROOTBUS@|$(DEFHOTPLUGVFIOONROOTBUS)|g" \
|
||||
-e "s|@DEFHOTPLUGVFIOONROOTBUS@|$(DEFHOTPLUGVFIOONROOTBUS)|g" \
|
||||
-e "s|@DEFPCIEROOTPORT@|$(DEFPCIEROOTPORT)|g" \
|
||||
-e "s|@DEFENTROPYSOURCE@|$(DEFENTROPYSOURCE)|g" \
|
||||
-e "s|@DEFSANDBOXCGROUPONLY@|$(DEFSANDBOXCGROUPONLY)|g" \
|
||||
$< > $@
|
||||
|
@ -224,6 +224,13 @@ enable_iothreads = @DEFENABLEIOTHREADS@
|
||||
# Default false
|
||||
#hotplug_vfio_on_root_bus = true
|
||||
|
||||
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
|
||||
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU
|
||||
# The value is the number of pcie_root_port devices to add
|
||||
# This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35"
|
||||
# Default 0
|
||||
#pcie_root_port = 2
|
||||
|
||||
# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
|
||||
# security (vhost-net runs ring0) for network I/O performance.
|
||||
#disable_vhost_net = true
|
||||
|
@ -83,16 +83,18 @@ type RuntimeVersionInfo struct {
|
||||
|
||||
// HypervisorInfo stores hypervisor details
|
||||
type HypervisorInfo struct {
|
||||
MachineType string
|
||||
Version string
|
||||
Path string
|
||||
BlockDeviceDriver string
|
||||
EntropySource string
|
||||
Msize9p uint32
|
||||
MemorySlots uint32
|
||||
Debug bool
|
||||
UseVSock bool
|
||||
SharedFS string
|
||||
MachineType string
|
||||
Version string
|
||||
Path string
|
||||
BlockDeviceDriver string
|
||||
EntropySource string
|
||||
SharedFS string
|
||||
Msize9p uint32
|
||||
MemorySlots uint32
|
||||
PCIeRootPort uint32
|
||||
HotplugVFIOOnRootBus bool
|
||||
Debug bool
|
||||
UseVSock bool
|
||||
}
|
||||
|
||||
// ProxyInfo stores proxy details
|
||||
@ -355,6 +357,9 @@ func getHypervisorInfo(config oci.RuntimeConfig) HypervisorInfo {
|
||||
MemorySlots: config.HypervisorConfig.MemSlots,
|
||||
EntropySource: config.HypervisorConfig.EntropySource,
|
||||
SharedFS: config.HypervisorConfig.SharedFS,
|
||||
|
||||
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
|
||||
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -91,6 +91,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
|
||||
blockStorageDriver := "virtio-scsi"
|
||||
enableIOThreads := true
|
||||
hotplugVFIOOnRootBus := true
|
||||
pcieRootPort := uint32(2)
|
||||
disableNewNetNs := false
|
||||
sharedFS := "virtio-9p"
|
||||
|
||||
@ -150,6 +151,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
|
||||
BlockDeviceDriver: blockStorageDriver,
|
||||
EnableIOThreads: enableIOThreads,
|
||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||
PCIeRootPort: pcieRootPort,
|
||||
DisableNewNetNs: disableNewNetNs,
|
||||
DefaultVCPUCount: hypConfig.NumVCPUs,
|
||||
DefaultMaxVCPUCount: hypConfig.DefaultMaxVCPUs,
|
||||
@ -329,6 +331,9 @@ func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo {
|
||||
Debug: config.HypervisorConfig.Debug,
|
||||
EntropySource: config.HypervisorConfig.EntropySource,
|
||||
SharedFS: config.HypervisorConfig.SharedFS,
|
||||
|
||||
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
|
||||
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -398,6 +398,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
|
||||
blockDeviceDriver := "virtio-scsi"
|
||||
enableIOThreads := true
|
||||
hotplugVFIOOnRootBus := true
|
||||
pcieRootPort := uint32(2)
|
||||
disableNewNetNs := false
|
||||
sharedFS := "virtio-9p"
|
||||
|
||||
@ -416,6 +417,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
|
||||
BlockDeviceDriver: blockDeviceDriver,
|
||||
EnableIOThreads: enableIOThreads,
|
||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||
PCIeRootPort: pcieRootPort,
|
||||
DisableNewNetNs: disableNewNetNs,
|
||||
SharedFS: sharedFS,
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ type RuntimeConfigOptions struct {
|
||||
AgentTraceMode string
|
||||
AgentTraceType string
|
||||
SharedFS string
|
||||
PCIeRootPort uint32
|
||||
DisableBlock bool
|
||||
EnableIOThreads bool
|
||||
HotplugVFIOOnRootBus bool
|
||||
@ -59,6 +60,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
|
||||
disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + `
|
||||
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
|
||||
hotplug_vfio_on_root_bus = ` + strconv.FormatBool(config.HotplugVFIOOnRootBus) + `
|
||||
pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + `
|
||||
msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + `
|
||||
enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + `
|
||||
guest_hook_path = "` + config.DefaultGuestHookPath + `"
|
||||
|
@ -44,6 +44,7 @@ const defaultEnableDebug bool = false
|
||||
const defaultDisableNestingChecks bool = false
|
||||
const defaultMsize9p uint32 = 8192
|
||||
const defaultHotplugVFIOOnRootBus bool = false
|
||||
const defaultPCIeRootPort = 0
|
||||
const defaultEntropySource = "/dev/urandom"
|
||||
const defaultGuestHookPath string = ""
|
||||
const defaultVirtioFSCacheMode = "none"
|
||||
|
@ -111,6 +111,7 @@ type hypervisor struct {
|
||||
MemOffset uint32 `toml:"memory_offset"`
|
||||
DefaultBridges uint32 `toml:"default_bridges"`
|
||||
Msize9p uint32 `toml:"msize_9p"`
|
||||
PCIeRootPort uint32 `toml:"pcie_root_port"`
|
||||
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
|
||||
MemPrealloc bool `toml:"enable_mem_prealloc"`
|
||||
HugePages bool `toml:"enable_hugepages"`
|
||||
@ -648,6 +649,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
UseVSock: useVSock,
|
||||
DisableImageNvdimm: h.DisableImageNvdimm,
|
||||
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
|
||||
PCIeRootPort: h.PCIeRootPort,
|
||||
DisableVhostNet: h.DisableVhostNet,
|
||||
GuestHookPath: h.guestHookPath(),
|
||||
}, nil
|
||||
@ -796,6 +798,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
EnableIOThreads: h.EnableIOThreads,
|
||||
Msize9p: h.msize9p(),
|
||||
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
|
||||
PCIeRootPort: h.PCIeRootPort,
|
||||
DisableVhostNet: true,
|
||||
UseVSock: true,
|
||||
}, nil
|
||||
@ -1073,6 +1076,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
|
||||
EnableIOThreads: defaultEnableIOThreads,
|
||||
Msize9p: defaultMsize9p,
|
||||
HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus,
|
||||
PCIeRootPort: defaultPCIeRootPort,
|
||||
GuestHookPath: defaultGuestHookPath,
|
||||
VirtioFSCache: defaultVirtioFSCacheMode,
|
||||
DisableImageNvdimm: defaultDisableImageNvdimm,
|
||||
|
@ -82,6 +82,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
||||
blockDeviceDriver := "virtio-scsi"
|
||||
enableIOThreads := true
|
||||
hotplugVFIOOnRootBus := true
|
||||
pcieRootPort := uint32(2)
|
||||
disableNewNetNs := false
|
||||
sharedFS := "virtio-9p"
|
||||
|
||||
@ -101,6 +102,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
||||
BlockDeviceDriver: blockDeviceDriver,
|
||||
EnableIOThreads: enableIOThreads,
|
||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||
PCIeRootPort: pcieRootPort,
|
||||
DisableNewNetNs: disableNewNetNs,
|
||||
DefaultVCPUCount: defaultVCPUCount,
|
||||
DefaultMaxVCPUCount: defaultMaxVCPUCount,
|
||||
@ -158,6 +160,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
||||
Mlock: !defaultEnableSwap,
|
||||
EnableIOThreads: enableIOThreads,
|
||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||
PCIeRootPort: pcieRootPort,
|
||||
Msize9p: defaultMsize9p,
|
||||
MemSlots: defaultMemSlots,
|
||||
EntropySource: defaultEntropySource,
|
||||
@ -775,6 +778,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
|
||||
disableBlock := true
|
||||
enableIOThreads := true
|
||||
hotplugVFIOOnRootBus := true
|
||||
pcieRootPort := uint32(2)
|
||||
orgVHostVSockDevicePath := utils.VHostVSockDevicePath
|
||||
defer func() {
|
||||
utils.VHostVSockDevicePath = orgVHostVSockDevicePath
|
||||
@ -789,6 +793,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
|
||||
DisableBlockDeviceUse: disableBlock,
|
||||
EnableIOThreads: enableIOThreads,
|
||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||
PCIeRootPort: pcieRootPort,
|
||||
UseVSock: true,
|
||||
}
|
||||
|
||||
@ -846,6 +851,10 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
|
||||
if config.HotplugVFIOOnRootBus != hotplugVFIOOnRootBus {
|
||||
t.Errorf("Expected value for HotplugVFIOOnRootBus %v, got %v", hotplugVFIOOnRootBus, config.HotplugVFIOOnRootBus)
|
||||
}
|
||||
|
||||
if config.PCIeRootPort != pcieRootPort {
|
||||
t.Errorf("Expected value for PCIeRootPort %v, got %v", pcieRootPort, config.PCIeRootPort)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
|
||||
@ -869,6 +878,7 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
|
||||
disableBlock := true
|
||||
enableIOThreads := true
|
||||
hotplugVFIOOnRootBus := true
|
||||
pcieRootPort := uint32(2)
|
||||
|
||||
hypervisor := hypervisor{
|
||||
Path: hypervisorPath,
|
||||
@ -879,6 +889,7 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
|
||||
DisableBlockDeviceUse: disableBlock,
|
||||
EnableIOThreads: enableIOThreads,
|
||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||
PCIeRootPort: pcieRootPort,
|
||||
}
|
||||
|
||||
_, err = newQemuHypervisorConfig(hypervisor)
|
||||
|
@ -75,6 +75,12 @@ var SysDevPrefix = "/sys/dev"
|
||||
// SysIOMMUPath is static string of /sys/kernel/iommu_groups
|
||||
var SysIOMMUPath = "/sys/kernel/iommu_groups"
|
||||
|
||||
// SysBusPciDevicesPath is static string of /sys/bus/pci/devices
|
||||
var SysBusPciDevicesPath = "/sys/bus/pci/devices"
|
||||
|
||||
// SysBusPciSlotsPath is static string of /sys/bus/pci/slots
|
||||
var SysBusPciSlotsPath = "/sys/bus/pci/slots"
|
||||
|
||||
// DeviceInfo is an embedded type that contains device data common to all types of devices.
|
||||
type DeviceInfo struct {
|
||||
// Hostpath is device path on host
|
||||
@ -165,12 +171,15 @@ const (
|
||||
|
||||
// VFIODev represents a VFIO drive used for hotplugging
|
||||
type VFIODev struct {
|
||||
// ID is used to identify this drive in the hypervisor options.
|
||||
ID string
|
||||
// IsPCIe specifies device is PCIe or PCI
|
||||
IsPCIe bool
|
||||
|
||||
// Type of VFIO device
|
||||
Type VFIODeviceType
|
||||
|
||||
// ID is used to identify this drive in the hypervisor options.
|
||||
ID string
|
||||
|
||||
// BDF (Bus:Device.Function) of the PCI address
|
||||
BDF string
|
||||
|
||||
@ -182,6 +191,12 @@ type VFIODev struct {
|
||||
|
||||
// DeviceID specifies device id
|
||||
DeviceID string
|
||||
|
||||
// PCI Class Code
|
||||
Class string
|
||||
|
||||
// Bus of VFIO PCIe device
|
||||
Bus string
|
||||
}
|
||||
|
||||
// RNGDev represents a random number generator device
|
||||
|
@ -7,13 +7,102 @@
|
||||
package drivers
|
||||
|
||||
import (
|
||||
"github.com/sirupsen/logrus"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/kata-containers/runtime/virtcontainers/device/api"
|
||||
"github.com/kata-containers/runtime/virtcontainers/device/config"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const intMax uint = ^uint(0)
|
||||
const (
|
||||
intMax = ^uint(0)
|
||||
|
||||
PCIDomain = "0000"
|
||||
PCIeKeyword = "PCIe"
|
||||
)
|
||||
|
||||
// PCISysFsType names a directory under /sys/bus/pci.
type PCISysFsType string

// Directories under /sys/bus/pci. They are fixed names, so they are declared
// as constants rather than mutable package variables.
const (
	// PCISysFsDevices is the "devices" directory, i.e. /sys/bus/pci/devices.
	PCISysFsDevices PCISysFsType = "devices"
	// PCISysFsSlots is the "slots" directory, i.e. /sys/bus/pci/slots.
	PCISysFsSlots PCISysFsType = "slots"
)
|
||||
|
||||
// PCISysFsProperty names a property file inside a PCI sysfs entry.
type PCISysFsProperty string

// Property files read from PCI sysfs entries. They are fixed names, so they
// are declared as constants rather than mutable package variables.
const (
	// PCISysFsDevicesClass is the "class" file, /sys/bus/pci/devices/xxx/class.
	PCISysFsDevicesClass PCISysFsProperty = "class"
	// PCISysFsSlotsAddress is the "address" file, /sys/bus/pci/slots/xxx/address.
	PCISysFsSlotsAddress PCISysFsProperty = "address"
	// PCISysFsSlotsMaxBusSpeed is the "max_bus_speed" file,
	// /sys/bus/pci/slots/xxx/max_bus_speed.
	PCISysFsSlotsMaxBusSpeed PCISysFsProperty = "max_bus_speed"
)
|
||||
|
||||
func deviceLogger() *logrus.Entry {
|
||||
return api.DeviceLogger()
|
||||
}
|
||||
|
||||
/*
|
||||
Identify PCIe device by /sys/bus/pci/slots/xx/max_bus_speed, sample content "8.0 GT/s PCIe"
|
||||
The /sys/bus/pci/slots/xx/address contains bdf, sample content "0000:04:00"
|
||||
bdf format: bus:slot.function
|
||||
*/
|
||||
func isPCIeDevice(bdf string) bool {
|
||||
if len(strings.Split(bdf, ":")) == 2 {
|
||||
bdf = PCIDomain + ":" + bdf
|
||||
}
|
||||
slots, err := ioutil.ReadDir(config.SysBusPciSlotsPath)
|
||||
if err != nil {
|
||||
deviceLogger().WithError(err).WithField("path", config.SysBusPciSlotsPath).Warn("failed to list pci slots")
|
||||
return false
|
||||
}
|
||||
b := strings.Split(bdf, ".")[0]
|
||||
for _, slot := range slots {
|
||||
address := getPCISlotProperty(slot.Name(), PCISysFsSlotsAddress)
|
||||
if b == address {
|
||||
maxBusSpeed := getPCISlotProperty(slot.Name(), PCISysFsSlotsMaxBusSpeed)
|
||||
if strings.Contains(maxBusSpeed, PCIeKeyword) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
deviceLogger().WithField("dev-bdf", bdf).Debug("can not find slot for bdf of pci device")
|
||||
return false
|
||||
}
|
||||
|
||||
// read from /sys/bus/pci/devices/xxx/property
|
||||
func getPCIDeviceProperty(bdf string, property PCISysFsProperty) string {
|
||||
if len(strings.Split(bdf, ":")) == 2 {
|
||||
bdf = PCIDomain + ":" + bdf
|
||||
}
|
||||
propertyPath := filepath.Join(config.SysBusPciDevicesPath, bdf, string(property))
|
||||
rlt, err := readPCIProperty(propertyPath)
|
||||
if err != nil {
|
||||
deviceLogger().WithError(err).WithField("path", propertyPath).Warn("failed to read pci device property")
|
||||
return ""
|
||||
}
|
||||
return rlt
|
||||
}
|
||||
|
||||
// read from /sys/bus/pci/slots/xxx/property
|
||||
func getPCISlotProperty(slot string, property PCISysFsProperty) string {
|
||||
propertyPath := filepath.Join(config.SysBusPciSlotsPath, slot, string(property))
|
||||
rlt, err := readPCIProperty(propertyPath)
|
||||
if err != nil {
|
||||
deviceLogger().WithError(err).WithField("path", propertyPath).Warn("failed to read pci slot property")
|
||||
return ""
|
||||
}
|
||||
return rlt
|
||||
}
|
||||
|
||||
// readPCIProperty reads the file at propertyPath and returns its first line,
// i.e. everything before the first "\n" (the whole content when there is no
// newline).
//
// It returns an empty string and an error naming propertyPath when the file
// cannot be read.
func readPCIProperty(propertyPath string) (string, error) {
	content, err := ioutil.ReadFile(propertyPath)
	if err != nil {
		return "", fmt.Errorf("failed to read pci sysfs %v, error:%v", propertyPath, err)
	}

	firstLine := string(content)
	if eol := strings.IndexByte(firstLine, '\n'); eol >= 0 {
		firstLine = firstLine[:eol]
	}
	return firstLine, nil
}
|
||||
|
@ -27,6 +27,11 @@ const (
|
||||
pciDriverBindPath = "/sys/bus/pci/drivers/%s/bind"
|
||||
vfioNewIDPath = "/sys/bus/pci/drivers/vfio-pci/new_id"
|
||||
vfioRemoveIDPath = "/sys/bus/pci/drivers/vfio-pci/remove_id"
|
||||
pcieRootPortPrefix = "rp"
|
||||
)
|
||||
|
||||
var (
|
||||
AllPCIeDevs = map[string]bool{}
|
||||
)
|
||||
|
||||
// VFIODevice is a vfio device meant to be passed to the hypervisor
|
||||
@ -83,8 +88,14 @@ func (device *VFIODevice) Attach(devReceiver api.DeviceReceiver) (retErr error)
|
||||
Type: vfioDeviceType,
|
||||
BDF: deviceBDF,
|
||||
SysfsDev: deviceSysfsDev,
|
||||
IsPCIe: isPCIeDevice(deviceBDF),
|
||||
Class: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass),
|
||||
}
|
||||
device.VfioDevs = append(device.VfioDevs, vfio)
|
||||
if vfio.IsPCIe {
|
||||
vfio.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs))
|
||||
AllPCIeDevs[vfio.BDF] = true
|
||||
}
|
||||
}
|
||||
|
||||
// hotplug a VFIO device is actually hotplugging a group of iommu devices
|
||||
|
@ -369,6 +369,10 @@ type HypervisorConfig struct {
|
||||
// root bus instead of a bridge.
|
||||
HotplugVFIOOnRootBus bool
|
||||
|
||||
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
|
||||
// The PCIe Root Port device is used to hot-plug the PCIe device
|
||||
PCIeRootPort uint32
|
||||
|
||||
// BootToBeTemplate used to indicate if the VM is created to be a template VM
|
||||
BootToBeTemplate bool
|
||||
|
||||
|
@ -248,6 +248,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
|
||||
UseVSock: sconfig.HypervisorConfig.UseVSock,
|
||||
DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm,
|
||||
HotplugVFIOOnRootBus: sconfig.HypervisorConfig.HotplugVFIOOnRootBus,
|
||||
PCIeRootPort: sconfig.HypervisorConfig.PCIeRootPort,
|
||||
BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate,
|
||||
BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate,
|
||||
DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet,
|
||||
@ -534,6 +535,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
|
||||
UseVSock: hconf.UseVSock,
|
||||
DisableImageNvdimm: hconf.DisableImageNvdimm,
|
||||
HotplugVFIOOnRootBus: hconf.HotplugVFIOOnRootBus,
|
||||
PCIeRootPort: hconf.PCIeRootPort,
|
||||
BootToBeTemplate: hconf.BootToBeTemplate,
|
||||
BootFromTemplate: hconf.BootFromTemplate,
|
||||
DisableVhostNet: hconf.DisableVhostNet,
|
||||
|
@ -153,6 +153,10 @@ type HypervisorConfig struct {
|
||||
// root bus instead of a bridge.
|
||||
HotplugVFIOOnRootBus bool
|
||||
|
||||
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
|
||||
// The PCIe Root Port device is used to hot-plug the PCIe device
|
||||
PCIeRootPort uint32
|
||||
|
||||
// BootToBeTemplate used to indicate if the VM is created to be a template VM
|
||||
BootToBeTemplate bool
|
||||
|
||||
|
@ -41,4 +41,5 @@ type HypervisorState struct {
|
||||
HotpluggedMemory int
|
||||
VirtiofsdPid int
|
||||
HotplugVFIOOnRootBus bool
|
||||
PCIeRootPort int
|
||||
}
|
||||
|
@ -97,6 +97,10 @@ const (
|
||||
// root bus instead of a bridge.
|
||||
HotplugVFIOOnRootBus = kataAnnotHypervisorPrefix + "hotplug_vfio_on_root_bus"
|
||||
|
||||
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
|
||||
// The PCIe Root Port device is used to hot-plug the PCIe device
|
||||
PCIeRootPort = kataAnnotHypervisorPrefix + "pcie_root_port"
|
||||
|
||||
// EntropySource is a sandbox annotation to specify the path to a host source of
|
||||
// entropy (/dev/random, /dev/urandom or real hardware RNG device)
|
||||
EntropySource = kataAnnotHypervisorPrefix + "entropy_source"
|
||||
|
@ -447,6 +447,14 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig)
|
||||
config.HypervisorConfig.HotplugVFIOOnRootBus = hotplugVFIOOnRootBus
|
||||
}
|
||||
|
||||
if value, ok := ocispec.Annotations[vcAnnotations.PCIeRootPort]; ok {
|
||||
pcieRootPort, err := strconv.ParseUint(value, 10, 32)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error parsing annotation for pcie_root_port: %v, Please specify an integer greater than or equal to 0", err)
|
||||
}
|
||||
config.HypervisorConfig.PCIeRootPort = uint32(pcieRootPort)
|
||||
}
|
||||
|
||||
if value, ok := ocispec.Annotations[vcAnnotations.EntropySource]; ok {
|
||||
if value != "" {
|
||||
config.HypervisorConfig.EntropySource = value
|
||||
|
@ -763,6 +763,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
||||
ocispec.Annotations[vcAnnotations.UseVSock] = "true"
|
||||
ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true"
|
||||
ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus] = "true"
|
||||
ocispec.Annotations[vcAnnotations.PCIeRootPort] = "2"
|
||||
ocispec.Annotations[vcAnnotations.EntropySource] = "/dev/urandom"
|
||||
|
||||
addAnnotations(ocispec, &config)
|
||||
@ -793,6 +794,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
||||
assert.Equal(config.HypervisorConfig.UseVSock, true)
|
||||
assert.Equal(config.HypervisorConfig.DisableImageNvdimm, true)
|
||||
assert.Equal(config.HypervisorConfig.HotplugVFIOOnRootBus, true)
|
||||
assert.Equal(config.HypervisorConfig.PCIeRootPort, uint32(2))
|
||||
assert.Equal(config.HypervisorConfig.EntropySource, "/dev/urandom")
|
||||
|
||||
// In case an absurdly large value is provided, the config value is not overridden
|
||||
|
@ -71,6 +71,7 @@ type QemuState struct {
|
||||
UUID string
|
||||
HotplugVFIOOnRootBus bool
|
||||
VirtiofsdPid int
|
||||
PCIeRootPort int
|
||||
}
|
||||
|
||||
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
|
||||
@ -266,6 +267,7 @@ func (q *qemu) setup(id string, hypervisorConfig *HypervisorConfig) error {
|
||||
q.state.UUID = uuid.Generate().String()
|
||||
|
||||
q.state.HotplugVFIOOnRootBus = q.config.HotplugVFIOOnRootBus
|
||||
q.state.PCIeRootPort = int(q.config.PCIeRootPort)
|
||||
|
||||
// The path might already exist, but in case of VM templating,
|
||||
// we have to create it since the sandbox has not created it yet.
|
||||
@ -584,6 +586,13 @@ func (q *qemu) createSandbox(ctx context.Context, id string, networkNS NetworkNa
|
||||
return err
|
||||
}
|
||||
|
||||
// Add PCIe Root Port devices to hypervisor
|
||||
// The pcie.0 bus does not support hot-plug, but a PCIe device can be hot-plugged into a PCIe Root Port.
|
||||
// For more details, please see https://github.com/qemu/qemu/blob/master/docs/pcie.txt
|
||||
if hypervisorConfig.PCIeRootPort > 0 {
|
||||
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, hypervisorConfig.PCIeRootPort)
|
||||
}
|
||||
|
||||
q.qemuConfig = qemuConfig
|
||||
|
||||
return nil
|
||||
@ -1149,17 +1158,39 @@ func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) (err erro
|
||||
}
|
||||
|
||||
devID := device.ID
|
||||
machinneType := q.hypervisorConfig().HypervisorMachineType
|
||||
|
||||
if op == addDevice {
|
||||
|
||||
buf, _ := json.Marshal(device)
|
||||
q.Logger().WithFields(logrus.Fields{
|
||||
"machine-type": machinneType,
|
||||
"hotplug-vfio-on-root-bus": q.state.HotplugVFIOOnRootBus,
|
||||
"pcie-root-port": q.state.PCIeRootPort,
|
||||
"device-info": string(buf),
|
||||
}).Info("Start hot-plug VFIO device")
|
||||
|
||||
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
|
||||
// for pc machine type instead of bridge. This is useful for devices that require
|
||||
// a large PCI BAR, which is currently a limitation with PCI bridges.
|
||||
if q.state.HotplugVFIOOnRootBus {
|
||||
|
||||
// In case MachineType is q35, a PCIe device is hotplugged on a PCIe Root Port.
|
||||
switch machinneType {
|
||||
case QemuQ35:
|
||||
if device.IsPCIe && q.state.PCIeRootPort <= 0 {
|
||||
q.Logger().WithField("dev-id", device.ID).Warn("VFIO device is a PCIe device. It's recommended to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for q35")
|
||||
device.Bus = ""
|
||||
}
|
||||
default:
|
||||
device.Bus = ""
|
||||
}
|
||||
|
||||
switch device.Type {
|
||||
case config.VFIODeviceNormalType:
|
||||
return q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, "", romFile)
|
||||
return q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, device.BDF, device.Bus, romFile)
|
||||
case config.VFIODeviceMediatedType:
|
||||
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", "", romFile)
|
||||
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, device.SysfsDev, "", device.Bus, romFile)
|
||||
default:
|
||||
return fmt.Errorf("Incorrect VFIO device type found")
|
||||
}
|
||||
@ -1185,6 +1216,8 @@ func (q *qemu) hotplugVFIODevice(device *config.VFIODev, op operation) (err erro
|
||||
return fmt.Errorf("Incorrect VFIO device type found")
|
||||
}
|
||||
} else {
|
||||
q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device")
|
||||
|
||||
if !q.state.HotplugVFIOOnRootBus {
|
||||
if err := q.arch.removeDeviceFromBridge(devID); err != nil {
|
||||
return err
|
||||
@ -1848,6 +1881,39 @@ func genericMemoryTopology(memoryMb, hostMemoryMb uint64, slots uint8, memoryOff
|
||||
return memory
|
||||
}
|
||||
|
||||
// genericAppendPCIeRootPort appends to devices the given pcie-root-port
|
||||
func genericAppendPCIeRootPort(devices []govmmQemu.Device, number uint32, machineType string) []govmmQemu.Device {
|
||||
var (
|
||||
bus string
|
||||
chassis string
|
||||
multiFunction bool
|
||||
addr string
|
||||
)
|
||||
switch machineType {
|
||||
case QemuQ35:
|
||||
bus = defaultBridgeBus
|
||||
chassis = "0"
|
||||
multiFunction = false
|
||||
addr = "0"
|
||||
default:
|
||||
return devices
|
||||
}
|
||||
|
||||
for i := uint32(0); i < number; i++ {
|
||||
devices = append(devices,
|
||||
govmmQemu.PCIeRootPortDevice{
|
||||
ID: fmt.Sprintf("%s%d", pcieRootPortPrefix, i),
|
||||
Bus: bus,
|
||||
Chassis: chassis,
|
||||
Slot: strconv.FormatUint(uint64(i), 10),
|
||||
Multifunction: multiFunction,
|
||||
Addr: addr,
|
||||
},
|
||||
)
|
||||
}
|
||||
return devices
|
||||
}
|
||||
|
||||
func (q *qemu) getThreadIDs() (vcpuThreadIDs, error) {
|
||||
span, _ := q.trace("getThreadIDs")
|
||||
defer span.Finish()
|
||||
@ -2013,6 +2079,7 @@ func (q *qemu) save() (s persistapi.HypervisorState) {
|
||||
s.UUID = q.state.UUID
|
||||
s.HotpluggedMemory = q.state.HotpluggedMemory
|
||||
s.HotplugVFIOOnRootBus = q.state.HotplugVFIOOnRootBus
|
||||
s.PCIeRootPort = q.state.PCIeRootPort
|
||||
|
||||
for _, bridge := range q.arch.getBridges() {
|
||||
s.Bridges = append(s.Bridges, persistapi.Bridge{
|
||||
@ -2036,6 +2103,7 @@ func (q *qemu) load(s persistapi.HypervisorState) {
|
||||
q.state.HotpluggedMemory = s.HotpluggedMemory
|
||||
q.state.HotplugVFIOOnRootBus = s.HotplugVFIOOnRootBus
|
||||
q.state.VirtiofsdPid = s.VirtiofsdPid
|
||||
q.state.PCIeRootPort = s.PCIeRootPort
|
||||
|
||||
for _, bridge := range s.Bridges {
|
||||
q.state.Bridges = append(q.state.Bridges, types.NewBridge(types.Type(bridge.Type), bridge.ID, bridge.DeviceAddr, bridge.Addr))
|
||||
|
@ -20,13 +20,15 @@ type qemuAmd64 struct {
|
||||
vmFactory bool
|
||||
}
|
||||
|
||||
const defaultQemuPath = "/usr/bin/qemu-system-x86_64"
|
||||
const (
|
||||
defaultQemuPath = "/usr/bin/qemu-system-x86_64"
|
||||
|
||||
const defaultQemuMachineType = QemuPC
|
||||
defaultQemuMachineType = QemuPC
|
||||
|
||||
const defaultQemuMachineOptions = "accel=kvm,kernel_irqchip"
|
||||
defaultQemuMachineOptions = "accel=kvm,kernel_irqchip"
|
||||
|
||||
const qmpMigrationWaitTimeout = 5 * time.Second
|
||||
qmpMigrationWaitTimeout = 5 * time.Second
|
||||
)
|
||||
|
||||
var qemuPaths = map[string]string{
|
||||
QemuPCLite: "/usr/bin/qemu-lite-system-x86_64",
|
||||
|
@ -127,6 +127,9 @@ type qemuArch interface {
|
||||
|
||||
// setIgnoreSharedMemoryMigrationCaps set bypass-shared-memory capability for migration
|
||||
setIgnoreSharedMemoryMigrationCaps(context.Context, *govmmQemu.QMP) error
|
||||
|
||||
// appendPCIeRootPortDevice appends the given number of pcie-root-port devices to the pcie.0 bus
|
||||
appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32) []govmmQemu.Device
|
||||
}
|
||||
|
||||
type qemuArchBase struct {
|
||||
@ -153,6 +156,7 @@ const (
|
||||
defaultPCBridgeBus = "pci.0"
|
||||
maxDevIDSize = 31
|
||||
defaultMsize9p = 8192
|
||||
pcieRootPortPrefix = "rp"
|
||||
)
|
||||
|
||||
// This is the PCI start address assigned to the first bridge that
|
||||
@ -646,6 +650,7 @@ func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDev conf
|
||||
BDF: vfioDev.BDF,
|
||||
VendorID: vfioDev.VendorID,
|
||||
DeviceID: vfioDev.DeviceID,
|
||||
Bus: vfioDev.Bus,
|
||||
},
|
||||
)
|
||||
|
||||
@ -750,3 +755,8 @@ func (q *qemuArchBase) setBridges(bridges []types.Bridge) {
|
||||
func (q *qemuArchBase) addBridge(b types.Bridge) {
|
||||
q.Bridges = append(q.Bridges, b)
|
||||
}
|
||||
|
||||
// appendPCIeRootPortDevice appends to devices the given pcie-root-port
|
||||
func (q *qemuArchBase) appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32) []govmmQemu.Device {
|
||||
return genericAppendPCIeRootPort(devices, number, q.machineType)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user