mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-08-14 06:06:12 +00:00
gpu: reintroduce pcie_root_port and add pcie_switch_port
In Kubernetes we still do not have proper VM sizing at sandbox creation level. This KEP tries to mitigate that: kubernetes/enhancements#4113 but this can take some time until Kube and containerd or other runtimes have those changes rolled out. Before we used a static config of VFIO ports, and we introduced CDI support which needs a patched containerd. We want to eliminate the patched containerd in the GPU case as well. Fixes: #8860 Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
parent
6f6a164451
commit
c7b41361b2
@ -123,6 +123,8 @@ type HypervisorInfo struct {
|
|||||||
MemorySlots uint32
|
MemorySlots uint32
|
||||||
HotPlugVFIO config.PCIePort
|
HotPlugVFIO config.PCIePort
|
||||||
ColdPlugVFIO config.PCIePort
|
ColdPlugVFIO config.PCIePort
|
||||||
|
PCIeRootPort uint32
|
||||||
|
PCIeSwitchPort uint32
|
||||||
Debug bool
|
Debug bool
|
||||||
SecurityInfo SecurityInfo
|
SecurityInfo SecurityInfo
|
||||||
}
|
}
|
||||||
@ -339,6 +341,8 @@ func getHypervisorInfo(config oci.RuntimeConfig) (HypervisorInfo, error) {
|
|||||||
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
|
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
|
||||||
HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO,
|
HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO,
|
||||||
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
|
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
|
||||||
|
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
|
||||||
|
PCIeSwitchPort: config.HypervisorConfig.PCIeSwitchPort,
|
||||||
SocketPath: socketPath,
|
SocketPath: socketPath,
|
||||||
SecurityInfo: securityInfo,
|
SecurityInfo: securityInfo,
|
||||||
}, nil
|
}, nil
|
||||||
|
@ -89,6 +89,8 @@ func makeRuntimeConfig(prefixDir string) (configFile string, ociConfig oci.Runti
|
|||||||
enableIOThreads := true
|
enableIOThreads := true
|
||||||
hotPlugVFIO = config.BridgePort
|
hotPlugVFIO = config.BridgePort
|
||||||
coldPlugVFIO = config.NoPort
|
coldPlugVFIO = config.NoPort
|
||||||
|
pcieRootPort := uint32(0)
|
||||||
|
pcieSwitchPort := uint32(0)
|
||||||
disableNewNetNs := false
|
disableNewNetNs := false
|
||||||
sharedFS := "virtio-9p"
|
sharedFS := "virtio-9p"
|
||||||
virtioFSdaemon := filepath.Join(prefixDir, "virtiofsd")
|
virtioFSdaemon := filepath.Join(prefixDir, "virtiofsd")
|
||||||
@ -133,6 +135,8 @@ func makeRuntimeConfig(prefixDir string) (configFile string, ociConfig oci.Runti
|
|||||||
EnableIOThreads: enableIOThreads,
|
EnableIOThreads: enableIOThreads,
|
||||||
HotPlugVFIO: hotPlugVFIO,
|
HotPlugVFIO: hotPlugVFIO,
|
||||||
ColdPlugVFIO: coldPlugVFIO,
|
ColdPlugVFIO: coldPlugVFIO,
|
||||||
|
PCIeRootPort: pcieRootPort,
|
||||||
|
PCIeSwitchPort: pcieSwitchPort,
|
||||||
DisableNewNetNs: disableNewNetNs,
|
DisableNewNetNs: disableNewNetNs,
|
||||||
DefaultVCPUCount: hypConfig.NumVCPUs(),
|
DefaultVCPUCount: hypConfig.NumVCPUs(),
|
||||||
DefaultMaxVCPUCount: hypConfig.DefaultMaxVCPUs,
|
DefaultMaxVCPUCount: hypConfig.DefaultMaxVCPUs,
|
||||||
@ -276,6 +280,8 @@ func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo {
|
|||||||
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
|
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
|
||||||
HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO,
|
HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO,
|
||||||
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
|
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
|
||||||
|
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
|
||||||
|
PCIeSwitchPort: config.HypervisorConfig.PCIeSwitchPort,
|
||||||
}
|
}
|
||||||
|
|
||||||
if os.Geteuid() == 0 {
|
if os.Geteuid() == 0 {
|
||||||
|
@ -335,6 +335,8 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (runtimeConfig string,
|
|||||||
virtioFSdaemon := path.Join(dir, "virtiofsd")
|
virtioFSdaemon := path.Join(dir, "virtiofsd")
|
||||||
hotPlugVFIO = config.BridgePort
|
hotPlugVFIO = config.BridgePort
|
||||||
coldPlugVFIO = config.NoPort
|
coldPlugVFIO = config.NoPort
|
||||||
|
pcieRootPort := uint32(0)
|
||||||
|
pcieSwitchPort := uint32(0)
|
||||||
|
|
||||||
configFileOptions := ktu.RuntimeConfigOptions{
|
configFileOptions := ktu.RuntimeConfigOptions{
|
||||||
Hypervisor: "qemu",
|
Hypervisor: "qemu",
|
||||||
@ -353,6 +355,8 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (runtimeConfig string,
|
|||||||
VirtioFSDaemon: virtioFSdaemon,
|
VirtioFSDaemon: virtioFSdaemon,
|
||||||
HotPlugVFIO: hotPlugVFIO,
|
HotPlugVFIO: hotPlugVFIO,
|
||||||
ColdPlugVFIO: coldPlugVFIO,
|
ColdPlugVFIO: coldPlugVFIO,
|
||||||
|
PCIeRootPort: pcieRootPort,
|
||||||
|
PCIeSwitchPort: pcieSwitchPort,
|
||||||
}
|
}
|
||||||
|
|
||||||
runtimeConfigFileData := ktu.MakeRuntimeConfigFileData(configFileOptions)
|
runtimeConfigFileData := ktu.MakeRuntimeConfigFileData(configFileOptions)
|
||||||
|
@ -227,13 +227,11 @@ func (p PCIePort) Valid() bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
type PCIePortMapping map[string]bool
|
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// Each of these structures keeps track of the devices attached to the
|
// Each of these structures keeps track of the devices attached to the
|
||||||
// different types of PCI ports. We can deduce the Bus number from it
|
// different types of PCI ports. We can deduce the Bus number from it
|
||||||
// and eliminate duplicates being assigned.
|
// and eliminate duplicates being assigned.
|
||||||
PCIeDevices = map[PCIePort]PCIePortMapping{}
|
PCIeDevicesPerPort = map[PCIePort][]VFIODev{}
|
||||||
)
|
)
|
||||||
|
|
||||||
// DeviceInfo is an embedded type that contains device data common to all types of devices.
|
// DeviceInfo is an embedded type that contains device data common to all types of devices.
|
||||||
@ -420,11 +418,12 @@ type VFIODev struct {
|
|||||||
// APDevices are the Adjunct Processor devices assigned to the mdev
|
// APDevices are the Adjunct Processor devices assigned to the mdev
|
||||||
APDevices []string
|
APDevices []string
|
||||||
|
|
||||||
// Rank identifies a device in a IOMMU group
|
|
||||||
Rank int
|
|
||||||
|
|
||||||
// Port is the PCIe port type to which the device is attached
|
// Port is the PCIe port type to which the device is attached
|
||||||
Port PCIePort
|
Port PCIePort
|
||||||
|
|
||||||
|
// HostPath is the path to the device on the host we need it as a reference
|
||||||
|
// to match a /dev/vfio/<num> device to a device in GK mode
|
||||||
|
HostPath string
|
||||||
}
|
}
|
||||||
|
|
||||||
// RNGDev represents a random number generator device
|
// RNGDev represents a random number generator device
|
||||||
|
@ -41,6 +41,8 @@ var (
|
|||||||
PCISysFsDevicesClass PCISysFsProperty = "class" // /sys/bus/pci/devices/xxx/class
|
PCISysFsDevicesClass PCISysFsProperty = "class" // /sys/bus/pci/devices/xxx/class
|
||||||
PCISysFsSlotsAddress PCISysFsProperty = "address" // /sys/bus/pci/slots/xxx/address
|
PCISysFsSlotsAddress PCISysFsProperty = "address" // /sys/bus/pci/slots/xxx/address
|
||||||
PCISysFsSlotsMaxBusSpeed PCISysFsProperty = "max_bus_speed" // /sys/bus/pci/slots/xxx/max_bus_speed
|
PCISysFsSlotsMaxBusSpeed PCISysFsProperty = "max_bus_speed" // /sys/bus/pci/slots/xxx/max_bus_speed
|
||||||
|
PCISysFsDevicesVendor PCISysFsProperty = "vendor" // /sys/bus/pci/devices/xxx/vendor
|
||||||
|
PCISysFsDevicesDevice PCISysFsProperty = "device" // /sys/bus/pci/devices/xxx/device
|
||||||
)
|
)
|
||||||
|
|
||||||
func deviceLogger() *logrus.Entry {
|
func deviceLogger() *logrus.Entry {
|
||||||
@ -194,6 +196,10 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODe
|
|||||||
if ignorePCIDevice {
|
if ignorePCIDevice {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
// Fetch the PCI Vendor ID and Device ID
|
||||||
|
vendorID := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesVendor)
|
||||||
|
deviceID := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesDevice)
|
||||||
|
|
||||||
// Do not directly assign to `vfio` -- need to access field still
|
// Do not directly assign to `vfio` -- need to access field still
|
||||||
vfio = config.VFIODev{
|
vfio = config.VFIODev{
|
||||||
ID: id,
|
ID: id,
|
||||||
@ -202,8 +208,10 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODe
|
|||||||
SysfsDev: deviceSysfsDev,
|
SysfsDev: deviceSysfsDev,
|
||||||
IsPCIe: IsPCIeDevice(deviceBDF),
|
IsPCIe: IsPCIeDevice(deviceBDF),
|
||||||
Class: pciClass,
|
Class: pciClass,
|
||||||
Rank: -1,
|
VendorID: vendorID,
|
||||||
|
DeviceID: deviceID,
|
||||||
Port: device.Port,
|
Port: device.Port,
|
||||||
|
HostPath: device.HostPath,
|
||||||
}
|
}
|
||||||
|
|
||||||
case config.VFIOAPDeviceMediatedType:
|
case config.VFIOAPDeviceMediatedType:
|
||||||
|
@ -78,9 +78,12 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
|
|||||||
}
|
}
|
||||||
|
|
||||||
if vfio.IsPCIe {
|
if vfio.IsPCIe {
|
||||||
busIndex := len(config.PCIeDevices[vfio.Port])
|
busIndex := len(config.PCIeDevicesPerPort[vfio.Port])
|
||||||
vfio.Bus = fmt.Sprintf("%s%d", config.PCIePortPrefixMapping[vfio.Port], busIndex)
|
vfio.Bus = fmt.Sprintf("%s%d", config.PCIePortPrefixMapping[vfio.Port], busIndex)
|
||||||
config.PCIeDevices[vfio.Port][vfio.BDF] = true
|
// We need to keep track of the number of devices per port to deduce
|
||||||
|
// the correct bus number, additionally we can use the VFIO device
|
||||||
|
// info to act upon different Vendor IDs and Device IDs.
|
||||||
|
config.PCIeDevicesPerPort[vfio.Port] = append(config.PCIeDevicesPerPort[vfio.Port], *vfio)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,11 +71,10 @@ func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserS
|
|||||||
dm.blockDriver = config.VirtioSCSI
|
dm.blockDriver = config.VirtioSCSI
|
||||||
}
|
}
|
||||||
|
|
||||||
config.PCIeDevices = make(map[config.PCIePort]config.PCIePortMapping)
|
config.PCIeDevicesPerPort = make(map[config.PCIePort][]config.VFIODev)
|
||||||
|
config.PCIeDevicesPerPort[config.RootPort] = make([]config.VFIODev, 0)
|
||||||
config.PCIeDevices[config.RootPort] = make(map[string]bool)
|
config.PCIeDevicesPerPort[config.SwitchPort] = make([]config.VFIODev, 0)
|
||||||
config.PCIeDevices[config.SwitchPort] = make(map[string]bool)
|
config.PCIeDevicesPerPort[config.BridgePort] = make([]config.VFIODev, 0)
|
||||||
config.PCIeDevices[config.BridgePort] = make(map[string]bool)
|
|
||||||
|
|
||||||
for _, dev := range devices {
|
for _, dev := range devices {
|
||||||
dm.devices[dev.DeviceID()] = dev
|
dm.devices[dev.DeviceID()] = dev
|
||||||
|
@ -47,4 +47,6 @@ type HypervisorState struct {
|
|||||||
Pid int
|
Pid int
|
||||||
HotPlugVFIO config.PCIePort
|
HotPlugVFIO config.PCIePort
|
||||||
ColdPlugVFIO config.PCIePort
|
ColdPlugVFIO config.PCIePort
|
||||||
|
PCIeRootPort uint32
|
||||||
|
PCIeSwitchPort uint32
|
||||||
}
|
}
|
||||||
|
@ -226,6 +226,8 @@ type RuntimeConfigOptions struct {
|
|||||||
PFlash []string
|
PFlash []string
|
||||||
HotPlugVFIO config.PCIePort
|
HotPlugVFIO config.PCIePort
|
||||||
ColdPlugVFIO config.PCIePort
|
ColdPlugVFIO config.PCIePort
|
||||||
|
PCIeRootPort uint32
|
||||||
|
PCIeSwitchPort uint32
|
||||||
DefaultVCPUCount uint32
|
DefaultVCPUCount uint32
|
||||||
DefaultMaxVCPUCount uint32
|
DefaultMaxVCPUCount uint32
|
||||||
DefaultMemSize uint32
|
DefaultMemSize uint32
|
||||||
@ -318,6 +320,8 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
|
|||||||
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
|
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
|
||||||
cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `"
|
cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `"
|
||||||
hot_plug_vfio = "` + config.HotPlugVFIO.String() + `"
|
hot_plug_vfio = "` + config.HotPlugVFIO.String() + `"
|
||||||
|
pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + `
|
||||||
|
pcie_switch_port = ` + strconv.FormatUint(uint64(config.PCIeSwitchPort), 10) + `
|
||||||
msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + `
|
msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + `
|
||||||
enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + `
|
enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + `
|
||||||
guest_hook_path = "` + config.DefaultGuestHookPath + `"
|
guest_hook_path = "` + config.DefaultGuestHookPath + `"
|
||||||
|
@ -112,5 +112,8 @@ var defaultRuntimeConfiguration = "@CONFIG_PATH@"
|
|||||||
const defaultHotPlugVFIO = config.NoPort
|
const defaultHotPlugVFIO = config.NoPort
|
||||||
const defaultColdPlugVFIO = config.NoPort
|
const defaultColdPlugVFIO = config.NoPort
|
||||||
|
|
||||||
|
const defaultPCIeRootPort = 0
|
||||||
|
const defaultPCIeSwitchPort = 0
|
||||||
|
|
||||||
const defaultRemoteHypervisorSocket = "/run/peerpod/hypervisor.sock"
|
const defaultRemoteHypervisorSocket = "/run/peerpod/hypervisor.sock"
|
||||||
const defaultRemoteHypervisorTimeout = 600
|
const defaultRemoteHypervisorTimeout = 600
|
||||||
|
@ -57,6 +57,10 @@ const (
|
|||||||
|
|
||||||
// the maximum amount of PCI bridges that can be cold plugged in a VM
|
// the maximum amount of PCI bridges that can be cold plugged in a VM
|
||||||
maxPCIBridges uint32 = 5
|
maxPCIBridges uint32 = 5
|
||||||
|
// For more info on why these values were chosen, see:
|
||||||
|
// https://github.com/kata-containers/kata-containers/blob/main/docs/design/kata-vra.md#hypervisor-resource-limits
|
||||||
|
maxPCIeRootPorts uint32 = 16
|
||||||
|
maxPCIeSwitchPorts uint32 = 16
|
||||||
|
|
||||||
errInvalidHypervisorPrefix = "configuration file contains invalid hypervisor section"
|
errInvalidHypervisorPrefix = "configuration file contains invalid hypervisor section"
|
||||||
)
|
)
|
||||||
@ -150,6 +154,8 @@ type hypervisor struct {
|
|||||||
DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
|
DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
|
||||||
HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"`
|
HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"`
|
||||||
ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"`
|
ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"`
|
||||||
|
PCIeRootPort uint32 `toml:"pcie_root_port"`
|
||||||
|
PCIeSwitchPort uint32 `toml:"pcie_switch_port"`
|
||||||
DisableVhostNet bool `toml:"disable_vhost_net"`
|
DisableVhostNet bool `toml:"disable_vhost_net"`
|
||||||
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
|
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
|
||||||
ConfidentialGuest bool `toml:"confidential_guest"`
|
ConfidentialGuest bool `toml:"confidential_guest"`
|
||||||
@ -302,6 +308,20 @@ func (h hypervisor) hotPlugVFIO() config.PCIePort {
|
|||||||
return h.HotPlugVFIO
|
return h.HotPlugVFIO
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// pcieRootPort returns the number of PCIe root ports requested in the
// hypervisor configuration section, capped at maxPCIeRootPorts.
func (h hypervisor) pcieRootPort() uint32 {
|
||||||
|
// Values above the limit are silently clamped rather than rejected.
if h.PCIeRootPort > maxPCIeRootPorts {
|
||||||
|
return maxPCIeRootPorts
|
||||||
|
}
|
||||||
|
return h.PCIeRootPort
|
||||||
|
}
|
||||||
|
|
||||||
|
// pcieSwitchPort returns the number of PCIe switch ports requested in the
// hypervisor configuration section, capped at maxPCIeSwitchPorts.
func (h hypervisor) pcieSwitchPort() uint32 {
|
||||||
|
// Values above the limit are silently clamped rather than rejected.
if h.PCIeSwitchPort > maxPCIeSwitchPorts {
|
||||||
|
return maxPCIeSwitchPorts
|
||||||
|
}
|
||||||
|
return h.PCIeSwitchPort
|
||||||
|
}
|
||||||
|
|
||||||
func (h hypervisor) firmwareVolume() (string, error) {
|
func (h hypervisor) firmwareVolume() (string, error) {
|
||||||
p := h.FirmwareVolume
|
p := h.FirmwareVolume
|
||||||
|
|
||||||
@ -936,6 +956,8 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
|||||||
DisableImageNvdimm: h.DisableImageNvdimm,
|
DisableImageNvdimm: h.DisableImageNvdimm,
|
||||||
HotPlugVFIO: h.hotPlugVFIO(),
|
HotPlugVFIO: h.hotPlugVFIO(),
|
||||||
ColdPlugVFIO: h.coldPlugVFIO(),
|
ColdPlugVFIO: h.coldPlugVFIO(),
|
||||||
|
PCIeRootPort: h.pcieRootPort(),
|
||||||
|
PCIeSwitchPort: h.pcieSwitchPort(),
|
||||||
DisableVhostNet: h.DisableVhostNet,
|
DisableVhostNet: h.DisableVhostNet,
|
||||||
EnableVhostUserStore: h.EnableVhostUserStore,
|
EnableVhostUserStore: h.EnableVhostUserStore,
|
||||||
VhostUserStorePath: h.vhostUserStorePath(),
|
VhostUserStorePath: h.vhostUserStorePath(),
|
||||||
@ -1131,6 +1153,8 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
|||||||
Msize9p: h.msize9p(),
|
Msize9p: h.msize9p(),
|
||||||
ColdPlugVFIO: h.coldPlugVFIO(),
|
ColdPlugVFIO: h.coldPlugVFIO(),
|
||||||
HotPlugVFIO: h.hotPlugVFIO(),
|
HotPlugVFIO: h.hotPlugVFIO(),
|
||||||
|
PCIeRootPort: h.pcieRootPort(),
|
||||||
|
PCIeSwitchPort: h.pcieSwitchPort(),
|
||||||
DisableVhostNet: true,
|
DisableVhostNet: true,
|
||||||
GuestHookPath: h.guestHookPath(),
|
GuestHookPath: h.guestHookPath(),
|
||||||
VirtioFSExtraArgs: h.VirtioFSExtraArgs,
|
VirtioFSExtraArgs: h.VirtioFSExtraArgs,
|
||||||
@ -1484,6 +1508,8 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
|
|||||||
Msize9p: defaultMsize9p,
|
Msize9p: defaultMsize9p,
|
||||||
ColdPlugVFIO: defaultColdPlugVFIO,
|
ColdPlugVFIO: defaultColdPlugVFIO,
|
||||||
HotPlugVFIO: defaultHotPlugVFIO,
|
HotPlugVFIO: defaultHotPlugVFIO,
|
||||||
|
PCIeRootPort: defaultPCIeRootPort,
|
||||||
|
PCIeSwitchPort: defaultPCIeSwitchPort,
|
||||||
GuestHookPath: defaultGuestHookPath,
|
GuestHookPath: defaultGuestHookPath,
|
||||||
VhostUserStorePath: defaultVhostUserStorePath,
|
VhostUserStorePath: defaultVhostUserStorePath,
|
||||||
VhostUserDeviceReconnect: defaultVhostUserDeviceReconnect,
|
VhostUserDeviceReconnect: defaultVhostUserDeviceReconnect,
|
||||||
|
@ -87,6 +87,8 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntime
|
|||||||
enableIOThreads := true
|
enableIOThreads := true
|
||||||
hotPlugVFIO = config.NoPort
|
hotPlugVFIO = config.NoPort
|
||||||
coldPlugVFIO = config.BridgePort
|
coldPlugVFIO = config.BridgePort
|
||||||
|
pcieRootPort := uint32(0)
|
||||||
|
pcieSwitchPort := uint32(0)
|
||||||
disableNewNetNs := false
|
disableNewNetNs := false
|
||||||
sharedFS := "virtio-9p"
|
sharedFS := "virtio-9p"
|
||||||
virtioFSdaemon := path.Join(dir, "virtiofsd")
|
virtioFSdaemon := path.Join(dir, "virtiofsd")
|
||||||
@ -109,6 +111,8 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntime
|
|||||||
EnableIOThreads: enableIOThreads,
|
EnableIOThreads: enableIOThreads,
|
||||||
HotPlugVFIO: hotPlugVFIO,
|
HotPlugVFIO: hotPlugVFIO,
|
||||||
ColdPlugVFIO: coldPlugVFIO,
|
ColdPlugVFIO: coldPlugVFIO,
|
||||||
|
PCIeRootPort: pcieRootPort,
|
||||||
|
PCIeSwitchPort: pcieSwitchPort,
|
||||||
DisableNewNetNs: disableNewNetNs,
|
DisableNewNetNs: disableNewNetNs,
|
||||||
DefaultVCPUCount: defaultVCPUCount,
|
DefaultVCPUCount: defaultVCPUCount,
|
||||||
DefaultMaxVCPUCount: defaultMaxVCPUCount,
|
DefaultMaxVCPUCount: defaultMaxVCPUCount,
|
||||||
@ -172,6 +176,8 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntime
|
|||||||
EnableIOThreads: enableIOThreads,
|
EnableIOThreads: enableIOThreads,
|
||||||
HotPlugVFIO: hotPlugVFIO,
|
HotPlugVFIO: hotPlugVFIO,
|
||||||
ColdPlugVFIO: coldPlugVFIO,
|
ColdPlugVFIO: coldPlugVFIO,
|
||||||
|
PCIeRootPort: pcieRootPort,
|
||||||
|
PCIeSwitchPort: pcieSwitchPort,
|
||||||
Msize9p: defaultMsize9p,
|
Msize9p: defaultMsize9p,
|
||||||
MemSlots: defaultMemSlots,
|
MemSlots: defaultMemSlots,
|
||||||
EntropySource: defaultEntropySource,
|
EntropySource: defaultEntropySource,
|
||||||
@ -569,6 +575,8 @@ func TestMinimalRuntimeConfig(t *testing.T) {
|
|||||||
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
||||||
HotPlugVFIO: defaultHotPlugVFIO,
|
HotPlugVFIO: defaultHotPlugVFIO,
|
||||||
ColdPlugVFIO: defaultColdPlugVFIO,
|
ColdPlugVFIO: defaultColdPlugVFIO,
|
||||||
|
PCIeRootPort: defaultPCIeRootPort,
|
||||||
|
PCIeSwitchPort: defaultPCIeSwitchPort,
|
||||||
}
|
}
|
||||||
|
|
||||||
expectedAgentConfig := vc.KataAgentConfig{
|
expectedAgentConfig := vc.KataAgentConfig{
|
||||||
@ -610,6 +618,8 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
|
|||||||
disableBlock := true
|
disableBlock := true
|
||||||
enableIOThreads := true
|
enableIOThreads := true
|
||||||
coldPlugVFIO = config.BridgePort
|
coldPlugVFIO = config.BridgePort
|
||||||
|
pcieRootPort := uint32(0)
|
||||||
|
pcieSwitchPort := uint32(0)
|
||||||
orgVHostVSockDevicePath := utils.VHostVSockDevicePath
|
orgVHostVSockDevicePath := utils.VHostVSockDevicePath
|
||||||
blockDeviceAIO := "io_uring"
|
blockDeviceAIO := "io_uring"
|
||||||
defer func() {
|
defer func() {
|
||||||
@ -628,6 +638,8 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
|
|||||||
DisableBlockDeviceUse: disableBlock,
|
DisableBlockDeviceUse: disableBlock,
|
||||||
EnableIOThreads: enableIOThreads,
|
EnableIOThreads: enableIOThreads,
|
||||||
ColdPlugVFIO: coldPlugVFIO,
|
ColdPlugVFIO: coldPlugVFIO,
|
||||||
|
PCIeRootPort: pcieRootPort,
|
||||||
|
PCIeSwitchPort: pcieSwitchPort,
|
||||||
RxRateLimiterMaxRate: rxRateLimiterMaxRate,
|
RxRateLimiterMaxRate: rxRateLimiterMaxRate,
|
||||||
TxRateLimiterMaxRate: txRateLimiterMaxRate,
|
TxRateLimiterMaxRate: txRateLimiterMaxRate,
|
||||||
SharedFS: "virtio-fs",
|
SharedFS: "virtio-fs",
|
||||||
|
@ -463,6 +463,14 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig,
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := addHypervisorPCIeRootPortOverrides(ocispec, config); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := addHypervisorPCIeSwitchPortOverrides(ocispec, config); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
if value, ok := ocispec.Annotations[vcAnnotations.MachineType]; ok {
|
if value, ok := ocispec.Annotations[vcAnnotations.MachineType]; ok {
|
||||||
if value != "" {
|
if value != "" {
|
||||||
config.HypervisorConfig.HypervisorMachineType = value
|
config.HypervisorConfig.HypervisorMachineType = value
|
||||||
@ -605,6 +613,29 @@ func addHypervisorHotColdPlugVfioOverrides(ocispec specs.Spec, sbConfig *vc.Sand
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// addHypervisorPCIeRootPortOverrides applies the vcAnnotations.PCIeRootPort
// annotation from ocispec to sbConfig.HypervisorConfig.PCIeRootPort.
// The field is only overwritten when the value handed to the callback is
// greater than zero; any error returned by setUint is passed back unchanged.
func addHypervisorPCIeRootPortOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
|
||||||
|
|
||||||
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.PCIeRootPort).setUint(func(pcieRootPort uint64) {
|
||||||
|
// A value of zero leaves the existing configuration untouched.
if pcieRootPort > 0 {
|
||||||
|
// NOTE(review): the uint64 value is narrowed to uint32 with no range
// check in this function -- confirm it is bounded elsewhere.
sbConfig.HypervisorConfig.PCIeRootPort = uint32(pcieRootPort)
|
||||||
|
}
|
||||||
|
}); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// addHypervisorPCIeSwitchPortOverrides applies the vcAnnotations.PCIeSwitchPort
// annotation from ocispec to sbConfig.HypervisorConfig.PCIeSwitchPort.
// The field is only overwritten when the value handed to the callback is
// greater than zero; any error returned by setUint is passed back unchanged.
func addHypervisorPCIeSwitchPortOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
|
||||||
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.PCIeSwitchPort).setUint(func(pcieSwitchPort uint64) {
|
||||||
|
// A value of zero leaves the existing configuration untouched.
if pcieSwitchPort > 0 {
|
||||||
|
// NOTE(review): the uint64 value is narrowed to uint32 with no range
// check in this function -- confirm it is bounded elsewhere.
sbConfig.HypervisorConfig.PCIeSwitchPort = uint32(pcieSwitchPort)
|
||||||
|
}
|
||||||
|
}); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error {
|
func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error {
|
||||||
|
|
||||||
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMemory).setUintWithCheck(func(memorySz uint64) error {
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMemory).setUintWithCheck(func(memorySz uint64) error {
|
||||||
|
@ -661,6 +661,8 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
|||||||
ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true"
|
ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true"
|
||||||
ocispec.Annotations[vcAnnotations.ColdPlugVFIO] = config.BridgePort
|
ocispec.Annotations[vcAnnotations.ColdPlugVFIO] = config.BridgePort
|
||||||
ocispec.Annotations[vcAnnotations.HotPlugVFIO] = config.NoPort
|
ocispec.Annotations[vcAnnotations.HotPlugVFIO] = config.NoPort
|
||||||
|
ocispec.Annotations[vcAnnotations.PCIeRootPort] = "1"
|
||||||
|
ocispec.Annotations[vcAnnotations.PCIeSwitchPort] = "1"
|
||||||
ocispec.Annotations[vcAnnotations.IOMMUPlatform] = "true"
|
ocispec.Annotations[vcAnnotations.IOMMUPlatform] = "true"
|
||||||
ocispec.Annotations[vcAnnotations.SGXEPC] = "64Mi"
|
ocispec.Annotations[vcAnnotations.SGXEPC] = "64Mi"
|
||||||
ocispec.Annotations[vcAnnotations.UseLegacySerial] = "true"
|
ocispec.Annotations[vcAnnotations.UseLegacySerial] = "true"
|
||||||
@ -701,6 +703,8 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
|||||||
assert.Equal(sbConfig.HypervisorConfig.DisableImageNvdimm, true)
|
assert.Equal(sbConfig.HypervisorConfig.DisableImageNvdimm, true)
|
||||||
assert.Equal(string(sbConfig.HypervisorConfig.ColdPlugVFIO), string(config.BridgePort))
|
assert.Equal(string(sbConfig.HypervisorConfig.ColdPlugVFIO), string(config.BridgePort))
|
||||||
assert.Equal(string(sbConfig.HypervisorConfig.HotPlugVFIO), string(config.NoPort))
|
assert.Equal(string(sbConfig.HypervisorConfig.HotPlugVFIO), string(config.NoPort))
|
||||||
|
assert.Equal(sbConfig.HypervisorConfig.PCIeRootPort, uint32(1))
|
||||||
|
assert.Equal(sbConfig.HypervisorConfig.PCIeSwitchPort, uint32(1))
|
||||||
assert.Equal(sbConfig.HypervisorConfig.IOMMUPlatform, true)
|
assert.Equal(sbConfig.HypervisorConfig.IOMMUPlatform, true)
|
||||||
assert.Equal(sbConfig.HypervisorConfig.SGXEPCSize, int64(67108864))
|
assert.Equal(sbConfig.HypervisorConfig.SGXEPCSize, int64(67108864))
|
||||||
assert.Equal(sbConfig.HypervisorConfig.LegacySerial, true)
|
assert.Equal(sbConfig.HypervisorConfig.LegacySerial, true)
|
||||||
|
@ -11,6 +11,7 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
@ -830,12 +831,45 @@ func (c *Container) createDevices(contConfig *ContainerConfig) error {
|
|||||||
}
|
}
|
||||||
deviceInfos := append(virtualVolumesDeviceInfos, contConfig.DeviceInfos...)
|
deviceInfos := append(virtualVolumesDeviceInfos, contConfig.DeviceInfos...)
|
||||||
|
|
||||||
|
// If we have a confidential guest we need to cold-plug the PCIe VFIO devices
|
||||||
|
// until we have TDISP/IDE PCIe support.
|
||||||
|
coldPlugVFIO := (c.sandbox.config.HypervisorConfig.ColdPlugVFIO != config.NoPort)
|
||||||
|
// Aggregate all the container devices for hot-plug and use them to deduce
|
||||||
|
// the correct amount of ports to reserve for the hypervisor.
|
||||||
|
hotPlugVFIO := (c.sandbox.config.HypervisorConfig.HotPlugVFIO != config.NoPort)
|
||||||
|
modeIsVFIO := (c.sandbox.config.VfioMode == config.VFIOModeVFIO)
|
||||||
|
|
||||||
|
updatedDeviceInfos := []config.DeviceInfo{}
|
||||||
|
|
||||||
|
for i, vfio := range deviceInfos {
|
||||||
|
// Only considering VFIO updates for Port and ColdPlug or
|
||||||
|
// HotPlug updates
|
||||||
|
isVFIODevice := deviceManager.IsVFIODevice(vfio.ContainerPath)
|
||||||
|
if hotPlugVFIO && isVFIODevice {
|
||||||
|
deviceInfos[i].ColdPlug = false
|
||||||
|
deviceInfos[i].Port = c.sandbox.config.HypervisorConfig.HotPlugVFIO
|
||||||
|
}
|
||||||
|
// Device is already cold-plugged at sandbox creation time
|
||||||
|
// ignore it for the container creation
|
||||||
|
if coldPlugVFIO && isVFIODevice {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
updatedDeviceInfos = append(updatedDeviceInfos, deviceInfos[i])
|
||||||
|
}
|
||||||
|
// If modeVFIO is enabled we need 1st to attach the VFIO control group
|
||||||
|
// device /dev/vfio/vfio and 2nd the actual device(s) afterwards.
|
||||||
|
// Sort the devices starting with device #1 being the VFIO control group
|
||||||
|
// device and the next the actual device(s) /dev/vfio/<group>
|
||||||
|
if modeIsVFIO {
|
||||||
|
deviceInfos = sortContainerVFIODevices(updatedDeviceInfos)
|
||||||
|
}
|
||||||
|
|
||||||
for _, info := range deviceInfos {
|
for _, info := range deviceInfos {
|
||||||
dev, err := c.sandbox.devManager.NewDevice(info)
|
dev, err := c.sandbox.devManager.NewDevice(info)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
storedDevices = append(storedDevices, ContainerDevice{
|
storedDevices = append(storedDevices, ContainerDevice{
|
||||||
ID: dev.DeviceID(),
|
ID: dev.DeviceID(),
|
||||||
ContainerPath: info.ContainerPath,
|
ContainerPath: info.ContainerPath,
|
||||||
@ -889,12 +923,12 @@ func (c *Container) checkBlockDeviceSupport(ctx context.Context) bool {
|
|||||||
|
|
||||||
// Sort the devices starting with device #1 being the VFIO control group
|
// Sort the devices starting with device #1 being the VFIO control group
|
||||||
// device and the next the actuall device(s) e.g. /dev/vfio/<group>
|
// device and the next the actuall device(s) e.g. /dev/vfio/<group>
|
||||||
func sortContainerVFIODevices(devices []ContainerDevice) []ContainerDevice {
|
func sortContainerVFIODevices(devices []config.DeviceInfo) []config.DeviceInfo {
|
||||||
var vfioDevices []ContainerDevice
|
var vfioDevices []config.DeviceInfo
|
||||||
|
|
||||||
for _, device := range devices {
|
for _, device := range devices {
|
||||||
if deviceManager.IsVFIOControlDevice(device.ContainerPath) {
|
if deviceManager.IsVFIOControlDevice(device.ContainerPath) {
|
||||||
vfioDevices = append([]ContainerDevice{device}, vfioDevices...)
|
vfioDevices = append([]config.DeviceInfo{device}, vfioDevices...)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
vfioDevices = append(vfioDevices, device)
|
vfioDevices = append(vfioDevices, device)
|
||||||
@ -902,6 +936,66 @@ func sortContainerVFIODevices(devices []ContainerDevice) []ContainerDevice {
|
|||||||
return vfioDevices
|
return vfioDevices
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// annotateContainerWithVFIOMetadata injects hardware-specific metadata into
|
||||||
|
// the container. For NVIDIA GPUs it records the mapping from the
|
||||||
|
// /dev/vfio/<num> device to a GPU index when vfio_mode="guest-kernel",
|
||||||
|
// since otherwise we do not know which GPU is which.
|
||||||
|
func (c *Container) annotateContainerWithVFIOMetadata() {
|
||||||
|
|
||||||
|
// relation ties the bus a device was attached to (Bus) to its host path
// (Path) and to the index assigned after sorting (Index).
type relation struct {
|
||||||
|
Bus string
|
||||||
|
Path string
|
||||||
|
Index int
|
||||||
|
}
|
||||||
|
|
||||||
|
modeIsGK := (c.sandbox.config.VfioMode == config.VFIOModeGuestKernel)
|
||||||
|
|
||||||
|
if modeIsGK {
|
||||||
|
// Hot plug is done, so update the meta information about the
|
||||||
|
// hot-plugged devices, especially VFIO devices in guest-kernel mode.
|
||||||
|
siblings := make([]relation, 0)
|
||||||
|
// In the sandbox we first create the root-ports and secondly
|
||||||
|
// the switch-ports. The range over a map is not deterministic,
|
||||||
|
// so let's first iterate over all root-port devices and then the
|
||||||
|
// switch-port devices; no special handling for bridge-port (PCI).
|
||||||
|
for _, dev := range config.PCIeDevicesPerPort["root-port"] {
|
||||||
|
// Only devices with vendor ID 0x10de whose class contains "0x030" are kept.
|
||||||
|
if dev.VendorID == "0x10de" && strings.Contains(dev.Class, "0x030") {
|
||||||
|
siblings = append(siblings, relation{Bus: dev.Bus, Path: dev.HostPath})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, dev := range config.PCIeDevicesPerPort["switch-port"] {
|
||||||
|
// Only devices with vendor ID 0x10de whose class contains "0x030" are kept.
|
||||||
|
if dev.VendorID == "0x10de" && strings.Contains(dev.Class, "0x030") {
|
||||||
|
siblings = append(siblings, relation{Bus: dev.Bus, Path: dev.HostPath})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// We need to sort the VFIO devices by bus to get the correct
|
||||||
|
// ordering root-port < switch-port.
// NOTE(review): Bus values are compared as plain strings, so a numeric
// suffix of 10 would order before 2 -- confirm this matches the intended
// GPU indexing when ten or more devices share a port type.
|
||||||
|
sort.Slice(siblings, func(i, j int) bool {
|
||||||
|
return siblings[i].Bus < siblings[j].Bus
|
||||||
|
})
|
||||||
|
|
||||||
|
// The position in the sorted slice becomes the GPU index.
for i := range siblings {
|
||||||
|
siblings[i].Index = i
|
||||||
|
}
|
||||||
|
// Now that we have the index, let's connect the /dev/vfio/<num>
|
||||||
|
// to the correct index.
|
||||||
|
for _, dev := range c.devices {
|
||||||
|
for _, bdf := range siblings {
|
||||||
|
// NOTE(review): the recorded host path is compared against the
// container path -- presumably both are /dev/vfio/<num>; confirm.
if bdf.Path == dev.ContainerPath {
|
||||||
|
vfioNum := filepath.Base(dev.ContainerPath)
|
||||||
|
annoKey := fmt.Sprintf("cdi.k8s.io/vfio%s", vfioNum)
|
||||||
|
annoValue := fmt.Sprintf("nvidia.com/gpu=%d", bdf.Index)
|
||||||
|
// NOTE(review): assumes CustomSpec and its Annotations map are
// non-nil at this point -- confirm with callers.
c.config.CustomSpec.Annotations[annoKey] = annoValue
|
||||||
|
c.Logger().Infof("Annotated container with %s: %s", annoKey, annoValue)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// create creates and starts a container inside a Sandbox. It has to be
|
// create creates and starts a container inside a Sandbox. It has to be
|
||||||
// called only when a new container, not known by the sandbox, has to be created.
|
// called only when a new container, not known by the sandbox, has to be created.
|
||||||
func (c *Container) create(ctx context.Context) (err error) {
|
func (c *Container) create(ctx context.Context) (err error) {
|
||||||
@ -921,37 +1015,6 @@ func (c *Container) create(ctx context.Context) (err error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If cold-plug we've attached the devices already, do not try to
|
|
||||||
// attach them a second time.
|
|
||||||
coldPlugVFIO := (c.sandbox.config.HypervisorConfig.ColdPlugVFIO != config.NoPort)
|
|
||||||
modeVFIO := (c.sandbox.config.VfioMode == config.VFIOModeVFIO)
|
|
||||||
|
|
||||||
if coldPlugVFIO {
|
|
||||||
var cntDevices []ContainerDevice
|
|
||||||
for _, dev := range c.devices {
|
|
||||||
isVFIOControlDevice := deviceManager.IsVFIOControlDevice(dev.ContainerPath)
|
|
||||||
if isVFIOControlDevice && modeVFIO {
|
|
||||||
cntDevices = append(cntDevices, dev)
|
|
||||||
}
|
|
||||||
|
|
||||||
if strings.HasPrefix(dev.ContainerPath, vfioPath) {
|
|
||||||
c.Logger().WithFields(logrus.Fields{
|
|
||||||
"device": dev,
|
|
||||||
}).Info("Remvoing device since we're cold-plugging no Attach needed")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
cntDevices = append(cntDevices, dev)
|
|
||||||
}
|
|
||||||
c.devices = cntDevices
|
|
||||||
}
|
|
||||||
// If modeVFIO is enabled we need 1st to attach the VFIO control group
|
|
||||||
// device /dev/vfio/vfio and 2nd the actual device(s) afterwards.
|
|
||||||
// Sort the devices starting with device #1 being the VFIO control group
|
|
||||||
// device and the next the actual device(s) /dev/vfio/<group>
|
|
||||||
if modeVFIO {
|
|
||||||
c.devices = sortContainerVFIODevices(c.devices)
|
|
||||||
}
|
|
||||||
|
|
||||||
c.Logger().WithFields(logrus.Fields{
|
c.Logger().WithFields(logrus.Fields{
|
||||||
"devices": c.devices,
|
"devices": c.devices,
|
||||||
}).Info("Attach devices")
|
}).Info("Attach devices")
|
||||||
@ -959,6 +1022,8 @@ func (c *Container) create(ctx context.Context) (err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
c.annotateContainerWithVFIOMetadata()
|
||||||
|
|
||||||
// Deduce additional system mount info that should be handled by the agent
|
// Deduce additional system mount info that should be handled by the agent
|
||||||
// inside the VM
|
// inside the VM
|
||||||
c.getSystemMountInfo()
|
c.getSystemMountInfo()
|
||||||
|
@ -546,6 +546,12 @@ type HypervisorConfig struct {
|
|||||||
// root port, switch or no port
|
// root port, switch or no port
|
||||||
ColdPlugVFIO config.PCIePort
|
ColdPlugVFIO config.PCIePort
|
||||||
|
|
||||||
|
// PCIeRootPort is the number of root-port to create for the VM
|
||||||
|
PCIeRootPort uint32
|
||||||
|
|
||||||
|
// PCIeSwitchPort is the number of switch-port to create for the VM
|
||||||
|
PCIeSwitchPort uint32
|
||||||
|
|
||||||
// NumVCPUs specifies default number of vCPUs for the VM.
|
// NumVCPUs specifies default number of vCPUs for the VM.
|
||||||
NumVCPUsF float32
|
NumVCPUsF float32
|
||||||
|
|
||||||
|
@ -486,6 +486,8 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
|
|||||||
DisableImageNvdimm: hconf.DisableImageNvdimm,
|
DisableImageNvdimm: hconf.DisableImageNvdimm,
|
||||||
HotPlugVFIO: hconf.HotPlugVFIO,
|
HotPlugVFIO: hconf.HotPlugVFIO,
|
||||||
ColdPlugVFIO: hconf.ColdPlugVFIO,
|
ColdPlugVFIO: hconf.ColdPlugVFIO,
|
||||||
|
PCIeRootPort: hconf.PCIeRootPort,
|
||||||
|
PCIeSwitchPort: hconf.PCIeSwitchPort,
|
||||||
BootToBeTemplate: hconf.BootToBeTemplate,
|
BootToBeTemplate: hconf.BootToBeTemplate,
|
||||||
BootFromTemplate: hconf.BootFromTemplate,
|
BootFromTemplate: hconf.BootFromTemplate,
|
||||||
DisableVhostNet: hconf.DisableVhostNet,
|
DisableVhostNet: hconf.DisableVhostNet,
|
||||||
|
@ -199,6 +199,12 @@ type HypervisorConfig struct {
|
|||||||
// root, bridge, switch or no-port
|
// root, bridge, switch or no-port
|
||||||
ColdPlugVFIO config.PCIePort
|
ColdPlugVFIO config.PCIePort
|
||||||
|
|
||||||
|
// PCIeRootPort is the number of PCIe root ports needed in the hypervisor
|
||||||
|
PCIeRootPort uint32
|
||||||
|
|
||||||
|
// PCIeSwitchPort is the number of PCIe switch ports needed in the hypervisor
|
||||||
|
PCIeSwitchPort uint32
|
||||||
|
|
||||||
// BootToBeTemplate used to indicate if the VM is created to be a template VM
|
// BootToBeTemplate used to indicate if the VM is created to be a template VM
|
||||||
BootToBeTemplate bool
|
BootToBeTemplate bool
|
||||||
|
|
||||||
|
@ -128,6 +128,12 @@ const (
|
|||||||
// HotPlugVFIO is a sandbox annotation used to indicate if devices need to be hotplugged.
|
// HotPlugVFIO is a sandbox annotation used to indicate if devices need to be hotplugged.
|
||||||
HotPlugVFIO = kataAnnotHypervisorPrefix + "hot_plug_vfio"
|
HotPlugVFIO = kataAnnotHypervisorPrefix + "hot_plug_vfio"
|
||||||
|
|
||||||
|
// PCIeRootPort is the number of PCIe root ports to create for the VM.
|
||||||
|
PCIeRootPort = kataAnnotHypervisorPrefix + "pcie_root_port"
|
||||||
|
|
||||||
|
// PCIeSwitchPort is the number of PCIe switch ports to create for the VM.
|
||||||
|
PCIeSwitchPort = kataAnnotHypervisorPrefix + "pcie_switch_port"
|
||||||
|
|
||||||
// EntropySource is a sandbox annotation to specify the path to a host source of
|
// EntropySource is a sandbox annotation to specify the path to a host source of
|
||||||
// entropy (/dev/random, /dev/urandom or real hardware RNG device)
|
// entropy (/dev/random, /dev/urandom or real hardware RNG device)
|
||||||
EntropySource = kataAnnotHypervisorPrefix + "entropy_source"
|
EntropySource = kataAnnotHypervisorPrefix + "entropy_source"
|
||||||
|
@ -83,6 +83,8 @@ type QemuState struct {
|
|||||||
VirtiofsDaemonPid int
|
VirtiofsDaemonPid int
|
||||||
HotplugVFIO config.PCIePort
|
HotplugVFIO config.PCIePort
|
||||||
ColdPlugVFIO config.PCIePort
|
ColdPlugVFIO config.PCIePort
|
||||||
|
PCIeRootPort uint32
|
||||||
|
PCIeSwitchPort uint32
|
||||||
}
|
}
|
||||||
|
|
||||||
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
|
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
|
||||||
@ -283,7 +285,8 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
|
|||||||
q.state.UUID = uuid.Generate().String()
|
q.state.UUID = uuid.Generate().String()
|
||||||
q.state.HotPlugVFIO = q.config.HotPlugVFIO
|
q.state.HotPlugVFIO = q.config.HotPlugVFIO
|
||||||
q.state.ColdPlugVFIO = q.config.ColdPlugVFIO
|
q.state.ColdPlugVFIO = q.config.ColdPlugVFIO
|
||||||
q.state.HotPlugVFIO = q.config.HotPlugVFIO
|
q.state.PCIeRootPort = q.config.PCIeRootPort
|
||||||
|
q.state.PCIeSwitchPort = q.config.PCIeSwitchPort
|
||||||
|
|
||||||
// The path might already exist, but in case of VM templating,
|
// The path might already exist, but in case of VM templating,
|
||||||
// we have to create it since the sandbox has not created it yet.
|
// we have to create it since the sandbox has not created it yet.
|
||||||
@ -803,11 +806,19 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
|||||||
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort)
|
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort)
|
||||||
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
|
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
|
||||||
|
|
||||||
|
// If the devices are not advertised via CRI or cold-plugged we need to
|
||||||
|
// get the number of pluggable root/switch ports from the config
|
||||||
|
numPCIeRootPorts := hypervisorConfig.PCIeRootPort
|
||||||
|
numPCIeSwitchPorts := hypervisorConfig.PCIeSwitchPort
|
||||||
|
|
||||||
// If number of PCIe root ports > 16 then bail out otherwise we may
|
// If number of PCIe root ports > 16 then bail out otherwise we may
|
||||||
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
|
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
|
||||||
// cannot be added which are crucial for Kata max slots on root bus is 32
|
// cannot be added which are crucial for Kata max slots on root bus is 32
|
||||||
// max slots on the complete pci(e) topology is 256 in QEMU
|
// max slots on the complete pci(e) topology is 256 in QEMU
|
||||||
if vfioOnRootPort {
|
if vfioOnRootPort {
|
||||||
|
if numOfPluggablePorts < numPCIeRootPorts {
|
||||||
|
numOfPluggablePorts = numPCIeRootPorts
|
||||||
|
}
|
||||||
if numOfPluggablePorts > maxPCIeRootPort {
|
if numOfPluggablePorts > maxPCIeRootPort {
|
||||||
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
|
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
|
||||||
}
|
}
|
||||||
@ -815,6 +826,9 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if vfioOnSwitchPort {
|
if vfioOnSwitchPort {
|
||||||
|
if numOfPluggablePorts < numPCIeSwitchPorts {
|
||||||
|
numOfPluggablePorts = numPCIeSwitchPorts
|
||||||
|
}
|
||||||
if numOfPluggablePorts > maxPCIeSwitchPort {
|
if numOfPluggablePorts > maxPCIeSwitchPort {
|
||||||
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort)
|
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort)
|
||||||
}
|
}
|
||||||
@ -924,8 +938,9 @@ func (q *qemu) setupVirtioMem(ctx context.Context) error {
|
|||||||
machineType := q.HypervisorConfig().HypervisorMachineType
|
machineType := q.HypervisorConfig().HypervisorMachineType
|
||||||
if machineType == QemuVirt {
|
if machineType == QemuVirt {
|
||||||
addr = "00"
|
addr = "00"
|
||||||
bridgeID = fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
|
bridgeID = fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevicesPerPort[config.RootPort]))
|
||||||
config.PCIeDevices[config.RootPort]["virtiomem"] = true
|
dev := config.VFIODev{ID: "virtiomem"}
|
||||||
|
config.PCIeDevicesPerPort[config.RootPort] = append(config.PCIeDevicesPerPort[config.RootPort], dev)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridgeID)
|
err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridgeID)
|
||||||
@ -1640,8 +1655,9 @@ func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.V
|
|||||||
//Since the dev is the first and only one on this bus(root port), it should be 0.
|
//Since the dev is the first and only one on this bus(root port), it should be 0.
|
||||||
addr := "00"
|
addr := "00"
|
||||||
|
|
||||||
bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
|
bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevicesPerPort[config.RootPort]))
|
||||||
config.PCIeDevices[config.RootPort][devID] = true
|
dev := config.VFIODev{ID: devID}
|
||||||
|
config.PCIeDevicesPerPort[config.RootPort] = append(config.PCIeDevicesPerPort[config.RootPort], dev)
|
||||||
|
|
||||||
bridgeQomPath := fmt.Sprintf("%s%s", qomPathPrefix, bridgeID)
|
bridgeQomPath := fmt.Sprintf("%s%s", qomPathPrefix, bridgeID)
|
||||||
bridgeSlot, err := q.arch.qomGetSlot(bridgeQomPath, &q.qmpMonitorCh)
|
bridgeSlot, err := q.arch.qomGetSlot(bridgeQomPath, &q.qmpMonitorCh)
|
||||||
@ -1901,8 +1917,10 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
|
|||||||
// Hotplug net dev to pcie root port for QemuVirt
|
// Hotplug net dev to pcie root port for QemuVirt
|
||||||
if machineType == QemuVirt {
|
if machineType == QemuVirt {
|
||||||
addr := "00"
|
addr := "00"
|
||||||
bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
|
bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevicesPerPort[config.RootPort]))
|
||||||
config.PCIeDevices[config.RootPort][devID] = true
|
dev := config.VFIODev{ID: devID}
|
||||||
|
config.PCIeDevicesPerPort[config.RootPort] = append(config.PCIeDevicesPerPort[config.RootPort], dev)
|
||||||
|
|
||||||
return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridgeID, romFile, int(q.config.NumVCPUs()), defaultDisableModern)
|
return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridgeID, romFile, int(q.config.NumVCPUs()), defaultDisableModern)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user