mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-09-17 14:58:16 +00:00
gpu: reintroduce pcie_root_port and add pcie_switch_port
In Kubernetes we still do not have proper VM sizing at sandbox creation level. This KEP tries to mitigates that: kubernetes/enhancements#4113 but this can take some time until Kube and containerd or other runtimes have those changes rolled out. Before we used a static config of VFIO ports, and we introduced CDI support which needs a patched contianerd. We want to eliminate the patched continerd in the GPU case as well. Fixes: #8860 Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
@@ -83,6 +83,8 @@ type QemuState struct {
|
||||
VirtiofsDaemonPid int
|
||||
HotplugVFIO config.PCIePort
|
||||
ColdPlugVFIO config.PCIePort
|
||||
PCIeRootPort uint32
|
||||
PCIeSwitchPort uint32
|
||||
}
|
||||
|
||||
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
|
||||
@@ -283,7 +285,8 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
|
||||
q.state.UUID = uuid.Generate().String()
|
||||
q.state.HotPlugVFIO = q.config.HotPlugVFIO
|
||||
q.state.ColdPlugVFIO = q.config.ColdPlugVFIO
|
||||
q.state.HotPlugVFIO = q.config.HotPlugVFIO
|
||||
q.state.PCIeRootPort = q.config.PCIeRootPort
|
||||
q.state.PCIeSwitchPort = q.config.PCIeSwitchPort
|
||||
|
||||
// The path might already exist, but in case of VM templating,
|
||||
// we have to create it since the sandbox has not created it yet.
|
||||
@@ -803,11 +806,19 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
||||
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort)
|
||||
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
|
||||
|
||||
// If the devices are not advertised via CRI or cold-plugged we need to
|
||||
// get the number of pluggable root/switch ports from the config
|
||||
numPCIeRootPorts := hypervisorConfig.PCIeRootPort
|
||||
numPCIeSwitchPorts := hypervisorConfig.PCIeSwitchPort
|
||||
|
||||
// If number of PCIe root ports > 16 then bail out otherwise we may
|
||||
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
|
||||
// cannot be added which are crucial for Kata max slots on root bus is 32
|
||||
// max slots on the complete pci(e) topology is 256 in QEMU
|
||||
if vfioOnRootPort {
|
||||
if numOfPluggablePorts < numPCIeRootPorts {
|
||||
numOfPluggablePorts = numPCIeRootPorts
|
||||
}
|
||||
if numOfPluggablePorts > maxPCIeRootPort {
|
||||
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
|
||||
}
|
||||
@@ -815,6 +826,9 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
||||
return nil
|
||||
}
|
||||
if vfioOnSwitchPort {
|
||||
if numOfPluggablePorts < numPCIeSwitchPorts {
|
||||
numOfPluggablePorts = numPCIeSwitchPorts
|
||||
}
|
||||
if numOfPluggablePorts > maxPCIeSwitchPort {
|
||||
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort)
|
||||
}
|
||||
@@ -924,8 +938,9 @@ func (q *qemu) setupVirtioMem(ctx context.Context) error {
|
||||
machineType := q.HypervisorConfig().HypervisorMachineType
|
||||
if machineType == QemuVirt {
|
||||
addr = "00"
|
||||
bridgeID = fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
|
||||
config.PCIeDevices[config.RootPort]["virtiomem"] = true
|
||||
bridgeID = fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevicesPerPort[config.RootPort]))
|
||||
dev := config.VFIODev{ID: "virtiomem"}
|
||||
config.PCIeDevicesPerPort[config.RootPort] = append(config.PCIeDevicesPerPort[config.RootPort], dev)
|
||||
}
|
||||
|
||||
err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridgeID)
|
||||
@@ -1640,8 +1655,9 @@ func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.V
|
||||
//Since the dev is the first and only one on this bus(root port), it should be 0.
|
||||
addr := "00"
|
||||
|
||||
bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
|
||||
config.PCIeDevices[config.RootPort][devID] = true
|
||||
bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevicesPerPort[config.RootPort]))
|
||||
dev := config.VFIODev{ID: devID}
|
||||
config.PCIeDevicesPerPort[config.RootPort] = append(config.PCIeDevicesPerPort[config.RootPort], dev)
|
||||
|
||||
bridgeQomPath := fmt.Sprintf("%s%s", qomPathPrefix, bridgeID)
|
||||
bridgeSlot, err := q.arch.qomGetSlot(bridgeQomPath, &q.qmpMonitorCh)
|
||||
@@ -1901,8 +1917,10 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
|
||||
// Hotplug net dev to pcie root port for QemuVirt
|
||||
if machineType == QemuVirt {
|
||||
addr := "00"
|
||||
bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
|
||||
config.PCIeDevices[config.RootPort][devID] = true
|
||||
bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevicesPerPort[config.RootPort]))
|
||||
dev := config.VFIODev{ID: devID}
|
||||
config.PCIeDevicesPerPort[config.RootPort] = append(config.PCIeDevicesPerPort[config.RootPort], dev)
|
||||
|
||||
return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridgeID, romFile, int(q.config.NumVCPUs()), defaultDisableModern)
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user