Merge pull request #7647 from jongwu/use_pcie_virt

AArch64: runtime: use pcie root port to do pci/pcie device hotplug
This commit is contained in:
Fabiano Fidêncio 2023-10-25 09:17:13 +02:00 committed by GitHub
commit 328ba0da99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 68 additions and 21 deletions

View File

@ -226,6 +226,9 @@ type Network interface {
// SetEndpoints sets a sandbox's network endpoints.
SetEndpoints([]Endpoint)
// GetEndpointsNum returns the number of the sandbox's network endpoints.
GetEndpointsNum() (int, error)
}
func generateVCNetworkStructures(ctx context.Context, network Network) ([]*pbTypes.Interface, []*pbTypes.Route, []*pbTypes.ARPNeighbor, error) {

View File

@ -92,6 +92,10 @@ func (n *DarwinNetwork) SetEndpoints(endpoints []Endpoint) {
n.eps = endpoints
}
// GetEndpointsNum always reports zero endpoints and a nil error.
//
// The receiver's state is not consulted: the result is a fixed value, not a
// count derived from the endpoints stored on n.
func (n *DarwinNetwork) GetEndpointsNum() (int, error) {
	// Fixed result; nothing is enumerated here.
	const endpointCount = 0

	return endpointCount, nil
}
// validGuestRoute unconditionally returns true; the route argument is not
// inspected.
//
// NOTE(review): presumably a hook for filtering routes — confirm against the
// callers, which are not visible from here.
func validGuestRoute(route netlink.Route) bool {
	// Every route is accepted as-is.
	const accepted = true

	return accepted
}

View File

@ -291,6 +291,27 @@ func (n *LinuxNetwork) endpointAlreadyAdded(netInfo *NetworkInfo) bool {
return false
}
// GetEndpointsNum returns the number of links netlink reports inside the
// network namespace located at n.netNSPath.
//
// The count is the length of the unfiltered LinkList result, so every link
// present in the namespace contributes to it. Any failure to open the
// namespace, create the netlink handle, or list the links is returned
// unchanged together with a count of 0.
func (n *LinuxNetwork) GetEndpointsNum() (int, error) {
	// Open the network namespace; the handle is released on return.
	nsHandle, err := netns.GetFromPath(n.netNSPath)
	if err != nil {
		return 0, err
	}
	defer nsHandle.Close()

	// Bind a netlink handle to that namespace for the link query below.
	nlHandle, err := netlink.NewHandleAt(nsHandle)
	if err != nil {
		return 0, err
	}
	defer nlHandle.Close()

	links, err := nlHandle.LinkList()
	if err != nil {
		return 0, err
	}

	return len(links), nil
}
// Scan the networking namespace through netlink and then:
// 1. Create the endpoints for the relevant interfaces found there.
// 2. Attach them to the VM.

View File

@ -720,7 +720,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
}
if machine.Type == QemuQ35 || machine.Type == QemuVirt {
if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig, machine.Type); err != nil {
if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig, machine.Type, network); err != nil {
q.Logger().WithError(err).Errorf("Cannot create PCIe topology")
return err
}
@ -759,7 +759,7 @@ func (q *qemu) checkBpfEnabled() {
// Max PCIe switch ports is 16
// There is only 64kB of IO memory each root,switch port will consume 4k hence
// only 16 ports possible.
func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig *HypervisorConfig, machineType string) error {
func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig *HypervisorConfig, machineType string, network Network) error {
// If no-port set just return no need to add PCIe Root Port or PCIe Switches
if hypervisorConfig.HotPlugVFIO == config.NoPort && hypervisorConfig.ColdPlugVFIO == config.NoPort && machineType == QemuQ35 {
@ -787,8 +787,21 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
}
// Get the number of hot(cold)-pluggable ports needed from the provided
// VFIO devices and VhostUserBlockDevices
// VFIO devices
var numOfPluggablePorts uint32 = 0
// For now, PCIe native hotplug is the only way for Arm to hot-add a PCI device.
if machineType == QemuVirt {
epNum, err := network.GetEndpointsNum()
if err != nil {
q.Logger().Warn("Fail to get network endpoints number")
}
virtPcieRootPortNum := len(hypervisorConfig.VhostUserBlkDevices) + epNum
if hypervisorConfig.VirtioMem {
virtPcieRootPortNum++
}
numOfPluggablePorts += uint32(virtPcieRootPortNum)
}
for _, dev := range hypervisorConfig.VFIODevices {
var err error
dev.HostPath, err = config.GetHostPath(dev, false, "")
@ -809,18 +822,11 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort)
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
numOfVhostUserBlockDevices := len(hypervisorConfig.VhostUserBlkDevices)
// If number of PCIe root ports > 16 then bail out otherwise we may
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
// cannot be added which are crucial for Kata max slots on root bus is 32
// max slots on the complete pci(e) topology is 256 in QEMU
if vfioOnRootPort {
// On Arm the vhost-user-block device is a PCIe device we need
// to account for it in the number of pluggable ports
if machineType == QemuVirt {
numOfPluggablePorts = numOfPluggablePorts + uint32(numOfVhostUserBlockDevices)
}
if numOfPluggablePorts > maxPCIeRootPort {
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
}
@ -828,21 +834,16 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
return nil
}
if vfioOnSwitchPort {
// On Arm the vhost-user-block device is a PCIe device we need
// to account for it in the number of pluggable ports
if machineType == QemuVirt {
numOfPluggableRootPorts := uint32(numOfVhostUserBlockDevices)
if numOfPluggableRootPorts > maxPCIeRootPort {
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
}
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggableRootPorts, memSize32bit, memSize64bit)
}
if numOfPluggablePorts > maxPCIeSwitchPort {
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort)
}
qemuConfig.Devices = q.arch.appendPCIeSwitchPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit)
return nil
}
// If neither Root Port nor Switch Port is enabled, QemuVirt still needs PCIe root ports to be added.
if machineType == QemuVirt {
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit)
}
return nil
}
@ -936,7 +937,17 @@ func (q *qemu) setupVirtioMem(ctx context.Context) error {
}
}()
err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridge.ID)
bridgeID := bridge.ID
// Hot add virtioMem dev to pcie-root-port for QemuVirt
machineType := q.HypervisorConfig().HypervisorMachineType
if machineType == QemuVirt {
addr = "00"
bridgeID = fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
config.PCIeDevices[config.RootPort]["virtiomem"] = true
}
err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridgeID)
if err == nil {
q.Logger().Infof("Setup %dMB virtio-mem-pci success", sizeMB)
} else {
@ -1895,6 +1906,7 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
}
devID := "virtio-" + tap.ID
machineType := q.HypervisorConfig().HypervisorMachineType
if op == AddDevice {
if err = q.hotAddNetDevice(tap.Name, endpoint.HardwareAddr(), tap.VMFds, tap.VhostFds); err != nil {
return err
@ -1906,6 +1918,14 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
}
}()
// Hotplug net dev to pcie root port for QemuVirt
if machineType == QemuVirt {
addr := "00"
bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
config.PCIeDevices[config.RootPort][devID] = true
return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridgeID, romFile, int(q.config.NumVCPUs), defaultDisableModern)
}
addr, bridge, err := q.arch.addDeviceToBridge(ctx, tap.ID, types.PCI)
if err != nil {
return err
@ -1938,7 +1958,6 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
return q.qmpMonitorCh.qmp.ExecuteNetCCWDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), devNoHotplug, int(q.config.NumVCPUs))
}
return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridge.ID, romFile, int(q.config.NumVCPUs), defaultDisableModern)
}
if err := q.arch.removeDeviceFromBridge(tap.ID); err != nil {