Merge pull request #7647 from jongwu/use_pcie_virt

AArch64: runtime: use pcie root port to do pci/pcie device hotplug
This commit is contained in:
Fabiano Fidêncio 2023-10-25 09:17:13 +02:00 committed by GitHub
commit 328ba0da99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 68 additions and 21 deletions

View File

@ -226,6 +226,9 @@ type Network interface {
// SetEndpoints sets a sandbox's network endpoints. // SetEndpoints sets a sandbox's network endpoints.
SetEndpoints([]Endpoint) SetEndpoints([]Endpoint)
// GetEndpointsNum returns the number of the sandbox's network endpoints.
GetEndpointsNum() (int, error)
} }
func generateVCNetworkStructures(ctx context.Context, network Network) ([]*pbTypes.Interface, []*pbTypes.Route, []*pbTypes.ARPNeighbor, error) { func generateVCNetworkStructures(ctx context.Context, network Network) ([]*pbTypes.Interface, []*pbTypes.Route, []*pbTypes.ARPNeighbor, error) {

View File

@ -92,6 +92,10 @@ func (n *DarwinNetwork) SetEndpoints(endpoints []Endpoint) {
n.eps = endpoints n.eps = endpoints
} }
// GetEndpointsNum reports the number of network endpoints of the sandbox.
// This implementation always reports zero endpoints and a nil error.
func (n *DarwinNetwork) GetEndpointsNum() (int, error) {
	const endpointCount = 0
	return endpointCount, nil
}
func validGuestRoute(route netlink.Route) bool { func validGuestRoute(route netlink.Route) bool {
return true return true
} }

View File

@ -291,6 +291,27 @@ func (n *LinuxNetwork) endpointAlreadyAdded(netInfo *NetworkInfo) bool {
return false return false
} }
// GetEndpointsNum returns how many links netlink lists inside the network
// namespace located at n.netNSPath. Every link returned by LinkList is
// counted; no filtering is applied here. On any failure the count is 0 and
// the underlying error is returned unchanged.
func (n *LinuxNetwork) GetEndpointsNum() (int, error) {
	// Open the namespace by path; the handle is released when we return.
	ns, err := netns.GetFromPath(n.netNSPath)
	if err != nil {
		return 0, err
	}
	defer ns.Close()

	// Bind a netlink handle to that namespace so the listing below is
	// performed there.
	handle, err := netlink.NewHandleAt(ns)
	if err != nil {
		return 0, err
	}
	defer handle.Close()

	links, err := handle.LinkList()
	if err != nil {
		return 0, err
	}

	return len(links), nil
}
// Scan the networking namespace through netlink and then: // Scan the networking namespace through netlink and then:
// 1. Create the endpoints for the relevant interfaces found there. // 1. Create the endpoints for the relevant interfaces found there.
// 2. Attach them to the VM. // 2. Attach them to the VM.

View File

@ -720,7 +720,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
} }
if machine.Type == QemuQ35 || machine.Type == QemuVirt { if machine.Type == QemuQ35 || machine.Type == QemuVirt {
if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig, machine.Type); err != nil { if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig, machine.Type, network); err != nil {
q.Logger().WithError(err).Errorf("Cannot create PCIe topology") q.Logger().WithError(err).Errorf("Cannot create PCIe topology")
return err return err
} }
@ -759,7 +759,7 @@ func (q *qemu) checkBpfEnabled() {
// Max PCIe switch ports is 16 // Max PCIe switch ports is 16
// There is only 64kB of IO memory each root,switch port will consume 4k hence // There is only 64kB of IO memory each root,switch port will consume 4k hence
// only 16 ports possible. // only 16 ports possible.
func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig *HypervisorConfig, machineType string) error { func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig *HypervisorConfig, machineType string, network Network) error {
// If no-port set just return no need to add PCIe Root Port or PCIe Switches // If no-port set just return no need to add PCIe Root Port or PCIe Switches
if hypervisorConfig.HotPlugVFIO == config.NoPort && hypervisorConfig.ColdPlugVFIO == config.NoPort && machineType == QemuQ35 { if hypervisorConfig.HotPlugVFIO == config.NoPort && hypervisorConfig.ColdPlugVFIO == config.NoPort && machineType == QemuQ35 {
@ -787,8 +787,21 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
} }
// Get the number of hot(cold)-pluggable ports needed from the provided // Get the number of hot(cold)-pluggable ports needed from the provided
// VFIO devices and VhostUserBlockDevices // VFIO devices
var numOfPluggablePorts uint32 = 0 var numOfPluggablePorts uint32 = 0
// For now, PCIe native hotplug is the only way for Arm to hot-add a PCI device.
if machineType == QemuVirt {
epNum, err := network.GetEndpointsNum()
if err != nil {
q.Logger().Warn("Fail to get network endpoints number")
}
virtPcieRootPortNum := len(hypervisorConfig.VhostUserBlkDevices) + epNum
if hypervisorConfig.VirtioMem {
virtPcieRootPortNum++
}
numOfPluggablePorts += uint32(virtPcieRootPortNum)
}
for _, dev := range hypervisorConfig.VFIODevices { for _, dev := range hypervisorConfig.VFIODevices {
var err error var err error
dev.HostPath, err = config.GetHostPath(dev, false, "") dev.HostPath, err = config.GetHostPath(dev, false, "")
@ -809,18 +822,11 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort) vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort)
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort) vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
numOfVhostUserBlockDevices := len(hypervisorConfig.VhostUserBlkDevices)
// If number of PCIe root ports > 16 then bail out otherwise we may // If number of PCIe root ports > 16 then bail out otherwise we may
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices // use up all slots or IO memory on the root bus and vfio-XXX-pci devices
// cannot be added which are crucial for Kata max slots on root bus is 32 // cannot be added which are crucial for Kata max slots on root bus is 32
// max slots on the complete pci(e) topology is 256 in QEMU // max slots on the complete pci(e) topology is 256 in QEMU
if vfioOnRootPort { if vfioOnRootPort {
// On Arm the vhost-user-block device is a PCIe device we need
// to account for it in the number of pluggable ports
if machineType == QemuVirt {
numOfPluggablePorts = numOfPluggablePorts + uint32(numOfVhostUserBlockDevices)
}
if numOfPluggablePorts > maxPCIeRootPort { if numOfPluggablePorts > maxPCIeRootPort {
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort) return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
} }
@ -828,21 +834,16 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
return nil return nil
} }
if vfioOnSwitchPort { if vfioOnSwitchPort {
// On Arm the vhost-user-block device is a PCIe device we need
// to account for it in the number of pluggable ports
if machineType == QemuVirt {
numOfPluggableRootPorts := uint32(numOfVhostUserBlockDevices)
if numOfPluggableRootPorts > maxPCIeRootPort {
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
}
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggableRootPorts, memSize32bit, memSize64bit)
}
if numOfPluggablePorts > maxPCIeSwitchPort { if numOfPluggablePorts > maxPCIeSwitchPort {
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort) return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort)
} }
qemuConfig.Devices = q.arch.appendPCIeSwitchPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit) qemuConfig.Devices = q.arch.appendPCIeSwitchPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit)
return nil return nil
} }
// If neither Root Port nor Switch Port is enabled, QemuVirt still needs PCIe root ports to be added.
if machineType == QemuVirt {
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit)
}
return nil return nil
} }
@ -936,7 +937,17 @@ func (q *qemu) setupVirtioMem(ctx context.Context) error {
} }
}() }()
err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridge.ID) bridgeID := bridge.ID
// Hot add virtioMem dev to pcie-root-port for QemuVirt
machineType := q.HypervisorConfig().HypervisorMachineType
if machineType == QemuVirt {
addr = "00"
bridgeID = fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
config.PCIeDevices[config.RootPort]["virtiomem"] = true
}
err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridgeID)
if err == nil { if err == nil {
q.Logger().Infof("Setup %dMB virtio-mem-pci success", sizeMB) q.Logger().Infof("Setup %dMB virtio-mem-pci success", sizeMB)
} else { } else {
@ -1895,6 +1906,7 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
} }
devID := "virtio-" + tap.ID devID := "virtio-" + tap.ID
machineType := q.HypervisorConfig().HypervisorMachineType
if op == AddDevice { if op == AddDevice {
if err = q.hotAddNetDevice(tap.Name, endpoint.HardwareAddr(), tap.VMFds, tap.VhostFds); err != nil { if err = q.hotAddNetDevice(tap.Name, endpoint.HardwareAddr(), tap.VMFds, tap.VhostFds); err != nil {
return err return err
@ -1906,6 +1918,14 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
} }
}() }()
// Hotplug net dev to pcie root port for QemuVirt
if machineType == QemuVirt {
addr := "00"
bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
config.PCIeDevices[config.RootPort][devID] = true
return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridgeID, romFile, int(q.config.NumVCPUs), defaultDisableModern)
}
addr, bridge, err := q.arch.addDeviceToBridge(ctx, tap.ID, types.PCI) addr, bridge, err := q.arch.addDeviceToBridge(ctx, tap.ID, types.PCI)
if err != nil { if err != nil {
return err return err
@ -1938,7 +1958,6 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
return q.qmpMonitorCh.qmp.ExecuteNetCCWDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), devNoHotplug, int(q.config.NumVCPUs)) return q.qmpMonitorCh.qmp.ExecuteNetCCWDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), devNoHotplug, int(q.config.NumVCPUs))
} }
return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridge.ID, romFile, int(q.config.NumVCPUs), defaultDisableModern) return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridge.ID, romFile, int(q.config.NumVCPUs), defaultDisableModern)
} }
if err := q.arch.removeDeviceFromBridge(tap.ID); err != nil { if err := q.arch.removeDeviceFromBridge(tap.ID); err != nil {