From 28a41e1d1672b618ea5d32fc8a77102ed8dd6d92 Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Wed, 30 Aug 2023 09:56:40 +0000 Subject: [PATCH 1/4] runtime: add a new API for Network interface Add GetEndpointsNum API for Network Interface to get the number of network endpoints. This is used to calculate the number of PCIe root ports for QemuVirt. Signed-off-by: Jianyong Wu --- src/runtime/virtcontainers/network.go | 3 +++ src/runtime/virtcontainers/network_darwin.go | 4 ++++ src/runtime/virtcontainers/network_linux.go | 21 ++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/src/runtime/virtcontainers/network.go b/src/runtime/virtcontainers/network.go index cf2c031de5..8619389e1f 100644 --- a/src/runtime/virtcontainers/network.go +++ b/src/runtime/virtcontainers/network.go @@ -226,6 +226,9 @@ type Network interface { // SetEndpoints sets a sandbox's network endpoints. SetEndpoints([]Endpoint) + + // GetEndpointsNum returns the number of a sandbox's network endpoints. + GetEndpointsNum() (int, error) } func generateVCNetworkStructures(ctx context.Context, network Network) ([]*pbTypes.Interface, []*pbTypes.Route, []*pbTypes.ARPNeighbor, error) { diff --git a/src/runtime/virtcontainers/network_darwin.go b/src/runtime/virtcontainers/network_darwin.go index b86150f24e..922da24c63 100644 --- a/src/runtime/virtcontainers/network_darwin.go +++ b/src/runtime/virtcontainers/network_darwin.go @@ -92,6 +92,10 @@ func (n *DarwinNetwork) SetEndpoints(endpoints []Endpoint) { n.eps = endpoints } +func (n *DarwinNetwork) GetEndpointsNum() (int, error) { + return 0, nil +} + func validGuestRoute(route netlink.Route) bool { return true } diff --git a/src/runtime/virtcontainers/network_linux.go b/src/runtime/virtcontainers/network_linux.go index 41dffed8fe..d4decdd2d2 100644 --- a/src/runtime/virtcontainers/network_linux.go +++ b/src/runtime/virtcontainers/network_linux.go @@ -291,6 +291,27 @@ func (n *LinuxNetwork) endpointAlreadyAdded(netInfo *NetworkInfo) bool { return false } +func
(n *LinuxNetwork) GetEndpointsNum() (int, error) { + netnsHandle, err := netns.GetFromPath(n.netNSPath) + if err != nil { + return 0, err + } + defer netnsHandle.Close() + + netlinkHandle, err := netlink.NewHandleAt(netnsHandle) + if err != nil { + return 0, err + } + defer netlinkHandle.Close() + + linkList, err := netlinkHandle.LinkList() + if err != nil { + return 0, err + } + + return len(linkList), nil +} + // Scan the networking namespace through netlink and then: // 1. Create the endpoints for the relevant interfaces found there. // 2. Attach them to the VM. From f1aec98f9d0a5f996b96b17db42041caea0d227c Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Mon, 14 Aug 2023 09:16:07 +0000 Subject: [PATCH 2/4] qemu/virt: use pcie_root_port to do device hotplug for virt ACPI PCI device hotplug on qemu virt is not supported. The only way to hotplug a PCI device is the PCIe native way. Thus we need to create PCIe root ports by default. The number of PCIe root ports depends on the following: 1. reserved one for network device as default; 2. virtio-mem dev; 3.
add enough ports for vhost user blk devs; Fixes: #7646 Signed-off-by: Jianyong Wu --- src/runtime/virtcontainers/qemu.go | 39 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 03b5fa676a..555be392c3 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -720,7 +720,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi } if machine.Type == QemuQ35 || machine.Type == QemuVirt { - if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig, machine.Type); err != nil { + if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig, machine.Type, network); err != nil { q.Logger().WithError(err).Errorf("Cannot create PCIe topology") return err } @@ -759,7 +759,7 @@ func (q *qemu) checkBpfEnabled() { // Max PCIe switch ports is 16 // There is only 64kB of IO memory each root,switch port will consume 4k hence // only 16 ports possible. -func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig *HypervisorConfig, machineType string) error { +func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig *HypervisorConfig, machineType string, network Network) error { // If no-port set just return no need to add PCIe Root Port or PCIe Switches if hypervisorConfig.HotPlugVFIO == config.NoPort && hypervisorConfig.ColdPlugVFIO == config.NoPort && machineType == QemuQ35 { @@ -787,8 +787,21 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig } // Get the number of hot(cold)-pluggable ports needed from the provided - // VFIO devices and VhostUserBlockDevices + // VFIO devices var numOfPluggablePorts uint32 = 0 + + // For now, PCIe native hotplug is the only way for Arm to hot-add a PCI device.
+ if machineType == QemuVirt { + epNum, err := network.GetEndpointsNum() + if err != nil { + q.Logger().Warn("Fail to get network endpoints number") + } + virtPcieRootPortNum := len(hypervisorConfig.VhostUserBlkDevices) + epNum + if hypervisorConfig.VirtioMem { + virtPcieRootPortNum++ + } + numOfPluggablePorts += uint32(virtPcieRootPortNum) + } for _, dev := range hypervisorConfig.VFIODevices { var err error dev.HostPath, err = config.GetHostPath(dev, false, "") @@ -809,18 +822,11 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort) vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort) - numOfVhostUserBlockDevices := len(hypervisorConfig.VhostUserBlkDevices) - // If number of PCIe root ports > 16 then bail out otherwise we may // use up all slots or IO memory on the root bus and vfio-XXX-pci devices // cannot be added which are crucial for Kata max slots on root bus is 32 // max slots on the complete pci(e) topology is 256 in QEMU if vfioOnRootPort { - // On Arm the vhost-user-block device is a PCIe device we need - // to account for it in the number of pluggable ports - if machineType == QemuVirt { - numOfPluggablePorts = numOfPluggablePorts + uint32(numOfVhostUserBlockDevices) - } if numOfPluggablePorts > maxPCIeRootPort { return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort) } @@ -828,21 +834,16 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig return nil } if vfioOnSwitchPort { - // On Arm the vhost-user-block device is a PCIe device we need - // to account for it in the number of pluggable ports - if machineType == QemuVirt { - numOfPluggableRootPorts := uint32(numOfVhostUserBlockDevices) - if numOfPluggableRootPorts > maxPCIeRootPort { - return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of 
%d", maxPCIeRootPort) - } - qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggableRootPorts, memSize32bit, memSize64bit) - } if numOfPluggablePorts > maxPCIeSwitchPort { return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort) } qemuConfig.Devices = q.arch.appendPCIeSwitchPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit) return nil } + // If neither Root Port nor Switch Port is enabled, check whether QemuVirt needs PCIe root ports added. + if machineType == QemuVirt { + qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit) + } return nil } From ef18c9550cf0ba46a506cc7804df6e13cf57abbf Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Tue, 15 Aug 2023 01:33:27 +0000 Subject: [PATCH 3/4] runtime:qemuvirt: hotadd net dev to pcie root port Hotplug the network device to a PCIe root port, as this is the only way on QemuVirt. Fixes: #7646 Signed-off-by: Jianyong Wu --- src/runtime/virtcontainers/qemu.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 555be392c3..4d3fbee69c 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1896,6 +1896,7 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera } devID := "virtio-" + tap.ID + machineType := q.HypervisorConfig().HypervisorMachineType if op == AddDevice { if err = q.hotAddNetDevice(tap.Name, endpoint.HardwareAddr(), tap.VMFds, tap.VhostFds); err != nil { return err @@ -1907,6 +1908,14 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera } }() + // Hotplug net dev to pcie root port for QemuVirt + if machineType == QemuVirt { + addr := "00" + bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort])) +
config.PCIeDevices[config.RootPort][devID] = true + return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridgeID, romFile, int(q.config.NumVCPUs), defaultDisableModern) + } + addr, bridge, err := q.arch.addDeviceToBridge(ctx, tap.ID, types.PCI) if err != nil { return err @@ -1939,7 +1948,6 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera return q.qmpMonitorCh.qmp.ExecuteNetCCWDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), devNoHotplug, int(q.config.NumVCPUs)) } return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridge.ID, romFile, int(q.config.NumVCPUs), defaultDisableModern) - } if err := q.arch.removeDeviceFromBridge(tap.ID); err != nil { From f9c9d8f645b0e32bc4eaefdccc999075dc7cdd33 Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Tue, 15 Aug 2023 06:29:06 +0000 Subject: [PATCH 4/4] runtime: QemuVirt: hotadd virtio-mem dev to pcie root port Hotplug virtio-mem device to pcie root port for Qemu Virt. 
Fixes: #7646 Signed-off-by: Jianyong Wu --- src/runtime/virtcontainers/qemu.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 4d3fbee69c..6c052a86a9 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -937,7 +937,17 @@ func (q *qemu) setupVirtioMem(ctx context.Context) error { } }() - err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridge.ID) + bridgeID := bridge.ID + + // Hot add virtioMem dev to pcie-root-port for QemuVirt + machineType := q.HypervisorConfig().HypervisorMachineType + if machineType == QemuVirt { + addr = "00" + bridgeID = fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort])) + config.PCIeDevices[config.RootPort]["virtiomem"] = true + } + + err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridgeID) if err == nil { q.Logger().Infof("Setup %dMB virtio-mem-pci success", sizeMB) } else {