diff --git a/src/runtime/virtcontainers/network.go b/src/runtime/virtcontainers/network.go
index cf2c031de5..8619389e1f 100644
--- a/src/runtime/virtcontainers/network.go
+++ b/src/runtime/virtcontainers/network.go
@@ -226,6 +226,9 @@ type Network interface {
 
 	// SetEndpoints sets a sandbox's network endpoints.
 	SetEndpoints([]Endpoint)
+
+	// GetEndpointsNum returns the number of the sandbox's network endpoints.
+	GetEndpointsNum() (int, error)
 }
 
 func generateVCNetworkStructures(ctx context.Context, network Network) ([]*pbTypes.Interface, []*pbTypes.Route, []*pbTypes.ARPNeighbor, error) {
diff --git a/src/runtime/virtcontainers/network_darwin.go b/src/runtime/virtcontainers/network_darwin.go
index b86150f24e..922da24c63 100644
--- a/src/runtime/virtcontainers/network_darwin.go
+++ b/src/runtime/virtcontainers/network_darwin.go
@@ -92,6 +92,11 @@ func (n *DarwinNetwork) SetEndpoints(endpoints []Endpoint) {
 	n.eps = endpoints
 }
 
+// GetEndpointsNum always returns 0 and a nil error on Darwin.
+func (n *DarwinNetwork) GetEndpointsNum() (int, error) {
+	return 0, nil
+}
+
 func validGuestRoute(route netlink.Route) bool {
 	return true
 }
diff --git a/src/runtime/virtcontainers/network_linux.go b/src/runtime/virtcontainers/network_linux.go
index 41dffed8fe..d4decdd2d2 100644
--- a/src/runtime/virtcontainers/network_linux.go
+++ b/src/runtime/virtcontainers/network_linux.go
@@ -291,6 +291,31 @@ func (n *LinuxNetwork) endpointAlreadyAdded(netInfo *NetworkInfo) bool {
 	return false
 }
 
+// GetEndpointsNum returns the number of links that netlink lists in the
+// network namespace at n.netNSPath. The list is not filtered, so the
+// result is a link count rather than a count of attached endpoints.
+// NOTE(review): presumably this includes the loopback link - confirm.
+func (n *LinuxNetwork) GetEndpointsNum() (int, error) {
+	netnsHandle, err := netns.GetFromPath(n.netNSPath)
+	if err != nil {
+		return 0, err
+	}
+	defer netnsHandle.Close()
+
+	netlinkHandle, err := netlink.NewHandleAt(netnsHandle)
+	if err != nil {
+		return 0, err
+	}
+	defer netlinkHandle.Close()
+
+	linkList, err := netlinkHandle.LinkList()
+	if err != nil {
+		return 0, err
+	}
+
+	return len(linkList), nil
+}
+
 // Scan the networking namespace through netlink and then:
 // 1. Create the endpoints for the relevant interfaces found there.
 // 2. Attach them to the VM.
diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go
index 03b5fa676a..6c052a86a9 100644
--- a/src/runtime/virtcontainers/qemu.go
+++ b/src/runtime/virtcontainers/qemu.go
@@ -720,7 +720,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
 	}
 
 	if machine.Type == QemuQ35 || machine.Type == QemuVirt {
-		if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig, machine.Type); err != nil {
+		if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig, machine.Type, network); err != nil {
 			q.Logger().WithError(err).Errorf("Cannot create PCIe topology")
 			return err
 		}
@@ -759,7 +759,7 @@ func (q *qemu) checkBpfEnabled() {
 // Max PCIe switch ports is 16
 // There is only 64kB of IO memory each root,switch port will consume 4k hence
 // only 16 ports possible.
-func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig *HypervisorConfig, machineType string) error {
+func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig *HypervisorConfig, machineType string, network Network) error {
 
 	// If no-port set just return no need to add PCIe Root Port or PCIe Switches
 	if hypervisorConfig.HotPlugVFIO == config.NoPort && hypervisorConfig.ColdPlugVFIO == config.NoPort && machineType == QemuQ35 {
@@ -787,8 +787,23 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
 	}
 
 	// Get the number of hot(cold)-pluggable ports needed from the provided
-	// VFIO devices and VhostUserBlockDevices
+	// VFIO devices
 	var numOfPluggablePorts uint32 = 0
+
+	// For now, PCIe native hotplug is the only way for Arm to hot-add a PCI
+	// device, so reserve one port per vhost-user-blk device, per network
+	// endpoint, and one more for virtio-mem when it is enabled.
+	if machineType == QemuVirt {
+		epNum, err := network.GetEndpointsNum()
+		if err != nil {
+			q.Logger().WithError(err).Warn("Fail to get network endpoints number")
+		}
+		virtPcieRootPortNum := len(hypervisorConfig.VhostUserBlkDevices) + epNum
+		if hypervisorConfig.VirtioMem {
+			virtPcieRootPortNum++
+		}
+		numOfPluggablePorts += uint32(virtPcieRootPortNum)
+	}
 	for _, dev := range hypervisorConfig.VFIODevices {
 		var err error
 		dev.HostPath, err = config.GetHostPath(dev, false, "")
@@ -809,18 +824,11 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
 	vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort)
 	vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
 
-	numOfVhostUserBlockDevices := len(hypervisorConfig.VhostUserBlkDevices)
-
 	// If number of PCIe root ports > 16 then bail out otherwise we may
 	// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
 	// cannot be added which are crucial for Kata max slots on root bus is 32
 	// max slots on the complete pci(e) topology is 256 in QEMU
 	if vfioOnRootPort {
-		// On Arm the vhost-user-block device is a PCIe device we need
-		// to account for it in the number of pluggable ports
-		if machineType == QemuVirt {
-			numOfPluggablePorts = numOfPluggablePorts + uint32(numOfVhostUserBlockDevices)
-		}
 		if numOfPluggablePorts > maxPCIeRootPort {
 			return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
 		}
@@ -828,21 +836,19 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
 		return nil
 	}
 	if vfioOnSwitchPort {
-		// On Arm the vhost-user-block device is a PCIe device we need
-		// to account for it in the number of pluggable ports
-		if machineType == QemuVirt {
-			numOfPluggableRootPorts := uint32(numOfVhostUserBlockDevices)
-			if numOfPluggableRootPorts > maxPCIeRootPort {
-				return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
-			}
-			qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggableRootPorts, memSize32bit, memSize64bit)
-		}
 		if numOfPluggablePorts > maxPCIeSwitchPort {
 			return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort)
 		}
 		qemuConfig.Devices = q.arch.appendPCIeSwitchPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit)
 		return nil
 	}
+	// Neither Root Port nor Switch Port is enabled: QemuVirt still needs PCIe root ports for the devices counted above.
+	if machineType == QemuVirt {
+		if numOfPluggablePorts > maxPCIeRootPort {
+			return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
+		}
+		qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit)
+	}
 	return nil
 }
 
@@ -936,7 +942,17 @@ func (q *qemu) setupVirtioMem(ctx context.Context) error {
 		}
 	}()
 
-	err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridge.ID)
+	bridgeID := bridge.ID
+
+	// Hot add virtioMem dev to pcie-root-port for QemuVirt
+	machineType := q.HypervisorConfig().HypervisorMachineType
+	if machineType == QemuVirt {
+		addr = "00"
+		bridgeID = fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
+		config.PCIeDevices[config.RootPort]["virtiomem"] = true
+	}
+
+	err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridgeID)
 	if err == nil {
 		q.Logger().Infof("Setup %dMB virtio-mem-pci success", sizeMB)
 	} else {
@@ -1895,6 +1911,7 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
 	}
 
 	devID := "virtio-" + tap.ID
+	machineType := q.HypervisorConfig().HypervisorMachineType
 	if op == AddDevice {
 		if err = q.hotAddNetDevice(tap.Name, endpoint.HardwareAddr(), tap.VMFds, tap.VhostFds); err != nil {
 			return err
@@ -1906,6 +1923,14 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
 			}
 		}()
 
+		// Hotplug net dev to pcie root port for QemuVirt
+		if machineType == QemuVirt {
+			addr := "00"
+			bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
+			config.PCIeDevices[config.RootPort][devID] = true
+			return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridgeID, romFile, int(q.config.NumVCPUs), defaultDisableModern)
+		}
+
 		addr, bridge, err := q.arch.addDeviceToBridge(ctx, tap.ID, types.PCI)
 		if err != nil {
 			return err
@@ -1938,7 +1963,6 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
 			return q.qmpMonitorCh.qmp.ExecuteNetCCWDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), devNoHotplug, int(q.config.NumVCPUs))
 		}
 		return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridge.ID, romFile, int(q.config.NumVCPUs), defaultDisableModern)
-
 	}
 
 	if err := q.arch.removeDeviceFromBridge(tap.ID); err != nil {