mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-28 08:17:37 +00:00
Merge pull request #7647 from jongwu/use_pcie_virt
AArch64: runtime: use pcie root port to do pci/pcie device hotplug
This commit is contained in:
commit
328ba0da99
@ -226,6 +226,9 @@ type Network interface {
|
|||||||
|
|
||||||
// SetEndpoints sets a sandbox's network endpoints.
|
// SetEndpoints sets a sandbox's network endpoints.
|
||||||
SetEndpoints([]Endpoint)
|
SetEndpoints([]Endpoint)
|
||||||
|
|
||||||
|
// GetEndpointsNum returns the number of the sandbox's network endpoints.
|
||||||
|
GetEndpointsNum() (int, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func generateVCNetworkStructures(ctx context.Context, network Network) ([]*pbTypes.Interface, []*pbTypes.Route, []*pbTypes.ARPNeighbor, error) {
|
func generateVCNetworkStructures(ctx context.Context, network Network) ([]*pbTypes.Interface, []*pbTypes.Route, []*pbTypes.ARPNeighbor, error) {
|
||||||
|
@ -92,6 +92,10 @@ func (n *DarwinNetwork) SetEndpoints(endpoints []Endpoint) {
|
|||||||
n.eps = endpoints
|
n.eps = endpoints
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (n *DarwinNetwork) GetEndpointsNum() (int, error) {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
func validGuestRoute(route netlink.Route) bool {
|
func validGuestRoute(route netlink.Route) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
@ -291,6 +291,27 @@ func (n *LinuxNetwork) endpointAlreadyAdded(netInfo *NetworkInfo) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (n *LinuxNetwork) GetEndpointsNum() (int, error) {
|
||||||
|
netnsHandle, err := netns.GetFromPath(n.netNSPath)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
defer netnsHandle.Close()
|
||||||
|
|
||||||
|
netlinkHandle, err := netlink.NewHandleAt(netnsHandle)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
defer netlinkHandle.Close()
|
||||||
|
|
||||||
|
linkList, err := netlinkHandle.LinkList()
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return len(linkList), nil
|
||||||
|
}
|
||||||
|
|
||||||
// Scan the networking namespace through netlink and then:
|
// Scan the networking namespace through netlink and then:
|
||||||
// 1. Create the endpoints for the relevant interfaces found there.
|
// 1. Create the endpoints for the relevant interfaces found there.
|
||||||
// 2. Attach them to the VM.
|
// 2. Attach them to the VM.
|
||||||
|
@ -720,7 +720,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
|
|||||||
}
|
}
|
||||||
|
|
||||||
if machine.Type == QemuQ35 || machine.Type == QemuVirt {
|
if machine.Type == QemuQ35 || machine.Type == QemuVirt {
|
||||||
if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig, machine.Type); err != nil {
|
if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig, machine.Type, network); err != nil {
|
||||||
q.Logger().WithError(err).Errorf("Cannot create PCIe topology")
|
q.Logger().WithError(err).Errorf("Cannot create PCIe topology")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -759,7 +759,7 @@ func (q *qemu) checkBpfEnabled() {
|
|||||||
// Max PCIe switch ports is 16
|
// Max PCIe switch ports is 16
|
||||||
// There is only 64kB of IO memory each root,switch port will consume 4k hence
|
// There is only 64kB of IO memory each root,switch port will consume 4k hence
|
||||||
// only 16 ports possible.
|
// only 16 ports possible.
|
||||||
func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig *HypervisorConfig, machineType string) error {
|
func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig *HypervisorConfig, machineType string, network Network) error {
|
||||||
|
|
||||||
// If no-port set just return no need to add PCIe Root Port or PCIe Switches
|
// If no-port set just return no need to add PCIe Root Port or PCIe Switches
|
||||||
if hypervisorConfig.HotPlugVFIO == config.NoPort && hypervisorConfig.ColdPlugVFIO == config.NoPort && machineType == QemuQ35 {
|
if hypervisorConfig.HotPlugVFIO == config.NoPort && hypervisorConfig.ColdPlugVFIO == config.NoPort && machineType == QemuQ35 {
|
||||||
@ -787,8 +787,21 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Get the number of hot(cold)-pluggable ports needed from the provided
|
// Get the number of hot(cold)-pluggable ports needed from the provided
|
||||||
// VFIO devices and VhostUserBlockDevices
|
// VFIO devices
|
||||||
var numOfPluggablePorts uint32 = 0
|
var numOfPluggablePorts uint32 = 0
|
||||||
|
|
||||||
|
// For now, PCIe native hotplug is the only way for Arm to hot-add a PCI device.
|
||||||
|
if machineType == QemuVirt {
|
||||||
|
epNum, err := network.GetEndpointsNum()
|
||||||
|
if err != nil {
|
||||||
|
q.Logger().Warn("Fail to get network endpoints number")
|
||||||
|
}
|
||||||
|
virtPcieRootPortNum := len(hypervisorConfig.VhostUserBlkDevices) + epNum
|
||||||
|
if hypervisorConfig.VirtioMem {
|
||||||
|
virtPcieRootPortNum++
|
||||||
|
}
|
||||||
|
numOfPluggablePorts += uint32(virtPcieRootPortNum)
|
||||||
|
}
|
||||||
for _, dev := range hypervisorConfig.VFIODevices {
|
for _, dev := range hypervisorConfig.VFIODevices {
|
||||||
var err error
|
var err error
|
||||||
dev.HostPath, err = config.GetHostPath(dev, false, "")
|
dev.HostPath, err = config.GetHostPath(dev, false, "")
|
||||||
@ -809,18 +822,11 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
|||||||
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort)
|
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort)
|
||||||
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
|
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
|
||||||
|
|
||||||
numOfVhostUserBlockDevices := len(hypervisorConfig.VhostUserBlkDevices)
|
|
||||||
|
|
||||||
// If number of PCIe root ports > 16 then bail out otherwise we may
|
// If number of PCIe root ports > 16 then bail out otherwise we may
|
||||||
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
|
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
|
||||||
// cannot be added which are crucial for Kata max slots on root bus is 32
|
// cannot be added which are crucial for Kata max slots on root bus is 32
|
||||||
// max slots on the complete pci(e) topology is 256 in QEMU
|
// max slots on the complete pci(e) topology is 256 in QEMU
|
||||||
if vfioOnRootPort {
|
if vfioOnRootPort {
|
||||||
// On Arm the vhost-user-block device is a PCIe device we need
|
|
||||||
// to account for it in the number of pluggable ports
|
|
||||||
if machineType == QemuVirt {
|
|
||||||
numOfPluggablePorts = numOfPluggablePorts + uint32(numOfVhostUserBlockDevices)
|
|
||||||
}
|
|
||||||
if numOfPluggablePorts > maxPCIeRootPort {
|
if numOfPluggablePorts > maxPCIeRootPort {
|
||||||
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
|
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
|
||||||
}
|
}
|
||||||
@ -828,21 +834,16 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if vfioOnSwitchPort {
|
if vfioOnSwitchPort {
|
||||||
// On Arm the vhost-user-block device is a PCIe device we need
|
|
||||||
// to account for it in the number of pluggable ports
|
|
||||||
if machineType == QemuVirt {
|
|
||||||
numOfPluggableRootPorts := uint32(numOfVhostUserBlockDevices)
|
|
||||||
if numOfPluggableRootPorts > maxPCIeRootPort {
|
|
||||||
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
|
|
||||||
}
|
|
||||||
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggableRootPorts, memSize32bit, memSize64bit)
|
|
||||||
}
|
|
||||||
if numOfPluggablePorts > maxPCIeSwitchPort {
|
if numOfPluggablePorts > maxPCIeSwitchPort {
|
||||||
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort)
|
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort)
|
||||||
}
|
}
|
||||||
qemuConfig.Devices = q.arch.appendPCIeSwitchPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit)
|
qemuConfig.Devices = q.arch.appendPCIeSwitchPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
// If neither Root Port nor Switch Port is enabled, check whether QemuVirt needs a PCIe root port added.
|
||||||
|
if machineType == QemuVirt {
|
||||||
|
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit)
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -936,7 +937,17 @@ func (q *qemu) setupVirtioMem(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridge.ID)
|
bridgeID := bridge.ID
|
||||||
|
|
||||||
|
// Hot add virtioMem dev to pcie-root-port for QemuVirt
|
||||||
|
machineType := q.HypervisorConfig().HypervisorMachineType
|
||||||
|
if machineType == QemuVirt {
|
||||||
|
addr = "00"
|
||||||
|
bridgeID = fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
|
||||||
|
config.PCIeDevices[config.RootPort]["virtiomem"] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridgeID)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
q.Logger().Infof("Setup %dMB virtio-mem-pci success", sizeMB)
|
q.Logger().Infof("Setup %dMB virtio-mem-pci success", sizeMB)
|
||||||
} else {
|
} else {
|
||||||
@ -1895,6 +1906,7 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
|
|||||||
}
|
}
|
||||||
|
|
||||||
devID := "virtio-" + tap.ID
|
devID := "virtio-" + tap.ID
|
||||||
|
machineType := q.HypervisorConfig().HypervisorMachineType
|
||||||
if op == AddDevice {
|
if op == AddDevice {
|
||||||
if err = q.hotAddNetDevice(tap.Name, endpoint.HardwareAddr(), tap.VMFds, tap.VhostFds); err != nil {
|
if err = q.hotAddNetDevice(tap.Name, endpoint.HardwareAddr(), tap.VMFds, tap.VhostFds); err != nil {
|
||||||
return err
|
return err
|
||||||
@ -1906,6 +1918,14 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
// Hotplug net dev to pcie root port for QemuVirt
|
||||||
|
if machineType == QemuVirt {
|
||||||
|
addr := "00"
|
||||||
|
bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
|
||||||
|
config.PCIeDevices[config.RootPort][devID] = true
|
||||||
|
return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridgeID, romFile, int(q.config.NumVCPUs), defaultDisableModern)
|
||||||
|
}
|
||||||
|
|
||||||
addr, bridge, err := q.arch.addDeviceToBridge(ctx, tap.ID, types.PCI)
|
addr, bridge, err := q.arch.addDeviceToBridge(ctx, tap.ID, types.PCI)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -1938,7 +1958,6 @@ func (q *qemu) hotplugNetDevice(ctx context.Context, endpoint Endpoint, op Opera
|
|||||||
return q.qmpMonitorCh.qmp.ExecuteNetCCWDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), devNoHotplug, int(q.config.NumVCPUs))
|
return q.qmpMonitorCh.qmp.ExecuteNetCCWDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), devNoHotplug, int(q.config.NumVCPUs))
|
||||||
}
|
}
|
||||||
return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridge.ID, romFile, int(q.config.NumVCPUs), defaultDisableModern)
|
return q.qmpMonitorCh.qmp.ExecuteNetPCIDeviceAdd(q.qmpMonitorCh.ctx, tap.Name, devID, endpoint.HardwareAddr(), addr, bridge.ID, romFile, int(q.config.NumVCPUs), defaultDisableModern)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := q.arch.removeDeviceFromBridge(tap.ID); err != nil {
|
if err := q.arch.removeDeviceFromBridge(tap.ID); err != nil {
|
||||||
|
Loading…
Reference in New Issue
Block a user