From c8cf7ed3bc3f9b7f40dcb66fcffc97eea1db4a57 Mon Sep 17 00:00:00 2001 From: Zvonko Kaiser Date: Fri, 21 Apr 2023 08:56:47 +0000 Subject: [PATCH] gpu: Add ColdPlug of VFIO devices with devManager If we have a VFIO device and cold-plug is enabled we mark each device as ColdPlug=true and let the VFIO module do the attaching. Signed-off-by: Zvonko Kaiser --- src/runtime/pkg/device/manager/manager.go | 2 +- src/runtime/pkg/device/manager/utils.go | 4 +- src/runtime/pkg/device/manager/utils_test.go | 2 +- src/runtime/virtcontainers/hypervisor.go | 4 +- src/runtime/virtcontainers/kata_agent.go | 3 +- src/runtime/virtcontainers/qemu.go | 20 +++++++++- src/runtime/virtcontainers/sandbox.go | 40 +++++++++++++++++--- 7 files changed, 61 insertions(+), 14 deletions(-) diff --git a/src/runtime/pkg/device/manager/manager.go b/src/runtime/pkg/device/manager/manager.go index 34a51d3001..baf1209a75 100644 --- a/src/runtime/pkg/device/manager/manager.go +++ b/src/runtime/pkg/device/manager/manager.go @@ -116,7 +116,7 @@ func (dm *deviceManager) createDevice(devInfo config.DeviceInfo) (dev api.Device if devInfo.ID, err = dm.newDeviceID(); err != nil { return nil, err } - if isVFIO(devInfo.HostPath) { + if IsVFIO(devInfo.HostPath) { return drivers.NewVFIODevice(&devInfo), nil } else if isVhostUserBlk(devInfo) { if devInfo.DriverOptions == nil { diff --git a/src/runtime/pkg/device/manager/utils.go b/src/runtime/pkg/device/manager/utils.go index 17d14741c1..e78205d0c7 100644 --- a/src/runtime/pkg/device/manager/utils.go +++ b/src/runtime/pkg/device/manager/utils.go @@ -17,8 +17,8 @@ const ( vfioPath = "/dev/vfio/" ) -// isVFIO checks if the device provided is a vfio group. -func isVFIO(hostPath string) bool { +// IsVFIO checks if the device provided is a vfio group. 
+func IsVFIO(hostPath string) bool { // Ignore /dev/vfio/vfio character device if strings.HasPrefix(hostPath, filepath.Join(vfioPath, "vfio")) { return false diff --git a/src/runtime/pkg/device/manager/utils_test.go b/src/runtime/pkg/device/manager/utils_test.go index 273283823f..b57992b3d0 100644 --- a/src/runtime/pkg/device/manager/utils_test.go +++ b/src/runtime/pkg/device/manager/utils_test.go @@ -31,7 +31,7 @@ func TestIsVFIO(t *testing.T) { } for _, d := range data { - isVFIO := isVFIO(d.path) + isVFIO := IsVFIO(d.path) assert.Equal(t, d.expected, isVFIO) } } diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index bc33cfd672..f773e91d5b 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -509,9 +509,9 @@ type HypervisorConfig struct { // The PCIe Root Port device is used to hot-plug the PCIe device PCIeRootPort uint32 - // RawDevics are used to get PCIe device info early before the sandbox + // VFIODevices are used to get PCIe device info early before the sandbox // is started to make better PCIe topology decisions - RawDevices []config.DeviceInfo + VFIODevices []config.DeviceInfo // ColdPlugVFIO is used to indicate if devices need to be coldplugged on the // root port, switch or no port diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 5c22277d0a..9e5c8b34f4 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -1177,8 +1177,7 @@ func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*gr case config.VhostUserBlk: kataDevice = k.appendVhostUserBlkDevice(dev, device, c) case config.DeviceVFIO: - k.Logger().Infof("### ColdPlugging container is not adding any VFIO devices") - //kataDevice = k.appendVfioDevice(dev, device, c) + kataDevice = k.appendVfioDevice(dev, device, c) } if kataDevice == nil { diff --git 
a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 3c8cad5204..d65c93553a 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -83,6 +83,7 @@ type QemuState struct { VirtiofsDaemonPid int PCIeRootPort int HotplugVFIOOnRootBus bool + ColdPlugVFIO hv.PCIePort } // qemu is an Hypervisor interface implementation for the Linux qemu hypervisor. @@ -282,6 +283,7 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso q.Logger().Debug("Creating UUID") q.state.UUID = uuid.Generate().String() + q.state.ColdPlugVFIO = q.config.ColdPlugVFIO q.state.HotplugVFIOOnRootBus = q.config.HotplugVFIOOnRootBus q.state.PCIeRootPort = int(q.config.PCIeRootPort) @@ -708,9 +710,25 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, hypervisorConfig.PCIeRootPort, memSize32bit, memSize64bit) } + q.virtiofsDaemon, err = q.createVirtiofsDaemon(hypervisorConfig.SharedPath) + + // If we have a VFIO device we need to update the firmware configuration + // if executed in a trusted execution environment. 
+ if hypervisorConfig.ConfidentialGuest { + // At the sandbox level we already checked that we have a + // VFIO device, pass-through of a PCIe device needs allocated + // memory in the firmware otherwise BARs cannot be mapped + if len(hypervisorConfig.VFIODevices) > 0 { + fwCfg := govmmQemu.FwCfg{ + Name: "opt/ovmf/X-PciMmio64Mb", + Str: "262144", + } + qemuConfig.FwCfg = append(qemuConfig.FwCfg, fwCfg) + } + } + q.qemuConfig = qemuConfig - q.virtiofsDaemon, err = q.createVirtiofsDaemon(hypervisorConfig.SharedPath) return err } diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index da1d52e9d5..35b3193944 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -32,6 +32,7 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers" deviceManager "github.com/kata-containers/kata-containers/src/runtime/pkg/device/manager" + hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental" @@ -619,12 +620,30 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor if err := validateHypervisorConfig(&sandboxConfig.HypervisorConfig); err != nil { return nil, err } - // Aggregate all the container devices and update the HV config - var devices []config.DeviceInfo - for _, ct := range sandboxConfig.Containers { - devices = append(devices, ct.DeviceInfos...) + + // If we have a confidential guest we need to cold-plug the PCIe VFIO devices + // until we have TDISP/IDE PCIe support. 
+ coldPlugVFIO := (sandboxConfig.HypervisorConfig.ColdPlugVFIO != hv.NoPort) + var devs []config.DeviceInfo + for cnt, containers := range sandboxConfig.Containers { + for dev, device := range containers.DeviceInfos { + if coldPlugVFIO && deviceManager.IsVFIO(device.ContainerPath) { + device.ColdPlug = true + devs = append(devs, device) + // We need to remove the devices marked for cold-plug + // otherwise at the container level the kata-agent + // will try to hot-plug them. + infos := sandboxConfig.Containers[cnt].DeviceInfos + infos = append(infos[:dev], infos[dev+1:]...) + sandboxConfig.Containers[cnt].DeviceInfos = infos + } + } } - sandboxConfig.HypervisorConfig.RawDevices = devices + // If we have a confidential guest, we need to add a specific + // firmware configuration to the hypervisor. We cannot do it here at + // the sandbox level; we need to do that at the hypervisor level, capturing + // the devices here and processing in CreateVM(). + sandboxConfig.HypervisorConfig.VFIODevices = devs // store doesn't require hypervisor to be stored immediately if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil { @@ -635,6 +654,17 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor return nil, err } + if !coldPlugVFIO { + return s, nil + } + + for _, dev := range devs { + _, err := s.AddDevice(ctx, dev) + if err != nil { + s.Logger().WithError(err).Debug("Cannot cold-plug add device") + return nil, err + } + } return s, nil }