mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-09-27 03:33:11 +00:00
gpu: Add ColdPlug of VFIO devices with devManager
If we have a VFIO device and cold-plug is enabled, we mark each device as ColdPlug=true and let the VFIO module do the attaching. Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
@@ -509,9 +509,9 @@ type HypervisorConfig struct {
|
||||
// The PCIe Root Port device is used to hot-plug the PCIe device
|
||||
PCIeRootPort uint32
|
||||
|
||||
// RawDevices are used to get PCIe device info early before the sandbox
|
||||
// VFIODevices are used to get PCIe device info early before the sandbox
|
||||
// is started to make better PCIe topology decisions
|
||||
RawDevices []config.DeviceInfo
|
||||
VFIODevices []config.DeviceInfo
|
||||
|
||||
// ColdPlugVFIO is used to indicate if devices need to be coldplugged on the
|
||||
// root port, switch or no port
|
||||
|
@@ -1177,8 +1177,7 @@ func (k *kataAgent) appendDevices(deviceList []*grpc.Device, c *Container) []*gr
|
||||
case config.VhostUserBlk:
|
||||
kataDevice = k.appendVhostUserBlkDevice(dev, device, c)
|
||||
case config.DeviceVFIO:
|
||||
k.Logger().Infof("### ColdPlugging container is not adding any VFIO devices")
|
||||
//kataDevice = k.appendVfioDevice(dev, device, c)
|
||||
kataDevice = k.appendVfioDevice(dev, device, c)
|
||||
}
|
||||
|
||||
if kataDevice == nil {
|
||||
|
@@ -83,6 +83,7 @@ type QemuState struct {
|
||||
VirtiofsDaemonPid int
|
||||
PCIeRootPort int
|
||||
HotplugVFIOOnRootBus bool
|
||||
ColdPlugVFIO hv.PCIePort
|
||||
}
|
||||
|
||||
// qemu is a Hypervisor interface implementation for the Linux qemu hypervisor.
|
||||
@@ -282,6 +283,7 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
|
||||
q.Logger().Debug("Creating UUID")
|
||||
q.state.UUID = uuid.Generate().String()
|
||||
|
||||
q.state.ColdPlugVFIO = q.config.ColdPlugVFIO
|
||||
q.state.HotplugVFIOOnRootBus = q.config.HotplugVFIOOnRootBus
|
||||
q.state.PCIeRootPort = int(q.config.PCIeRootPort)
|
||||
|
||||
@@ -708,9 +710,25 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
|
||||
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, hypervisorConfig.PCIeRootPort, memSize32bit, memSize64bit)
|
||||
}
|
||||
|
||||
q.virtiofsDaemon, err = q.createVirtiofsDaemon(hypervisorConfig.SharedPath)
|
||||
|
||||
// If we have a VFIO device we need to update the firmware configuration
|
||||
// if executed in a trusted execution environment.
|
||||
if hypervisorConfig.ConfidentialGuest {
|
||||
// At the sandbox level we already checked that we have a
|
||||
// VFIO device, pass-through of a PCIe device needs allocated
|
||||
// memory in the firmware, otherwise BARs cannot be mapped
|
||||
if len(hypervisorConfig.VFIODevices) > 0 {
|
||||
fwCfg := govmmQemu.FwCfg{
|
||||
Name: "opt/ovmf/X-PciMmio64Mb",
|
||||
Str: "262144",
|
||||
}
|
||||
qemuConfig.FwCfg = append(qemuConfig.FwCfg, fwCfg)
|
||||
}
|
||||
}
|
||||
|
||||
q.qemuConfig = qemuConfig
|
||||
|
||||
q.virtiofsDaemon, err = q.createVirtiofsDaemon(hypervisorConfig.SharedPath)
|
||||
return err
|
||||
}
|
||||
|
||||
|
@@ -32,6 +32,7 @@ import (
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers"
|
||||
deviceManager "github.com/kata-containers/kata-containers/src/runtime/pkg/device/manager"
|
||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
|
||||
resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol"
|
||||
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
|
||||
@@ -619,12 +620,30 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
if err := validateHypervisorConfig(&sandboxConfig.HypervisorConfig); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Aggregate all the container devices and update the HV config
|
||||
var devices []config.DeviceInfo
|
||||
for _, ct := range sandboxConfig.Containers {
|
||||
devices = append(devices, ct.DeviceInfos...)
|
||||
|
||||
// If we have a confidential guest we need to cold-plug the PCIe VFIO devices
|
||||
// until we have TDISP/IDE PCIe support.
|
||||
coldPlugVFIO := (sandboxConfig.HypervisorConfig.ColdPlugVFIO != hv.NoPort)
|
||||
var devs []config.DeviceInfo
|
||||
for cnt, containers := range sandboxConfig.Containers {
|
||||
for dev, device := range containers.DeviceInfos {
|
||||
if coldPlugVFIO && deviceManager.IsVFIO(device.ContainerPath) {
|
||||
device.ColdPlug = true
|
||||
devs = append(devs, device)
|
||||
// We need to remove the devices marked for cold-plug
|
||||
// otherwise at the container level the kata-agent
|
||||
// will try to hot-plug them.
|
||||
infos := sandboxConfig.Containers[cnt].DeviceInfos
|
||||
infos = append(infos[:dev], infos[dev+1:]...)
|
||||
sandboxConfig.Containers[cnt].DeviceInfos = infos
|
||||
}
|
||||
}
|
||||
}
|
||||
sandboxConfig.HypervisorConfig.RawDevices = devices
|
||||
// If we have a confidential guest, we need to add a specific
|
||||
// firmware configuration to the hypervisor. We cannot do it here at
|
||||
// the sandbox level; we need to do that at the hypervisor level, capturing
|
||||
// the devices here and processing in CreateVM().
|
||||
sandboxConfig.HypervisorConfig.VFIODevices = devs
|
||||
|
||||
// store doesn't require hypervisor to be stored immediately
|
||||
if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil {
|
||||
@@ -635,6 +654,17 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !coldPlugVFIO {
|
||||
return s, nil
|
||||
}
|
||||
|
||||
for _, dev := range devs {
|
||||
_, err := s.AddDevice(ctx, dev)
|
||||
if err != nil {
|
||||
s.Logger().WithError(err).Debug("Cannot cold-plug add device")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user