mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-10-24 05:31:31 +00:00
runtime/device: Allow VFIO devices to be presented to guest as VFIO devices
On a conventional (e.g. runc) container, passing in a VFIO group device, /dev/vfio/NN, will result in the same VFIO group device being available within the container. With Kata, however, the VFIO device will be bound to the guest kernel's driver (if it has one), possibly appearing as some other device (or a network interface) within the guest. This adds a new `vfio_mode` option to alter this. If set to "vfio" it will instruct the agent to remap VFIO devices to the VFIO driver within the guest as well, meaning they will appear as VFIO devices within the container. Unlike a runc container, the VFIO devices will have different names to the host, since the names correspond to the IOMMU groups of the guest and those can't be remapped with namespaces. For now we keep 'guest-kernel' as the value in the default configuration files, to maintain current Kata behaviour. In future we should change this to 'vfio' as the default. That will make Kata's default behaviour more closely resemble OCI specified behaviour. fixes #693 Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
This commit is contained in:
@@ -267,6 +267,13 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
|
||||
# Determines how VFIO devices should be presented to the container.
|
||||
# Options:
|
||||
#
|
||||
# - vfio
|
||||
# Matches behaviour of OCI runtimes (e.g. runc) as much as
|
||||
# possible. VFIO devices will appear in the container as VFIO
|
||||
# character devices under /dev/vfio. The exact names may differ
|
||||
# from the host (they need to match the VM's IOMMU group numbers
|
||||
# rather than the host's)
|
||||
#
|
||||
# - guest-kernel
|
||||
# This is a Kata-specific behaviour that's useful in certain cases.
|
||||
# The VFIO device is managed by whatever driver in the VM kernel
|
||||
|
@@ -547,6 +547,13 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@
|
||||
# Determines how VFIO devices should be presented to the container.
|
||||
# Options:
|
||||
#
|
||||
# - vfio
|
||||
# Matches behaviour of OCI runtimes (e.g. runc) as much as
|
||||
# possible. VFIO devices will appear in the container as VFIO
|
||||
# character devices under /dev/vfio. The exact names may differ
|
||||
# from the host (they need to match the VM's IOMMU group numbers
|
||||
# rather than the host's)
|
||||
#
|
||||
# - guest-kernel
|
||||
# This is a Kata-specific behaviour that's useful in certain cases.
|
||||
# The VFIO device is managed by whatever driver in the VM kernel
|
||||
|
@@ -191,20 +191,29 @@ type BlockDrive struct {
|
||||
type VFIOModeType uint32
|
||||
|
||||
const (
|
||||
// VFIOModeVFIO specifies OCI compliant behaviour: VFIO
|
||||
// devices specified to Kata appear as VFIO devices within the
|
||||
// container
|
||||
VFIOModeVFIO VFIOModeType = iota
|
||||
|
||||
// VFIOModeGuestKernel specifies Kata-specific behaviour
|
||||
// useful in certain cases: VFIO devices specified to Kata are
|
||||
// bound to whatever driver in the VM will take them. This
|
||||
// requires specialized containers expecting this behaviour to
|
||||
// locate and use the devices
|
||||
VFIOModeGuestKernel = iota
|
||||
VFIOModeGuestKernel
|
||||
)
|
||||
|
||||
const (
|
||||
vfioModeVfioStr = "vfio"
|
||||
vfioModeGuestKernelStr = "guest-kernel"
|
||||
)
|
||||
|
||||
func (m *VFIOModeType) VFIOSetMode(modeName string) error {
|
||||
switch modeName {
|
||||
case vfioModeVfioStr:
|
||||
*m = VFIOModeVFIO
|
||||
return nil
|
||||
case vfioModeGuestKernelStr:
|
||||
*m = VFIOModeGuestKernel
|
||||
return nil
|
||||
|
@@ -92,6 +92,7 @@ var (
|
||||
kataNvdimmDevType = "nvdimm"
|
||||
kataVirtioFSDevType = "virtio-fs"
|
||||
kataWatchableBindDevType = "watchable-bind"
|
||||
kataVfioDevType = "vfio" // VFIO device to be used as VFIO in the container
|
||||
kataVfioGuestKernelDevType = "vfio-gk" // VFIO device for consumption by the guest kernel
|
||||
sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
|
||||
sharedDirVirtioFSOptions = []string{}
|
||||
@@ -1183,11 +1184,19 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
|
||||
// (see qomGetPciPath() for details).
|
||||
kataDevice := &grpc.Device{
|
||||
ContainerPath: dev.ContainerPath,
|
||||
Type: kataVfioGuestKernelDevType,
|
||||
Type: kataVfioDevType,
|
||||
Id: groupNum,
|
||||
Options: make([]string, len(devList)),
|
||||
}
|
||||
|
||||
// We always pass the device information to the agent, since
|
||||
// it needs that to wait for them to be ready. But depending
|
||||
// on the vfio_mode, we need to use a different device type so
|
||||
// the agent can handle it properly
|
||||
if c.sandbox.config.VfioMode == config.VFIOModeGuestKernel {
|
||||
kataDevice.Type = kataVfioGuestKernelDevType
|
||||
}
|
||||
|
||||
for i, pciDev := range devList {
|
||||
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDev.BDF, pciDev.GuestPciPath)
|
||||
}
|
||||
@@ -1417,7 +1426,7 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co
|
||||
|
||||
// We need to constrain the spec to make sure we're not
|
||||
// passing irrelevant information to the agent.
|
||||
k.constrainGRPCSpec(grpcSpec, passSeccomp, true)
|
||||
k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.VfioMode == config.VFIOModeGuestKernel)
|
||||
|
||||
req := &grpc.CreateContainerRequest{
|
||||
ContainerId: c.id,
|
||||
|
Reference in New Issue
Block a user