diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 076a113941..3e46d4961d 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -267,6 +267,13 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@ # Determines how VFIO devices should be be presented to the container. # Options: # +# - vfio +# Matches behaviour of OCI runtimes (e.g. runc) as much as +# possible. VFIO devices will appear in the container as VFIO +# character devices under /dev/vfio. The exact names may differ +# from the host (they need to match the VM's IOMMU group numbers +# rather than the host's) +# # - guest-kernel # This is a Kata-specific behaviour that's useful in certain cases. # The VFIO device is managed by whatever driver in the VM kernel diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index f2c7c4fa73..5bc8e9cc38 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -547,6 +547,13 @@ sandbox_bind_mounts=@DEFBINDMOUNTS@ # Determines how VFIO devices should be be presented to the container. # Options: # +# - vfio +# Matches behaviour of OCI runtimes (e.g. runc) as much as +# possible. VFIO devices will appear in the container as VFIO +# character devices under /dev/vfio. The exact names may differ +# from the host (they need to match the VM's IOMMU group numbers +# rather than the host's) +# # - guest-kernel # This is a Kata-specific behaviour that's useful in certain cases. 
# The VFIO device is managed by whatever driver in the VM kernel diff --git a/src/runtime/virtcontainers/device/config/config.go b/src/runtime/virtcontainers/device/config/config.go index 5e62d0c179..4f27d9358b 100644 --- a/src/runtime/virtcontainers/device/config/config.go +++ b/src/runtime/virtcontainers/device/config/config.go @@ -191,20 +191,29 @@ type BlockDrive struct { type VFIOModeType uint32 const ( + // VFIOModeVFIO specifies OCI compliant behaviour: VFIO + // devices specified to Kata appear as VFIO devices within the + // container + VFIOModeVFIO VFIOModeType = iota + // VFIOModeGuestKernel specifies Kata-specific behaviour // useful in certain cases: VFIO devices specified to Kata are // bound to whatever driver in the VM will take them. This // requires specialized containers expecting this behaviour to // locate and use the devices - VFIOModeGuestKernel = iota + VFIOModeGuestKernel ) const ( + vfioModeVfioStr = "vfio" vfioModeGuestKernelStr = "guest-kernel" ) func (m *VFIOModeType) VFIOSetMode(modeName string) error { switch modeName { + case vfioModeVfioStr: + *m = VFIOModeVFIO + return nil case vfioModeGuestKernelStr: *m = VFIOModeGuestKernel return nil diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index c1a39220fb..d3c4fa48b8 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -92,6 +92,7 @@ var ( kataNvdimmDevType = "nvdimm" kataVirtioFSDevType = "virtio-fs" kataWatchableBindDevType = "watchable-bind" + kataVfioDevType = "vfio" // VFIO device to be used as VFIO in the container kataVfioGuestKernelDevType = "vfio-gk" // VFIO device for consumption by the guest kernel sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"} sharedDirVirtioFSOptions = []string{} @@ -1183,11 +1184,19 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c * // (see qomGetPciPath() for details). 
kataDevice := &grpc.Device{ ContainerPath: dev.ContainerPath, - Type: kataVfioGuestKernelDevType, + Type: kataVfioDevType, Id: groupNum, Options: make([]string, len(devList)), } + // We always pass the device information to the agent, since + // it needs that to wait for the devices to be ready. But depending + // on the vfio_mode, we need to use a different device type so + // the agent can handle it properly + if c.sandbox.config.VfioMode == config.VFIOModeGuestKernel { + kataDevice.Type = kataVfioGuestKernelDevType + } + for i, pciDev := range devList { kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDev.BDF, pciDev.GuestPciPath) } @@ -1417,7 +1426,7 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co // We need to constrain the spec to make sure we're not // passing irrelevant information to the agent. - k.constrainGRPCSpec(grpcSpec, passSeccomp, true) + k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.VfioMode == config.VFIOModeGuestKernel) req := &grpc.CreateContainerRequest{ ContainerId: c.id,