Merge pull request #2795 from dgibson/vfio-as-vfio

Allow VFIO devices to be used as VFIO devices in the container
This commit is contained in:
David Gibson
2021-10-25 14:25:26 +11:00
committed by GitHub
14 changed files with 591 additions and 97 deletions

View File

@@ -187,6 +187,40 @@ type BlockDrive struct {
Swap bool
}
// VFIOModeType indicates the behaviour mode for handling VFIO devices
// in the VM.
type VFIOModeType uint32

const (
	// VFIOModeVFIO specifies OCI compliant behaviour: VFIO
	// devices specified to Kata appear as VFIO devices within the
	// container.
	VFIOModeVFIO VFIOModeType = iota

	// VFIOModeGuestKernel specifies Kata-specific behaviour
	// useful in certain cases: VFIO devices specified to Kata are
	// bound to whatever driver in the VM will take them. This
	// requires specialized containers expecting this behaviour to
	// locate and use the devices.
	VFIOModeGuestKernel
)

// Textual names accepted by VFIOSetMode, one per VFIOModeType value.
const (
	vfioModeVfioStr        = "vfio"
	vfioModeGuestKernelStr = "guest-kernel"
)

// VFIOSetMode sets *m to the mode identified by modeName.
//
// The recognised names are "vfio" (VFIOModeVFIO) and "guest-kernel"
// (VFIOModeGuestKernel). Matching is exact and case-sensitive. For any
// other name an error is returned and *m is left unmodified.
func (m *VFIOModeType) VFIOSetMode(modeName string) error {
	switch modeName {
	case vfioModeVfioStr:
		*m = VFIOModeVFIO
	case vfioModeGuestKernelStr:
		*m = VFIOModeGuestKernel
	default:
		// %q keeps empty or whitespace-only names visible in the message.
		return fmt.Errorf("unknown VFIO mode %q", modeName)
	}
	return nil
}
// VFIODeviceType indicates VFIO device type
type VFIODeviceType uint32

View File

@@ -92,6 +92,7 @@ var (
kataNvdimmDevType = "nvdimm"
kataVirtioFSDevType = "virtio-fs"
kataWatchableBindDevType = "watchable-bind"
kataVfioDevType = "vfio" // VFIO device to be used as VFIO in the container
kataVfioGuestKernelDevType = "vfio-gk" // VFIO device for consumption by the guest kernel
sharedDir9pOptions = []string{"trans=virtio,version=9p2000.L,cache=mmap", "nodev"}
sharedDirVirtioFSOptions = []string{}
@@ -995,7 +996,7 @@ func (k *kataAgent) replaceOCIMountsForStorages(spec *specs.Spec, volumeStorages
return nil
}
func (k *kataAgent) constraintGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool) {
func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, stripVfio bool) {
// Disable Hooks since they have been handled on the host and there is
// no reason to send them to the agent. It would make no sense to try
// to apply them on the guest.
@@ -1058,17 +1059,21 @@ func (k *kataAgent) constraintGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool) {
}
grpcSpec.Linux.Namespaces = tmpNamespaces
// VFIO char device shouldn't not appear in the guest,
// the device driver should handle it and determinate its group.
var linuxDevices []grpc.LinuxDevice
for _, dev := range grpcSpec.Linux.Devices {
if dev.Type == "c" && strings.HasPrefix(dev.Path, vfioPath) {
k.Logger().WithField("vfio-dev", dev.Path).Debug("removing vfio device from grpcSpec")
continue
if stripVfio {
// VFIO char device shouldn't appear in the guest
// (because the VM device driver will do something
// with it rather than just presenting it to the
// container unmodified)
var linuxDevices []grpc.LinuxDevice
for _, dev := range grpcSpec.Linux.Devices {
if dev.Type == "c" && strings.HasPrefix(dev.Path, vfioPath) {
k.Logger().WithField("vfio-dev", dev.Path).Debug("removing vfio device from grpcSpec")
continue
}
linuxDevices = append(linuxDevices, dev)
}
linuxDevices = append(linuxDevices, dev)
grpcSpec.Linux.Devices = linuxDevices
}
grpcSpec.Linux.Devices = linuxDevices
}
func (k *kataAgent) handleShm(mounts []specs.Mount, sandbox *Sandbox) {
@@ -1179,11 +1184,19 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
// (see qomGetPciPath() for details).
kataDevice := &grpc.Device{
ContainerPath: dev.ContainerPath,
Type: kataVfioGuestKernelDevType,
Type: kataVfioDevType,
Id: groupNum,
Options: make([]string, len(devList)),
}
// We always pass the device information to the agent, since
// it needs that to wait for them to be ready. But depending
// on the vfio_mode, we need to use a different device type so
// the agent can handle it properly
if c.sandbox.config.VfioMode == config.VFIOModeGuestKernel {
kataDevice.Type = kataVfioGuestKernelDevType
}
for i, pciDev := range devList {
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDev.BDF, pciDev.GuestPciPath)
}
@@ -1411,9 +1424,9 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co
passSeccomp := !sandbox.config.DisableGuestSeccomp && sandbox.seccompSupported
// We need to constraint the spec to make sure we're not passing
// irrelevant information to the agent.
k.constraintGRPCSpec(grpcSpec, passSeccomp)
// We need to constrain the spec to make sure we're not
// passing irrelevant information to the agent.
k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.VfioMode == config.VFIOModeGuestKernel)
req := &grpc.CreateContainerRequest{
ContainerId: c.id,

View File

@@ -541,7 +541,7 @@ func TestAppendVhostUserBlkDevices(t *testing.T) {
updatedDevList, expected)
}
func TestConstraintGRPCSpec(t *testing.T) {
func TestConstrainGRPCSpec(t *testing.T) {
assert := assert.New(t)
expectedCgroupPath := "/foo/bar"
@@ -589,7 +589,7 @@ func TestConstraintGRPCSpec(t *testing.T) {
}
k := kataAgent{}
k.constraintGRPCSpec(g, true)
k.constrainGRPCSpec(g, true, true)
// Check nil fields
assert.Nil(g.Hooks)

View File

@@ -250,6 +250,10 @@ const (
// DisableNewNetNs is a sandbox annotation that determines whether a new netns is created for the hypervisor process.
DisableNewNetNs = kataAnnotRuntimePrefix + "disable_new_netns"
// VfioMode is a sandbox annotation to specify how attached VFIO devices should be treated.
// It overrides the runtime.vfio_mode parameter in the global configuration.toml.
VfioMode = kataAnnotRuntimePrefix + "vfio_mode"
)
// Agent related annotations

View File

@@ -116,6 +116,10 @@ type RuntimeConfig struct {
//the container network interface
InterNetworkModel vc.NetInterworkingModel
//Determines how VFIO devices should be presented to the
//container
VfioMode config.VFIOModeType
Debug bool
Trace bool
@@ -826,6 +830,13 @@ func addRuntimeConfigOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, r
sbConfig.NetworkConfig.InterworkingModel = runtimeConfig.InterNetworkModel
}
if value, ok := ocispec.Annotations[vcAnnotations.VfioMode]; ok {
if err := sbConfig.VfioMode.VFIOSetMode(value); err != nil {
return fmt.Errorf("Unknown VFIO mode \"%s\" in annotation %s",
value, vcAnnotations.VfioMode)
}
}
return nil
}
@@ -893,6 +904,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid, c
ShmSize: shmSize,
VfioMode: runtime.VfioMode,
SystemdCgroup: systemdCgroup,
SandboxCgroupOnly: runtime.SandboxCgroupOnly,

View File

@@ -134,6 +134,8 @@ type SandboxConfig struct {
ShmSize uint64
VfioMode config.VFIOModeType
// SharePidNs sets all containers to share the same sandbox level pid namespace.
SharePidNs bool