mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-28 19:54:35 +00:00
runtime: Introduce "vfio_mode" config variable and annotation
In order to support DPDK workloads, we need to change the way VFIO devices will be handled in Kata containers. However, the current method, although it is not remotely OCI compliant, has real uses. Therefore, introduce a new runtime configuration field "vfio_mode" to control how VFIO devices will be presented to the container. We also add a new sandbox annotation - io.katacontainers.config.runtime.vfio_mode - to override this on a per-sandbox basis. For now, the only allowed value is "guest-kernel", which refers to the current behaviour where VFIO devices added to the container will be bound to whatever driver in the VM kernel claims them. Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
This commit is contained in:
parent
730b9c433f
commit
57ab408576
@ -190,6 +190,7 @@ DEFVALIDVHOSTUSERSTOREPATHS := [\"$(DEFVHOSTUSERSTOREPATH)\"]
|
||||
DEFFILEMEMBACKEND := ""
|
||||
DEFVALIDFILEMEMBACKENDS := [\"$(DEFFILEMEMBACKEND)\"]
|
||||
DEFMSIZE9P := 8192
|
||||
DEFVFIOMODE := guest-kernel
|
||||
|
||||
# Default cgroup model
|
||||
DEFSANDBOXCGROUPONLY ?= false
|
||||
@ -459,6 +460,7 @@ USER_VARS += DEFENTROPYSOURCE
|
||||
USER_VARS += DEFVALIDENTROPYSOURCES
|
||||
USER_VARS += DEFSANDBOXCGROUPONLY
|
||||
USER_VARS += DEFBINDMOUNTS
|
||||
USER_VARS += DEFVFIOMODE
|
||||
USER_VARS += FEATURE_SELINUX
|
||||
USER_VARS += BUILDFLAGS
|
||||
|
||||
|
@ -263,6 +263,20 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
|
||||
# These will not be exposed to the container workloads, and are only provided for potential guest services.
|
||||
sandbox_bind_mounts=@DEFBINDMOUNTS@
|
||||
|
||||
# VFIO Mode
|
||||
# Determines how VFIO devices should be presented to the container.
|
||||
# Options:
|
||||
#
|
||||
# - guest-kernel
|
||||
# This is a Kata-specific behaviour that's useful in certain cases.
|
||||
# The VFIO device is managed by whatever driver in the VM kernel
|
||||
# claims it. This means it will appear as one or more device nodes
|
||||
# or network interfaces depending on the nature of the device.
|
||||
# Using this mode requires specially built workloads that know how
|
||||
# to locate the relevant device interfaces within the VM.
|
||||
#
|
||||
vfio_mode="@DEFVFIOMODE@"
|
||||
|
||||
# Enabled experimental feature list, format: ["a", "b"].
|
||||
# Experimental features are features not stable enough for production,
|
||||
# they may break compatibility, and are prepared for a big version bump.
|
||||
|
@ -543,6 +543,20 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
|
||||
# These will not be exposed to the container workloads, and are only provided for potential guest services.
|
||||
sandbox_bind_mounts=@DEFBINDMOUNTS@
|
||||
|
||||
# VFIO Mode
|
||||
# Determines how VFIO devices should be presented to the container.
|
||||
# Options:
|
||||
#
|
||||
# - guest-kernel
|
||||
# This is a Kata-specific behaviour that's useful in certain cases.
|
||||
# The VFIO device is managed by whatever driver in the VM kernel
|
||||
# claims it. This means it will appear as one or more device nodes
|
||||
# or network interfaces depending on the nature of the device.
|
||||
# Using this mode requires specially built workloads that know how
|
||||
# to locate the relevant device interfaces within the VM.
|
||||
#
|
||||
vfio_mode="@DEFVFIOMODE@"
|
||||
|
||||
# Enabled experimental feature list, format: ["a", "b"].
|
||||
# Experimental features are features not stable enough for production,
|
||||
# they may break compatibility, and are prepared for a big version bump.
|
||||
|
@ -88,6 +88,7 @@ const defaultConfidentialGuest = false
|
||||
const defaultGuestSwap = false
|
||||
const defaultRootlessHypervisor = false
|
||||
const defaultDisableSeccomp = false
|
||||
const defaultVfioMode = "guest-kernel"
|
||||
|
||||
var defaultSGXEPCSize = int64(0)
|
||||
|
||||
|
@ -143,6 +143,7 @@ type runtime struct {
|
||||
JaegerEndpoint string `toml:"jaeger_endpoint"`
|
||||
JaegerUser string `toml:"jaeger_user"`
|
||||
JaegerPassword string `toml:"jaeger_password"`
|
||||
VfioMode string `toml:"vfio_mode"`
|
||||
SandboxBindMounts []string `toml:"sandbox_bind_mounts"`
|
||||
Experimental []string `toml:"experimental"`
|
||||
Debug bool `toml:"enable_debug"`
|
||||
@ -1068,6 +1069,11 @@ func initConfig() (config oci.RuntimeConfig, err error) {
|
||||
return oci.RuntimeConfig{}, err
|
||||
}
|
||||
|
||||
err = config.VfioMode.VFIOSetMode(defaultVfioMode)
|
||||
if err != nil {
|
||||
return oci.RuntimeConfig{}, err
|
||||
}
|
||||
|
||||
config = oci.RuntimeConfig{
|
||||
HypervisorType: defaultHypervisor,
|
||||
HypervisorConfig: GetDefaultHypervisorConfig(),
|
||||
@ -1114,6 +1120,14 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat
|
||||
}
|
||||
}
|
||||
|
||||
if tomlConf.Runtime.VfioMode != "" {
|
||||
err = config.VfioMode.VFIOSetMode(tomlConf.Runtime.VfioMode)
|
||||
|
||||
if err != nil {
|
||||
return "", config, err
|
||||
}
|
||||
}
|
||||
|
||||
if !ignoreLogging {
|
||||
err := handleSystemLog("", "")
|
||||
if err != nil {
|
||||
|
@ -187,6 +187,31 @@ type BlockDrive struct {
|
||||
Swap bool
|
||||
}
|
||||
|
||||
// VFIOModeType indicates the behaviour mode for handling VFIO devices
// in the VM.
type VFIOModeType uint32

const (
	// VFIOModeGuestKernel specifies Kata-specific behaviour
	// useful in certain cases: VFIO devices specified to Kata are
	// bound to whatever driver in the VM will take them. This
	// requires specialized containers expecting this behaviour to
	// locate and use the devices.
	//
	// It is the first (zero) value of the enumeration, so a
	// zero-valued VFIOModeType is VFIOModeGuestKernel. The constant is
	// explicitly typed so that it always carries VFIOModeType rather
	// than being an untyped integer.
	VFIOModeGuestKernel VFIOModeType = iota
)

const (
	// vfioModeGuestKernelStr is the textual name accepted by
	// VFIOSetMode for VFIOModeGuestKernel.
	vfioModeGuestKernelStr = "guest-kernel"
)

// VFIOSetMode sets m to the mode named by modeName.
//
// The only recognised name is "guest-kernel". For any other name
// (including the empty string) m is left unchanged and an error
// naming the rejected value is returned.
func (m *VFIOModeType) VFIOSetMode(modeName string) error {
	switch modeName {
	case vfioModeGuestKernelStr:
		*m = VFIOModeGuestKernel
		return nil
	}

	// The message text is kept exactly as it was so that callers or
	// tests matching on it keep working.
	return fmt.Errorf("Unknown VFIO mode %s", modeName)
}
|
||||
|
||||
// VFIODeviceType indicates VFIO device type
|
||||
type VFIODeviceType uint32
|
||||
|
||||
|
@ -250,6 +250,10 @@ const (
|
||||
|
||||
// DisableNewNetNs is a sandbox annotation that determines whether to create a new netns for the hypervisor process.
|
||||
DisableNewNetNs = kataAnnotRuntimePrefix + "disable_new_netns"
|
||||
|
||||
// VfioMode is a sandbox annotation to specify how attached VFIO devices should be treated
|
||||
// Overrides the runtime.vfio_mode parameter in the global configuration.toml
|
||||
VfioMode = kataAnnotRuntimePrefix + "vfio_mode"
|
||||
)
|
||||
|
||||
// Agent related annotations
|
||||
|
@ -116,6 +116,10 @@ type RuntimeConfig struct {
|
||||
//the container network interface
|
||||
InterNetworkModel vc.NetInterworkingModel
|
||||
|
||||
//Determines how VFIO devices should be presented to the
|
||||
//container
|
||||
VfioMode config.VFIOModeType
|
||||
|
||||
Debug bool
|
||||
Trace bool
|
||||
|
||||
@ -826,6 +830,13 @@ func addRuntimeConfigOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, r
|
||||
sbConfig.NetworkConfig.InterworkingModel = runtimeConfig.InterNetworkModel
|
||||
}
|
||||
|
||||
if value, ok := ocispec.Annotations[vcAnnotations.VfioMode]; ok {
|
||||
if err := sbConfig.VfioMode.VFIOSetMode(value); err != nil {
|
||||
return fmt.Errorf("Unknown VFIO mode \"%s\" in annotation %s",
|
||||
value, vcAnnotations.VfioMode)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -893,6 +904,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid, c
|
||||
|
||||
ShmSize: shmSize,
|
||||
|
||||
VfioMode: runtime.VfioMode,
|
||||
|
||||
SystemdCgroup: systemdCgroup,
|
||||
|
||||
SandboxCgroupOnly: runtime.SandboxCgroupOnly,
|
||||
|
@ -134,6 +134,8 @@ type SandboxConfig struct {
|
||||
|
||||
ShmSize uint64
|
||||
|
||||
VfioMode config.VFIOModeType
|
||||
|
||||
// SharePidNs sets all containers to share the same sandbox level pid namespace.
|
||||
SharePidNs bool
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user