mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-29 04:04:45 +00:00
vfio: Added annotation for hot(cold) plug
It is now possible to configure the PCIe topology via annotations. Also added a simple test checking for Invalid and RootPort. Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
parent
8f0d4e2612
commit
40101ea7db
@ -162,6 +162,8 @@ const (
|
||||
BridgePort = "bridge-port"
|
||||
// NoPort is for disabling VFIO hotplug/coldplug
|
||||
NoPort = "no-port"
|
||||
// InvalidPort is for invalid port
|
||||
InvalidPort = "invalid-port"
|
||||
)
|
||||
|
||||
func (p PCIePort) String() string {
|
||||
@ -173,6 +175,8 @@ func (p PCIePort) String() string {
|
||||
case BridgePort:
|
||||
fallthrough
|
||||
case NoPort:
|
||||
fallthrough
|
||||
case InvalidPort:
|
||||
return string(p)
|
||||
}
|
||||
return fmt.Sprintf("<unknown PCIePort: %s>", string(p))
|
||||
@ -184,6 +188,34 @@ var PCIePortPrefixMapping = map[PCIePort]PCIePortBusPrefix{
|
||||
BridgePort: PCIBridgePortPrefix,
|
||||
}
|
||||
|
||||
func (p PCIePort) InValid() bool {
|
||||
switch p {
|
||||
case RootPort:
|
||||
fallthrough
|
||||
case SwitchPort:
|
||||
fallthrough
|
||||
case BridgePort:
|
||||
fallthrough
|
||||
case NoPort:
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (p PCIePort) Valid() bool {
|
||||
switch p {
|
||||
case RootPort:
|
||||
fallthrough
|
||||
case SwitchPort:
|
||||
fallthrough
|
||||
case BridgePort:
|
||||
fallthrough
|
||||
case NoPort:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// DeviceInfo is an embedded type that contains device data common to all types of devices.
|
||||
type DeviceInfo struct {
|
||||
// DriverOptions is specific options for each device driver
|
||||
|
@ -109,5 +109,3 @@ var defaultRuntimeConfiguration = "@CONFIG_PATH@"
|
||||
|
||||
const defaultHotPlugVFIO = config.NoPort
|
||||
const defaultColdPlugVFIO = config.NoPort
|
||||
|
||||
|
||||
|
@ -453,6 +453,10 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig,
|
||||
return err
|
||||
}
|
||||
|
||||
if err := addHypervisorHotColdPlugVfioOverrides(ocispec, config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if value, ok := ocispec.Annotations[vcAnnotations.MachineType]; ok {
|
||||
if value != "" {
|
||||
config.HypervisorConfig.HypervisorMachineType = value
|
||||
@ -570,6 +574,33 @@ func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, ru
|
||||
return nil
|
||||
}
|
||||
|
||||
func addHypervisorPCIePortOverride(value string) (config.PCIePort, error) {
|
||||
if value == "" {
|
||||
return config.NoPort, nil
|
||||
}
|
||||
port := config.PCIePort(value)
|
||||
if port.InValid() {
|
||||
return config.InvalidPort, fmt.Errorf("Invalid PCIe port \"%v\" specified in annotation", value)
|
||||
}
|
||||
return port, nil
|
||||
}
|
||||
|
||||
func addHypervisorHotColdPlugVfioOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
|
||||
|
||||
var err error
|
||||
if value, ok := ocispec.Annotations[vcAnnotations.HotPlugVFIO]; ok {
|
||||
if sbConfig.HypervisorConfig.HotPlugVFIO, err = addHypervisorPCIePortOverride(value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if value, ok := ocispec.Annotations[vcAnnotations.ColdPlugVFIO]; ok {
|
||||
if sbConfig.HypervisorConfig.ColdPlugVFIO, err = addHypervisorPCIePortOverride(value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error {
|
||||
|
||||
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMemory).setUintWithCheck(func(memorySz uint64) error {
|
||||
|
@ -599,7 +599,7 @@ func TestContainerPipeSizeAnnotation(t *testing.T) {
|
||||
func TestAddHypervisorAnnotations(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
config := vc.SandboxConfig{
|
||||
sbConfig := vc.SandboxConfig{
|
||||
Annotations: make(map[string]string),
|
||||
}
|
||||
|
||||
@ -628,8 +628,8 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
||||
runtimeConfig.HypervisorConfig.VirtioFSDaemonList = []string{"/bin/*ls*"}
|
||||
|
||||
ocispec.Annotations[vcAnnotations.KernelParams] = "vsyscall=emulate iommu=on"
|
||||
addHypervisorConfigOverrides(ocispec, &config, runtimeConfig)
|
||||
assert.Exactly(expectedHyperConfig, config.HypervisorConfig)
|
||||
addHypervisorConfigOverrides(ocispec, &sbConfig, runtimeConfig)
|
||||
assert.Exactly(expectedHyperConfig, sbConfig.HypervisorConfig)
|
||||
|
||||
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1"
|
||||
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1"
|
||||
@ -660,7 +660,8 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
||||
ocispec.Annotations[vcAnnotations.GuestHookPath] = "/usr/bin/"
|
||||
ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true"
|
||||
ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus] = "true"
|
||||
ocispec.Annotations[vcAnnotations.PCIeRootPort] = "2"
|
||||
ocispec.Annotations[vcAnnotations.ColdPlugVFIO] = string(config.InvalidPort)
|
||||
ocispec.Annotations[vcAnnotations.HotPlugVFIO] = string(config.RootPort)
|
||||
ocispec.Annotations[vcAnnotations.IOMMUPlatform] = "true"
|
||||
ocispec.Annotations[vcAnnotations.SGXEPC] = "64Mi"
|
||||
ocispec.Annotations[vcAnnotations.UseLegacySerial] = "true"
|
||||
@ -668,55 +669,56 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
||||
ocispec.Annotations[vcAnnotations.RxRateLimiterMaxRate] = "10000000"
|
||||
ocispec.Annotations[vcAnnotations.TxRateLimiterMaxRate] = "10000000"
|
||||
|
||||
addAnnotations(ocispec, &config, runtimeConfig)
|
||||
assert.Equal(config.HypervisorConfig.NumVCPUs, uint32(1))
|
||||
assert.Equal(config.HypervisorConfig.DefaultMaxVCPUs, uint32(1))
|
||||
assert.Equal(config.HypervisorConfig.MemorySize, uint32(1024))
|
||||
assert.Equal(config.HypervisorConfig.MemSlots, uint32(20))
|
||||
assert.Equal(config.HypervisorConfig.MemOffset, uint64(512))
|
||||
assert.Equal(config.HypervisorConfig.VirtioMem, true)
|
||||
assert.Equal(config.HypervisorConfig.MemPrealloc, true)
|
||||
assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")
|
||||
assert.Equal(config.HypervisorConfig.HugePages, true)
|
||||
assert.Equal(config.HypervisorConfig.IOMMU, true)
|
||||
assert.Equal(config.HypervisorConfig.BlockDeviceDriver, "virtio-scsi")
|
||||
assert.Equal(config.HypervisorConfig.BlockDeviceAIO, "io_uring")
|
||||
assert.Equal(config.HypervisorConfig.DisableBlockDeviceUse, true)
|
||||
assert.Equal(config.HypervisorConfig.EnableIOThreads, true)
|
||||
assert.Equal(config.HypervisorConfig.BlockDeviceCacheSet, true)
|
||||
assert.Equal(config.HypervisorConfig.BlockDeviceCacheDirect, true)
|
||||
assert.Equal(config.HypervisorConfig.BlockDeviceCacheNoflush, true)
|
||||
assert.Equal(config.HypervisorConfig.SharedFS, "virtio-fs")
|
||||
assert.Equal(config.HypervisorConfig.VirtioFSDaemon, "/bin/false")
|
||||
assert.Equal(config.HypervisorConfig.VirtioFSCache, "auto")
|
||||
assert.ElementsMatch(config.HypervisorConfig.VirtioFSExtraArgs, [2]string{"arg0", "arg1"})
|
||||
assert.Equal(config.HypervisorConfig.Msize9p, uint32(512))
|
||||
assert.Equal(config.HypervisorConfig.HypervisorMachineType, "q35")
|
||||
assert.Equal(config.HypervisorConfig.MachineAccelerators, "nofw")
|
||||
assert.Equal(config.HypervisorConfig.CPUFeatures, "pmu=off")
|
||||
assert.Equal(config.HypervisorConfig.DisableVhostNet, true)
|
||||
assert.Equal(config.HypervisorConfig.GuestHookPath, "/usr/bin/")
|
||||
assert.Equal(config.HypervisorConfig.DisableImageNvdimm, true)
|
||||
assert.Equal(config.HypervisorConfig.HotplugVFIOOnRootBus, true)
|
||||
assert.Equal(config.HypervisorConfig.PCIeRootPort, uint32(2))
|
||||
assert.Equal(config.HypervisorConfig.IOMMUPlatform, true)
|
||||
assert.Equal(config.HypervisorConfig.SGXEPCSize, int64(67108864))
|
||||
assert.Equal(config.HypervisorConfig.LegacySerial, true)
|
||||
assert.Equal(config.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000))
|
||||
assert.Equal(config.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000))
|
||||
addAnnotations(ocispec, &sbConfig, runtimeConfig)
|
||||
assert.Equal(sbConfig.HypervisorConfig.NumVCPUs, uint32(1))
|
||||
assert.Equal(sbConfig.HypervisorConfig.DefaultMaxVCPUs, uint32(1))
|
||||
assert.Equal(sbConfig.HypervisorConfig.MemorySize, uint32(1024))
|
||||
assert.Equal(sbConfig.HypervisorConfig.MemSlots, uint32(20))
|
||||
assert.Equal(sbConfig.HypervisorConfig.MemOffset, uint64(512))
|
||||
assert.Equal(sbConfig.HypervisorConfig.VirtioMem, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.MemPrealloc, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")
|
||||
assert.Equal(sbConfig.HypervisorConfig.HugePages, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.IOMMU, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceDriver, "virtio-scsi")
|
||||
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceAIO, "io_uring")
|
||||
assert.Equal(sbConfig.HypervisorConfig.DisableBlockDeviceUse, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.EnableIOThreads, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheSet, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheDirect, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheNoflush, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.SharedFS, "virtio-fs")
|
||||
assert.Equal(sbConfig.HypervisorConfig.VirtioFSDaemon, "/bin/false")
|
||||
assert.Equal(sbConfig.HypervisorConfig.VirtioFSCache, "auto")
|
||||
assert.ElementsMatch(sbConfig.HypervisorConfig.VirtioFSExtraArgs, [2]string{"arg0", "arg1"})
|
||||
assert.Equal(sbConfig.HypervisorConfig.Msize9p, uint32(512))
|
||||
assert.Equal(sbConfig.HypervisorConfig.HypervisorMachineType, "q35")
|
||||
assert.Equal(sbConfig.HypervisorConfig.MachineAccelerators, "nofw")
|
||||
assert.Equal(sbConfig.HypervisorConfig.CPUFeatures, "pmu=off")
|
||||
assert.Equal(sbConfig.HypervisorConfig.DisableVhostNet, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.GuestHookPath, "/usr/bin/")
|
||||
assert.Equal(sbConfig.HypervisorConfig.DisableImageNvdimm, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.HotplugVFIOOnRootBus, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.ColdPlugVFIO, config.InvalidPort)
|
||||
assert.Equal(sbConfig.HypervisorConfig.HotPlugVFIO, config.RootPort)
|
||||
assert.Equal(sbConfig.HypervisorConfig.IOMMUPlatform, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.SGXEPCSize, int64(67108864))
|
||||
assert.Equal(sbConfig.HypervisorConfig.LegacySerial, true)
|
||||
assert.Equal(sbConfig.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000))
|
||||
assert.Equal(sbConfig.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000))
|
||||
|
||||
// In case an absurdly large value is provided, the config value is not overridden
|
||||
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "655536"
|
||||
err := addAnnotations(ocispec, &config, runtimeConfig)
|
||||
err := addAnnotations(ocispec, &sbConfig, runtimeConfig)
|
||||
assert.Error(err)
|
||||
|
||||
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "-1"
|
||||
err = addAnnotations(ocispec, &config, runtimeConfig)
|
||||
err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
|
||||
assert.Error(err)
|
||||
|
||||
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1"
|
||||
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "-1"
|
||||
err = addAnnotations(ocispec, &config, runtimeConfig)
|
||||
err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
|
||||
assert.Error(err)
|
||||
|
||||
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1"
|
||||
|
@ -126,6 +126,12 @@ const (
|
||||
// root bus instead of a bridge.
|
||||
HotplugVFIOOnRootBus = kataAnnotHypervisorPrefix + "hotplug_vfio_on_root_bus"
|
||||
|
||||
// ColdPlugVFIO is a sandbox annotation used to indicate if devices need to be coldplugged.
|
||||
ColdPlugVFIO = kataAnnotHypervisorPrefix + "cold_plug_vfio"
|
||||
|
||||
// HotPlugVFIO is a sandbox annotation used to indicate if devices need to be hotplugged.
|
||||
HotPlugVFIO = kataAnnotHypervisorPrefix + "hot_plug_vfio"
|
||||
|
||||
// EntropySource is a sandbox annotation to specify the path to a host source of
|
||||
// entropy (/dev/random, /dev/urandom or real hardware RNG device)
|
||||
EntropySource = kataAnnotHypervisorPrefix + "entropy_source"
|
||||
|
@ -746,6 +746,8 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
||||
if hypervisorConfig.HotPlugVFIO == config.NoPort && hypervisorConfig.ColdPlugVFIO == config.NoPort {
|
||||
return nil
|
||||
}
|
||||
|
||||
q.Logger().Info("### PCIe Topology ###")
|
||||
// Add PCIe Root Port or PCIe Switches to the hypervisor
|
||||
// The pcie.0 bus does not support hot-plug, but PCIe devices can be hot-plugged
|
||||
// into a PCIe Root Port or PCIe Switch.
|
||||
@ -778,12 +780,15 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
|
||||
if err != nil {
|
||||
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
|
||||
}
|
||||
q.Logger().Info("### PCIe Topology devices ", devicesPerIOMMUGroup)
|
||||
for _, vfioDevice := range devicesPerIOMMUGroup {
|
||||
q.Logger().Info("### PCIe Topology vfioDevice ", vfioDevice)
|
||||
if drivers.IsPCIeDevice(vfioDevice.BDF) {
|
||||
numOfPluggablePorts = numOfPluggablePorts + 1
|
||||
}
|
||||
}
|
||||
}
|
||||
q.Logger().Info("### PCIe Topology numOfPluggablePorts ", numOfPluggablePorts)
|
||||
|
||||
// If number of PCIe root ports > 16 then bail out otherwise we may
|
||||
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
|
||||
@ -2642,7 +2647,7 @@ func genericAppendPCIeSwitchPort(devices []govmmQemu.Device, number uint32, mach
|
||||
pcieRootPort := govmmQemu.PCIeRootPortDevice{
|
||||
ID: fmt.Sprintf("%s%s%d", config.PCIeSwitchPortPrefix, config.PCIeRootPortPrefix, 0),
|
||||
Bus: defaultBridgeBus,
|
||||
Chassis: "0",
|
||||
Chassis: "1",
|
||||
Slot: strconv.FormatUint(uint64(0), 10),
|
||||
Multifunction: false,
|
||||
Addr: "0",
|
||||
|
@ -620,20 +620,20 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
// the correct amount of ports to reserve for the hypervisor.
|
||||
hotPlugVFIO := (sandboxConfig.HypervisorConfig.HotPlugVFIO != config.NoPort)
|
||||
|
||||
var vfioHotPlugDevices []config.DeviceInfo
|
||||
var vfioColdPlugDevices []config.DeviceInfo
|
||||
var vfioDevices []config.DeviceInfo
|
||||
|
||||
for cnt, containers := range sandboxConfig.Containers {
|
||||
for dev, device := range containers.DeviceInfos {
|
||||
isVFIO := deviceManager.IsVFIO(device.ContainerPath)
|
||||
if hotPlugVFIO && isVFIO {
|
||||
vfioHotPlugDevices = append(vfioHotPlugDevices, device)
|
||||
vfioDevices = append(vfioDevices, device)
|
||||
sandboxConfig.Containers[cnt].DeviceInfos[dev].Port = sandboxConfig.HypervisorConfig.HotPlugVFIO
|
||||
}
|
||||
if coldPlugVFIO && isVFIO {
|
||||
s.Logger().Info("### coldplug and vfio ", device, "coldplug ", sandboxConfig.HypervisorConfig.ColdPlugVFIO)
|
||||
device.ColdPlug = true
|
||||
device.Port = sandboxConfig.HypervisorConfig.ColdPlugVFIO
|
||||
vfioColdPlugDevices = append(vfioColdPlugDevices, device)
|
||||
vfioDevices = append(vfioDevices, device)
|
||||
// We need to remove the devices marked for cold-plug
|
||||
// otherwise at the container level the kata-agent
|
||||
// will try to hot-plug them.
|
||||
@ -643,7 +643,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
}
|
||||
}
|
||||
}
|
||||
sandboxConfig.HypervisorConfig.VFIODevices = vfioHotPlugDevices
|
||||
sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices
|
||||
|
||||
// store doesn't require hypervisor to be stored immediately
|
||||
if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil {
|
||||
@ -658,7 +658,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
return s, nil
|
||||
}
|
||||
|
||||
for _, dev := range vfioColdPlugDevices {
|
||||
for _, dev := range vfioDevices {
|
||||
_, err := s.AddDevice(ctx, dev)
|
||||
if err != nil {
|
||||
s.Logger().WithError(err).Debug("Cannot cold-plug add device")
|
||||
|
Loading…
Reference in New Issue
Block a user