vfio: Added annotation for hot(cold) plug

Now it is possible to configure the PCIe topology via annotations
and addded a simple test, checking for Invalid and RootPort

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
Zvonko Kaiser 2023-05-10 07:04:36 +00:00
parent 8f0d4e2612
commit 40101ea7db
7 changed files with 126 additions and 52 deletions

View File

@ -162,6 +162,8 @@ const (
BridgePort = "bridge-port"
// NoPort is for disabling VFIO hotplug/coldplug
NoPort = "no-port"
// InvalidPort is for invalid port
InvalidPort = "invalid-port"
)
func (p PCIePort) String() string {
@ -173,6 +175,8 @@ func (p PCIePort) String() string {
case BridgePort:
fallthrough
case NoPort:
fallthrough
case InvalidPort:
return string(p)
}
return fmt.Sprintf("<unknown PCIePort: %s>", string(p))
@ -184,6 +188,34 @@ var PCIePortPrefixMapping = map[PCIePort]PCIePortBusPrefix{
BridgePort: PCIBridgePortPrefix,
}
func (p PCIePort) InValid() bool {
switch p {
case RootPort:
fallthrough
case SwitchPort:
fallthrough
case BridgePort:
fallthrough
case NoPort:
return false
}
return true
}
func (p PCIePort) Valid() bool {
switch p {
case RootPort:
fallthrough
case SwitchPort:
fallthrough
case BridgePort:
fallthrough
case NoPort:
return true
}
return false
}
// DeviceInfo is an embedded type that contains device data common to all types of devices.
type DeviceInfo struct {
// DriverOptions is specific options for each device driver

View File

@ -109,5 +109,3 @@ var defaultRuntimeConfiguration = "@CONFIG_PATH@"
const defaultHotPlugVFIO = config.NoPort
const defaultColdPlugVFIO = config.NoPort

View File

@ -453,6 +453,10 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig,
return err
}
if err := addHypervisorHotColdPlugVfioOverrides(ocispec, config); err != nil {
return err
}
if value, ok := ocispec.Annotations[vcAnnotations.MachineType]; ok {
if value != "" {
config.HypervisorConfig.HypervisorMachineType = value
@ -570,6 +574,33 @@ func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, ru
return nil
}
func addHypervisorPCIePortOverride(value string) (config.PCIePort, error) {
if value == "" {
return config.NoPort, nil
}
port := config.PCIePort(value)
if port.InValid() {
return config.InvalidPort, fmt.Errorf("Invalid PCIe port \"%v\" specified in annotation", value)
}
return port, nil
}
func addHypervisorHotColdPlugVfioOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
var err error
if value, ok := ocispec.Annotations[vcAnnotations.HotPlugVFIO]; ok {
if sbConfig.HypervisorConfig.HotPlugVFIO, err = addHypervisorPCIePortOverride(value); err != nil {
return err
}
}
if value, ok := ocispec.Annotations[vcAnnotations.ColdPlugVFIO]; ok {
if sbConfig.HypervisorConfig.ColdPlugVFIO, err = addHypervisorPCIePortOverride(value); err != nil {
return err
}
}
return nil
}
func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error {
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMemory).setUintWithCheck(func(memorySz uint64) error {

View File

@ -599,7 +599,7 @@ func TestContainerPipeSizeAnnotation(t *testing.T) {
func TestAddHypervisorAnnotations(t *testing.T) {
assert := assert.New(t)
config := vc.SandboxConfig{
sbConfig := vc.SandboxConfig{
Annotations: make(map[string]string),
}
@ -628,8 +628,8 @@ func TestAddHypervisorAnnotations(t *testing.T) {
runtimeConfig.HypervisorConfig.VirtioFSDaemonList = []string{"/bin/*ls*"}
ocispec.Annotations[vcAnnotations.KernelParams] = "vsyscall=emulate iommu=on"
addHypervisorConfigOverrides(ocispec, &config, runtimeConfig)
assert.Exactly(expectedHyperConfig, config.HypervisorConfig)
addHypervisorConfigOverrides(ocispec, &sbConfig, runtimeConfig)
assert.Exactly(expectedHyperConfig, sbConfig.HypervisorConfig)
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1"
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1"
@ -660,7 +660,8 @@ func TestAddHypervisorAnnotations(t *testing.T) {
ocispec.Annotations[vcAnnotations.GuestHookPath] = "/usr/bin/"
ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true"
ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus] = "true"
ocispec.Annotations[vcAnnotations.PCIeRootPort] = "2"
ocispec.Annotations[vcAnnotations.ColdPlugVFIO] = string(config.InvalidPort)
ocispec.Annotations[vcAnnotations.HotPlugVFIO] = string(config.RootPort)
ocispec.Annotations[vcAnnotations.IOMMUPlatform] = "true"
ocispec.Annotations[vcAnnotations.SGXEPC] = "64Mi"
ocispec.Annotations[vcAnnotations.UseLegacySerial] = "true"
@ -668,55 +669,56 @@ func TestAddHypervisorAnnotations(t *testing.T) {
ocispec.Annotations[vcAnnotations.RxRateLimiterMaxRate] = "10000000"
ocispec.Annotations[vcAnnotations.TxRateLimiterMaxRate] = "10000000"
addAnnotations(ocispec, &config, runtimeConfig)
assert.Equal(config.HypervisorConfig.NumVCPUs, uint32(1))
assert.Equal(config.HypervisorConfig.DefaultMaxVCPUs, uint32(1))
assert.Equal(config.HypervisorConfig.MemorySize, uint32(1024))
assert.Equal(config.HypervisorConfig.MemSlots, uint32(20))
assert.Equal(config.HypervisorConfig.MemOffset, uint64(512))
assert.Equal(config.HypervisorConfig.VirtioMem, true)
assert.Equal(config.HypervisorConfig.MemPrealloc, true)
assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")
assert.Equal(config.HypervisorConfig.HugePages, true)
assert.Equal(config.HypervisorConfig.IOMMU, true)
assert.Equal(config.HypervisorConfig.BlockDeviceDriver, "virtio-scsi")
assert.Equal(config.HypervisorConfig.BlockDeviceAIO, "io_uring")
assert.Equal(config.HypervisorConfig.DisableBlockDeviceUse, true)
assert.Equal(config.HypervisorConfig.EnableIOThreads, true)
assert.Equal(config.HypervisorConfig.BlockDeviceCacheSet, true)
assert.Equal(config.HypervisorConfig.BlockDeviceCacheDirect, true)
assert.Equal(config.HypervisorConfig.BlockDeviceCacheNoflush, true)
assert.Equal(config.HypervisorConfig.SharedFS, "virtio-fs")
assert.Equal(config.HypervisorConfig.VirtioFSDaemon, "/bin/false")
assert.Equal(config.HypervisorConfig.VirtioFSCache, "auto")
assert.ElementsMatch(config.HypervisorConfig.VirtioFSExtraArgs, [2]string{"arg0", "arg1"})
assert.Equal(config.HypervisorConfig.Msize9p, uint32(512))
assert.Equal(config.HypervisorConfig.HypervisorMachineType, "q35")
assert.Equal(config.HypervisorConfig.MachineAccelerators, "nofw")
assert.Equal(config.HypervisorConfig.CPUFeatures, "pmu=off")
assert.Equal(config.HypervisorConfig.DisableVhostNet, true)
assert.Equal(config.HypervisorConfig.GuestHookPath, "/usr/bin/")
assert.Equal(config.HypervisorConfig.DisableImageNvdimm, true)
assert.Equal(config.HypervisorConfig.HotplugVFIOOnRootBus, true)
assert.Equal(config.HypervisorConfig.PCIeRootPort, uint32(2))
assert.Equal(config.HypervisorConfig.IOMMUPlatform, true)
assert.Equal(config.HypervisorConfig.SGXEPCSize, int64(67108864))
assert.Equal(config.HypervisorConfig.LegacySerial, true)
assert.Equal(config.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000))
assert.Equal(config.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000))
addAnnotations(ocispec, &sbConfig, runtimeConfig)
assert.Equal(sbConfig.HypervisorConfig.NumVCPUs, uint32(1))
assert.Equal(sbConfig.HypervisorConfig.DefaultMaxVCPUs, uint32(1))
assert.Equal(sbConfig.HypervisorConfig.MemorySize, uint32(1024))
assert.Equal(sbConfig.HypervisorConfig.MemSlots, uint32(20))
assert.Equal(sbConfig.HypervisorConfig.MemOffset, uint64(512))
assert.Equal(sbConfig.HypervisorConfig.VirtioMem, true)
assert.Equal(sbConfig.HypervisorConfig.MemPrealloc, true)
assert.Equal(sbConfig.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")
assert.Equal(sbConfig.HypervisorConfig.HugePages, true)
assert.Equal(sbConfig.HypervisorConfig.IOMMU, true)
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceDriver, "virtio-scsi")
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceAIO, "io_uring")
assert.Equal(sbConfig.HypervisorConfig.DisableBlockDeviceUse, true)
assert.Equal(sbConfig.HypervisorConfig.EnableIOThreads, true)
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheSet, true)
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheDirect, true)
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheNoflush, true)
assert.Equal(sbConfig.HypervisorConfig.SharedFS, "virtio-fs")
assert.Equal(sbConfig.HypervisorConfig.VirtioFSDaemon, "/bin/false")
assert.Equal(sbConfig.HypervisorConfig.VirtioFSCache, "auto")
assert.ElementsMatch(sbConfig.HypervisorConfig.VirtioFSExtraArgs, [2]string{"arg0", "arg1"})
assert.Equal(sbConfig.HypervisorConfig.Msize9p, uint32(512))
assert.Equal(sbConfig.HypervisorConfig.HypervisorMachineType, "q35")
assert.Equal(sbConfig.HypervisorConfig.MachineAccelerators, "nofw")
assert.Equal(sbConfig.HypervisorConfig.CPUFeatures, "pmu=off")
assert.Equal(sbConfig.HypervisorConfig.DisableVhostNet, true)
assert.Equal(sbConfig.HypervisorConfig.GuestHookPath, "/usr/bin/")
assert.Equal(sbConfig.HypervisorConfig.DisableImageNvdimm, true)
assert.Equal(sbConfig.HypervisorConfig.HotplugVFIOOnRootBus, true)
assert.Equal(sbConfig.HypervisorConfig.ColdPlugVFIO, config.InvalidPort)
assert.Equal(sbConfig.HypervisorConfig.HotPlugVFIO, config.RootPort)
assert.Equal(sbConfig.HypervisorConfig.IOMMUPlatform, true)
assert.Equal(sbConfig.HypervisorConfig.SGXEPCSize, int64(67108864))
assert.Equal(sbConfig.HypervisorConfig.LegacySerial, true)
assert.Equal(sbConfig.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000))
assert.Equal(sbConfig.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000))
// In case an absurd large value is provided, the config value if not over-ridden
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "655536"
err := addAnnotations(ocispec, &config, runtimeConfig)
err := addAnnotations(ocispec, &sbConfig, runtimeConfig)
assert.Error(err)
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "-1"
err = addAnnotations(ocispec, &config, runtimeConfig)
err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
assert.Error(err)
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1"
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "-1"
err = addAnnotations(ocispec, &config, runtimeConfig)
err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
assert.Error(err)
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1"

View File

@ -126,6 +126,12 @@ const (
// root bus instead of a bridge.
HotplugVFIOOnRootBus = kataAnnotHypervisorPrefix + "hotplug_vfio_on_root_bus"
// ColdPlugVFIO is a sandbox annotation used to indicate if devices need to be coldplugged.
ColdPlugVFIO = kataAnnotHypervisorPrefix + "cold_plug_vfio"
// HotPlugVFIO is a sandbox annotation used to indicate if devices need to be hotplugged.
HotPlugVFIO = kataAnnotHypervisorPrefix + "hot_plug_vfio"
// EntropySource is a sandbox annotation to specify the path to a host source of
// entropy (/dev/random, /dev/urandom or real hardware RNG device)
EntropySource = kataAnnotHypervisorPrefix + "entropy_source"

View File

@ -746,6 +746,8 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
if hypervisorConfig.HotPlugVFIO == config.NoPort && hypervisorConfig.ColdPlugVFIO == config.NoPort {
return nil
}
q.Logger().Info("### PCIe Topology ###")
// Add PCIe Root Port or PCIe Switches to the hypervisor
// The pcie.0 bus do not support hot-plug, but PCIe device can be hot-plugged
// into a PCIe Root Port or PCIe Switch.
@ -778,12 +780,15 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig
if err != nil {
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
}
q.Logger().Info("### PCIe Topology devices ", devicesPerIOMMUGroup)
for _, vfioDevice := range devicesPerIOMMUGroup {
q.Logger().Info("### PCIe Topology vfioDevice ", vfioDevice)
if drivers.IsPCIeDevice(vfioDevice.BDF) {
numOfPluggablePorts = numOfPluggablePorts + 1
}
}
}
q.Logger().Info("### PCIe Topology numOfPluggablePorts ", numOfPluggablePorts)
// If number of PCIe root ports > 16 then bail out otherwise we may
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
@ -2642,7 +2647,7 @@ func genericAppendPCIeSwitchPort(devices []govmmQemu.Device, number uint32, mach
pcieRootPort := govmmQemu.PCIeRootPortDevice{
ID: fmt.Sprintf("%s%s%d", config.PCIeSwitchPortPrefix, config.PCIeRootPortPrefix, 0),
Bus: defaultBridgeBus,
Chassis: "0",
Chassis: "1",
Slot: strconv.FormatUint(uint64(0), 10),
Multifunction: false,
Addr: "0",

View File

@ -620,20 +620,20 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
// the correct amount of ports to reserve for the hypervisor.
hotPlugVFIO := (sandboxConfig.HypervisorConfig.HotPlugVFIO != config.NoPort)
var vfioHotPlugDevices []config.DeviceInfo
var vfioColdPlugDevices []config.DeviceInfo
var vfioDevices []config.DeviceInfo
for cnt, containers := range sandboxConfig.Containers {
for dev, device := range containers.DeviceInfos {
isVFIO := deviceManager.IsVFIO(device.ContainerPath)
if hotPlugVFIO && isVFIO {
vfioHotPlugDevices = append(vfioHotPlugDevices, device)
vfioDevices = append(vfioDevices, device)
sandboxConfig.Containers[cnt].DeviceInfos[dev].Port = sandboxConfig.HypervisorConfig.HotPlugVFIO
}
if coldPlugVFIO && isVFIO {
s.Logger().Info("### coldplug and vfio ", device, "coldplug ", sandboxConfig.HypervisorConfig.ColdPlugVFIO)
device.ColdPlug = true
device.Port = sandboxConfig.HypervisorConfig.ColdPlugVFIO
vfioColdPlugDevices = append(vfioColdPlugDevices, device)
vfioDevices = append(vfioDevices, device)
// We need to remove the devices marked for cold-plug
// otherwise at the container level the kata-agent
// will try to hot-plug them.
@ -643,7 +643,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
}
}
}
sandboxConfig.HypervisorConfig.VFIODevices = vfioHotPlugDevices
sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices
// store doesn't require hypervisor to be stored immediately
if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil {
@ -658,7 +658,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
return s, nil
}
for _, dev := range vfioColdPlugDevices {
for _, dev := range vfioDevices {
_, err := s.AddDevice(ctx, dev)
if err != nil {
s.Logger().WithError(err).Debug("Cannot cold-plug add device")