diff --git a/src/runtime/pkg/device/config/config.go b/src/runtime/pkg/device/config/config.go
index f6ac005571..dcb0e93b8f 100644
--- a/src/runtime/pkg/device/config/config.go
+++ b/src/runtime/pkg/device/config/config.go
@@ -162,6 +162,8 @@ const (
 	BridgePort = "bridge-port"
 	// NoPort is for disabling VFIO hotplug/coldplug
 	NoPort = "no-port"
+	// InvalidPort is for invalid port
+	InvalidPort = "invalid-port"
 )
 
 func (p PCIePort) String() string {
@@ -173,6 +175,8 @@ func (p PCIePort) String() string {
 	case BridgePort:
 		fallthrough
 	case NoPort:
+		fallthrough
+	case InvalidPort:
 		return string(p)
 	}
 	return fmt.Sprintf("", string(p))
@@ -184,6 +188,23 @@ var PCIePortPrefixMapping = map[PCIePort]PCIePortBusPrefix{
 	BridgePort: PCIBridgePortPrefix,
 }
 
+// InValid reports whether p is not one of the supported PCIe port types.
+// It is the exact negation of Valid, so InvalidPort and any unrecognised
+// value are reported as invalid.
+func (p PCIePort) InValid() bool {
+	return !p.Valid()
+}
+
+// Valid reports whether p is one of RootPort, SwitchPort, BridgePort or
+// NoPort.
+func (p PCIePort) Valid() bool {
+	switch p {
+	case RootPort, SwitchPort, BridgePort, NoPort:
+		return true
+	}
+	return false
+}
+
 // DeviceInfo is an embedded type that contains device data common to all types of devices.
type DeviceInfo struct {
 	// DriverOptions is specific options for each device driver
diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in
index d662f910f0..1a4b7da04f 100644
--- a/src/runtime/pkg/katautils/config-settings.go.in
+++ b/src/runtime/pkg/katautils/config-settings.go.in
@@ -109,5 +109,3 @@ var defaultRuntimeConfiguration = "@CONFIG_PATH@"
 
 const defaultHotPlugVFIO = config.NoPort
 const defaultColdPlugVFIO = config.NoPort
-
-
diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go
index bcbf30f8f7..e53fc8e2ca 100644
--- a/src/runtime/pkg/oci/utils.go
+++ b/src/runtime/pkg/oci/utils.go
@@ -453,6 +453,10 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig,
 		return err
 	}
 
+	if err := addHypervisorHotColdPlugVfioOverrides(ocispec, config); err != nil {
+		return err
+	}
+
 	if value, ok := ocispec.Annotations[vcAnnotations.MachineType]; ok {
 		if value != "" {
 			config.HypervisorConfig.HypervisorMachineType = value
@@ -570,6 +574,49 @@ func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, ru
 	return nil
 }
 
+// addHypervisorPCIePortOverride converts the value of a VFIO hot/cold-plug
+// annotation into a config.PCIePort. An empty value maps to config.NoPort;
+// a value rejected by InValid yields config.InvalidPort and an error.
+func addHypervisorPCIePortOverride(value string) (config.PCIePort, error) {
+	port := config.PCIePort(value)
+	switch {
+	case value == "":
+		return config.NoPort, nil
+	case port.InValid():
+		return config.InvalidPort, fmt.Errorf("Invalid PCIe port \"%v\" specified in annotation", value)
+	default:
+		return port, nil
+	}
+}
+
+// addHypervisorHotColdPlugVfioOverrides applies the HotPlugVFIO and
+// ColdPlugVFIO annotations, when present, to the sandbox hypervisor
+// configuration. The hot-plug annotation is handled first and the first
+// parse error is returned. The parsed port is stored before the error is
+// checked, so a rejected value leaves config.InvalidPort in the field.
+func addHypervisorHotColdPlugVfioOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
+	hvConfig := &sbConfig.HypervisorConfig
+
+	if hotPlug, found := ocispec.Annotations[vcAnnotations.HotPlugVFIO]; found {
+		port, err := addHypervisorPCIePortOverride(hotPlug)
+		hvConfig.HotPlugVFIO = port
+		if err != nil {
+			return err
+		}
+	}
+
+	if coldPlug, found := ocispec.Annotations[vcAnnotations.ColdPlugVFIO]; found {
+		port, err := addHypervisorPCIePortOverride(coldPlug)
+		hvConfig.ColdPlugVFIO = port
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func 
addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error { if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMemory).setUintWithCheck(func(memorySz uint64) error { diff --git a/src/runtime/pkg/oci/utils_test.go b/src/runtime/pkg/oci/utils_test.go index 7b2c10f2f0..deb62cc147 100644 --- a/src/runtime/pkg/oci/utils_test.go +++ b/src/runtime/pkg/oci/utils_test.go @@ -599,7 +599,7 @@ func TestContainerPipeSizeAnnotation(t *testing.T) { func TestAddHypervisorAnnotations(t *testing.T) { assert := assert.New(t) - config := vc.SandboxConfig{ + sbConfig := vc.SandboxConfig{ Annotations: make(map[string]string), } @@ -628,8 +628,8 @@ func TestAddHypervisorAnnotations(t *testing.T) { runtimeConfig.HypervisorConfig.VirtioFSDaemonList = []string{"/bin/*ls*"} ocispec.Annotations[vcAnnotations.KernelParams] = "vsyscall=emulate iommu=on" - addHypervisorConfigOverrides(ocispec, &config, runtimeConfig) - assert.Exactly(expectedHyperConfig, config.HypervisorConfig) + addHypervisorConfigOverrides(ocispec, &sbConfig, runtimeConfig) + assert.Exactly(expectedHyperConfig, sbConfig.HypervisorConfig) ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1" ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1" @@ -660,7 +660,8 @@ func TestAddHypervisorAnnotations(t *testing.T) { ocispec.Annotations[vcAnnotations.GuestHookPath] = "/usr/bin/" ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true" ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus] = "true" - ocispec.Annotations[vcAnnotations.PCIeRootPort] = "2" + ocispec.Annotations[vcAnnotations.ColdPlugVFIO] = string(config.InvalidPort) + ocispec.Annotations[vcAnnotations.HotPlugVFIO] = string(config.RootPort) ocispec.Annotations[vcAnnotations.IOMMUPlatform] = "true" ocispec.Annotations[vcAnnotations.SGXEPC] = "64Mi" ocispec.Annotations[vcAnnotations.UseLegacySerial] = "true" @@ -668,55 +669,56 @@ func TestAddHypervisorAnnotations(t *testing.T) { 
ocispec.Annotations[vcAnnotations.RxRateLimiterMaxRate] = "10000000" ocispec.Annotations[vcAnnotations.TxRateLimiterMaxRate] = "10000000" - addAnnotations(ocispec, &config, runtimeConfig) - assert.Equal(config.HypervisorConfig.NumVCPUs, uint32(1)) - assert.Equal(config.HypervisorConfig.DefaultMaxVCPUs, uint32(1)) - assert.Equal(config.HypervisorConfig.MemorySize, uint32(1024)) - assert.Equal(config.HypervisorConfig.MemSlots, uint32(20)) - assert.Equal(config.HypervisorConfig.MemOffset, uint64(512)) - assert.Equal(config.HypervisorConfig.VirtioMem, true) - assert.Equal(config.HypervisorConfig.MemPrealloc, true) - assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm") - assert.Equal(config.HypervisorConfig.HugePages, true) - assert.Equal(config.HypervisorConfig.IOMMU, true) - assert.Equal(config.HypervisorConfig.BlockDeviceDriver, "virtio-scsi") - assert.Equal(config.HypervisorConfig.BlockDeviceAIO, "io_uring") - assert.Equal(config.HypervisorConfig.DisableBlockDeviceUse, true) - assert.Equal(config.HypervisorConfig.EnableIOThreads, true) - assert.Equal(config.HypervisorConfig.BlockDeviceCacheSet, true) - assert.Equal(config.HypervisorConfig.BlockDeviceCacheDirect, true) - assert.Equal(config.HypervisorConfig.BlockDeviceCacheNoflush, true) - assert.Equal(config.HypervisorConfig.SharedFS, "virtio-fs") - assert.Equal(config.HypervisorConfig.VirtioFSDaemon, "/bin/false") - assert.Equal(config.HypervisorConfig.VirtioFSCache, "auto") - assert.ElementsMatch(config.HypervisorConfig.VirtioFSExtraArgs, [2]string{"arg0", "arg1"}) - assert.Equal(config.HypervisorConfig.Msize9p, uint32(512)) - assert.Equal(config.HypervisorConfig.HypervisorMachineType, "q35") - assert.Equal(config.HypervisorConfig.MachineAccelerators, "nofw") - assert.Equal(config.HypervisorConfig.CPUFeatures, "pmu=off") - assert.Equal(config.HypervisorConfig.DisableVhostNet, true) - assert.Equal(config.HypervisorConfig.GuestHookPath, "/usr/bin/") - 
assert.Equal(config.HypervisorConfig.DisableImageNvdimm, true) - assert.Equal(config.HypervisorConfig.HotplugVFIOOnRootBus, true) - assert.Equal(config.HypervisorConfig.PCIeRootPort, uint32(2)) - assert.Equal(config.HypervisorConfig.IOMMUPlatform, true) - assert.Equal(config.HypervisorConfig.SGXEPCSize, int64(67108864)) - assert.Equal(config.HypervisorConfig.LegacySerial, true) - assert.Equal(config.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000)) - assert.Equal(config.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000)) + addAnnotations(ocispec, &sbConfig, runtimeConfig) + assert.Equal(sbConfig.HypervisorConfig.NumVCPUs, uint32(1)) + assert.Equal(sbConfig.HypervisorConfig.DefaultMaxVCPUs, uint32(1)) + assert.Equal(sbConfig.HypervisorConfig.MemorySize, uint32(1024)) + assert.Equal(sbConfig.HypervisorConfig.MemSlots, uint32(20)) + assert.Equal(sbConfig.HypervisorConfig.MemOffset, uint64(512)) + assert.Equal(sbConfig.HypervisorConfig.VirtioMem, true) + assert.Equal(sbConfig.HypervisorConfig.MemPrealloc, true) + assert.Equal(sbConfig.HypervisorConfig.FileBackedMemRootDir, "/dev/shm") + assert.Equal(sbConfig.HypervisorConfig.HugePages, true) + assert.Equal(sbConfig.HypervisorConfig.IOMMU, true) + assert.Equal(sbConfig.HypervisorConfig.BlockDeviceDriver, "virtio-scsi") + assert.Equal(sbConfig.HypervisorConfig.BlockDeviceAIO, "io_uring") + assert.Equal(sbConfig.HypervisorConfig.DisableBlockDeviceUse, true) + assert.Equal(sbConfig.HypervisorConfig.EnableIOThreads, true) + assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheSet, true) + assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheDirect, true) + assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheNoflush, true) + assert.Equal(sbConfig.HypervisorConfig.SharedFS, "virtio-fs") + assert.Equal(sbConfig.HypervisorConfig.VirtioFSDaemon, "/bin/false") + assert.Equal(sbConfig.HypervisorConfig.VirtioFSCache, "auto") + assert.ElementsMatch(sbConfig.HypervisorConfig.VirtioFSExtraArgs, [2]string{"arg0", 
"arg1"}) + assert.Equal(sbConfig.HypervisorConfig.Msize9p, uint32(512)) + assert.Equal(sbConfig.HypervisorConfig.HypervisorMachineType, "q35") + assert.Equal(sbConfig.HypervisorConfig.MachineAccelerators, "nofw") + assert.Equal(sbConfig.HypervisorConfig.CPUFeatures, "pmu=off") + assert.Equal(sbConfig.HypervisorConfig.DisableVhostNet, true) + assert.Equal(sbConfig.HypervisorConfig.GuestHookPath, "/usr/bin/") + assert.Equal(sbConfig.HypervisorConfig.DisableImageNvdimm, true) + assert.Equal(sbConfig.HypervisorConfig.HotplugVFIOOnRootBus, true) + assert.Equal(sbConfig.HypervisorConfig.ColdPlugVFIO, config.InvalidPort) + assert.Equal(sbConfig.HypervisorConfig.HotPlugVFIO, config.RootPort) + assert.Equal(sbConfig.HypervisorConfig.IOMMUPlatform, true) + assert.Equal(sbConfig.HypervisorConfig.SGXEPCSize, int64(67108864)) + assert.Equal(sbConfig.HypervisorConfig.LegacySerial, true) + assert.Equal(sbConfig.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000)) + assert.Equal(sbConfig.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000)) // In case an absurd large value is provided, the config value if not over-ridden ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "655536" - err := addAnnotations(ocispec, &config, runtimeConfig) + err := addAnnotations(ocispec, &sbConfig, runtimeConfig) assert.Error(err) ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "-1" - err = addAnnotations(ocispec, &config, runtimeConfig) + err = addAnnotations(ocispec, &sbConfig, runtimeConfig) assert.Error(err) ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1" ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "-1" - err = addAnnotations(ocispec, &config, runtimeConfig) + err = addAnnotations(ocispec, &sbConfig, runtimeConfig) assert.Error(err) ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1" diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index 238ab28901..bad1f66ee2 100644 --- 
a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -126,6 +126,12 @@ const ( // root bus instead of a bridge. HotplugVFIOOnRootBus = kataAnnotHypervisorPrefix + "hotplug_vfio_on_root_bus" + // ColdPlugVFIO is a sandbox annotation used to indicate if devices need to be coldplugged. + ColdPlugVFIO = kataAnnotHypervisorPrefix + "cold_plug_vfio" + + // HotPlugVFIO is a sandbox annotation used to indicate if devices need to be hotplugged. + HotPlugVFIO = kataAnnotHypervisorPrefix + "hot_plug_vfio" + // EntropySource is a sandbox annotation to specify the path to a host source of // entropy (/dev/random, /dev/urandom or real hardware RNG device) EntropySource = kataAnnotHypervisorPrefix + "entropy_source" diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 685cc791b2..284ce2d793 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -746,6 +746,8 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig if hypervisorConfig.HotPlugVFIO == config.NoPort && hypervisorConfig.ColdPlugVFIO == config.NoPort { return nil } + + q.Logger().Info("### PCIe Topology ###") // Add PCIe Root Port or PCIe Switches to the hypervisor // The pcie.0 bus do not support hot-plug, but PCIe device can be hot-plugged // into a PCIe Root Port or PCIe Switch. 
@@ -778,12 +780,15 @@ func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig if err != nil { return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err) } + q.Logger().Info("### PCIe Topology devices ", devicesPerIOMMUGroup) for _, vfioDevice := range devicesPerIOMMUGroup { + q.Logger().Info("### PCIe Topology vfioDevice ", vfioDevice) if drivers.IsPCIeDevice(vfioDevice.BDF) { numOfPluggablePorts = numOfPluggablePorts + 1 } } } + q.Logger().Info("### PCIe Topology numOfPluggablePorts ", numOfPluggablePorts) // If number of PCIe root ports > 16 then bail out otherwise we may // use up all slots or IO memory on the root bus and vfio-XXX-pci devices @@ -2642,7 +2647,7 @@ func genericAppendPCIeSwitchPort(devices []govmmQemu.Device, number uint32, mach pcieRootPort := govmmQemu.PCIeRootPortDevice{ ID: fmt.Sprintf("%s%s%d", config.PCIeSwitchPortPrefix, config.PCIeRootPortPrefix, 0), Bus: defaultBridgeBus, - Chassis: "0", + Chassis: "1", Slot: strconv.FormatUint(uint64(0), 10), Multifunction: false, Addr: "0", diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 9c227ad56c..5244fdf00d 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -620,20 +620,20 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor // the correct amount of ports to reserve for the hypervisor. 
hotPlugVFIO := (sandboxConfig.HypervisorConfig.HotPlugVFIO != config.NoPort) - var vfioHotPlugDevices []config.DeviceInfo - var vfioColdPlugDevices []config.DeviceInfo + var vfioDevices []config.DeviceInfo for cnt, containers := range sandboxConfig.Containers { for dev, device := range containers.DeviceInfos { isVFIO := deviceManager.IsVFIO(device.ContainerPath) if hotPlugVFIO && isVFIO { - vfioHotPlugDevices = append(vfioHotPlugDevices, device) + vfioDevices = append(vfioDevices, device) sandboxConfig.Containers[cnt].DeviceInfos[dev].Port = sandboxConfig.HypervisorConfig.HotPlugVFIO } if coldPlugVFIO && isVFIO { + s.Logger().Info("### coldplug and vfio ", device, "coldplug ", sandboxConfig.HypervisorConfig.ColdPlugVFIO) device.ColdPlug = true device.Port = sandboxConfig.HypervisorConfig.ColdPlugVFIO - vfioColdPlugDevices = append(vfioColdPlugDevices, device) + vfioDevices = append(vfioDevices, device) // We need to remove the devices marked for cold-plug // otherwise at the container level the kata-agent // will try to hot-plug them. @@ -643,7 +643,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor } } } - sandboxConfig.HypervisorConfig.VFIODevices = vfioHotPlugDevices + sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices // store doesn't require hypervisor to be stored immediately if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil { @@ -658,7 +658,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor return s, nil } - for _, dev := range vfioColdPlugDevices { + for _, dev := range vfioDevices { _, err := s.AddDevice(ctx, dev) if err != nil { s.Logger().WithError(err).Debug("Cannot cold-plug add device")