diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in
index 186b4dccd2..70d6caa080 100644
--- a/src/runtime/config/configuration-clh.toml.in
+++ b/src/runtime/config/configuration-clh.toml.in
@@ -229,6 +229,12 @@ disable_image_nvdimm = @DEFDISABLEIMAGENVDIMM_CLH@
 # The default setting is "no-port"
 hot_plug_vfio = "no-port"
 
+# In a confidential compute environment hot-plugging can compromise
+# security.
+# Enable cold-plugging of VFIO devices to a root-port.
+# The default setting is "no-port", which means disabled.
+cold_plug_vfio = "no-port"
+
 # Path to OCI hook binaries in the *guest rootfs*.
 # This does not affect host-side hooks which must instead be added to
 # the OCI spec passed to the runtime.
diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go
index eab2d90112..d4c860fe38 100644
--- a/src/runtime/pkg/katautils/config.go
+++ b/src/runtime/pkg/katautils/config.go
@@ -1950,11 +1950,11 @@ func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineT
 		return nil
 	}
 	if hypervisorType == vc.ClhHypervisor {
-		if coldPlug != config.NoPort {
-			return fmt.Errorf("cold-plug not supported on CLH")
+		if coldPlug != config.NoPort && coldPlug != config.RootPort {
+			return fmt.Errorf("only cold-plug=%s or %s supported on CLH", config.NoPort, config.RootPort)
 		}
-		if hotPlug != config.RootPort {
-			return fmt.Errorf("only hot-plug=%s supported on CLH", config.RootPort)
+		if hotPlug != config.NoPort && hotPlug != config.RootPort {
+			return fmt.Errorf("only hot-plug=%s or %s supported on CLH", config.NoPort, config.RootPort)
 		}
 	}
 
diff --git a/src/runtime/pkg/katautils/create_test.go b/src/runtime/pkg/katautils/create_test.go
index 588ef9cbb5..d1b53e28b6 100644
--- a/src/runtime/pkg/katautils/create_test.go
+++ b/src/runtime/pkg/katautils/create_test.go
@@ -430,9 +430,11 @@ func TestVfioChecksClh(t *testing.T) {
 	}
 	assert.NoError(f(config.NoPort, config.NoPort))
 	assert.NoError(f(config.NoPort, config.RootPort))
+	assert.NoError(f(config.RootPort, config.NoPort))
 	assert.Error(f(config.RootPort, config.RootPort))
-	assert.Error(f(config.RootPort, config.NoPort))
 	assert.Error(f(config.NoPort, config.SwitchPort))
+	assert.Error(f(config.SwitchPort, config.NoPort))
+	assert.Error(f(config.BridgePort, config.NoPort))
 }
 
 func TestVfioCheckQemu(t *testing.T) {
diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go
index 3e69b1c530..51e3577ab6 100644
--- a/src/runtime/virtcontainers/clh.go
+++ b/src/runtime/virtcontainers/clh.go
@@ -976,6 +976,44 @@ func (clh *cloudHypervisor) hotplugAddBlockDevice(drive *config.BlockDrive) erro
 	return err
 }
 
+// coldPlugVFIODevice appends a VFIO device to the VM configuration so that it
+// is present when the VM is created (before boot). Cloud Hypervisor's CreateVM
+// API accepts a list of devices that are attached at VM creation time, which
+// effectively provides cold-plug semantics — the guest sees the device on its
+// PCI bus from the very first enumeration.
+func (clh *cloudHypervisor) coldPlugVFIODevice(device *config.VFIODev) error {
+	switch device.Type {
+	case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
+		// Supported PCI VFIO device types for Cloud Hypervisor.
+	default:
+		return fmt.Errorf("VFIO device %+v has unsupported type %v; only PCI VFIO devices are supported in Cloud Hypervisor", device, device.Type)
+	}
+	if strings.TrimSpace(device.SysfsDev) == "" {
+		return fmt.Errorf("VFIO device %q has empty or invalid SysfsDev path", device.ID)
+	}
+
+	clh.Logger().WithFields(log.Fields{
+		"device": device.ID,
+		"sysfs":  device.SysfsDev,
+		"bdf":    device.BDF,
+	}).Info("Cold-plugging VFIO device into VM config")
+
+	clhDevice := *chclient.NewDeviceConfig(device.SysfsDev)
+	clhDevice.SetIommu(clh.config.IOMMU)
+	clhDevice.SetId(device.ID)
+
+	if clh.vmconfig.Devices != nil {
+		*clh.vmconfig.Devices = append(*clh.vmconfig.Devices, clhDevice)
+	} else {
+		clh.vmconfig.Devices = &[]chclient.DeviceConfig{clhDevice}
+	}
+
+	// Track the device ID so that it can be referenced later (e.g. for removal).
+	clh.devicesIds[device.ID] = device.ID
+
+	return nil
+}
+
 func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
 	cl := clh.client()
 	ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second)
@@ -1342,6 +1380,8 @@ func (clh *cloudHypervisor) AddDevice(ctx context.Context, devInfo interface{},
 		clh.addVSock(defaultGuestVSockCID, v.UdsPath)
 	case types.Volume:
 		err = clh.addVolume(v)
+	case config.VFIODev:
+		err = clh.coldPlugVFIODevice(&v)
 	default:
 		clh.Logger().WithField("function", "AddDevice").Warnf("Add device of type %v is not supported.", v)
 		return fmt.Errorf("Not implemented support for %s", v)
diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go
index 45fe1c08ba..7ddbcff206 100644
--- a/src/runtime/virtcontainers/clh_test.go
+++ b/src/runtime/virtcontainers/clh_test.go
@@ -682,6 +682,94 @@ func TestCloudHypervisorHotplugRemoveDevice(t *testing.T) {
 	assert.Error(err, "Hotplug remove pmem block device expected error")
 }
 
+func TestCloudHypervisorColdPlugVFIODevice(t *testing.T) {
+	assert := assert.New(t)
+
+	clhConfig, err := newClhConfig()
+	assert.NoError(err)
+
+	clh := &cloudHypervisor{}
+	clh.config = clhConfig
+	clh.devicesIds = make(map[string]string)
+	clh.vmconfig = *chclient.NewVmConfig(*chclient.NewPayloadConfig())
+
+	// Cold-plug a PCI VFIO device
+	dev := &config.VFIODev{
+		ID:       "gpu0",
+		SysfsDev: "/sys/bus/pci/devices/0000:41:00.0",
+		BDF:      "0000:41:00.0",
+		Type:     config.VFIOPCIDeviceNormalType,
+	}
+	err = clh.coldPlugVFIODevice(dev)
+	assert.NoError(err, "Cold-plug PCI VFIO device expected no error")
+
+	// Verify the device was added to vmconfig.Devices
+	assert.NotNil(clh.vmconfig.Devices)
+	assert.Len(*clh.vmconfig.Devices, 1)
+	assert.Equal("/sys/bus/pci/devices/0000:41:00.0", (*clh.vmconfig.Devices)[0].Path)
+	assert.Equal("gpu0", clh.devicesIds["gpu0"])
+
+	// Cold-plug a second device
+	dev2 := &config.VFIODev{
+		ID:       "gpu1",
+		SysfsDev: "/sys/bus/pci/devices/0000:42:00.0",
+		BDF:      "0000:42:00.0",
+		Type:     config.VFIOPCIDeviceNormalType,
+	}
+	err = clh.coldPlugVFIODevice(dev2)
+	assert.NoError(err, "Cold-plug second VFIO device expected no error")
+	assert.Len(*clh.vmconfig.Devices, 2)
+
+	// AP mediated device should fail
+	apDev := &config.VFIODev{
+		ID:   "ap0",
+		Type: config.VFIOAPDeviceMediatedType,
+	}
+	err = clh.coldPlugVFIODevice(apDev)
+	assert.Error(err, "Cold-plug AP mediated device expected error")
+
+	// Error type (0) should fail
+	errDev := &config.VFIODev{
+		ID:       "bad0",
+		SysfsDev: "/sys/bus/pci/devices/0000:43:00.0",
+		Type:     config.VFIODeviceErrorType,
+	}
+	err = clh.coldPlugVFIODevice(errDev)
+	assert.Error(err, "Cold-plug error-type device expected error")
+
+	// Empty SysfsDev should fail
+	emptySysfsDev := &config.VFIODev{
+		ID:   "bad1",
+		Type: config.VFIOPCIDeviceNormalType,
+	}
+	err = clh.coldPlugVFIODevice(emptySysfsDev)
+	assert.Error(err, "Cold-plug with empty SysfsDev expected error")
+}
+
+func TestCloudHypervisorAddDeviceVFIO(t *testing.T) {
+	assert := assert.New(t)
+
+	clhConfig, err := newClhConfig()
+	assert.NoError(err)
+
+	clh := &cloudHypervisor{}
+	clh.config = clhConfig
+	clh.devicesIds = make(map[string]string)
+	clh.vmconfig = *chclient.NewVmConfig(*chclient.NewPayloadConfig())
+
+	// AddDevice with VFIODev type should cold-plug
+	dev := config.VFIODev{
+		ID:       "nic0",
+		SysfsDev: "/sys/bus/pci/devices/0000:05:00.0",
+		BDF:      "0000:05:00.0",
+		Type:     config.VFIOPCIDeviceNormalType,
+	}
+	err = clh.AddDevice(context.Background(), dev, VfioDev)
+	assert.NoError(err, "AddDevice VFIO expected no error")
+	assert.NotNil(clh.vmconfig.Devices)
+	assert.Len(*clh.vmconfig.Devices, 1)
+}
+
 func TestClhGenerateSocket(t *testing.T) {
 	assert := assert.New(t)
 