mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-08-16 07:05:14 +00:00
Merge pull request #6699 from zvonkok/cold-plug-vfio
gpu: cold plug VFIO devices
This commit is contained in:
commit
65670e6b0a
@ -17,6 +17,7 @@ import (
|
||||
"github.com/prometheus/procfs"
|
||||
"github.com/urfave/cli"
|
||||
|
||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
|
||||
@ -113,6 +114,7 @@ type HypervisorInfo struct {
|
||||
Msize9p uint32
|
||||
MemorySlots uint32
|
||||
PCIeRootPort uint32
|
||||
ColdPlugVFIO hv.PCIePort
|
||||
HotplugVFIOOnRootBus bool
|
||||
Debug bool
|
||||
}
|
||||
@ -305,17 +307,17 @@ func getHypervisorInfo(config oci.RuntimeConfig) (HypervisorInfo, error) {
|
||||
}
|
||||
|
||||
return HypervisorInfo{
|
||||
Debug: config.HypervisorConfig.Debug,
|
||||
MachineType: config.HypervisorConfig.HypervisorMachineType,
|
||||
Version: version,
|
||||
Path: hypervisorPath,
|
||||
BlockDeviceDriver: config.HypervisorConfig.BlockDeviceDriver,
|
||||
Msize9p: config.HypervisorConfig.Msize9p,
|
||||
MemorySlots: config.HypervisorConfig.MemSlots,
|
||||
EntropySource: config.HypervisorConfig.EntropySource,
|
||||
SharedFS: config.HypervisorConfig.SharedFS,
|
||||
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
|
||||
|
||||
Debug: config.HypervisorConfig.Debug,
|
||||
MachineType: config.HypervisorConfig.HypervisorMachineType,
|
||||
Version: version,
|
||||
Path: hypervisorPath,
|
||||
BlockDeviceDriver: config.HypervisorConfig.BlockDeviceDriver,
|
||||
Msize9p: config.HypervisorConfig.Msize9p,
|
||||
MemorySlots: config.HypervisorConfig.MemSlots,
|
||||
EntropySource: config.HypervisorConfig.EntropySource,
|
||||
SharedFS: config.HypervisorConfig.SharedFS,
|
||||
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
|
||||
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
|
||||
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
|
||||
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
|
||||
SocketPath: socketPath,
|
||||
|
@ -19,6 +19,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
||||
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
||||
vcUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
@ -74,6 +75,7 @@ func createConfig(configPath string, fileData string) error {
|
||||
}
|
||||
|
||||
func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeConfig, err error) {
|
||||
var coldPlugVFIO hv.PCIePort
|
||||
const logPath = "/log/path"
|
||||
hypervisorPath := filepath.Join(prefixDir, "hypervisor")
|
||||
kernelPath := filepath.Join(prefixDir, "kernel")
|
||||
@ -86,6 +88,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
|
||||
enableIOThreads := true
|
||||
hotplugVFIOOnRootBus := true
|
||||
pcieRootPort := uint32(2)
|
||||
coldPlugVFIO = hv.NoPort
|
||||
disableNewNetNs := false
|
||||
sharedFS := "virtio-9p"
|
||||
virtioFSdaemon := filepath.Join(prefixDir, "virtiofsd")
|
||||
@ -129,6 +132,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
|
||||
BlockDeviceDriver: blockStorageDriver,
|
||||
EnableIOThreads: enableIOThreads,
|
||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||
ColdPlugVFIO: coldPlugVFIO,
|
||||
PCIeRootPort: pcieRootPort,
|
||||
DisableNewNetNs: disableNewNetNs,
|
||||
DefaultVCPUCount: hypConfig.NumVCPUs,
|
||||
@ -191,12 +195,13 @@ func genericGetExpectedHostDetails(tmpdir string, expectedVendor string, expecte
|
||||
|
||||
expectedSupportVSocks, _ := vcUtils.SupportsVsocks()
|
||||
expectedHostDetails := HostInfo{
|
||||
Kernel: expectedKernelVersion,
|
||||
Architecture: expectedArch,
|
||||
Distro: expectedDistro,
|
||||
CPU: expectedCPU,
|
||||
VMContainerCapable: expectedVMContainerCapable,
|
||||
SupportVSocks: expectedSupportVSocks,
|
||||
AvailableGuestProtections: vc.AvailableGuestProtections(),
|
||||
Kernel: expectedKernelVersion,
|
||||
Architecture: expectedArch,
|
||||
Distro: expectedDistro,
|
||||
CPU: expectedCPU,
|
||||
VMContainerCapable: expectedVMContainerCapable,
|
||||
SupportVSocks: expectedSupportVSocks,
|
||||
}
|
||||
|
||||
testProcCPUInfo := filepath.Join(tmpdir, "cpuinfo")
|
||||
@ -273,6 +278,7 @@ func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo {
|
||||
|
||||
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
|
||||
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
|
||||
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
|
||||
}
|
||||
|
||||
if os.Geteuid() == 0 {
|
||||
|
@ -352,6 +352,11 @@ pflashes = []
|
||||
# Default false
|
||||
#hotplug_vfio_on_root_bus = true
|
||||
|
||||
# In a confidential compute environment hot-plugging can compromise
|
||||
# security. Enable cold-plugging of VFIO devices to a root-port.
|
||||
# The default setting is "no-port", which means disabled.
|
||||
#cold_plug_vfio = "root-port"
|
||||
|
||||
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
|
||||
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU
|
||||
# The value means the number of pcie_root_port
|
||||
|
@ -20,6 +20,7 @@ import (
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
||||
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
|
||||
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
||||
vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
|
||||
@ -308,6 +309,7 @@ func TestCreateContainerConfigFail(t *testing.T) {
|
||||
}
|
||||
|
||||
func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err error) {
|
||||
var coldPlugVFIO hv.PCIePort
|
||||
if dir == "" {
|
||||
return "", fmt.Errorf("BUG: need directory")
|
||||
}
|
||||
@ -332,6 +334,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
|
||||
disableNewNetNs := false
|
||||
sharedFS := "virtio-9p"
|
||||
virtioFSdaemon := path.Join(dir, "virtiofsd")
|
||||
coldPlugVFIO = hv.RootPort
|
||||
|
||||
configFileOptions := ktu.RuntimeConfigOptions{
|
||||
Hypervisor: "qemu",
|
||||
@ -350,6 +353,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
|
||||
DisableNewNetNs: disableNewNetNs,
|
||||
SharedFS: sharedFS,
|
||||
VirtioFSDaemon: virtioFSdaemon,
|
||||
ColdPlugVFIO: coldPlugVFIO,
|
||||
}
|
||||
|
||||
runtimeConfigFileData := ktu.MakeRuntimeConfigFileData(configFileOptions)
|
||||
|
@ -10,10 +10,12 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/api"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@ -133,3 +135,99 @@ func GetAPVFIODevices(sysfsdev string) ([]string, error) {
|
||||
// Split by newlines, omitting final newline
|
||||
return strings.Split(string(data[:len(data)-1]), "\n"), nil
|
||||
}
|
||||
|
||||
// Ignore specific PCI devices, supply the pciClass and the bitmask to check
|
||||
// against the device class, deviceBDF for meaningfull info message
|
||||
func checkIgnorePCIClass(pciClass string, deviceBDF string, bitmask uint64) (bool, error) {
|
||||
if pciClass == "" {
|
||||
return false, nil
|
||||
}
|
||||
pciClassID, err := strconv.ParseUint(pciClass, 0, 32)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
// ClassID is 16 bits, remove the two trailing zeros
|
||||
pciClassID = pciClassID >> 8
|
||||
if pciClassID&bitmask == bitmask {
|
||||
deviceLogger().Infof("Ignoring PCI (Host) Bridge deviceBDF %v Class %x", deviceBDF, pciClassID)
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// GetAllVFIODevicesFromIOMMUGroup returns all the VFIO devices in the IOMMU group
|
||||
// We can reuse this function at various levels, sandbox, container.
|
||||
// Only the VFIO module is allowed to do bus assignments, all other modules need to
|
||||
// ignore it if used as helper function to get VFIO information.
|
||||
func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo, ignoreBusAssignment bool) ([]*config.VFIODev, error) {
|
||||
|
||||
vfioDevs := []*config.VFIODev{}
|
||||
|
||||
vfioGroup := filepath.Base(device.HostPath)
|
||||
iommuDevicesPath := filepath.Join(config.SysIOMMUPath, vfioGroup, "devices")
|
||||
|
||||
deviceFiles, err := os.ReadDir(iommuDevicesPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Pass all devices in iommu group
|
||||
for i, deviceFile := range deviceFiles {
|
||||
//Get bdf of device eg 0000:00:1c.0
|
||||
deviceBDF, deviceSysfsDev, vfioDeviceType, err := getVFIODetails(deviceFile.Name(), iommuDevicesPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
id := utils.MakeNameID("vfio", device.ID+strconv.Itoa(i), maxDevIDSize)
|
||||
|
||||
pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
|
||||
// We need to ignore Host or PCI Bridges that are in the same IOMMU group as the
|
||||
// passed-through devices. One CANNOT pass-through a PCI bridge or Host bridge.
|
||||
// Class 0x0604 is PCI bridge, 0x0600 is Host bridge
|
||||
ignorePCIDevice, err := checkIgnorePCIClass(pciClass, deviceBDF, 0x0600)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if ignorePCIDevice {
|
||||
continue
|
||||
}
|
||||
|
||||
var vfio config.VFIODev
|
||||
|
||||
switch vfioDeviceType {
|
||||
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||
isPCIe := isPCIeDevice(deviceBDF)
|
||||
// Do not directly assign to `vfio` -- need to access field still
|
||||
vfioPCI := config.VFIOPCIDev{
|
||||
ID: id,
|
||||
Type: vfioDeviceType,
|
||||
BDF: deviceBDF,
|
||||
SysfsDev: deviceSysfsDev,
|
||||
IsPCIe: isPCIe,
|
||||
Class: pciClass,
|
||||
}
|
||||
if isPCIe && !ignoreBusAssignment {
|
||||
vfioPCI.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs))
|
||||
AllPCIeDevs[deviceBDF] = true
|
||||
}
|
||||
vfio = vfioPCI
|
||||
case config.VFIOAPDeviceMediatedType:
|
||||
devices, err := GetAPVFIODevices(deviceSysfsDev)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
vfio = config.VFIOAPDev{
|
||||
ID: id,
|
||||
SysfsDev: deviceSysfsDev,
|
||||
Type: config.VFIOAPDeviceMediatedType,
|
||||
APDevices: devices,
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("Failed to append device: VFIO device type unrecognized")
|
||||
}
|
||||
|
||||
vfioDevs = append(vfioDevs, &vfio)
|
||||
}
|
||||
|
||||
return vfioDevs, nil
|
||||
}
|
||||
|
@ -11,7 +11,6 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
@ -54,25 +53,6 @@ func NewVFIODevice(devInfo *config.DeviceInfo) *VFIODevice {
|
||||
}
|
||||
}
|
||||
|
||||
// Ignore specific PCI devices, supply the pciClass and the bitmask to check
|
||||
// against the device class, deviceBDF for meaningfull info message
|
||||
func (device *VFIODevice) checkIgnorePCIClass(pciClass string, deviceBDF string, bitmask uint64) (bool, error) {
|
||||
if pciClass == "" {
|
||||
return false, nil
|
||||
}
|
||||
pciClassID, err := strconv.ParseUint(pciClass, 0, 32)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
// ClassID is 16 bits, remove the two trailing zeros
|
||||
pciClassID = pciClassID >> 8
|
||||
if pciClassID&bitmask == bitmask {
|
||||
deviceLogger().Infof("Ignoring PCI (Host) Bridge deviceBDF %v Class %x", deviceBDF, pciClassID)
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Attach is standard interface of api.Device, it's used to add device to some
|
||||
// DeviceReceiver
|
||||
func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceReceiver) (retErr error) {
|
||||
@ -90,72 +70,11 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
|
||||
}
|
||||
}()
|
||||
|
||||
vfioGroup := filepath.Base(device.DeviceInfo.HostPath)
|
||||
iommuDevicesPath := filepath.Join(config.SysIOMMUPath, vfioGroup, "devices")
|
||||
|
||||
deviceFiles, err := os.ReadDir(iommuDevicesPath)
|
||||
device.VfioDevs, err = GetAllVFIODevicesFromIOMMUGroup(*device.DeviceInfo, false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Pass all devices in iommu group
|
||||
for i, deviceFile := range deviceFiles {
|
||||
//Get bdf of device eg 0000:00:1c.0
|
||||
deviceBDF, deviceSysfsDev, vfioDeviceType, err := getVFIODetails(deviceFile.Name(), iommuDevicesPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
id := utils.MakeNameID("vfio", device.DeviceInfo.ID+strconv.Itoa(i), maxDevIDSize)
|
||||
|
||||
pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
|
||||
// We need to ignore Host or PCI Bridges that are in the same IOMMU group as the
|
||||
// passed-through devices. One CANNOT pass-through a PCI bridge or Host bridge.
|
||||
// Class 0x0604 is PCI bridge, 0x0600 is Host bridge
|
||||
ignorePCIDevice, err := device.checkIgnorePCIClass(pciClass, deviceBDF, 0x0600)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if ignorePCIDevice {
|
||||
continue
|
||||
}
|
||||
|
||||
var vfio config.VFIODev
|
||||
|
||||
switch vfioDeviceType {
|
||||
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||
isPCIe := isPCIeDevice(deviceBDF)
|
||||
// Do not directly assign to `vfio` -- need to access field still
|
||||
vfioPCI := config.VFIOPCIDev{
|
||||
ID: id,
|
||||
Type: vfioDeviceType,
|
||||
BDF: deviceBDF,
|
||||
SysfsDev: deviceSysfsDev,
|
||||
IsPCIe: isPCIe,
|
||||
Class: pciClass,
|
||||
}
|
||||
if isPCIe {
|
||||
vfioPCI.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs))
|
||||
AllPCIeDevs[deviceBDF] = true
|
||||
}
|
||||
vfio = vfioPCI
|
||||
case config.VFIOAPDeviceMediatedType:
|
||||
devices, err := GetAPVFIODevices(deviceSysfsDev)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
vfio = config.VFIOAPDev{
|
||||
ID: id,
|
||||
SysfsDev: deviceSysfsDev,
|
||||
Type: config.VFIOAPDeviceMediatedType,
|
||||
APDevices: devices,
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("Failed to append device: VFIO device type unrecognized")
|
||||
}
|
||||
|
||||
device.VfioDevs = append(device.VfioDevs, &vfio)
|
||||
}
|
||||
|
||||
coldPlug := device.DeviceInfo.ColdPlug
|
||||
deviceLogger().WithField("cold-plug", coldPlug).Info("Attaching VFIO device")
|
||||
|
||||
|
@ -116,7 +116,7 @@ func (dm *deviceManager) createDevice(devInfo config.DeviceInfo) (dev api.Device
|
||||
if devInfo.ID, err = dm.newDeviceID(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if isVFIO(devInfo.HostPath) {
|
||||
if IsVFIO(devInfo.HostPath) {
|
||||
return drivers.NewVFIODevice(&devInfo), nil
|
||||
} else if isVhostUserBlk(devInfo) {
|
||||
if devInfo.DriverOptions == nil {
|
||||
|
@ -17,8 +17,8 @@ const (
|
||||
vfioPath = "/dev/vfio/"
|
||||
)
|
||||
|
||||
// isVFIO checks if the device provided is a vfio group.
|
||||
func isVFIO(hostPath string) bool {
|
||||
// IsVFIO checks if the device provided is a vfio group.
|
||||
func IsVFIO(hostPath string) bool {
|
||||
// Ignore /dev/vfio/vfio character device
|
||||
if strings.HasPrefix(hostPath, filepath.Join(vfioPath, "vfio")) {
|
||||
return false
|
||||
|
@ -31,7 +31,7 @@ func TestIsVFIO(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, d := range data {
|
||||
isVFIO := isVFIO(d.path)
|
||||
isVFIO := IsVFIO(d.path)
|
||||
assert.Equal(t, d.expected, isVFIO)
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
package hypervisors
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Bridge is a bridge where devices can be hot plugged
|
||||
type Bridge struct {
|
||||
// DeviceAddr contains information about devices plugged and its address in the bridge
|
||||
@ -26,6 +28,34 @@ type CPUDevice struct {
|
||||
ID string
|
||||
}
|
||||
|
||||
// PCIePort names the kind of PCIe port a VFIO device is attached to.
type PCIePort string

// Every constant carries the PCIePort type explicitly. A constant written as
// `Name = "value"` inside this group would be an untyped string constant and
// would lose the PCIePort type (and its String method) whenever it is stored
// in an interface or used with type inference.
const (
	// RootPort attaches VFIO devices to a root-port.
	RootPort PCIePort = "root-port"
	// SwitchPort attaches VFIO devices to a switch-port.
	SwitchPort PCIePort = "switch-port"
	// BridgePort is the default.
	BridgePort PCIePort = "bridge-port"
	// NoPort is for disabling VFIO hotplug/coldplug.
	NoPort PCIePort = "no-port"
)

// String returns the textual form of the port. Known ports are returned
// verbatim; any other value is rendered as "<unknown PCIePort: VALUE>".
func (p PCIePort) String() string {
	switch p {
	case RootPort, SwitchPort, BridgePort, NoPort:
		return string(p)
	}
	return fmt.Sprintf("<unknown PCIePort: %s>", string(p))
}
|
||||
|
||||
type HypervisorState struct {
|
||||
BlockIndexMap map[int]struct{}
|
||||
|
||||
@ -41,10 +71,10 @@ type HypervisorState struct {
|
||||
// HotpluggedVCPUs is the list of vCPUs that were hot-added
|
||||
HotpluggedVCPUs []CPUDevice
|
||||
|
||||
HotpluggedMemory int
|
||||
VirtiofsDaemonPid int
|
||||
Pid int
|
||||
PCIeRootPort int
|
||||
|
||||
HotpluggedMemory int
|
||||
VirtiofsDaemonPid int
|
||||
Pid int
|
||||
PCIeRootPort int
|
||||
ColdPlugVFIO PCIePort
|
||||
HotplugVFIOOnRootBus bool
|
||||
}
|
||||
|
@ -14,6 +14,7 @@ import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
@ -224,6 +225,7 @@ type RuntimeConfigOptions struct {
|
||||
JaegerPassword string
|
||||
PFlash []string
|
||||
PCIeRootPort uint32
|
||||
ColdPlugVFIO hv.PCIePort
|
||||
DefaultVCPUCount uint32
|
||||
DefaultMaxVCPUCount uint32
|
||||
DefaultMemSize uint32
|
||||
@ -317,6 +319,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
|
||||
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
|
||||
hotplug_vfio_on_root_bus = ` + strconv.FormatBool(config.HotplugVFIOOnRootBus) + `
|
||||
pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + `
|
||||
cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `"
|
||||
msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + `
|
||||
enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + `
|
||||
guest_hook_path = "` + config.DefaultGuestHookPath + `"
|
||||
|
@ -9,6 +9,10 @@
|
||||
|
||||
package katautils
|
||||
|
||||
import (
|
||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
||||
)
|
||||
|
||||
// name is the name of the runtime
|
||||
var NAME = "@RUNTIME_NAME@"
|
||||
|
||||
@ -103,3 +107,5 @@ const defaultVMCacheEndpoint string = "/var/run/kata-containers/cache.sock"
|
||||
|
||||
// Default config file used by stateless systems.
|
||||
var defaultRuntimeConfiguration = "@CONFIG_PATH@"
|
||||
|
||||
const defaultColdPlugVFIO = hv.NoPort
|
||||
|
@ -20,6 +20,7 @@ import (
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/govmm"
|
||||
govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu"
|
||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
|
||||
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
||||
@ -77,87 +78,88 @@ type factory struct {
|
||||
}
|
||||
|
||||
type hypervisor struct {
|
||||
Path string `toml:"path"`
|
||||
JailerPath string `toml:"jailer_path"`
|
||||
Kernel string `toml:"kernel"`
|
||||
CtlPath string `toml:"ctlpath"`
|
||||
Initrd string `toml:"initrd"`
|
||||
Image string `toml:"image"`
|
||||
RootfsType string `toml:"rootfs_type"`
|
||||
Firmware string `toml:"firmware"`
|
||||
FirmwareVolume string `toml:"firmware_volume"`
|
||||
MachineAccelerators string `toml:"machine_accelerators"`
|
||||
CPUFeatures string `toml:"cpu_features"`
|
||||
KernelParams string `toml:"kernel_params"`
|
||||
MachineType string `toml:"machine_type"`
|
||||
BlockDeviceDriver string `toml:"block_device_driver"`
|
||||
EntropySource string `toml:"entropy_source"`
|
||||
SharedFS string `toml:"shared_fs"`
|
||||
VirtioFSDaemon string `toml:"virtio_fs_daemon"`
|
||||
VirtioFSCache string `toml:"virtio_fs_cache"`
|
||||
VhostUserStorePath string `toml:"vhost_user_store_path"`
|
||||
FileBackedMemRootDir string `toml:"file_mem_backend"`
|
||||
GuestHookPath string `toml:"guest_hook_path"`
|
||||
GuestMemoryDumpPath string `toml:"guest_memory_dump_path"`
|
||||
SeccompSandbox string `toml:"seccompsandbox"`
|
||||
BlockDeviceAIO string `toml:"block_device_aio"`
|
||||
HypervisorPathList []string `toml:"valid_hypervisor_paths"`
|
||||
JailerPathList []string `toml:"valid_jailer_paths"`
|
||||
CtlPathList []string `toml:"valid_ctlpaths"`
|
||||
VirtioFSDaemonList []string `toml:"valid_virtio_fs_daemon_paths"`
|
||||
VirtioFSExtraArgs []string `toml:"virtio_fs_extra_args"`
|
||||
PFlashList []string `toml:"pflashes"`
|
||||
VhostUserStorePathList []string `toml:"valid_vhost_user_store_paths"`
|
||||
FileBackedMemRootList []string `toml:"valid_file_mem_backends"`
|
||||
EntropySourceList []string `toml:"valid_entropy_sources"`
|
||||
EnableAnnotations []string `toml:"enable_annotations"`
|
||||
RxRateLimiterMaxRate uint64 `toml:"rx_rate_limiter_max_rate"`
|
||||
TxRateLimiterMaxRate uint64 `toml:"tx_rate_limiter_max_rate"`
|
||||
MemOffset uint64 `toml:"memory_offset"`
|
||||
DefaultMaxMemorySize uint64 `toml:"default_maxmemory"`
|
||||
DiskRateLimiterBwMaxRate int64 `toml:"disk_rate_limiter_bw_max_rate"`
|
||||
DiskRateLimiterBwOneTimeBurst int64 `toml:"disk_rate_limiter_bw_one_time_burst"`
|
||||
DiskRateLimiterOpsMaxRate int64 `toml:"disk_rate_limiter_ops_max_rate"`
|
||||
DiskRateLimiterOpsOneTimeBurst int64 `toml:"disk_rate_limiter_ops_one_time_burst"`
|
||||
NetRateLimiterBwMaxRate int64 `toml:"net_rate_limiter_bw_max_rate"`
|
||||
NetRateLimiterBwOneTimeBurst int64 `toml:"net_rate_limiter_bw_one_time_burst"`
|
||||
NetRateLimiterOpsMaxRate int64 `toml:"net_rate_limiter_ops_max_rate"`
|
||||
NetRateLimiterOpsOneTimeBurst int64 `toml:"net_rate_limiter_ops_one_time_burst"`
|
||||
VirtioFSCacheSize uint32 `toml:"virtio_fs_cache_size"`
|
||||
VirtioFSQueueSize uint32 `toml:"virtio_fs_queue_size"`
|
||||
DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"`
|
||||
MemorySize uint32 `toml:"default_memory"`
|
||||
MemSlots uint32 `toml:"memory_slots"`
|
||||
DefaultBridges uint32 `toml:"default_bridges"`
|
||||
Msize9p uint32 `toml:"msize_9p"`
|
||||
PCIeRootPort uint32 `toml:"pcie_root_port"`
|
||||
NumVCPUs int32 `toml:"default_vcpus"`
|
||||
BlockDeviceCacheSet bool `toml:"block_device_cache_set"`
|
||||
BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"`
|
||||
BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"`
|
||||
EnableVhostUserStore bool `toml:"enable_vhost_user_store"`
|
||||
VhostUserDeviceReconnect uint32 `toml:"vhost_user_reconnect_timeout_sec"`
|
||||
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
|
||||
MemPrealloc bool `toml:"enable_mem_prealloc"`
|
||||
HugePages bool `toml:"enable_hugepages"`
|
||||
VirtioMem bool `toml:"enable_virtio_mem"`
|
||||
IOMMU bool `toml:"enable_iommu"`
|
||||
IOMMUPlatform bool `toml:"enable_iommu_platform"`
|
||||
Debug bool `toml:"enable_debug"`
|
||||
DisableNestingChecks bool `toml:"disable_nesting_checks"`
|
||||
EnableIOThreads bool `toml:"enable_iothreads"`
|
||||
DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
|
||||
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
|
||||
DisableVhostNet bool `toml:"disable_vhost_net"`
|
||||
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
|
||||
ConfidentialGuest bool `toml:"confidential_guest"`
|
||||
SevSnpGuest bool `toml:"sev_snp_guest"`
|
||||
GuestSwap bool `toml:"enable_guest_swap"`
|
||||
Rootless bool `toml:"rootless"`
|
||||
DisableSeccomp bool `toml:"disable_seccomp"`
|
||||
DisableSeLinux bool `toml:"disable_selinux"`
|
||||
DisableGuestSeLinux bool `toml:"disable_guest_selinux"`
|
||||
LegacySerial bool `toml:"use_legacy_serial"`
|
||||
Path string `toml:"path"`
|
||||
JailerPath string `toml:"jailer_path"`
|
||||
Kernel string `toml:"kernel"`
|
||||
CtlPath string `toml:"ctlpath"`
|
||||
Initrd string `toml:"initrd"`
|
||||
Image string `toml:"image"`
|
||||
RootfsType string `toml:"rootfs_type"`
|
||||
Firmware string `toml:"firmware"`
|
||||
FirmwareVolume string `toml:"firmware_volume"`
|
||||
MachineAccelerators string `toml:"machine_accelerators"`
|
||||
CPUFeatures string `toml:"cpu_features"`
|
||||
KernelParams string `toml:"kernel_params"`
|
||||
MachineType string `toml:"machine_type"`
|
||||
BlockDeviceDriver string `toml:"block_device_driver"`
|
||||
EntropySource string `toml:"entropy_source"`
|
||||
SharedFS string `toml:"shared_fs"`
|
||||
VirtioFSDaemon string `toml:"virtio_fs_daemon"`
|
||||
VirtioFSCache string `toml:"virtio_fs_cache"`
|
||||
VhostUserStorePath string `toml:"vhost_user_store_path"`
|
||||
FileBackedMemRootDir string `toml:"file_mem_backend"`
|
||||
GuestHookPath string `toml:"guest_hook_path"`
|
||||
GuestMemoryDumpPath string `toml:"guest_memory_dump_path"`
|
||||
SeccompSandbox string `toml:"seccompsandbox"`
|
||||
BlockDeviceAIO string `toml:"block_device_aio"`
|
||||
HypervisorPathList []string `toml:"valid_hypervisor_paths"`
|
||||
JailerPathList []string `toml:"valid_jailer_paths"`
|
||||
CtlPathList []string `toml:"valid_ctlpaths"`
|
||||
VirtioFSDaemonList []string `toml:"valid_virtio_fs_daemon_paths"`
|
||||
VirtioFSExtraArgs []string `toml:"virtio_fs_extra_args"`
|
||||
PFlashList []string `toml:"pflashes"`
|
||||
VhostUserStorePathList []string `toml:"valid_vhost_user_store_paths"`
|
||||
FileBackedMemRootList []string `toml:"valid_file_mem_backends"`
|
||||
EntropySourceList []string `toml:"valid_entropy_sources"`
|
||||
EnableAnnotations []string `toml:"enable_annotations"`
|
||||
RxRateLimiterMaxRate uint64 `toml:"rx_rate_limiter_max_rate"`
|
||||
TxRateLimiterMaxRate uint64 `toml:"tx_rate_limiter_max_rate"`
|
||||
MemOffset uint64 `toml:"memory_offset"`
|
||||
DefaultMaxMemorySize uint64 `toml:"default_maxmemory"`
|
||||
DiskRateLimiterBwMaxRate int64 `toml:"disk_rate_limiter_bw_max_rate"`
|
||||
DiskRateLimiterBwOneTimeBurst int64 `toml:"disk_rate_limiter_bw_one_time_burst"`
|
||||
DiskRateLimiterOpsMaxRate int64 `toml:"disk_rate_limiter_ops_max_rate"`
|
||||
DiskRateLimiterOpsOneTimeBurst int64 `toml:"disk_rate_limiter_ops_one_time_burst"`
|
||||
NetRateLimiterBwMaxRate int64 `toml:"net_rate_limiter_bw_max_rate"`
|
||||
NetRateLimiterBwOneTimeBurst int64 `toml:"net_rate_limiter_bw_one_time_burst"`
|
||||
NetRateLimiterOpsMaxRate int64 `toml:"net_rate_limiter_ops_max_rate"`
|
||||
NetRateLimiterOpsOneTimeBurst int64 `toml:"net_rate_limiter_ops_one_time_burst"`
|
||||
VirtioFSCacheSize uint32 `toml:"virtio_fs_cache_size"`
|
||||
VirtioFSQueueSize uint32 `toml:"virtio_fs_queue_size"`
|
||||
DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"`
|
||||
MemorySize uint32 `toml:"default_memory"`
|
||||
MemSlots uint32 `toml:"memory_slots"`
|
||||
DefaultBridges uint32 `toml:"default_bridges"`
|
||||
Msize9p uint32 `toml:"msize_9p"`
|
||||
PCIeRootPort uint32 `toml:"pcie_root_port"`
|
||||
NumVCPUs int32 `toml:"default_vcpus"`
|
||||
BlockDeviceCacheSet bool `toml:"block_device_cache_set"`
|
||||
BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"`
|
||||
BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"`
|
||||
EnableVhostUserStore bool `toml:"enable_vhost_user_store"`
|
||||
VhostUserDeviceReconnect uint32 `toml:"vhost_user_reconnect_timeout_sec"`
|
||||
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
|
||||
MemPrealloc bool `toml:"enable_mem_prealloc"`
|
||||
HugePages bool `toml:"enable_hugepages"`
|
||||
VirtioMem bool `toml:"enable_virtio_mem"`
|
||||
IOMMU bool `toml:"enable_iommu"`
|
||||
IOMMUPlatform bool `toml:"enable_iommu_platform"`
|
||||
Debug bool `toml:"enable_debug"`
|
||||
DisableNestingChecks bool `toml:"disable_nesting_checks"`
|
||||
EnableIOThreads bool `toml:"enable_iothreads"`
|
||||
DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
|
||||
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
|
||||
ColdPlugVFIO hv.PCIePort `toml:"cold_plug_vfio"`
|
||||
DisableVhostNet bool `toml:"disable_vhost_net"`
|
||||
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
|
||||
ConfidentialGuest bool `toml:"confidential_guest"`
|
||||
SevSnpGuest bool `toml:"sev_snp_guest"`
|
||||
GuestSwap bool `toml:"enable_guest_swap"`
|
||||
Rootless bool `toml:"rootless"`
|
||||
DisableSeccomp bool `toml:"disable_seccomp"`
|
||||
DisableSeLinux bool `toml:"disable_selinux"`
|
||||
DisableGuestSeLinux bool `toml:"disable_guest_selinux"`
|
||||
LegacySerial bool `toml:"use_legacy_serial"`
|
||||
}
|
||||
|
||||
type runtime struct {
|
||||
@ -285,6 +287,13 @@ func (h hypervisor) firmware() (string, error) {
|
||||
return ResolvePath(p)
|
||||
}
|
||||
|
||||
func (h hypervisor) coldPlugVFIO() hv.PCIePort {
|
||||
if h.ColdPlugVFIO == "" {
|
||||
return defaultColdPlugVFIO
|
||||
}
|
||||
return h.ColdPlugVFIO
|
||||
}
|
||||
|
||||
func (h hypervisor) firmwareVolume() (string, error) {
|
||||
p := h.FirmwareVolume
|
||||
|
||||
@ -854,6 +863,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
Msize9p: h.msize9p(),
|
||||
DisableImageNvdimm: h.DisableImageNvdimm,
|
||||
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
|
||||
ColdPlugVFIO: h.coldPlugVFIO(),
|
||||
PCIeRootPort: h.PCIeRootPort,
|
||||
DisableVhostNet: h.DisableVhostNet,
|
||||
EnableVhostUserStore: h.EnableVhostUserStore,
|
||||
@ -1048,6 +1058,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
EnableIOThreads: h.EnableIOThreads,
|
||||
Msize9p: h.msize9p(),
|
||||
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
|
||||
ColdPlugVFIO: h.coldPlugVFIO(),
|
||||
PCIeRootPort: h.PCIeRootPort,
|
||||
DisableVhostNet: true,
|
||||
GuestHookPath: h.guestHookPath(),
|
||||
@ -1278,6 +1289,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
|
||||
EnableIOThreads: defaultEnableIOThreads,
|
||||
Msize9p: defaultMsize9p,
|
||||
HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus,
|
||||
ColdPlugVFIO: defaultColdPlugVFIO,
|
||||
PCIeRootPort: defaultPCIeRootPort,
|
||||
GuestHookPath: defaultGuestHookPath,
|
||||
VhostUserStorePath: defaultVhostUserStorePath,
|
||||
@ -1650,9 +1662,32 @@ func checkConfig(config oci.RuntimeConfig) error {
|
||||
return err
|
||||
}
|
||||
|
||||
coldPlugVFIO := config.HypervisorConfig.ColdPlugVFIO
|
||||
machineType := config.HypervisorConfig.HypervisorMachineType
|
||||
if err := checkPCIeConfig(coldPlugVFIO, machineType); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkPCIeConfig ensures the PCIe configuration is valid.
|
||||
// Only allow one of the following settings for cold-plug:
|
||||
// no-port, root-port, switch-port
|
||||
func checkPCIeConfig(vfioPort hv.PCIePort, machineType string) error {
|
||||
// Currently only QEMU q35 supports advanced PCIe topologies
|
||||
// firecracker, dragonball do not have right now any PCIe support
|
||||
if machineType != "q35" {
|
||||
return nil
|
||||
}
|
||||
if vfioPort == hv.NoPort || vfioPort == hv.RootPort || vfioPort == hv.SwitchPort {
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("invalid vfio_port=%s setting, allowed values %s, %s, %s",
|
||||
vfioPort, hv.NoPort, hv.RootPort, hv.SwitchPort)
|
||||
}
|
||||
|
||||
// checkNetNsConfig performs sanity checks on disable_new_netns config.
|
||||
// Because it is an expert option and conflicts with some other common configs.
|
||||
func checkNetNsConfig(config oci.RuntimeConfig) error {
|
||||
|
@ -19,6 +19,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/govmm"
|
||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
||||
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
|
||||
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
||||
@ -70,7 +71,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
||||
if hypervisor == "" {
|
||||
return config, fmt.Errorf("BUG: need hypervisor")
|
||||
}
|
||||
|
||||
var coldPlugVFIO hv.PCIePort
|
||||
hypervisorPath := path.Join(dir, "hypervisor")
|
||||
kernelPath := path.Join(dir, "kernel")
|
||||
kernelParams := "foo=bar xyz"
|
||||
@ -85,6 +86,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
||||
enableIOThreads := true
|
||||
hotplugVFIOOnRootBus := true
|
||||
pcieRootPort := uint32(2)
|
||||
coldPlugVFIO = hv.RootPort
|
||||
disableNewNetNs := false
|
||||
sharedFS := "virtio-9p"
|
||||
virtioFSdaemon := path.Join(dir, "virtiofsd")
|
||||
@ -107,6 +109,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
||||
EnableIOThreads: enableIOThreads,
|
||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||
PCIeRootPort: pcieRootPort,
|
||||
ColdPlugVFIO: coldPlugVFIO,
|
||||
DisableNewNetNs: disableNewNetNs,
|
||||
DefaultVCPUCount: defaultVCPUCount,
|
||||
DefaultMaxVCPUCount: defaultMaxVCPUCount,
|
||||
@ -170,6 +173,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
||||
EnableIOThreads: enableIOThreads,
|
||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||
PCIeRootPort: pcieRootPort,
|
||||
ColdPlugVFIO: coldPlugVFIO,
|
||||
Msize9p: defaultMsize9p,
|
||||
MemSlots: defaultMemSlots,
|
||||
EntropySource: defaultEntropySource,
|
||||
@ -564,6 +568,7 @@ func TestMinimalRuntimeConfig(t *testing.T) {
|
||||
VirtioFSCache: defaultVirtioFSCacheMode,
|
||||
BlockDeviceAIO: defaultBlockDeviceAIO,
|
||||
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
||||
ColdPlugVFIO: defaultColdPlugVFIO,
|
||||
}
|
||||
|
||||
expectedAgentConfig := vc.KataAgentConfig{
|
||||
@ -597,7 +602,7 @@ func TestMinimalRuntimeConfig(t *testing.T) {
|
||||
|
||||
func TestNewQemuHypervisorConfig(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
var coldPlugVFIO hv.PCIePort
|
||||
hypervisorPath := path.Join(dir, "hypervisor")
|
||||
kernelPath := path.Join(dir, "kernel")
|
||||
imagePath := path.Join(dir, "image")
|
||||
@ -606,6 +611,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
|
||||
enableIOThreads := true
|
||||
hotplugVFIOOnRootBus := true
|
||||
pcieRootPort := uint32(2)
|
||||
coldPlugVFIO = hv.RootPort
|
||||
orgVHostVSockDevicePath := utils.VHostVSockDevicePath
|
||||
blockDeviceAIO := "io_uring"
|
||||
defer func() {
|
||||
@ -625,6 +631,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
|
||||
EnableIOThreads: enableIOThreads,
|
||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||
PCIeRootPort: pcieRootPort,
|
||||
ColdPlugVFIO: coldPlugVFIO,
|
||||
RxRateLimiterMaxRate: rxRateLimiterMaxRate,
|
||||
TxRateLimiterMaxRate: txRateLimiterMaxRate,
|
||||
SharedFS: "virtio-fs",
|
||||
|
@ -292,6 +292,10 @@ type HypervisorConfig struct {
|
||||
// The PCIe Root Port device is used to hot-plug the PCIe device
|
||||
PCIeRootPort uint32
|
||||
|
||||
// ColdPlugVFIO is used to indicate if devices need to be coldplugged on the
|
||||
// root port, switch or no port
|
||||
ColdPlugVFIO hv.PCIePort
|
||||
|
||||
// BootToBeTemplate used to indicate if the VM is created to be a template VM
|
||||
BootToBeTemplate bool
|
||||
|
||||
|
@ -509,6 +509,10 @@ type HypervisorConfig struct {
|
||||
// The PCIe Root Port device is used to hot-plug the PCIe device
|
||||
PCIeRootPort uint32
|
||||
|
||||
// ColdPlugVFIO is used to indicate if devices need to be coldplugged on the
|
||||
// root port, switch or no port
|
||||
ColdPlugVFIO hv.PCIePort
|
||||
|
||||
// NumVCPUs specifies default number of vCPUs for the VM.
|
||||
NumVCPUs uint32
|
||||
|
||||
|
@ -487,6 +487,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
|
||||
DisableNestingChecks: hconf.DisableNestingChecks,
|
||||
DisableImageNvdimm: hconf.DisableImageNvdimm,
|
||||
HotplugVFIOOnRootBus: hconf.HotplugVFIOOnRootBus,
|
||||
ColdPlugVFIO: hconf.ColdPlugVFIO,
|
||||
PCIeRootPort: hconf.PCIeRootPort,
|
||||
BootToBeTemplate: hconf.BootToBeTemplate,
|
||||
BootFromTemplate: hconf.BootFromTemplate,
|
||||
|
@ -7,6 +7,7 @@
|
||||
package persistapi
|
||||
|
||||
import (
|
||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
@ -198,6 +199,10 @@ type HypervisorConfig struct {
|
||||
// root bus instead of a bridge.
|
||||
HotplugVFIOOnRootBus bool
|
||||
|
||||
// ColdPlugVFIO is used to indicate if devices need to be coldplugged on the
|
||||
// root port or a switch or no-port
|
||||
ColdPlugVFIO hv.PCIePort
|
||||
|
||||
// BootToBeTemplate used to indicate if the VM is created to be a template VM
|
||||
BootToBeTemplate bool
|
||||
|
||||
|
@ -83,6 +83,7 @@ type QemuState struct {
|
||||
VirtiofsDaemonPid int
|
||||
PCIeRootPort int
|
||||
HotplugVFIOOnRootBus bool
|
||||
ColdPlugVFIO hv.PCIePort
|
||||
}
|
||||
|
||||
// qemu is a Hypervisor interface implementation for the Linux qemu hypervisor.
|
||||
@ -282,6 +283,7 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
|
||||
q.Logger().Debug("Creating UUID")
|
||||
q.state.UUID = uuid.Generate().String()
|
||||
|
||||
q.state.ColdPlugVFIO = q.config.ColdPlugVFIO
|
||||
q.state.HotplugVFIOOnRootBus = q.config.HotplugVFIOOnRootBus
|
||||
q.state.PCIeRootPort = int(q.config.PCIeRootPort)
|
||||
|
||||
@ -708,6 +710,18 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
|
||||
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, hypervisorConfig.PCIeRootPort, memSize32bit, memSize64bit)
|
||||
}
|
||||
|
||||
// The default OVMF MMIO aperture is too small for some PCIe devices
|
||||
// with huge BARs so we need to increase it.
|
||||
// memSize64bit is in bytes, convert to MB, OVMF expects MB as a string
|
||||
if strings.Contains(strings.ToLower(hypervisorConfig.FirmwarePath), "ovmf") {
|
||||
pciMmio64Mb := fmt.Sprintf("%d", (memSize64bit / 1024 / 1024))
|
||||
fwCfg := govmmQemu.FwCfg{
|
||||
Name: "opt/ovmf/X-PciMmio64Mb",
|
||||
Str: pciMmio64Mb,
|
||||
}
|
||||
qemuConfig.FwCfg = append(qemuConfig.FwCfg, fwCfg)
|
||||
}
|
||||
|
||||
q.qemuConfig = qemuConfig
|
||||
|
||||
q.virtiofsDaemon, err = q.createVirtiofsDaemon(hypervisorConfig.SharedPath)
|
||||
|
@ -32,6 +32,7 @@ import (
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers"
|
||||
deviceManager "github.com/kata-containers/kata-containers/src/runtime/pkg/device/manager"
|
||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
|
||||
resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol"
|
||||
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
|
||||
@ -620,6 +621,25 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If we have a confidential guest we need to cold-plug the PCIe VFIO devices
|
||||
// until we have TDISP/IDE PCIe support.
|
||||
coldPlugVFIO := (sandboxConfig.HypervisorConfig.ColdPlugVFIO != hv.NoPort)
|
||||
var devs []config.DeviceInfo
|
||||
for cnt, containers := range sandboxConfig.Containers {
|
||||
for dev, device := range containers.DeviceInfos {
|
||||
if coldPlugVFIO && deviceManager.IsVFIO(device.ContainerPath) {
|
||||
device.ColdPlug = true
|
||||
devs = append(devs, device)
|
||||
// We need to remove the devices marked for cold-plug
|
||||
// otherwise at the container level the kata-agent
|
||||
// will try to hot-plug them.
|
||||
infos := sandboxConfig.Containers[cnt].DeviceInfos
|
||||
infos = append(infos[:dev], infos[dev+1:]...)
|
||||
sandboxConfig.Containers[cnt].DeviceInfos = infos
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// store doesn't require hypervisor to be stored immediately
|
||||
if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil {
|
||||
return nil, err
|
||||
@ -629,6 +649,17 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !coldPlugVFIO {
|
||||
return s, nil
|
||||
}
|
||||
|
||||
for _, dev := range devs {
|
||||
_, err := s.AddDevice(ctx, dev)
|
||||
if err != nil {
|
||||
s.Logger().WithError(err).Debug("Cannot cold-plug add device")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user