mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-25 06:52:13 +00:00
Merge pull request #304 from fidencio/wip/forward_port_2703
[forward port] Add vIOMMU support to qemu q35
This commit is contained in:
commit
ac9cc96a6f
@ -129,6 +129,12 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
|
||||
# result in memory pre allocation
|
||||
#enable_hugepages = true
|
||||
|
||||
# Enable vIOMMU, default false
|
||||
# Enabling this will result in the VM having a vIOMMU device
|
||||
# This will also add the following options to the kernel's
|
||||
# command line: intel_iommu=on,iommu=pt
|
||||
#enable_iommu = true
|
||||
|
||||
# Enable swap of vm memory. Default false.
|
||||
# The behaviour is undefined if mem_prealloc is also set to true
|
||||
#enable_swap = true
|
||||
|
@ -183,6 +183,12 @@ enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@
|
||||
# simulated block device nodes for vhost-user devices to live.
|
||||
vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
|
||||
|
||||
# Enable vIOMMU, default false
|
||||
# Enabling this will result in the VM having a vIOMMU device
|
||||
# This will also add the following options to the kernel's
|
||||
# command line: intel_iommu=on,iommu=pt
|
||||
#enable_iommu = true
|
||||
|
||||
# Enable file based guest memory support. The default is an empty string which
|
||||
# will disable this feature. In the case of virtio-fs, this is enabled
|
||||
# automatically and '/dev/shm' is used as the backing folder.
|
||||
|
@ -190,6 +190,12 @@ enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@
|
||||
# simulated block device nodes for vhost-user devices to live.
|
||||
vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
|
||||
|
||||
# Enable vIOMMU, default false
|
||||
# Enabling this will result in the VM having a vIOMMU device
|
||||
# This will also add the following options to the kernel's
|
||||
# command line: intel_iommu=on,iommu=pt
|
||||
#enable_iommu = true
|
||||
|
||||
# Enable file based guest memory support. The default is an empty string which
|
||||
# will disable this feature. In the case of virtio-fs, this is enabled
|
||||
# automatically and '/dev/shm' is used as the backing folder.
|
||||
|
@ -29,7 +29,7 @@ require (
|
||||
github.com/gogo/protobuf v1.3.1
|
||||
github.com/hashicorp/go-multierror v1.0.0
|
||||
github.com/hashicorp/yamux v0.0.0-20190923154419-df201c70410d
|
||||
github.com/intel/govmm v0.0.0-20200304142514-e969afbec52c
|
||||
github.com/intel/govmm v0.0.0-20200602145448-7cc469641b7b
|
||||
github.com/mdlayher/vsock v0.0.0-20191108225356-d9c65923cb8f
|
||||
github.com/mitchellh/mapstructure v1.1.2
|
||||
github.com/opencontainers/runc v1.0.0-rc9.0.20200102164712-2b52db75279c
|
||||
|
@ -130,6 +130,9 @@ github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpO
|
||||
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
|
||||
github.com/intel/govmm v0.0.0-20200304142514-e969afbec52c h1:hbbnB7xG1bSzUBqSTeNATPODx3CXM/omWUF8RMfFY5s=
|
||||
github.com/intel/govmm v0.0.0-20200304142514-e969afbec52c/go.mod h1:QKGWoQtjvkvFtzP6ybiM3lxUHqf83Sv3oLqyELUKH4g=
|
||||
github.com/intel/govmm v0.0.0-20200527135442-7efaf0b1cde3/go.mod h1:QKGWoQtjvkvFtzP6ybiM3lxUHqf83Sv3oLqyELUKH4g=
|
||||
github.com/intel/govmm v0.0.0-20200602145448-7cc469641b7b h1:QqUb1HVk0Nb9zyzvIkMmhI7DP5gzyWPx/6md21M52U0=
|
||||
github.com/intel/govmm v0.0.0-20200602145448-7cc469641b7b/go.mod h1:QKGWoQtjvkvFtzP6ybiM3lxUHqf83Sv3oLqyELUKH4g=
|
||||
github.com/j-keck/arping v0.0.0-20160618110441-2cf9dc699c56/go.mod h1:ymszkNOg6tORTn+6F6j+Jc8TOr5osrynvN6ivFWZ2GA=
|
||||
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
|
||||
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
|
||||
|
@ -38,6 +38,7 @@ const defaultBlockDeviceCacheNoflush bool = false
|
||||
const defaultEnableIOThreads bool = false
|
||||
const defaultEnableMemPrealloc bool = false
|
||||
const defaultEnableHugePages bool = false
|
||||
const defaultEnableIOMMU bool = false
|
||||
const defaultFileBackedMemRootDir string = ""
|
||||
const defaultEnableSwap bool = false
|
||||
const defaultEnableDebug bool = false
|
||||
|
@ -118,6 +118,7 @@ type hypervisor struct {
|
||||
MemPrealloc bool `toml:"enable_mem_prealloc"`
|
||||
HugePages bool `toml:"enable_hugepages"`
|
||||
VirtioMem bool `toml:"enable_virtio_mem"`
|
||||
IOMMU bool `toml:"enable_iommu"`
|
||||
FileBackedMemRootDir string `toml:"file_mem_backend"`
|
||||
Swap bool `toml:"enable_swap"`
|
||||
Debug bool `toml:"enable_debug"`
|
||||
@ -645,6 +646,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
VirtioFSExtraArgs: h.VirtioFSExtraArgs,
|
||||
MemPrealloc: h.MemPrealloc,
|
||||
HugePages: h.HugePages,
|
||||
IOMMU: h.IOMMU,
|
||||
FileBackedMemRootDir: h.FileBackedMemRootDir,
|
||||
Mlock: !h.Swap,
|
||||
Debug: h.Debug,
|
||||
@ -1086,6 +1088,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
|
||||
DefaultBridges: defaultBridgesCount,
|
||||
MemPrealloc: defaultEnableMemPrealloc,
|
||||
HugePages: defaultEnableHugePages,
|
||||
IOMMU: defaultEnableIOMMU,
|
||||
FileBackedMemRootDir: defaultFileBackedMemRootDir,
|
||||
Mlock: !defaultEnableSwap,
|
||||
Debug: defaultEnableDebug,
|
||||
|
71
src/runtime/vendor/github.com/intel/govmm/qemu/qemu.go
generated
vendored
71
src/runtime/vendor/github.com/intel/govmm/qemu/qemu.go
generated
vendored
@ -51,6 +51,11 @@ type Machine struct {
|
||||
Options string
|
||||
}
|
||||
|
||||
// MachineTypeMicrovm is the name of the QEMU microvm machine type (amd64).
const MachineTypeMicrovm string = "microvm"
|
||||
|
||||
// Device is the qemu device interface.
|
||||
type Device interface {
|
||||
Valid() bool
|
||||
@ -127,7 +132,11 @@ const (
|
||||
|
||||
func isDimmSupported(config *Config) bool {
|
||||
switch runtime.GOARCH {
|
||||
case "amd64", "386":
|
||||
case "amd64", "386", "ppc64le":
|
||||
if config != nil && config.Machine.Type == MachineTypeMicrovm {
|
||||
// microvm does not support NUMA
|
||||
return false
|
||||
}
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
@ -153,6 +162,9 @@ const (
|
||||
func (transport VirtioTransport) defaultTransport(config *Config) VirtioTransport {
|
||||
switch runtime.GOARCH {
|
||||
case "amd64", "386":
|
||||
if config != nil && config.Machine.Type == MachineTypeMicrovm {
|
||||
return TransportMMIO
|
||||
}
|
||||
return TransportPCI
|
||||
case "s390x":
|
||||
return TransportCCW
|
||||
@ -868,6 +880,9 @@ type SerialDevice struct {
|
||||
|
||||
// Transport is the virtio transport for this device.
|
||||
Transport VirtioTransport
|
||||
|
||||
// MaxPorts is the maximum number of ports for this device.
|
||||
MaxPorts uint
|
||||
}
|
||||
|
||||
// Valid returns true if the SerialDevice structure is valid and complete.
|
||||
@ -891,6 +906,9 @@ func (dev SerialDevice) QemuParams(config *Config) []string {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf(",id=%s", dev.ID))
|
||||
if dev.Transport.isVirtioPCI(config) {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf(",romfile=%s", dev.ROMFile))
|
||||
if dev.Driver == VirtioSerial && dev.MaxPorts != 0 {
|
||||
deviceParams = append(deviceParams, fmt.Sprintf(",max_ports=%d", dev.MaxPorts))
|
||||
}
|
||||
}
|
||||
|
||||
if dev.Transport.isVirtioCCW(config) {
|
||||
@ -1843,6 +1861,52 @@ func (b BalloonDevice) deviceName(config *Config) string {
|
||||
return BalloonDeviceTransport[b.Transport]
|
||||
}
|
||||
|
||||
// IommuDev represents a Intel IOMMU Device
|
||||
type IommuDev struct {
|
||||
Intremap bool
|
||||
DeviceIotlb bool
|
||||
CachingMode bool
|
||||
}
|
||||
|
||||
// Valid returns true if the IommuDev is valid
|
||||
func (dev IommuDev) Valid() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// deviceName the qemu device name
|
||||
func (dev IommuDev) deviceName() string {
|
||||
return "intel-iommu"
|
||||
}
|
||||
|
||||
// QemuParams returns the qemu parameters built out of the IommuDev.
|
||||
func (dev IommuDev) QemuParams(_ *Config) []string {
|
||||
var qemuParams []string
|
||||
var deviceParams []string
|
||||
|
||||
deviceParams = append(deviceParams, dev.deviceName())
|
||||
if dev.Intremap {
|
||||
deviceParams = append(deviceParams, "intremap=on")
|
||||
} else {
|
||||
deviceParams = append(deviceParams, "intremap=off")
|
||||
}
|
||||
|
||||
if dev.DeviceIotlb {
|
||||
deviceParams = append(deviceParams, "device-iotlb=on")
|
||||
} else {
|
||||
deviceParams = append(deviceParams, "device-iotlb=off")
|
||||
}
|
||||
|
||||
if dev.CachingMode {
|
||||
deviceParams = append(deviceParams, "caching-mode=on")
|
||||
} else {
|
||||
deviceParams = append(deviceParams, "caching-mode=off")
|
||||
}
|
||||
|
||||
qemuParams = append(qemuParams, "-device")
|
||||
qemuParams = append(qemuParams, strings.Join(deviceParams, ","))
|
||||
return qemuParams
|
||||
}
|
||||
|
||||
// RTCBaseType is the qemu RTC base time type.
|
||||
type RTCBaseType string
|
||||
|
||||
@ -1864,6 +1928,9 @@ const (
|
||||
// Host is for using the host clock as a reference.
|
||||
Host RTCClock = "host"
|
||||
|
||||
// RT is for using the host monotonic clock as a reference.
|
||||
RT RTCClock = "rt"
|
||||
|
||||
// VM is for using the guest clock as a reference
|
||||
VM RTCClock = "vm"
|
||||
)
|
||||
@ -1890,7 +1957,7 @@ type RTC struct {
|
||||
|
||||
// Valid returns true if the RTC structure is valid and complete.
|
||||
func (rtc RTC) Valid() bool {
|
||||
if rtc.Clock != Host && rtc.Clock != VM {
|
||||
if rtc.Clock != Host && rtc.Clock != RT && rtc.Clock != VM {
|
||||
return false
|
||||
}
|
||||
|
||||
|
@ -358,6 +358,9 @@ type HypervisorConfig struct {
|
||||
// VirtioMem is used to enable/disable virtio-mem
|
||||
VirtioMem bool
|
||||
|
||||
// IOMMU specifies if the VM should have a vIOMMU
|
||||
IOMMU bool
|
||||
|
||||
// Realtime is used to enable/disable realtime
|
||||
Realtime bool
|
||||
|
||||
|
@ -148,6 +148,9 @@ const (
|
||||
// HugePages is a sandbox annotation to specify if the memory should be pre-allocated from huge pages
|
||||
HugePages = kataAnnotHypervisorPrefix + "enable_hugepages"
|
||||
|
||||
// IOMMU is a sandbox annotation to specify if the VM should have a vIOMMU device
|
||||
IOMMU = kataAnnotHypervisorPrefix + "enable_iommu"
|
||||
|
||||
// FileBackedMemRootDir is a sandbox annotation to specify file based memory backend root directory
|
||||
FileBackedMemRootDir = kataAnnotHypervisorPrefix + "file_mem_backend"
|
||||
|
||||
|
@ -539,6 +539,15 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig
|
||||
|
||||
sbConfig.HypervisorConfig.HugePages = hugePages
|
||||
}
|
||||
|
||||
if value, ok := ocispec.Annotations[vcAnnotations.IOMMU]; ok {
|
||||
iommu, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error parsing annotation for iommu: Please specify boolean value 'true|false'")
|
||||
}
|
||||
|
||||
sbConfig.HypervisorConfig.IOMMU = iommu
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -771,6 +771,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
||||
ocispec.Annotations[vcAnnotations.EnableSwap] = "true"
|
||||
ocispec.Annotations[vcAnnotations.FileBackedMemRootDir] = "/dev/shm"
|
||||
ocispec.Annotations[vcAnnotations.HugePages] = "true"
|
||||
ocispec.Annotations[vcAnnotations.IOMMU] = "true"
|
||||
ocispec.Annotations[vcAnnotations.BlockDeviceDriver] = "virtio-scsi"
|
||||
ocispec.Annotations[vcAnnotations.DisableBlockDeviceUse] = "true"
|
||||
ocispec.Annotations[vcAnnotations.EnableIOThreads] = "true"
|
||||
@ -802,6 +803,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
||||
assert.Equal(config.HypervisorConfig.Mlock, false)
|
||||
assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")
|
||||
assert.Equal(config.HypervisorConfig.HugePages, true)
|
||||
assert.Equal(config.HypervisorConfig.IOMMU, true)
|
||||
assert.Equal(config.HypervisorConfig.BlockDeviceDriver, "virtio-scsi")
|
||||
assert.Equal(config.HypervisorConfig.DisableBlockDeviceUse, true)
|
||||
assert.Equal(config.HypervisorConfig.EnableIOThreads, true)
|
||||
|
@ -409,6 +409,13 @@ func (q *qemu) buildDevices(initrdPath string) ([]govmmQemu.Device, *govmmQemu.I
|
||||
}
|
||||
}
|
||||
|
||||
if q.config.IOMMU {
|
||||
devices, err = q.arch.appendIOMMU(devices)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var ioThread *govmmQemu.IOThread
|
||||
if q.config.BlockDeviceDriver == config.VirtioSCSI {
|
||||
return q.arch.appendSCSIController(devices, q.config.EnableIOThreads)
|
||||
|
@ -48,7 +48,6 @@ var kernelParams = []Param{
|
||||
{"reboot", "k"},
|
||||
{"console", "hvc0"},
|
||||
{"console", "hvc1"},
|
||||
{"iommu", "off"},
|
||||
{"cryptomgr.notests", ""},
|
||||
{"net.ifnames", "0"},
|
||||
{"pci", "lastbus=0"},
|
||||
@ -89,12 +88,31 @@ func newQemuArch(config HypervisorConfig) qemuArch {
|
||||
factory = true
|
||||
}
|
||||
|
||||
var qemuMachines = supportedQemuMachines
|
||||
if config.IOMMU {
|
||||
var q35QemuIOMMUOptions = "accel=kvm,kernel_irqchip=split"
|
||||
|
||||
kernelParams = append(kernelParams,
|
||||
Param{"intel_iommu", "on"})
|
||||
kernelParams = append(kernelParams,
|
||||
Param{"iommu", "pt"})
|
||||
|
||||
for i, m := range qemuMachines {
|
||||
if m.Type == QemuQ35 {
|
||||
qemuMachines[i].Options = q35QemuIOMMUOptions
|
||||
}
|
||||
}
|
||||
} else {
|
||||
kernelParams = append(kernelParams,
|
||||
Param{"iommu", "off"})
|
||||
}
|
||||
|
||||
q := &qemuAmd64{
|
||||
qemuArchBase: qemuArchBase{
|
||||
machineType: machineType,
|
||||
memoryOffset: config.MemOffset,
|
||||
qemuPaths: qemuPaths,
|
||||
supportedQemuMachines: supportedQemuMachines,
|
||||
supportedQemuMachines: qemuMachines,
|
||||
kernelParamsNonDebug: kernelParamsNonDebug,
|
||||
kernelParamsDebug: kernelParamsDebug,
|
||||
kernelParams: kernelParams,
|
||||
|
@ -239,3 +239,19 @@ func TestQemuAmd64WithInitrd(t *testing.T) {
|
||||
assert.NotContains(m.Options, qemuNvdimmOption)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQemuAmd64Iommu(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
config := qemuConfig(QemuQ35)
|
||||
config.IOMMU = true
|
||||
qemu := newQemuArch(config)
|
||||
|
||||
p := qemu.kernelParameters(false)
|
||||
assert.Contains(p, Param{"intel_iommu", "on"})
|
||||
|
||||
m, err := qemu.machine()
|
||||
|
||||
assert.NoError(err)
|
||||
assert.Contains(m.Options, "kernel_irqchip=split")
|
||||
}
|
||||
|
@ -130,6 +130,9 @@ type qemuArch interface {
|
||||
|
||||
// appendPCIeRootPortDevice appends a pcie-root-port device to pcie.0 bus
|
||||
appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32) []govmmQemu.Device
|
||||
|
||||
// appendIOMMU appends a vIOMMU device
|
||||
appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error)
|
||||
}
|
||||
|
||||
type qemuArchBase struct {
|
||||
@ -765,4 +768,22 @@ func (q *qemuArchBase) addBridge(b types.Bridge) {
|
||||
// appendPCIeRootPortDevice appends to devices the given pcie-root-port
|
||||
func (q *qemuArchBase) appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32) []govmmQemu.Device {
|
||||
return genericAppendPCIeRootPort(devices, number, q.machineType)
|
||||
|
||||
}
|
||||
|
||||
// appendIOMMU appends a virtual IOMMU device
|
||||
func (q *qemuArchBase) appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error) {
|
||||
switch q.machineType {
|
||||
case QemuQ35:
|
||||
iommu := govmmQemu.IommuDev{
|
||||
Intremap: true,
|
||||
DeviceIotlb: true,
|
||||
CachingMode: true,
|
||||
}
|
||||
|
||||
devices = append(devices, iommu)
|
||||
return devices, nil
|
||||
default:
|
||||
return devices, fmt.Errorf("Machine Type %s does not support vIOMMU", q.machineType)
|
||||
}
|
||||
}
|
||||
|
@ -566,3 +566,27 @@ func TestQemuArchBaseAppendNetwork(t *testing.T) {
|
||||
assert.NoError(err)
|
||||
assert.Equal(expectedOut, devices)
|
||||
}
|
||||
|
||||
func TestQemuArchBaseAppendIOMMU(t *testing.T) {
|
||||
var devices []govmmQemu.Device
|
||||
var err error
|
||||
assert := assert.New(t)
|
||||
qemuArchBase := newQemuArchBase()
|
||||
|
||||
expectedOut := []govmmQemu.Device{
|
||||
govmmQemu.IommuDev{
|
||||
Intremap: true,
|
||||
DeviceIotlb: true,
|
||||
CachingMode: true,
|
||||
},
|
||||
}
|
||||
// Test IOMMU is not appended to PC machine type
|
||||
qemuArchBase.machineType = QemuPC
|
||||
devices, err = qemuArchBase.appendIOMMU(devices)
|
||||
assert.Error(err)
|
||||
|
||||
qemuArchBase.machineType = QemuQ35
|
||||
devices, err = qemuArchBase.appendIOMMU(devices)
|
||||
assert.NoError(err)
|
||||
assert.Equal(expectedOut, devices)
|
||||
}
|
||||
|
@ -7,6 +7,7 @@ package virtcontainers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"runtime"
|
||||
"strings"
|
||||
@ -168,3 +169,7 @@ func (q *qemuArm64) setIgnoreSharedMemoryMigrationCaps(_ context.Context, _ *gov
|
||||
// x-ignore-shared is not supported on arm64 for now
|
||||
return nil
|
||||
}
|
||||
|
||||
func (q *qemuArm64) appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error) {
|
||||
return devices, fmt.Errorf("Arm64 architecture does not support vIOMMU")
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
package virtcontainers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
govmmQemu "github.com/intel/govmm/qemu"
|
||||
@ -121,3 +122,7 @@ func (q *qemuPPC64le) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8)
|
||||
func (q *qemuPPC64le) appendBridges(devices []govmmQemu.Device) []govmmQemu.Device {
|
||||
return genericAppendBridges(devices, q.Bridges, q.machineType)
|
||||
}
|
||||
|
||||
func (q *qemuPPC64le) appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error) {
|
||||
return devices, fmt.Errorf("PPC64le does not support appending a vIOMMU")
|
||||
}
|
||||
|
@ -268,3 +268,7 @@ func (q *qemuS390x) appendVSock(devices []govmmQemu.Device, vsock types.VSock) (
|
||||
return devices, nil
|
||||
|
||||
}
|
||||
|
||||
func (q *qemuS390x) appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error) {
|
||||
return devices, fmt.Errorf("S390x does not support appending a vIOMMU")
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user