diff --git a/src/runtime/cli/config/configuration-fc.toml.in b/src/runtime/cli/config/configuration-fc.toml.in index c9850d974b..0f0dc63afa 100644 --- a/src/runtime/cli/config/configuration-fc.toml.in +++ b/src/runtime/cli/config/configuration-fc.toml.in @@ -129,6 +129,12 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@" # result in memory pre allocation #enable_hugepages = true +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: intel_iommu=on iommu=pt +#enable_iommu = true + # Enable swap of vm memory. Default false. # The behaviour is undefined if mem_prealloc is also set to true #enable_swap = true diff --git a/src/runtime/cli/config/configuration-qemu-virtiofs.toml.in b/src/runtime/cli/config/configuration-qemu-virtiofs.toml.in index 46a5ff0cab..129dc9963d 100644 --- a/src/runtime/cli/config/configuration-qemu-virtiofs.toml.in +++ b/src/runtime/cli/config/configuration-qemu-virtiofs.toml.in @@ -183,6 +183,12 @@ enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@ # simulated block device nodes for vhost-user devices to live. vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: intel_iommu=on iommu=pt +#enable_iommu = true + # Enable file based guest memory support. The default is an empty string which # will disable this feature. In the case of virtio-fs, this is enabled # automatically and '/dev/shm' is used as the backing folder. 
diff --git a/src/runtime/cli/config/configuration-qemu.toml.in b/src/runtime/cli/config/configuration-qemu.toml.in index 46ce7d9b53..289ddde1a8 100644 --- a/src/runtime/cli/config/configuration-qemu.toml.in +++ b/src/runtime/cli/config/configuration-qemu.toml.in @@ -190,6 +190,12 @@ enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@ # simulated block device nodes for vhost-user devices to live. vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: intel_iommu=on iommu=pt +#enable_iommu = true + # Enable file based guest memory support. The default is an empty string which # will disable this feature. In the case of virtio-fs, this is enabled # automatically and '/dev/shm' is used as the backing folder. diff --git a/src/runtime/go.mod b/src/runtime/go.mod index aefcb5b238..6c637c4417 100644 --- a/src/runtime/go.mod +++ b/src/runtime/go.mod @@ -29,7 +29,7 @@ require ( github.com/gogo/protobuf v1.3.1 github.com/hashicorp/go-multierror v1.0.0 github.com/hashicorp/yamux v0.0.0-20190923154419-df201c70410d - github.com/intel/govmm v0.0.0-20200304142514-e969afbec52c + github.com/intel/govmm v0.0.0-20200602145448-7cc469641b7b github.com/mdlayher/vsock v0.0.0-20191108225356-d9c65923cb8f github.com/mitchellh/mapstructure v1.1.2 github.com/opencontainers/runc v1.0.0-rc9.0.20200102164712-2b52db75279c diff --git a/src/runtime/go.sum b/src/runtime/go.sum index 74c31e8741..cd32690605 100644 --- a/src/runtime/go.sum +++ b/src/runtime/go.sum @@ -130,6 +130,9 @@ github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpO github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/intel/govmm v0.0.0-20200304142514-e969afbec52c h1:hbbnB7xG1bSzUBqSTeNATPODx3CXM/omWUF8RMfFY5s= github.com/intel/govmm v0.0.0-20200304142514-e969afbec52c/go.mod 
h1:QKGWoQtjvkvFtzP6ybiM3lxUHqf83Sv3oLqyELUKH4g= +github.com/intel/govmm v0.0.0-20200527135442-7efaf0b1cde3/go.mod h1:QKGWoQtjvkvFtzP6ybiM3lxUHqf83Sv3oLqyELUKH4g= +github.com/intel/govmm v0.0.0-20200602145448-7cc469641b7b h1:QqUb1HVk0Nb9zyzvIkMmhI7DP5gzyWPx/6md21M52U0= +github.com/intel/govmm v0.0.0-20200602145448-7cc469641b7b/go.mod h1:QKGWoQtjvkvFtzP6ybiM3lxUHqf83Sv3oLqyELUKH4g= github.com/j-keck/arping v0.0.0-20160618110441-2cf9dc699c56/go.mod h1:ymszkNOg6tORTn+6F6j+Jc8TOr5osrynvN6ivFWZ2GA= github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index aaf78cc3f9..968d50a18a 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -38,6 +38,7 @@ const defaultBlockDeviceCacheNoflush bool = false const defaultEnableIOThreads bool = false const defaultEnableMemPrealloc bool = false const defaultEnableHugePages bool = false +const defaultEnableIOMMU bool = false const defaultFileBackedMemRootDir string = "" const defaultEnableSwap bool = false const defaultEnableDebug bool = false diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index cd946dadc0..0136c3c186 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -118,6 +118,7 @@ type hypervisor struct { MemPrealloc bool `toml:"enable_mem_prealloc"` HugePages bool `toml:"enable_hugepages"` VirtioMem bool `toml:"enable_virtio_mem"` + IOMMU bool `toml:"enable_iommu"` FileBackedMemRootDir string `toml:"file_mem_backend"` Swap bool `toml:"enable_swap"` Debug bool `toml:"enable_debug"` @@ -645,6 +646,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { VirtioFSExtraArgs: h.VirtioFSExtraArgs, MemPrealloc: h.MemPrealloc, 
HugePages: h.HugePages, + IOMMU: h.IOMMU, FileBackedMemRootDir: h.FileBackedMemRootDir, Mlock: !h.Swap, Debug: h.Debug, @@ -1086,6 +1088,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { DefaultBridges: defaultBridgesCount, MemPrealloc: defaultEnableMemPrealloc, HugePages: defaultEnableHugePages, + IOMMU: defaultEnableIOMMU, FileBackedMemRootDir: defaultFileBackedMemRootDir, Mlock: !defaultEnableSwap, Debug: defaultEnableDebug, diff --git a/src/runtime/vendor/github.com/intel/govmm/qemu/qemu.go b/src/runtime/vendor/github.com/intel/govmm/qemu/qemu.go index a5e5dfaf96..a149dad8b0 100644 --- a/src/runtime/vendor/github.com/intel/govmm/qemu/qemu.go +++ b/src/runtime/vendor/github.com/intel/govmm/qemu/qemu.go @@ -51,6 +51,11 @@ type Machine struct { Options string } +const ( + // MachineTypeMicrovm is the QEMU microvm machine type for amd64 + MachineTypeMicrovm string = "microvm" +) + // Device is the qemu device interface. type Device interface { Valid() bool @@ -127,7 +132,11 @@ const ( func isDimmSupported(config *Config) bool { switch runtime.GOARCH { - case "amd64", "386": + case "amd64", "386", "ppc64le": + if config != nil && config.Machine.Type == MachineTypeMicrovm { + // microvm does not support NUMA + return false + } return true default: return false @@ -153,6 +162,9 @@ const ( func (transport VirtioTransport) defaultTransport(config *Config) VirtioTransport { switch runtime.GOARCH { case "amd64", "386": + if config != nil && config.Machine.Type == MachineTypeMicrovm { + return TransportMMIO + } return TransportPCI case "s390x": return TransportCCW @@ -868,6 +880,9 @@ type SerialDevice struct { // Transport is the virtio transport for this device. Transport VirtioTransport + + // MaxPorts is the maximum number of ports for this device. + MaxPorts uint } // Valid returns true if the SerialDevice structure is valid and complete. 
@@ -891,6 +906,9 @@ func (dev SerialDevice) QemuParams(config *Config) []string { deviceParams = append(deviceParams, fmt.Sprintf(",id=%s", dev.ID)) if dev.Transport.isVirtioPCI(config) { deviceParams = append(deviceParams, fmt.Sprintf(",romfile=%s", dev.ROMFile)) + if dev.Driver == VirtioSerial && dev.MaxPorts != 0 { + deviceParams = append(deviceParams, fmt.Sprintf(",max_ports=%d", dev.MaxPorts)) + } } if dev.Transport.isVirtioCCW(config) { @@ -1843,6 +1861,52 @@ func (b BalloonDevice) deviceName(config *Config) string { return BalloonDeviceTransport[b.Transport] } +// IommuDev represents a Intel IOMMU Device +type IommuDev struct { + Intremap bool + DeviceIotlb bool + CachingMode bool +} + +// Valid returns true if the IommuDev is valid +func (dev IommuDev) Valid() bool { + return true +} + +// deviceName the qemu device name +func (dev IommuDev) deviceName() string { + return "intel-iommu" +} + +// QemuParams returns the qemu parameters built out of the IommuDev. +func (dev IommuDev) QemuParams(_ *Config) []string { + var qemuParams []string + var deviceParams []string + + deviceParams = append(deviceParams, dev.deviceName()) + if dev.Intremap { + deviceParams = append(deviceParams, "intremap=on") + } else { + deviceParams = append(deviceParams, "intremap=off") + } + + if dev.DeviceIotlb { + deviceParams = append(deviceParams, "device-iotlb=on") + } else { + deviceParams = append(deviceParams, "device-iotlb=off") + } + + if dev.CachingMode { + deviceParams = append(deviceParams, "caching-mode=on") + } else { + deviceParams = append(deviceParams, "caching-mode=off") + } + + qemuParams = append(qemuParams, "-device") + qemuParams = append(qemuParams, strings.Join(deviceParams, ",")) + return qemuParams +} + // RTCBaseType is the qemu RTC base time type. type RTCBaseType string @@ -1864,6 +1928,9 @@ const ( // Host is for using the host clock as a reference. Host RTCClock = "host" + // RT is for using the host monotonic clock as a reference. 
+ RT RTCClock = "rt" + // VM is for using the guest clock as a reference VM RTCClock = "vm" ) @@ -1890,7 +1957,7 @@ type RTC struct { // Valid returns true if the RTC structure is valid and complete. func (rtc RTC) Valid() bool { - if rtc.Clock != Host && rtc.Clock != VM { + if rtc.Clock != Host && rtc.Clock != RT && rtc.Clock != VM { return false } diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 7b7be5baa3..a5e7bd25c2 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -358,6 +358,9 @@ type HypervisorConfig struct { // VirtioMem is used to enable/disable virtio-mem VirtioMem bool + // IOMMU specifies if the VM should have a vIOMMU + IOMMU bool + // Realtime Used to enable/disable realtime Realtime bool diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index 10ce7833e2..4544153a6e 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -148,6 +148,9 @@ const ( // HugePages is a sandbox annotation to specify if the memory should be pre-allocated from huge pages HugePages = kataAnnotHypervisorPrefix + "enable_hugepages" + // IOMMU is a sandbox annotation to specify if the VM should have a vIOMMU device + IOMMU = kataAnnotHypervisorPrefix + "enable_iommu" + // FileBackedMemRootDir is a sandbox annotation to soecify file based memory backend root directory FileBackedMemRootDir = kataAnnotHypervisorPrefix + "file_mem_backend" diff --git a/src/runtime/virtcontainers/pkg/oci/utils.go b/src/runtime/virtcontainers/pkg/oci/utils.go index 40fdb94467..476f0d6603 100644 --- a/src/runtime/virtcontainers/pkg/oci/utils.go +++ b/src/runtime/virtcontainers/pkg/oci/utils.go @@ -539,6 +539,15 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig sbConfig.HypervisorConfig.HugePages = 
hugePages } + + if value, ok := ocispec.Annotations[vcAnnotations.IOMMU]; ok { + iommu, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for iommu: Please specify boolean value 'true|false'") + } + + sbConfig.HypervisorConfig.IOMMU = iommu + } return nil } diff --git a/src/runtime/virtcontainers/pkg/oci/utils_test.go b/src/runtime/virtcontainers/pkg/oci/utils_test.go index 662095d957..c78f9aa888 100644 --- a/src/runtime/virtcontainers/pkg/oci/utils_test.go +++ b/src/runtime/virtcontainers/pkg/oci/utils_test.go @@ -771,6 +771,7 @@ func TestAddHypervisorAnnotations(t *testing.T) { ocispec.Annotations[vcAnnotations.EnableSwap] = "true" ocispec.Annotations[vcAnnotations.FileBackedMemRootDir] = "/dev/shm" ocispec.Annotations[vcAnnotations.HugePages] = "true" + ocispec.Annotations[vcAnnotations.IOMMU] = "true" ocispec.Annotations[vcAnnotations.BlockDeviceDriver] = "virtio-scsi" ocispec.Annotations[vcAnnotations.DisableBlockDeviceUse] = "true" ocispec.Annotations[vcAnnotations.EnableIOThreads] = "true" @@ -802,6 +803,7 @@ func TestAddHypervisorAnnotations(t *testing.T) { assert.Equal(config.HypervisorConfig.Mlock, false) assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm") assert.Equal(config.HypervisorConfig.HugePages, true) + assert.Equal(config.HypervisorConfig.IOMMU, true) assert.Equal(config.HypervisorConfig.BlockDeviceDriver, "virtio-scsi") assert.Equal(config.HypervisorConfig.DisableBlockDeviceUse, true) assert.Equal(config.HypervisorConfig.EnableIOThreads, true) diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 45524a5a48..fca0e57dee 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -409,6 +409,13 @@ func (q *qemu) buildDevices(initrdPath string) ([]govmmQemu.Device, *govmmQemu.I } } + if q.config.IOMMU { + devices, err = q.arch.appendIOMMU(devices) + if err != nil { + return nil, nil, err + } + } + var ioThread 
*govmmQemu.IOThread if q.config.BlockDeviceDriver == config.VirtioSCSI { return q.arch.appendSCSIController(devices, q.config.EnableIOThreads) diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index 274b5f3f0d..2da6073650 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -48,7 +48,6 @@ var kernelParams = []Param{ {"reboot", "k"}, {"console", "hvc0"}, {"console", "hvc1"}, - {"iommu", "off"}, {"cryptomgr.notests", ""}, {"net.ifnames", "0"}, {"pci", "lastbus=0"}, @@ -89,12 +88,31 @@ func newQemuArch(config HypervisorConfig) qemuArch { factory = true } + var qemuMachines = supportedQemuMachines + if config.IOMMU { + var q35QemuIOMMUOptions = "accel=kvm,kernel_irqchip=split" + + kernelParams = append(kernelParams, + Param{"intel_iommu", "on"}) + kernelParams = append(kernelParams, + Param{"iommu", "pt"}) + + for i, m := range qemuMachines { + if m.Type == QemuQ35 { + qemuMachines[i].Options = q35QemuIOMMUOptions + } + } + } else { + kernelParams = append(kernelParams, + Param{"iommu", "off"}) + } + q := &qemuAmd64{ qemuArchBase: qemuArchBase{ machineType: machineType, memoryOffset: config.MemOffset, qemuPaths: qemuPaths, - supportedQemuMachines: supportedQemuMachines, + supportedQemuMachines: qemuMachines, kernelParamsNonDebug: kernelParamsNonDebug, kernelParamsDebug: kernelParamsDebug, kernelParams: kernelParams, diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index 4c7e98a188..2032e1f01d 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -239,3 +239,19 @@ func TestQemuAmd64WithInitrd(t *testing.T) { assert.NotContains(m.Options, qemuNvdimmOption) } } + +func TestQemuAmd64Iommu(t *testing.T) { + assert := assert.New(t) + + config := qemuConfig(QemuQ35) + config.IOMMU = true + qemu := newQemuArch(config) + + p := qemu.kernelParameters(false) + 
assert.Contains(p, Param{"intel_iommu", "on"}) + + m, err := qemu.machine() + + assert.NoError(err) + assert.Contains(m.Options, "kernel_irqchip=split") +} diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index d38ea7b052..f790a36ee6 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -130,6 +130,9 @@ type qemuArch interface { // appendPCIeRootPortDevice appends a pcie-root-port device to pcie.0 bus appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32) []govmmQemu.Device + + // appendIOMMU appends a vIOMMU device + appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error) } type qemuArchBase struct { @@ -765,4 +768,22 @@ func (q *qemuArchBase) addBridge(b types.Bridge) { // appendPCIeRootPortDevice appends to devices the given pcie-root-port func (q *qemuArchBase) appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32) []govmmQemu.Device { return genericAppendPCIeRootPort(devices, number, q.machineType) + +} + +// appendIOMMU appends a virtual IOMMU device; it returns an error for any machine type other than QemuQ35 +func (q *qemuArchBase) appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error) { + switch q.machineType { + case QemuQ35: + iommu := govmmQemu.IommuDev{ + Intremap: true, + DeviceIotlb: true, + CachingMode: true, + } + + devices = append(devices, iommu) + return devices, nil + default: + return devices, fmt.Errorf("Machine Type %s does not support vIOMMU", q.machineType) + } } diff --git a/src/runtime/virtcontainers/qemu_arch_base_test.go b/src/runtime/virtcontainers/qemu_arch_base_test.go index dec0fe6e24..8eacb085ac 100644 --- a/src/runtime/virtcontainers/qemu_arch_base_test.go +++ b/src/runtime/virtcontainers/qemu_arch_base_test.go @@ -566,3 +566,27 @@ func TestQemuArchBaseAppendNetwork(t *testing.T) { assert.NoError(err) assert.Equal(expectedOut, devices) } + +func TestQemuArchBaseAppendIOMMU(t *testing.T) { + var devices []govmmQemu.Device + var err error + 
assert := assert.New(t) + qemuArchBase := newQemuArchBase() + + expectedOut := []govmmQemu.Device{ + govmmQemu.IommuDev{ + Intremap: true, + DeviceIotlb: true, + CachingMode: true, + }, + } + // Test IOMMU is not appended to PC machine type + qemuArchBase.machineType = QemuPC + devices, err = qemuArchBase.appendIOMMU(devices) + assert.Error(err) + + qemuArchBase.machineType = QemuQ35 + devices, err = qemuArchBase.appendIOMMU(devices) + assert.NoError(err) + assert.Equal(expectedOut, devices) +} diff --git a/src/runtime/virtcontainers/qemu_arm64.go b/src/runtime/virtcontainers/qemu_arm64.go index 6d089cf010..9d1964c1c6 100644 --- a/src/runtime/virtcontainers/qemu_arm64.go +++ b/src/runtime/virtcontainers/qemu_arm64.go @@ -7,6 +7,7 @@ package virtcontainers import ( "context" + "fmt" "io/ioutil" "runtime" "strings" @@ -168,3 +169,7 @@ func (q *qemuArm64) setIgnoreSharedMemoryMigrationCaps(_ context.Context, _ *gov // x-ignore-shared not support in arm64 for now return nil } + +func (q *qemuArm64) appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error) { + return devices, fmt.Errorf("Arm64 architecture does not support vIOMMU") +} diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index 5034193157..05a85f4bdc 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -6,6 +6,7 @@ package virtcontainers import ( + "fmt" "time" govmmQemu "github.com/intel/govmm/qemu" @@ -121,3 +122,7 @@ func (q *qemuPPC64le) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) func (q *qemuPPC64le) appendBridges(devices []govmmQemu.Device) []govmmQemu.Device { return genericAppendBridges(devices, q.Bridges, q.machineType) } + +func (q *qemuPPC64le) appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error) { + return devices, fmt.Errorf("PPC64le does not support appending a vIOMMU") +} diff --git a/src/runtime/virtcontainers/qemu_s390x.go 
b/src/runtime/virtcontainers/qemu_s390x.go index 8480ff6957..d6e449ff38 100644 --- a/src/runtime/virtcontainers/qemu_s390x.go +++ b/src/runtime/virtcontainers/qemu_s390x.go @@ -268,3 +268,7 @@ func (q *qemuS390x) appendVSock(devices []govmmQemu.Device, vsock types.VSock) ( return devices, nil } + +func (q *qemuS390x) appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error) { + return devices, fmt.Errorf("S390x does not support appending a vIOMMU") +}