// Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 // package virtcontainers import ( "context" "fmt" "time" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/sirupsen/logrus" "github.com/intel-go/cpuid" govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" ) type qemuAmd64 struct { // inherit from qemuArchBase, overwrite methods if needed qemuArchBase vmFactory bool devLoadersCount uint32 } const ( defaultQemuPath = "/usr/bin/qemu-system-x86_64" defaultQemuMachineType = QemuQ35 defaultQemuMachineOptions = "accel=kvm,kernel_irqchip=on" qmpMigrationWaitTimeout = 5 * time.Second tdxSysFirmwareDir = "/sys/firmware/tdx_seam/" tdxCPUFlag = "tdx" sevKvmParameterPath = "/sys/module/kvm_amd/parameters/sev" ) var qemuPaths = map[string]string{ QemuQ35: defaultQemuPath, QemuMicrovm: defaultQemuPath, } var kernelParams = []Param{ {"tsc", "reliable"}, {"no_timer_check", ""}, {"rcupdate.rcu_expedited", "1"}, {"i8042.direct", "1"}, {"i8042.dumbkbd", "1"}, {"i8042.nopnp", "1"}, {"i8042.noaux", "1"}, {"noreplace-smp", ""}, {"reboot", "k"}, {"console", "hvc0"}, {"console", "hvc1"}, {"cryptomgr.notests", ""}, {"net.ifnames", "0"}, {"pci", "lastbus=0"}, } var supportedQemuMachines = []govmmQemu.Machine{ { Type: QemuQ35, Options: defaultQemuMachineOptions, }, { Type: QemuVirt, Options: defaultQemuMachineOptions, }, { Type: QemuMicrovm, Options: defaultQemuMachineOptions, }, } // MaxQemuVCPUs returns the maximum number of vCPUs supported func MaxQemuVCPUs() uint32 { return uint32(240) } func newQemuArch(config HypervisorConfig) (qemuArch, error) { machineType := config.HypervisorMachineType if machineType == "" { machineType = defaultQemuMachineType } var mp *govmmQemu.Machine for _, m := range supportedQemuMachines { if m.Type == machineType { mp = &m break } } if mp == nil { return nil, fmt.Errorf("unrecognised machinetype: %v", machineType) } factory := false if config.BootToBeTemplate || config.BootFromTemplate { factory = true } // IOMMU and Guest Protection require a split IRQ controller for handling interrupts // otherwise QEMU won't be able to create the kernel irqchip if config.IOMMU || config.ConfidentialGuest { mp.Options = "accel=kvm,kernel_irqchip=split" } if config.IOMMU { kernelParams = append(kernelParams, Param{"intel_iommu", "on"}) kernelParams = append(kernelParams, Param{"iommu", "pt"}) } q := &qemuAmd64{ qemuArchBase: qemuArchBase{ qemuMachine: *mp, qemuExePath: qemuPaths[machineType], memoryOffset: config.MemOffset, kernelParamsNonDebug: kernelParamsNonDebug, kernelParamsDebug: kernelParamsDebug, kernelParams: kernelParams, disableNvdimm: config.DisableImageNvdimm, dax: true, protection: noneProtection, }, vmFactory: factory, } if config.ConfidentialGuest { if err := q.enableProtection(); err != nil { return nil, err } } q.handleImagePath(config) return q, nil } func (q *qemuAmd64) capabilities() types.Capabilities { var caps types.Capabilities if q.qemuMachine.Type == QemuQ35 || q.qemuMachine.Type == QemuVirt { caps.SetBlockDeviceHotplugSupport() } caps.SetMultiQueueSupport() caps.SetFsSharingSupport() return caps } func (q *qemuAmd64) bridges(number uint32) { q.Bridges = genericBridges(number, q.qemuMachine.Type) } func (q *qemuAmd64) cpuModel() string { cpuModel := defaultCPUModel // VMX is not migratable yet. // issue: https://github.com/kata-containers/runtime/issues/1750 if q.vmFactory { hvLogger.WithField("subsystem", "qemuAmd64").Warn("VMX is not migratable yet: turning it off") cpuModel += ",vmx=off" } return cpuModel } func (q *qemuAmd64) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) govmmQemu.Memory { return genericMemoryTopology(memoryMb, hostMemoryMb, slots, q.memoryOffset) } // Is Memory Hotplug supported by this architecture/machine type combination? func (q *qemuAmd64) supportGuestMemoryHotplug() bool { // true for all amd64 machine types except for microvm. return q.qemuMachine.Type != govmmQemu.MachineTypeMicrovm } func (q *qemuAmd64) appendImage(ctx context.Context, devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) { if !q.disableNvdimm { return q.appendNvdimmImage(devices, path) } return q.appendBlockImage(ctx, devices, path) } // enable protection func (q *qemuAmd64) enableProtection() error { var err error q.protection, err = availableGuestProtection() if err != nil { return err } logger := hvLogger.WithFields(logrus.Fields{ "subsystem": "qemuAmd64", "machine": q.qemuMachine, "kernel-params-debug": q.kernelParamsDebug, "kernel-params-non-debug": q.kernelParamsNonDebug, "kernel-params": q.kernelParams}) switch q.protection { case tdxProtection: if q.qemuMachine.Options != "" { q.qemuMachine.Options += "," } q.qemuMachine.Options += "kvm-type=tdx,confidential-guest-support=tdx" q.kernelParams = append(q.kernelParams, Param{"tdx_guest", ""}) logger.Info("Enabling TDX guest protection") return nil case sevProtection: if q.qemuMachine.Options != "" { q.qemuMachine.Options += "," } q.qemuMachine.Options += "confidential-guest-support=sev" logger.Info("Enabling SEV guest protection") return nil // TODO: Add support for other x86_64 technologies default: return fmt.Errorf("This system doesn't support Confidential Computing (Guest Protection)") } } // append protection device func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware string) ([]govmmQemu.Device, string, error) { switch q.protection { case tdxProtection: id := q.devLoadersCount q.devLoadersCount += 1 return append(devices, govmmQemu.Object{ Driver: govmmQemu.Loader, Type: govmmQemu.TDXGuest, ID: "tdx", DeviceID: fmt.Sprintf("fd%d", id), Debug: false, File: firmware, }), "", nil case sevProtection: return append(devices, govmmQemu.Object{ Type: govmmQemu.SEVGuest, ID: "sev", Debug: false, File: firmware, CBitPos: cpuid.AMDMemEncrypt.CBitPosition, ReducedPhysBits: cpuid.AMDMemEncrypt.PhysAddrReduction, }), "", nil case noneProtection: return devices, firmware, nil default: return devices, "", fmt.Errorf("Unsupported guest protection technology: %v", q.protection) } }