From 789a59549e732c5bf94b14b0684c1c4b199f3fc1 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Mon, 31 May 2021 11:47:58 +0300 Subject: [PATCH 1/2] virtcontainers: Remove the pc machine Keeping around two different x86 machines has no added value and require more tests and maintenance. Prefer the q35 machine since it has more features and drop the pc machine. Fixes #1953 Depends-on: github.com/kata-containers/tests#3586 Signed-off-by: Marcel Apfelbaum --- .../Intel-GPU-passthrough-and-Kata.md | 4 +- .../Nvidia-GPU-passthrough-and-Kata.md | 7 --- .../cli/config/configuration-fc.toml.in | 2 +- .../cli/config/configuration-qemu.toml.in | 2 +- .../pkg/katautils/config-settings.go.in | 2 +- src/runtime/virtcontainers/qemu.go | 2 - src/runtime/virtcontainers/qemu_amd64.go | 10 +--- src/runtime/virtcontainers/qemu_amd64_test.go | 59 ++++--------------- src/runtime/virtcontainers/qemu_arch_base.go | 3 - .../virtcontainers/qemu_arch_base_test.go | 8 +-- src/runtime/virtcontainers/qemu_test.go | 2 +- 11 files changed, 20 insertions(+), 81 deletions(-) diff --git a/docs/use-cases/Intel-GPU-passthrough-and-Kata.md b/docs/use-cases/Intel-GPU-passthrough-and-Kata.md index e5fe2a2fb5..eb51fe763d 100644 --- a/docs/use-cases/Intel-GPU-passthrough-and-Kata.md +++ b/docs/use-cases/Intel-GPU-passthrough-and-Kata.md @@ -65,8 +65,8 @@ configuration in the Kata `configuration.toml` file as shown below. $ sudo sed -i -e 's/^# *\(hotplug_vfio_on_root_bus\).*=.*$/\1 = true/g' /usr/share/defaults/kata-containers/configuration.toml ``` -Make sure you are using the `pc` machine type by verifying `machine_type = "pc"` is -set in the `configuration.toml`. +Make sure you are using the `q35` machine type by verifying `machine_type = "q35"` is +set in the `configuration.toml`. Make sure `pcie_root_port` is set to a positive value. ## Build Kata Containers kernel with GPU support diff --git a/docs/use-cases/Nvidia-GPU-passthrough-and-Kata.md b/docs/use-cases/Nvidia-GPU-passthrough-and-Kata.md index 32f90f3f96..2fed3ffe75 100644 --- a/docs/use-cases/Nvidia-GPU-passthrough-and-Kata.md +++ b/docs/use-cases/Nvidia-GPU-passthrough-and-Kata.md @@ -75,13 +75,6 @@ To use non-large BARs devices (for example, Nvidia Tesla T4), you need Kata vers Follow the [Kata Containers setup instructions](../install/README.md) to install the latest version of Kata. -The following configuration in the Kata `configuration.toml` file as shown below can work: -``` -machine_type = "pc" - -hotplug_vfio_on_root_bus = true -``` - To use large BARs devices (for example, Nvidia Tesla P100), you need Kata version 1.11.0 or above. The following configuration in the Kata `configuration.toml` file as shown below can work: diff --git a/src/runtime/cli/config/configuration-fc.toml.in b/src/runtime/cli/config/configuration-fc.toml.in index 8b54ce6c25..d4990cc316 100644 --- a/src/runtime/cli/config/configuration-fc.toml.in +++ b/src/runtime/cli/config/configuration-fc.toml.in @@ -178,7 +178,7 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@" # VFIO devices are hotplugged on a bridge by default. # Enable hotplugging on root bus. This may be required for devices with # a large PCI bar, as this is a current limitation with hotplugging on -# a bridge. This value is valid for "pc" machine type. +# a bridge. # Default false #hotplug_vfio_on_root_bus = true diff --git a/src/runtime/cli/config/configuration-qemu.toml.in b/src/runtime/cli/config/configuration-qemu.toml.in index b50f7bcc5a..d8eda22f96 100644 --- a/src/runtime/cli/config/configuration-qemu.toml.in +++ b/src/runtime/cli/config/configuration-qemu.toml.in @@ -277,7 +277,7 @@ pflashes = [] # VFIO devices are hotplugged on a bridge by default. # Enable hotplugging on root bus. This may be required for devices with # a large PCI bar, as this is a current limitation with hotplugging on -# a bridge. This value is valid for "pc" machine type. +# a bridge. # Default false #hotplug_vfio_on_root_bus = true diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 31a4828c46..e92d8741c9 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -20,7 +20,7 @@ var defaultCPUFeatures = "" var systemdUnitName = "kata-containers.target" const defaultKernelParams = "" -const defaultMachineType = "pc" +const defaultMachineType = "q35" const defaultVCPUCount uint32 = 1 const defaultMaxVCPUCount uint32 = 0 diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 8c478f0b4b..89a8ce13d6 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -2095,8 +2095,6 @@ func genericBridges(number uint32, machineType string) []types.Bridge { case QemuQ35: // currently only pci bridges are supported // qemu-2.10 will introduce pcie bridges - fallthrough - case QemuPC: bt = types.PCI case QemuVirt: bt = types.PCI diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index ed57de7c18..d19bab5c5f 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -29,7 +29,7 @@ type qemuAmd64 struct { const ( defaultQemuPath = "/usr/bin/qemu-system-x86_64" - defaultQemuMachineType = QemuPC + defaultQemuMachineType = QemuQ35 defaultQemuMachineOptions = "accel=kvm,kernel_irqchip" @@ -44,7 +44,6 @@ const ( var qemuPaths = map[string]string{ QemuPCLite: "/usr/bin/qemu-lite-system-x86_64", - QemuPC: defaultQemuPath, QemuQ35: defaultQemuPath, QemuMicrovm: defaultQemuPath, } @@ -71,10 +70,6 @@ var supportedQemuMachines = []govmmQemu.Machine{ Type: QemuPCLite, Options: defaultQemuMachineOptions, }, - { - Type: QemuPC, - Options: defaultQemuMachineOptions, - }, { Type: QemuQ35, Options: defaultQemuMachineOptions, @@ -158,8 +153,7 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { func (q *qemuAmd64) capabilities() types.Capabilities { var caps types.Capabilities - if q.qemuMachine.Type == QemuPC || - q.qemuMachine.Type == QemuQ35 || + if q.qemuMachine.Type == QemuQ35 || q.qemuMachine.Type == QemuVirt { caps.SetBlockDeviceHotplugSupport() } diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index 5016e58ad7..532970769c 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -42,14 +42,10 @@ func TestQemuAmd64BadMachineType(t *testing.T) { func TestQemuAmd64Capabilities(t *testing.T) { assert := assert.New(t) - amd64 := newTestQemu(assert, QemuPC) + amd64 := newTestQemu(assert, QemuQ35) caps := amd64.capabilities() assert.True(caps.IsBlockDeviceHotplugSupported()) - amd64 = newTestQemu(assert, QemuQ35) - caps = amd64.capabilities() - assert.True(caps.IsBlockDeviceHotplugSupported()) - amd64 = newTestQemu(assert, QemuMicrovm) caps = amd64.capabilities() assert.False(caps.IsBlockDeviceHotplugSupported()) @@ -57,23 +53,11 @@ func TestQemuAmd64Capabilities(t *testing.T) { func TestQemuAmd64Bridges(t *testing.T) { assert := assert.New(t) - amd64 := newTestQemu(assert, QemuPC) len := 5 + amd64 := newTestQemu(assert, QemuMicrovm) amd64.bridges(uint32(len)) bridges := amd64.getBridges() - assert.Len(bridges, len) - - for i, b := range bridges { - id := fmt.Sprintf("%s-bridge-%d", types.PCI, i) - assert.Equal(types.PCI, b.Type) - assert.Equal(id, b.ID) - assert.NotNil(b.Devices) - } - - amd64 = newTestQemu(assert, QemuMicrovm) - amd64.bridges(uint32(len)) - bridges = amd64.getBridges() assert.Nil(bridges) amd64 = newTestQemu(assert, QemuQ35) @@ -91,7 +75,7 @@ func TestQemuAmd64Bridges(t *testing.T) { func TestQemuAmd64CPUModel(t *testing.T) { assert := assert.New(t) - amd64 := newTestQemu(assert, QemuPC) + amd64 := newTestQemu(assert, QemuQ35) expectedOut := defaultCPUModel model := amd64.cpuModel() @@ -108,7 +92,7 @@ func TestQemuAmd64CPUModel(t *testing.T) { func TestQemuAmd64MemoryTopology(t *testing.T) { assert := assert.New(t) - amd64 := newTestQemu(assert, QemuPC) + amd64 := newTestQemu(assert, QemuQ35) memoryOffset := uint64(1024) hostMem := uint64(100) @@ -139,7 +123,7 @@ func TestQemuAmd64AppendImage(t *testing.T) { machinesCopy := make([]govmmQemu.Machine, len(supportedQemuMachines)) assert.Equal(len(supportedQemuMachines), copy(machinesCopy, supportedQemuMachines)) - cfg := qemuConfig(QemuPC) + cfg := qemuConfig(QemuQ35) cfg.ImagePath = f.Name() cfg.DisableImageNvdimm = false amd64, err := newQemuArch(cfg) @@ -189,41 +173,18 @@ func TestQemuAmd64AppendBridges(t *testing.T) { var devices []govmmQemu.Device assert := assert.New(t) - // check PC - amd64 := newTestQemu(assert, QemuPC) + // Check Q35 + amd64 := newTestQemu(assert, QemuQ35) amd64.bridges(1) bridges := amd64.getBridges() assert.Len(bridges, 1) - devices = amd64.appendBridges(devices) - assert.Len(devices, 1) - - expectedOut := []govmmQemu.Device{ - govmmQemu.BridgeDevice{ - Type: govmmQemu.PCIBridge, - Bus: defaultPCBridgeBus, - ID: bridges[0].ID, - Chassis: 1, - SHPC: true, - Addr: "2", - }, - } - - assert.Equal(expectedOut, devices) - - // Check Q35 - amd64 = newTestQemu(assert, QemuQ35) - - amd64.bridges(1) - bridges = amd64.getBridges() - assert.Len(bridges, 1) - devices = []govmmQemu.Device{} devices = amd64.appendBridges(devices) assert.Len(devices, 1) - expectedOut = []govmmQemu.Device{ + expectedOut := []govmmQemu.Device{ govmmQemu.BridgeDevice{ Type: govmmQemu.PCIBridge, Bus: defaultBridgeBus, @@ -240,7 +201,7 @@ func TestQemuAmd64AppendBridges(t *testing.T) { func TestQemuAmd64WithInitrd(t *testing.T) { assert := assert.New(t) - cfg := qemuConfig(QemuPC) + cfg := qemuConfig(QemuQ35) cfg.InitrdPath = "dummy-initrd" amd64, err := newQemuArch(cfg) assert.NoError(err) @@ -282,7 +243,7 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { var devices []govmmQemu.Device assert := assert.New(t) - amd64 := newTestQemu(assert, QemuPC) + amd64 := newTestQemu(assert, QemuQ35) id := amd64.(*qemuAmd64).devLoadersCount firmware := "tdvf.fd" diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index 0948401a62..47b5afa5b7 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -212,9 +212,6 @@ const ( // QemuPCLite is the QEMU pc-lite machine type for amd64 QemuPCLite = "pc-lite" - // QemuPC is the QEMU pc machine type for amd64 - QemuPC = "pc" - // QemuQ35 is the QEMU Q35 machine type for amd64 QemuQ35 = "q35" diff --git a/src/runtime/virtcontainers/qemu_arch_base_test.go b/src/runtime/virtcontainers/qemu_arch_base_test.go index 3db561c9f8..c1ea7d0b8d 100644 --- a/src/runtime/virtcontainers/qemu_arch_base_test.go +++ b/src/runtime/virtcontainers/qemu_arch_base_test.go @@ -28,7 +28,7 @@ const ( ) var qemuArchBaseMachine = govmmQemu.Machine{ - Type: "pc", + Type: "q35", } var qemuArchBaseQemuPaths = map[string]string{ @@ -143,7 +143,7 @@ func TestQemuAddDeviceToBridge(t *testing.T) { // addDeviceToBridge successfully q := newQemuArchBase() - q.qemuMachine.Type = QemuPC + q.qemuMachine.Type = QemuQ35 q.bridges(1) for i := uint32(1); i <= types.PCIBridgeMaxCapacity; i++ { @@ -559,10 +559,6 @@ func TestQemuArchBaseAppendIOMMU(t *testing.T) { CachingMode: true, }, } - // Test IOMMU is not appended to PC machine type - qemuArchBase.qemuMachine.Type = QemuPC - devices, err = qemuArchBase.appendIOMMU(devices) - assert.Error(err) qemuArchBase.qemuMachine.Type = QemuQ35 devices, err = qemuArchBase.appendIOMMU(devices) diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index 645842c11d..b16fa6bda4 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -362,7 +362,7 @@ func TestQemuQemuPath(t *testing.T) { qemuConfig.HypervisorPath = expectedPath qkvm := &qemuArchBase{ qemuMachine: govmmQemu.Machine{ - Type: "pc", + Type: "q35", Options: "", }, qemuExePath: expectedPath, From ac6b9c53d22c74629ec351d60134cf863f52ab40 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Mon, 21 Jun 2021 20:13:13 +0300 Subject: [PATCH 2/2] runtime: Hot-plug virtio-mem device on PCI bridge Currently the virtio-mem device is hotplugged on the root bus. This doesn't work for PCIe machines like q35. Hotplug the virtio-mem device into the pci bridge instead. Fixes #1953 Signed-off-by: Marcel Apfelbaum --- src/runtime/go.mod | 2 +- src/runtime/go.sum | 2 ++ .../kata-containers/govmm/qemu/qmp.go | 12 ++++++++++-- src/runtime/vendor/modules.txt | 4 +--- src/runtime/virtcontainers/qemu.go | 18 +++++++++++++++--- 5 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/runtime/go.mod b/src/runtime/go.mod index aa52037ac3..1116fbd960 100644 --- a/src/runtime/go.mod +++ b/src/runtime/go.mod @@ -31,7 +31,7 @@ require ( github.com/gogo/protobuf v1.3.1 github.com/hashicorp/go-multierror v1.0.0 github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9 - github.com/kata-containers/govmm v0.0.0-20210520142420-eb57f004d89f + github.com/kata-containers/govmm v0.0.0-20210622075516-263136e69ac8 github.com/mdlayher/vsock v0.0.0-20191108225356-d9c65923cb8f github.com/opencontainers/image-spec v1.0.1 // indirect github.com/opencontainers/runc v1.0.0-rc9.0.20200102164712-2b52db75279c diff --git a/src/runtime/go.sum b/src/runtime/go.sum index 1193119bd7..a8ef2c61c9 100644 --- a/src/runtime/go.sum +++ b/src/runtime/go.sum @@ -274,6 +274,8 @@ github.com/kata-containers/govmm v0.0.0-20210428163604-f0e9a35308ee h1:M4N7AdSHg github.com/kata-containers/govmm v0.0.0-20210428163604-f0e9a35308ee/go.mod h1:VmAHbsL5lLfzHW/MNL96NVLF840DNEV5i683kISgFKk= github.com/kata-containers/govmm v0.0.0-20210520142420-eb57f004d89f h1:jXMZY7GIz5kSv3/Rdiesg1WMvgXJKNOk3KxwxgNWAVk= github.com/kata-containers/govmm v0.0.0-20210520142420-eb57f004d89f/go.mod h1:VmAHbsL5lLfzHW/MNL96NVLF840DNEV5i683kISgFKk= +github.com/kata-containers/govmm v0.0.0-20210622075516-263136e69ac8 h1:ZQ/qgoFsgmvk7AikgW1pI4dFaXfkUEVn+KK63ju3vgU= +github.com/kata-containers/govmm v0.0.0-20210622075516-263136e69ac8/go.mod h1:VmAHbsL5lLfzHW/MNL96NVLF840DNEV5i683kISgFKk= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= diff --git a/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qmp.go b/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qmp.go index 97e924559c..23c288dc7d 100644 --- a/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qmp.go +++ b/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qmp.go @@ -1386,7 +1386,7 @@ func (q *QMP) ExecQueryCpusFast(ctx context.Context) ([]CPUInfoFast, error) { } // ExecMemdevAdd adds size of MiB memory device to the guest -func (q *QMP) ExecMemdevAdd(ctx context.Context, qomtype, id, mempath string, size int, share bool, driver, driverID string) error { +func (q *QMP) ExecMemdevAdd(ctx context.Context, qomtype, id, mempath string, size int, share bool, driver, driverID, addr, bus string) error { props := map[string]interface{}{"size": uint64(size) << 20} args := map[string]interface{}{ "qom-type": qomtype, @@ -1419,6 +1419,14 @@ func (q *QMP) ExecMemdevAdd(ctx context.Context, qomtype, id, mempath string, si "id": driverID, "memdev": id, } + + if bus != "" { + args["bus"] = bus + } + if addr != "" { + args["addr"] = addr + } + err = q.executeCommand(ctx, "device_add", args, nil) return err @@ -1426,7 +1434,7 @@ func (q *QMP) ExecMemdevAdd(ctx context.Context, qomtype, id, mempath string, si // ExecHotplugMemory adds size of MiB memory to the guest func (q *QMP) ExecHotplugMemory(ctx context.Context, qomtype, id, mempath string, size int, share bool) error { - return q.ExecMemdevAdd(ctx, qomtype, id, mempath, size, share, "pc-dimm", "dimm"+id) + return q.ExecMemdevAdd(ctx, qomtype, id, mempath, size, share, "pc-dimm", "dimm"+id, "", "") } // ExecuteNVDIMMDeviceAdd adds a block device to a QEMU instance using diff --git a/src/runtime/vendor/modules.txt b/src/runtime/vendor/modules.txt index cef609ea5e..9648172339 100644 --- a/src/runtime/vendor/modules.txt +++ b/src/runtime/vendor/modules.txt @@ -143,8 +143,6 @@ github.com/coreos/go-systemd/dbus github.com/cri-o/cri-o/pkg/annotations # github.com/davecgh/go-spew v1.1.1 github.com/davecgh/go-spew/spew -# github.com/dlespiau/covertool v0.0.0-20180314162135-b0c4c6d0583a -## explicit # github.com/docker/distribution v2.7.1+incompatible ## explicit github.com/docker/distribution/digestset @@ -224,7 +222,7 @@ github.com/hashicorp/go-multierror # github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9 ## explicit github.com/intel-go/cpuid -# github.com/kata-containers/govmm v0.0.0-20210520142420-eb57f004d89f +# github.com/kata-containers/govmm v0.0.0-20210622075516-263136e69ac8 ## explicit github.com/kata-containers/govmm/qemu # github.com/konsorten/go-windows-terminal-sequences v1.0.1 diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 89a8ce13d6..edfb02d131 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -708,7 +708,7 @@ func (q *qemu) getMemArgs() (bool, string, string, error) { return share, target, memoryBack, nil } -func (q *qemu) setupVirtioMem() error { +func (q *qemu) setupVirtioMem(ctx context.Context) error { maxMem, err := q.hostMemMB() if err != nil { return err @@ -724,7 +724,19 @@ func (q *qemu) setupVirtioMem() error { if err = q.qmpSetup(); err != nil { return err } - err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0") + + addr, bridge, err := q.arch.addDeviceToBridge(ctx, "virtiomem-dev", types.PCI) + if err != nil { + return err + } + + defer func() { + if err != nil { + q.arch.removeDeviceFromBridge("virtiomem-dev") + } + }() + + err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0", addr, bridge.ID) if err == nil { q.config.VirtioMem = true q.Logger().Infof("Setup %dMB virtio-mem-pci success", sizeMB) @@ -832,7 +844,7 @@ func (q *qemu) startSandbox(ctx context.Context, timeout int) error { } if q.config.VirtioMem { - err = q.setupVirtioMem() + err = q.setupVirtioMem(ctx) } return err