diff --git a/docs/use-cases/Nvidia-GPU-passthrough-and-Kata.md b/docs/use-cases/Nvidia-GPU-passthrough-and-Kata.md index 1a3253fdc8..58b36c02d7 100644 --- a/docs/use-cases/Nvidia-GPU-passthrough-and-Kata.md +++ b/docs/use-cases/Nvidia-GPU-passthrough-and-Kata.md @@ -67,7 +67,7 @@ To use large BARs devices (for example, Nvidia Tesla P100), you need Kata versio The following configuration in the Kata `configuration.toml` file as shown below can work: -Hotplug for PCI devices by `shpchp` (Linux's SHPC PCI Hotplug driver): +Hotplug for PCI devices by `acpi_pcihp` (Linux's ACPI PCI Hotplug driver): ``` machine_type = "q35" @@ -91,7 +91,6 @@ The following kernel config options need to be enabled: ``` # Support PCI/PCIe device hotplug (Required for large BARs device) CONFIG_HOTPLUG_PCI_PCIE=y -CONFIG_HOTPLUG_PCI_SHPC=y # Support for loading modules (Required for load Nvidia drivers) CONFIG_MODULES=y diff --git a/src/runtime/go.mod b/src/runtime/go.mod index d74e690245..3d3594149a 100644 --- a/src/runtime/go.mod +++ b/src/runtime/go.mod @@ -24,7 +24,7 @@ require ( github.com/gogo/protobuf v1.3.2 github.com/hashicorp/go-multierror v1.0.0 github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9 - github.com/kata-containers/govmm v0.0.0-20210831124834-2f8e417bb2c4 + github.com/kata-containers/govmm v0.0.0-20210909155007-1b60b536f3c7 github.com/mdlayher/vsock v0.0.0-20191108225356-d9c65923cb8f github.com/opencontainers/runc v1.0.1 github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 diff --git a/src/runtime/go.sum b/src/runtime/go.sum index 54d0dc1582..a2647dd72b 100644 --- a/src/runtime/go.sum +++ b/src/runtime/go.sum @@ -357,8 +357,8 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/kata-containers/govmm v0.0.0-20210831124834-2f8e417bb2c4 h1:F+/U5Vfep00pjh5oZFrVyDnVoQg8Wu7ZtXGGkaN5Glg= -github.com/kata-containers/govmm v0.0.0-20210831124834-2f8e417bb2c4/go.mod h1:A6QaNB6N6PRQ9mTRpFtUxiF5T5CJpzLALjxBrUQPlFI= +github.com/kata-containers/govmm v0.0.0-20210909155007-1b60b536f3c7 h1:lrtaReMyoviyn/Gtd9iAmQ9qNSTaS3QC1NgQ+h5fliI= +github.com/kata-containers/govmm v0.0.0-20210909155007-1b60b536f3c7/go.mod h1:A6QaNB6N6PRQ9mTRpFtUxiF5T5CJpzLALjxBrUQPlFI= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= diff --git a/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qemu.go b/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qemu.go index ee814a9c6b..e57a4b26a9 100644 --- a/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qemu.go +++ b/src/runtime/vendor/github.com/kata-containers/govmm/qemu/qemu.go @@ -1802,6 +1802,15 @@ type BridgeDevice struct { // ROMFile specifies the ROM file being used for this device. ROMFile string + + // Address range reservations for devices behind the bridge + // NB: strings seem an odd choice, but if they were integers, + // they'd default to 0 by Go's rules in all the existing users + // who don't set them. 0 is a valid value for certain cases, + // but not you want by default. + IOReserve string + MemReserve string + Pref64Reserve string } // Valid returns true if the BridgeDevice structure is valid and complete. @@ -1852,6 +1861,16 @@ func (bridgeDev BridgeDevice) QemuParams(config *Config) []string { deviceParams = append(deviceParams, fmt.Sprintf("romfile=%s", bridgeDev.ROMFile)) } + if bridgeDev.IOReserve != "" { + deviceParams = append(deviceParams, fmt.Sprintf("io-reserve=%s", bridgeDev.IOReserve)) + } + if bridgeDev.MemReserve != "" { + deviceParams = append(deviceParams, fmt.Sprintf("mem-reserve=%s", bridgeDev.MemReserve)) + } + if bridgeDev.Pref64Reserve != "" { + deviceParams = append(deviceParams, fmt.Sprintf("pref64-reserve=%s", bridgeDev.Pref64Reserve)) + } + qemuParams = append(qemuParams, "-device") qemuParams = append(qemuParams, strings.Join(deviceParams, ",")) diff --git a/src/runtime/vendor/modules.txt b/src/runtime/vendor/modules.txt index f41c35f96e..407f522bde 100644 --- a/src/runtime/vendor/modules.txt +++ b/src/runtime/vendor/modules.txt @@ -180,7 +180,7 @@ github.com/hashicorp/go-multierror # github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9 ## explicit github.com/intel-go/cpuid -# github.com/kata-containers/govmm v0.0.0-20210831124834-2f8e417bb2c4 +# github.com/kata-containers/govmm v0.0.0-20210909155007-1b60b536f3c7 ## explicit github.com/kata-containers/govmm/qemu # github.com/mailru/easyjson v0.7.0 diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index c2cee7a2b7..48252a0b5f 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -2126,8 +2126,23 @@ func genericAppendBridges(devices []govmmQemu.Device, bridges []types.Bridge, ma ID: b.ID, // Each bridge is required to be assigned a unique chassis id > 0 Chassis: idx + 1, - SHPC: true, + SHPC: false, Addr: strconv.FormatInt(int64(bridges[idx].Addr), 10), + // Certain guest BIOS versions think + // !SHPC means no hotplug, and won't + // reserve the IO and memory windows + // that will be needed for devices + // added underneath this bridge. This + // will only break for certain + // combinations of exact qemu, BIOS + // and guest kernel versions, but for + // consistency, just hint the usual + // default windows for a bridge (as + // the BIOS would use with SHPC) so + // that we can do ACPI hotplug. + IOReserve: "4k", + MemReserve: "1m", + Pref64Reserve: "1m", }, ) } diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index eab95ff571..689575ab5d 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -193,11 +193,6 @@ func (q *qemuAmd64) appendImage(ctx context.Context, devices []govmmQemu.Device, return q.appendBlockImage(ctx, devices, path) } -// appendBridges appends to devices the given bridges -func (q *qemuAmd64) appendBridges(devices []govmmQemu.Device) []govmmQemu.Device { - return genericAppendBridges(devices, q.Bridges, q.qemuMachine.Type) -} - // enable protection func (q *qemuAmd64) enableProtection() error { var err error diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index 106abf31c5..4b8d044819 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -187,12 +187,15 @@ func TestQemuAmd64AppendBridges(t *testing.T) { expectedOut := []govmmQemu.Device{ govmmQemu.BridgeDevice{ - Type: govmmQemu.PCIBridge, - Bus: defaultBridgeBus, - ID: bridges[0].ID, - Chassis: 1, - SHPC: true, - Addr: "2", + Type: govmmQemu.PCIBridge, + Bus: defaultBridgeBus, + ID: bridges[0].ID, + Chassis: 1, + SHPC: false, + Addr: "2", + IOReserve: "4k", + MemReserve: "1m", + Pref64Reserve: "1m", }, } diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index 98fdfb7661..f338d4e4e0 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -12,7 +12,6 @@ import ( "fmt" "os" "runtime" - "strconv" "strings" govmmQemu "github.com/kata-containers/govmm/qemu" @@ -456,31 +455,7 @@ func (q *qemuArchBase) appendSCSIController(_ context.Context, devices []govmmQe // appendBridges appends to devices the given bridges func (q *qemuArchBase) appendBridges(devices []govmmQemu.Device) []govmmQemu.Device { - for idx, b := range q.Bridges { - if b.Type == types.CCW { - continue - } - t := govmmQemu.PCIBridge - if b.Type == types.PCIE { - t = govmmQemu.PCIEBridge - } - - q.Bridges[idx].Addr = bridgePCIStartAddr + idx - - devices = append(devices, - govmmQemu.BridgeDevice{ - Type: t, - Bus: defaultBridgeBus, - ID: b.ID, - // Each bridge is required to be assigned a unique chassis id > 0 - Chassis: idx + 1, - SHPC: true, - Addr: strconv.FormatInt(int64(q.Bridges[idx].Addr), 10), - }, - ) - } - - return devices + return genericAppendBridges(devices, q.Bridges, q.qemuMachine.Type) } func generic9PVolume(volume types.Volume, nestedRun bool) govmmQemu.FSDevice { diff --git a/src/runtime/virtcontainers/qemu_arch_base_test.go b/src/runtime/virtcontainers/qemu_arch_base_test.go index 1cd904864b..4b495f73d9 100644 --- a/src/runtime/virtcontainers/qemu_arch_base_test.go +++ b/src/runtime/virtcontainers/qemu_arch_base_test.go @@ -307,12 +307,15 @@ func TestQemuArchBaseAppendBridges(t *testing.T) { expectedOut := []govmmQemu.Device{ govmmQemu.BridgeDevice{ - Type: govmmQemu.PCIBridge, - Bus: defaultBridgeBus, - ID: bridges[0].ID, - Chassis: 1, - SHPC: true, - Addr: "2", + Type: govmmQemu.PCIBridge, + Bus: defaultBridgeBus, + ID: bridges[0].ID, + Chassis: 1, + SHPC: false, + Addr: "2", + IOReserve: "4k", + MemReserve: "1m", + Pref64Reserve: "1m", }, } diff --git a/src/runtime/virtcontainers/qemu_arm64.go b/src/runtime/virtcontainers/qemu_arm64.go index be45a4e3a5..eced3b044d 100644 --- a/src/runtime/virtcontainers/qemu_arm64.go +++ b/src/runtime/virtcontainers/qemu_arm64.go @@ -89,11 +89,6 @@ func (q *qemuArm64) bridges(number uint32) { q.Bridges = genericBridges(number, q.qemuMachine.Type) } -// appendBridges appends to devices the given bridges -func (q *qemuArm64) appendBridges(devices []govmmQemu.Device) []govmmQemu.Device { - return genericAppendBridges(devices, q.Bridges, q.qemuMachine.Type) -} - func (q *qemuArm64) appendImage(ctx context.Context, devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) { if !q.disableNvdimm { return q.appendNvdimmImage(devices, path) diff --git a/src/runtime/virtcontainers/qemu_arm64_test.go b/src/runtime/virtcontainers/qemu_arm64_test.go index e421aa79b4..0d777af41b 100644 --- a/src/runtime/virtcontainers/qemu_arm64_test.go +++ b/src/runtime/virtcontainers/qemu_arm64_test.go @@ -78,12 +78,15 @@ func TestQemuArm64AppendBridges(t *testing.T) { expectedOut := []govmmQemu.Device{ govmmQemu.BridgeDevice{ - Type: govmmQemu.PCIBridge, - Bus: defaultBridgeBus, - ID: bridges[0].ID, - Chassis: 1, - SHPC: true, - Addr: "2", + Type: govmmQemu.PCIBridge, + Bus: defaultBridgeBus, + ID: bridges[0].ID, + Chassis: 1, + SHPC: false, + Addr: "2", + IOReserve: "4k", + MemReserve: "1m", + Pref64Reserve: "1m", }, } diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index bff6feff16..51c9003ba2 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -123,11 +123,6 @@ func (q *qemuPPC64le) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) return genericMemoryTopology(memoryMb, hostMemoryMb, slots, q.memoryOffset) } -// appendBridges appends to devices the given bridges -func (q *qemuPPC64le) appendBridges(devices []govmmQemu.Device) []govmmQemu.Device { - return genericAppendBridges(devices, q.Bridges, q.qemuMachine.Type) -} - func (q *qemuPPC64le) appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error) { return devices, fmt.Errorf("PPC64le does not support appending a vIOMMU") } diff --git a/src/runtime/virtcontainers/qemu_s390x.go b/src/runtime/virtcontainers/qemu_s390x.go index 829078cffa..b7611deccd 100644 --- a/src/runtime/virtcontainers/qemu_s390x.go +++ b/src/runtime/virtcontainers/qemu_s390x.go @@ -256,11 +256,6 @@ func (q *qemuS390x) append9PVolume(ctx context.Context, devices []govmmQemu.Devi return devices, nil } -// appendBridges appends to devices the given bridges -func (q *qemuS390x) appendBridges(devices []govmmQemu.Device) []govmmQemu.Device { - return genericAppendBridges(devices, q.Bridges, q.qemuMachine.Type) -} - func (q *qemuS390x) appendSCSIController(ctx context.Context, devices []govmmQemu.Device, enableIOThreads bool) ([]govmmQemu.Device, *govmmQemu.IOThread, error) { d, t := genericSCSIController(enableIOThreads, q.nestedRun) addr, b, err := q.addDeviceToBridge(ctx, d.ID, types.CCW)