From a8717286cac32a7cfd898871cabafa493def6112 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sun, 22 Dec 2019 20:18:55 -0800 Subject: [PATCH] qemu: add disable_image_nvdimm option To control whether an image rootfs is used as nvdimm device or just plain virtio-block device. Fixes: #2372 Signed-off-by: Peng Tao --- .../configuration-qemu-virtiofs.toml.in | 5 ++++ cli/config/configuration-qemu.toml.in | 5 ++++ pkg/katautils/config-settings.go | 1 + pkg/katautils/config.go | 3 ++ virtcontainers/hypervisor.go | 10 +++++++ virtcontainers/persist.go | 2 ++ virtcontainers/persist/api/config.go | 3 ++ virtcontainers/qemu.go | 2 +- virtcontainers/qemu_amd64.go | 21 ++++---------- virtcontainers/qemu_amd64_test.go | 25 ++++++++++++++-- virtcontainers/qemu_arch_base.go | 19 ++++++++++++ virtcontainers/qemu_arm64.go | 29 ++++--------------- virtcontainers/qemu_ppc64le.go | 2 -- virtcontainers/qemu_s390x.go | 4 +-- 14 files changed, 85 insertions(+), 46 deletions(-) diff --git a/cli/config/configuration-qemu-virtiofs.toml.in b/cli/config/configuration-qemu-virtiofs.toml.in index 6a13cebaea..b0e3bc55fa 100644 --- a/cli/config/configuration-qemu-virtiofs.toml.in +++ b/cli/config/configuration-qemu-virtiofs.toml.in @@ -205,6 +205,11 @@ enable_iothreads = @DEFENABLEIOTHREADS@ # Default false #use_vsock = true +# If false and nvdimm is supported, use nvdimm device to plug guest image. +# Otherwise virtio-block device is used. +# Default false +#disable_image_nvdimm = true + # VFIO devices are hotplugged on a bridge by default. # Enable hotplugging on root bus. 
This may be required for devices with # a large PCI bar, as this is a current limitation with hotplugging on diff --git a/cli/config/configuration-qemu.toml.in b/cli/config/configuration-qemu.toml.in index 87b49c0a91..c42218d56f 100644 --- a/cli/config/configuration-qemu.toml.in +++ b/cli/config/configuration-qemu.toml.in @@ -206,6 +206,11 @@ enable_iothreads = @DEFENABLEIOTHREADS@ # Default false #use_vsock = true +# If false and nvdimm is supported, use nvdimm device to plug guest image. +# Otherwise virtio-block device is used. +# Default is false +#disable_image_nvdimm = true + # VFIO devices are hotplugged on a bridge by default. # Enable hotplugging on root bus. This may be required for devices with # a large PCI bar, as this is a current limitation with hotplugging on diff --git a/pkg/katautils/config-settings.go b/pkg/katautils/config-settings.go index 428e712d3b..17e6141c29 100644 --- a/pkg/katautils/config-settings.go +++ b/pkg/katautils/config-settings.go @@ -46,6 +46,7 @@ const defaultHotplugVFIOOnRootBus bool = false const defaultEntropySource = "/dev/urandom" const defaultGuestHookPath string = "" const defaultVirtioFSCacheMode = "none" +const defaultDisableImageNvdimm = false const defaultTemplatePath string = "/run/vc/vm/template" const defaultVMCacheEndpoint string = "/var/run/kata-containers/cache.sock" diff --git a/pkg/katautils/config.go b/pkg/katautils/config.go index b5f685c02a..9badc68203 100644 --- a/pkg/katautils/config.go +++ b/pkg/katautils/config.go @@ -120,6 +120,7 @@ type hypervisor struct { DisableNestingChecks bool `toml:"disable_nesting_checks"` EnableIOThreads bool `toml:"enable_iothreads"` UseVSock bool `toml:"use_vsock"` + DisableImageNvdimm bool `toml:"disable_image_nvdimm"` HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"` DisableVhostNet bool `toml:"disable_vhost_net"` GuestHookPath string `toml:"guest_hook_path"` @@ -643,6 +644,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { 
EnableIOThreads: h.EnableIOThreads, Msize9p: h.msize9p(), UseVSock: useVSock, + DisableImageNvdimm: h.DisableImageNvdimm, HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus, DisableVhostNet: h.DisableVhostNet, GuestHookPath: h.guestHookPath(), @@ -1069,6 +1071,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus, GuestHookPath: defaultGuestHookPath, VirtioFSCache: defaultVirtioFSCacheMode, + DisableImageNvdimm: defaultDisableImageNvdimm, } } diff --git a/virtcontainers/hypervisor.go b/virtcontainers/hypervisor.go index 42562e8a6e..d96232dc80 100644 --- a/virtcontainers/hypervisor.go +++ b/virtcontainers/hypervisor.go @@ -90,6 +90,13 @@ var commonNvdimmKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck {"rootfstype", "ext4"}, } +// agnostic list of kernel root parameters for NVDIMM without DAX +var commonNvdimmNoDAXKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck + {"root", "/dev/pmem0p1"}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", "ext4"}, +} + // agnostic list of kernel root parameters for virtio-blk var commonVirtioblkKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck {"root", "/dev/vda1"}, @@ -352,6 +359,9 @@ type HypervisorConfig struct { // UseVSock use a vsock for agent communication UseVSock bool + // DisableImageNvdimm is used to disable guest rootfs image nvdimm devices + DisableImageNvdimm bool + // HotplugVFIOOnRootBus is used to indicate if devices need to be hotplugged on the // root bus instead of a bridge. 
HotplugVFIOOnRootBus bool diff --git a/virtcontainers/persist.go b/virtcontainers/persist.go index 6755aaee85..39bede130f 100644 --- a/virtcontainers/persist.go +++ b/virtcontainers/persist.go @@ -241,6 +241,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) { Mlock: sconfig.HypervisorConfig.Mlock, DisableNestingChecks: sconfig.HypervisorConfig.DisableNestingChecks, UseVSock: sconfig.HypervisorConfig.UseVSock, + DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm, HotplugVFIOOnRootBus: sconfig.HypervisorConfig.HotplugVFIOOnRootBus, BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate, BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate, @@ -532,6 +533,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) { Mlock: hconf.Mlock, DisableNestingChecks: hconf.DisableNestingChecks, UseVSock: hconf.UseVSock, + DisableImageNvdimm: hconf.DisableImageNvdimm, HotplugVFIOOnRootBus: hconf.HotplugVFIOOnRootBus, BootToBeTemplate: hconf.BootToBeTemplate, BootFromTemplate: hconf.BootFromTemplate, diff --git a/virtcontainers/persist/api/config.go b/virtcontainers/persist/api/config.go index d718241fa3..5cc12195d4 100644 --- a/virtcontainers/persist/api/config.go +++ b/virtcontainers/persist/api/config.go @@ -142,6 +142,9 @@ type HypervisorConfig struct { // UseVSock use a vsock for agent communication UseVSock bool + // DisableImageNvdimm disables nvdimm for guest rootfs image + DisableImageNvdimm bool + // HotplugVFIOOnRootBus is used to indicate if devices need to be hotplugged on the // root bus instead of a bridge. 
HotplugVFIOOnRootBus bool diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index f41c143c1f..056f574dac 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -248,7 +248,7 @@ func (q *qemu) setup(id string, hypervisorConfig *HypervisorConfig, vcStore *sto if err != nil { return err } - if initrdPath == "" && imagePath != "" { + if initrdPath == "" && imagePath != "" && !q.config.DisableImageNvdimm { q.nvdimmCount = 1 } else { q.nvdimmCount = 0 diff --git a/virtcontainers/qemu_amd64.go b/virtcontainers/qemu_amd64.go index 5e870c5f59..5068052c8a 100644 --- a/virtcontainers/qemu_amd64.go +++ b/virtcontainers/qemu_amd64.go @@ -6,7 +6,6 @@ package virtcontainers import ( - "strings" "time" "github.com/kata-containers/runtime/virtcontainers/types" @@ -25,8 +24,6 @@ const defaultQemuPath = "/usr/bin/qemu-system-x86_64" const defaultQemuMachineType = QemuPC -const qemuNvdimmOption = "nvdimm" - const defaultQemuMachineOptions = "accel=kvm,kernel_irqchip" const qmpMigrationWaitTimeout = 5 * time.Second @@ -37,8 +34,6 @@ var qemuPaths = map[string]string{ QemuQ35: defaultQemuPath, } -var kernelRootParams = commonNvdimmKernelRootParams - var kernelParams = []Param{ {"tsc", "reliable"}, {"no_timer_check", ""}, @@ -101,19 +96,12 @@ func newQemuArch(config HypervisorConfig) qemuArch { kernelParamsNonDebug: kernelParamsNonDebug, kernelParamsDebug: kernelParamsDebug, kernelParams: kernelParams, + disableNvdimm: config.DisableImageNvdimm, + dax: true, }, vmFactory: factory, } - if config.ImagePath != "" { - for i := range q.supportedQemuMachines { - q.supportedQemuMachines[i].Options = strings.Join([]string{ - q.supportedQemuMachines[i].Options, - qemuNvdimmOption, - }, ",") - } - } - q.handleImagePath(config) return q @@ -158,7 +146,10 @@ func (q *qemuAmd64) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) g } func (q *qemuAmd64) appendImage(devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) { - return 
q.appendNvdimmImage(devices, path) + if !q.disableNvdimm { + return q.appendNvdimmImage(devices, path) + } + return q.appendBlockImage(devices, path) } // appendBridges appends to devices the given bridges diff --git a/virtcontainers/qemu_amd64_test.go b/virtcontainers/qemu_amd64_test.go index 7c046fbeae..f9d477edc5 100644 --- a/virtcontainers/qemu_amd64_test.go +++ b/virtcontainers/qemu_amd64_test.go @@ -114,7 +114,6 @@ func TestQemuAmd64MemoryTopology(t *testing.T) { } func TestQemuAmd64AppendImage(t *testing.T) { - var devices []govmmQemu.Device assert := assert.New(t) f, err := ioutil.TempFile("", "img") @@ -131,6 +130,7 @@ func TestQemuAmd64AppendImage(t *testing.T) { cfg := qemuConfig(QemuPC) cfg.ImagePath = f.Name() + cfg.DisableImageNvdimm = false amd64 := newQemuArch(cfg) for _, m := range amd64.(*qemuAmd64).supportedQemuMachines { assert.Contains(m.Options, qemuNvdimmOption) @@ -147,12 +147,33 @@ func TestQemuAmd64AppendImage(t *testing.T) { }, } - devices, err = amd64.appendImage(devices, f.Name()) + devices, err := amd64.appendImage(nil, f.Name()) assert.NoError(err) assert.Equal(expectedOut, devices) // restore default supportedQemuMachines options assert.Equal(len(supportedQemuMachines), copy(supportedQemuMachines, machinesCopy)) + + cfg.DisableImageNvdimm = true + amd64 = newQemuArch(cfg) + for _, m := range amd64.(*qemuAmd64).supportedQemuMachines { + assert.NotContains(m.Options, qemuNvdimmOption) + } + + found := false + devices, err = amd64.appendImage(nil, f.Name()) + assert.NoError(err) + for _, d := range devices { + if b, ok := d.(govmmQemu.BlockDevice); ok { + assert.Equal(b.Driver, govmmQemu.VirtioBlock) + assert.True(b.ShareRW) + found = true + } + } + assert.True(found) + + // restore default supportedQemuMachines options + assert.Equal(len(supportedQemuMachines), copy(supportedQemuMachines, machinesCopy)) } func TestQemuAmd64AppendBridges(t *testing.T) { diff --git a/virtcontainers/qemu_arch_base.go b/virtcontainers/qemu_arch_base.go 
index d32b23d40a..6ef639ac6a 100644 --- a/virtcontainers/qemu_arch_base.go +++ b/virtcontainers/qemu_arch_base.go @@ -12,6 +12,7 @@ import ( "fmt" "os" "strconv" + "strings" govmmQemu "github.com/intel/govmm/qemu" @@ -133,6 +134,8 @@ type qemuArchBase struct { memoryOffset uint32 nestedRun bool vhost bool + disableNvdimm bool + dax bool networkIndex int qemuPaths map[string]string supportedQemuMachines []govmmQemu.Machine @@ -178,6 +181,8 @@ const ( QemuCCWVirtio = "s390-ccw-virtio" qmpCapMigrationIgnoreShared = "x-ignore-shared" + + qemuNvdimmOption = "nvdimm" ) // kernelParamsNonDebug is a list of the default kernel @@ -660,6 +665,20 @@ func (q *qemuArchBase) appendRNGDevice(devices []govmmQemu.Device, rngDev config func (q *qemuArchBase) handleImagePath(config HypervisorConfig) { if config.ImagePath != "" { + kernelRootParams := commonVirtioblkKernelRootParams + if !q.disableNvdimm { + for i := range q.supportedQemuMachines { + q.supportedQemuMachines[i].Options = strings.Join([]string{ + q.supportedQemuMachines[i].Options, + qemuNvdimmOption, + }, ",") + } + if q.dax { + kernelRootParams = commonNvdimmKernelRootParams + } else { + kernelRootParams = commonNvdimmNoDAXKernelRootParams + } + } q.kernelParams = append(q.kernelParams, kernelRootParams...) q.kernelParamsNonDebug = append(q.kernelParamsNonDebug, kernelParamsSystemdNonDebug...) q.kernelParamsDebug = append(q.kernelParamsDebug, kernelParamsSystemdDebug...) 
diff --git a/virtcontainers/qemu_arm64.go b/virtcontainers/qemu_arm64.go index 4a65e2a7a7..6d089cf010 100644 --- a/virtcontainers/qemu_arm64.go +++ b/virtcontainers/qemu_arm64.go @@ -25,8 +25,6 @@ const defaultQemuPath = "/usr/bin/qemu-system-aarch64" const defaultQemuMachineType = QemuVirt -const qemuNvdimmOption = "nvdimm" - const qmpMigrationWaitTimeout = 10 * time.Second var defaultQemuMachineOptions = "usb=off,accel=kvm,gic-version=" + getGuestGICVersion() @@ -41,15 +39,6 @@ var kernelParams = []Param{ {"iommu.passthrough", "0"}, } -// For now, AArch64 doesn't support DAX, so we couldn't use -// commonNvdimmKernelRootParams, the agnostic list of kernel -// root parameters for NVDIMM -var kernelRootParams = []Param{ - {"root", "/dev/pmem0p1"}, - {"rootflags", "data=ordered,errors=remount-ro ro"}, - {"rootfstype", "ext4"}, -} - var supportedQemuMachines = []govmmQemu.Machine{ { Type: QemuVirt, @@ -150,20 +139,11 @@ func newQemuArch(config HypervisorConfig) qemuArch { kernelParamsNonDebug: kernelParamsNonDebug, kernelParamsDebug: kernelParamsDebug, kernelParams: kernelParams, + disableNvdimm: config.DisableImageNvdimm, }, } - if config.ImagePath != "" { - for i := range q.supportedQemuMachines { - q.supportedQemuMachines[i].Options = strings.Join([]string{ - q.supportedQemuMachines[i].Options, - qemuNvdimmOption, - }, ",") - } - q.kernelParams = append(q.kernelParams, kernelRootParams...) - q.kernelParamsNonDebug = append(q.kernelParamsNonDebug, kernelParamsSystemdNonDebug...) - q.kernelParamsDebug = append(q.kernelParamsDebug, kernelParamsSystemdDebug...) 
- } + q.handleImagePath(config) return q } @@ -178,7 +158,10 @@ func (q *qemuArm64) appendBridges(devices []govmmQemu.Device) []govmmQemu.Device } func (q *qemuArm64) appendImage(devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) { - return q.appendNvdimmImage(devices, path) + if !q.disableNvdimm { + return q.appendNvdimmImage(devices, path) + } + return q.appendBlockImage(devices, path) } func (q *qemuArm64) setIgnoreSharedMemoryMigrationCaps(_ context.Context, _ *govmmQemu.QMP) error { diff --git a/virtcontainers/qemu_ppc64le.go b/virtcontainers/qemu_ppc64le.go index 21e3b6c2f2..3a6141b1d7 100644 --- a/virtcontainers/qemu_ppc64le.go +++ b/virtcontainers/qemu_ppc64le.go @@ -32,8 +32,6 @@ var qemuPaths = map[string]string{ QemuPseries: defaultQemuPath, } -var kernelRootParams = []Param{} - var kernelParams = []Param{ {"tsc", "reliable"}, {"no_timer_check", ""}, diff --git a/virtcontainers/qemu_s390x.go b/virtcontainers/qemu_s390x.go index d2575ccdcc..b3532e285a 100644 --- a/virtcontainers/qemu_s390x.go +++ b/virtcontainers/qemu_s390x.go @@ -38,8 +38,6 @@ var kernelParams = []Param{ {"console", "ttysclp0"}, } -var kernelRootParams = commonVirtioblkKernelRootParams - var ccwbridge = types.NewBridge(types.CCW, "", make(map[uint32]string, types.CCWBridgeMaxCapacity), 0) var supportedQemuMachines = []govmmQemu.Machine{ @@ -78,7 +76,7 @@ func newQemuArch(config HypervisorConfig) qemuArch { q.Bridges = append(q.Bridges, ccwbridge) if config.ImagePath != "" { - q.kernelParams = append(q.kernelParams, kernelRootParams...) + q.kernelParams = append(q.kernelParams, commonVirtioblkKernelRootParams...) q.kernelParamsNonDebug = append(q.kernelParamsNonDebug, kernelParamsSystemdNonDebug...) q.kernelParamsDebug = append(q.kernelParamsDebug, kernelParamsSystemdDebug...) }