diff --git a/docs/how-to/how-to-set-sandbox-config-kata.md b/docs/how-to/how-to-set-sandbox-config-kata.md index 4644d57591..f2f8b5bef3 100644 --- a/docs/how-to/how-to-set-sandbox-config-kata.md +++ b/docs/how-to/how-to-set-sandbox-config-kata.md @@ -46,6 +46,8 @@ There are several kinds of Kata configurations and they are listed below. | `io.katacontainers.config.hypervisor.block_device_cache_noflush` | `boolean` | Denotes whether flush requests for the device are ignored | | `io.katacontainers.config.hypervisor.block_device_cache_set` | `boolean` | cache-related options will be set to block devices or not | | `io.katacontainers.config.hypervisor.block_device_driver` | string | the driver to be used for block device, valid values are `virtio-blk`, `virtio-scsi`, `nvdimm`| +| `io.katacontainers.config.hypervisor.blk_logical_sector_size` | uint32 | logical sector size in bytes reported by block devices to the guest (0 = hypervisor default; any other value must be a power of 2 between 512 and 65536 and, when both sizes are set, must not be larger than the physical sector size) | +| `io.katacontainers.config.hypervisor.blk_physical_sector_size` | uint32 | physical sector size in bytes reported by block devices to the guest (0 = hypervisor default; any other value must be a power of 2 between 512 and 65536 and, when both sizes are set, must not be smaller than the logical sector size) | | `io.katacontainers.config.hypervisor.cpu_features` | `string` | Comma-separated list of CPU features to pass to the CPU (QEMU) | | `io.katacontainers.config.hypervisor.default_max_vcpus` | uint32| the maximum number of vCPUs allocated for the VM by the hypervisor | | `io.katacontainers.config.hypervisor.default_memory` | uint32| the memory assigned for a VM by the hypervisor in `MiB` | diff --git a/src/runtime/config/configuration-qemu-cca.toml.in b/src/runtime/config/configuration-qemu-cca.toml.in index b5f0accd54..6f01d5f340 100644 --- a/src/runtime/config/configuration-qemu-cca.toml.in +++ b/src/runtime/config/configuration-qemu-cca.toml.in @@ -235,6 +235,16 @@ block_device_cache_direct = false # Default false block_device_cache_noflush = false +# Specifies the 
logical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_logical_sector_size = 0 + +# Specifies the physical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_physical_sector_size = 0 + # Enable iothreads (data-plane) to be used. This causes IO to be # handled in a separate IO thread. This is currently only implemented # for SCSI. diff --git a/src/runtime/config/configuration-qemu-coco-dev.toml.in b/src/runtime/config/configuration-qemu-coco-dev.toml.in index 84eb91f3fa..851ceeec55 100644 --- a/src/runtime/config/configuration-qemu-coco-dev.toml.in +++ b/src/runtime/config/configuration-qemu-coco-dev.toml.in @@ -247,6 +247,16 @@ block_device_cache_direct = false # Default false block_device_cache_noflush = false +# Specifies the logical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_logical_sector_size = 0 + +# Specifies the physical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_physical_sector_size = 0 + # Enable iothreads (data-plane) to be used. This causes IO to be # handled in a separate IO thread. This is currently implemented # for virtio-scsi and virtio-blk. 
diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in index 855794e650..eff87f6f9a 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in @@ -287,6 +287,16 @@ block_device_cache_direct = false # Default false block_device_cache_noflush = false +# Specifies the logical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_logical_sector_size = 0 + +# Specifies the physical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_physical_sector_size = 0 + # Enable iothreads (data-plane) to be used. This causes IO to be # handled in a separate IO thread. This is currently implemented # for virtio-scsi and virtio-blk. diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in index 185522fc99..eab536be0e 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in @@ -264,6 +264,16 @@ block_device_cache_direct = false # Default false block_device_cache_noflush = false +# Specifies the logical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_logical_sector_size = 0 + +# Specifies the physical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_physical_sector_size = 0 + # Enable iothreads (data-plane) to be used. This causes IO to be # handled in a separate IO thread. 
This is currently implemented # for virtio-scsi and virtio-blk. diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in index c3b8f80610..bd44a2f099 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in @@ -246,6 +246,16 @@ block_device_cache_direct = false # Default false block_device_cache_noflush = false +# Specifies the logical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_logical_sector_size = 0 + +# Specifies the physical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_physical_sector_size = 0 + # Enable iothreads (data-plane) to be used. This causes IO to be # handled in a separate IO thread. This is currently implemented # for virtio-scsi and virtio-blk. diff --git a/src/runtime/config/configuration-qemu-se.toml.in b/src/runtime/config/configuration-qemu-se.toml.in index 3ec9895bc1..519ec2d20d 100644 --- a/src/runtime/config/configuration-qemu-se.toml.in +++ b/src/runtime/config/configuration-qemu-se.toml.in @@ -249,6 +249,16 @@ block_device_cache_direct = false # Default false block_device_cache_noflush = false +# Specifies the logical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_logical_sector_size = 0 + +# Specifies the physical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_physical_sector_size = 0 + # Enable iothreads (data-plane) to be used. This causes IO to be # handled in a separate IO thread. 
This is currently implemented # for virtio-scsi and virtio-blk. diff --git a/src/runtime/config/configuration-qemu-snp.toml.in b/src/runtime/config/configuration-qemu-snp.toml.in index b5f3e21078..fa6821be0a 100644 --- a/src/runtime/config/configuration-qemu-snp.toml.in +++ b/src/runtime/config/configuration-qemu-snp.toml.in @@ -281,6 +281,16 @@ block_device_cache_direct = false # Default false block_device_cache_noflush = false +# Specifies the logical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_logical_sector_size = 0 + +# Specifies the physical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_physical_sector_size = 0 + # Enable iothreads (data-plane) to be used. This causes IO to be # handled in a separate IO thread. This is currently implemented # for virtio-scsi and virtio-blk. diff --git a/src/runtime/config/configuration-qemu-tdx.toml.in b/src/runtime/config/configuration-qemu-tdx.toml.in index ab88c9a30b..a4b3bb41c1 100644 --- a/src/runtime/config/configuration-qemu-tdx.toml.in +++ b/src/runtime/config/configuration-qemu-tdx.toml.in @@ -263,6 +263,16 @@ block_device_cache_direct = false # Default false block_device_cache_noflush = false +# Specifies the logical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_logical_sector_size = 0 + +# Specifies the physical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_physical_sector_size = 0 + # Enable iothreads (data-plane) to be used. This causes IO to be # handled in a separate IO thread. 
This is currently implemented # for virtio-scsi and virtio-blk. diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index c786d2db14..7f05088170 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -241,6 +241,16 @@ block_device_cache_direct = false # Default false block_device_cache_noflush = false +# Specifies the logical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_logical_sector_size = 0 + +# Specifies the physical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the QEMU/hypervisor default. +# Default 0 +block_device_physical_sector_size = 0 + # Enable iothreads (data-plane) to be used. This causes IO to be # handled in a separate IO thread. This is currently implemented # for virtio-scsi and virtio-blk. diff --git a/src/runtime/config/configuration-stratovirt.toml.in b/src/runtime/config/configuration-stratovirt.toml.in index 4c17f33180..099ea4c9bf 100644 --- a/src/runtime/config/configuration-stratovirt.toml.in +++ b/src/runtime/config/configuration-stratovirt.toml.in @@ -181,6 +181,16 @@ block_device_cache_direct = false # Default false block_device_cache_noflush = false +# Specifies the logical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the hypervisor default. +# Default 0 +block_device_logical_sector_size = 0 + +# Specifies the physical sector size, in bytes, reported by block devices to the guest. +# Common values are 512 and 4096. Set to 0 to use the hypervisor default. +# Default 0 +block_device_physical_sector_size = 0 + # Enable huge pages for VM RAM, default false # Enabling this will result in the VM memory # being allocated using huge pages. 
diff --git a/src/runtime/pkg/govmm/qemu/qmp.go b/src/runtime/pkg/govmm/qemu/qmp.go index 3c230f9e68..efe565542b 100644 --- a/src/runtime/pkg/govmm/qemu/qmp.go +++ b/src/runtime/pkg/govmm/qemu/qmp.go @@ -859,8 +859,10 @@ func (q *QMP) ExecuteBlockdevAddWithDriverCache(ctx context.Context, driver stri // shared denotes if the drive can be shared allowing it to be passed more than once. // disableModern indicates if virtio version 1.0 should be replaced by the // former version 0.9, as there is a KVM bug that occurs when using virtio -// 1.0 in nested environments. -func (q *QMP) ExecuteDeviceAdd(ctx context.Context, blockdevID, devID, driver, bus, romfile string, shared, disableModern bool) error { +// 1.0 in nested environments. logicalBlockSize and physicalBlockSize specify +// the logical and physical block sizes for the device; if either is 0, the +// hypervisor default is used for that size. +func (q *QMP) ExecuteDeviceAdd(ctx context.Context, blockdevID, devID, driver, bus, romfile string, shared, disableModern bool, logicalBlockSize, physicalBlockSize uint32) error { args := map[string]interface{}{ "id": devID, "driver": driver, @@ -886,6 +888,14 @@ func (q *QMP) ExecuteDeviceAdd(ctx context.Context, blockdevID, devID, driver, b } } + if logicalBlockSize > 0 { + args["logical_block_size"] = logicalBlockSize + } + + if physicalBlockSize > 0 { + args["physical_block_size"] = physicalBlockSize + } + return q.executeCommand(ctx, "device_add", args, nil) } @@ -1108,8 +1118,9 @@ func (q *QMP) ExecuteDeviceDel(ctx context.Context, devID string) error { // a block device. shared denotes if the drive can be shared allowing it to be passed more than once. // disableModern indicates if virtio version 1.0 should be replaced by the // former version 0.9, as there is a KVM bug that occurs when using virtio -// 1.0 in nested environments. 
-func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver, addr, bus, romfile string, queues int, shared, disableModern bool, iothreadID string) error { +// 1.0 in nested environments. logicalBlockSize and physicalBlockSize specify the logical and +// physical sector sizes reported to the guest; set to 0 to use the hypervisor default. +func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver, addr, bus, romfile string, queues int, shared, disableModern bool, iothreadID string, logicalBlockSize, physicalBlockSize uint32) error { args := map[string]interface{}{ "id": devID, "driver": driver, @@ -1140,6 +1151,14 @@ func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver args["iothread"] = iothreadID } + if logicalBlockSize > 0 { + args["logical_block_size"] = logicalBlockSize + } + + if physicalBlockSize > 0 { + args["physical_block_size"] = physicalBlockSize + } + return q.executeCommand(ctx, "device_add", args, nil) } diff --git a/src/runtime/pkg/govmm/qemu/qmp_test.go b/src/runtime/pkg/govmm/qemu/qmp_test.go index 107dfe1aab..f53c64d60c 100644 --- a/src/runtime/pkg/govmm/qemu/qmp_test.go +++ b/src/runtime/pkg/govmm/qemu/qmp_test.go @@ -208,6 +208,31 @@ func (b *qmpTestCommandBuffer) Write(p []byte) (int, error) { b.cmds[currentCmd].name, gotCmdName) result = "error" } + + // When expected args are provided, verify that each expected key/value + // is present in the actual QMP arguments. Existing tests pass nil args + // and are unaffected by this check. 
+ if expectedArgs := b.cmds[currentCmd].args; expectedArgs != nil { + gotArgs, _ := cmdJSON["arguments"].(map[string]interface{}) + for k, v := range expectedArgs { + got, ok := gotArgs[k] + if !ok { + b.t.Errorf("Command %s: missing expected argument %q", gotCmdName, k) + continue + } + // JSON numbers decode as float64 + expectedFloat, expectedIsFloat := toFloat64(v) + gotFloat, gotIsFloat := toFloat64(got) + if expectedIsFloat && gotIsFloat { + if expectedFloat != gotFloat { + b.t.Errorf("Command %s: argument %q = %v, want %v", gotCmdName, k, got, v) + } + } else if fmt.Sprintf("%v", got) != fmt.Sprintf("%v", v) { + b.t.Errorf("Command %s: argument %q = %v, want %v", gotCmdName, k, got, v) + } + } + } + resultMap := make(map[string]interface{}) resultMap[result] = b.results[currentCmd].data encodedRes, err := json.Marshal(&resultMap) @@ -219,6 +244,26 @@ func (b *qmpTestCommandBuffer) Write(p []byte) (int, error) { return len(p), nil } +// toFloat64 attempts to convert a numeric value to float64 for comparison. +// JSON unmarshalling decodes all numbers as float64, while Go code may pass +// int, uint32, etc. This helper normalises both sides for comparison. 
+func toFloat64(v interface{}) (float64, bool) { + switch n := v.(type) { + case float64: + return n, true + case int: + return float64(n), true + case int64: + return float64(n), true + case uint32: + return float64(n), true + case uint64: + return float64(n), true + default: + return 0, false + } +} + func checkVersion(t *testing.T, connectedCh <-chan *QMPVersion) *QMPVersion { var version *QMPVersion select { @@ -605,7 +650,7 @@ func TestQMPDeviceAdd(t *testing.T) { blockdevID := fmt.Sprintf("drive_%s", volumeUUID) devID := fmt.Sprintf("device_%s", volumeUUID) err := q.ExecuteDeviceAdd(context.Background(), blockdevID, devID, - "virtio-blk-pci", "", "", true, false) + "virtio-blk-pci", "", "", true, false, 0, 0) if err != nil { t.Fatalf("Unexpected error %v", err) } @@ -1070,7 +1115,31 @@ func TestQMPPCIDeviceAdd(t *testing.T) { blockdevID := fmt.Sprintf("drive_%s", volumeUUID) devID := fmt.Sprintf("device_%s", volumeUUID) err := q.ExecutePCIDeviceAdd(context.Background(), blockdevID, devID, - "virtio-blk-pci", "0x1", "", "", 1, true, false, "") + "virtio-blk-pci", "0x1", "", "", 1, true, false, "", 0, 0) + if err != nil { + t.Fatalf("Unexpected error %v", err) + } + q.Shutdown() + <-disconnectedCh +} + +// Checks that PCI block devices with explicit logical and physical block sizes are +// correctly added using device_add, and that the sizes appear in the QMP arguments. 
+func TestQMPPCIDeviceAddWithBlockSize(t *testing.T) { + connectedCh := make(chan *QMPVersion) + disconnectedCh := make(chan struct{}) + buf := newQMPTestCommandBuffer(t) + buf.AddCommand("device_add", map[string]interface{}{ + "logical_block_size": uint32(512), + "physical_block_size": uint32(4096), + }, "return", nil) + cfg := QMPConfig{Logger: qmpTestLogger{}} + q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh) + q.version = checkVersion(t, connectedCh) + blockdevID := fmt.Sprintf("drive_%s", volumeUUID) + devID := fmt.Sprintf("device_%s", volumeUUID) + err := q.ExecutePCIDeviceAdd(context.Background(), blockdevID, devID, + "virtio-blk-pci", "0x1", "", "", 1, true, false, "", 512, 4096) if err != nil { t.Fatalf("Unexpected error %v", err) } diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index d4c860fe38..2ea52c6bed 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -146,6 +146,8 @@ type hypervisor struct { BlockDeviceCacheSet bool `toml:"block_device_cache_set"` BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"` BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"` + BlockDeviceLogicalSectorSize uint32 `toml:"block_device_logical_sector_size"` + BlockDevicePhysicalSectorSize uint32 `toml:"block_device_physical_sector_size"` EnableVhostUserStore bool `toml:"enable_vhost_user_store"` VhostUserDeviceReconnect uint32 `toml:"vhost_user_reconnect_timeout_sec"` DisableBlockDeviceUse bool `toml:"disable_block_device_use"` @@ -593,6 +595,20 @@ func (h hypervisor) blockDeviceDriver() (string, error) { return "", fmt.Errorf("Invalid hypervisor block storage driver %v specified (supported drivers: %v)", h.BlockDeviceDriver, supportedBlockDrivers) } +func (h hypervisor) blockDeviceLogicalSectorSize() (uint32, error) { + if err := validateBlockDeviceSectorSize(cfgBlockDeviceLogicalSectorSize, h.BlockDeviceLogicalSectorSize); err != nil { + return 0, err + } + 
return h.BlockDeviceLogicalSectorSize, nil +} + +func (h hypervisor) blockDevicePhysicalSectorSize() (uint32, error) { + if err := validateBlockDeviceSectorSize(cfgBlockDevicePhysicalSectorSize, h.BlockDevicePhysicalSectorSize); err != nil { + return 0, err + } + return h.BlockDevicePhysicalSectorSize, nil +} + func (h hypervisor) blockDeviceAIO() (string, error) { supportedBlockAIO := []string{config.AIOIOUring, config.AIONative, config.AIOThreads} @@ -877,6 +893,28 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { }, nil } +const ( + cfgBlockDeviceLogicalSectorSize = "block_device_logical_sector_size" + cfgBlockDevicePhysicalSectorSize = "block_device_physical_sector_size" +) + +func validateBlockDeviceSectorSize(name string, size uint32) error { + if size == 0 { + return nil + } + if size < 512 || size > 65536 || (size&(size-1)) != 0 { + return fmt.Errorf("invalid %s %d: must be 0 or a power of 2 between 512 and 65536", name, size) + } + return nil +} + +func validateBlockDeviceSectorSizes(logical, physical uint32) error { + if logical != 0 && physical != 0 && logical > physical { + return fmt.Errorf("invalid sector sizes: logical (%d) must not be larger than physical (%d)", logical, physical) + } + return nil +} + func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { hypervisor, err := h.path() if err != nil { @@ -973,88 +1011,104 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return vc.HypervisorConfig{}, err } + blockLogicalSectorSize, err := h.blockDeviceLogicalSectorSize() + if err != nil { + return vc.HypervisorConfig{}, err + } + + blockPhysicalSectorSize, err := h.blockDevicePhysicalSectorSize() + if err != nil { + return vc.HypervisorConfig{}, err + } + + if err := validateBlockDeviceSectorSizes(blockLogicalSectorSize, blockPhysicalSectorSize); err != nil { + return vc.HypervisorConfig{}, err + } + return vc.HypervisorConfig{ - HypervisorPath: hypervisor, - 
HypervisorPathList: h.HypervisorPathList, - KernelPath: kernel, - InitrdPath: initrd, - ImagePath: image, - RootfsType: rootfsType, - FirmwarePath: firmware, - FirmwareVolumePath: firmwareVolume, - PFlash: pflashes, - MachineAccelerators: machineAccelerators, - CPUFeatures: cpuFeatures, - KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), - KernelVerityParams: h.kernelVerityParams(), - HypervisorMachineType: machineType, - QgsPort: h.qgsPort(), - NumVCPUsF: h.defaultVCPUs(), - DefaultMaxVCPUs: h.defaultMaxVCPUs(), - MemorySize: h.defaultMemSz(), - MemSlots: h.defaultMemSlots(), - MemOffset: h.defaultMemOffset(), - DefaultMaxMemorySize: h.defaultMaxMemSz(), - VirtioMem: h.VirtioMem, - EntropySource: h.GetEntropySource(), - EntropySourceList: h.EntropySourceList, - DefaultBridges: h.defaultBridges(), - DisableBlockDeviceUse: h.DisableBlockDeviceUse, - SharedFS: sharedFS, - VirtioFSDaemon: h.VirtioFSDaemon, - VirtioFSDaemonList: h.VirtioFSDaemonList, - HypervisorLoglevel: h.defaultHypervisorLoglevel(), - VirtioFSCacheSize: h.VirtioFSCacheSize, - VirtioFSCache: h.defaultVirtioFSCache(), - VirtioFSQueueSize: h.VirtioFSQueueSize, - VirtioFSExtraArgs: h.VirtioFSExtraArgs, - MemPrealloc: h.MemPrealloc, - ReclaimGuestFreedMemory: h.ReclaimGuestFreedMemory, - HugePages: h.HugePages, - IOMMU: h.IOMMU, - IOMMUPlatform: h.getIOMMUPlatform(), - GuestNUMANodes: h.defaultGuestNUMANodes(), - FileBackedMemRootDir: h.FileBackedMemRootDir, - FileBackedMemRootList: h.FileBackedMemRootList, - Debug: h.Debug, - DisableNestingChecks: h.DisableNestingChecks, - BlockDeviceDriver: blockDriver, - BlockDeviceAIO: blockAIO, - BlockDeviceCacheSet: h.BlockDeviceCacheSet, - BlockDeviceCacheDirect: h.BlockDeviceCacheDirect, - BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush, - EnableIOThreads: h.EnableIOThreads, - IndepIOThreads: h.indepiothreads(), - Msize9p: h.msize9p(), - DisableImageNvdimm: h.DisableImageNvdimm, - HotPlugVFIO: h.hotPlugVFIO(), - ColdPlugVFIO: 
h.coldPlugVFIO(), - PCIeRootPort: h.pcieRootPort(), - PCIeSwitchPort: h.pcieSwitchPort(), - DisableVhostNet: h.DisableVhostNet, - EnableVhostUserStore: h.EnableVhostUserStore, - VhostUserStorePath: h.vhostUserStorePath(), - VhostUserStorePathList: h.VhostUserStorePathList, - VhostUserDeviceReconnect: h.VhostUserDeviceReconnect, - SeccompSandbox: h.SeccompSandbox, - GuestHookPath: h.guestHookPath(), - RxRateLimiterMaxRate: rxRateLimiterMaxRate, - TxRateLimiterMaxRate: txRateLimiterMaxRate, - EnableAnnotations: h.EnableAnnotations, - GuestMemoryDumpPath: h.GuestMemoryDumpPath, - GuestMemoryDumpPaging: h.GuestMemoryDumpPaging, - ConfidentialGuest: h.ConfidentialGuest, - SevSnpGuest: h.SevSnpGuest, - GuestSwap: h.GuestSwap, - Rootless: h.Rootless, - LegacySerial: h.LegacySerial, - DisableSeLinux: h.DisableSeLinux, - DisableGuestSeLinux: h.DisableGuestSeLinux, - ExtraMonitorSocket: extraMonitorSocket, - SnpIdBlock: h.SnpIdBlock, - SnpIdAuth: h.SnpIdAuth, - SnpGuestPolicy: h.SnpGuestPolicy, - MeasurementAlgo: h.GetMeasurementAlgo(), + HypervisorPath: hypervisor, + HypervisorPathList: h.HypervisorPathList, + KernelPath: kernel, + InitrdPath: initrd, + ImagePath: image, + RootfsType: rootfsType, + FirmwarePath: firmware, + FirmwareVolumePath: firmwareVolume, + PFlash: pflashes, + MachineAccelerators: machineAccelerators, + CPUFeatures: cpuFeatures, + KernelParams: vc.DeserializeParams(vc.KernelParamFields(kernelParams)), + KernelVerityParams: h.kernelVerityParams(), + HypervisorMachineType: machineType, + QgsPort: h.qgsPort(), + NumVCPUsF: h.defaultVCPUs(), + DefaultMaxVCPUs: h.defaultMaxVCPUs(), + MemorySize: h.defaultMemSz(), + MemSlots: h.defaultMemSlots(), + MemOffset: h.defaultMemOffset(), + DefaultMaxMemorySize: h.defaultMaxMemSz(), + VirtioMem: h.VirtioMem, + EntropySource: h.GetEntropySource(), + EntropySourceList: h.EntropySourceList, + DefaultBridges: h.defaultBridges(), + DisableBlockDeviceUse: h.DisableBlockDeviceUse, + SharedFS: sharedFS, + VirtioFSDaemon: 
h.VirtioFSDaemon, + VirtioFSDaemonList: h.VirtioFSDaemonList, + HypervisorLoglevel: h.defaultHypervisorLoglevel(), + VirtioFSCacheSize: h.VirtioFSCacheSize, + VirtioFSCache: h.defaultVirtioFSCache(), + VirtioFSQueueSize: h.VirtioFSQueueSize, + VirtioFSExtraArgs: h.VirtioFSExtraArgs, + MemPrealloc: h.MemPrealloc, + ReclaimGuestFreedMemory: h.ReclaimGuestFreedMemory, + HugePages: h.HugePages, + IOMMU: h.IOMMU, + IOMMUPlatform: h.getIOMMUPlatform(), + GuestNUMANodes: h.defaultGuestNUMANodes(), + FileBackedMemRootDir: h.FileBackedMemRootDir, + FileBackedMemRootList: h.FileBackedMemRootList, + Debug: h.Debug, + DisableNestingChecks: h.DisableNestingChecks, + BlockDeviceDriver: blockDriver, + BlockDeviceAIO: blockAIO, + BlockDeviceCacheSet: h.BlockDeviceCacheSet, + BlockDeviceCacheDirect: h.BlockDeviceCacheDirect, + BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush, + BlockDeviceLogicalSectorSize: blockLogicalSectorSize, + BlockDevicePhysicalSectorSize: blockPhysicalSectorSize, + EnableIOThreads: h.EnableIOThreads, + IndepIOThreads: h.indepiothreads(), + Msize9p: h.msize9p(), + DisableImageNvdimm: h.DisableImageNvdimm, + HotPlugVFIO: h.hotPlugVFIO(), + ColdPlugVFIO: h.coldPlugVFIO(), + PCIeRootPort: h.pcieRootPort(), + PCIeSwitchPort: h.pcieSwitchPort(), + DisableVhostNet: h.DisableVhostNet, + EnableVhostUserStore: h.EnableVhostUserStore, + VhostUserStorePath: h.vhostUserStorePath(), + VhostUserStorePathList: h.VhostUserStorePathList, + VhostUserDeviceReconnect: h.VhostUserDeviceReconnect, + SeccompSandbox: h.SeccompSandbox, + GuestHookPath: h.guestHookPath(), + RxRateLimiterMaxRate: rxRateLimiterMaxRate, + TxRateLimiterMaxRate: txRateLimiterMaxRate, + EnableAnnotations: h.EnableAnnotations, + GuestMemoryDumpPath: h.GuestMemoryDumpPath, + GuestMemoryDumpPaging: h.GuestMemoryDumpPaging, + ConfidentialGuest: h.ConfidentialGuest, + SevSnpGuest: h.SevSnpGuest, + GuestSwap: h.GuestSwap, + Rootless: h.Rootless, + LegacySerial: h.LegacySerial, + DisableSeLinux: 
h.DisableSeLinux, + DisableGuestSeLinux: h.DisableGuestSeLinux, + ExtraMonitorSocket: extraMonitorSocket, + SnpIdBlock: h.SnpIdBlock, + SnpIdAuth: h.SnpIdAuth, + SnpGuestPolicy: h.SnpGuestPolicy, + MeasurementAlgo: h.GetMeasurementAlgo(), }, nil } @@ -1283,42 +1337,58 @@ func newStratovirtHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { fmt.Errorf("cannot enable %s without daemon path in configuration file", sharedFS) } + blockLogicalSectorSize, err := h.blockDeviceLogicalSectorSize() + if err != nil { + return vc.HypervisorConfig{}, err + } + + blockPhysicalSectorSize, err := h.blockDevicePhysicalSectorSize() + if err != nil { + return vc.HypervisorConfig{}, err + } + + if err := validateBlockDeviceSectorSizes(blockLogicalSectorSize, blockPhysicalSectorSize); err != nil { + return vc.HypervisorConfig{}, err + } + return vc.HypervisorConfig{ - HypervisorPath: hypervisor, - HypervisorPathList: h.HypervisorPathList, - KernelPath: kernel, - InitrdPath: initrd, - ImagePath: image, - RootfsType: rootfsType, - KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), - KernelVerityParams: h.kernelVerityParams(), - HypervisorMachineType: machineType, - NumVCPUsF: h.defaultVCPUs(), - DefaultMaxVCPUs: h.defaultMaxVCPUs(), - MemorySize: h.defaultMemSz(), - MemSlots: h.defaultMemSlots(), - MemOffset: h.defaultMemOffset(), - DefaultMaxMemorySize: h.defaultMaxMemSz(), - EntropySource: h.GetEntropySource(), - DefaultBridges: h.defaultBridges(), - DisableBlockDeviceUse: h.DisableBlockDeviceUse, - SharedFS: sharedFS, - VirtioFSDaemon: h.VirtioFSDaemon, - VirtioFSDaemonList: h.VirtioFSDaemonList, - HypervisorLoglevel: h.defaultHypervisorLoglevel(), - VirtioFSCacheSize: h.VirtioFSCacheSize, - VirtioFSCache: h.defaultVirtioFSCache(), - VirtioFSExtraArgs: h.VirtioFSExtraArgs, - HugePages: h.HugePages, - Debug: h.Debug, - DisableNestingChecks: h.DisableNestingChecks, - BlockDeviceDriver: blockDriver, - DisableVhostNet: true, - GuestHookPath: h.guestHookPath(), - 
EnableAnnotations: h.EnableAnnotations, - DisableSeccomp: h.DisableSeccomp, - DisableSeLinux: h.DisableSeLinux, - DisableGuestSeLinux: h.DisableGuestSeLinux, + HypervisorPath: hypervisor, + HypervisorPathList: h.HypervisorPathList, + KernelPath: kernel, + InitrdPath: initrd, + ImagePath: image, + RootfsType: rootfsType, + KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), + KernelVerityParams: h.kernelVerityParams(), + HypervisorMachineType: machineType, + NumVCPUsF: h.defaultVCPUs(), + DefaultMaxVCPUs: h.defaultMaxVCPUs(), + MemorySize: h.defaultMemSz(), + MemSlots: h.defaultMemSlots(), + MemOffset: h.defaultMemOffset(), + DefaultMaxMemorySize: h.defaultMaxMemSz(), + EntropySource: h.GetEntropySource(), + DefaultBridges: h.defaultBridges(), + DisableBlockDeviceUse: h.DisableBlockDeviceUse, + SharedFS: sharedFS, + VirtioFSDaemon: h.VirtioFSDaemon, + VirtioFSDaemonList: h.VirtioFSDaemonList, + HypervisorLoglevel: h.defaultHypervisorLoglevel(), + VirtioFSCacheSize: h.VirtioFSCacheSize, + VirtioFSCache: h.defaultVirtioFSCache(), + VirtioFSExtraArgs: h.VirtioFSExtraArgs, + HugePages: h.HugePages, + Debug: h.Debug, + DisableNestingChecks: h.DisableNestingChecks, + BlockDeviceDriver: blockDriver, + BlockDeviceLogicalSectorSize: blockLogicalSectorSize, + BlockDevicePhysicalSectorSize: blockPhysicalSectorSize, + DisableVhostNet: true, + GuestHookPath: h.guestHookPath(), + EnableAnnotations: h.EnableAnnotations, + DisableSeccomp: h.DisableSeccomp, + DisableSeLinux: h.DisableSeLinux, + DisableGuestSeLinux: h.DisableGuestSeLinux, }, nil } diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 835ba2b97c..ef9e70f8ec 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -708,6 +708,41 @@ func TestNewQemuHypervisorConfig(t *testing.T) { } +func TestValidateBlockDeviceSectorSize(t *testing.T) { + assert := assert.New(t) + + for _, size := range []uint32{0, 512, 1024, 
2048, 4096, 8192, 16384, 32768, 65536} { + assert.NoError(validateBlockDeviceSectorSize("test_field", size), "expected size %d to be accepted", size) + } + + for _, size := range []uint32{3, 100, 1000, 3000, 5000} { + assert.Error(validateBlockDeviceSectorSize("test_field", size), "expected non-power-of-2 size %d to be rejected", size) + } + + for _, size := range []uint32{1, 256} { + assert.Error(validateBlockDeviceSectorSize("test_field", size), "expected below-minimum size %d to be rejected", size) + } + + for _, size := range []uint32{131072, 1048576} { + assert.Error(validateBlockDeviceSectorSize("test_field", size), "expected above-maximum size %d to be rejected", size) + } +} + +func TestValidateBlockDeviceSectorSizes(t *testing.T) { + assert := assert.New(t) + + assert.NoError(validateBlockDeviceSectorSizes(0, 0)) + assert.NoError(validateBlockDeviceSectorSizes(512, 0)) + assert.NoError(validateBlockDeviceSectorSizes(0, 4096)) + assert.NoError(validateBlockDeviceSectorSizes(512, 4096)) + assert.NoError(validateBlockDeviceSectorSizes(4096, 4096)) + assert.NoError(validateBlockDeviceSectorSizes(512, 512)) + + assert.Error(validateBlockDeviceSectorSizes(4096, 512), "logical > physical should be rejected") + assert.Error(validateBlockDeviceSectorSizes(4096, 1024), "logical > physical should be rejected") + assert.Error(validateBlockDeviceSectorSizes(65536, 512), "logical > physical should be rejected") +} + func TestNewFirecrackerHypervisorConfig(t *testing.T) { dir := t.TempDir() diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index b8a5285f66..b09a97e994 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -920,9 +920,39 @@ func addHypervisorBlockOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) return err } - return newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceCacheNoflush).setBool(func(blockDeviceCacheNoflush bool) { + if err := newAnnotationConfiguration(ocispec, 
vcAnnotations.BlockDeviceCacheNoflush).setBool(func(blockDeviceCacheNoflush bool) { sbConfig.HypervisorConfig.BlockDeviceCacheNoflush = blockDeviceCacheNoflush - }) + }); err != nil { + return err + } + + if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceLogicalSectorSize).setUintWithCheck(func(size uint64) error { + if size != 0 && (size < 512 || size > 65536 || (size&(size-1)) != 0) { + return fmt.Errorf("invalid %s %d: must be 0 or a power of 2 between 512 and 65536", vcAnnotations.BlockDeviceLogicalSectorSize, size) + } + sbConfig.HypervisorConfig.BlockDeviceLogicalSectorSize = uint32(size) + return nil + }); err != nil { + return err + } + + if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDevicePhysicalSectorSize).setUintWithCheck(func(size uint64) error { + if size != 0 && (size < 512 || size > 65536 || (size&(size-1)) != 0) { + return fmt.Errorf("invalid %s %d: must be 0 or a power of 2 between 512 and 65536", vcAnnotations.BlockDevicePhysicalSectorSize, size) + } + sbConfig.HypervisorConfig.BlockDevicePhysicalSectorSize = uint32(size) + return nil + }); err != nil { + return err + } + + logical := sbConfig.HypervisorConfig.BlockDeviceLogicalSectorSize + physical := sbConfig.HypervisorConfig.BlockDevicePhysicalSectorSize + if logical != 0 && physical != 0 && logical > physical { + return fmt.Errorf("invalid sector sizes: logical (%d) must not be larger than physical (%d)", logical, physical) + } + + return nil } func addHypervisorVirtioFsOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error { diff --git a/src/runtime/pkg/oci/utils_test.go b/src/runtime/pkg/oci/utils_test.go index 1d38baaae7..9046e93eb7 100644 --- a/src/runtime/pkg/oci/utils_test.go +++ b/src/runtime/pkg/oci/utils_test.go @@ -665,6 +665,8 @@ func TestAddHypervisorAnnotations(t *testing.T) { // 10Mbit ocispec.Annotations[vcAnnotations.RxRateLimiterMaxRate] = "10000000" ocispec.Annotations[vcAnnotations.TxRateLimiterMaxRate] 
= "10000000" + ocispec.Annotations[vcAnnotations.BlockDeviceLogicalSectorSize] = "512" + ocispec.Annotations[vcAnnotations.BlockDevicePhysicalSectorSize] = "4096" err := addAnnotations(ocispec, &sbConfig, runtimeConfig) assert.NoError(err) @@ -706,6 +708,8 @@ func TestAddHypervisorAnnotations(t *testing.T) { assert.Equal(sbConfig.HypervisorConfig.LegacySerial, true) assert.Equal(sbConfig.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000)) assert.Equal(sbConfig.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000)) + assert.Equal(sbConfig.HypervisorConfig.BlockDeviceLogicalSectorSize, uint32(512)) + assert.Equal(sbConfig.HypervisorConfig.BlockDevicePhysicalSectorSize, uint32(4096)) // In case an absurd large value is provided, the config value if not over-ridden ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "655536" @@ -726,6 +730,80 @@ func TestAddHypervisorAnnotations(t *testing.T) { assert.Error(err) } +func TestBlockDeviceSectorSizeAnnotations(t *testing.T) { + assert := assert.New(t) + + runtimeConfig := RuntimeConfig{ + HypervisorType: vc.QemuHypervisor, + } + runtimeConfig.HypervisorConfig.EnableAnnotations = []string{".*"} + + newSpec := func() specs.Spec { + return specs.Spec{Annotations: make(map[string]string)} + } + newConfig := func() vc.SandboxConfig { + return vc.SandboxConfig{Annotations: make(map[string]string)} + } + + // Valid: 0 means "use hypervisor default", no override applied + for _, v := range []string{"0", "512", "1024", "2048", "4096", "8192", "16384", "32768", "65536"} { + spec := newSpec() + cfg := newConfig() + spec.Annotations[vcAnnotations.BlockDeviceLogicalSectorSize] = v + spec.Annotations[vcAnnotations.BlockDevicePhysicalSectorSize] = v + assert.NoError(addAnnotations(spec, &cfg, runtimeConfig), "expected valid size %s to be accepted", v) + } + + // Invalid: not a power of 2 + for _, v := range []string{"3", "100", "1000", "3000", "5000"} { + spec := newSpec() + cfg := newConfig() + 
spec.Annotations[vcAnnotations.BlockDeviceLogicalSectorSize] = v + assert.Error(addAnnotations(spec, &cfg, runtimeConfig), "expected non-power-of-2 size %s to be rejected", v) + } + + // Invalid: below minimum (512) + for _, v := range []string{"1", "256"} { + spec := newSpec() + cfg := newConfig() + spec.Annotations[vcAnnotations.BlockDeviceLogicalSectorSize] = v + assert.Error(addAnnotations(spec, &cfg, runtimeConfig), "expected below-minimum size %s to be rejected", v) + } + + // Invalid: above maximum (65536) + for _, v := range []string{"131072", "1048576"} { + spec := newSpec() + cfg := newConfig() + spec.Annotations[vcAnnotations.BlockDevicePhysicalSectorSize] = v + assert.Error(addAnnotations(spec, &cfg, runtimeConfig), "expected above-maximum size %s to be rejected", v) + } + + // Logical 4096 with physical 4096 — both valid + spec := newSpec() + cfg := newConfig() + spec.Annotations[vcAnnotations.BlockDeviceLogicalSectorSize] = "4096" + spec.Annotations[vcAnnotations.BlockDevicePhysicalSectorSize] = "4096" + assert.NoError(addAnnotations(spec, &cfg, runtimeConfig)) + assert.Equal(cfg.HypervisorConfig.BlockDeviceLogicalSectorSize, uint32(4096)) + assert.Equal(cfg.HypervisorConfig.BlockDevicePhysicalSectorSize, uint32(4096)) + + // Logical 512 with physical 4096 — both valid + spec = newSpec() + cfg = newConfig() + spec.Annotations[vcAnnotations.BlockDeviceLogicalSectorSize] = "512" + spec.Annotations[vcAnnotations.BlockDevicePhysicalSectorSize] = "4096" + assert.NoError(addAnnotations(spec, &cfg, runtimeConfig)) + assert.Equal(cfg.HypervisorConfig.BlockDeviceLogicalSectorSize, uint32(512)) + assert.Equal(cfg.HypervisorConfig.BlockDevicePhysicalSectorSize, uint32(4096)) + + // Invalid: logical > physical + spec = newSpec() + cfg = newConfig() + spec.Annotations[vcAnnotations.BlockDeviceLogicalSectorSize] = "4096" + spec.Annotations[vcAnnotations.BlockDevicePhysicalSectorSize] = "512" + assert.Error(addAnnotations(spec, &cfg, runtimeConfig), "logical > 
physical should be rejected") +} + func TestAddRemoteHypervisorAnnotations(t *testing.T) { // Remote hypervisor uses DefaultVCPUs, DefaultMemory etc as annotations to pick the size of the separate VM to create, // so doesn't need to be bound by the host's capacity limits. diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 4b0fa5f7cb..b631960f6b 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -754,6 +754,16 @@ type HypervisorConfig struct { // Denotes whether flush requests for the device are ignored. BlockDeviceCacheNoflush bool + // BlockDeviceLogicalSectorSize specifies the logical sector size reported + // by block devices to the guest, in bytes. Common values are 512 and 4096. + // Set to 0 to use the hypervisor default. + BlockDeviceLogicalSectorSize uint32 + + // BlockDevicePhysicalSectorSize specifies the physical sector size reported + // by block devices to the guest, in bytes. Common values are 512 and 4096. + // Set to 0 to use the hypervisor default. + BlockDevicePhysicalSectorSize uint32 + // DisableBlockDeviceUse disallows a block device from being used. 
DisableBlockDeviceUse bool diff --git a/src/runtime/virtcontainers/persist.go b/src/runtime/virtcontainers/persist.go index 688f4b6c34..47718bc128 100644 --- a/src/runtime/virtcontainers/persist.go +++ b/src/runtime/virtcontainers/persist.go @@ -201,62 +201,64 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) { } ss.Config.HypervisorConfig = persistapi.HypervisorConfig{ - NumVCPUsF: sconfig.HypervisorConfig.NumVCPUsF, - DefaultMaxVCPUs: sconfig.HypervisorConfig.DefaultMaxVCPUs, - MemorySize: sconfig.HypervisorConfig.MemorySize, - DefaultBridges: sconfig.HypervisorConfig.DefaultBridges, - Msize9p: sconfig.HypervisorConfig.Msize9p, - MemSlots: sconfig.HypervisorConfig.MemSlots, - MemOffset: sconfig.HypervisorConfig.MemOffset, - VirtioMem: sconfig.HypervisorConfig.VirtioMem, - VirtioFSCacheSize: sconfig.HypervisorConfig.VirtioFSCacheSize, - KernelPath: sconfig.HypervisorConfig.KernelPath, - ImagePath: sconfig.HypervisorConfig.ImagePath, - InitrdPath: sconfig.HypervisorConfig.InitrdPath, - FirmwarePath: sconfig.HypervisorConfig.FirmwarePath, - MachineAccelerators: sconfig.HypervisorConfig.MachineAccelerators, - CPUFeatures: sconfig.HypervisorConfig.CPUFeatures, - HypervisorPath: sconfig.HypervisorConfig.HypervisorPath, - HypervisorPathList: sconfig.HypervisorConfig.HypervisorPathList, - JailerPath: sconfig.HypervisorConfig.JailerPath, - JailerPathList: sconfig.HypervisorConfig.JailerPathList, - BlockDeviceDriver: sconfig.HypervisorConfig.BlockDeviceDriver, - HypervisorMachineType: sconfig.HypervisorConfig.HypervisorMachineType, - MemoryPath: sconfig.HypervisorConfig.MemoryPath, - DevicesStatePath: sconfig.HypervisorConfig.DevicesStatePath, - EntropySource: sconfig.HypervisorConfig.EntropySource, - EntropySourceList: sconfig.HypervisorConfig.EntropySourceList, - SharedFS: sconfig.HypervisorConfig.SharedFS, - VirtioFSDaemon: sconfig.HypervisorConfig.VirtioFSDaemon, - VirtioFSDaemonList: sconfig.HypervisorConfig.VirtioFSDaemonList, - VirtioFSCache: 
sconfig.HypervisorConfig.VirtioFSCache, - VirtioFSExtraArgs: sconfig.HypervisorConfig.VirtioFSExtraArgs[:], - BlockDeviceCacheSet: sconfig.HypervisorConfig.BlockDeviceCacheSet, - BlockDeviceCacheDirect: sconfig.HypervisorConfig.BlockDeviceCacheDirect, - BlockDeviceCacheNoflush: sconfig.HypervisorConfig.BlockDeviceCacheNoflush, - DisableBlockDeviceUse: sconfig.HypervisorConfig.DisableBlockDeviceUse, - EnableIOThreads: sconfig.HypervisorConfig.EnableIOThreads, - IndepIOThreads: sconfig.HypervisorConfig.IndepIOThreads, - Debug: sconfig.HypervisorConfig.Debug, - MemPrealloc: sconfig.HypervisorConfig.MemPrealloc, - HugePages: sconfig.HypervisorConfig.HugePages, - FileBackedMemRootDir: sconfig.HypervisorConfig.FileBackedMemRootDir, - FileBackedMemRootList: sconfig.HypervisorConfig.FileBackedMemRootList, - DisableNestingChecks: sconfig.HypervisorConfig.DisableNestingChecks, - DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm, - BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate, - BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate, - DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet, - EnableVhostUserStore: sconfig.HypervisorConfig.EnableVhostUserStore, - SeccompSandbox: sconfig.HypervisorConfig.SeccompSandbox, - VhostUserStorePath: sconfig.HypervisorConfig.VhostUserStorePath, - VhostUserStorePathList: sconfig.HypervisorConfig.VhostUserStorePathList, - GuestHookPath: sconfig.HypervisorConfig.GuestHookPath, - VMid: sconfig.HypervisorConfig.VMid, - RxRateLimiterMaxRate: sconfig.HypervisorConfig.RxRateLimiterMaxRate, - TxRateLimiterMaxRate: sconfig.HypervisorConfig.TxRateLimiterMaxRate, - SGXEPCSize: sconfig.HypervisorConfig.SGXEPCSize, - EnableAnnotations: sconfig.HypervisorConfig.EnableAnnotations, + NumVCPUsF: sconfig.HypervisorConfig.NumVCPUsF, + DefaultMaxVCPUs: sconfig.HypervisorConfig.DefaultMaxVCPUs, + MemorySize: sconfig.HypervisorConfig.MemorySize, + DefaultBridges: sconfig.HypervisorConfig.DefaultBridges, + Msize9p: 
sconfig.HypervisorConfig.Msize9p, + MemSlots: sconfig.HypervisorConfig.MemSlots, + MemOffset: sconfig.HypervisorConfig.MemOffset, + VirtioMem: sconfig.HypervisorConfig.VirtioMem, + VirtioFSCacheSize: sconfig.HypervisorConfig.VirtioFSCacheSize, + KernelPath: sconfig.HypervisorConfig.KernelPath, + ImagePath: sconfig.HypervisorConfig.ImagePath, + InitrdPath: sconfig.HypervisorConfig.InitrdPath, + FirmwarePath: sconfig.HypervisorConfig.FirmwarePath, + MachineAccelerators: sconfig.HypervisorConfig.MachineAccelerators, + CPUFeatures: sconfig.HypervisorConfig.CPUFeatures, + HypervisorPath: sconfig.HypervisorConfig.HypervisorPath, + HypervisorPathList: sconfig.HypervisorConfig.HypervisorPathList, + JailerPath: sconfig.HypervisorConfig.JailerPath, + JailerPathList: sconfig.HypervisorConfig.JailerPathList, + BlockDeviceDriver: sconfig.HypervisorConfig.BlockDeviceDriver, + HypervisorMachineType: sconfig.HypervisorConfig.HypervisorMachineType, + MemoryPath: sconfig.HypervisorConfig.MemoryPath, + DevicesStatePath: sconfig.HypervisorConfig.DevicesStatePath, + EntropySource: sconfig.HypervisorConfig.EntropySource, + EntropySourceList: sconfig.HypervisorConfig.EntropySourceList, + SharedFS: sconfig.HypervisorConfig.SharedFS, + VirtioFSDaemon: sconfig.HypervisorConfig.VirtioFSDaemon, + VirtioFSDaemonList: sconfig.HypervisorConfig.VirtioFSDaemonList, + VirtioFSCache: sconfig.HypervisorConfig.VirtioFSCache, + VirtioFSExtraArgs: sconfig.HypervisorConfig.VirtioFSExtraArgs[:], + BlockDeviceCacheSet: sconfig.HypervisorConfig.BlockDeviceCacheSet, + BlockDeviceCacheDirect: sconfig.HypervisorConfig.BlockDeviceCacheDirect, + BlockDeviceCacheNoflush: sconfig.HypervisorConfig.BlockDeviceCacheNoflush, + BlockDeviceLogicalSectorSize: sconfig.HypervisorConfig.BlockDeviceLogicalSectorSize, + BlockDevicePhysicalSectorSize: sconfig.HypervisorConfig.BlockDevicePhysicalSectorSize, + DisableBlockDeviceUse: sconfig.HypervisorConfig.DisableBlockDeviceUse, + EnableIOThreads: 
sconfig.HypervisorConfig.EnableIOThreads, + IndepIOThreads: sconfig.HypervisorConfig.IndepIOThreads, + Debug: sconfig.HypervisorConfig.Debug, + MemPrealloc: sconfig.HypervisorConfig.MemPrealloc, + HugePages: sconfig.HypervisorConfig.HugePages, + FileBackedMemRootDir: sconfig.HypervisorConfig.FileBackedMemRootDir, + FileBackedMemRootList: sconfig.HypervisorConfig.FileBackedMemRootList, + DisableNestingChecks: sconfig.HypervisorConfig.DisableNestingChecks, + DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm, + BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate, + BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate, + DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet, + EnableVhostUserStore: sconfig.HypervisorConfig.EnableVhostUserStore, + SeccompSandbox: sconfig.HypervisorConfig.SeccompSandbox, + VhostUserStorePath: sconfig.HypervisorConfig.VhostUserStorePath, + VhostUserStorePathList: sconfig.HypervisorConfig.VhostUserStorePathList, + GuestHookPath: sconfig.HypervisorConfig.GuestHookPath, + VMid: sconfig.HypervisorConfig.VMid, + RxRateLimiterMaxRate: sconfig.HypervisorConfig.RxRateLimiterMaxRate, + TxRateLimiterMaxRate: sconfig.HypervisorConfig.TxRateLimiterMaxRate, + SGXEPCSize: sconfig.HypervisorConfig.SGXEPCSize, + EnableAnnotations: sconfig.HypervisorConfig.EnableAnnotations, } ss.Config.KataAgentConfig = &persistapi.KataAgentConfig{ @@ -441,65 +443,67 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) { hconf := savedConf.HypervisorConfig sconfig.HypervisorConfig = HypervisorConfig{ - NumVCPUsF: hconf.NumVCPUsF, - DefaultMaxVCPUs: hconf.DefaultMaxVCPUs, - MemorySize: hconf.MemorySize, - DefaultBridges: hconf.DefaultBridges, - Msize9p: hconf.Msize9p, - MemSlots: hconf.MemSlots, - MemOffset: hconf.MemOffset, - VirtioMem: hconf.VirtioMem, - VirtioFSCacheSize: hconf.VirtioFSCacheSize, - KernelPath: hconf.KernelPath, - ImagePath: hconf.ImagePath, - InitrdPath: hconf.InitrdPath, - FirmwarePath: hconf.FirmwarePath, - 
MachineAccelerators: hconf.MachineAccelerators, - CPUFeatures: hconf.CPUFeatures, - HypervisorPath: hconf.HypervisorPath, - HypervisorPathList: hconf.HypervisorPathList, - JailerPath: hconf.JailerPath, - JailerPathList: hconf.JailerPathList, - BlockDeviceDriver: hconf.BlockDeviceDriver, - HypervisorMachineType: hconf.HypervisorMachineType, - MemoryPath: hconf.MemoryPath, - DevicesStatePath: hconf.DevicesStatePath, - EntropySource: hconf.EntropySource, - EntropySourceList: hconf.EntropySourceList, - SharedFS: hconf.SharedFS, - VirtioFSDaemon: hconf.VirtioFSDaemon, - VirtioFSDaemonList: hconf.VirtioFSDaemonList, - VirtioFSCache: hconf.VirtioFSCache, - VirtioFSExtraArgs: hconf.VirtioFSExtraArgs[:], - BlockDeviceCacheSet: hconf.BlockDeviceCacheSet, - BlockDeviceCacheDirect: hconf.BlockDeviceCacheDirect, - BlockDeviceCacheNoflush: hconf.BlockDeviceCacheNoflush, - DisableBlockDeviceUse: hconf.DisableBlockDeviceUse, - EnableIOThreads: hconf.EnableIOThreads, - IndepIOThreads: hconf.IndepIOThreads, - Debug: hconf.Debug, - MemPrealloc: hconf.MemPrealloc, - HugePages: hconf.HugePages, - FileBackedMemRootDir: hconf.FileBackedMemRootDir, - FileBackedMemRootList: hconf.FileBackedMemRootList, - DisableNestingChecks: hconf.DisableNestingChecks, - DisableImageNvdimm: hconf.DisableImageNvdimm, - HotPlugVFIO: hconf.HotPlugVFIO, - ColdPlugVFIO: hconf.ColdPlugVFIO, - PCIeRootPort: hconf.PCIeRootPort, - PCIeSwitchPort: hconf.PCIeSwitchPort, - BootToBeTemplate: hconf.BootToBeTemplate, - BootFromTemplate: hconf.BootFromTemplate, - DisableVhostNet: hconf.DisableVhostNet, - EnableVhostUserStore: hconf.EnableVhostUserStore, - VhostUserStorePath: hconf.VhostUserStorePath, - VhostUserStorePathList: hconf.VhostUserStorePathList, - GuestHookPath: hconf.GuestHookPath, - VMid: hconf.VMid, - RxRateLimiterMaxRate: hconf.RxRateLimiterMaxRate, - TxRateLimiterMaxRate: hconf.TxRateLimiterMaxRate, - SGXEPCSize: hconf.SGXEPCSize, - EnableAnnotations: hconf.EnableAnnotations, + NumVCPUsF: hconf.NumVCPUsF, 
+ DefaultMaxVCPUs: hconf.DefaultMaxVCPUs, + MemorySize: hconf.MemorySize, + DefaultBridges: hconf.DefaultBridges, + Msize9p: hconf.Msize9p, + MemSlots: hconf.MemSlots, + MemOffset: hconf.MemOffset, + VirtioMem: hconf.VirtioMem, + VirtioFSCacheSize: hconf.VirtioFSCacheSize, + KernelPath: hconf.KernelPath, + ImagePath: hconf.ImagePath, + InitrdPath: hconf.InitrdPath, + FirmwarePath: hconf.FirmwarePath, + MachineAccelerators: hconf.MachineAccelerators, + CPUFeatures: hconf.CPUFeatures, + HypervisorPath: hconf.HypervisorPath, + HypervisorPathList: hconf.HypervisorPathList, + JailerPath: hconf.JailerPath, + JailerPathList: hconf.JailerPathList, + BlockDeviceDriver: hconf.BlockDeviceDriver, + HypervisorMachineType: hconf.HypervisorMachineType, + MemoryPath: hconf.MemoryPath, + DevicesStatePath: hconf.DevicesStatePath, + EntropySource: hconf.EntropySource, + EntropySourceList: hconf.EntropySourceList, + SharedFS: hconf.SharedFS, + VirtioFSDaemon: hconf.VirtioFSDaemon, + VirtioFSDaemonList: hconf.VirtioFSDaemonList, + VirtioFSCache: hconf.VirtioFSCache, + VirtioFSExtraArgs: hconf.VirtioFSExtraArgs[:], + BlockDeviceCacheSet: hconf.BlockDeviceCacheSet, + BlockDeviceCacheDirect: hconf.BlockDeviceCacheDirect, + BlockDeviceCacheNoflush: hconf.BlockDeviceCacheNoflush, + BlockDeviceLogicalSectorSize: hconf.BlockDeviceLogicalSectorSize, + BlockDevicePhysicalSectorSize: hconf.BlockDevicePhysicalSectorSize, + DisableBlockDeviceUse: hconf.DisableBlockDeviceUse, + EnableIOThreads: hconf.EnableIOThreads, + IndepIOThreads: hconf.IndepIOThreads, + Debug: hconf.Debug, + MemPrealloc: hconf.MemPrealloc, + HugePages: hconf.HugePages, + FileBackedMemRootDir: hconf.FileBackedMemRootDir, + FileBackedMemRootList: hconf.FileBackedMemRootList, + DisableNestingChecks: hconf.DisableNestingChecks, + DisableImageNvdimm: hconf.DisableImageNvdimm, + HotPlugVFIO: hconf.HotPlugVFIO, + ColdPlugVFIO: hconf.ColdPlugVFIO, + PCIeRootPort: hconf.PCIeRootPort, + PCIeSwitchPort: hconf.PCIeSwitchPort, + 
BootToBeTemplate: hconf.BootToBeTemplate, + BootFromTemplate: hconf.BootFromTemplate, + DisableVhostNet: hconf.DisableVhostNet, + EnableVhostUserStore: hconf.EnableVhostUserStore, + VhostUserStorePath: hconf.VhostUserStorePath, + VhostUserStorePathList: hconf.VhostUserStorePathList, + GuestHookPath: hconf.GuestHookPath, + VMid: hconf.VMid, + RxRateLimiterMaxRate: hconf.RxRateLimiterMaxRate, + TxRateLimiterMaxRate: hconf.TxRateLimiterMaxRate, + SGXEPCSize: hconf.SGXEPCSize, + EnableAnnotations: hconf.EnableAnnotations, } sconfig.AgentConfig = KataAgentConfig{ diff --git a/src/runtime/virtcontainers/persist/api/config.go b/src/runtime/virtcontainers/persist/api/config.go index 82d1cceb9d..ecaeed62cc 100644 --- a/src/runtime/virtcontainers/persist/api/config.go +++ b/src/runtime/virtcontainers/persist/api/config.go @@ -157,6 +157,14 @@ type HypervisorConfig struct { // Denotes whether flush requests for the device are ignored. BlockDeviceCacheNoflush bool + // BlockDeviceLogicalSectorSize specifies the logical sector size reported + // by block devices to the guest, in bytes. + BlockDeviceLogicalSectorSize uint32 + + // BlockDevicePhysicalSectorSize specifies the physical sector size reported + // by block devices to the guest, in bytes. + BlockDevicePhysicalSectorSize uint32 + // DisableBlockDeviceUse disallows a block device from being used. DisableBlockDeviceUse bool diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index 63caa831bc..fe34c47acb 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -242,6 +242,21 @@ const ( // Denotes whether flush requests for the device are ignored. 
BlockDeviceCacheNoflush = kataAnnotHypervisorPrefix + "block_device_cache_noflush" + // BlockDeviceLogicalSectorSize is a sandbox annotation that specifies the logical sector size + // reported by block devices to the guest, in bytes. Common values are 512 and 4096. + // Set to 0 to use the hypervisor default. + // NOTE: the annotation key uses the abbreviated "blk_logical_sector_size" rather than + // "block_device_logical_sector_size" (as used in the config file) because Kubernetes + // enforces a 63-character limit on annotation name segments, and the full name with the + // "io.katacontainers.config.hypervisor." prefix would exceed that limit. + BlockDeviceLogicalSectorSize = kataAnnotHypervisorPrefix + "blk_logical_sector_size" + + // BlockDevicePhysicalSectorSize is a sandbox annotation that specifies the physical sector size + // reported by block devices to the guest, in bytes. Common values are 512 and 4096. + // Set to 0 to use the hypervisor default. + // NOTE: see BlockDeviceLogicalSectorSize for the reason the annotation key is abbreviated. + BlockDevicePhysicalSectorSize = kataAnnotHypervisorPrefix + "blk_physical_sector_size" + // RxRateLimiterMaxRate is a sandbox annotation that specifies max rate on network I/O inbound bandwidth. 
RxRateLimiterMaxRate = kataAnnotHypervisorPrefix + "rx_rate_limiter_max_rate" diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 36f96c418a..4066c85e48 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1686,7 +1686,7 @@ func (q *qemu) hotplugAddBlockDevice(ctx context.Context, drive *config.BlockDri iothreadID = fmt.Sprintf("%s_%d", indepIOThreadsPrefix, 0) } - if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, queues, true, defaultDisableModern, iothreadID); err != nil { + if err = q.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, addr, bridge.ID, romFile, queues, true, defaultDisableModern, iothreadID, q.config.BlockDeviceLogicalSectorSize, q.config.BlockDevicePhysicalSectorSize); err != nil { return err } case q.config.BlockDeviceDriver == config.VirtioBlockCCW: @@ -1705,7 +1705,7 @@ func (q *qemu) hotplugAddBlockDevice(ctx context.Context, drive *config.BlockDri if err != nil { return err } - if err = q.qmpMonitorCh.qmp.ExecuteDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, devNoHotplug, "", true, false); err != nil { + if err = q.qmpMonitorCh.qmp.ExecuteDeviceAdd(q.qmpMonitorCh.ctx, drive.ID, devID, driver, devNoHotplug, "", true, false, q.config.BlockDeviceLogicalSectorSize, q.config.BlockDevicePhysicalSectorSize); err != nil { return err } case q.config.BlockDeviceDriver == config.VirtioSCSI: diff --git a/src/runtime/virtcontainers/stratovirt.go b/src/runtime/virtcontainers/stratovirt.go index fc82019d0e..3f6a4c1b18 100644 --- a/src/runtime/virtcontainers/stratovirt.go +++ b/src/runtime/virtcontainers/stratovirt.go @@ -905,7 +905,7 @@ func (s *stratovirt) hotplugBlk(ctx context.Context, drive *config.BlockDrive, o } devAddr := fmt.Sprintf("%d", slot) - if err := s.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(s.qmpMonitorCh.ctx, drive.ID, drive.ID, driver, devAddr, "", "", 0, 
false, false, ""); err != nil { + if err := s.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(s.qmpMonitorCh.ctx, drive.ID, drive.ID, driver, devAddr, "", "", 0, false, false, "", s.config.BlockDeviceLogicalSectorSize, s.config.BlockDevicePhysicalSectorSize); err != nil { return err } case RemoveDevice: